Line data Source code
1 : /*
2 : * Copyright(c) 2019 Intel Corporation
3 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 : */
5 : /*
6 : * Copyright(c) 2019 Netflix, Inc.
7 : * SPDX - License - Identifier: BSD - 2 - Clause - Patent
8 : */
9 :
10 : #include <stdio.h>
11 :
12 : #include "aom_dsp_rtcd.h"
13 : #include "EbDefinitions.h"
14 :
15 : #include "EbPictureControlSet.h"
16 : #include "EbSequenceControlSet.h"
17 : #include "EbMotionEstimation.h"
18 : #include "EbUtility.h"
19 :
20 : #include "EbComputeSAD.h"
21 : #include "EbReferenceObject.h"
22 : #include "EbMeSadCalculation.h"
23 :
24 : #include "EbIntraPrediction.h"
25 : #include "EbLambdaRateTables.h"
26 : #include "EbPictureOperators.h"
27 : #define OIS_TH_COUNT 4
28 :
29 : int32_t OisPointTh[3][MAX_TEMPORAL_LAYERS][OIS_TH_COUNT] = {
30 : {// Light OIS
31 : {-20, 50, 150, 200},
32 : {-20, 50, 150, 200},
33 : {-20, 50, 100, 150},
34 : {-20, 50, 200, 300},
35 : {-20, 50, 200, 300},
36 : {-20, 50, 200, 300}},
37 : {// Default OIS
38 : {-150, 0, 150, 200},
39 : {-150, 0, 150, 200},
40 : {-125, 0, 100, 150},
41 : {-50, 50, 200, 300},
42 : {-50, 50, 200, 300},
43 : {-50, 50, 200, 300}},
44 : {// Heavy OIS
45 : {-400, -300, -200, 0},
46 : {-400, -300, -200, 0},
47 : {-400, -300, -200, 0},
48 : {-400, -300, -200, 0},
49 : {-400, -300, -200, 0},
50 : {-400, -300, -200, 0}}};
51 :
52 : #define AVCCODEL
53 : /********************************************
54 : * Constants
55 : ********************************************/
56 :
57 : #define MAX_INTRA_IN_MD 9
58 : #define REFERENCE_PIC_LIST_0 0
59 : #define REFERENCE_PIC_LIST_1 1
60 :
/*******************************************
 * compute8x4_sad_kernel_c
 *   Unoptimized (plain C) SAD over a block that is
 *   8 samples wide and 4 rows high.
 *
 *   The strides are the distance, in samples, between
 *   two consecutive rows that are visited, so a caller
 *   may pass a doubled stride to visit every other row.
 *
 *   Returns the accumulated EB_ABS_DIFF(src, ref) of
 *   the 32 sample pairs.
 *******************************************/
uint32_t compute8x4_sad_kernel_c(
    uint8_t *src,        // input parameter, source samples Ptr
    uint32_t src_stride, // input parameter, source stride
    uint8_t *ref,        // input parameter, reference samples Ptr
    uint32_t ref_stride) // input parameter, reference stride
{
    uint32_t sad = 0;
    uint32_t row;
    uint32_t col;

    for (row = 0; row < 4; ++row) {
        // One row of 8 sample pairs.
        for (col = 0; col < 8; ++col)
            sad += EB_ABS_DIFF(src[col], ref[col]);

        src += src_stride;
        ref += ref_stride;
    }

    return sad;
}
/*******************************************
 * compute8x8_sad_kernel_c
 *   Unoptimized (plain C) SAD over a block that is
 *   8 samples wide and 8 rows high.
 *
 *   The strides are the distance, in samples, between
 *   two consecutive rows of the respective buffer.
 *
 *   Returns the accumulated EB_ABS_DIFF(src, ref) of
 *   the 64 sample pairs.
 *******************************************/
uint32_t compute8x8_sad_kernel_c(
    uint8_t *src,        // input parameter, source samples Ptr
    uint32_t src_stride, // input parameter, source stride
    uint8_t *ref,        // input parameter, reference samples Ptr
    uint32_t ref_stride) // input parameter, reference stride
{
    uint32_t sad = 0;
    uint32_t row;
    uint32_t col;

    for (row = 0; row < 8; ++row) {
        // One row of 8 sample pairs.
        for (col = 0; col < 8; ++col)
            sad += EB_ABS_DIFF(src[col], ref[col]);

        src += src_stride;
        ref += ref_stride;
    }

    return sad;
}
119 :
120 : /*******************************************
121 : Calcualte SAD for 16x16 and its 8x8 sublcoks
122 : and check if there is improvment, if yes keep
123 : the best SAD+MV
124 : *******************************************/
125 0 : void ext_sad_calculation_8x8_16x16_c(
126 : uint8_t *src, uint32_t src_stride, uint8_t *ref, uint32_t ref_stride,
127 : uint32_t *p_best_sad8x8, uint32_t *p_best_sad16x16, uint32_t *p_best_mv8x8,
128 : uint32_t *p_best_mv16x16, uint32_t mv, uint32_t *p_sad16x16,
129 : uint32_t *p_sad8x8, EbBool sub_sad) {
130 : uint32_t sad16x16;
131 :
132 0 : if (sub_sad) {
133 0 : p_sad8x8[0] = (compute8x4_sad_kernel_c(src + 0 * src_stride + 0,
134 : 2 * src_stride,
135 : ref + 0 * ref_stride + 0,
136 : 2 * ref_stride))
137 0 : << 1;
138 0 : p_sad8x8[1] = (compute8x4_sad_kernel_c(src + 0 * src_stride + 8,
139 : 2 * src_stride,
140 : ref + 0 * ref_stride + 8,
141 : 2 * ref_stride))
142 0 : << 1;
143 0 : p_sad8x8[2] = (compute8x4_sad_kernel_c(src + 8 * src_stride + 0,
144 : 2 * src_stride,
145 0 : ref + 8 * ref_stride + 0,
146 : 2 * ref_stride))
147 0 : << 1;
148 0 : p_sad8x8[3] = (compute8x4_sad_kernel_c(src + 8 * src_stride + 8,
149 : 2 * src_stride,
150 0 : ref + 8 * ref_stride + 8,
151 : 2 * ref_stride))
152 0 : << 1;
153 : } else {
154 0 : p_sad8x8[0] = compute8x8_sad_kernel_c(src + 0 * src_stride + 0,
155 : src_stride,
156 : ref + 0 * ref_stride + 0,
157 : ref_stride);
158 0 : p_sad8x8[1] = compute8x8_sad_kernel_c(src + 0 * src_stride + 8,
159 : src_stride,
160 : ref + 0 * ref_stride + 8,
161 : ref_stride);
162 0 : p_sad8x8[2] = compute8x8_sad_kernel_c(src + 8 * src_stride + 0,
163 : src_stride,
164 0 : ref + 8 * ref_stride + 0,
165 : ref_stride);
166 0 : p_sad8x8[3] = compute8x8_sad_kernel_c(src + 8 * src_stride + 8,
167 : src_stride,
168 0 : ref + 8 * ref_stride + 8,
169 : ref_stride);
170 : }
171 :
172 0 : if (p_sad8x8[0] < p_best_sad8x8[0]) {
173 0 : p_best_sad8x8[0] = (uint32_t)p_sad8x8[0];
174 0 : p_best_mv8x8[0] = mv;
175 : }
176 :
177 0 : if (p_sad8x8[1] < p_best_sad8x8[1]) {
178 0 : p_best_sad8x8[1] = (uint32_t)p_sad8x8[1];
179 0 : p_best_mv8x8[1] = mv;
180 : }
181 :
182 0 : if (p_sad8x8[2] < p_best_sad8x8[2]) {
183 0 : p_best_sad8x8[2] = (uint32_t)p_sad8x8[2];
184 0 : p_best_mv8x8[2] = mv;
185 : }
186 :
187 0 : if (p_sad8x8[3] < p_best_sad8x8[3]) {
188 0 : p_best_sad8x8[3] = (uint32_t)p_sad8x8[3];
189 0 : p_best_mv8x8[3] = mv;
190 : }
191 :
192 0 : sad16x16 = p_sad8x8[0] + p_sad8x8[1] + p_sad8x8[2] + p_sad8x8[3];
193 0 : if (sad16x16 < p_best_sad16x16[0]) {
194 0 : p_best_sad16x16[0] = (uint32_t)sad16x16;
195 0 : p_best_mv16x16[0] = mv;
196 : }
197 :
198 0 : *p_sad16x16 = (uint32_t)sad16x16;
199 0 : }
200 :
/*******************************************
 * ext_sad_calculation_32x32_64x64_c
 *   Derives the four 32x32 SADs and the 64x64 SAD from
 *   sixteen 16x16 SADs for one candidate motion vector,
 *   and records the candidate wherever it beats the
 *   best SAD seen so far.
 *
 *   p_sad16x16[0..15]   : in, 16x16 SADs; entries
 *                         4*i .. 4*i+3 form 32x32 block i
 *   p_sad32x32[0..3]    : out, the four 32x32 SADs
 *   p_best_sad32x32/mv32x32 : in/out, best SAD and MV per 32x32
 *   p_best_sad64x64/mv64x64 : in/out, best 64x64 SAD and MV
 *   mv                  : candidate MV stored on improvement
 *******************************************/
void ext_sad_calculation_32x32_64x64_c(uint32_t *p_sad16x16,
                                       uint32_t *p_best_sad32x32,
                                       uint32_t *p_best_sad64x64,
                                       uint32_t *p_best_mv32x32,
                                       uint32_t *p_best_mv64x64, uint32_t mv,
                                       uint32_t *p_sad32x32) {
    uint32_t total64 = 0;
    uint32_t blk;

    for (blk = 0; blk < 4; ++blk) {
        const uint32_t *group = p_sad16x16 + 4 * blk;
        const uint32_t total32 = group[0] + group[1] + group[2] + group[3];

        p_sad32x32[blk] = total32;
        if (total32 < p_best_sad32x32[blk]) {
            p_best_sad32x32[blk] = total32;
            p_best_mv32x32[blk] = mv;
        }

        total64 += total32;
    }

    if (total64 < p_best_sad64x64[0]) {
        p_best_sad64x64[0] = total64;
        p_best_mv64x64[0] = mv;
    }
}
247 :
248 : /*******************************************
249 : * GetEightHorizontalSearchPointResults_8x8_16x16_PU
250 : *******************************************/
251 0 : void get_eight_horizontal_search_point_results_8x8_16x16_pu_c(
252 : uint8_t *src, uint32_t src_stride, uint8_t *ref, uint32_t ref_stride,
253 : uint32_t *p_best_sad8x8, uint32_t *p_best_mv8x8, uint32_t *p_best_sad16x16,
254 : uint32_t *p_best_mv16x16, uint32_t mv, uint16_t *p_sad16x16,
255 : EbBool sub_sad) {
256 : uint32_t xSearchIndex;
257 : int16_t xMv, yMv;
258 : uint32_t sad8x8[4];
259 : uint16_t sad16x16;
260 :
261 : /*
262 : ------------------------------------- -----------------------------------
263 : | 8x8_00 | 8x8_01 | 8x8_04 | 8x8_05 | 8x8_16 | 8x8_17 | 8x8_20 | 8x8_21 |
264 : ------------------------------------- -----------------------------------
265 : | 8x8_02 | 8x8_03 | 8x8_06 | 8x8_07 | 8x8_18 | 8x8_19 | 8x8_22 | 8x8_23 |
266 : ----------------------- ----------- ---------------------- ----------
267 : | 8x8_08 | 8x8_09 | 8x8_12 | 8x8_13 | 8x8_24 | 8x8_25 | 8x8_29 | 8x8_29 |
268 : ---------------------- ----------- --------------------- ----------
269 : | 8x8_10 | 8x8_11 | 8x8_14 | 8x8_15 | 8x8_26 | 8x8_27 | 8x8_30 | 8x8_31 |
270 : ------------------------------------- -----------------------------------
271 :
272 : ------------------------------------- -----------------------------------
273 : | 8x8_32 | 8x8_33 | 8x8_36 | 8x8_37 | 8x8_48 | 8x8_49 | 8x8_52 | 8x8_53 |
274 : ------------------------------------- -----------------------------------
275 : | 8x8_34 | 8x8_35 | 8x8_38 | 8x8_39 | 8x8_50 | 8x8_51 | 8x8_54 | 8x8_55 |
276 : ----------------------- ----------- ---------------------- ----------
277 : | 8x8_40 | 8x8_41 | 8x8_44 | 8x8_45 | 8x8_56 | 8x8_57 | 8x8_60 | 8x8_61 |
278 : ---------------------- ----------- --------------------- ----------
279 : | 8x8_42 | 8x8_43 | 8x8_46 | 8x8_48 | 8x8_58 | 8x8_59 | 8x8_62 | 8x8_63 |
280 : ------------------------------------- -----------------------------------
281 : */
282 :
283 : /*
284 : ---------------------- ----------------------
285 : | 16x16_0 | 16x16_1 | 16x16_4 | 16x16_5 |
286 : ---------------------- ----------------------
287 : | 16x16_2 | 16x16_3 | 16x16_6 | 16x16_7 |
288 : ----------------------- -----------------------
289 : | 16x16_8 | 16x16_9 | 16x16_12 | 16x16_13 |
290 : ---------------------- ----------------------
291 : | 16x16_10 | 16x16_11 | 16x16_14 | 16x16_15 |
292 : ----------------------- -----------------------
293 : */
294 :
295 0 : for (xSearchIndex = 0; xSearchIndex < 8; xSearchIndex++) {
296 0 : if (sub_sad) {
297 0 : sad8x8[0] = compute8x4_sad_kernel_c(
298 : src + 0 * src_stride + 0,
299 : 2 * src_stride,
300 0 : ref + 0 * ref_stride + 0 + xSearchIndex,
301 : 2 * ref_stride)
302 0 : << 1;
303 0 : sad8x8[1] = compute8x4_sad_kernel_c(
304 : src + 0 * src_stride + 8,
305 : 2 * src_stride,
306 0 : ref + 0 * ref_stride + 8 + xSearchIndex,
307 : 2 * ref_stride)
308 0 : << 1;
309 0 : sad8x8[2] = compute8x4_sad_kernel_c(
310 0 : src + 8 * src_stride + 0,
311 : 2 * src_stride,
312 0 : ref + 8 * ref_stride + 0 + xSearchIndex,
313 : 2 * ref_stride)
314 0 : << 1;
315 0 : sad8x8[3] = compute8x4_sad_kernel_c(
316 0 : src + 8 * src_stride + 8,
317 : 2 * src_stride,
318 0 : ref + 8 * ref_stride + 8 + xSearchIndex,
319 : 2 * ref_stride)
320 0 : << 1;
321 : } else {
322 0 : sad8x8[0] =
323 0 : compute8x8_sad_kernel_c(src + 0 * src_stride + 0,
324 : src_stride,
325 0 : ref + 0 * ref_stride + 0 + xSearchIndex,
326 : ref_stride);
327 0 : sad8x8[1] =
328 0 : compute8x8_sad_kernel_c(src + 0 * src_stride + 8,
329 : src_stride,
330 0 : ref + 0 * ref_stride + 8 + xSearchIndex,
331 : ref_stride);
332 0 : sad8x8[2] =
333 0 : compute8x8_sad_kernel_c(src + 8 * src_stride + 0,
334 : src_stride,
335 0 : ref + 8 * ref_stride + 0 + xSearchIndex,
336 : ref_stride);
337 0 : sad8x8[3] =
338 0 : compute8x8_sad_kernel_c(src + 8 * src_stride + 8,
339 : src_stride,
340 0 : ref + 8 * ref_stride + 8 + xSearchIndex,
341 : ref_stride);
342 : }
343 :
344 : // 8x8_0
345 0 : if (sad8x8[0] < p_best_sad8x8[0]) {
346 0 : p_best_sad8x8[0] = sad8x8[0];
347 0 : xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
348 0 : yMv = _MVYT(mv);
349 0 : p_best_mv8x8[0] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
350 : }
351 :
352 : // 8x8_1
353 0 : if (sad8x8[1] < p_best_sad8x8[1]) {
354 0 : p_best_sad8x8[1] = sad8x8[1];
355 0 : xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
356 0 : yMv = _MVYT(mv);
357 0 : p_best_mv8x8[1] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
358 : }
359 :
360 : // 8x8_2
361 0 : if (sad8x8[2] < p_best_sad8x8[2]) {
362 0 : p_best_sad8x8[2] = sad8x8[2];
363 0 : xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
364 0 : yMv = _MVYT(mv);
365 0 : p_best_mv8x8[2] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
366 : }
367 :
368 : // 8x8_3
369 0 : if (sad8x8[3] < p_best_sad8x8[3]) {
370 0 : p_best_sad8x8[3] = sad8x8[3];
371 0 : xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
372 0 : yMv = _MVYT(mv);
373 0 : p_best_mv8x8[3] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
374 : }
375 :
376 : // 16x16
377 0 : sad16x16 = (uint16_t)(sad8x8[0] + sad8x8[1] + sad8x8[2] + sad8x8[3]);
378 0 : p_sad16x16[xSearchIndex] =
379 : sad16x16; // store the intermediate 16x16 SAD for 32x32.
380 0 : if ((uint32_t)(sad16x16) < p_best_sad16x16[0]) {
381 0 : p_best_sad16x16[0] = sad16x16;
382 0 : xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
383 0 : yMv = _MVYT(mv);
384 0 : p_best_mv16x16[0] = ((uint16_t)yMv << 16) | ((uint16_t)xMv);
385 : }
386 : }
387 0 : }
388 :
/*******************************************
 * get_eight_horizontal_search_point_results_32x32_64x64_pu_c
 *   For each of eight horizontal search points, derives
 *   the four 32x32 SADs and the 64x64 SAD from the stored
 *   16x16 SADs, and records the candidate MV wherever it
 *   beats the best SAD seen so far.
 *
 *   p_sad16x16          : in, 16 x 8 table; entry
 *                         [n * 8 + s] is the SAD of 16x16
 *                         block n at search point s
 *   p_best_sad32x32/mv32x32 : in/out, best SAD and MV per 32x32
 *   p_best_sad64x64/mv64x64 : in/out, best 64x64 SAD and MV
 *   mv                  : MV of search point 0; the x component
 *                         taken with _MVXT() is advanced by 4 per
 *                         search point
 *******************************************/
void get_eight_horizontal_search_point_results_32x32_64x64_pu_c(
    uint16_t *p_sad16x16, uint32_t *p_best_sad32x32, uint32_t *p_best_sad64x64,
    uint32_t *p_best_mv32x32, uint32_t *p_best_mv64x64, uint32_t mv) {
    int16_t xMv, yMv;
    uint32_t candidate_mv;
    uint32_t sad32x32, sad64x64;
    uint32_t xSearchIndex;
    uint32_t blk;

    /*--------------------
    |  32x32_0  |  32x32_1
    ----------------------
    |  32x32_2  |  32x32_3
    ----------------------*/

    /*  data ordering in p_sad16x16 buffer

                  Search    Search            Search
                  Point 0   Point 1           Point 7
                ---------------------------------------
       16x16_0  |    x    |    x    | ...... |    x    |
                ---------------------------------------
       16x16_1  |    x    |    x    | ...... |    x    |

       16x16_n  |    x    |    x    | ...... |    x    |

                ---------------------------------------
       16x16_15 |    x    |    x    | ...... |    x    |
                ---------------------------------------
    */

    for (xSearchIndex = 0; xSearchIndex < 8; xSearchIndex++) {
        // Candidate MV of this search point, packed as (y << 16) | x.
        // The y half is widened to uint32_t before the shift: shifting
        // the int-promoted uint16_t would move a set bit 15 into the
        // sign bit of int, which is undefined behaviour.
        xMv = _MVXT(mv) + (int16_t)xSearchIndex * 4;
        yMv = _MVYT(mv);
        candidate_mv = ((uint32_t)(uint16_t)yMv << 16) | (uint16_t)xMv;

        sad64x64 = 0;

        // 32x32_0 .. 32x32_3: each one sums four consecutive 16x16 rows
        // of the table at the current search-point column.
        for (blk = 0; blk < 4; blk++) {
            const uint16_t *col = p_sad16x16 + (4 * blk) * 8 + xSearchIndex;

            sad32x32 = col[0 * 8] + col[1 * 8] + col[2 * 8] + col[3 * 8];

            if (sad32x32 < p_best_sad32x32[blk]) {
                p_best_sad32x32[blk] = sad32x32;
                p_best_mv32x32[blk] = candidate_mv;
            }

            sad64x64 += sad32x32;
        }

        // 64x64
        if (sad64x64 < p_best_sad64x64[0]) {
            p_best_sad64x64[0] = sad64x64;
            p_best_mv64x64[0] = candidate_mv;
        }
    }
}
486 :
487 : /*******************************************
488 : Calcualte SAD for 16x16 and its 8x8 sublcoks
489 : and check if there is improvment, if yes keep
490 : the best SAD+MV
491 : *******************************************/
492 0 : void sad_calculation_8x8_16x16_c(uint8_t *src, uint32_t src_stride, uint8_t *ref,
493 : uint32_t ref_stride, uint32_t *p_best_sad8x8,
494 : uint32_t *p_best_sad16x16,
495 : uint32_t *p_best_mv8x8, uint32_t *p_best_mv16x16,
496 : uint32_t mv, uint32_t *p_sad16x16,
497 : EbBool sub_sad) {
498 : uint64_t sad8x8[4];
499 : uint64_t sad16x16;
500 :
501 0 : if (sub_sad) {
502 0 : sad8x8[0] = (compute8x4_sad_kernel_c(src + 0 * src_stride + 0,
503 : 2 * src_stride,
504 : ref + 0 * ref_stride + 0,
505 : 2 * ref_stride))
506 0 : << 1;
507 0 : sad8x8[1] = (compute8x4_sad_kernel_c(src + 0 * src_stride + 8,
508 : 2 * src_stride,
509 : ref + 0 * ref_stride + 8,
510 : 2 * ref_stride))
511 0 : << 1;
512 0 : sad8x8[2] = (compute8x4_sad_kernel_c(src + 8 * src_stride + 0,
513 : 2 * src_stride,
514 0 : ref + 8 * ref_stride + 0,
515 : 2 * ref_stride))
516 0 : << 1;
517 0 : sad8x8[3] = (compute8x4_sad_kernel_c(src + 8 * src_stride + 8,
518 : 2 * src_stride,
519 0 : ref + 8 * ref_stride + 8,
520 : 2 * ref_stride))
521 0 : << 1;
522 : } else {
523 0 : sad8x8[0] = compute8x8_sad_kernel_c(src + 0 * src_stride + 0,
524 : src_stride,
525 : ref + 0 * ref_stride + 0,
526 : ref_stride);
527 0 : sad8x8[1] = compute8x8_sad_kernel_c(src + 0 * src_stride + 8,
528 : src_stride,
529 : ref + 0 * ref_stride + 8,
530 : ref_stride);
531 0 : sad8x8[2] = compute8x8_sad_kernel_c(src + 8 * src_stride + 0,
532 : src_stride,
533 0 : ref + 8 * ref_stride + 0,
534 : ref_stride);
535 0 : sad8x8[3] = compute8x8_sad_kernel_c(src + 8 * src_stride + 8,
536 : src_stride,
537 0 : ref + 8 * ref_stride + 8,
538 : ref_stride);
539 : }
540 :
541 0 : if (sad8x8[0] < p_best_sad8x8[0]) {
542 0 : p_best_sad8x8[0] = (uint32_t)sad8x8[0];
543 0 : p_best_mv8x8[0] = mv;
544 : }
545 :
546 0 : if (sad8x8[1] < p_best_sad8x8[1]) {
547 0 : p_best_sad8x8[1] = (uint32_t)sad8x8[1];
548 0 : p_best_mv8x8[1] = mv;
549 : }
550 :
551 0 : if (sad8x8[2] < p_best_sad8x8[2]) {
552 0 : p_best_sad8x8[2] = (uint32_t)sad8x8[2];
553 0 : p_best_mv8x8[2] = mv;
554 : }
555 :
556 0 : if (sad8x8[3] < p_best_sad8x8[3]) {
557 0 : p_best_sad8x8[3] = (uint32_t)sad8x8[3];
558 0 : p_best_mv8x8[3] = mv;
559 : }
560 :
561 0 : sad16x16 = sad8x8[0] + sad8x8[1] + sad8x8[2] + sad8x8[3];
562 0 : if (sad16x16 < p_best_sad16x16[0]) {
563 0 : p_best_sad16x16[0] = (uint32_t)sad16x16;
564 0 : p_best_mv16x16[0] = mv;
565 : }
566 :
567 0 : *p_sad16x16 = (uint32_t)sad16x16;
568 0 : }
569 :
/*******************************************
 * sad_calculation_32x32_64x64_c
 *   Derives the four 32x32 SADs and the 64x64 SAD from
 *   sixteen 16x16 SADs for one candidate motion vector,
 *   and records the candidate wherever it beats the
 *   best SAD seen so far.
 *
 *   p_sad16x16[0..15]   : in, 16x16 SADs; entries
 *                         4*i .. 4*i+3 form 32x32 block i
 *   p_best_sad32x32/mv32x32 : in/out, best SAD and MV per 32x32
 *   p_best_sad64x64/mv64x64 : in/out, best 64x64 SAD and MV
 *   mv                  : candidate MV stored on improvement
 *******************************************/
void sad_calculation_32x32_64x64_c(uint32_t *p_sad16x16,
                                   uint32_t *p_best_sad32x32,
                                   uint32_t *p_best_sad64x64,
                                   uint32_t *p_best_mv32x32,
                                   uint32_t *p_best_mv64x64, uint32_t mv) {
    uint32_t total64 = 0;
    uint32_t blk;

    for (blk = 0; blk < 4; ++blk) {
        const uint32_t *group = p_sad16x16 + 4 * blk;
        const uint32_t total32 = group[0] + group[1] + group[2] + group[3];

        if (total32 < p_best_sad32x32[blk]) {
            p_best_sad32x32[blk] = total32;
            p_best_mv32x32[blk] = mv;
        }

        total64 += total32;
    }

    if (total64 < p_best_sad64x64[0]) {
        p_best_sad64x64[0] = total64;
        p_best_mv64x64[0] = mv;
    }
}
614 :
615 : #define BLK_NUM 5
616 : /**********************************************************
617 : Calculate the best SAD from Rect H, V and H4, V4 partitions
618 :
619 : and return the best partition index
620 : ***********************************************************/
621 0 : void nsq_me_analysis(uint32_t *p_sad64x32, uint32_t *p_sad32x16,
622 : uint32_t *p_sad16x8, uint32_t *p_sad32x64,
623 : uint32_t *p_sad16x32, uint32_t *p_sad8x16,
624 : uint32_t *p_sad32x8, uint32_t *p_sad8x32,
625 : uint32_t *p_sad64x16, uint32_t *p_sad16x64,
626 : uint8_t *p_nsq_64x64, uint8_t *p_nsq_32x32,
627 : uint8_t *p_nsq_16x16, uint8_t *p_nsq_8x8) {
628 : uint32_t sad[BLK_NUM]; // sad_N, sad_H, sad_V, sad_H4, sad_V4, sad_S;
629 : uint32_t best_nsq_sad;
630 : uint8_t nsq_index;
631 : /*64x64*/
632 : // sad[0] = p_sad64x64;
633 0 : sad[1] = p_sad64x32[0] + p_sad64x32[1];
634 0 : sad[2] = p_sad32x64[0] + p_sad32x64[1];
635 0 : sad[3] = p_sad64x16[0] + p_sad64x16[1] + p_sad64x16[2] + p_sad64x16[3];
636 0 : sad[4] = p_sad16x64[0] + p_sad16x64[1] + p_sad16x64[2] + p_sad16x64[3];
637 : // sad[5] = p_sad32x32[0] + p_sad32x32[1] + p_sad32x32[2] + p_sad32x32[3];
638 0 : best_nsq_sad = MAX_SAD_VALUE;
639 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
640 0 : if (sad[nsq_index] < best_nsq_sad) {
641 0 : best_nsq_sad = sad[nsq_index];
642 0 : *p_nsq_64x64 = nsq_index;
643 : }
644 : }
645 : /*32x32*/
646 : // 32x32_0
647 : // sad[0] = p_sad32x32[0];
648 0 : sad[1] = p_sad32x16[0] + p_sad32x16[1];
649 0 : sad[2] = p_sad16x32[0] + p_sad16x32[1];
650 0 : sad[3] = p_sad32x8[0] + p_sad32x8[1] + p_sad32x8[2] + p_sad32x8[3];
651 0 : sad[4] = p_sad8x32[0] + p_sad8x32[1] + p_sad8x32[2] + p_sad8x32[3];
652 : // sad[5] = p_sad16x16[0] + p_sad16x16[1] + p_sad16x16[2] + p_sad16x16[3];
653 0 : best_nsq_sad = MAX_SAD_VALUE;
654 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
655 0 : if (sad[nsq_index] < best_nsq_sad) {
656 0 : best_nsq_sad = sad[nsq_index];
657 0 : p_nsq_32x32[0] = nsq_index;
658 : }
659 : }
660 : // 32x32_1
661 : // sad[0] = p_sad32x32[1];
662 0 : sad[1] = p_sad32x16[2] + p_sad32x16[3];
663 0 : sad[2] = p_sad16x32[2] + p_sad16x32[3];
664 0 : sad[3] = p_sad32x8[4] + p_sad32x8[5] + p_sad32x8[6] + p_sad32x8[7];
665 0 : sad[4] = p_sad8x32[4] + p_sad8x32[5] + p_sad8x32[6] + p_sad8x32[7];
666 : // sad[5] = p_sad16x16[4] + p_sad16x16[5] + p_sad16x16[6] + p_sad16x16[7];
667 0 : best_nsq_sad = MAX_SAD_VALUE;
668 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
669 0 : if (sad[nsq_index] < best_nsq_sad) {
670 0 : best_nsq_sad = sad[nsq_index];
671 0 : p_nsq_32x32[1] = nsq_index;
672 : }
673 : }
674 : // 32x32_2
675 : // sad[0] = p_sad32x32[2];
676 0 : sad[1] = p_sad32x16[4] + p_sad32x16[5];
677 0 : sad[2] = p_sad16x32[4] + p_sad16x32[5];
678 0 : sad[3] = p_sad32x8[8] + p_sad32x8[9] + p_sad32x8[10] + p_sad32x8[11];
679 0 : sad[4] = p_sad8x32[8] + p_sad8x32[9] + p_sad8x32[10] + p_sad8x32[11];
680 : // sad[5] = p_sad16x16[8] + p_sad16x16[9] + p_sad16x16[10] + p_sad16x16[11];
681 0 : best_nsq_sad = MAX_SAD_VALUE;
682 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
683 0 : if (sad[nsq_index] < best_nsq_sad) {
684 0 : best_nsq_sad = sad[nsq_index];
685 0 : p_nsq_32x32[2] = nsq_index;
686 : }
687 : }
688 : // 32x32_3
689 : // sad[0] = p_sad32x32[3];
690 0 : sad[1] = p_sad32x16[6] + p_sad32x16[7];
691 0 : sad[2] = p_sad16x32[6] + p_sad16x32[7];
692 0 : sad[3] = p_sad32x8[12] + p_sad32x8[13] + p_sad32x8[14] + p_sad32x8[15];
693 0 : sad[4] = p_sad8x32[12] + p_sad8x32[13] + p_sad8x32[14] + p_sad8x32[15];
694 : // sad[5] = p_sad16x16[12] + p_sad16x16[13] + p_sad16x16[14] +
695 : // p_sad16x16[15];
696 0 : best_nsq_sad = MAX_SAD_VALUE;
697 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
698 0 : if (sad[nsq_index] < best_nsq_sad) {
699 0 : best_nsq_sad = sad[nsq_index];
700 0 : p_nsq_32x32[3] = nsq_index;
701 : }
702 : }
703 : /*16x16*/
704 : // 16x16_0
705 : // sad[0] = p_sad16x16[0];
706 0 : sad[1] = p_sad16x8[0] + p_sad16x8[1];
707 0 : sad[2] = p_sad8x16[0] + p_sad8x16[1];
708 0 : sad[3] = MAX_SAD_VALUE;
709 0 : sad[4] = MAX_SAD_VALUE;
710 : // sad[5] = p_sad8x8[0] + p_sad8x8[1] + p_sad8x8[2] + p_sad8x8[3];
711 0 : best_nsq_sad = MAX_SAD_VALUE;
712 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
713 0 : if (sad[nsq_index] < best_nsq_sad) {
714 0 : best_nsq_sad = sad[nsq_index];
715 0 : p_nsq_16x16[0] = nsq_index;
716 : }
717 : }
718 0 : p_nsq_8x8[0] = p_nsq_8x8[1] = p_nsq_8x8[2] = p_nsq_8x8[3] = p_nsq_16x16[0];
719 : // 16x16_1
720 : // sad[0] = p_sad16x16[1];
721 0 : sad[1] = p_sad16x8[2] + p_sad16x8[3];
722 0 : sad[2] = p_sad8x16[2] + p_sad8x16[3];
723 0 : sad[3] = MAX_SAD_VALUE;
724 0 : sad[4] = MAX_SAD_VALUE;
725 : // sad[5] = p_sad8x8[4] + p_sad8x8[5] + p_sad8x8[6] + p_sad8x8[7];
726 0 : best_nsq_sad = MAX_SAD_VALUE;
727 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
728 0 : if (sad[nsq_index] < best_nsq_sad) {
729 0 : best_nsq_sad = sad[nsq_index];
730 0 : p_nsq_16x16[1] = nsq_index;
731 : }
732 : }
733 0 : p_nsq_8x8[4] = p_nsq_8x8[5] = p_nsq_8x8[6] = p_nsq_8x8[7] = p_nsq_16x16[1];
734 : // 16x16_2
735 : // sad[0] = p_sad16x16[2];
736 0 : sad[1] = p_sad16x8[4] + p_sad16x8[5];
737 0 : sad[2] = p_sad8x16[4] + p_sad8x16[5];
738 0 : sad[3] = MAX_SAD_VALUE;
739 0 : sad[4] = MAX_SAD_VALUE;
740 : // sad[5] = p_sad8x8[8] + p_sad8x8[9] + p_sad8x8[10] + p_sad8x8[11];
741 0 : best_nsq_sad = MAX_SAD_VALUE;
742 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
743 0 : if (sad[nsq_index] < best_nsq_sad) {
744 0 : best_nsq_sad = sad[nsq_index];
745 0 : p_nsq_16x16[2] = nsq_index;
746 : }
747 : }
748 0 : p_nsq_8x8[8] = p_nsq_8x8[9] = p_nsq_8x8[10] = p_nsq_8x8[11] =
749 : p_nsq_16x16[2];
750 : // 16x16_3
751 : // sad[0] = p_sad16x16[3];
752 0 : sad[1] = p_sad16x8[6] + p_sad16x8[7];
753 0 : sad[2] = p_sad8x16[6] + p_sad8x16[7];
754 0 : sad[3] = MAX_SAD_VALUE;
755 0 : sad[4] = MAX_SAD_VALUE;
756 : // sad[5] = p_sad8x8[12] + p_sad8x8[13] + p_sad8x8[14] + p_sad8x8[15];
757 0 : best_nsq_sad = MAX_SAD_VALUE;
758 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
759 0 : if (sad[nsq_index] < best_nsq_sad) {
760 0 : best_nsq_sad = sad[nsq_index];
761 0 : p_nsq_16x16[3] = nsq_index;
762 : }
763 : }
764 0 : p_nsq_8x8[12] = p_nsq_8x8[13] = p_nsq_8x8[14] = p_nsq_8x8[15] =
765 : p_nsq_16x16[3];
766 : // 16x16_4
767 : // sad[0] = p_sad16x16[4];
768 0 : sad[1] = p_sad16x8[8] + p_sad16x8[9];
769 0 : sad[2] = p_sad8x16[8] + p_sad8x16[9];
770 0 : sad[3] = MAX_SAD_VALUE;
771 0 : sad[4] = MAX_SAD_VALUE;
772 : // sad[5] = p_sad8x8[16] + p_sad8x8[17] + p_sad8x8[18] + p_sad8x8[19];
773 0 : best_nsq_sad = MAX_SAD_VALUE;
774 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
775 0 : if (sad[nsq_index] < best_nsq_sad) {
776 0 : best_nsq_sad = sad[nsq_index];
777 0 : p_nsq_16x16[4] = nsq_index;
778 : }
779 : }
780 0 : p_nsq_8x8[16] = p_nsq_8x8[17] = p_nsq_8x8[18] = p_nsq_8x8[19] =
781 : p_nsq_16x16[4];
782 : // 16x16_5
783 : // sad[0] = p_sad16x16[5];
784 0 : sad[1] = p_sad16x8[10] + p_sad16x8[11];
785 0 : sad[2] = p_sad8x16[10] + p_sad8x16[11];
786 0 : sad[3] = MAX_SAD_VALUE;
787 0 : sad[4] = MAX_SAD_VALUE;
788 : // sad[5] = p_sad8x8[20] + p_sad8x8[21] + p_sad8x8[22] + p_sad8x8[23];
789 0 : best_nsq_sad = MAX_SAD_VALUE;
790 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
791 0 : if (sad[nsq_index] < best_nsq_sad) {
792 0 : best_nsq_sad = sad[nsq_index];
793 0 : p_nsq_16x16[5] = nsq_index;
794 : }
795 : }
796 0 : p_nsq_8x8[20] = p_nsq_8x8[21] = p_nsq_8x8[22] = p_nsq_8x8[23] =
797 : p_nsq_16x16[5];
798 : // 16x16_6
799 : // sad[0] = p_sad16x16[6];
800 0 : sad[1] = p_sad16x8[12] + p_sad16x8[13];
801 0 : sad[2] = p_sad8x16[12] + p_sad8x16[13];
802 0 : sad[3] = MAX_SAD_VALUE;
803 0 : sad[4] = MAX_SAD_VALUE;
804 : // sad[5] = p_sad8x8[24] + p_sad8x8[25] + p_sad8x8[26] + p_sad8x8[27];
805 0 : best_nsq_sad = MAX_SAD_VALUE;
806 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
807 0 : if (sad[nsq_index] < best_nsq_sad) {
808 0 : best_nsq_sad = sad[nsq_index];
809 0 : p_nsq_16x16[6] = nsq_index;
810 : }
811 : }
812 0 : p_nsq_8x8[24] = p_nsq_8x8[25] = p_nsq_8x8[26] = p_nsq_8x8[27] =
813 : p_nsq_16x16[6];
814 : // 16x16_7
815 : // sad[0] = p_sad16x16[7];
816 0 : sad[1] = p_sad16x8[14] + p_sad16x8[15];
817 0 : sad[2] = p_sad8x16[14] + p_sad8x16[15];
818 0 : sad[3] = MAX_SAD_VALUE;
819 0 : sad[4] = MAX_SAD_VALUE;
820 : // sad[5] = p_sad8x8[28] + p_sad8x8[29] + p_sad8x8[30] + p_sad8x8[31];
821 0 : best_nsq_sad = MAX_SAD_VALUE;
822 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
823 0 : if (sad[nsq_index] < best_nsq_sad) {
824 0 : best_nsq_sad = sad[nsq_index];
825 0 : p_nsq_16x16[7] = nsq_index;
826 : }
827 : }
828 0 : p_nsq_8x8[28] = p_nsq_8x8[29] = p_nsq_8x8[30] = p_nsq_8x8[31] =
829 : p_nsq_16x16[7];
830 : // 16x16_8
831 : // sad[0] = p_sad16x16[8];
832 0 : sad[1] = p_sad16x8[16] + p_sad16x8[17];
833 0 : sad[2] = p_sad8x16[16] + p_sad8x16[17];
834 0 : sad[3] = MAX_SAD_VALUE;
835 0 : sad[4] = MAX_SAD_VALUE;
836 : // sad[5] = p_sad8x8[32] + p_sad8x8[33] + p_sad8x8[34] + p_sad8x8[35];
837 0 : best_nsq_sad = MAX_SAD_VALUE;
838 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
839 0 : if (sad[nsq_index] < best_nsq_sad) {
840 0 : best_nsq_sad = sad[nsq_index];
841 0 : p_nsq_16x16[8] = nsq_index;
842 : }
843 : }
844 0 : p_nsq_8x8[32] = p_nsq_8x8[33] = p_nsq_8x8[34] = p_nsq_8x8[35] =
845 : p_nsq_16x16[8];
846 : // 16x16_9
847 : // sad[0] = p_sad16x16[9];
848 0 : sad[1] = p_sad16x8[18] + p_sad16x8[19];
849 0 : sad[2] = p_sad8x16[18] + p_sad8x16[19];
850 0 : sad[3] = MAX_SAD_VALUE;
851 0 : sad[4] = MAX_SAD_VALUE;
852 : // sad[5] = p_sad8x8[36] + p_sad8x8[37] + p_sad8x8[38] + p_sad8x8[39];
853 0 : best_nsq_sad = MAX_SAD_VALUE;
854 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
855 0 : if (sad[nsq_index] < best_nsq_sad) {
856 0 : best_nsq_sad = sad[nsq_index];
857 0 : p_nsq_16x16[9] = nsq_index;
858 : }
859 : }
860 0 : p_nsq_8x8[36] = p_nsq_8x8[37] = p_nsq_8x8[38] = p_nsq_8x8[39] =
861 : p_nsq_16x16[9];
862 : // 16x16_10
863 : // sad[0] = p_sad16x16[10];
864 0 : sad[1] = p_sad16x8[20] + p_sad16x8[21];
865 0 : sad[2] = p_sad8x16[20] + p_sad8x16[21];
866 0 : sad[3] = MAX_SAD_VALUE;
867 0 : sad[4] = MAX_SAD_VALUE;
868 : // sad[5] = p_sad8x8[40] + p_sad8x8[41] + p_sad8x8[42] + p_sad8x8[43];
869 0 : best_nsq_sad = MAX_SAD_VALUE;
870 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
871 0 : if (sad[nsq_index] < best_nsq_sad) {
872 0 : best_nsq_sad = sad[nsq_index];
873 0 : p_nsq_16x16[10] = nsq_index;
874 : }
875 : }
876 0 : p_nsq_8x8[40] = p_nsq_8x8[41] = p_nsq_8x8[42] = p_nsq_8x8[43] =
877 : p_nsq_16x16[10];
878 : // 16x16_11
879 : // sad[0] = p_sad16x16[11];
880 0 : sad[1] = p_sad16x8[22] + p_sad16x8[23];
881 0 : sad[2] = p_sad8x16[22] + p_sad8x16[23];
882 0 : sad[3] = MAX_SAD_VALUE;
883 0 : sad[4] = MAX_SAD_VALUE;
884 : // sad[5] = p_sad8x8[44] + p_sad8x8[45] + p_sad8x8[46] + p_sad8x8[47];
885 0 : best_nsq_sad = MAX_SAD_VALUE;
886 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
887 0 : if (sad[nsq_index] < best_nsq_sad) {
888 0 : best_nsq_sad = sad[nsq_index];
889 0 : p_nsq_16x16[11] = nsq_index;
890 : }
891 : }
892 0 : p_nsq_8x8[44] = p_nsq_8x8[45] = p_nsq_8x8[46] = p_nsq_8x8[47] =
893 : p_nsq_16x16[11];
894 : // 16x16_12
895 : // sad[0] = p_sad16x16[12];
896 0 : sad[1] = p_sad16x8[24] + p_sad16x8[25];
897 0 : sad[2] = p_sad8x16[24] + p_sad8x16[25];
898 0 : sad[3] = MAX_SAD_VALUE;
899 0 : sad[4] = MAX_SAD_VALUE;
900 : // sad[5] = p_sad8x8[48] + p_sad8x8[49] + p_sad8x8[50] + p_sad8x8[51];
901 0 : best_nsq_sad = MAX_SAD_VALUE;
902 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
903 0 : if (sad[nsq_index] < best_nsq_sad) {
904 0 : best_nsq_sad = sad[nsq_index];
905 0 : p_nsq_16x16[12] = nsq_index;
906 : }
907 : }
908 0 : p_nsq_8x8[48] = p_nsq_8x8[49] = p_nsq_8x8[50] = p_nsq_8x8[51] =
909 : p_nsq_16x16[12];
910 : // 16x16_13
911 : // sad[0] = p_sad16x16[13];
912 0 : sad[1] = p_sad16x8[26] + p_sad16x8[27];
913 0 : sad[2] = p_sad8x16[26] + p_sad8x16[27];
914 0 : sad[3] = MAX_SAD_VALUE;
915 0 : sad[4] = MAX_SAD_VALUE;
916 : // sad[5] = p_sad8x8[52] + p_sad8x8[53] + p_sad8x8[54] + p_sad8x8[55];
917 0 : best_nsq_sad = MAX_SAD_VALUE;
918 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
919 0 : if (sad[nsq_index] < best_nsq_sad) {
920 0 : best_nsq_sad = sad[nsq_index];
921 0 : p_nsq_16x16[13] = nsq_index;
922 : }
923 : }
924 0 : p_nsq_8x8[52] = p_nsq_8x8[53] = p_nsq_8x8[54] = p_nsq_8x8[55] =
925 : p_nsq_16x16[13];
926 : // 16x16_14
927 : // sad[0] = p_sad16x16[14];
928 0 : sad[1] = p_sad16x8[28] + p_sad16x8[29];
929 0 : sad[2] = p_sad8x16[28] + p_sad8x16[29];
930 0 : sad[3] = MAX_SAD_VALUE;
931 0 : sad[4] = MAX_SAD_VALUE;
932 : // sad[5] = p_sad8x8[56] + p_sad8x8[57] + p_sad8x8[58] + p_sad8x8[59];
933 0 : best_nsq_sad = MAX_SAD_VALUE;
934 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
935 0 : if (sad[nsq_index] < best_nsq_sad) {
936 0 : best_nsq_sad = sad[nsq_index];
937 0 : p_nsq_16x16[14] = nsq_index;
938 : }
939 : }
940 0 : p_nsq_8x8[56] = p_nsq_8x8[57] = p_nsq_8x8[58] = p_nsq_8x8[59] =
941 : p_nsq_16x16[14];
942 : // 16x16_15
943 : // sad[0] = p_sad16x16[15];
944 0 : sad[1] = p_sad16x8[30] + p_sad16x8[31];
945 0 : sad[2] = p_sad8x16[30] + p_sad8x16[31];
946 0 : sad[3] = MAX_SAD_VALUE;
947 0 : sad[4] = MAX_SAD_VALUE;
948 : // sad[5] = p_sad8x8[60] + p_sad8x8[61] + p_sad8x8[62] + p_sad8x8[63];
949 0 : best_nsq_sad = MAX_SAD_VALUE;
950 0 : for (nsq_index = 1; nsq_index < BLK_NUM; nsq_index++) {
951 0 : if (sad[nsq_index] < best_nsq_sad) {
952 0 : best_nsq_sad = sad[nsq_index];
953 0 : p_nsq_16x16[15] = nsq_index;
954 : }
955 : }
956 0 : p_nsq_8x8[60] = p_nsq_8x8[61] = p_nsq_8x8[62] = p_nsq_8x8[63] =
957 : p_nsq_16x16[15];
958 0 : }
959 :
/****************************************************
 * ext_sad_update_best
 *
 * Stores `sad` / `mv` into *best_sad / *best_mv when `sad` is strictly
 * lower than the value currently held in *best_sad. Equal SADs keep the
 * previously stored motion vector.
 ****************************************************/
static void ext_sad_update_best(uint32_t sad, uint32_t mv,
                                uint32_t *best_sad, uint32_t *best_mv) {
    if (sad < *best_sad) {
        *best_sad = sad;
        *best_mv = mv;
    }
}

/****************************************************
 * ext_sad_pair_base
 *
 * Maps an output index 0, 1, 2, 3, 4, 5, ... to the input index
 * 0, 1, 4, 5, 8, 9, ... (i.e. 4 * (index / 2) + index % 2). Together with
 * an offset of +2 this selects the two blocks {n, n + 2} that are summed
 * for 8x16, 16x32, 32x8 and 64x16.
 ****************************************************/
static uint32_t ext_sad_pair_base(uint32_t index) {
    return ((index >> 1) << 2) | (index & 1);
}

/****************************************************
 * ExtSadCalculation
 *
 * Calculate SAD for Rect H, V and H4, V4 partitions from the square
 * 8x8 / 16x16 / 32x32 SADs of one candidate motion vector, and update
 * the stored best SAD and motion info of every partition whose new SAD
 * is strictly better (lower).
 *
 * Inputs (read only here):
 *   p_sad8x8    64 entries, p_sad16x16  16 entries, p_sad32x32  4 entries.
 *
 * In/out pairs (best SAD, matching MV), number of entries touched:
 *   64x32: 2   32x16: 8   16x8: 32
 *   32x64: 2   16x32: 8   8x16: 32
 *   32x8: 16   8x32: 16   64x16: 4   16x64: 4
 *
 *   mv          value written to the p_best_mv* entry on every update;
 *               it is stored as-is, this function never decodes it.
 *
 * NOTE(review): the index arithmetic implies that the four children of
 * each parent block are stored consecutively in raster order (quad-tree
 * ordering) - confirm against the code that fills the input arrays.
 *
 * The sections below run in the same order as the original unrolled
 * implementation, so results are unchanged even if buffers overlap.
 ****************************************************/
void ExtSadCalculation(uint32_t *p_sad8x8, uint32_t *p_sad16x16,
                       uint32_t *p_sad32x32, uint32_t *p_best_sad64x32,
                       uint32_t *p_best_mv64x32, uint32_t *p_best_sad32x16,
                       uint32_t *p_best_mv32x16, uint32_t *p_best_sad16x8,
                       uint32_t *p_best_mv16x8, uint32_t *p_best_sad32x64,
                       uint32_t *p_best_mv32x64, uint32_t *p_best_sad16x32,
                       uint32_t *p_best_mv16x32, uint32_t *p_best_sad8x16,
                       uint32_t *p_best_mv8x16, uint32_t *p_best_sad32x8,
                       uint32_t *p_best_mv32x8, uint32_t *p_best_sad8x32,
                       uint32_t *p_best_mv8x32, uint32_t *p_best_sad64x16,
                       uint32_t *p_best_mv64x16, uint32_t *p_best_sad16x64,
                       uint32_t *p_best_mv16x64, uint32_t mv) {
    // Intermediate SADs that are reused by the 4:1 partitions below.
    uint32_t sad_16x8[32];
    uint32_t sad_8x16[32];
    uint32_t sad_32x16[8];
    uint32_t sad_16x32[8];

    uint32_t i;
    uint32_t first;

    // 64x32: 32x32 blocks {0, 1} and {2, 3}
    for (i = 0; i < 2; i++) {
        ext_sad_update_best(p_sad32x32[2 * i] + p_sad32x32[2 * i + 1], mv,
                            &p_best_sad64x32[i], &p_best_mv64x32[i]);
    }

    // 32x16: 16x16 blocks {2i, 2i + 1}
    for (i = 0; i < 8; i++) {
        sad_32x16[i] = p_sad16x16[2 * i] + p_sad16x16[2 * i + 1];
        ext_sad_update_best(sad_32x16[i], mv, &p_best_sad32x16[i],
                            &p_best_mv32x16[i]);
    }

    // 64x16: 32x16 blocks {0, 2}, {1, 3}, {4, 6}, {5, 7}
    for (i = 0; i < 4; i++) {
        first = ext_sad_pair_base(i);
        ext_sad_update_best(sad_32x16[first] + sad_32x16[first + 2], mv,
                            &p_best_sad64x16[i], &p_best_mv64x16[i]);
    }

    // 16x8: 8x8 blocks {2i, 2i + 1}
    for (i = 0; i < 32; i++) {
        sad_16x8[i] = p_sad8x8[2 * i] + p_sad8x8[2 * i + 1];
        ext_sad_update_best(sad_16x8[i], mv, &p_best_sad16x8[i],
                            &p_best_mv16x8[i]);
    }

    // 32x64: 32x32 blocks {0, 2} and {1, 3}
    for (i = 0; i < 2; i++) {
        ext_sad_update_best(p_sad32x32[i] + p_sad32x32[i + 2], mv,
                            &p_best_sad32x64[i], &p_best_mv32x64[i]);
    }

    // 16x32: 16x16 blocks {0, 2}, {1, 3}, {4, 6}, {5, 7}, ...
    for (i = 0; i < 8; i++) {
        first = ext_sad_pair_base(i);
        sad_16x32[i] = p_sad16x16[first] + p_sad16x16[first + 2];
        ext_sad_update_best(sad_16x32[i], mv, &p_best_sad16x32[i],
                            &p_best_mv16x32[i]);
    }

    // 16x64: 16x32 blocks {i, i + 4}
    for (i = 0; i < 4; i++) {
        ext_sad_update_best(sad_16x32[i] + sad_16x32[i + 4], mv,
                            &p_best_sad16x64[i], &p_best_mv16x64[i]);
    }

    // 8x16: 8x8 blocks {0, 2}, {1, 3}, {4, 6}, {5, 7}, ...
    for (i = 0; i < 32; i++) {
        first = ext_sad_pair_base(i);
        sad_8x16[i] = p_sad8x8[first] + p_sad8x8[first + 2];
        ext_sad_update_best(sad_8x16[i], mv, &p_best_sad8x16[i],
                            &p_best_mv8x16[i]);
    }

    // 32x8: 16x8 blocks {0, 2}, {1, 3}, {4, 6}, {5, 7}, ...
    for (i = 0; i < 16; i++) {
        first = ext_sad_pair_base(i);
        ext_sad_update_best(sad_16x8[first] + sad_16x8[first + 2], mv,
                            &p_best_sad32x8[i], &p_best_mv32x8[i]);
    }

    // 8x32: 8x16 blocks {n, n + 4} with n = 0..3, 8..11, 16..19, 24..27
    for (i = 0; i < 16; i++) {
        first = 8 * (i / 4) + (i % 4);
        ext_sad_update_best(sad_8x16[first] + sad_8x16[first + 4], mv,
                            &p_best_sad8x32[i], &p_best_mv8x32[i]);
    }
}
1734 :
1735 : /****************************************************
1736 : Calculate SAD for Rect H, V and H4, V4 partitions
1737 : and update its Motion info if the result SAD is better
1738 : ****************************************************/
1739 0 : void ext_eigth_sad_calculation_nsq_c(
1740 : uint32_t p_sad8x8[64][8], uint32_t p_sad16x16[16][8],
1741 : uint32_t p_sad32x32[4][8], uint32_t *p_best_sad64x32,
1742 : uint32_t *p_best_mv64x32, uint32_t *p_best_sad32x16,
1743 : uint32_t *p_best_mv32x16, uint32_t *p_best_sad16x8, uint32_t *p_best_mv16x8,
1744 : uint32_t *p_best_sad32x64, uint32_t *p_best_mv32x64,
1745 : uint32_t *p_best_sad16x32, uint32_t *p_best_mv16x32,
1746 : uint32_t *p_best_sad8x16, uint32_t *p_best_mv8x16, uint32_t *p_best_sad32x8,
1747 : uint32_t *p_best_mv32x8, uint32_t *p_best_sad8x32, uint32_t *p_best_mv8x32,
1748 : uint32_t *p_best_sad64x16, uint32_t *p_best_mv64x16,
1749 : uint32_t *p_best_sad16x64, uint32_t *p_best_mv16x64, uint32_t mv) {
1750 : uint8_t search_index;
1751 : uint32_t sad;
1752 : uint32_t sad_16x8[32];
1753 : uint32_t sad_8x16[32];
1754 : uint32_t sad_32x16[8];
1755 : uint32_t sad_16x32[8];
1756 :
1757 : int16_t x_mv, y_mv;
1758 :
1759 0 : for (search_index = 0; search_index < 8; search_index++) {
1760 : // 64x32
1761 0 : sad = p_sad32x32[0][search_index] + p_sad32x32[1][search_index];
1762 0 : if (sad < p_best_sad64x32[0]) {
1763 0 : p_best_sad64x32[0] = sad;
1764 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1765 0 : y_mv = _MVYT(mv);
1766 0 : p_best_mv64x32[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1767 : }
1768 :
1769 0 : sad = p_sad32x32[2][search_index] + p_sad32x32[3][search_index];
1770 0 : if (sad < p_best_sad64x32[1]) {
1771 0 : p_best_sad64x32[1] = sad;
1772 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1773 0 : y_mv = _MVYT(mv);
1774 0 : p_best_mv64x32[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1775 : }
1776 :
1777 : // 32x16
1778 0 : sad_32x16[0] =
1779 0 : p_sad16x16[0][search_index] + p_sad16x16[1][search_index];
1780 0 : if (sad_32x16[0] < p_best_sad32x16[0]) {
1781 0 : p_best_sad32x16[0] = sad_32x16[0];
1782 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1783 0 : y_mv = _MVYT(mv);
1784 0 : p_best_mv32x16[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1785 : }
1786 :
1787 0 : sad_32x16[1] =
1788 0 : p_sad16x16[2][search_index] + p_sad16x16[3][search_index];
1789 0 : if (sad_32x16[1] < p_best_sad32x16[1]) {
1790 0 : p_best_sad32x16[1] = sad_32x16[1];
1791 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1792 0 : y_mv = _MVYT(mv);
1793 0 : p_best_mv32x16[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1794 : }
1795 :
1796 0 : sad_32x16[2] =
1797 0 : p_sad16x16[4][search_index] + p_sad16x16[5][search_index];
1798 0 : if (sad_32x16[2] < p_best_sad32x16[2]) {
1799 0 : p_best_sad32x16[2] = sad_32x16[2];
1800 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1801 0 : y_mv = _MVYT(mv);
1802 0 : p_best_mv32x16[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1803 : }
1804 :
1805 0 : sad_32x16[3] =
1806 0 : p_sad16x16[6][search_index] + p_sad16x16[7][search_index];
1807 0 : if (sad_32x16[3] < p_best_sad32x16[3]) {
1808 0 : p_best_sad32x16[3] = sad_32x16[3];
1809 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1810 0 : y_mv = _MVYT(mv);
1811 0 : p_best_mv32x16[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1812 : }
1813 :
1814 0 : sad_32x16[4] =
1815 0 : p_sad16x16[8][search_index] + p_sad16x16[9][search_index];
1816 0 : if (sad_32x16[4] < p_best_sad32x16[4]) {
1817 0 : p_best_sad32x16[4] = sad_32x16[4];
1818 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1819 0 : y_mv = _MVYT(mv);
1820 0 : p_best_mv32x16[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1821 : }
1822 :
1823 0 : sad_32x16[5] =
1824 0 : p_sad16x16[10][search_index] + p_sad16x16[11][search_index];
1825 0 : if (sad_32x16[5] < p_best_sad32x16[5]) {
1826 0 : p_best_sad32x16[5] = sad_32x16[5];
1827 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1828 0 : y_mv = _MVYT(mv);
1829 0 : p_best_mv32x16[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1830 : }
1831 :
1832 0 : sad_32x16[6] =
1833 0 : p_sad16x16[12][search_index] + p_sad16x16[13][search_index];
1834 0 : if (sad_32x16[6] < p_best_sad32x16[6]) {
1835 0 : p_best_sad32x16[6] = sad_32x16[6];
1836 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1837 0 : y_mv = _MVYT(mv);
1838 0 : p_best_mv32x16[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1839 : }
1840 :
1841 0 : sad_32x16[7] =
1842 0 : p_sad16x16[14][search_index] + p_sad16x16[15][search_index];
1843 0 : if (sad_32x16[7] < p_best_sad32x16[7]) {
1844 0 : p_best_sad32x16[7] = sad_32x16[7];
1845 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1846 0 : y_mv = _MVYT(mv);
1847 0 : p_best_mv32x16[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1848 : }
1849 :
1850 : // 64x16
1851 0 : sad = sad_32x16[0] + sad_32x16[2];
1852 0 : if (sad < p_best_sad64x16[0]) {
1853 0 : p_best_sad64x16[0] = sad;
1854 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1855 0 : y_mv = _MVYT(mv);
1856 0 : p_best_mv64x16[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1857 : }
1858 0 : sad = sad_32x16[1] + sad_32x16[3];
1859 0 : if (sad < p_best_sad64x16[1]) {
1860 0 : p_best_sad64x16[1] = sad;
1861 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1862 0 : y_mv = _MVYT(mv);
1863 0 : p_best_mv64x16[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1864 : }
1865 :
1866 0 : sad = sad_32x16[4] + sad_32x16[6];
1867 0 : if (sad < p_best_sad64x16[2]) {
1868 0 : p_best_sad64x16[2] = sad;
1869 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1870 0 : y_mv = _MVYT(mv);
1871 0 : p_best_mv64x16[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1872 : }
1873 0 : sad = sad_32x16[5] + sad_32x16[7];
1874 0 : if (sad < p_best_sad64x16[3]) {
1875 0 : p_best_sad64x16[3] = sad;
1876 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1877 0 : y_mv = _MVYT(mv);
1878 0 : p_best_mv64x16[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1879 : }
1880 : // 16x8
1881 0 : sad_16x8[0] = p_sad8x8[0][search_index] + p_sad8x8[1][search_index];
1882 0 : if (sad_16x8[0] < p_best_sad16x8[0]) {
1883 0 : p_best_sad16x8[0] = sad_16x8[0];
1884 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1885 0 : y_mv = _MVYT(mv);
1886 0 : p_best_mv16x8[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1887 : }
1888 :
1889 0 : sad_16x8[1] = p_sad8x8[2][search_index] + p_sad8x8[3][search_index];
1890 0 : if (sad_16x8[1] < p_best_sad16x8[1]) {
1891 0 : p_best_sad16x8[1] = sad_16x8[1];
1892 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1893 0 : y_mv = _MVYT(mv);
1894 0 : p_best_mv16x8[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1895 : }
1896 :
1897 0 : sad_16x8[2] = p_sad8x8[4][search_index] + p_sad8x8[5][search_index];
1898 0 : if (sad_16x8[2] < p_best_sad16x8[2]) {
1899 0 : p_best_sad16x8[2] = sad_16x8[2];
1900 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1901 0 : y_mv = _MVYT(mv);
1902 0 : p_best_mv16x8[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1903 : }
1904 :
1905 0 : sad_16x8[3] = p_sad8x8[6][search_index] + p_sad8x8[7][search_index];
1906 0 : if (sad_16x8[3] < p_best_sad16x8[3]) {
1907 0 : p_best_sad16x8[3] = sad_16x8[3];
1908 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1909 0 : y_mv = _MVYT(mv);
1910 0 : p_best_mv16x8[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1911 : }
1912 :
1913 0 : sad_16x8[4] = p_sad8x8[8][search_index] + p_sad8x8[9][search_index];
1914 0 : if (sad_16x8[4] < p_best_sad16x8[4]) {
1915 0 : p_best_sad16x8[4] = sad_16x8[4];
1916 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1917 0 : y_mv = _MVYT(mv);
1918 0 : p_best_mv16x8[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1919 : }
1920 :
1921 0 : sad_16x8[5] = p_sad8x8[10][search_index] + p_sad8x8[11][search_index];
1922 0 : if (sad_16x8[5] < p_best_sad16x8[5]) {
1923 0 : p_best_sad16x8[5] = sad_16x8[5];
1924 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1925 0 : y_mv = _MVYT(mv);
1926 0 : p_best_mv16x8[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1927 : }
1928 :
1929 0 : sad_16x8[6] = p_sad8x8[12][search_index] + p_sad8x8[13][search_index];
1930 0 : if (sad_16x8[6] < p_best_sad16x8[6]) {
1931 0 : p_best_sad16x8[6] = sad_16x8[6];
1932 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1933 0 : y_mv = _MVYT(mv);
1934 0 : p_best_mv16x8[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1935 : }
1936 :
1937 0 : sad_16x8[7] = p_sad8x8[14][search_index] + p_sad8x8[15][search_index];
1938 0 : if (sad_16x8[7] < p_best_sad16x8[7]) {
1939 0 : p_best_sad16x8[7] = sad_16x8[7];
1940 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1941 0 : y_mv = _MVYT(mv);
1942 0 : p_best_mv16x8[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1943 : }
1944 :
1945 0 : sad_16x8[8] = p_sad8x8[16][search_index] + p_sad8x8[17][search_index];
1946 0 : if (sad_16x8[8] < p_best_sad16x8[8]) {
1947 0 : p_best_sad16x8[8] = sad_16x8[8];
1948 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1949 0 : y_mv = _MVYT(mv);
1950 0 : p_best_mv16x8[8] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1951 : }
1952 :
1953 0 : sad_16x8[9] = p_sad8x8[18][search_index] + p_sad8x8[19][search_index];
1954 0 : if (sad_16x8[9] < p_best_sad16x8[9]) {
1955 0 : p_best_sad16x8[9] = sad_16x8[9];
1956 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1957 0 : y_mv = _MVYT(mv);
1958 0 : p_best_mv16x8[9] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1959 : }
1960 :
1961 0 : sad_16x8[10] = p_sad8x8[20][search_index] + p_sad8x8[21][search_index];
1962 0 : if (sad_16x8[10] < p_best_sad16x8[10]) {
1963 0 : p_best_sad16x8[10] = sad_16x8[10];
1964 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1965 0 : y_mv = _MVYT(mv);
1966 0 : p_best_mv16x8[10] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1967 : }
1968 :
1969 0 : sad_16x8[11] = p_sad8x8[22][search_index] + p_sad8x8[23][search_index];
1970 0 : if (sad_16x8[11] < p_best_sad16x8[11]) {
1971 0 : p_best_sad16x8[11] = sad_16x8[11];
1972 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1973 0 : y_mv = _MVYT(mv);
1974 0 : p_best_mv16x8[11] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1975 : }
1976 :
1977 0 : sad_16x8[12] = p_sad8x8[24][search_index] + p_sad8x8[25][search_index];
1978 0 : if (sad_16x8[12] < p_best_sad16x8[12]) {
1979 0 : p_best_sad16x8[12] = sad_16x8[12];
1980 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1981 0 : y_mv = _MVYT(mv);
1982 0 : p_best_mv16x8[12] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1983 : }
1984 :
1985 0 : sad_16x8[13] = p_sad8x8[26][search_index] + p_sad8x8[27][search_index];
1986 0 : if (sad_16x8[13] < p_best_sad16x8[13]) {
1987 0 : p_best_sad16x8[13] = sad_16x8[13];
1988 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1989 0 : y_mv = _MVYT(mv);
1990 0 : p_best_mv16x8[13] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1991 : }
1992 :
1993 0 : sad_16x8[14] = p_sad8x8[28][search_index] + p_sad8x8[29][search_index];
1994 0 : if (sad_16x8[14] < p_best_sad16x8[14]) {
1995 0 : p_best_sad16x8[14] = sad_16x8[14];
1996 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
1997 0 : y_mv = _MVYT(mv);
1998 0 : p_best_mv16x8[14] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
1999 : }
2000 :
2001 0 : sad_16x8[15] = p_sad8x8[30][search_index] + p_sad8x8[31][search_index];
2002 0 : if (sad_16x8[15] < p_best_sad16x8[15]) {
2003 0 : p_best_sad16x8[15] = sad_16x8[15];
2004 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2005 0 : y_mv = _MVYT(mv);
2006 0 : p_best_mv16x8[15] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2007 : }
2008 :
2009 0 : sad_16x8[16] = p_sad8x8[32][search_index] + p_sad8x8[33][search_index];
2010 0 : if (sad_16x8[16] < p_best_sad16x8[16]) {
2011 0 : p_best_sad16x8[16] = sad_16x8[16];
2012 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2013 0 : y_mv = _MVYT(mv);
2014 0 : p_best_mv16x8[16] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2015 : }
2016 :
2017 0 : sad_16x8[17] = p_sad8x8[34][search_index] + p_sad8x8[35][search_index];
2018 0 : if (sad_16x8[17] < p_best_sad16x8[17]) {
2019 0 : p_best_sad16x8[17] = sad_16x8[17];
2020 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2021 0 : y_mv = _MVYT(mv);
2022 0 : p_best_mv16x8[17] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2023 : }
2024 :
2025 0 : sad_16x8[18] = p_sad8x8[36][search_index] + p_sad8x8[37][search_index];
2026 0 : if (sad_16x8[18] < p_best_sad16x8[18]) {
2027 0 : p_best_sad16x8[18] = sad_16x8[18];
2028 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2029 0 : y_mv = _MVYT(mv);
2030 0 : p_best_mv16x8[18] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2031 : }
2032 :
2033 0 : sad_16x8[19] = p_sad8x8[38][search_index] + p_sad8x8[39][search_index];
2034 0 : if (sad_16x8[19] < p_best_sad16x8[19]) {
2035 0 : p_best_sad16x8[19] = sad_16x8[19];
2036 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2037 0 : y_mv = _MVYT(mv);
2038 0 : p_best_mv16x8[19] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2039 : }
2040 :
2041 0 : sad_16x8[20] = p_sad8x8[40][search_index] + p_sad8x8[41][search_index];
2042 0 : if (sad_16x8[20] < p_best_sad16x8[20]) {
2043 0 : p_best_sad16x8[20] = sad_16x8[20];
2044 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2045 0 : y_mv = _MVYT(mv);
2046 0 : p_best_mv16x8[20] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2047 : }
2048 :
2049 0 : sad_16x8[21] = p_sad8x8[42][search_index] + p_sad8x8[43][search_index];
2050 0 : if (sad_16x8[21] < p_best_sad16x8[21]) {
2051 0 : p_best_sad16x8[21] = sad_16x8[21];
2052 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2053 0 : y_mv = _MVYT(mv);
2054 0 : p_best_mv16x8[21] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2055 : }
2056 :
2057 0 : sad_16x8[22] = p_sad8x8[44][search_index] + p_sad8x8[45][search_index];
2058 0 : if (sad_16x8[22] < p_best_sad16x8[22]) {
2059 0 : p_best_sad16x8[22] = sad_16x8[22];
2060 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2061 0 : y_mv = _MVYT(mv);
2062 0 : p_best_mv16x8[22] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2063 : }
2064 :
2065 0 : sad_16x8[23] = p_sad8x8[46][search_index] + p_sad8x8[47][search_index];
2066 0 : if (sad_16x8[23] < p_best_sad16x8[23]) {
2067 0 : p_best_sad16x8[23] = sad_16x8[23];
2068 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2069 0 : y_mv = _MVYT(mv);
2070 0 : p_best_mv16x8[23] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2071 : }
2072 :
2073 0 : sad_16x8[24] = p_sad8x8[48][search_index] + p_sad8x8[49][search_index];
2074 0 : if (sad_16x8[24] < p_best_sad16x8[24]) {
2075 0 : p_best_sad16x8[24] = sad_16x8[24];
2076 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2077 0 : y_mv = _MVYT(mv);
2078 0 : p_best_mv16x8[24] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2079 : }
2080 :
2081 0 : sad_16x8[25] = p_sad8x8[50][search_index] + p_sad8x8[51][search_index];
2082 0 : if (sad_16x8[25] < p_best_sad16x8[25]) {
2083 0 : p_best_sad16x8[25] = sad_16x8[25];
2084 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2085 0 : y_mv = _MVYT(mv);
2086 0 : p_best_mv16x8[25] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2087 : }
2088 :
2089 0 : sad_16x8[26] = p_sad8x8[52][search_index] + p_sad8x8[53][search_index];
2090 0 : if (sad_16x8[26] < p_best_sad16x8[26]) {
2091 0 : p_best_sad16x8[26] = sad_16x8[26];
2092 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2093 0 : y_mv = _MVYT(mv);
2094 0 : p_best_mv16x8[26] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2095 : }
2096 :
2097 0 : sad_16x8[27] = p_sad8x8[54][search_index] + p_sad8x8[55][search_index];
2098 0 : if (sad_16x8[27] < p_best_sad16x8[27]) {
2099 0 : p_best_sad16x8[27] = sad_16x8[27];
2100 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2101 0 : y_mv = _MVYT(mv);
2102 0 : p_best_mv16x8[27] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2103 : }
2104 :
2105 0 : sad_16x8[28] = p_sad8x8[56][search_index] + p_sad8x8[57][search_index];
2106 0 : if (sad_16x8[28] < p_best_sad16x8[28]) {
2107 0 : p_best_sad16x8[28] = sad_16x8[28];
2108 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2109 0 : y_mv = _MVYT(mv);
2110 0 : p_best_mv16x8[28] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2111 : }
2112 :
2113 0 : sad_16x8[29] = p_sad8x8[58][search_index] + p_sad8x8[59][search_index];
2114 0 : if (sad_16x8[29] < p_best_sad16x8[29]) {
2115 0 : p_best_sad16x8[29] = sad_16x8[29];
2116 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2117 0 : y_mv = _MVYT(mv);
2118 0 : p_best_mv16x8[29] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2119 : }
2120 :
2121 0 : sad_16x8[30] = p_sad8x8[60][search_index] + p_sad8x8[61][search_index];
2122 0 : if (sad_16x8[30] < p_best_sad16x8[30]) {
2123 0 : p_best_sad16x8[30] = sad_16x8[30];
2124 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2125 0 : y_mv = _MVYT(mv);
2126 0 : p_best_mv16x8[30] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2127 : }
2128 :
2129 0 : sad_16x8[31] = p_sad8x8[62][search_index] + p_sad8x8[63][search_index];
2130 0 : if (sad_16x8[31] < p_best_sad16x8[31]) {
2131 0 : p_best_sad16x8[31] = sad_16x8[31];
2132 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2133 0 : y_mv = _MVYT(mv);
2134 0 : p_best_mv16x8[31] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2135 : }
2136 :
2137 : // 32x64
2138 0 : sad = p_sad32x32[0][search_index] + p_sad32x32[2][search_index];
2139 0 : if (sad < p_best_sad32x64[0]) {
2140 0 : p_best_sad32x64[0] = sad;
2141 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2142 0 : y_mv = _MVYT(mv);
2143 0 : p_best_mv32x64[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2144 : }
2145 :
2146 0 : sad = p_sad32x32[1][search_index] + p_sad32x32[3][search_index];
2147 0 : if (sad < p_best_sad32x64[1]) {
2148 0 : p_best_sad32x64[1] = sad;
2149 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2150 0 : y_mv = _MVYT(mv);
2151 0 : p_best_mv32x64[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2152 : }
2153 :
2154 : // 16x32
2155 0 : sad_16x32[0] =
2156 0 : p_sad16x16[0][search_index] + p_sad16x16[2][search_index];
2157 0 : if (sad_16x32[0] < p_best_sad16x32[0]) {
2158 0 : p_best_sad16x32[0] = sad_16x32[0];
2159 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2160 0 : y_mv = _MVYT(mv);
2161 0 : p_best_mv16x32[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2162 : }
2163 :
2164 0 : sad_16x32[1] =
2165 0 : p_sad16x16[1][search_index] + p_sad16x16[3][search_index];
2166 0 : if (sad_16x32[1] < p_best_sad16x32[1]) {
2167 0 : p_best_sad16x32[1] = sad_16x32[1];
2168 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2169 0 : y_mv = _MVYT(mv);
2170 0 : p_best_mv16x32[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2171 : }
2172 :
2173 0 : sad_16x32[2] =
2174 0 : p_sad16x16[4][search_index] + p_sad16x16[6][search_index];
2175 0 : if (sad_16x32[2] < p_best_sad16x32[2]) {
2176 0 : p_best_sad16x32[2] = sad_16x32[2];
2177 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2178 0 : y_mv = _MVYT(mv);
2179 0 : p_best_mv16x32[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2180 : }
2181 :
2182 0 : sad_16x32[3] =
2183 0 : p_sad16x16[5][search_index] + p_sad16x16[7][search_index];
2184 0 : if (sad_16x32[3] < p_best_sad16x32[3]) {
2185 0 : p_best_sad16x32[3] = sad_16x32[3];
2186 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2187 0 : y_mv = _MVYT(mv);
2188 0 : p_best_mv16x32[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2189 : }
2190 :
2191 0 : sad_16x32[4] =
2192 0 : p_sad16x16[8][search_index] + p_sad16x16[10][search_index];
2193 0 : if (sad_16x32[4] < p_best_sad16x32[4]) {
2194 0 : p_best_sad16x32[4] = sad_16x32[4];
2195 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2196 0 : y_mv = _MVYT(mv);
2197 0 : p_best_mv16x32[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2198 : }
2199 :
2200 0 : sad_16x32[5] =
2201 0 : p_sad16x16[9][search_index] + p_sad16x16[11][search_index];
2202 0 : if (sad_16x32[5] < p_best_sad16x32[5]) {
2203 0 : p_best_sad16x32[5] = sad_16x32[5];
2204 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2205 0 : y_mv = _MVYT(mv);
2206 0 : p_best_mv16x32[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2207 : }
2208 :
2209 0 : sad_16x32[6] =
2210 0 : p_sad16x16[12][search_index] + p_sad16x16[14][search_index];
2211 0 : if (sad_16x32[6] < p_best_sad16x32[6]) {
2212 0 : p_best_sad16x32[6] = sad_16x32[6];
2213 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2214 0 : y_mv = _MVYT(mv);
2215 0 : p_best_mv16x32[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2216 : }
2217 :
2218 0 : sad_16x32[7] =
2219 0 : p_sad16x16[13][search_index] + p_sad16x16[15][search_index];
2220 0 : if (sad_16x32[7] < p_best_sad16x32[7]) {
2221 0 : p_best_sad16x32[7] = sad_16x32[7];
2222 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2223 0 : y_mv = _MVYT(mv);
2224 0 : p_best_mv16x32[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2225 : }
2226 :
2227 0 : sad = sad_16x32[0] + sad_16x32[4];
2228 0 : if (sad < p_best_sad16x64[0]) {
2229 0 : p_best_sad16x64[0] = sad;
2230 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2231 0 : y_mv = _MVYT(mv);
2232 0 : p_best_mv16x64[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2233 : }
2234 0 : sad = sad_16x32[1] + sad_16x32[5];
2235 0 : if (sad < p_best_sad16x64[1]) {
2236 0 : p_best_sad16x64[1] = sad;
2237 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2238 0 : y_mv = _MVYT(mv);
2239 0 : p_best_mv16x64[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2240 : }
2241 :
2242 0 : sad = sad_16x32[2] + sad_16x32[6];
2243 0 : if (sad < p_best_sad16x64[2]) {
2244 0 : p_best_sad16x64[2] = sad;
2245 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2246 0 : y_mv = _MVYT(mv);
2247 0 : p_best_mv16x64[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2248 : }
2249 :
2250 0 : sad = sad_16x32[3] + sad_16x32[7];
2251 0 : if (sad < p_best_sad16x64[3]) {
2252 0 : p_best_sad16x64[3] = sad;
2253 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2254 0 : y_mv = _MVYT(mv);
2255 0 : p_best_mv16x64[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2256 : }
2257 : // 8x16
2258 0 : sad_8x16[0] = p_sad8x8[0][search_index] + p_sad8x8[2][search_index];
2259 0 : if (sad_8x16[0] < p_best_sad8x16[0]) {
2260 0 : p_best_sad8x16[0] = sad_8x16[0];
2261 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2262 0 : y_mv = _MVYT(mv);
2263 0 : p_best_mv8x16[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2264 : }
2265 :
2266 0 : sad_8x16[1] = p_sad8x8[1][search_index] + p_sad8x8[3][search_index];
2267 0 : if (sad_8x16[1] < p_best_sad8x16[1]) {
2268 0 : p_best_sad8x16[1] = sad_8x16[1];
2269 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2270 0 : y_mv = _MVYT(mv);
2271 0 : p_best_mv8x16[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2272 : }
2273 :
2274 0 : sad_8x16[2] = p_sad8x8[4][search_index] + p_sad8x8[6][search_index];
2275 0 : if (sad_8x16[2] < p_best_sad8x16[2]) {
2276 0 : p_best_sad8x16[2] = sad_8x16[2];
2277 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2278 0 : y_mv = _MVYT(mv);
2279 0 : p_best_mv8x16[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2280 : }
2281 :
2282 0 : sad_8x16[3] = p_sad8x8[5][search_index] + p_sad8x8[7][search_index];
2283 0 : if (sad_8x16[3] < p_best_sad8x16[3]) {
2284 0 : p_best_sad8x16[3] = sad_8x16[3];
2285 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2286 0 : y_mv = _MVYT(mv);
2287 0 : p_best_mv8x16[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2288 : }
2289 :
2290 0 : sad_8x16[4] = p_sad8x8[8][search_index] + p_sad8x8[10][search_index];
2291 0 : if (sad_8x16[4] < p_best_sad8x16[4]) {
2292 0 : p_best_sad8x16[4] = sad_8x16[4];
2293 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2294 0 : y_mv = _MVYT(mv);
2295 0 : p_best_mv8x16[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2296 : }
2297 :
2298 0 : sad_8x16[5] = p_sad8x8[9][search_index] + p_sad8x8[11][search_index];
2299 0 : if (sad_8x16[5] < p_best_sad8x16[5]) {
2300 0 : p_best_sad8x16[5] = sad_8x16[5];
2301 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2302 0 : y_mv = _MVYT(mv);
2303 0 : p_best_mv8x16[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2304 : }
2305 :
2306 0 : sad_8x16[6] = p_sad8x8[12][search_index] + p_sad8x8[14][search_index];
2307 0 : if (sad_8x16[6] < p_best_sad8x16[6]) {
2308 0 : p_best_sad8x16[6] = sad_8x16[6];
2309 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2310 0 : y_mv = _MVYT(mv);
2311 0 : p_best_mv8x16[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2312 : }
2313 :
2314 0 : sad_8x16[7] = p_sad8x8[13][search_index] + p_sad8x8[15][search_index];
2315 0 : if (sad_8x16[7] < p_best_sad8x16[7]) {
2316 0 : p_best_sad8x16[7] = sad_8x16[7];
2317 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2318 0 : y_mv = _MVYT(mv);
2319 0 : p_best_mv8x16[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2320 : }
2321 :
2322 0 : sad_8x16[8] = p_sad8x8[16][search_index] + p_sad8x8[18][search_index];
2323 0 : if (sad_8x16[8] < p_best_sad8x16[8]) {
2324 0 : p_best_sad8x16[8] = sad_8x16[8];
2325 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2326 0 : y_mv = _MVYT(mv);
2327 0 : p_best_mv8x16[8] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2328 : }
2329 :
2330 0 : sad_8x16[9] = p_sad8x8[17][search_index] + p_sad8x8[19][search_index];
2331 0 : if (sad_8x16[9] < p_best_sad8x16[9]) {
2332 0 : p_best_sad8x16[9] = sad_8x16[9];
2333 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2334 0 : y_mv = _MVYT(mv);
2335 0 : p_best_mv8x16[9] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2336 : }
2337 :
2338 0 : sad_8x16[10] = p_sad8x8[20][search_index] + p_sad8x8[22][search_index];
2339 0 : if (sad_8x16[10] < p_best_sad8x16[10]) {
2340 0 : p_best_sad8x16[10] = sad_8x16[10];
2341 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2342 0 : y_mv = _MVYT(mv);
2343 0 : p_best_mv8x16[10] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2344 : }
2345 :
2346 0 : sad_8x16[11] = p_sad8x8[21][search_index] + p_sad8x8[23][search_index];
2347 0 : if (sad_8x16[11] < p_best_sad8x16[11]) {
2348 0 : p_best_sad8x16[11] = sad_8x16[11];
2349 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2350 0 : y_mv = _MVYT(mv);
2351 0 : p_best_mv8x16[11] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2352 : }
2353 :
2354 0 : sad_8x16[12] = p_sad8x8[24][search_index] + p_sad8x8[26][search_index];
2355 0 : if (sad_8x16[12] < p_best_sad8x16[12]) {
2356 0 : p_best_sad8x16[12] = sad_8x16[12];
2357 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2358 0 : y_mv = _MVYT(mv);
2359 0 : p_best_mv8x16[12] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2360 : }
2361 :
2362 0 : sad_8x16[13] = p_sad8x8[25][search_index] + p_sad8x8[27][search_index];
2363 0 : if (sad_8x16[13] < p_best_sad8x16[13]) {
2364 0 : p_best_sad8x16[13] = sad_8x16[13];
2365 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2366 0 : y_mv = _MVYT(mv);
2367 0 : p_best_mv8x16[13] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2368 : }
2369 :
2370 0 : sad_8x16[14] = p_sad8x8[28][search_index] + p_sad8x8[30][search_index];
2371 0 : if (sad_8x16[14] < p_best_sad8x16[14]) {
2372 0 : p_best_sad8x16[14] = sad_8x16[14];
2373 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2374 0 : y_mv = _MVYT(mv);
2375 0 : p_best_mv8x16[14] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2376 : }
2377 :
2378 0 : sad_8x16[15] = p_sad8x8[29][search_index] + p_sad8x8[31][search_index];
2379 0 : if (sad_8x16[15] < p_best_sad8x16[15]) {
2380 0 : p_best_sad8x16[15] = sad_8x16[15];
2381 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2382 0 : y_mv = _MVYT(mv);
2383 0 : p_best_mv8x16[15] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2384 : }
2385 :
2386 0 : sad_8x16[16] = p_sad8x8[32][search_index] + p_sad8x8[34][search_index];
2387 0 : if (sad_8x16[16] < p_best_sad8x16[16]) {
2388 0 : p_best_sad8x16[16] = sad_8x16[16];
2389 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2390 0 : y_mv = _MVYT(mv);
2391 0 : p_best_mv8x16[16] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2392 : }
2393 :
2394 0 : sad_8x16[17] = p_sad8x8[33][search_index] + p_sad8x8[35][search_index];
2395 0 : if (sad_8x16[17] < p_best_sad8x16[17]) {
2396 0 : p_best_sad8x16[17] = sad_8x16[17];
2397 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2398 0 : y_mv = _MVYT(mv);
2399 0 : p_best_mv8x16[17] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2400 : }
2401 :
2402 0 : sad_8x16[18] = p_sad8x8[36][search_index] + p_sad8x8[38][search_index];
2403 0 : if (sad_8x16[18] < p_best_sad8x16[18]) {
2404 0 : p_best_sad8x16[18] = sad_8x16[18];
2405 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2406 0 : y_mv = _MVYT(mv);
2407 0 : p_best_mv8x16[18] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2408 : }
2409 :
2410 0 : sad_8x16[19] = p_sad8x8[37][search_index] + p_sad8x8[39][search_index];
2411 0 : if (sad_8x16[19] < p_best_sad8x16[19]) {
2412 0 : p_best_sad8x16[19] = sad_8x16[19];
2413 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2414 0 : y_mv = _MVYT(mv);
2415 0 : p_best_mv8x16[19] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2416 : }
2417 :
2418 0 : sad_8x16[20] = p_sad8x8[40][search_index] + p_sad8x8[42][search_index];
2419 0 : if (sad_8x16[20] < p_best_sad8x16[20]) {
2420 0 : p_best_sad8x16[20] = sad_8x16[20];
2421 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2422 0 : y_mv = _MVYT(mv);
2423 0 : p_best_mv8x16[20] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2424 : }
2425 :
2426 0 : sad_8x16[21] = p_sad8x8[41][search_index] + p_sad8x8[43][search_index];
2427 0 : if (sad_8x16[21] < p_best_sad8x16[21]) {
2428 0 : p_best_sad8x16[21] = sad_8x16[21];
2429 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2430 0 : y_mv = _MVYT(mv);
2431 0 : p_best_mv8x16[21] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2432 : }
2433 :
2434 0 : sad_8x16[22] = p_sad8x8[44][search_index] + p_sad8x8[46][search_index];
2435 0 : if (sad_8x16[22] < p_best_sad8x16[22]) {
2436 0 : p_best_sad8x16[22] = sad_8x16[22];
2437 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2438 0 : y_mv = _MVYT(mv);
2439 0 : p_best_mv8x16[22] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2440 : }
2441 :
2442 0 : sad_8x16[23] = p_sad8x8[45][search_index] + p_sad8x8[47][search_index];
2443 0 : if (sad_8x16[23] < p_best_sad8x16[23]) {
2444 0 : p_best_sad8x16[23] = sad_8x16[23];
2445 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2446 0 : y_mv = _MVYT(mv);
2447 0 : p_best_mv8x16[23] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2448 : }
2449 :
2450 0 : sad_8x16[24] = p_sad8x8[48][search_index] + p_sad8x8[50][search_index];
2451 0 : if (sad_8x16[24] < p_best_sad8x16[24]) {
2452 0 : p_best_sad8x16[24] = sad_8x16[24];
2453 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2454 0 : y_mv = _MVYT(mv);
2455 0 : p_best_mv8x16[24] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2456 : }
2457 :
2458 0 : sad_8x16[25] = p_sad8x8[49][search_index] + p_sad8x8[51][search_index];
2459 0 : if (sad_8x16[25] < p_best_sad8x16[25]) {
2460 0 : p_best_sad8x16[25] = sad_8x16[25];
2461 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2462 0 : y_mv = _MVYT(mv);
2463 0 : p_best_mv8x16[25] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2464 : }
2465 :
2466 0 : sad_8x16[26] = p_sad8x8[52][search_index] + p_sad8x8[54][search_index];
2467 0 : if (sad_8x16[26] < p_best_sad8x16[26]) {
2468 0 : p_best_sad8x16[26] = sad_8x16[26];
2469 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2470 0 : y_mv = _MVYT(mv);
2471 0 : p_best_mv8x16[26] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2472 : }
2473 :
2474 0 : sad_8x16[27] = p_sad8x8[53][search_index] + p_sad8x8[55][search_index];
2475 0 : if (sad_8x16[27] < p_best_sad8x16[27]) {
2476 0 : p_best_sad8x16[27] = sad_8x16[27];
2477 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2478 0 : y_mv = _MVYT(mv);
2479 0 : p_best_mv8x16[27] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2480 : }
2481 :
2482 0 : sad_8x16[28] = p_sad8x8[56][search_index] + p_sad8x8[58][search_index];
2483 0 : if (sad_8x16[28] < p_best_sad8x16[28]) {
2484 0 : p_best_sad8x16[28] = sad_8x16[28];
2485 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2486 0 : y_mv = _MVYT(mv);
2487 0 : p_best_mv8x16[28] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2488 : }
2489 :
2490 0 : sad_8x16[29] = p_sad8x8[57][search_index] + p_sad8x8[59][search_index];
2491 0 : if (sad_8x16[29] < p_best_sad8x16[29]) {
2492 0 : p_best_sad8x16[29] = sad_8x16[29];
2493 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2494 0 : y_mv = _MVYT(mv);
2495 0 : p_best_mv8x16[29] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2496 : }
2497 :
2498 0 : sad_8x16[30] = p_sad8x8[60][search_index] + p_sad8x8[62][search_index];
2499 0 : if (sad_8x16[30] < p_best_sad8x16[30]) {
2500 0 : p_best_sad8x16[30] = sad_8x16[30];
2501 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2502 0 : y_mv = _MVYT(mv);
2503 0 : p_best_mv8x16[30] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2504 : }
2505 :
2506 0 : sad_8x16[31] = p_sad8x8[61][search_index] + p_sad8x8[63][search_index];
2507 0 : if (sad_8x16[31] < p_best_sad8x16[31]) {
2508 0 : p_best_sad8x16[31] = sad_8x16[31];
2509 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2510 0 : y_mv = _MVYT(mv);
2511 0 : p_best_mv8x16[31] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2512 : }
2513 :
2514 : // 32x8
2515 0 : sad = sad_16x8[0] + sad_16x8[2];
2516 0 : if (sad < p_best_sad32x8[0]) {
2517 0 : p_best_sad32x8[0] = sad;
2518 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2519 0 : y_mv = _MVYT(mv);
2520 0 : p_best_mv32x8[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2521 : }
2522 :
2523 0 : sad = sad_16x8[1] + sad_16x8[3];
2524 0 : if (sad < p_best_sad32x8[1]) {
2525 0 : p_best_sad32x8[1] = sad;
2526 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2527 0 : y_mv = _MVYT(mv);
2528 0 : p_best_mv32x8[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2529 : }
2530 :
2531 0 : sad = sad_16x8[4] + sad_16x8[6];
2532 0 : if (sad < p_best_sad32x8[2]) {
2533 0 : p_best_sad32x8[2] = sad;
2534 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2535 0 : y_mv = _MVYT(mv);
2536 0 : p_best_mv32x8[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2537 : }
2538 :
2539 0 : sad = sad_16x8[5] + sad_16x8[7];
2540 0 : if (sad < p_best_sad32x8[3]) {
2541 0 : p_best_sad32x8[3] = sad;
2542 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2543 0 : y_mv = _MVYT(mv);
2544 0 : p_best_mv32x8[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2545 : }
2546 :
2547 0 : sad = sad_16x8[8] + sad_16x8[10];
2548 0 : if (sad < p_best_sad32x8[4]) {
2549 0 : p_best_sad32x8[4] = sad;
2550 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2551 0 : y_mv = _MVYT(mv);
2552 0 : p_best_mv32x8[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2553 : }
2554 :
2555 0 : sad = sad_16x8[9] + sad_16x8[11];
2556 0 : if (sad < p_best_sad32x8[5]) {
2557 0 : p_best_sad32x8[5] = sad;
2558 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2559 0 : y_mv = _MVYT(mv);
2560 0 : p_best_mv32x8[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2561 : }
2562 :
2563 0 : sad = sad_16x8[12] + sad_16x8[14];
2564 0 : if (sad < p_best_sad32x8[6]) {
2565 0 : p_best_sad32x8[6] = sad;
2566 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2567 0 : y_mv = _MVYT(mv);
2568 0 : p_best_mv32x8[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2569 : }
2570 :
2571 0 : sad = sad_16x8[13] + sad_16x8[15];
2572 0 : if (sad < p_best_sad32x8[7]) {
2573 0 : p_best_sad32x8[7] = sad;
2574 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2575 0 : y_mv = _MVYT(mv);
2576 0 : p_best_mv32x8[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2577 : }
2578 :
2579 0 : sad = sad_16x8[16] + sad_16x8[18];
2580 0 : if (sad < p_best_sad32x8[8]) {
2581 0 : p_best_sad32x8[8] = sad;
2582 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2583 0 : y_mv = _MVYT(mv);
2584 0 : p_best_mv32x8[8] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2585 : }
2586 :
2587 0 : sad = sad_16x8[17] + sad_16x8[19];
2588 0 : if (sad < p_best_sad32x8[9]) {
2589 0 : p_best_sad32x8[9] = sad;
2590 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2591 0 : y_mv = _MVYT(mv);
2592 0 : p_best_mv32x8[9] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2593 : }
2594 :
2595 0 : sad = sad_16x8[20] + sad_16x8[22];
2596 0 : if (sad < p_best_sad32x8[10]) {
2597 0 : p_best_sad32x8[10] = sad;
2598 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2599 0 : y_mv = _MVYT(mv);
2600 0 : p_best_mv32x8[10] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2601 : }
2602 :
2603 0 : sad = sad_16x8[21] + sad_16x8[23];
2604 0 : if (sad < p_best_sad32x8[11]) {
2605 0 : p_best_sad32x8[11] = sad;
2606 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2607 0 : y_mv = _MVYT(mv);
2608 0 : p_best_mv32x8[11] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2609 : }
2610 :
2611 0 : sad = sad_16x8[24] + sad_16x8[26];
2612 0 : if (sad < p_best_sad32x8[12]) {
2613 0 : p_best_sad32x8[12] = sad;
2614 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2615 0 : y_mv = _MVYT(mv);
2616 0 : p_best_mv32x8[12] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2617 : }
2618 :
2619 0 : sad = sad_16x8[25] + sad_16x8[27];
2620 0 : if (sad < p_best_sad32x8[13]) {
2621 0 : p_best_sad32x8[13] = sad;
2622 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2623 0 : y_mv = _MVYT(mv);
2624 0 : p_best_mv32x8[13] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2625 : }
2626 :
2627 0 : sad = sad_16x8[28] + sad_16x8[30];
2628 0 : if (sad < p_best_sad32x8[14]) {
2629 0 : p_best_sad32x8[14] = sad;
2630 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2631 0 : y_mv = _MVYT(mv);
2632 0 : p_best_mv32x8[14] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2633 : }
2634 :
2635 0 : sad = sad_16x8[29] + sad_16x8[31];
2636 0 : if (sad < p_best_sad32x8[15]) {
2637 0 : p_best_sad32x8[15] = sad;
2638 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2639 0 : y_mv = _MVYT(mv);
2640 0 : p_best_mv32x8[15] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2641 : }
2642 : // 8x32
2643 0 : sad = sad_8x16[0] + sad_8x16[4];
2644 0 : if (sad < p_best_sad8x32[0]) {
2645 0 : p_best_sad8x32[0] = sad;
2646 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2647 0 : y_mv = _MVYT(mv);
2648 0 : p_best_mv8x32[0] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2649 : }
2650 :
2651 0 : sad = sad_8x16[1] + sad_8x16[5];
2652 0 : if (sad < p_best_sad8x32[1]) {
2653 0 : p_best_sad8x32[1] = sad;
2654 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2655 0 : y_mv = _MVYT(mv);
2656 0 : p_best_mv8x32[1] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2657 : }
2658 :
2659 0 : sad = sad_8x16[2] + sad_8x16[6];
2660 0 : if (sad < p_best_sad8x32[2]) {
2661 0 : p_best_sad8x32[2] = sad;
2662 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2663 0 : y_mv = _MVYT(mv);
2664 0 : p_best_mv8x32[2] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2665 : }
2666 :
2667 0 : sad = sad_8x16[3] + sad_8x16[7];
2668 0 : if (sad < p_best_sad8x32[3]) {
2669 0 : p_best_sad8x32[3] = sad;
2670 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2671 0 : y_mv = _MVYT(mv);
2672 0 : p_best_mv8x32[3] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2673 : }
2674 :
2675 0 : sad = sad_8x16[8] + sad_8x16[12];
2676 0 : if (sad < p_best_sad8x32[4]) {
2677 0 : p_best_sad8x32[4] = sad;
2678 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2679 0 : y_mv = _MVYT(mv);
2680 0 : p_best_mv8x32[4] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2681 : }
2682 :
2683 0 : sad = sad_8x16[9] + sad_8x16[13];
2684 0 : if (sad < p_best_sad8x32[5]) {
2685 0 : p_best_sad8x32[5] = sad;
2686 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2687 0 : y_mv = _MVYT(mv);
2688 0 : p_best_mv8x32[5] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2689 : }
2690 :
2691 0 : sad = sad_8x16[10] + sad_8x16[14];
2692 0 : if (sad < p_best_sad8x32[6]) {
2693 0 : p_best_sad8x32[6] = sad;
2694 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2695 0 : y_mv = _MVYT(mv);
2696 0 : p_best_mv8x32[6] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2697 : }
2698 :
2699 0 : sad = sad_8x16[11] + sad_8x16[15];
2700 0 : if (sad < p_best_sad8x32[7]) {
2701 0 : p_best_sad8x32[7] = sad;
2702 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2703 0 : y_mv = _MVYT(mv);
2704 0 : p_best_mv8x32[7] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2705 : }
2706 :
2707 0 : sad = sad_8x16[16] + sad_8x16[20];
2708 0 : if (sad < p_best_sad8x32[8]) {
2709 0 : p_best_sad8x32[8] = sad;
2710 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2711 0 : y_mv = _MVYT(mv);
2712 0 : p_best_mv8x32[8] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2713 : }
2714 :
2715 0 : sad = sad_8x16[17] + sad_8x16[21];
2716 0 : if (sad < p_best_sad8x32[9]) {
2717 0 : p_best_sad8x32[9] = sad;
2718 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2719 0 : y_mv = _MVYT(mv);
2720 0 : p_best_mv8x32[9] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2721 : }
2722 :
2723 0 : sad = sad_8x16[18] + sad_8x16[22];
2724 0 : if (sad < p_best_sad8x32[10]) {
2725 0 : p_best_sad8x32[10] = sad;
2726 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2727 0 : y_mv = _MVYT(mv);
2728 0 : p_best_mv8x32[10] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2729 : }
2730 :
2731 0 : sad = sad_8x16[19] + sad_8x16[23];
2732 0 : if (sad < p_best_sad8x32[11]) {
2733 0 : p_best_sad8x32[11] = sad;
2734 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2735 0 : y_mv = _MVYT(mv);
2736 0 : p_best_mv8x32[11] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2737 : }
2738 :
2739 0 : sad = sad_8x16[24] + sad_8x16[28];
2740 0 : if (sad < p_best_sad8x32[12]) {
2741 0 : p_best_sad8x32[12] = sad;
2742 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2743 0 : y_mv = _MVYT(mv);
2744 0 : p_best_mv8x32[12] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2745 : }
2746 :
2747 0 : sad = sad_8x16[25] + sad_8x16[29];
2748 0 : if (sad < p_best_sad8x32[13]) {
2749 0 : p_best_sad8x32[13] = sad;
2750 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2751 0 : y_mv = _MVYT(mv);
2752 0 : p_best_mv8x32[13] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2753 : }
2754 :
2755 0 : sad = sad_8x16[26] + sad_8x16[30];
2756 0 : if (sad < p_best_sad8x32[14]) {
2757 0 : p_best_sad8x32[14] = sad;
2758 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2759 0 : y_mv = _MVYT(mv);
2760 0 : p_best_mv8x32[14] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2761 : }
2762 :
2763 0 : sad = sad_8x16[27] + sad_8x16[31];
2764 0 : if (sad < p_best_sad8x32[15]) {
2765 0 : p_best_sad8x32[15] = sad;
2766 0 : x_mv = _MVXT(mv) + (int16_t)search_index * 4;
2767 0 : y_mv = _MVYT(mv);
2768 0 : p_best_mv8x32[15] = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
2769 : }
2770 : }
2771 0 : }
2772 :
/*******************************************
 * ext_eight_sad_calculation_8x8_16x16
 *
 * For one 16x16 block, evaluates 8 horizontally adjacent search positions
 * (ref + 0 .. ref + 7). For every position it:
 *   - computes the SAD of the four 8x8 sub-blocks by calling the 8x4 kernel
 *     with doubled strides (every other row) and doubling the result,
 *   - stores each 8x8 SAD in p_eight_sad8x8[4 * start_16x16_pos + k][pos],
 *   - stores their sum in p_eight_sad16x16[start_16x16_pos][pos],
 *   - updates the best SAD / best MV entries when a smaller SAD is found.
 *
 * src, src_stride   : source samples of the 16x16 block and their stride
 * ref, ref_stride   : reference samples at the first search position
 * mv                : packed MV of the first search position
 *                     (x in the low 16 bits, y in the high 16 bits)
 * start_16x16_pos   : index of this 16x16 block in the output arrays
 * p_best_sad8x8 / p_best_mv8x8     : best-so-far arrays for 8x8 blocks
 * p_best_sad16x16 / p_best_mv16x16 : best-so-far arrays for 16x16 blocks
 *******************************************/
static void ext_eight_sad_calculation_8x8_16x16(
    uint8_t *src, uint32_t src_stride, uint8_t *ref, uint32_t ref_stride,
    uint32_t mv, uint32_t start_16x16_pos, uint32_t *p_best_sad8x8,
    uint32_t *p_best_sad16x16, uint32_t *p_best_mv8x8, uint32_t *p_best_mv16x16,
    uint32_t p_eight_sad16x16[16][8], uint32_t p_eight_sad8x8[64][8]) {
    const uint32_t start_8x8_pos = 4 * start_16x16_pos;
    // Doubled strides: the 8x4 kernel then reads rows 0, 2, 4, 6 of an 8x8.
    const uint32_t src_stride_sub = (src_stride << 1);
    const uint32_t ref_stride_sub = (ref_stride << 1);

    // Top-left sample of each 8x8 quadrant: TL, TR, BL, BR.
    uint8_t *const src_quad[4] = {src,
                                  src + 8,
                                  src + (src_stride << 3),
                                  src + (src_stride << 3) + 8};
    uint8_t *const ref_quad[4] = {ref,
                                  ref + 8,
                                  ref + (ref_stride << 3),
                                  ref + (ref_stride << 3) + 8};

    p_best_sad8x8 += start_8x8_pos;
    p_best_mv8x8 += start_8x8_pos;
    p_best_sad16x16 += start_16x16_pos;
    p_best_mv16x16 += start_16x16_pos;

    for (uint32_t search_index = 0; search_index < 8; search_index++) {
        // Each search position moves x by 4 MV units; y is unchanged.
        const int16_t x_mv =
            (int16_t)(_MVXT(mv) + (int16_t)search_index * 4);
        const int16_t y_mv = _MVYT(mv);
        // Shift in uint32_t: shifting the int-promoted uint16_t overflows
        // signed int (undefined behavior) whenever y_mv is negative.
        const uint32_t packed_mv =
            ((uint32_t)(uint16_t)y_mv << 16) | (uint16_t)x_mv;

        uint32_t sad16x16 = 0;
        for (uint32_t quad = 0; quad < 4; quad++) {
            const uint32_t sad8x8 =
                (compute8x4_sad_kernel_c(src_quad[quad],
                                         src_stride_sub,
                                         ref_quad[quad] + search_index,
                                         ref_stride_sub))
                << 1;
            p_eight_sad8x8[quad + start_8x8_pos][search_index] = sad8x8;
            if (sad8x8 < p_best_sad8x8[quad]) {
                p_best_sad8x8[quad] = sad8x8;
                p_best_mv8x8[quad] = packed_mv;
            }
            sad16x16 += sad8x8;
        }

        p_eight_sad16x16[start_16x16_pos][search_index] = sad16x16;
        if (sad16x16 < p_best_sad16x16[0]) {
            p_best_sad16x16[0] = sad16x16;
            p_best_mv16x16[0] = packed_mv;
        }
    }
}
2855 :
/*
 * Runs ext_eight_sad_calculation_8x8_16x16 on each of the sixteen 16x16
 * blocks laid out as a 4x4 grid starting at src / ref. Blocks are visited in
 * raster order; block_order translates the raster position into the block
 * index used to address the best-SAD / best-MV / eight-SAD output arrays.
 */
void ext_all_sad_calculation_8x8_16x16_c(
    uint8_t *src, uint32_t src_stride, uint8_t *ref, uint32_t ref_stride,
    uint32_t mv, uint32_t *p_best_sad8x8, uint32_t *p_best_sad16x16,
    uint32_t *p_best_mv8x8, uint32_t *p_best_mv16x16,
    uint32_t p_eight_sad16x16[16][8], uint32_t p_eight_sad8x8[64][8]) {
    // Raster position -> 16x16 block index.
    static const uint8_t block_order[16] = {
        0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};

    for (uint32_t raster = 0; raster < 16; raster++) {
        const uint32_t row = raster >> 2;
        const uint32_t col = raster & 3;
        const uint32_t src_offset = 16 * row * src_stride + 16 * col;
        const uint32_t ref_offset = 16 * row * ref_stride + 16 * col;

        ext_eight_sad_calculation_8x8_16x16(src + src_offset,
                                            src_stride,
                                            ref + ref_offset,
                                            ref_stride,
                                            mv,
                                            block_order[raster],
                                            p_best_sad8x8,
                                            p_best_sad16x16,
                                            p_best_mv8x8,
                                            p_best_mv16x16,
                                            p_eight_sad16x16,
                                            p_eight_sad8x8);
    }
}
2884 :
/*******************************************
 * ext_eight_sad_calculation_32x32_64x64_c
 *
 * Calculate the SAD of the four 32x32 blocks and of the 64x64 block from
 * the 16x16 SADs, for each of the 8 search positions, and keep the best
 * SAD + MV whenever there is an improvement.
 *
 * p_sad16x16      : [16x16 block][search position] SADs; 32x32 block k is
 *                   the sum of 16x16 blocks 4k .. 4k+3
 * p_best_sad32x32 / p_best_mv32x32 : best-so-far arrays (4 entries used)
 * p_best_sad64x64 / p_best_mv64x64 : best-so-far values (entry 0 used)
 * mv              : packed MV of the first search position
 *                   (x in the low 16 bits, y in the high 16 bits)
 * p_sad32x32      : output, [32x32 block][search position] SADs
 *******************************************/
void ext_eight_sad_calculation_32x32_64x64_c(
    uint32_t p_sad16x16[16][8], uint32_t *p_best_sad32x32,
    uint32_t *p_best_sad64x64, uint32_t *p_best_mv32x32,
    uint32_t *p_best_mv64x64, uint32_t mv, uint32_t p_sad32x32[4][8]) {
    for (uint32_t search_index = 0; search_index < 8; search_index++) {
        // Each search position moves x by 4 MV units; y is unchanged.
        const int16_t x_mv =
            (int16_t)(_MVXT(mv) + (int16_t)search_index * 4);
        const int16_t y_mv = _MVYT(mv);
        // Shift in uint32_t: shifting the int-promoted uint16_t overflows
        // signed int (undefined behavior) whenever y_mv is negative.
        const uint32_t packed_mv =
            ((uint32_t)(uint16_t)y_mv << 16) | (uint16_t)x_mv;

        uint32_t sad64x64 = 0;
        for (uint32_t blk32 = 0; blk32 < 4; blk32++) {
            const uint32_t first16 = 4 * blk32;
            const uint32_t sad32x32 = p_sad16x16[first16 + 0][search_index] +
                                      p_sad16x16[first16 + 1][search_index] +
                                      p_sad16x16[first16 + 2][search_index] +
                                      p_sad16x16[first16 + 3][search_index];
            p_sad32x32[blk32][search_index] = sad32x32;
            if (sad32x32 < p_best_sad32x32[blk32]) {
                p_best_sad32x32[blk32] = sad32x32;
                p_best_mv32x32[blk32] = packed_mv;
            }
            sad64x64 += sad32x32;
        }

        if (sad64x64 < p_best_sad64x64[0]) {
            p_best_sad64x64[0] = sad64x64;
            p_best_mv64x64[0] = packed_mv;
        }
    }
}
2948 :
2949 : /*******************************************
2950 : * open_loop_me_get_search_point_results_block
2951 : *******************************************/
2952 0 : static void open_loop_me_get_eight_search_point_results_block(
2953 : MeContext
2954 : *context_ptr, // input parameter, ME context Ptr, used to get SB Ptr
2955 : uint32_t listIndex, // input parameter, reference list index
2956 : uint32_t ref_pic_index,
2957 : uint32_t searchRegionIndex, // input parameter, search area origin, used to
2958 : // point to reference samples
2959 : int32_t xSearchIndex, // input parameter, search region position in the
2960 : // horizontal direction, used to derive xMV
2961 : int32_t ySearchIndex // input parameter, search region position in the
2962 : // vertical direction, used to derive yMV
2963 : ) {
2964 : // uint32_t reflumaStride = refPicPtr->stride_y; // NADER
2965 : // uint8_t *refPtr = refPicPtr->buffer_y; // NADER
2966 0 : uint32_t reflumaStride =
2967 : context_ptr->interpolated_full_stride[listIndex][ref_pic_index];
2968 0 : uint8_t *refPtr =
2969 0 : context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] +
2970 0 : ((ME_FILTER_TAP >> 1) *
2971 0 : context_ptr->interpolated_full_stride[listIndex][ref_pic_index]) +
2972 0 : (ME_FILTER_TAP >> 1) + searchRegionIndex;
2973 :
2974 0 : uint32_t currMV1 = (((uint16_t)ySearchIndex) << 18);
2975 0 : uint16_t currMV2 = (((uint16_t)xSearchIndex << 2));
2976 0 : uint32_t currMV = currMV1 | currMV2;
2977 :
2978 0 : ext_all_sad_calculation_8x8_16x16(
2979 : context_ptr->sb_src_ptr,
2980 : context_ptr->sb_src_stride,
2981 : refPtr,
2982 : reflumaStride,
2983 : currMV,
2984 : context_ptr->p_best_sad8x8,
2985 : context_ptr->p_best_sad16x16,
2986 : context_ptr->p_best_mv8x8,
2987 : context_ptr->p_best_mv16x16,
2988 0 : context_ptr->p_eight_sad16x16,
2989 0 : context_ptr->p_eight_sad8x8);
2990 :
2991 0 : ext_eight_sad_calculation_32x32_64x64(
2992 0 : context_ptr->p_eight_sad16x16,
2993 : context_ptr->p_best_sad32x32,
2994 : context_ptr->p_best_sad64x64,
2995 : context_ptr->p_best_mv32x32,
2996 : context_ptr->p_best_mv64x64,
2997 : currMV,
2998 0 : context_ptr->p_eight_sad32x32);
2999 :
3000 0 : ext_eigth_sad_calculation_nsq(
3001 0 : context_ptr->p_eight_sad8x8,
3002 0 : context_ptr->p_eight_sad16x16,
3003 0 : context_ptr->p_eight_sad32x32,
3004 : context_ptr->p_best_sad64x32,
3005 : context_ptr->p_best_mv64x32,
3006 : context_ptr->p_best_sad32x16,
3007 : context_ptr->p_best_mv32x16,
3008 : context_ptr->p_best_sad16x8,
3009 : context_ptr->p_best_mv16x8,
3010 : context_ptr->p_best_sad32x64,
3011 : context_ptr->p_best_mv32x64,
3012 : context_ptr->p_best_sad16x32,
3013 : context_ptr->p_best_mv16x32,
3014 : context_ptr->p_best_sad8x16,
3015 : context_ptr->p_best_mv8x16,
3016 : context_ptr->p_best_sad32x8,
3017 : context_ptr->p_best_mv32x8,
3018 : context_ptr->p_best_sad8x32,
3019 : context_ptr->p_best_mv8x32,
3020 : context_ptr->p_best_sad64x16,
3021 : context_ptr->p_best_mv64x16,
3022 : context_ptr->p_best_sad16x64,
3023 : context_ptr->p_best_mv16x64,
3024 : currMV);
3025 0 : }
3026 :
3027 : /*******************************************
3028 : * nsq_get_analysis_results_block returns the
3029 : * the best partition for each sq_block based
3030 : * on the ME SAD
3031 : *******************************************/
3032 0 : static void nsq_get_analysis_results_block(MeContext *context_ptr) {
3033 0 : uint32_t *p_best_sad64x32 = context_ptr->p_best_sad64x32;
3034 0 : uint32_t *p_best_sad32x16 = context_ptr->p_best_sad32x16;
3035 0 : uint32_t *p_best_sad16x8 = context_ptr->p_best_sad16x8;
3036 0 : uint32_t *p_best_sad32x64 = context_ptr->p_best_sad32x64;
3037 0 : uint32_t *p_best_sad16x32 = context_ptr->p_best_sad16x32;
3038 0 : uint32_t *p_best_sad8x16 = context_ptr->p_best_sad8x16;
3039 0 : uint32_t *p_best_sad32x8 = context_ptr->p_best_sad32x8;
3040 0 : uint32_t *p_best_sad8x32 = context_ptr->p_best_sad8x32;
3041 0 : uint32_t *p_best_sad64x16 = context_ptr->p_best_sad64x16;
3042 0 : uint32_t *p_best_sad16x64 = context_ptr->p_best_sad16x64;
3043 0 : uint8_t *p_best_nsq_64x64 = context_ptr->p_best_nsq64x64;
3044 0 : uint8_t *p_best_nsq_32x32 = context_ptr->p_best_nsq32x32;
3045 0 : uint8_t *p_best_nsq_16x16 = context_ptr->p_best_nsq16x16;
3046 0 : uint8_t *p_best_nsq_8x8 = context_ptr->p_best_nsq8x8;
3047 :
3048 0 : nsq_me_analysis(p_best_sad64x32,
3049 : p_best_sad32x16,
3050 : p_best_sad16x8,
3051 : p_best_sad32x64,
3052 : p_best_sad16x32,
3053 : p_best_sad8x16,
3054 : p_best_sad32x8,
3055 : p_best_sad8x32,
3056 : p_best_sad64x16,
3057 : p_best_sad16x64,
3058 : p_best_nsq_64x64,
3059 : p_best_nsq_32x32,
3060 : p_best_nsq_16x16,
3061 : p_best_nsq_8x8);
3062 0 : }
3063 :
3064 : /*******************************************
3065 : * open_loop_me_get_search_point_results_block
3066 : *******************************************/
3067 0 : static void open_loop_me_get_search_point_results_block(
3068 : MeContext
3069 : *context_ptr, // input parameter, ME context Ptr, used to get SB Ptr
3070 : uint32_t listIndex, // input parameter, reference list index
3071 : uint32_t ref_pic_index,
3072 : uint32_t searchRegionIndex, // input parameter, search area origin, used to
3073 : // point to reference samples
3074 : int32_t xSearchIndex, // input parameter, search region position in the
3075 : // horizontal direction, used to derive xMV
3076 : int32_t ySearchIndex) // input parameter, search region position in the
3077 : // vertical direction, used to derive yMV
3078 : {
3079 0 : const EbBool sub_sad = (context_ptr->me_search_method == SUB_SAD_SEARCH);
3080 0 : uint8_t *src_ptr = context_ptr->sb_src_ptr;
3081 :
3082 : // uint8_t *refPtr = refPicPtr->buffer_y; // NADER
3083 0 : uint8_t *refPtr =
3084 0 : context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] +
3085 0 : (ME_FILTER_TAP >> 1) +
3086 0 : ((ME_FILTER_TAP >> 1) *
3087 0 : context_ptr->interpolated_full_stride[listIndex][ref_pic_index]);
3088 : // uint32_t reflumaStride = refPicPtr->stride_y; // NADER
3089 0 : uint32_t reflumaStride =
3090 : context_ptr->interpolated_full_stride[listIndex][ref_pic_index];
3091 0 : uint32_t searchPositionTLIndex = searchRegionIndex;
3092 : uint32_t searchPositionIndex;
3093 : uint32_t blockIndex;
3094 0 : uint32_t srcNext16x16Offset = (BLOCK_SIZE_64 << 4);
3095 : // uint32_t refNext16x16Offset = (refPicPtr->stride_y << 4); // NADER
3096 0 : uint32_t refNext16x16Offset = (reflumaStride << 4);
3097 0 : uint32_t currMV1 = (((uint16_t)ySearchIndex) << 18);
3098 0 : uint16_t currMV2 = (((uint16_t)xSearchIndex << 2));
3099 0 : uint32_t currMV = currMV1 | currMV2;
3100 0 : uint32_t *p_best_sad8x8 = context_ptr->p_best_sad8x8;
3101 0 : uint32_t *p_best_sad16x16 = context_ptr->p_best_sad16x16;
3102 0 : uint32_t *p_best_sad32x32 = context_ptr->p_best_sad32x32;
3103 0 : uint32_t *p_best_sad64x64 = context_ptr->p_best_sad64x64;
3104 0 : uint32_t *p_best_sad64x32 = context_ptr->p_best_sad64x32;
3105 0 : uint32_t *p_best_sad32x16 = context_ptr->p_best_sad32x16;
3106 0 : uint32_t *p_best_sad16x8 = context_ptr->p_best_sad16x8;
3107 0 : uint32_t *p_best_sad32x64 = context_ptr->p_best_sad32x64;
3108 0 : uint32_t *p_best_sad16x32 = context_ptr->p_best_sad16x32;
3109 0 : uint32_t *p_best_sad8x16 = context_ptr->p_best_sad8x16;
3110 0 : uint32_t *p_best_sad32x8 = context_ptr->p_best_sad32x8;
3111 0 : uint32_t *p_best_sad8x32 = context_ptr->p_best_sad8x32;
3112 0 : uint32_t *p_best_sad64x16 = context_ptr->p_best_sad64x16;
3113 0 : uint32_t *p_best_sad16x64 = context_ptr->p_best_sad16x64;
3114 0 : uint32_t *p_best_mv8x8 = context_ptr->p_best_mv8x8;
3115 0 : uint32_t *p_best_mv16x16 = context_ptr->p_best_mv16x16;
3116 0 : uint32_t *p_best_mv32x32 = context_ptr->p_best_mv32x32;
3117 0 : uint32_t *p_best_mv64x64 = context_ptr->p_best_mv64x64;
3118 0 : uint32_t *p_best_mv64x32 = context_ptr->p_best_mv64x32;
3119 0 : uint32_t *p_best_mv32x16 = context_ptr->p_best_mv32x16;
3120 0 : uint32_t *p_best_mv16x8 = context_ptr->p_best_mv16x8;
3121 0 : uint32_t *p_best_mv32x64 = context_ptr->p_best_mv32x64;
3122 0 : uint32_t *p_best_mv16x32 = context_ptr->p_best_mv16x32;
3123 0 : uint32_t *p_best_mv8x16 = context_ptr->p_best_mv8x16;
3124 0 : uint32_t *p_best_mv32x8 = context_ptr->p_best_mv32x8;
3125 0 : uint32_t *p_best_mv8x32 = context_ptr->p_best_mv8x32;
3126 0 : uint32_t *p_sad32x32 = context_ptr->p_sad32x32;
3127 0 : uint32_t *p_sad16x16 = context_ptr->p_sad16x16;
3128 0 : uint32_t *p_sad8x8 = context_ptr->p_sad8x8;
3129 0 : uint32_t *p_best_mv64x16 = context_ptr->p_best_mv64x16;
3130 0 : uint32_t *p_best_mv16x64 = context_ptr->p_best_mv16x64;
3131 :
3132 : // TODO: blockIndex searchPositionIndex could be removed
3133 :
3134 0 : const uint32_t src_stride = context_ptr->sb_src_stride;
3135 0 : srcNext16x16Offset = src_stride << 4;
3136 :
3137 : //---- 16x16 : 0
3138 0 : blockIndex = 0;
3139 0 : searchPositionIndex = searchPositionTLIndex;
3140 :
3141 0 : ext_sad_calculation_8x8_16x16(
3142 : src_ptr + blockIndex,
3143 : src_stride,
3144 : refPtr + searchPositionIndex,
3145 : reflumaStride,
3146 : &p_best_sad8x8[0],
3147 : &p_best_sad16x16[0],
3148 : &p_best_mv8x8[0],
3149 : &p_best_mv16x16[0],
3150 : currMV,
3151 : &p_sad16x16[0],
3152 : &p_sad8x8[0],
3153 : sub_sad);
3154 :
3155 : //---- 16x16 : 1
3156 0 : blockIndex = blockIndex + 16;
3157 0 : searchPositionIndex = searchPositionTLIndex + 16;
3158 0 : ext_sad_calculation_8x8_16x16(
3159 : src_ptr + blockIndex,
3160 : src_stride,
3161 : refPtr + searchPositionIndex,
3162 : reflumaStride,
3163 : &p_best_sad8x8[4],
3164 : &p_best_sad16x16[1],
3165 : &p_best_mv8x8[4],
3166 : &p_best_mv16x16[1],
3167 : currMV,
3168 : &p_sad16x16[1],
3169 : &p_sad8x8[4],
3170 : sub_sad);
3171 : //---- 16x16 : 4
3172 0 : blockIndex = blockIndex + 16;
3173 0 : searchPositionIndex = searchPositionIndex + 16;
3174 :
3175 0 : ext_sad_calculation_8x8_16x16(
3176 : src_ptr + blockIndex,
3177 : src_stride,
3178 : refPtr + searchPositionIndex,
3179 : reflumaStride,
3180 : &p_best_sad8x8[16],
3181 : &p_best_sad16x16[4],
3182 : &p_best_mv8x8[16],
3183 : &p_best_mv16x16[4],
3184 : currMV,
3185 : &p_sad16x16[4],
3186 : &p_sad8x8[16],
3187 : sub_sad);
3188 :
3189 : //---- 16x16 : 5
3190 0 : blockIndex = blockIndex + 16;
3191 0 : searchPositionIndex = searchPositionIndex + 16;
3192 0 : ext_sad_calculation_8x8_16x16(
3193 : src_ptr + blockIndex,
3194 : src_stride,
3195 : refPtr + searchPositionIndex,
3196 : reflumaStride,
3197 : &p_best_sad8x8[20],
3198 : &p_best_sad16x16[5],
3199 : &p_best_mv8x8[20],
3200 : &p_best_mv16x16[5],
3201 : currMV,
3202 : &p_sad16x16[5],
3203 : &p_sad8x8[20],
3204 : sub_sad);
3205 :
3206 : //---- 16x16 : 2
3207 0 : blockIndex = srcNext16x16Offset;
3208 0 : searchPositionIndex = searchPositionTLIndex + refNext16x16Offset;
3209 0 : ext_sad_calculation_8x8_16x16(
3210 : src_ptr + blockIndex,
3211 : src_stride,
3212 : refPtr + searchPositionIndex,
3213 : reflumaStride,
3214 : &p_best_sad8x8[8],
3215 : &p_best_sad16x16[2],
3216 : &p_best_mv8x8[8],
3217 : &p_best_mv16x16[2],
3218 : currMV,
3219 : &p_sad16x16[2],
3220 : &p_sad8x8[8],
3221 : sub_sad);
3222 : //---- 16x16 : 3
3223 0 : blockIndex = blockIndex + 16;
3224 0 : searchPositionIndex = searchPositionIndex + 16;
3225 0 : ext_sad_calculation_8x8_16x16(
3226 : src_ptr + blockIndex,
3227 : src_stride,
3228 : refPtr + searchPositionIndex,
3229 : reflumaStride,
3230 : &p_best_sad8x8[12],
3231 : &p_best_sad16x16[3],
3232 : &p_best_mv8x8[12],
3233 : &p_best_mv16x16[3],
3234 : currMV,
3235 : &p_sad16x16[3],
3236 : &p_sad8x8[12],
3237 : sub_sad);
3238 : //---- 16x16 : 6
3239 0 : blockIndex = blockIndex + 16;
3240 0 : searchPositionIndex = searchPositionIndex + 16;
3241 0 : ext_sad_calculation_8x8_16x16(
3242 : src_ptr + blockIndex,
3243 : src_stride,
3244 : refPtr + searchPositionIndex,
3245 : reflumaStride,
3246 : &p_best_sad8x8[24],
3247 : &p_best_sad16x16[6],
3248 : &p_best_mv8x8[24],
3249 : &p_best_mv16x16[6],
3250 : currMV,
3251 : &p_sad16x16[6],
3252 : &p_sad8x8[24],
3253 : sub_sad);
3254 : //---- 16x16 : 7
3255 0 : blockIndex = blockIndex + 16;
3256 0 : searchPositionIndex = searchPositionIndex + 16;
3257 0 : ext_sad_calculation_8x8_16x16(
3258 : src_ptr + blockIndex,
3259 : src_stride,
3260 : refPtr + searchPositionIndex,
3261 : reflumaStride,
3262 : &p_best_sad8x8[28],
3263 : &p_best_sad16x16[7],
3264 : &p_best_mv8x8[28],
3265 : &p_best_mv16x16[7],
3266 : currMV,
3267 : &p_sad16x16[7],
3268 : &p_sad8x8[28],
3269 : sub_sad);
3270 :
3271 : //---- 16x16 : 8
3272 0 : blockIndex = (srcNext16x16Offset << 1);
3273 0 : searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset << 1);
3274 0 : ext_sad_calculation_8x8_16x16(
3275 : src_ptr + blockIndex,
3276 : src_stride,
3277 : refPtr + searchPositionIndex,
3278 : reflumaStride,
3279 : &p_best_sad8x8[32],
3280 : &p_best_sad16x16[8],
3281 : &p_best_mv8x8[32],
3282 : &p_best_mv16x16[8],
3283 : currMV,
3284 : &p_sad16x16[8],
3285 : &p_sad8x8[32],
3286 : sub_sad);
3287 : //---- 16x16 : 9
3288 0 : blockIndex = blockIndex + 16;
3289 0 : searchPositionIndex = searchPositionIndex + 16;
3290 0 : ext_sad_calculation_8x8_16x16(
3291 : src_ptr + blockIndex,
3292 : src_stride,
3293 : refPtr + searchPositionIndex,
3294 : reflumaStride,
3295 : &p_best_sad8x8[36],
3296 : &p_best_sad16x16[9],
3297 : &p_best_mv8x8[36],
3298 : &p_best_mv16x16[9],
3299 : currMV,
3300 : &p_sad16x16[9],
3301 : &p_sad8x8[36],
3302 : sub_sad);
3303 : //---- 16x16 : 12
3304 0 : blockIndex = blockIndex + 16;
3305 0 : searchPositionIndex = searchPositionIndex + 16;
3306 0 : ext_sad_calculation_8x8_16x16(
3307 : src_ptr + blockIndex,
3308 : src_stride,
3309 : refPtr + searchPositionIndex,
3310 : reflumaStride,
3311 : &p_best_sad8x8[48],
3312 : &p_best_sad16x16[12],
3313 : &p_best_mv8x8[48],
3314 : &p_best_mv16x16[12],
3315 : currMV,
3316 : &p_sad16x16[12],
3317 : &p_sad8x8[48],
3318 : sub_sad);
3319 : //---- 16x16 : 13
3320 0 : blockIndex = blockIndex + 16;
3321 0 : searchPositionIndex = searchPositionIndex + 16;
3322 0 : ext_sad_calculation_8x8_16x16(
3323 : src_ptr + blockIndex,
3324 : src_stride,
3325 : refPtr + searchPositionIndex,
3326 : reflumaStride,
3327 : &p_best_sad8x8[52],
3328 : &p_best_sad16x16[13],
3329 : &p_best_mv8x8[52],
3330 : &p_best_mv16x16[13],
3331 : currMV,
3332 : &p_sad16x16[13],
3333 : &p_sad8x8[52],
3334 : sub_sad);
3335 :
3336 : //---- 16x16 : 10
3337 0 : blockIndex = (srcNext16x16Offset * 3);
3338 0 : searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset * 3);
3339 0 : ext_sad_calculation_8x8_16x16(
3340 : src_ptr + blockIndex,
3341 : src_stride,
3342 : refPtr + searchPositionIndex,
3343 : reflumaStride,
3344 : &p_best_sad8x8[40],
3345 : &p_best_sad16x16[10],
3346 : &p_best_mv8x8[40],
3347 : &p_best_mv16x16[10],
3348 : currMV,
3349 : &p_sad16x16[10],
3350 : &p_sad8x8[40],
3351 : sub_sad);
3352 : //---- 16x16 : 11
3353 0 : blockIndex = blockIndex + 16;
3354 0 : searchPositionIndex = searchPositionIndex + 16;
3355 0 : ext_sad_calculation_8x8_16x16(
3356 : src_ptr + blockIndex,
3357 : src_stride,
3358 : refPtr + searchPositionIndex,
3359 : reflumaStride,
3360 : &p_best_sad8x8[44],
3361 : &p_best_sad16x16[11],
3362 : &p_best_mv8x8[44],
3363 : &p_best_mv16x16[11],
3364 : currMV,
3365 : &p_sad16x16[11],
3366 : &p_sad8x8[44],
3367 : sub_sad);
3368 : //---- 16x16 : 14
3369 0 : blockIndex = blockIndex + 16;
3370 0 : searchPositionIndex = searchPositionIndex + 16;
3371 0 : ext_sad_calculation_8x8_16x16(
3372 : src_ptr + blockIndex,
3373 : src_stride,
3374 : refPtr + searchPositionIndex,
3375 : reflumaStride,
3376 : &p_best_sad8x8[56],
3377 : &p_best_sad16x16[14],
3378 : &p_best_mv8x8[56],
3379 : &p_best_mv16x16[14],
3380 : currMV,
3381 : &p_sad16x16[14],
3382 : &p_sad8x8[56],
3383 : sub_sad);
3384 : //---- 16x16 : 15
3385 0 : blockIndex = blockIndex + 16;
3386 0 : searchPositionIndex = searchPositionIndex + 16;
3387 0 : ext_sad_calculation_8x8_16x16(
3388 : src_ptr + blockIndex,
3389 : src_stride,
3390 : refPtr + searchPositionIndex,
3391 : reflumaStride,
3392 : &p_best_sad8x8[60],
3393 : &p_best_sad16x16[15],
3394 : &p_best_mv8x8[60],
3395 : &p_best_mv16x16[15],
3396 : currMV,
3397 : &p_sad16x16[15],
3398 : &p_sad8x8[60],
3399 : sub_sad);
3400 :
3401 0 : ext_sad_calculation_32x32_64x64(p_sad16x16,
3402 : p_best_sad32x32,
3403 : p_best_sad64x64,
3404 : p_best_mv32x32,
3405 : p_best_mv64x64,
3406 : currMV,
3407 : &p_sad32x32[0]);
3408 :
3409 0 : ExtSadCalculation(p_sad8x8,
3410 : p_sad16x16,
3411 : p_sad32x32,
3412 : p_best_sad64x32,
3413 : p_best_mv64x32,
3414 : p_best_sad32x16,
3415 : p_best_mv32x16,
3416 : p_best_sad16x8,
3417 : p_best_mv16x8,
3418 : p_best_sad32x64,
3419 : p_best_mv32x64,
3420 : p_best_sad16x32,
3421 : p_best_mv16x32,
3422 : p_best_sad8x16,
3423 : p_best_mv8x16,
3424 : p_best_sad32x8,
3425 : p_best_mv32x8,
3426 : p_best_sad8x32,
3427 : p_best_mv8x32,
3428 : p_best_sad64x16,
3429 : p_best_mv64x16,
3430 : p_best_sad16x64,
3431 : p_best_mv16x64,
3432 : currMV);
3433 0 : }
3434 :
3435 : /*******************************************
3436 : * GetSearchPointResults
3437 : *******************************************/
3438 0 : static void GetSearchPointResults(
3439 : MeContext
3440 : *context_ptr, // input parameter, ME context Ptr, used to get SB Ptr
3441 : uint32_t listIndex, // input parameter, reference list index
3442 : uint32_t ref_pic_index,
3443 : uint32_t searchRegionIndex, // input parameter, search area origin, used to
3444 : // point to reference samples
3445 : int32_t xSearchIndex, // input parameter, search region position in the
3446 : // horizontal direction, used to derive xMV
3447 : int32_t ySearchIndex) // input parameter, search region position in the
3448 : // vertical direction, used to derive yMV
3449 : {
3450 0 : const EbBool sub_sad = (context_ptr->me_search_method == SUB_SAD_SEARCH);
3451 0 : uint8_t *src_ptr = context_ptr->sb_src_ptr;
3452 :
3453 : // uint8_t *refPtr = refPicPtr->buffer_y; // NADER
3454 0 : uint8_t *refPtr =
3455 0 : context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] +
3456 0 : (ME_FILTER_TAP >> 1) +
3457 0 : ((ME_FILTER_TAP >> 1) *
3458 0 : context_ptr->interpolated_full_stride[listIndex][ref_pic_index]);
3459 : // uint32_t reflumaStride = refPicPtr->stride_y; // NADER
3460 0 : uint32_t reflumaStride =
3461 : context_ptr->interpolated_full_stride[listIndex][ref_pic_index];
3462 :
3463 0 : uint32_t searchPositionTLIndex = searchRegionIndex;
3464 : uint32_t searchPositionIndex;
3465 : uint32_t blockIndex;
3466 :
3467 0 : uint32_t srcNext16x16Offset = (BLOCK_SIZE_64 << 4);
3468 : // uint32_t refNext16x16Offset = (refPicPtr->stride_y << 4); // NADER
3469 0 : uint32_t refNext16x16Offset = (reflumaStride << 4);
3470 :
3471 0 : uint32_t currMV1 = (((uint16_t)ySearchIndex) << 18);
3472 0 : uint16_t currMV2 = (((uint16_t)xSearchIndex << 2));
3473 0 : uint32_t currMV = currMV1 | currMV2;
3474 :
3475 0 : uint32_t *p_best_sad8x8 = context_ptr->p_best_sad8x8;
3476 0 : uint32_t *p_best_sad16x16 = context_ptr->p_best_sad16x16;
3477 0 : uint32_t *p_best_sad32x32 = context_ptr->p_best_sad32x32;
3478 0 : uint32_t *p_best_sad64x64 = context_ptr->p_best_sad64x64;
3479 :
3480 0 : uint32_t *p_best_mv8x8 = context_ptr->p_best_mv8x8;
3481 0 : uint32_t *p_best_mv16x16 = context_ptr->p_best_mv16x16;
3482 0 : uint32_t *p_best_mv32x32 = context_ptr->p_best_mv32x32;
3483 0 : uint32_t *p_best_mv64x64 = context_ptr->p_best_mv64x64;
3484 0 : uint32_t *p_sad16x16 = context_ptr->p_sad16x16;
3485 :
3486 : // TODO: blockIndex searchPositionIndex could be removed
3487 :
3488 0 : const uint32_t src_stride = context_ptr->sb_src_stride;
3489 0 : srcNext16x16Offset = src_stride << 4;
3490 :
3491 : //---- 16x16 : 0
3492 0 : blockIndex = 0;
3493 0 : searchPositionIndex = searchPositionTLIndex;
3494 :
3495 0 : sad_calculation_8x8_16x16(
3496 : src_ptr + blockIndex,
3497 : src_stride,
3498 : refPtr + searchPositionIndex,
3499 : reflumaStride,
3500 : &p_best_sad8x8[0],
3501 : &p_best_sad16x16[0],
3502 : &p_best_mv8x8[0],
3503 : &p_best_mv16x16[0],
3504 : currMV,
3505 : &p_sad16x16[0],
3506 : sub_sad);
3507 :
3508 : //---- 16x16 : 1
3509 0 : blockIndex = blockIndex + 16;
3510 0 : searchPositionIndex = searchPositionTLIndex + 16;
3511 0 : sad_calculation_8x8_16x16(
3512 : src_ptr + blockIndex,
3513 : src_stride,
3514 : refPtr + searchPositionIndex,
3515 : reflumaStride,
3516 : &p_best_sad8x8[4],
3517 : &p_best_sad16x16[1],
3518 : &p_best_mv8x8[4],
3519 : &p_best_mv16x16[1],
3520 : currMV,
3521 : &p_sad16x16[1],
3522 : sub_sad);
3523 : //---- 16x16 : 4
3524 0 : blockIndex = blockIndex + 16;
3525 0 : searchPositionIndex = searchPositionIndex + 16;
3526 :
3527 0 : sad_calculation_8x8_16x16(
3528 : src_ptr + blockIndex,
3529 : src_stride,
3530 : refPtr + searchPositionIndex,
3531 : reflumaStride,
3532 : &p_best_sad8x8[16],
3533 : &p_best_sad16x16[4],
3534 : &p_best_mv8x8[16],
3535 : &p_best_mv16x16[4],
3536 : currMV,
3537 : &p_sad16x16[4],
3538 : sub_sad);
3539 :
3540 : //---- 16x16 : 5
3541 0 : blockIndex = blockIndex + 16;
3542 0 : searchPositionIndex = searchPositionIndex + 16;
3543 0 : sad_calculation_8x8_16x16(
3544 : src_ptr + blockIndex,
3545 : src_stride,
3546 : refPtr + searchPositionIndex,
3547 : reflumaStride,
3548 : &p_best_sad8x8[20],
3549 : &p_best_sad16x16[5],
3550 : &p_best_mv8x8[20],
3551 : &p_best_mv16x16[5],
3552 : currMV,
3553 : &p_sad16x16[5],
3554 : sub_sad);
3555 :
3556 : //---- 16x16 : 2
3557 0 : blockIndex = srcNext16x16Offset;
3558 0 : searchPositionIndex = searchPositionTLIndex + refNext16x16Offset;
3559 0 : sad_calculation_8x8_16x16(
3560 : src_ptr + blockIndex,
3561 : src_stride,
3562 : refPtr + searchPositionIndex,
3563 : reflumaStride,
3564 : &p_best_sad8x8[8],
3565 : &p_best_sad16x16[2],
3566 : &p_best_mv8x8[8],
3567 : &p_best_mv16x16[2],
3568 : currMV,
3569 : &p_sad16x16[2],
3570 : sub_sad);
3571 : //---- 16x16 : 3
3572 0 : blockIndex = blockIndex + 16;
3573 0 : searchPositionIndex = searchPositionIndex + 16;
3574 0 : sad_calculation_8x8_16x16(
3575 : src_ptr + blockIndex,
3576 : src_stride,
3577 : refPtr + searchPositionIndex,
3578 : reflumaStride,
3579 : &p_best_sad8x8[12],
3580 : &p_best_sad16x16[3],
3581 : &p_best_mv8x8[12],
3582 : &p_best_mv16x16[3],
3583 : currMV,
3584 : &p_sad16x16[3],
3585 : sub_sad);
3586 : //---- 16x16 : 6
3587 0 : blockIndex = blockIndex + 16;
3588 0 : searchPositionIndex = searchPositionIndex + 16;
3589 0 : sad_calculation_8x8_16x16(
3590 : src_ptr + blockIndex,
3591 : src_stride,
3592 : refPtr + searchPositionIndex,
3593 : reflumaStride,
3594 : &p_best_sad8x8[24],
3595 : &p_best_sad16x16[6],
3596 : &p_best_mv8x8[24],
3597 : &p_best_mv16x16[6],
3598 : currMV,
3599 : &p_sad16x16[6],
3600 : sub_sad);
3601 : //---- 16x16 : 7
3602 0 : blockIndex = blockIndex + 16;
3603 0 : searchPositionIndex = searchPositionIndex + 16;
3604 0 : sad_calculation_8x8_16x16(
3605 : src_ptr + blockIndex,
3606 : src_stride,
3607 : refPtr + searchPositionIndex,
3608 : reflumaStride,
3609 : &p_best_sad8x8[28],
3610 : &p_best_sad16x16[7],
3611 : &p_best_mv8x8[28],
3612 : &p_best_mv16x16[7],
3613 : currMV,
3614 : &p_sad16x16[7],
3615 : sub_sad);
3616 :
3617 : //---- 16x16 : 8
3618 0 : blockIndex = (srcNext16x16Offset << 1);
3619 0 : searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset << 1);
3620 0 : sad_calculation_8x8_16x16(
3621 : src_ptr + blockIndex,
3622 : src_stride,
3623 : refPtr + searchPositionIndex,
3624 : reflumaStride,
3625 : &p_best_sad8x8[32],
3626 : &p_best_sad16x16[8],
3627 : &p_best_mv8x8[32],
3628 : &p_best_mv16x16[8],
3629 : currMV,
3630 : &p_sad16x16[8],
3631 : sub_sad);
3632 : //---- 16x16 : 9
3633 0 : blockIndex = blockIndex + 16;
3634 0 : searchPositionIndex = searchPositionIndex + 16;
3635 0 : sad_calculation_8x8_16x16(
3636 : src_ptr + blockIndex,
3637 : src_stride,
3638 : refPtr + searchPositionIndex,
3639 : reflumaStride,
3640 : &p_best_sad8x8[36],
3641 : &p_best_sad16x16[9],
3642 : &p_best_mv8x8[36],
3643 : &p_best_mv16x16[9],
3644 : currMV,
3645 : &p_sad16x16[9],
3646 : sub_sad);
3647 : //---- 16x16 : 12
3648 0 : blockIndex = blockIndex + 16;
3649 0 : searchPositionIndex = searchPositionIndex + 16;
3650 0 : sad_calculation_8x8_16x16(
3651 : src_ptr + blockIndex,
3652 : src_stride,
3653 : refPtr + searchPositionIndex,
3654 : reflumaStride,
3655 : &p_best_sad8x8[48],
3656 : &p_best_sad16x16[12],
3657 : &p_best_mv8x8[48],
3658 : &p_best_mv16x16[12],
3659 : currMV,
3660 : &p_sad16x16[12],
3661 : sub_sad);
3662 : //---- 16x16 : 13
3663 0 : blockIndex = blockIndex + 16;
3664 0 : searchPositionIndex = searchPositionIndex + 16;
3665 0 : sad_calculation_8x8_16x16(
3666 : src_ptr + blockIndex,
3667 : src_stride,
3668 : refPtr + searchPositionIndex,
3669 : reflumaStride,
3670 : &p_best_sad8x8[52],
3671 : &p_best_sad16x16[13],
3672 : &p_best_mv8x8[52],
3673 : &p_best_mv16x16[13],
3674 : currMV,
3675 : &p_sad16x16[13],
3676 : sub_sad);
3677 :
3678 : //---- 16x16 : 10
3679 0 : blockIndex = (srcNext16x16Offset * 3);
3680 0 : searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset * 3);
3681 0 : sad_calculation_8x8_16x16(
3682 : src_ptr + blockIndex,
3683 : src_stride,
3684 : refPtr + searchPositionIndex,
3685 : reflumaStride,
3686 : &p_best_sad8x8[40],
3687 : &p_best_sad16x16[10],
3688 : &p_best_mv8x8[40],
3689 : &p_best_mv16x16[10],
3690 : currMV,
3691 : &p_sad16x16[10],
3692 : sub_sad);
3693 : //---- 16x16 : 11
3694 0 : blockIndex = blockIndex + 16;
3695 0 : searchPositionIndex = searchPositionIndex + 16;
3696 0 : sad_calculation_8x8_16x16(
3697 : src_ptr + blockIndex,
3698 : src_stride,
3699 : refPtr + searchPositionIndex,
3700 : reflumaStride,
3701 : &p_best_sad8x8[44],
3702 : &p_best_sad16x16[11],
3703 : &p_best_mv8x8[44],
3704 : &p_best_mv16x16[11],
3705 : currMV,
3706 : &p_sad16x16[11],
3707 : sub_sad);
3708 : //---- 16x16 : 14
3709 0 : blockIndex = blockIndex + 16;
3710 0 : searchPositionIndex = searchPositionIndex + 16;
3711 0 : sad_calculation_8x8_16x16(
3712 : src_ptr + blockIndex,
3713 : src_stride,
3714 : refPtr + searchPositionIndex,
3715 : reflumaStride,
3716 : &p_best_sad8x8[56],
3717 : &p_best_sad16x16[14],
3718 : &p_best_mv8x8[56],
3719 : &p_best_mv16x16[14],
3720 : currMV,
3721 : &p_sad16x16[14],
3722 : sub_sad);
3723 : //---- 16x16 : 15
3724 0 : blockIndex = blockIndex + 16;
3725 0 : searchPositionIndex = searchPositionIndex + 16;
3726 0 : sad_calculation_8x8_16x16(
3727 : src_ptr + blockIndex,
3728 : src_stride,
3729 : refPtr + searchPositionIndex,
3730 : reflumaStride,
3731 : &p_best_sad8x8[60],
3732 : &p_best_sad16x16[15],
3733 : &p_best_mv8x8[60],
3734 : &p_best_mv16x16[15],
3735 : currMV,
3736 : &p_sad16x16[15],
3737 : sub_sad);
3738 :
3739 0 : sad_calculation_32x32_64x64(p_sad16x16,
3740 : p_best_sad32x32,
3741 : p_best_sad64x64,
3742 : p_best_mv32x32,
3743 : p_best_mv64x64,
3744 : currMV);
3745 0 : }
3746 :
3747 : /*******************************************
3748 : * GetEightHorizontalSearchPointResultsAll85CUs
3749 : *******************************************/
3750 0 : static void GetEightHorizontalSearchPointResultsAll85PUs(
3751 : MeContext *context_ptr, uint32_t listIndex,
3752 : uint32_t ref_pic_index,
3753 : uint32_t searchRegionIndex,
3754 : int32_t xSearchIndex, // input parameter, search region position in the
3755 : // horizontal direction, used to derive xMV
3756 : int32_t ySearchIndex) { // input parameter, search region position in the
3757 : // vertical direction, used to derive yMV
3758 0 : const EbBool sub_sad = (context_ptr->me_search_method == SUB_SAD_SEARCH);
3759 0 : uint8_t *src_ptr = context_ptr->sb_src_ptr;
3760 0 : uint8_t *refPtr =
3761 0 : context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] +
3762 0 : (ME_FILTER_TAP >> 1) +
3763 0 : ((ME_FILTER_TAP >> 1) *
3764 0 : context_ptr->interpolated_full_stride[listIndex][ref_pic_index]);
3765 0 : uint32_t reflumaStride =
3766 : context_ptr->interpolated_full_stride[listIndex][ref_pic_index];
3767 :
3768 0 : uint32_t searchPositionTLIndex = searchRegionIndex;
3769 : uint32_t searchPositionIndex;
3770 : uint32_t blockIndex;
3771 :
3772 0 : uint32_t srcNext16x16Offset = (BLOCK_SIZE_64 << 4);
3773 0 : uint32_t refNext16x16Offset = (reflumaStride << 4);
3774 :
3775 0 : uint32_t currMVy = (((uint16_t)ySearchIndex) << 18);
3776 0 : uint16_t currMVx = (((uint16_t)xSearchIndex << 2));
3777 0 : uint32_t currMV = currMVy | currMVx;
3778 :
3779 0 : uint32_t *p_best_sad8x8 = context_ptr->p_best_sad8x8;
3780 0 : uint32_t *p_best_sad16x16 = context_ptr->p_best_sad16x16;
3781 0 : uint32_t *p_best_sad32x32 = context_ptr->p_best_sad32x32;
3782 0 : uint32_t *p_best_sad64x64 = context_ptr->p_best_sad64x64;
3783 :
3784 0 : uint32_t *p_best_mv8x8 = context_ptr->p_best_mv8x8;
3785 0 : uint32_t *p_best_mv16x16 = context_ptr->p_best_mv16x16;
3786 0 : uint32_t *p_best_mv32x32 = context_ptr->p_best_mv32x32;
3787 0 : uint32_t *p_best_mv64x64 = context_ptr->p_best_mv64x64;
3788 :
3789 0 : uint16_t *p_sad16x16 = context_ptr->p_eight_pos_sad16x16;
3790 :
3791 : /*
3792 : ---------------------- ----------------------
3793 : | 16x16_0 | 16x16_1 | 16x16_4 | 16x16_5 |
3794 : ---------------------- ----------------------
3795 : | 16x16_2 | 16x16_3 | 16x16_6 | 16x16_7 |
3796 : ----------------------- -----------------------
3797 : | 16x16_8 | 16x16_9 | 16x16_12 | 16x16_13 |
3798 : ---------------------- ----------------------
3799 : | 16x16_10 | 16x16_11 | 16x16_14 | 16x16_15 |
3800 : ----------------------- -----------------------
3801 : */
3802 :
3803 0 : const uint32_t src_stride = context_ptr->sb_src_stride;
3804 0 : srcNext16x16Offset = src_stride << 4;
3805 :
3806 : //---- 16x16_0
3807 0 : blockIndex = 0;
3808 0 : searchPositionIndex = searchPositionTLIndex;
3809 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3810 : src_ptr + blockIndex,
3811 : context_ptr->sb_src_stride,
3812 : refPtr + searchPositionIndex,
3813 : reflumaStride,
3814 : &p_best_sad8x8[0],
3815 : &p_best_mv8x8[0],
3816 : &p_best_sad16x16[0],
3817 : &p_best_mv16x16[0],
3818 : currMV,
3819 : &p_sad16x16[0 * 8],
3820 : sub_sad);
3821 : //---- 16x16_1
3822 0 : blockIndex = blockIndex + 16;
3823 0 : searchPositionIndex = searchPositionTLIndex + 16;
3824 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3825 : src_ptr + blockIndex,
3826 : context_ptr->sb_src_stride,
3827 : refPtr + searchPositionIndex,
3828 : reflumaStride,
3829 : &p_best_sad8x8[4],
3830 : &p_best_mv8x8[4],
3831 : &p_best_sad16x16[1],
3832 : &p_best_mv16x16[1],
3833 : currMV,
3834 : &p_sad16x16[1 * 8],
3835 : sub_sad);
3836 : //---- 16x16_4
3837 0 : blockIndex = blockIndex + 16;
3838 0 : searchPositionIndex = searchPositionIndex + 16;
3839 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3840 : src_ptr + blockIndex,
3841 : context_ptr->sb_src_stride,
3842 : refPtr + searchPositionIndex,
3843 : reflumaStride,
3844 : &p_best_sad8x8[16],
3845 : &p_best_mv8x8[16],
3846 : &p_best_sad16x16[4],
3847 : &p_best_mv16x16[4],
3848 : currMV,
3849 : &p_sad16x16[4 * 8],
3850 : sub_sad);
3851 : //---- 16x16_5
3852 0 : blockIndex = blockIndex + 16;
3853 0 : searchPositionIndex = searchPositionIndex + 16;
3854 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3855 : src_ptr + blockIndex,
3856 : context_ptr->sb_src_stride,
3857 : refPtr + searchPositionIndex,
3858 : reflumaStride,
3859 : &p_best_sad8x8[20],
3860 : &p_best_mv8x8[20],
3861 : &p_best_sad16x16[5],
3862 : &p_best_mv16x16[5],
3863 : currMV,
3864 : &p_sad16x16[5 * 8],
3865 : sub_sad);
3866 :
3867 : //---- 16x16_2
3868 0 : blockIndex = srcNext16x16Offset;
3869 0 : searchPositionIndex = searchPositionTLIndex + refNext16x16Offset;
3870 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3871 : src_ptr + blockIndex,
3872 : context_ptr->sb_src_stride,
3873 : refPtr + searchPositionIndex,
3874 : reflumaStride,
3875 : &p_best_sad8x8[8],
3876 : &p_best_mv8x8[8],
3877 : &p_best_sad16x16[2],
3878 : &p_best_mv16x16[2],
3879 : currMV,
3880 : &p_sad16x16[2 * 8],
3881 : sub_sad);
3882 : //---- 16x16_3
3883 0 : blockIndex = blockIndex + 16;
3884 0 : searchPositionIndex = searchPositionIndex + 16;
3885 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3886 : src_ptr + blockIndex,
3887 : context_ptr->sb_src_stride,
3888 : refPtr + searchPositionIndex,
3889 : reflumaStride,
3890 : &p_best_sad8x8[12],
3891 : &p_best_mv8x8[12],
3892 : &p_best_sad16x16[3],
3893 : &p_best_mv16x16[3],
3894 : currMV,
3895 : &p_sad16x16[3 * 8],
3896 : sub_sad);
3897 : //---- 16x16_6
3898 0 : blockIndex = blockIndex + 16;
3899 0 : searchPositionIndex = searchPositionIndex + 16;
3900 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3901 : src_ptr + blockIndex,
3902 : context_ptr->sb_src_stride,
3903 : refPtr + searchPositionIndex,
3904 : reflumaStride,
3905 : &p_best_sad8x8[24],
3906 : &p_best_mv8x8[24],
3907 : &p_best_sad16x16[6],
3908 : &p_best_mv16x16[6],
3909 : currMV,
3910 : &p_sad16x16[6 * 8],
3911 : sub_sad);
3912 : //---- 16x16_7
3913 0 : blockIndex = blockIndex + 16;
3914 0 : searchPositionIndex = searchPositionIndex + 16;
3915 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3916 : src_ptr + blockIndex,
3917 : context_ptr->sb_src_stride,
3918 : refPtr + searchPositionIndex,
3919 : reflumaStride,
3920 : &p_best_sad8x8[28],
3921 : &p_best_mv8x8[28],
3922 : &p_best_sad16x16[7],
3923 : &p_best_mv16x16[7],
3924 : currMV,
3925 : &p_sad16x16[7 * 8],
3926 : sub_sad);
3927 :
3928 : //---- 16x16_8
3929 0 : blockIndex = (srcNext16x16Offset << 1);
3930 0 : searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset << 1);
3931 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3932 : src_ptr + blockIndex,
3933 : context_ptr->sb_src_stride,
3934 : refPtr + searchPositionIndex,
3935 : reflumaStride,
3936 : &p_best_sad8x8[32],
3937 : &p_best_mv8x8[32],
3938 : &p_best_sad16x16[8],
3939 : &p_best_mv16x16[8],
3940 : currMV,
3941 : &p_sad16x16[8 * 8],
3942 : sub_sad);
3943 : //---- 16x16_9
3944 0 : blockIndex = blockIndex + 16;
3945 0 : searchPositionIndex = searchPositionIndex + 16;
3946 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3947 : src_ptr + blockIndex,
3948 : context_ptr->sb_src_stride,
3949 : refPtr + searchPositionIndex,
3950 : reflumaStride,
3951 : &p_best_sad8x8[36],
3952 : &p_best_mv8x8[36],
3953 : &p_best_sad16x16[9],
3954 : &p_best_mv16x16[9],
3955 : currMV,
3956 : &p_sad16x16[9 * 8],
3957 : sub_sad);
3958 : //---- 16x16_12
3959 0 : blockIndex = blockIndex + 16;
3960 0 : searchPositionIndex = searchPositionIndex + 16;
3961 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3962 : src_ptr + blockIndex,
3963 : context_ptr->sb_src_stride,
3964 : refPtr + searchPositionIndex,
3965 : reflumaStride,
3966 : &p_best_sad8x8[48],
3967 : &p_best_mv8x8[48],
3968 : &p_best_sad16x16[12],
3969 : &p_best_mv16x16[12],
3970 : currMV,
3971 : &p_sad16x16[12 * 8],
3972 : sub_sad);
3973 : //---- 16x1_13
3974 0 : blockIndex = blockIndex + 16;
3975 0 : searchPositionIndex = searchPositionIndex + 16;
3976 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3977 : src_ptr + blockIndex,
3978 : context_ptr->sb_src_stride,
3979 : refPtr + searchPositionIndex,
3980 : reflumaStride,
3981 : &p_best_sad8x8[52],
3982 : &p_best_mv8x8[52],
3983 : &p_best_sad16x16[13],
3984 : &p_best_mv16x16[13],
3985 : currMV,
3986 : &p_sad16x16[13 * 8],
3987 : sub_sad);
3988 :
3989 : //---- 16x16_10
3990 0 : blockIndex = (srcNext16x16Offset * 3);
3991 0 : searchPositionIndex = searchPositionTLIndex + (refNext16x16Offset * 3);
3992 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
3993 : src_ptr + blockIndex,
3994 : context_ptr->sb_src_stride,
3995 : refPtr + searchPositionIndex,
3996 : reflumaStride,
3997 : &p_best_sad8x8[40],
3998 : &p_best_mv8x8[40],
3999 : &p_best_sad16x16[10],
4000 : &p_best_mv16x16[10],
4001 : currMV,
4002 : &p_sad16x16[10 * 8],
4003 : sub_sad);
4004 : //---- 16x16_11
4005 0 : blockIndex = blockIndex + 16;
4006 0 : searchPositionIndex = searchPositionIndex + 16;
4007 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
4008 : src_ptr + blockIndex,
4009 : context_ptr->sb_src_stride,
4010 : refPtr + searchPositionIndex,
4011 : reflumaStride,
4012 : &p_best_sad8x8[44],
4013 : &p_best_mv8x8[44],
4014 : &p_best_sad16x16[11],
4015 : &p_best_mv16x16[11],
4016 : currMV,
4017 : &p_sad16x16[11 * 8],
4018 : sub_sad);
4019 : //---- 16x16_14
4020 0 : blockIndex = blockIndex + 16;
4021 0 : searchPositionIndex = searchPositionIndex + 16;
4022 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
4023 : src_ptr + blockIndex,
4024 : context_ptr->sb_src_stride,
4025 : refPtr + searchPositionIndex,
4026 : reflumaStride,
4027 : &p_best_sad8x8[56],
4028 : &p_best_mv8x8[56],
4029 : &p_best_sad16x16[14],
4030 : &p_best_mv16x16[14],
4031 : currMV,
4032 : &p_sad16x16[14 * 8],
4033 : sub_sad);
4034 : //---- 16x16_15
4035 0 : blockIndex = blockIndex + 16;
4036 0 : searchPositionIndex = searchPositionIndex + 16;
4037 0 : get_eight_horizontal_search_point_results_8x8_16x16_pu(
4038 : src_ptr + blockIndex,
4039 : context_ptr->sb_src_stride,
4040 : refPtr + searchPositionIndex,
4041 : reflumaStride,
4042 : &p_best_sad8x8[60],
4043 : &p_best_mv8x8[60],
4044 : &p_best_sad16x16[15],
4045 : &p_best_mv16x16[15],
4046 : currMV,
4047 : &p_sad16x16[15 * 8],
4048 : sub_sad);
4049 : // 32x32 and 64x64
4050 0 : get_eight_horizontal_search_point_results_32x32_64x64_pu(
4051 : p_sad16x16,
4052 : p_best_sad32x32,
4053 : p_best_sad64x64,
4054 : p_best_mv32x32,
4055 : p_best_mv64x64,
4056 : currMV);
4057 0 : }
4058 :
4059 : /*******************************************
4060 : * FullPelSearch_LCU
4061 : *******************************************/
4062 0 : static void FullPelSearch_LCU(MeContext *context_ptr, uint32_t listIndex,
4063 : uint32_t ref_pic_index,
4064 : int16_t x_search_area_origin,
4065 : int16_t y_search_area_origin,
4066 : uint32_t search_area_width,
4067 : uint32_t search_area_height)
4068 : {
4069 : uint32_t xSearchIndex, ySearchIndex;
4070 :
4071 0 : uint32_t searchAreaWidthRest8 = search_area_width & 7;
4072 0 : uint32_t searchAreaWidthMult8 = search_area_width - searchAreaWidthRest8;
4073 :
4074 0 : for (ySearchIndex = 0; ySearchIndex < search_area_height; ySearchIndex++) {
4075 0 : for (xSearchIndex = 0; xSearchIndex < searchAreaWidthMult8;
4076 0 : xSearchIndex += 8) {
4077 : // this function will do: xSearchIndex, +1, +2, ..., +7
4078 0 : GetEightHorizontalSearchPointResultsAll85PUs(
4079 : context_ptr,
4080 : listIndex,
4081 : ref_pic_index,
4082 : xSearchIndex +
4083 0 : ySearchIndex *
4084 : context_ptr->interpolated_full_stride[listIndex]
4085 0 : [ref_pic_index],
4086 0 : (int32_t)xSearchIndex + x_search_area_origin,
4087 0 : (int32_t)ySearchIndex + y_search_area_origin);
4088 : }
4089 :
4090 0 : for (xSearchIndex = searchAreaWidthMult8;
4091 : xSearchIndex < search_area_width;
4092 0 : xSearchIndex++) {
4093 0 : GetSearchPointResults(
4094 : context_ptr,
4095 : listIndex,
4096 : ref_pic_index,
4097 : xSearchIndex +
4098 0 : ySearchIndex *
4099 : context_ptr->interpolated_full_stride[listIndex]
4100 0 : [ref_pic_index],
4101 0 : (int32_t)xSearchIndex + x_search_area_origin,
4102 0 : (int32_t)ySearchIndex + y_search_area_origin);
4103 : }
4104 : }
4105 0 : }
4106 : #if OPTIMISED_EX_SUBPEL
4107 : /*******************************************
4108 : * PU_HalfPelRefinement
4109 : * performs Half Pel refinement for one PU
4110 : *******************************************/
4111 0 : static void half_pel_refinement_block(
4112 : MeContext
4113 : *context_ptr, // input parameter, ME context Ptr, used to get SB Ptr
4114 : uint8_t *ref_buffer, uint32_t ref_stride, uint32_t *p_best_ssd,
4115 : uint32_t src_block_index, // input parameter, PU origin, used to point to
4116 : // source samples
4117 : uint8_t *pos_b_buffer, // input parameter, position "b" interpolated search
4118 : // area Ptr
4119 : uint8_t *pos_h_buffer, // input parameter, position "h" interpolated search
4120 : // area Ptr
4121 : uint8_t *pos_j_buffer, // input parameter, position "j" interpolated search
4122 : // area Ptr
4123 : uint32_t pu_width, // input parameter, PU width
4124 : uint32_t pu_height, // input parameter, PU height
4125 : int16_t x_search_area_origin, // input parameter, search area origin in the
4126 : // horizontal direction, used to point to
4127 : // reference samples
4128 : int16_t y_search_area_origin, // input parameter, search area origin in the
4129 : // vertical direction, used to point to
4130 : // reference samples
4131 : #if OPTIMISED_EX_SUBPEL
4132 : uint32_t search_area_height, // input parameter, search area height
4133 : uint32_t search_area_width, // input parameter, search area width
4134 : #endif
4135 : uint32_t *p_best_sad, uint32_t *p_best_mv,
4136 : uint8_t *p_sub_pel_direction, uint32_t *best_pervious_stage_mv,
4137 : uint32_t ineteger_mv) {
4138 : int32_t search_region_index;
4139 0 : uint64_t distortion_left_position = 0;
4140 0 : uint64_t distortion_top_position = 0;
4141 0 : uint64_t distortion_topleft_position = 0;
4142 0 : uint64_t distortion_topright_position = 0;
4143 : int16_t half_mv_x[8];
4144 : int16_t half_mv_y[8];
4145 : int16_t x_best_mv;
4146 : int16_t y_best_mv;
4147 : int16_t x_mv;
4148 : int16_t y_mv;
4149 : int16_t search_index_x;
4150 : int16_t search_index_y;
4151 : (void)p_sub_pel_direction;
4152 : (void)ineteger_mv;
4153 : // copute distance between best mv and the integer mv candidate
4154 : int16_t offset_x, offset_y;
4155 0 : for (offset_x = -H_PEL_SEARCH_WIND; offset_x <= H_PEL_SEARCH_WIND; offset_x++) {
4156 0 : for (offset_y = -H_PEL_SEARCH_WIND; offset_y <= H_PEL_SEARCH_WIND; offset_y++) {
4157 0 : x_best_mv = _MVXT(*best_pervious_stage_mv);
4158 0 : y_best_mv = _MVYT(*best_pervious_stage_mv);
4159 0 : x_mv = x_best_mv + (offset_x * 4);
4160 0 : y_mv = y_best_mv + (offset_y * 4);
4161 0 : search_index_x = (x_mv >> 2) - x_search_area_origin;
4162 0 : search_index_y = (y_mv >> 2) - y_search_area_origin;
4163 0 : uint32_t integer_mv1 = (((uint16_t)(y_mv >> 2)) << 18);
4164 0 : uint16_t integer_mv2 = (((uint16_t)(x_mv >> 2) << 2));
4165 0 : uint32_t integer_mv = integer_mv1 | integer_mv2;
4166 0 : if (search_index_x < 0 || search_index_x >(int16_t)(search_area_width - 1)) {
4167 0 : continue;
4168 : }
4169 0 : if (search_index_y < 0 || search_index_y >(int16_t)(search_area_height - 1)) {
4170 0 : continue;
4171 : }
4172 0 : half_mv_x[0] = x_mv - 2; // L position
4173 0 : half_mv_x[1] = x_mv + 2; // R position
4174 0 : half_mv_x[2] = x_mv; // T position
4175 0 : half_mv_x[3] = x_mv; // B position
4176 0 : half_mv_x[4] = x_mv - 2; // TL position
4177 0 : half_mv_x[5] = x_mv + 2; // TR position
4178 0 : half_mv_x[6] = x_mv + 2; // BR position
4179 0 : half_mv_x[7] = x_mv - 2; // BL position
4180 0 : half_mv_y[0] = y_mv; // L position
4181 0 : half_mv_y[1] = y_mv; // R position
4182 0 : half_mv_y[2] = y_mv - 2; // T position
4183 0 : half_mv_y[3] = y_mv + 2; // B position
4184 0 : half_mv_y[4] = y_mv - 2; // TL position
4185 0 : half_mv_y[5] = y_mv - 2; // TR position
4186 0 : half_mv_y[6] = y_mv + 2; // BR position
4187 0 : half_mv_y[7] = y_mv + 2; // BL position
4188 : // Compute SSD for the best full search candidate
4189 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4190 0 : uint32_t integer_sse =
4191 0 : (uint32_t)spatial_full_distortion_kernel(
4192 : context_ptr->sb_src_ptr,
4193 : src_block_index,
4194 : context_ptr->sb_src_stride,
4195 : ref_buffer,
4196 0 : search_index_y * ref_stride + search_index_x,
4197 : ref_stride,
4198 : pu_width,
4199 : pu_height);
4200 0 : if (integer_sse < *p_best_ssd) {
4201 0 : *p_best_ssd = integer_sse;
4202 0 : *p_best_mv = integer_mv;
4203 : }
4204 : }
4205 : // L position
4206 0 : search_region_index =
4207 0 : search_index_x +
4208 0 : (int16_t)context_ptr->interpolated_stride * search_index_y;
4209 0 : if (context_ptr->fractional_search_method == SSD_SEARCH)
4210 0 : distortion_left_position = spatial_full_distortion_kernel(
4211 : context_ptr->sb_src_ptr,
4212 : src_block_index,
4213 : context_ptr->sb_src_stride,
4214 : pos_b_buffer,
4215 : search_region_index,
4216 : context_ptr->interpolated_stride,
4217 : pu_width,
4218 : pu_height);
4219 0 : else if (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4220 0 : distortion_left_position = (nxm_sad_kernel(
4221 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4222 0 : context_ptr->sb_src_stride << 1,
4223 0 : &(pos_b_buffer[search_region_index]),
4224 0 : context_ptr->interpolated_stride << 1,
4225 : pu_height >> 1,
4226 0 : pu_width)) << 1;
4227 : else
4228 0 : distortion_left_position = nxm_sad_kernel(
4229 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4230 : context_ptr->sb_src_stride,
4231 0 : &(pos_b_buffer[search_region_index]),
4232 : context_ptr->interpolated_stride,
4233 : pu_height,
4234 : pu_width);
4235 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4236 0 : if (distortion_left_position < *p_best_ssd) {
4237 0 : *p_best_sad = (uint32_t)
4238 0 : nxm_sad_kernel(
4239 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4240 : context_ptr->sb_src_stride,
4241 0 : &(pos_b_buffer[search_region_index]),
4242 : context_ptr->interpolated_stride,
4243 : pu_height,
4244 : pu_width);
4245 0 : *p_best_mv =
4246 0 : ((uint16_t)half_mv_y[0] << 16) | ((uint16_t)half_mv_x[0]);
4247 0 : *p_best_ssd = (uint32_t)distortion_left_position;
4248 : }
4249 : }
4250 : else {
4251 0 : if (distortion_left_position < *p_best_sad) {
4252 0 : *p_best_sad = (uint32_t)distortion_left_position;
4253 0 : *p_best_mv =
4254 0 : ((uint16_t)half_mv_y[0] << 16) | ((uint16_t)half_mv_x[0]);
4255 : }
4256 : }
4257 : // T position
4258 0 : search_region_index =
4259 0 : search_index_x +
4260 0 : (int16_t)context_ptr->interpolated_stride * search_index_y;
4261 0 : if (context_ptr->fractional_search_method == SSD_SEARCH)
4262 0 : distortion_top_position = spatial_full_distortion_kernel(
4263 : context_ptr->sb_src_ptr,
4264 : src_block_index,
4265 : context_ptr->sb_src_stride,
4266 : pos_h_buffer,
4267 : search_region_index,
4268 : context_ptr->interpolated_stride,
4269 : pu_width,
4270 : pu_height);
4271 0 : else if (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4272 0 : distortion_top_position = (nxm_sad_kernel(
4273 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4274 0 : context_ptr->sb_src_stride << 1,
4275 0 : &(pos_h_buffer[search_region_index]),
4276 0 : context_ptr->interpolated_stride << 1,
4277 : pu_height >> 1,
4278 0 : pu_width)) << 1;
4279 : else
4280 0 : distortion_top_position = nxm_sad_kernel(
4281 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4282 : context_ptr->sb_src_stride,
4283 0 : &(pos_h_buffer[search_region_index]),
4284 : context_ptr->interpolated_stride,
4285 : pu_height,
4286 : pu_width);
4287 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4288 0 : if (distortion_top_position < *p_best_ssd) {
4289 0 : *p_best_sad = (uint32_t)
4290 0 : nxm_sad_kernel(
4291 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4292 : context_ptr->sb_src_stride,
4293 0 : &(pos_h_buffer[search_region_index]),
4294 : context_ptr->interpolated_stride,
4295 : pu_height,
4296 : pu_width);
4297 0 : *p_best_mv =
4298 0 : ((uint16_t)half_mv_y[2] << 16) | ((uint16_t)half_mv_x[2]);
4299 0 : *p_best_ssd = (uint32_t)distortion_top_position;
4300 : }
4301 : }
4302 : else {
4303 0 : if (distortion_top_position < *p_best_sad) {
4304 0 : *p_best_sad = (uint32_t)distortion_top_position;
4305 0 : *p_best_mv =
4306 0 : ((uint16_t)half_mv_y[2] << 16) | ((uint16_t)half_mv_x[2]);
4307 : }
4308 : }
4309 : // TL position
4310 0 : search_region_index =
4311 0 : search_index_x +
4312 0 : (int16_t)context_ptr->interpolated_stride * search_index_y;
4313 0 : if (context_ptr->fractional_search_method == SSD_SEARCH)
4314 0 : distortion_topleft_position = spatial_full_distortion_kernel(
4315 : context_ptr->sb_src_ptr,
4316 : src_block_index,
4317 : context_ptr->sb_src_stride,
4318 : pos_j_buffer,
4319 : search_region_index,
4320 : context_ptr->interpolated_stride,
4321 : pu_width,
4322 : pu_height);
4323 0 : else if (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4324 0 : distortion_topleft_position = (nxm_sad_kernel(
4325 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4326 0 : context_ptr->sb_src_stride << 1,
4327 0 : &(pos_j_buffer[search_region_index]),
4328 0 : context_ptr->interpolated_stride << 1,
4329 : pu_height >> 1,
4330 0 : pu_width)) << 1;
4331 : else
4332 0 : distortion_topleft_position = nxm_sad_kernel(
4333 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4334 : context_ptr->sb_src_stride,
4335 0 : &(pos_j_buffer[search_region_index]),
4336 : context_ptr->interpolated_stride,
4337 : pu_height,
4338 : pu_width);
4339 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4340 0 : if (distortion_topleft_position < *p_best_ssd) {
4341 0 : *p_best_sad = (uint32_t)
4342 0 : nxm_sad_kernel(
4343 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4344 : context_ptr->sb_src_stride,
4345 0 : &(pos_j_buffer[search_region_index]),
4346 : context_ptr->interpolated_stride,
4347 : pu_height,
4348 : pu_width);
4349 0 : *p_best_mv =
4350 0 : ((uint16_t)half_mv_y[4] << 16) | ((uint16_t)half_mv_x[4]);
4351 0 : *p_best_ssd = (uint32_t)distortion_topleft_position;
4352 : }
4353 : }
4354 : else {
4355 0 : if (distortion_topleft_position < *p_best_sad) {
4356 0 : *p_best_sad = (uint32_t)distortion_topleft_position;
4357 0 : *p_best_mv =
4358 0 : ((uint16_t)half_mv_y[4] << 16) | ((uint16_t)half_mv_x[4]);
4359 : }
4360 : }
4361 : // TR position
4362 0 : search_region_index++;
4363 0 : if (context_ptr->fractional_search_method == SSD_SEARCH)
4364 0 : distortion_topright_position = spatial_full_distortion_kernel(
4365 : context_ptr->sb_src_ptr,
4366 : src_block_index,
4367 : context_ptr->sb_src_stride,
4368 : pos_j_buffer,
4369 : search_region_index,
4370 : context_ptr->interpolated_stride,
4371 : pu_width,
4372 : pu_height);
4373 0 : else if (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4374 0 : distortion_topright_position = (nxm_sad_kernel(
4375 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4376 0 : context_ptr->sb_src_stride << 1,
4377 0 : &(pos_j_buffer[search_region_index]),
4378 0 : context_ptr->interpolated_stride << 1,
4379 : pu_height >> 1,
4380 0 : pu_width)) << 1;
4381 : else
4382 0 : distortion_topright_position = nxm_sad_kernel(
4383 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4384 : context_ptr->sb_src_stride,
4385 0 : &(pos_j_buffer[search_region_index]),
4386 : context_ptr->interpolated_stride,
4387 : pu_height,
4388 : pu_width);
4389 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4390 0 : if (distortion_topright_position < *p_best_ssd) {
4391 0 : *p_best_sad = (uint32_t)
4392 0 : nxm_sad_kernel(
4393 0 : &(context_ptr->sb_src_ptr[src_block_index]),
4394 : context_ptr->sb_src_stride,
4395 0 : &(pos_j_buffer[search_region_index]),
4396 : context_ptr->interpolated_stride,
4397 : pu_height,
4398 : pu_width);
4399 0 : *p_best_mv =
4400 0 : ((uint16_t)half_mv_y[5] << 16) | ((uint16_t)half_mv_x[5]);
4401 0 : *p_best_ssd = (uint32_t)distortion_topright_position;
4402 : }
4403 : }
4404 : else {
4405 0 : if (distortion_topright_position < *p_best_sad) {
4406 0 : *p_best_sad = (uint32_t)distortion_topright_position;
4407 0 : *p_best_mv =
4408 0 : ((uint16_t)half_mv_y[5] << 16) | ((uint16_t)half_mv_x[5]);
4409 : }
4410 : }
4411 : }
4412 : }
4413 0 : return;
4414 : }
4415 : #else
4416 : /*******************************************
4417 : * PU_HalfPelRefinement (half_pel_refinement_block)
4418 : * performs Half Pel refinement for one PU
 *
 * Tests half-pel candidates around the integer candidate ineteger_mv for a
 * single block and, whenever a candidate beats the current best, updates
 * *p_best_sad and *p_best_mv (and *p_best_ssd in SSD_SEARCH mode) in place.
 *
 * Distortion metric, selected by context_ptr->fractional_search_method:
 *   SSD_SEARCH     - spatial_full_distortion_kernel(), compared against
 *                    *p_best_ssd; on a win *p_best_sad is recomputed with a
 *                    full-block nxm_sad_kernel() at the winning position.
 *   SUB_SAD_SEARCH - nxm_sad_kernel() with both strides doubled and the
 *                    height halved (every other row), result shifted left
 *                    by one; compared against *p_best_sad.
 *   otherwise      - nxm_sad_kernel() over the whole block; compared
 *                    against *p_best_sad.
 *
 * Motion vectors are packed in a uint32_t as (y << 16) | x. MV components
 * are shifted right by 2 to obtain integer sample positions and offset by
 * +/-2 to form the half-pel candidates (presumably quarter-pel units -
 * confirm against the _MVXT/_MVYT definitions).
 *
 * Returns without writing any output when the integer candidate is more
 * than H_PEL_SEARCH_WIND integer positions away (in x or in y) from
 * *best_pervious_stage_mv.
 *
 * Outside the "#if !HP_REF_OPT" sections only the L, T, TL and TR
 * candidates are evaluated.
 *
 * NOTE(review): the "#if !HP_REF_OPT" sections use
 * distortion_right_position, distortion_bottom_position,
 * distortionBottomPosition, distortion_bottomright_position and
 * distortion_bottomleft_position, none of which is declared in this
 * function, and three of their spatial_full_distortion_kernel() calls pass
 * six arguments where the other calls here pass eight. Those sections look
 * stale and would presumably not build with HP_REF_OPT disabled - confirm
 * before enabling them.
4419 : *******************************************/
4420 : static void half_pel_refinement_block(
4421 : MeContext
4422 : *context_ptr, // input parameter, ME context Ptr, used to get SB Ptr
// ref_buffer / ref_stride: reference samples and their stride; read only in
// SSD_SEARCH mode, to score the integer candidate itself.
// p_best_ssd: in/out best SSD so far; read and written only in SSD_SEARCH
// mode.
4423 : uint8_t *ref_buffer, uint32_t ref_stride, uint32_t *p_best_ssd,
4424 : uint32_t src_block_index, // input parameter, PU origin, used to point to
4425 : // source samples
4426 : uint8_t *pos_b_buffer, // input parameter, position "b" interpolated search
4427 : // area Ptr
4428 : uint8_t *pos_h_buffer, // input parameter, position "h" interpolated search
4429 : // area Ptr
4430 : uint8_t *pos_j_buffer, // input parameter, position "j" interpolated search
4431 : // area Ptr
4432 : uint32_t pu_width, // input parameter, PU width
4433 : uint32_t pu_height, // input parameter, PU height
4434 : int16_t x_search_area_origin, // input parameter, search area origin in the
4435 : // horizontal direction, used to point to
4436 : // reference samples
4437 : int16_t y_search_area_origin, // input parameter, search area origin in the
4438 : // vertical direction, used to point to
4439 : // reference samples
// p_best_sad / p_best_mv: in/out best SAD and packed best MV for this block.
// p_sub_pel_direction: accepted but not used (see the (void) cast below).
// best_pervious_stage_mv: packed best MV of the previous stage; only read,
// to decide whether this integer candidate is refined at all.
// ineteger_mv: packed integer candidate MV the half-pel positions surround.
4440 : uint32_t *p_best_sad, uint32_t *p_best_mv,
4441 : uint8_t *p_sub_pel_direction, uint32_t *best_pervious_stage_mv,
4442 : uint32_t ineteger_mv) {
4443 : int32_t search_region_index;
4444 : uint64_t distortion_left_position = 0;
4445 : uint64_t distortion_top_position = 0;
4446 : uint64_t distortion_topleft_position = 0;
4447 : uint64_t distortion_topright_position = 0;
// Candidate MV components, indexed 0..7 = L, R, T, B, TL, TR, BR, BL.
4448 : int16_t half_mv_x[8];
4449 : int16_t half_mv_y[8];
4450 : // compute distance between best mv and the integer mv candidate
4451 : int16_t int_x_mv = _MVXT(ineteger_mv);
4452 : int16_t int_y_mv = _MVYT(ineteger_mv);
4453 : int16_t int_search_index_x = (int_x_mv >> 2) - x_search_area_origin;
4454 : int16_t int_search_index_y = (int_y_mv >> 2) - y_search_area_origin;
4455 : int16_t x_best_mv = _MVXT(*best_pervious_stage_mv);
4456 : int16_t y_best_mv = _MVYT(*best_pervious_stage_mv);
4457 : int16_t best_search_index_x = (x_best_mv >> 2) - x_search_area_origin;
4458 : int16_t best_search_index_y = (y_best_mv >> 2) - y_search_area_origin;
// Per-axis distance, in integer sample positions, between this integer
// candidate and the previous stage's best MV.
4459 : int16_t dis_x = ABS(int_search_index_x - best_search_index_x);
4460 : int16_t dis_y = ABS(int_search_index_y - best_search_index_y);
4461 : // Skip half pel if the integer candidate is not inside the desired window.
4462 : if ((dis_x) > H_PEL_SEARCH_WIND)
4463 : return;
4464 : if ((dis_y) > H_PEL_SEARCH_WIND)
4465 : return;
// These four repeat int_x_mv, int_y_mv, int_search_index_x and
// int_search_index_y computed above (same expressions, same inputs).
4466 : int16_t x_mv = _MVXT(ineteger_mv);
4467 : int16_t y_mv = _MVYT(ineteger_mv);
4468 : int16_t search_index_x = (x_mv >> 2) - x_search_area_origin;
4469 : int16_t search_index_y = (y_mv >> 2) - y_search_area_origin;
// p_sub_pel_direction is intentionally unused; the cast silences the warning.
4470 : (void)p_sub_pel_direction;
4471 : half_mv_x[0] = x_mv - 2; // L position
4472 : half_mv_x[1] = x_mv + 2; // R position
4473 : half_mv_x[2] = x_mv; // T position
4474 : half_mv_x[3] = x_mv; // B position
4475 : half_mv_x[4] = x_mv - 2; // TL position
4476 : half_mv_x[5] = x_mv + 2; // TR position
4477 : half_mv_x[6] = x_mv + 2; // BR position
4478 : half_mv_x[7] = x_mv - 2; // BL position
4479 : half_mv_y[0] = y_mv; // L position
4480 : half_mv_y[1] = y_mv; // R position
4481 : half_mv_y[2] = y_mv - 2; // T position
4482 : half_mv_y[3] = y_mv + 2; // B position
4483 : half_mv_y[4] = y_mv - 2; // TL position
4484 : half_mv_y[5] = y_mv - 2; // TR position
4485 : half_mv_y[6] = y_mv + 2; // BR position
4486 : half_mv_y[7] = y_mv + 2; // BL position
4487 : // Compute SSD for the best full search candidate
// Only *p_best_ssd and *p_best_mv are updated here; *p_best_sad is left
// unchanged for the integer position.
4488 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4489 : uint32_t integer_sse =
4490 : (uint32_t)spatial_full_distortion_kernel(
4491 : context_ptr->sb_src_ptr,
4492 : src_block_index,
4493 : context_ptr->sb_src_stride,
4494 : ref_buffer,
4495 : search_index_y * ref_stride + search_index_x,
4496 : ref_stride,
4497 : pu_width,
4498 : pu_height);
4499 : if (integer_sse < *p_best_ssd) {
4500 : *p_best_ssd = integer_sse;
4501 : *p_best_mv = ineteger_mv;
4502 : }
4503 : }
4504 : // L position
// Read from pos_b_buffer at the integer candidate's index in the
// interpolated plane (row stride = interpolated_stride).
4505 : search_region_index =
4506 : search_index_x +
4507 : (int16_t)context_ptr->interpolated_stride * search_index_y;
4508 : distortion_left_position =
4509 : (context_ptr->fractional_search_method == SSD_SEARCH)
4510 : ? spatial_full_distortion_kernel(
4511 : context_ptr->sb_src_ptr,
4512 : src_block_index,
4513 : context_ptr->sb_src_stride,
4514 : pos_b_buffer,
4515 : search_region_index,
4516 : context_ptr->interpolated_stride,
4517 : pu_width,
4518 : pu_height)
4519 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4520 : ? (nxm_sad_kernel(
4521 : &(context_ptr->sb_src_ptr[src_block_index]),
4522 : context_ptr->sb_src_stride << 1,
4523 : &(pos_b_buffer[search_region_index]),
4524 : context_ptr->interpolated_stride << 1,
4525 : pu_height >> 1,
4526 : pu_width))
4527 : << 1
4528 : : nxm_sad_kernel(
4529 : &(context_ptr->sb_src_ptr[src_block_index]),
4530 : context_ptr->sb_src_stride,
4531 : &(pos_b_buffer[search_region_index]),
4532 : context_ptr->interpolated_stride,
4533 : pu_height,
4534 : pu_width);
// SSD mode: a win also refreshes *p_best_sad with a full-block SAD at the
// same position. Other modes: the distortion is itself the SAD.
4535 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4536 : if (distortion_left_position < *p_best_ssd) {
4537 : *p_best_sad = (uint32_t)
4538 : nxm_sad_kernel(
4539 : &(context_ptr->sb_src_ptr[src_block_index]),
4540 : context_ptr->sb_src_stride,
4541 : &(pos_b_buffer[search_region_index]),
4542 : context_ptr->interpolated_stride,
4543 : pu_height,
4544 : pu_width);
4545 : *p_best_mv =
4546 : ((uint16_t)half_mv_y[0] << 16) | ((uint16_t)half_mv_x[0]);
4547 : *p_best_ssd = (uint32_t)distortion_left_position;
4548 : }
4549 : } else {
4550 : if (distortion_left_position < *p_best_sad) {
4551 : *p_best_sad = (uint32_t)distortion_left_position;
4552 : *p_best_mv =
4553 : ((uint16_t)half_mv_y[0] << 16) | ((uint16_t)half_mv_x[0]);
4554 : }
4555 : }
4556 : #if !HP_REF_OPT
// NOTE(review): distortion_right_position is not declared in this function,
// and the SSD call below uses a six-argument form - confirm before enabling.
4557 : // R position
4558 : search_region_index++;
4559 : distortion_right_position =
4560 : (context_ptr->fractional_search_method == SSD_SEARCH)
4561 : ? spatial_full_distortion_kernel(
4562 : &(context_ptr->sb_src_ptr[src_block_index]),
4563 : context_ptr->sb_src_stride,
4564 : &(pos_b_buffer[search_region_index]),
4565 : context_ptr->interpolated_stride,
4566 : pu_width,
4567 : pu_height)
4568 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4569 : ? (nxm_sad_kernel(
4570 : &(context_ptr->sb_src_ptr[src_block_index]),
4571 : context_ptr->sb_src_stride << 1,
4572 : &(pos_b_buffer[search_region_index]),
4573 : context_ptr->interpolated_stride << 1,
4574 : pu_height >> 1,
4575 : pu_width))
4576 : << 1
4577 : : nxm_sad_kernel(
4578 : &(context_ptr->sb_src_ptr[src_block_index]),
4579 : context_ptr->sb_src_stride,
4580 : &(pos_b_buffer[search_region_index]),
4581 : context_ptr->interpolated_stride,
4582 : pu_height,
4583 : pu_width);
4584 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4585 : if (distortion_right_position < *p_best_ssd) {
4586 : *p_best_sad = (uint32_t)
4587 : nxm_sad_kernel(
4588 : &(context_ptr->sb_src_ptr[src_block_index]),
4589 : context_ptr->sb_src_stride,
4590 : &(pos_b_buffer[search_region_index]),
4591 : context_ptr->interpolated_stride,
4592 : pu_height,
4593 : pu_width);
4594 : *p_best_mv =
4595 : ((uint16_t)half_mv_y[1] << 16) | ((uint16_t)half_mv_x[1]);
4596 : *p_best_ssd = (uint32_t)distortion_right_position;
4597 : }
4598 : } else {
4599 : if (distortion_right_position < *p_best_sad) {
4600 : *p_best_sad = (uint32_t)distortion_right_position;
4601 : *p_best_mv =
4602 : ((uint16_t)half_mv_y[1] << 16) | ((uint16_t)half_mv_x[1]);
4603 : }
4604 : }
4605 : #endif
4606 : // T position
// Read from pos_h_buffer; the index is recomputed from scratch so it does
// not depend on whether the R section above was compiled in.
4607 : search_region_index =
4608 : search_index_x +
4609 : (int16_t)context_ptr->interpolated_stride * search_index_y;
4610 : distortion_top_position =
4611 : (context_ptr->fractional_search_method == SSD_SEARCH)
4612 : ? spatial_full_distortion_kernel(
4613 : context_ptr->sb_src_ptr,
4614 : src_block_index,
4615 : context_ptr->sb_src_stride,
4616 : pos_h_buffer,
4617 : search_region_index,
4618 : context_ptr->interpolated_stride,
4619 : pu_width,
4620 : pu_height)
4621 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4622 : ? (nxm_sad_kernel(
4623 : &(context_ptr->sb_src_ptr[src_block_index]),
4624 : context_ptr->sb_src_stride << 1,
4625 : &(pos_h_buffer[search_region_index]),
4626 : context_ptr->interpolated_stride << 1,
4627 : pu_height >> 1,
4628 : pu_width))
4629 : << 1
4630 : : nxm_sad_kernel(
4631 : &(context_ptr->sb_src_ptr[src_block_index]),
4632 : context_ptr->sb_src_stride,
4633 : &(pos_h_buffer[search_region_index]),
4634 : context_ptr->interpolated_stride,
4635 : pu_height,
4636 : pu_width);
4637 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4638 : if (distortion_top_position < *p_best_ssd) {
4639 : *p_best_sad = (uint32_t)
4640 : nxm_sad_kernel(
4641 : &(context_ptr->sb_src_ptr[src_block_index]),
4642 : context_ptr->sb_src_stride,
4643 : &(pos_h_buffer[search_region_index]),
4644 : context_ptr->interpolated_stride,
4645 : pu_height,
4646 : pu_width);
4647 : *p_best_mv =
4648 : ((uint16_t)half_mv_y[2] << 16) | ((uint16_t)half_mv_x[2]);
4649 : *p_best_ssd = (uint32_t)distortion_top_position;
4650 : }
4651 : } else {
4652 : if (distortion_top_position < *p_best_sad) {
4653 : *p_best_sad = (uint32_t)distortion_top_position;
4654 : *p_best_mv =
4655 : ((uint16_t)half_mv_y[2] << 16) | ((uint16_t)half_mv_x[2]);
4656 : }
4657 : }
4658 : #if !HP_REF_OPT
// NOTE(review): distortion_bottom_position is not declared in this function,
// the SAD branch below assigns from distortionBottomPosition (different
// spelling), and the SSD call uses a six-argument form - confirm before
// enabling.
4659 : // B position
4660 : search_region_index += (int16_t)context_ptr->interpolated_stride;
4661 : distortion_bottom_position =
4662 : (context_ptr->fractional_search_method == SSD_SEARCH)
4663 : ? spatial_full_distortion_kernel(
4664 : &(context_ptr->sb_src_ptr[src_block_index]),
4665 : context_ptr->sb_src_stride,
4666 : &(pos_h_buffer[search_region_index]),
4667 : context_ptr->interpolated_stride,
4668 : pu_width,
4669 : pu_height)
4670 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4671 : ? (nxm_sad_kernel(
4672 : &(context_ptr->sb_src_ptr[src_block_index]),
4673 : context_ptr->sb_src_stride << 1,
4674 : &(pos_h_buffer[search_region_index]),
4675 : context_ptr->interpolated_stride << 1,
4676 : pu_height >> 1,
4677 : pu_width))
4678 : << 1
4679 : : nxm_sad_kernel(
4680 : &(context_ptr->sb_src_ptr[src_block_index]),
4681 : context_ptr->sb_src_stride,
4682 : &(pos_h_buffer[search_region_index]),
4683 : context_ptr->interpolated_stride,
4684 : pu_height,
4685 : pu_width);
4686 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4687 : if (distortion_bottom_position < *p_best_ssd) {
4688 : *p_best_sad = (uint32_t)
4689 : nxm_sad_kernel(
4690 : &(context_ptr->sb_src_ptr[src_block_index]),
4691 : context_ptr->sb_src_stride,
4692 : &(pos_h_buffer[search_region_index]),
4693 : context_ptr->interpolated_stride,
4694 : pu_height,
4695 : pu_width);
4696 : *p_best_mv =
4697 : ((uint16_t)half_mv_y[3] << 16) | ((uint16_t)half_mv_x[3]);
4698 : *p_best_ssd = (uint32_t)distortion_bottom_position;
4699 : }
4700 : } else {
4701 : if (distortion_bottom_position < *p_best_sad) {
4702 : *p_best_sad = (uint32_t)distortionBottomPosition;
4703 : *p_best_mv =
4704 : ((uint16_t)half_mv_y[3] << 16) | ((uint16_t)half_mv_x[3]);
4705 : }
4706 : }
4707 : #endif
4708 : // TL position
// Read from pos_j_buffer at the integer candidate's index.
4709 : search_region_index =
4710 : search_index_x +
4711 : (int16_t)context_ptr->interpolated_stride * search_index_y;
4712 : distortion_topleft_position =
4713 : (context_ptr->fractional_search_method == SSD_SEARCH)
4714 : ? spatial_full_distortion_kernel(
4715 : context_ptr->sb_src_ptr,
4716 : src_block_index,
4717 : context_ptr->sb_src_stride,
4718 : pos_j_buffer,
4719 : search_region_index,
4720 : context_ptr->interpolated_stride,
4721 : pu_width,
4722 : pu_height)
4723 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4724 : ? (nxm_sad_kernel(
4725 : &(context_ptr->sb_src_ptr[src_block_index]),
4726 : context_ptr->sb_src_stride << 1,
4727 : &(pos_j_buffer[search_region_index]),
4728 : context_ptr->interpolated_stride << 1,
4729 : pu_height >> 1,
4730 : pu_width))
4731 : << 1
4732 : : nxm_sad_kernel(
4733 : &(context_ptr->sb_src_ptr[src_block_index]),
4734 : context_ptr->sb_src_stride,
4735 : &(pos_j_buffer[search_region_index]),
4736 : context_ptr->interpolated_stride,
4737 : pu_height,
4738 : pu_width);
4739 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4740 : if (distortion_topleft_position < *p_best_ssd) {
4741 : *p_best_sad = (uint32_t)
4742 : nxm_sad_kernel(
4743 : &(context_ptr->sb_src_ptr[src_block_index]),
4744 : context_ptr->sb_src_stride,
4745 : &(pos_j_buffer[search_region_index]),
4746 : context_ptr->interpolated_stride,
4747 : pu_height,
4748 : pu_width);
4749 : *p_best_mv =
4750 : ((uint16_t)half_mv_y[4] << 16) | ((uint16_t)half_mv_x[4]);
4751 : *p_best_ssd = (uint32_t)distortion_topleft_position;
4752 : }
4753 : } else {
4754 : if (distortion_topleft_position < *p_best_sad) {
4755 : *p_best_sad = (uint32_t)distortion_topleft_position;
4756 : *p_best_mv =
4757 : ((uint16_t)half_mv_y[4] << 16) | ((uint16_t)half_mv_x[4]);
4758 : }
4759 : }
4760 : // TR position
// One sample to the right of the TL index, still in pos_j_buffer.
4761 : search_region_index++;
4762 : distortion_topright_position =
4763 : (context_ptr->fractional_search_method == SSD_SEARCH)
4764 : ? spatial_full_distortion_kernel(
4765 : context_ptr->sb_src_ptr,
4766 : src_block_index,
4767 : context_ptr->sb_src_stride,
4768 : pos_j_buffer,
4769 : search_region_index,
4770 : context_ptr->interpolated_stride,
4771 : pu_width,
4772 : pu_height)
4773 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4774 : ? (nxm_sad_kernel(
4775 : &(context_ptr->sb_src_ptr[src_block_index]),
4776 : context_ptr->sb_src_stride << 1,
4777 : &(pos_j_buffer[search_region_index]),
4778 : context_ptr->interpolated_stride << 1,
4779 : pu_height >> 1,
4780 : pu_width))
4781 : << 1
4782 : : nxm_sad_kernel(
4783 : &(context_ptr->sb_src_ptr[src_block_index]),
4784 : context_ptr->sb_src_stride,
4785 : &(pos_j_buffer[search_region_index]),
4786 : context_ptr->interpolated_stride,
4787 : pu_height,
4788 : pu_width);
4789 :
4790 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4791 : if (distortion_topright_position < *p_best_ssd) {
4792 : *p_best_sad = (uint32_t)
4793 : nxm_sad_kernel(
4794 : &(context_ptr->sb_src_ptr[src_block_index]),
4795 : context_ptr->sb_src_stride,
4796 : &(pos_j_buffer[search_region_index]),
4797 : context_ptr->interpolated_stride,
4798 : pu_height,
4799 : pu_width);
4800 : *p_best_mv =
4801 : ((uint16_t)half_mv_y[5] << 16) | ((uint16_t)half_mv_x[5]);
4802 : *p_best_ssd = (uint32_t)distortion_topright_position;
4803 : }
4804 : } else {
4805 : if (distortion_topright_position < *p_best_sad) {
4806 : *p_best_sad = (uint32_t)distortion_topright_position;
4807 : *p_best_mv =
4808 : ((uint16_t)half_mv_y[5] << 16) | ((uint16_t)half_mv_x[5]);
4809 : }
4810 : }
4811 : #if !HP_REF_OPT
// NOTE(review): distortion_bottomright_position and
// distortion_bottomleft_position are not declared in this function, and the
// BL SSD call below uses a six-argument form - confirm before enabling.
4812 : // BR position
// One row below the TR index in pos_j_buffer.
4813 : search_region_index += (int16_t)context_ptr->interpolated_stride;
4814 : distortion_bottomright_position =
4815 : (context_ptr->fractional_search_method == SSD_SEARCH)
4816 : ? spatial_full_distortion_kernel(
4817 : context_ptr->sb_src_ptr,
4818 : src_block_index,
4819 : context_ptr->sb_src_stride,
4820 : pos_j_buffer,
4821 : search_region_index,
4822 : context_ptr->interpolated_stride,
4823 : pu_width,
4824 : pu_height)
4825 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4826 : ? (nxm_sad_kernel(
4827 : &(context_ptr->sb_src_ptr[src_block_index]),
4828 : context_ptr->sb_src_stride << 1,
4829 : &(pos_j_buffer[search_region_index]),
4830 : context_ptr->interpolated_stride << 1,
4831 : pu_height >> 1,
4832 : pu_width))
4833 : << 1
4834 : : nxm_sad_kernel(
4835 : &(context_ptr->sb_src_ptr[src_block_index]),
4836 : context_ptr->sb_src_stride,
4837 : &(pos_j_buffer[search_region_index]),
4838 : context_ptr->interpolated_stride,
4839 : pu_height,
4840 : pu_width);
4841 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4842 : if (distortion_bottomright_position < *p_best_ssd) {
4843 : *p_best_sad = (uint32_t)
4844 : nxm_sad_kernel(
4845 : &(context_ptr->sb_src_ptr[src_block_index]),
4846 : context_ptr->sb_src_stride,
4847 : &(pos_j_buffer[search_region_index]),
4848 : context_ptr->interpolated_stride,
4849 : pu_height,
4850 : pu_width);
4851 : *p_best_mv =
4852 : ((uint16_t)half_mv_y[6] << 16) | ((uint16_t)half_mv_x[6]);
4853 : *p_best_ssd = (uint32_t)distortion_bottomright_position;
4854 : }
4855 : } else {
4856 : if (distortion_bottomright_position < *p_best_sad) {
4857 : *p_best_sad = (uint32_t)distortion_bottomright_position;
4858 : *p_best_mv =
4859 : ((uint16_t)half_mv_y[6] << 16) | ((uint16_t)half_mv_x[6]);
4860 : }
4861 : }
4862 : // BL position
// One sample to the left of the BR index in pos_j_buffer.
4863 : search_region_index--;
4864 : distortion_bottomleft_position =
4865 : (context_ptr->fractional_search_method == SSD_SEARCH)
4866 : ? spatial_full_distortion_kernel(
4867 : &(context_ptr->sb_src_ptr[src_block_index]),
4868 : context_ptr->sb_src_stride,
4869 : &(pos_j_buffer[search_region_index]),
4870 : context_ptr->interpolated_stride,
4871 : pu_width,
4872 : pu_height)
4873 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
4874 : ? (nxm_sad_kernel(
4875 : &(context_ptr->sb_src_ptr[src_block_index]),
4876 : context_ptr->sb_src_stride << 1,
4877 : &(pos_j_buffer[search_region_index]),
4878 : context_ptr->interpolated_stride << 1,
4879 : pu_height >> 1,
4880 : pu_width))
4881 : << 1
4882 : : (nxm_sad_kernel(
4883 : &(context_ptr->sb_src_ptr[src_block_index]),
4884 : context_ptr->sb_src_stride,
4885 : &(pos_j_buffer[search_region_index]),
4886 : context_ptr->interpolated_stride,
4887 : pu_height,
4888 : pu_width));
4889 :
4890 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
4891 : if (distortion_bottomleft_position < *p_best_ssd) {
4892 : *p_best_sad = (uint32_t)(
4893 : nxm_sad_kernel(
4894 : &(context_ptr->sb_src_ptr[src_block_index]),
4895 : context_ptr->sb_src_stride,
4896 : &(pos_j_buffer[search_region_index]),
4897 : context_ptr->interpolated_stride,
4898 : pu_height,
4899 : pu_width));
4900 : *p_best_mv =
4901 : ((uint16_t)half_mv_y[7] << 16) | ((uint16_t)half_mv_x[7]);
4902 : *p_best_ssd = (uint32_t)distortion_bottomleft_position;
4903 : }
4904 : } else {
4905 : if (distortion_bottomleft_position < *p_best_sad) {
4906 : *p_best_sad = (uint32_t)distortion_bottomleft_position;
4907 : *p_best_mv =
4908 : ((uint16_t)half_mv_y[7] << 16) | ((uint16_t)half_mv_x[7]);
4909 : }
4910 : }
4911 : #endif
4912 : return;
4913 : }
4914 : #endif
4915 : /*******************************************
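4916 : * HalfPelSearch_LCU (half_pel_refinement_sb)
4917 : * performs Half Pel refinement for the square PUs of an SB (64x64, 32x32, 16x16 and 8x8: up to 85 PUs) and, when pic_depth_mode allows, for the non-square PUs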
4918 : *******************************************/
4919 0 : void half_pel_refinement_sb(
4920 : PictureParentControlSet *picture_control_set_ptr,
4921 : MeContext *context_ptr, // input/output parameter, ME context Ptr, used to
4922 : // get/update ME results
4923 : uint8_t *refBuffer, uint32_t ref_stride,
4924 : uint8_t *pos_b_buffer, // input parameter, position "b" interpolated search
4925 : // area Ptr
4926 : uint8_t *pos_h_buffer, // input parameter, position "h" interpolated search
4927 : // area Ptr
4928 : uint8_t *pos_j_buffer, // input parameter, position "j" interpolated search
4929 : // area Ptr
4930 : int16_t x_search_area_origin, // input parameter, search area origin in the
4931 : // horizontal direction, used to point to
4932 : // reference samples
4933 : int16_t y_search_area_origin, // input parameter, search area origin in the
4934 : // vertical direction, used to point to
4935 : // reference samples
4936 : #if OPTIMISED_EX_SUBPEL
4937 : uint32_t search_area_height, // input parameter, search area height
4938 : uint32_t search_area_width, // input parameter, search area width
4939 : #endif
4940 : uint32_t inetger_mv)
4941 : {
4942 : uint32_t idx;
4943 : uint32_t pu_index;
4944 : uint32_t block_index_shift_x;
4945 : uint32_t block_index_shift_y;
4946 : uint32_t src_block_index;
4947 : uint32_t posb_buffer_index;
4948 : uint32_t posh_buffer_index;
4949 : uint32_t posj_buffer_index;
4950 0 : if (context_ptr->fractional_search64x64)
4951 0 : half_pel_refinement_block(context_ptr,
4952 : &(refBuffer[0]),
4953 : ref_stride,
4954 : context_ptr->p_best_ssd64x64,
4955 : 0,
4956 : &(pos_b_buffer[0]),
4957 : &(pos_h_buffer[0]),
4958 : &(pos_j_buffer[0]),
4959 : 64,
4960 : 64,
4961 : x_search_area_origin,
4962 : y_search_area_origin,
4963 : #if OPTIMISED_EX_SUBPEL
4964 : search_area_height,
4965 : search_area_width,
4966 : #endif
4967 : context_ptr->p_best_sad64x64,
4968 : context_ptr->p_best_mv64x64,
4969 : &context_ptr->psub_pel_direction64x64,
4970 : context_ptr->p_best_full_pel_mv64x64,
4971 : inetger_mv);
4972 : // 32x32 [4 partitions]
4973 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
4974 0 : block_index_shift_x = (pu_index & 0x01) << 5;
4975 0 : block_index_shift_y = (pu_index >> 1) << 5;
4976 0 : src_block_index = block_index_shift_x +
4977 0 : block_index_shift_y * context_ptr->sb_src_stride;
4978 0 : posb_buffer_index =
4979 : block_index_shift_x +
4980 0 : block_index_shift_y * context_ptr->interpolated_stride;
4981 0 : posh_buffer_index =
4982 : block_index_shift_x +
4983 0 : block_index_shift_y * context_ptr->interpolated_stride;
4984 0 : posj_buffer_index =
4985 : block_index_shift_x +
4986 0 : block_index_shift_y * context_ptr->interpolated_stride;
4987 0 : half_pel_refinement_block(
4988 : context_ptr,
4989 0 : &(refBuffer[block_index_shift_y * ref_stride +
4990 : block_index_shift_x]),
4991 : ref_stride,
4992 0 : &context_ptr->p_best_ssd32x32[pu_index],
4993 : src_block_index,
4994 : &(pos_b_buffer[posb_buffer_index]),
4995 : &(pos_h_buffer[posh_buffer_index]),
4996 : &(pos_j_buffer[posj_buffer_index]),
4997 : 32,
4998 : 32,
4999 : x_search_area_origin,
5000 : y_search_area_origin,
5001 : #if OPTIMISED_EX_SUBPEL
5002 : search_area_height,
5003 : search_area_width,
5004 : #endif
5005 0 : &context_ptr->p_best_sad32x32[pu_index],
5006 0 : &context_ptr->p_best_mv32x32[pu_index],
5007 : &context_ptr->psub_pel_direction32x32[pu_index],
5008 0 : &context_ptr->p_best_full_pel_mv32x32[pu_index],
5009 : inetger_mv);
5010 : }
5011 : // 16x16 [16 partitions]
5012 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
5013 0 : idx = tab16x16[pu_index];
5014 0 : block_index_shift_x = (pu_index & 0x03) << 4;
5015 0 : block_index_shift_y = (pu_index >> 2) << 4;
5016 0 : src_block_index = block_index_shift_x +
5017 0 : block_index_shift_y * context_ptr->sb_src_stride;
5018 0 : posb_buffer_index =
5019 : block_index_shift_x +
5020 0 : block_index_shift_y * context_ptr->interpolated_stride;
5021 0 : posh_buffer_index =
5022 : block_index_shift_x +
5023 0 : block_index_shift_y * context_ptr->interpolated_stride;
5024 0 : posj_buffer_index =
5025 : block_index_shift_x +
5026 0 : block_index_shift_y * context_ptr->interpolated_stride;
5027 0 : half_pel_refinement_block(context_ptr,
5028 0 : &(refBuffer[block_index_shift_y * ref_stride +
5029 : block_index_shift_x]),
5030 : ref_stride,
5031 0 : &context_ptr->p_best_ssd16x16[idx],
5032 : src_block_index,
5033 : &(pos_b_buffer[posb_buffer_index]),
5034 : &(pos_h_buffer[posh_buffer_index]),
5035 : &(pos_j_buffer[posj_buffer_index]),
5036 : 16,
5037 : 16,
5038 : x_search_area_origin,
5039 : y_search_area_origin,
5040 : #if OPTIMISED_EX_SUBPEL
5041 : search_area_height,
5042 : search_area_width,
5043 : #endif
5044 0 : &context_ptr->p_best_sad16x16[idx],
5045 0 : &context_ptr->p_best_mv16x16[idx],
5046 : &context_ptr->psub_pel_direction16x16[idx],
5047 0 : &context_ptr->p_best_full_pel_mv16x16[idx],
5048 : inetger_mv);
5049 : }
5050 : // 8x8 [64 partitions]
5051 0 : for (pu_index = 0; pu_index < 64; ++pu_index) {
5052 0 : idx = tab8x8[pu_index]; // TODO bitwise this
5053 0 : block_index_shift_x = (pu_index & 0x07) << 3;
5054 0 : block_index_shift_y = (pu_index >> 3) << 3;
5055 0 : src_block_index = block_index_shift_x +
5056 0 : block_index_shift_y * context_ptr->sb_src_stride;
5057 0 : posb_buffer_index =
5058 : block_index_shift_x +
5059 0 : block_index_shift_y * context_ptr->interpolated_stride;
5060 0 : posh_buffer_index =
5061 : block_index_shift_x +
5062 0 : block_index_shift_y * context_ptr->interpolated_stride;
5063 0 : posj_buffer_index =
5064 : block_index_shift_x +
5065 0 : block_index_shift_y * context_ptr->interpolated_stride;
5066 0 : half_pel_refinement_block(context_ptr,
5067 0 : &(refBuffer[block_index_shift_y * ref_stride +
5068 : block_index_shift_x]),
5069 : ref_stride,
5070 0 : &context_ptr->p_best_ssd8x8[idx],
5071 : src_block_index,
5072 : &(pos_b_buffer[posb_buffer_index]),
5073 : &(pos_h_buffer[posh_buffer_index]),
5074 : &(pos_j_buffer[posj_buffer_index]),
5075 : 8,
5076 : 8,
5077 : x_search_area_origin,
5078 : y_search_area_origin,
5079 : #if OPTIMISED_EX_SUBPEL
5080 : search_area_height,
5081 : search_area_width,
5082 : #endif
5083 0 : &context_ptr->p_best_sad8x8[idx],
5084 0 : &context_ptr->p_best_mv8x8[idx],
5085 : &context_ptr->psub_pel_direction8x8[idx],
5086 0 : &context_ptr->p_best_full_pel_mv8x8[idx],
5087 : inetger_mv);
5088 : }
5089 0 : if (picture_control_set_ptr->pic_depth_mode <= PIC_ALL_C_DEPTH_MODE) {
5090 : // 64x32
5091 0 : for (pu_index = 0; pu_index < 2; ++pu_index) {
5092 0 : block_index_shift_x = 0;
5093 0 : block_index_shift_y = pu_index << 5;
5094 0 : src_block_index = block_index_shift_x +
5095 0 : block_index_shift_y * context_ptr->sb_src_stride;
5096 0 : posb_buffer_index =
5097 : block_index_shift_x +
5098 0 : block_index_shift_y * context_ptr->interpolated_stride;
5099 0 : posh_buffer_index =
5100 : block_index_shift_x +
5101 0 : block_index_shift_y * context_ptr->interpolated_stride;
5102 0 : posj_buffer_index =
5103 : block_index_shift_x +
5104 0 : block_index_shift_y * context_ptr->interpolated_stride;
5105 0 : half_pel_refinement_block(
5106 : context_ptr,
5107 0 : &(refBuffer[block_index_shift_y * ref_stride +
5108 : block_index_shift_x]),
5109 : ref_stride,
5110 0 : &context_ptr->p_best_ssd64x32[pu_index],
5111 : src_block_index,
5112 : &(pos_b_buffer[posb_buffer_index]),
5113 : &(pos_h_buffer[posh_buffer_index]),
5114 : &(pos_j_buffer[posj_buffer_index]),
5115 : 64,
5116 : 32,
5117 : x_search_area_origin,
5118 : y_search_area_origin,
5119 : #if OPTIMISED_EX_SUBPEL
5120 : search_area_height,
5121 : search_area_width,
5122 : #endif
5123 0 : &context_ptr->p_best_sad64x32[pu_index],
5124 0 : &context_ptr->p_best_mv64x32[pu_index],
5125 : &context_ptr->psub_pel_direction64x32[pu_index],
5126 0 : &context_ptr->p_best_full_pel_mv64x32[pu_index],
5127 : inetger_mv);
5128 : }
5129 : // 32x16
5130 0 : for (pu_index = 0; pu_index < 8; ++pu_index) {
5131 0 : idx = tab32x16[pu_index]; // TODO bitwise this
5132 0 : block_index_shift_x = (pu_index & 0x01) << 5;
5133 0 : block_index_shift_y = (pu_index >> 1) << 4;
5134 0 : src_block_index = block_index_shift_x +
5135 0 : block_index_shift_y * context_ptr->sb_src_stride;
5136 0 : posb_buffer_index =
5137 : block_index_shift_x +
5138 0 : block_index_shift_y * context_ptr->interpolated_stride;
5139 0 : posh_buffer_index =
5140 : block_index_shift_x +
5141 0 : block_index_shift_y * context_ptr->interpolated_stride;
5142 0 : posj_buffer_index =
5143 : block_index_shift_x +
5144 0 : block_index_shift_y * context_ptr->interpolated_stride;
5145 0 : half_pel_refinement_block(
5146 : context_ptr,
5147 0 : &(refBuffer[block_index_shift_y * ref_stride +
5148 : block_index_shift_x]),
5149 : ref_stride,
5150 0 : &context_ptr->p_best_ssd32x16[idx],
5151 : src_block_index,
5152 : &(pos_b_buffer[posb_buffer_index]),
5153 : &(pos_h_buffer[posh_buffer_index]),
5154 : &(pos_j_buffer[posj_buffer_index]),
5155 : 32,
5156 : 16,
5157 : x_search_area_origin,
5158 : y_search_area_origin,
5159 : #if OPTIMISED_EX_SUBPEL
5160 : search_area_height,
5161 : search_area_width,
5162 : #endif
5163 0 : &context_ptr->p_best_sad32x16[idx],
5164 0 : &context_ptr->p_best_mv32x16[idx],
5165 : &context_ptr->psub_pel_direction32x16[idx],
5166 0 : &context_ptr->p_best_full_pel_mv32x16[idx],
5167 : inetger_mv);
5168 : }
5169 : // 16x8
5170 0 : for (pu_index = 0; pu_index < 32; ++pu_index) {
5171 0 : idx = tab16x8[pu_index];
5172 0 : block_index_shift_x = (pu_index & 0x03) << 4;
5173 0 : block_index_shift_y = (pu_index >> 2) << 3;
5174 0 : src_block_index = block_index_shift_x +
5175 0 : block_index_shift_y * context_ptr->sb_src_stride;
5176 0 : posb_buffer_index =
5177 : block_index_shift_x +
5178 0 : block_index_shift_y * context_ptr->interpolated_stride;
5179 0 : posh_buffer_index =
5180 : block_index_shift_x +
5181 0 : block_index_shift_y * context_ptr->interpolated_stride;
5182 0 : posj_buffer_index =
5183 : block_index_shift_x +
5184 0 : block_index_shift_y * context_ptr->interpolated_stride;
5185 0 : half_pel_refinement_block(
5186 : context_ptr,
5187 0 : &(refBuffer[block_index_shift_y * ref_stride +
5188 : block_index_shift_x]),
5189 : ref_stride,
5190 0 : &context_ptr->p_best_ssd16x8[idx],
5191 : src_block_index,
5192 : &(pos_b_buffer[posb_buffer_index]),
5193 : &(pos_h_buffer[posh_buffer_index]),
5194 : &(pos_j_buffer[posj_buffer_index]),
5195 : 16,
5196 : 8,
5197 : x_search_area_origin,
5198 : y_search_area_origin,
5199 : #if OPTIMISED_EX_SUBPEL
5200 : search_area_height,
5201 : search_area_width,
5202 : #endif
5203 0 : &context_ptr->p_best_sad16x8[idx],
5204 0 : &context_ptr->p_best_mv16x8[idx],
5205 : &context_ptr->psub_pel_direction16x8[idx],
5206 0 : &context_ptr->p_best_full_pel_mv16x8[idx],
5207 : inetger_mv);
5208 : }
5209 : // 32x64
5210 0 : for (pu_index = 0; pu_index < 2; ++pu_index) {
5211 0 : block_index_shift_x = pu_index << 5;
5212 0 : block_index_shift_y = 0;
5213 0 : src_block_index = block_index_shift_x +
5214 0 : block_index_shift_y * context_ptr->sb_src_stride;
5215 0 : posb_buffer_index =
5216 : block_index_shift_x +
5217 0 : block_index_shift_y * context_ptr->interpolated_stride;
5218 0 : posh_buffer_index =
5219 : block_index_shift_x +
5220 0 : block_index_shift_y * context_ptr->interpolated_stride;
5221 0 : posj_buffer_index =
5222 : block_index_shift_x +
5223 0 : block_index_shift_y * context_ptr->interpolated_stride;
5224 0 : half_pel_refinement_block(
5225 : context_ptr,
5226 0 : &(refBuffer[block_index_shift_y * ref_stride +
5227 : block_index_shift_x]),
5228 : ref_stride,
5229 0 : &context_ptr->p_best_ssd32x64[pu_index],
5230 : src_block_index,
5231 : &(pos_b_buffer[posb_buffer_index]),
5232 : &(pos_h_buffer[posh_buffer_index]),
5233 : &(pos_j_buffer[posj_buffer_index]),
5234 : 32,
5235 : 64,
5236 : x_search_area_origin,
5237 : y_search_area_origin,
5238 : #if OPTIMISED_EX_SUBPEL
5239 : search_area_height,
5240 : search_area_width,
5241 : #endif
5242 0 : &context_ptr->p_best_sad32x64[pu_index],
5243 0 : &context_ptr->p_best_mv32x64[pu_index],
5244 : &context_ptr->psub_pel_direction32x64[pu_index],
5245 0 : &context_ptr->p_best_full_pel_mv32x64[pu_index],
5246 : inetger_mv);
5247 : }
5248 : // 16x32
5249 0 : for (pu_index = 0; pu_index < 8; ++pu_index) {
5250 0 : idx = tab16x32[pu_index];
5251 0 : block_index_shift_x = (pu_index & 0x03) << 4;
5252 0 : block_index_shift_y = (pu_index >> 2) << 5;
5253 0 : src_block_index = block_index_shift_x +
5254 0 : block_index_shift_y * context_ptr->sb_src_stride;
5255 0 : posb_buffer_index =
5256 : block_index_shift_x +
5257 0 : block_index_shift_y * context_ptr->interpolated_stride;
5258 0 : posh_buffer_index =
5259 : block_index_shift_x +
5260 0 : block_index_shift_y * context_ptr->interpolated_stride;
5261 0 : posj_buffer_index =
5262 : block_index_shift_x +
5263 0 : block_index_shift_y * context_ptr->interpolated_stride;
5264 0 : half_pel_refinement_block(
5265 : context_ptr,
5266 0 : &(refBuffer[block_index_shift_y * ref_stride +
5267 : block_index_shift_x]),
5268 : ref_stride,
5269 0 : &context_ptr->p_best_ssd16x32[idx],
5270 : src_block_index,
5271 : &(pos_b_buffer[posb_buffer_index]),
5272 : &(pos_h_buffer[posh_buffer_index]),
5273 : &(pos_j_buffer[posj_buffer_index]),
5274 : 16,
5275 : 32,
5276 : x_search_area_origin,
5277 : y_search_area_origin,
5278 : #if OPTIMISED_EX_SUBPEL
5279 : search_area_height,
5280 : search_area_width,
5281 : #endif
5282 0 : &context_ptr->p_best_sad16x32[idx],
5283 0 : &context_ptr->p_best_mv16x32[idx],
5284 : &context_ptr->psub_pel_direction16x32[idx],
5285 0 : &context_ptr->p_best_full_pel_mv16x32[idx],
5286 : inetger_mv);
5287 : }
5288 : // 8x16
5289 0 : for (pu_index = 0; pu_index < 32; ++pu_index) {
5290 0 : idx = tab8x16[pu_index];
5291 0 : block_index_shift_x = (pu_index & 0x07) << 3;
5292 0 : block_index_shift_y = (pu_index >> 3) << 4;
5293 0 : src_block_index = block_index_shift_x +
5294 0 : block_index_shift_y * context_ptr->sb_src_stride;
5295 0 : posb_buffer_index =
5296 : block_index_shift_x +
5297 0 : block_index_shift_y * context_ptr->interpolated_stride;
5298 0 : posh_buffer_index =
5299 : block_index_shift_x +
5300 0 : block_index_shift_y * context_ptr->interpolated_stride;
5301 0 : posj_buffer_index =
5302 : block_index_shift_x +
5303 0 : block_index_shift_y * context_ptr->interpolated_stride;
5304 0 : half_pel_refinement_block(
5305 : context_ptr,
5306 0 : &(refBuffer[block_index_shift_y * ref_stride +
5307 : block_index_shift_x]),
5308 : ref_stride,
5309 0 : &context_ptr->p_best_ssd8x16[idx],
5310 : src_block_index,
5311 : &(pos_b_buffer[posb_buffer_index]),
5312 : &(pos_h_buffer[posh_buffer_index]),
5313 : &(pos_j_buffer[posj_buffer_index]),
5314 : 8,
5315 : 16,
5316 : x_search_area_origin,
5317 : y_search_area_origin,
5318 : #if OPTIMISED_EX_SUBPEL
5319 : search_area_height,
5320 : search_area_width,
5321 : #endif
5322 0 : &context_ptr->p_best_sad8x16[idx],
5323 0 : &context_ptr->p_best_mv8x16[idx],
5324 : &context_ptr->psub_pel_direction8x16[idx],
5325 0 : &context_ptr->p_best_full_pel_mv8x16[idx],
5326 : inetger_mv);
5327 : }
5328 : // 32x8
5329 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
5330 0 : idx = tab32x8[pu_index];
5331 0 : block_index_shift_x = (pu_index & 0x01) << 5;
5332 0 : block_index_shift_y = (pu_index >> 1) << 3;
5333 0 : src_block_index = block_index_shift_x +
5334 0 : block_index_shift_y * context_ptr->sb_src_stride;
5335 0 : posb_buffer_index =
5336 : block_index_shift_x +
5337 0 : block_index_shift_y * context_ptr->interpolated_stride;
5338 0 : posh_buffer_index =
5339 : block_index_shift_x +
5340 0 : block_index_shift_y * context_ptr->interpolated_stride;
5341 0 : posj_buffer_index =
5342 : block_index_shift_x +
5343 0 : block_index_shift_y * context_ptr->interpolated_stride;
5344 0 : half_pel_refinement_block(
5345 : context_ptr,
5346 0 : &(refBuffer[block_index_shift_y * ref_stride +
5347 : block_index_shift_x]),
5348 : ref_stride,
5349 0 : &context_ptr->p_best_ssd32x8[idx],
5350 : src_block_index,
5351 : &(pos_b_buffer[posb_buffer_index]),
5352 : &(pos_h_buffer[posh_buffer_index]),
5353 : &(pos_j_buffer[posj_buffer_index]),
5354 : 32,
5355 : 8,
5356 : x_search_area_origin,
5357 : y_search_area_origin,
5358 : #if OPTIMISED_EX_SUBPEL
5359 : search_area_height,
5360 : search_area_width,
5361 : #endif
5362 0 : &context_ptr->p_best_sad32x8[idx],
5363 0 : &context_ptr->p_best_mv32x8[idx],
5364 : &context_ptr->psub_pel_direction32x8[idx],
5365 0 : &context_ptr->p_best_full_pel_mv32x8[idx],
5366 : inetger_mv);
5367 : }
5368 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
5369 0 : idx = tab8x32[pu_index];
5370 0 : block_index_shift_x = (pu_index & 0x07) << 3;
5371 0 : block_index_shift_y = (pu_index >> 3) << 5;
5372 0 : src_block_index = block_index_shift_x +
5373 0 : block_index_shift_y * context_ptr->sb_src_stride;
5374 0 : posb_buffer_index =
5375 : block_index_shift_x +
5376 0 : block_index_shift_y * context_ptr->interpolated_stride;
5377 0 : posh_buffer_index =
5378 : block_index_shift_x +
5379 0 : block_index_shift_y * context_ptr->interpolated_stride;
5380 0 : posj_buffer_index =
5381 : block_index_shift_x +
5382 0 : block_index_shift_y * context_ptr->interpolated_stride;
5383 0 : half_pel_refinement_block(
5384 : context_ptr,
5385 0 : &(refBuffer[block_index_shift_y * ref_stride +
5386 : block_index_shift_x]),
5387 : ref_stride,
5388 0 : &context_ptr->p_best_ssd8x32[idx],
5389 : src_block_index,
5390 : &(pos_b_buffer[posb_buffer_index]),
5391 : &(pos_h_buffer[posh_buffer_index]),
5392 : &(pos_j_buffer[posj_buffer_index]),
5393 : 8,
5394 : 32,
5395 : x_search_area_origin,
5396 : y_search_area_origin,
5397 : #if OPTIMISED_EX_SUBPEL
5398 : search_area_height,
5399 : search_area_width,
5400 : #endif
5401 0 : &context_ptr->p_best_sad8x32[idx],
5402 0 : &context_ptr->p_best_mv8x32[idx],
5403 : &context_ptr->psub_pel_direction8x32[idx],
5404 0 : &context_ptr->p_best_full_pel_mv8x32[idx],
5405 : inetger_mv);
5406 : }
5407 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
5408 0 : idx = pu_index;
5409 0 : block_index_shift_x = 0;
5410 0 : block_index_shift_y = pu_index << 4;
5411 0 : src_block_index = block_index_shift_x +
5412 0 : block_index_shift_y * context_ptr->sb_src_stride;
5413 0 : posb_buffer_index =
5414 : block_index_shift_x +
5415 0 : block_index_shift_y * context_ptr->interpolated_stride;
5416 0 : posh_buffer_index =
5417 : block_index_shift_x +
5418 0 : block_index_shift_y * context_ptr->interpolated_stride;
5419 0 : posj_buffer_index =
5420 : block_index_shift_x +
5421 0 : block_index_shift_y * context_ptr->interpolated_stride;
5422 0 : half_pel_refinement_block(
5423 : context_ptr,
5424 0 : &(refBuffer[block_index_shift_y * ref_stride +
5425 : block_index_shift_x]),
5426 : ref_stride,
5427 0 : &context_ptr->p_best_ssd64x16[idx],
5428 : src_block_index,
5429 : &(pos_b_buffer[posb_buffer_index]),
5430 : &(pos_h_buffer[posh_buffer_index]),
5431 : &(pos_j_buffer[posj_buffer_index]),
5432 : 64,
5433 : 16,
5434 : x_search_area_origin,
5435 : y_search_area_origin,
5436 : #if OPTIMISED_EX_SUBPEL
5437 : search_area_height,
5438 : search_area_width,
5439 : #endif
5440 0 : &context_ptr->p_best_sad64x16[idx],
5441 0 : &context_ptr->p_best_mv64x16[idx],
5442 : &context_ptr->psub_pel_direction64x16[idx],
5443 0 : &context_ptr->p_best_full_pel_mv64x16[idx],
5444 : inetger_mv);
5445 : }
5446 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
5447 0 : idx = pu_index;
5448 0 : block_index_shift_x = pu_index << 4;
5449 0 : block_index_shift_y = 0;
5450 0 : src_block_index = block_index_shift_x +
5451 0 : block_index_shift_y * context_ptr->sb_src_stride;
5452 0 : posb_buffer_index =
5453 : block_index_shift_x +
5454 0 : block_index_shift_y * context_ptr->interpolated_stride;
5455 0 : posh_buffer_index =
5456 : block_index_shift_x +
5457 0 : block_index_shift_y * context_ptr->interpolated_stride;
5458 0 : posj_buffer_index =
5459 : block_index_shift_x +
5460 0 : block_index_shift_y * context_ptr->interpolated_stride;
5461 0 : half_pel_refinement_block(
5462 : context_ptr,
5463 0 : &(refBuffer[block_index_shift_y * ref_stride +
5464 : block_index_shift_x]),
5465 : ref_stride,
5466 0 : &context_ptr->p_best_ssd16x64[idx],
5467 : src_block_index,
5468 : &(pos_b_buffer[posb_buffer_index]),
5469 : &(pos_h_buffer[posh_buffer_index]),
5470 : &(pos_j_buffer[posj_buffer_index]),
5471 : 16,
5472 : 64,
5473 : x_search_area_origin,
5474 : y_search_area_origin,
5475 : #if OPTIMISED_EX_SUBPEL
5476 : search_area_height,
5477 : search_area_width,
5478 : #endif
5479 0 : &context_ptr->p_best_sad16x64[idx],
5480 0 : &context_ptr->p_best_mv16x64[idx],
5481 : &context_ptr->psub_pel_direction16x64[idx],
5482 0 : &context_ptr->p_best_full_pel_mv16x64[idx],
5483 : inetger_mv);
5484 : }
5485 : }
5486 0 : return;
5487 : }
5488 : /*******************************************
5489 : * open_loop_me_half_pel_search_sblock
5490 : *******************************************/
5491 : #if OPTIMISED_EX_SUBPEL
5492 0 : static void open_loop_me_half_pel_search_sblock(
5493 : PictureParentControlSet *picture_control_set_ptr, MeContext *context_ptr,
5494 : uint32_t list_index, uint32_t ref_pic_index, int16_t x_search_area_origin,
5495 : int16_t y_search_area_origin, uint32_t search_area_width,
5496 : uint32_t search_area_height)
5497 : {
5498 :
5499 0 : half_pel_refinement_sb(
5500 : picture_control_set_ptr,
5501 : context_ptr,
5502 : #if M0_HIGH_PRECISION_INTERPOLATION
5503 : context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +
5504 : (ME_FILTER_PAD_DISTANCE >> 1) +
5505 : ((ME_FILTER_PAD_DISTANCE >> 1) *
5506 : context_ptr
5507 : ->interpolated_full_stride[listIndex][ref_pic_index]),
5508 : context_ptr
5509 : ->interpolated_full_stride[list_index][ref_pic_index],
5510 : &(context_ptr->pos_b_buffer[list_index][ref_pic_index]
5511 : [(ME_FILTER_PAD_DISTANCE >> 1) *
5512 : context_ptr->interpolated_stride]),
5513 : #else
5514 0 : context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +
5515 0 : (ME_FILTER_TAP >> 1) +
5516 0 : ((ME_FILTER_TAP >> 1) *
5517 : context_ptr
5518 0 : ->interpolated_full_stride[list_index][ref_pic_index]),
5519 : context_ptr
5520 : ->interpolated_full_stride[list_index][ref_pic_index],
5521 0 : &(context_ptr->pos_b_buffer[list_index][ref_pic_index]
5522 0 : [(ME_FILTER_TAP >> 1) *
5523 0 : context_ptr->interpolated_stride]),
5524 : #endif
5525 0 : &(context_ptr->pos_h_buffer[list_index][ref_pic_index][1]),
5526 : &(context_ptr->pos_j_buffer[list_index][ref_pic_index][0]),
5527 : x_search_area_origin,
5528 : y_search_area_origin,
5529 : search_area_height,
5530 : search_area_width,
5531 : 0);
5532 0 : }
5533 : #else
5534 : static void open_loop_me_half_pel_search_sblock(
5535 : PictureParentControlSet *picture_control_set_ptr, MeContext *context_ptr,
5536 : uint32_t list_index, uint32_t ref_pic_index, int16_t x_search_area_origin,
5537 : int16_t y_search_area_origin, uint32_t search_area_width,
5538 : uint32_t search_area_height)
5539 : {
5540 : uint32_t search_index_x, search_index_y;
5541 : for (search_index_y = 0; search_index_y < search_area_height;
5542 : search_index_y++) {
5543 : for (search_index_x = 0; search_index_x < search_area_width;
5544 : search_index_x++) {
5545 : int32_t mvx = (int32_t)search_index_y + x_search_area_origin;
5546 : int32_t mvy = (int32_t)search_index_x + y_search_area_origin;
5547 : uint32_t inetger_mv1 = (((uint16_t)mvy) << 18);
5548 : uint16_t inetger_mv2 = (((uint16_t)mvx << 2));
5549 : uint32_t inetger_mv = inetger_mv1 | inetger_mv2;
5550 : half_pel_refinement_sb(
5551 : picture_control_set_ptr,
5552 : context_ptr,
5553 : #if M0_HIGH_PRECISION_INTERPOLATION
5554 : context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +
5555 : (ME_FILTER_PAD_DISTANCE >> 1) +
5556 : ((ME_FILTER_PAD_DISTANCE >> 1) *
5557 : context_ptr
5558 : ->interpolated_full_stride[listIndex][ref_pic_index]),
5559 : context_ptr
5560 : ->interpolated_full_stride[list_index][ref_pic_index],
5561 : &(context_ptr->pos_b_buffer[list_index][ref_pic_index]
5562 : [(ME_FILTER_PAD_DISTANCE >> 1) *
5563 : context_ptr->interpolated_stride]),
5564 : #else
5565 : context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +
5566 : (ME_FILTER_TAP >> 1) +
5567 : ((ME_FILTER_TAP >> 1) *
5568 : context_ptr
5569 : ->interpolated_full_stride[list_index][ref_pic_index]),
5570 : context_ptr
5571 : ->interpolated_full_stride[list_index][ref_pic_index],
5572 : &(context_ptr->pos_b_buffer[list_index][ref_pic_index]
5573 : [(ME_FILTER_TAP >> 1) *
5574 : context_ptr->interpolated_stride]),
5575 : #endif
5576 : &(context_ptr->pos_h_buffer[list_index][ref_pic_index][1]),
5577 : &(context_ptr->pos_j_buffer[list_index][ref_pic_index][0]),
5578 : x_search_area_origin,
5579 : y_search_area_origin,
5580 : inetger_mv);
5581 : }
5582 : }
5583 : }
5584 : #endif
5585 : static void quarter_pel_refinement_sb(
5586 : MeContext
5587 : *context_ptr, //[IN/OUT] ME context Ptr, used to get/update ME results
5588 : uint8_t *pos_full, //[IN]
5589 : uint32_t full_stride, //[IN]
5590 : uint8_t *pos_b, //[IN]
5591 : uint8_t *pos_h, //[IN]
5592 : uint8_t *pos_j, //[IN]
5593 : int16_t
5594 : x_search_area_origin, //[IN] search area origin in the horizontal
5595 : // direction, used to point to reference samples
5596 : int16_t
5597 : y_search_area_origin, //[IN] search area origin in the vertical
5598 : // direction, used to point to reference samples
5599 : uint32_t integer_mv);
5600 :
5601 : /*******************************************
5602 : * open_loop_me_quarter_pel_search_sblock
5603 : *******************************************/
5604 0 : static void open_loop_me_quarter_pel_search_sblock(
5605 : MeContext *context_ptr,
5606 : uint32_t list_index, uint32_t ref_pic_index, int16_t x_search_area_origin,
5607 : int16_t y_search_area_origin, uint32_t search_area_width,
5608 : uint32_t search_area_height)
5609 : {
5610 : uint32_t search_index_x, search_index_y;
5611 0 : for (search_index_y = 0; search_index_y < search_area_height;
5612 0 : search_index_y++) {
5613 0 : for (search_index_x = 0; search_index_x < search_area_width;
5614 0 : search_index_x++) {
5615 0 : int32_t mvx = (int32_t)search_index_x + x_search_area_origin;
5616 0 : int32_t mvy = (int32_t)search_index_y + y_search_area_origin;
5617 0 : uint32_t mv1 = (((uint16_t)mvy) << 18);
5618 0 : uint16_t mv2 = (((uint16_t)mvx << 2));
5619 0 : uint32_t mv0 = mv1 | mv2;
5620 0 : int16_t x_mv = _MVXT(mv0);
5621 0 : int16_t y_mv = _MVYT(mv0);
5622 0 : uint32_t inetger_mv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
5623 0 : quarter_pel_refinement_sb(
5624 : context_ptr,
5625 : #if M0_HIGH_PRECISION_INTERPOLATION
5626 : context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] +
5627 : (ME_FILTER_PAD_DISTANCE >> 1) +
5628 : ((ME_FILTER_PAD_DISTANCE >> 1) *
5629 : context_ptr
5630 : ->interpolated_full_stride[listIndex][ref_pic_index]),
5631 : context_ptr->interpolated_full_stride[listIndex][ref_pic_index],
5632 : &(context_ptr->pos_b_buffer
5633 : [listIndex][ref_pic_index]
5634 : [(ME_FILTER_PAD_DISTANCE >> 1) *
5635 : context_ptr->interpolated_stride]), // points to b
5636 : // position of the
5637 : // figure above
5638 : #else
5639 0 : context_ptr->integer_buffer_ptr[list_index][ref_pic_index] +
5640 0 : (ME_FILTER_TAP >> 1) +
5641 0 : ((ME_FILTER_TAP >> 1) *
5642 : context_ptr
5643 0 : ->interpolated_full_stride[list_index][ref_pic_index]),
5644 : context_ptr
5645 : ->interpolated_full_stride[list_index][ref_pic_index],
5646 : &(context_ptr->pos_b_buffer
5647 0 : [list_index][ref_pic_index]
5648 0 : [(ME_FILTER_TAP >> 1) *
5649 0 : context_ptr->interpolated_stride]), // points to b
5650 : // position of the
5651 : // figure above
5652 : #endif
5653 0 : &(context_ptr->pos_h_buffer[list_index][ref_pic_index]
5654 : [1]), // points to h position of the
5655 : // figure above
5656 : &(context_ptr->pos_j_buffer[list_index][ref_pic_index]
5657 : [0]), // points to j position of the
5658 : // figure above
5659 : x_search_area_origin,
5660 : y_search_area_origin,
5661 : inetger_mv);
5662 : }
5663 : }
5664 0 : }
5665 : /*******************************************
5666 : * open_loop_me_fullpel_search_sblock
5667 : *******************************************/
5668 0 : static void open_loop_me_fullpel_search_sblock(
5669 : MeContext *context_ptr, uint32_t listIndex,
5670 : uint32_t ref_pic_index,
5671 : int16_t x_search_area_origin, int16_t y_search_area_origin,
5672 : uint32_t search_area_width, uint32_t search_area_height)
5673 : {
5674 : uint32_t xSearchIndex, ySearchIndex;
5675 0 : uint32_t searchAreaWidthRest8 = search_area_width & 7;
5676 0 : uint32_t searchAreaWidthMult8 = search_area_width - searchAreaWidthRest8;
5677 :
5678 0 : for (ySearchIndex = 0; ySearchIndex < search_area_height; ySearchIndex++) {
5679 0 : for (xSearchIndex = 0; xSearchIndex < searchAreaWidthMult8;
5680 0 : xSearchIndex += 8) {
5681 : // this function will do: xSearchIndex, +1, +2, ..., +7
5682 0 : open_loop_me_get_eight_search_point_results_block(
5683 : context_ptr,
5684 : listIndex,
5685 : ref_pic_index,
5686 : xSearchIndex +
5687 0 : ySearchIndex *
5688 : context_ptr->interpolated_full_stride[listIndex]
5689 0 : [ref_pic_index],
5690 0 : (int32_t)xSearchIndex + x_search_area_origin,
5691 0 : (int32_t)ySearchIndex + y_search_area_origin);
5692 : }
5693 :
5694 0 : for (xSearchIndex = searchAreaWidthMult8;
5695 : xSearchIndex < search_area_width;
5696 0 : xSearchIndex++) {
5697 :
5698 0 : open_loop_me_get_search_point_results_block(
5699 : context_ptr,
5700 : listIndex,
5701 : ref_pic_index,
5702 : xSearchIndex +
5703 0 : ySearchIndex *
5704 : context_ptr->interpolated_full_stride[listIndex]
5705 0 : [ref_pic_index],
5706 0 : (int32_t)xSearchIndex + x_search_area_origin,
5707 0 : (int32_t)ySearchIndex + y_search_area_origin);
5708 : }
5709 : }
5710 0 : }
5711 :
5712 : #ifndef AVCCODEL
5713 : /*******************************************
5714 : * HorizontalPelInterpolation
5715 : * interpolates the search region in the horizontal direction
5716 : *******************************************/
5717 : static void HorizontalPelInterpolation(
5718 : uint8_t *src, // input parameter, input samples Ptr
5719 : uint32_t src_stride, // input parameter, input stride
5720 : uint32_t width, // input parameter, input area width
5721 : uint32_t height, // input parameter, input area height
5722 : const int32_t
5723 : *ifCoeff, // input parameter, interpolation filter coefficients Ptr
5724 : uint32_t inputBitDepth, // input parameter, input sample bit depth
5725 : uint32_t dst_stride, // input parameter, output stride
5726 : uint8_t *dst) // output parameter, interpolated samples Ptr
5727 : {
5728 : uint32_t x, y;
5729 : const int32_t maxSampleValue = (1 << inputBitDepth) - 1;
5730 : const int32_t ifOffset = 1 << (IFShift - 1);
5731 : for (y = 0; y < height; ++y) {
5732 : for (x = 0; x < width; ++x) {
5733 : dst[x] = (uint8_t)CLIP3(
5734 : 0,
5735 : (int32_t)maxSampleValue,
5736 : ((((int32_t)src[x] + (int32_t)src[x + 3]) * ifCoeff[0] +
5737 : ((int32_t)src[x + 1] + (int32_t)src[x + 2]) * ifCoeff[1] +
5738 : ifOffset) >>
5739 : IFShift));
5740 : }
5741 : src += src_stride;
5742 : dst += dst_stride;
5743 : }
5744 :
5745 : return;
5746 : }
5747 :
5748 : /*******************************************
5749 : * VerticalPelInterpolation
5750 : * interpolates the serach region in the vertical direction
5751 : *******************************************/
5752 : static void VerticalPelInterpolation(
5753 : uint8_t *src, // input parameter, input samples ptr
5754 : uint32_t src_stride, // input parameter, input stride
5755 : uint32_t width, // input parameter, input area width
5756 : uint32_t height, // input parameter, input area height
5757 : const int32_t
5758 : ifCoeff[4], // input parameter, interpolation filter coefficients Ptr
5759 : uint32_t inputBitDepth, // input parameter, input sample bit depth
5760 : uint32_t dst_stride, // input parameter, output stride
5761 : uint8_t *dst) // output parameter, interpolated samples Ptr
5762 : {
5763 : uint32_t x, y;
5764 :
5765 : const int32_t maxSampleValue = (1 << inputBitDepth) - 1;
5766 : const int32_t ifOffset = 1 << (IFShift - 1);
5767 :
5768 : const uint32_t srcStride2 = src_stride << 1;
5769 : const uint32_t srcStride3 = srcStride2 + src_stride;
5770 :
5771 : for (y = 0; y < height; y++) {
5772 : for (x = 0; x < width; x++) {
5773 : dst[x] = (uint8_t)CLIP3(
5774 : 0,
5775 : maxSampleValue,
5776 : ((((int32_t)src[x] + (int32_t)src[x + srcStride3]) *
5777 : ifCoeff[0] +
5778 : ((int32_t)src[x + src_stride] +
5779 : (int32_t)src[x + srcStride2]) *
5780 : ifCoeff[1] +
5781 : ifOffset) >>
5782 : IFShift));
5783 : }
5784 : src += src_stride;
5785 : dst += dst_stride;
5786 : }
5787 :
5788 : return;
5789 : }
5790 :
5791 : /*******************************************
5792 : * AvcStyleInterpolation
5793 : * interpolates the search region in the horizontal direction
5794 : *******************************************/
5795 : static void AvcStyleInterpolation(
5796 : uint8_t *srcOne, // input parameter, input samples Ptr
5797 : uint32_t srcOneStride, // input parameter, input stride
5798 : uint8_t *srcTwo, // input parameter, input samples Ptr
5799 : uint32_t srcTwoStride, // input parameter, input stride
5800 : uint32_t width, // input parameter, input area width
5801 : uint32_t height, // input parameter, input area height
5802 : uint32_t inputBitDepth, // input parameter, input sample bit depth
5803 : uint32_t dst_stride, // input parameter, output stride
5804 : uint8_t *dst) // output parameter, interpolated samples Ptr
5805 : {
5806 : uint32_t x, y;
5807 : int32_t maxSampleValue = POW2(inputBitDepth) - 1;
5808 :
5809 : for (y = 0; y < height; ++y) {
5810 : for (x = 0; x < width; ++x) {
5811 : dst[x] =
5812 : (uint8_t)CLIP3(0,
5813 : (int32_t)maxSampleValue,
5814 : (((int32_t)srcOne[x] + (int32_t)srcTwo[x] + 1) >>
5815 : IFShiftAvcStyle));
5816 : }
5817 : srcOne += srcOneStride;
5818 : srcTwo += srcTwoStride;
5819 : dst += dst_stride;
5820 : }
5821 :
5822 : return;
5823 : }
5824 : #endif
5825 : /*******************************************
5826 : * InterpolateSearchRegion AVC
5827 : * interpolates the search area
5828 : * the whole search area is interpolated 15 times
5829 : * for each sub position an interpolation is done
5830 : * 15 buffers are required for the storage of the interpolated samples.
5831 : * F0: {-4, 54, 16, -2}
5832 : * F1: {-4, 36, 36, -4}
5833 : * F2: {-2, 16, 54, -4}
5834 : ********************************************/
5835 0 : void InterpolateSearchRegionAVC(
5836 : MeContext *context_ptr, // input/output parameter, ME context ptr, used to
5837 : // get/set interpolated search area Ptr
5838 : uint32_t listIndex, // Refrence picture list index
5839 : uint32_t ref_pic_index,
5840 : uint8_t *searchRegionBuffer, // input parameter, search region index, used
5841 : // to point to reference samples
5842 : uint32_t lumaStride, // input parameter, reference Picture stride
5843 : uint32_t search_area_width, // input parameter, search area width
5844 : uint32_t search_area_height, // input parameter, search area height
5845 : uint32_t inputBitDepth) // input parameter, input sample bit depth
5846 : {
5847 : // 0 1 2 3
5848 : // 0 A a b c
5849 : // 1 d e f g
5850 : // 2 h i j k
5851 : // 3 n p q r
5852 :
5853 : // Position Frac-pos Y Frac-pos X Horizontal filter Vertical filter
5854 : // A 0 0 - -
5855 : // a 0 1 F0 -
5856 : // b 0 2 F1 -
5857 : // c 0 3 F2 -
5858 : // d 1 0 - F0
5859 : // e 1 1 F0 F0
5860 : // f 1 2 F1 F0
5861 : // g 1 3 F2 F0
5862 : // h 2 0 - F1
5863 : // i 2 1 F0 F1
5864 : // j 2 2 F1 F1
5865 : // k 2 3 F2 F1
5866 : // n 3 0 - F2
5867 : // p 3 1 F0 F2
5868 : // q 3 2 F1 F2
5869 : // r 3 3 F2 F2
5870 :
5871 : // Start a b c
5872 :
5873 : // The Search area needs to be a multiple of 8 to align with the ASM kernel
5874 : // Also the search area must be oversized by 2 to account for edge
5875 : // conditions
5876 0 : uint32_t searchAreaWidthForAsm = ROUND_UP_MUL_8(search_area_width + 2);
5877 :
5878 : #ifdef AVCCODEL
5879 :
5880 : (void)inputBitDepth;
5881 : // Half pel interpolation of the search region using f1 -> pos_b_buffer
5882 0 : if (searchAreaWidthForAsm) {
5883 0 : avc_style_luma_interpolation_filter(
5884 0 : searchRegionBuffer - (ME_FILTER_TAP >> 1) * lumaStride -
5885 0 : (ME_FILTER_TAP >> 1) + 1,
5886 : lumaStride,
5887 : context_ptr->pos_b_buffer[listIndex][ref_pic_index],
5888 : context_ptr->interpolated_stride,
5889 : searchAreaWidthForAsm,
5890 : search_area_height + ME_FILTER_TAP,
5891 : context_ptr->avctemp_buffer,
5892 : EB_FALSE,
5893 : 2,
5894 : 2);
5895 : }
5896 :
5897 : // Half pel interpolation of the search region using f1 -> pos_h_buffer
5898 0 : if (searchAreaWidthForAsm) {
5899 0 : avc_style_luma_interpolation_filter(
5900 0 : searchRegionBuffer - (ME_FILTER_TAP >> 1) * lumaStride - 1 +
5901 : lumaStride,
5902 : lumaStride,
5903 : context_ptr->pos_h_buffer[listIndex][ref_pic_index],
5904 : context_ptr->interpolated_stride,
5905 : searchAreaWidthForAsm,
5906 : search_area_height + 1,
5907 : context_ptr->avctemp_buffer,
5908 : EB_FALSE,
5909 : 2,
5910 : 8);
5911 : }
5912 :
5913 0 : if (searchAreaWidthForAsm) {
5914 : // Half pel interpolation of the search region using f1 -> pos_j_buffer
5915 0 : avc_style_luma_interpolation_filter(
5916 0 : context_ptr->pos_b_buffer[listIndex][ref_pic_index] +
5917 0 : context_ptr->interpolated_stride,
5918 : context_ptr->interpolated_stride,
5919 : context_ptr->pos_j_buffer[listIndex][ref_pic_index],
5920 : context_ptr->interpolated_stride,
5921 : searchAreaWidthForAsm,
5922 : search_area_height + 1,
5923 : context_ptr->avctemp_buffer,
5924 : EB_FALSE,
5925 : 2,
5926 : 8);
5927 : }
5928 :
5929 : #else
5930 :
5931 : // Half pel interpolation of the search region using f1 -> pos_b_buffer
5932 : HorizontalPelInterpolation(searchRegionBuffer -
5933 : (ME_FILTER_TAP >> 1) * lumaStride -
5934 : (ME_FILTER_TAP >> 1),
5935 : lumaStride,
5936 : search_area_width + 1,
5937 : search_area_height + ME_FILTER_TAP,
5938 : &(me_if_coeff[F1][0]),
5939 : inputBitDepth,
5940 : context_ptr->interpolated_stride,
5941 : context_ptr->pos_b_buffer);
5942 :
5943 : // Half pel interpolation of the search region using f1 -> pos_h_buffer
5944 : VerticalPelInterpolation(
5945 : searchRegionBuffer - (ME_FILTER_TAP >> 1) * lumaStride - 1,
5946 : lumaStride,
5947 : search_area_width + 2,
5948 : search_area_height + 1,
5949 : &(me_if_coeff[F1][0]),
5950 : inputBitDepth,
5951 : context_ptr->interpolated_stride,
5952 : context_ptr->pos_h_buffer);
5953 :
5954 : // Half pel interpolation of the search region using f1 -> pos_j_buffer
5955 : VerticalPelInterpolation(context_ptr->pos_b_buffer,
5956 : context_ptr->interpolated_stride,
5957 : search_area_width + 1,
5958 : search_area_height + 1,
5959 : &(me_if_coeff[F1][0]),
5960 : inputBitDepth,
5961 : context_ptr->interpolated_stride,
5962 : context_ptr->pos_j_buffer);
5963 :
5964 : #endif
5965 :
5966 0 : return;
5967 : }
5968 :
5969 : /*******************************************
5970 : * InterpolateSearchRegion AVC
5971 : * interpolates the search area
5972 : * the whole search area is interpolated 15 times
5973 : * for each sub position an interpolation is done
5974 : * 15 buffers are required for the storage of the interpolated samples.
5975 : * F0: {-4, 54, 16, -2}
5976 : * F1: {-4, 36, 36, -4}
5977 : * F2: {-2, 16, 54, -4}
5978 : ********************************************/
5979 0 : void interpolate_search_region_AVC_chroma(
5980 : MeContext *context_ptr, // input/output parameter, ME context ptr, used to
5981 : // get/set interpolated search area Ptr
5982 : uint8_t *search_region_buffer_cb, // input parameter, search region buffer
5983 : // cb, used to point to reference samples
5984 : uint8_t *search_region_buffer_cr, // input parameter, search region buffer
5985 : // cr, used to point to reference samples
5986 : uint8_t **pos_b_buffer_ch, uint8_t **pos_h_buffer_ch,
5987 : uint8_t **pos_j_buffer_ch, uint32_t interpolated_stride_ch,
5988 : uint32_t interpolated_full_stride_ch, // input parameter, reference Picture
5989 : // stride
5990 : uint32_t search_area_width, // input parameter, search area width
5991 : uint32_t search_area_height, // input parameter, search area height
5992 : uint32_t input_bit_depth) // input parameter, input sample bit depth
5993 : {
5994 : // 0 1 2 3
5995 : // 0 A a b c
5996 : // 1 d e f g
5997 : // 2 h i j k
5998 : // 3 n p q r
5999 :
6000 : // Position Frac-pos Y Frac-pos X Horizontal filter Vertical filter
6001 : // A 0 0 - -
6002 : // a 0 1 F0 -
6003 : // b 0 2 F1 -
6004 : // c 0 3 F2 -
6005 : // d 1 0 - F0
6006 : // e 1 1 F0 F0
6007 : // f 1 2 F1 F0
6008 : // g 1 3 F2 F0
6009 : // h 2 0 - F1
6010 : // i 2 1 F0 F1
6011 : // j 2 2 F1 F1
6012 : // k 2 3 F2 F1
6013 : // n 3 0 - F2
6014 : // p 3 1 F0 F2
6015 : // q 3 2 F1 F2
6016 : // r 3 3 F2 F2
6017 :
6018 : // Start a b c
6019 :
6020 : // The Search area needs to be a multiple of 8 to align with the ASM kernel
6021 : // Also the search area must be oversized by 2 to account for edge
6022 : // conditions
6023 0 : uint32_t searchAreaWidthForAsm = ROUND_UP_MUL_8(search_area_width + 2);
6024 :
6025 : (void)input_bit_depth;
6026 : // Half pel interpolation of the search region using f1 -> pos_b_buffer
6027 0 : if (searchAreaWidthForAsm) {
6028 : // Cb
6029 0 : avc_style_luma_interpolation_filter(
6030 : search_region_buffer_cb -
6031 0 : (ME_FILTER_TAP >> 1) * interpolated_full_stride_ch -
6032 0 : (ME_FILTER_TAP >> 1) + 1,
6033 : interpolated_full_stride_ch,
6034 : pos_b_buffer_ch[0],
6035 : interpolated_stride_ch,
6036 : searchAreaWidthForAsm,
6037 : search_area_height + ME_FILTER_TAP,
6038 : context_ptr->avctemp_buffer,
6039 : EB_FALSE,
6040 : 2,
6041 : 2);
6042 : // Cr
6043 0 : avc_style_luma_interpolation_filter(
6044 : search_region_buffer_cr -
6045 0 : (ME_FILTER_TAP >> 1) * interpolated_full_stride_ch -
6046 0 : (ME_FILTER_TAP >> 1) + 1,
6047 : interpolated_full_stride_ch,
6048 0 : pos_b_buffer_ch[1],
6049 : interpolated_stride_ch,
6050 : searchAreaWidthForAsm,
6051 : search_area_height + ME_FILTER_TAP,
6052 : context_ptr->avctemp_buffer,
6053 : EB_FALSE,
6054 : 2,
6055 : 2);
6056 : }
6057 :
6058 : // Half pel interpolation of the search region using f1 -> pos_h_buffer
6059 0 : if (searchAreaWidthForAsm) {
6060 : // Cb
6061 0 : avc_style_luma_interpolation_filter(
6062 : search_region_buffer_cb -
6063 0 : (ME_FILTER_TAP >> 1) * interpolated_full_stride_ch - 1 +
6064 : interpolated_full_stride_ch,
6065 : interpolated_full_stride_ch,
6066 : pos_h_buffer_ch[0],
6067 : interpolated_stride_ch,
6068 : searchAreaWidthForAsm,
6069 : search_area_height + 1,
6070 : context_ptr->avctemp_buffer,
6071 : EB_FALSE,
6072 : 2,
6073 : 8);
6074 : // Cr
6075 0 : avc_style_luma_interpolation_filter(
6076 : search_region_buffer_cr -
6077 0 : (ME_FILTER_TAP >> 1) * interpolated_full_stride_ch - 1 +
6078 : interpolated_full_stride_ch,
6079 : interpolated_full_stride_ch,
6080 0 : pos_h_buffer_ch[1],
6081 : interpolated_stride_ch,
6082 : searchAreaWidthForAsm,
6083 : search_area_height + 1,
6084 : context_ptr->avctemp_buffer,
6085 : EB_FALSE,
6086 : 2,
6087 : 8);
6088 : }
6089 :
6090 : // Half pel interpolation of the search region using f1 -> pos_j_buffer
6091 0 : if (searchAreaWidthForAsm) {
6092 : // Cb
6093 0 : avc_style_luma_interpolation_filter(
6094 0 : pos_b_buffer_ch[0] + interpolated_stride_ch,
6095 : interpolated_stride_ch,
6096 : pos_j_buffer_ch[0],
6097 : interpolated_stride_ch,
6098 : searchAreaWidthForAsm,
6099 : search_area_height + 1,
6100 : context_ptr->avctemp_buffer,
6101 : EB_FALSE,
6102 : 2,
6103 : 8);
6104 : // Cr
6105 0 : avc_style_luma_interpolation_filter(
6106 0 : pos_b_buffer_ch[1] + interpolated_stride_ch,
6107 : interpolated_stride_ch,
6108 0 : pos_j_buffer_ch[1],
6109 : interpolated_stride_ch,
6110 : searchAreaWidthForAsm,
6111 : search_area_height + 1,
6112 : context_ptr->avctemp_buffer,
6113 : EB_FALSE,
6114 : 2,
6115 : 8);
6116 : }
6117 0 : }
6118 :
6119 : /*******************************************
6120 : * PU_HalfPelRefinement
6121 : * performs Half Pel refinement for one PU
6122 : *******************************************/
6123 0 : static void PU_HalfPelRefinement(
6124 : SequenceControlSet
6125 : *sequence_control_set_ptr, // input parameter, Sequence control set Ptr
6126 : MeContext
6127 : *context_ptr, // input parameter, ME context Ptr, used to get SB Ptr
6128 : uint8_t *refBuffer, uint32_t ref_stride, uint32_t *pBestSsd,
6129 : uint32_t puLcuBufferIndex, // input parameter, PU origin, used to point to
6130 : // source samples
6131 : uint8_t *pos_b_buffer, // input parameter, position "b" interpolated search
6132 : // area Ptr
6133 : uint8_t *pos_h_buffer, // input parameter, position "h" interpolated search
6134 : // area Ptr
6135 : uint8_t *pos_j_buffer, // input parameter, position "j" interpolated search
6136 : // area Ptr
6137 : uint32_t pu_width, // input parameter, PU width
6138 : uint32_t pu_height, // input parameter, PU height
6139 : int16_t x_search_area_origin, // input parameter, search area origin in the
6140 : // horizontal direction, used to point to
6141 : // reference samples
6142 : int16_t y_search_area_origin, // input parameter, search area origin in the
6143 : // vertical direction, used to point to
6144 : // reference samples
6145 : uint32_t *pBestSad, uint32_t *pBestMV,
6146 : uint8_t *psubPelDirection)
6147 : {
6148 0 : EncodeContext *encode_context_ptr =
6149 : sequence_control_set_ptr->encode_context_ptr;
6150 :
6151 : int32_t searchRegionIndex;
6152 0 : uint64_t bestHalfSad = 0;
6153 0 : uint64_t distortionLeftPosition = 0;
6154 0 : uint64_t distortionRightPosition = 0;
6155 0 : uint64_t distortionTopPosition = 0;
6156 0 : uint64_t distortionBottomPosition = 0;
6157 0 : uint64_t distortionTopLeftPosition = 0;
6158 0 : uint64_t distortionTopRightPosition = 0;
6159 0 : uint64_t distortionBottomLeftPosition = 0;
6160 0 : uint64_t distortionBottomRightPosition = 0;
6161 :
6162 : int16_t xMvHalf[8];
6163 : int16_t yMvHalf[8];
6164 :
6165 0 : int16_t x_mv = _MVXT(*pBestMV);
6166 0 : int16_t y_mv = _MVYT(*pBestMV);
6167 0 : int16_t xSearchIndex = (x_mv >> 2) - x_search_area_origin;
6168 0 : int16_t ySearchIndex = (y_mv >> 2) - y_search_area_origin;
6169 :
6170 : (void)sequence_control_set_ptr;
6171 : (void)encode_context_ptr;
6172 :
6173 : // TODO : remove these, and update the MV by just shifts
6174 :
6175 0 : xMvHalf[0] = x_mv - 2; // L position
6176 0 : xMvHalf[1] = x_mv + 2; // R position
6177 0 : xMvHalf[2] = x_mv; // T position
6178 0 : xMvHalf[3] = x_mv; // B position
6179 0 : xMvHalf[4] = x_mv - 2; // TL position
6180 0 : xMvHalf[5] = x_mv + 2; // TR position
6181 0 : xMvHalf[6] = x_mv + 2; // BR position
6182 0 : xMvHalf[7] = x_mv - 2; // BL position
6183 :
6184 0 : yMvHalf[0] = y_mv; // L position
6185 0 : yMvHalf[1] = y_mv; // R position
6186 0 : yMvHalf[2] = y_mv - 2; // T position
6187 0 : yMvHalf[3] = y_mv + 2; // B position
6188 0 : yMvHalf[4] = y_mv - 2; // TL position
6189 0 : yMvHalf[5] = y_mv - 2; // TR position
6190 0 : yMvHalf[6] = y_mv + 2; // BR position
6191 0 : yMvHalf[7] = y_mv + 2; // BL position
6192 :
6193 : // Compute SSD for the best full search candidate
6194 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
6195 0 : *pBestSsd = (uint32_t)spatial_full_distortion_kernel(
6196 : context_ptr->sb_src_ptr,
6197 : puLcuBufferIndex,
6198 : context_ptr->sb_src_stride,
6199 : refBuffer,
6200 0 : ySearchIndex * ref_stride + xSearchIndex,
6201 : ref_stride,
6202 : pu_width,
6203 : pu_height);
6204 : }
6205 : // Use SATD only when QP mod, and RC are OFF
6206 : // QP mod, and RC assume that ME distotion is always SAD.
6207 : // This problem might be solved by computing SAD for the best position after
6208 : // fractional search is done, or by considring the full pel resolution SAD.
6209 : {
6210 : // L position
6211 0 : searchRegionIndex =
6212 0 : xSearchIndex +
6213 0 : (int16_t)context_ptr->interpolated_stride * ySearchIndex;
6214 0 : distortionLeftPosition =
6215 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
6216 0 : ? spatial_full_distortion_kernel(
6217 : context_ptr->sb_src_ptr,
6218 : puLcuBufferIndex,
6219 : context_ptr->sb_src_stride,
6220 : pos_b_buffer,
6221 : searchRegionIndex,
6222 : context_ptr->interpolated_stride,
6223 : pu_width,
6224 : pu_height)
6225 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
6226 0 : ? (nxm_sad_kernel(
6227 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6228 0 : context_ptr->sb_src_stride << 1,
6229 0 : &(pos_b_buffer[searchRegionIndex]),
6230 0 : context_ptr->interpolated_stride << 1,
6231 : pu_height >> 1,
6232 : pu_width))
6233 0 : << 1
6234 0 : : nxm_sad_kernel(
6235 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6236 : context_ptr->sb_src_stride,
6237 0 : &(pos_b_buffer[searchRegionIndex]),
6238 : context_ptr->interpolated_stride,
6239 : pu_height,
6240 : pu_width);
6241 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
6242 0 : if (distortionLeftPosition < *pBestSsd) {
6243 0 : *pBestSad = (uint32_t)
6244 0 : nxm_sad_kernel(
6245 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6246 : context_ptr->sb_src_stride,
6247 0 : &(pos_b_buffer[searchRegionIndex]),
6248 : context_ptr->interpolated_stride,
6249 : pu_height,
6250 : pu_width);
6251 0 : *pBestMV =
6252 0 : ((uint16_t)yMvHalf[0] << 16) | ((uint16_t)xMvHalf[0]);
6253 0 : *pBestSsd = (uint32_t)distortionLeftPosition;
6254 : }
6255 : } else {
6256 0 : if (distortionLeftPosition < *pBestSad) {
6257 0 : *pBestSad = (uint32_t)distortionLeftPosition;
6258 0 : *pBestMV =
6259 0 : ((uint16_t)yMvHalf[0] << 16) | ((uint16_t)xMvHalf[0]);
6260 : }
6261 : }
6262 : // R position
6263 0 : searchRegionIndex++;
6264 0 : distortionRightPosition =
6265 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
6266 0 : ? spatial_full_distortion_kernel(
6267 : context_ptr->sb_src_ptr,
6268 : puLcuBufferIndex,
6269 : context_ptr->sb_src_stride,
6270 : pos_b_buffer,
6271 : searchRegionIndex,
6272 : context_ptr->interpolated_stride,
6273 : pu_width,
6274 : pu_height)
6275 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
6276 0 : ? (nxm_sad_kernel(
6277 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6278 0 : context_ptr->sb_src_stride << 1,
6279 0 : &(pos_b_buffer[searchRegionIndex]),
6280 0 : context_ptr->interpolated_stride << 1,
6281 : pu_height >> 1,
6282 : pu_width))
6283 0 : << 1
6284 0 : : nxm_sad_kernel(
6285 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6286 : context_ptr->sb_src_stride,
6287 0 : &(pos_b_buffer[searchRegionIndex]),
6288 : context_ptr->interpolated_stride,
6289 : pu_height,
6290 : pu_width);
6291 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
6292 0 : if (distortionRightPosition < *pBestSsd) {
6293 0 : *pBestSad = (uint32_t)
6294 0 : nxm_sad_kernel(
6295 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6296 : context_ptr->sb_src_stride,
6297 0 : &(pos_b_buffer[searchRegionIndex]),
6298 : context_ptr->interpolated_stride,
6299 : pu_height,
6300 : pu_width);
6301 0 : *pBestMV =
6302 0 : ((uint16_t)yMvHalf[1] << 16) | ((uint16_t)xMvHalf[1]);
6303 0 : *pBestSsd = (uint32_t)distortionRightPosition;
6304 : }
6305 : } else {
6306 0 : if (distortionRightPosition < *pBestSad) {
6307 0 : *pBestSad = (uint32_t)distortionRightPosition;
6308 0 : *pBestMV =
6309 0 : ((uint16_t)yMvHalf[1] << 16) | ((uint16_t)xMvHalf[1]);
6310 : }
6311 : }
6312 : // T position
6313 0 : searchRegionIndex =
6314 0 : xSearchIndex +
6315 0 : (int16_t)context_ptr->interpolated_stride * ySearchIndex;
6316 0 : distortionTopPosition =
6317 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
6318 0 : ? spatial_full_distortion_kernel(
6319 : context_ptr->sb_src_ptr,
6320 : puLcuBufferIndex,
6321 : context_ptr->sb_src_stride,
6322 : pos_h_buffer,
6323 : searchRegionIndex,
6324 : context_ptr->interpolated_stride,
6325 : pu_width,
6326 : pu_height)
6327 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
6328 0 : ? (nxm_sad_kernel(
6329 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6330 0 : context_ptr->sb_src_stride << 1,
6331 0 : &(pos_h_buffer[searchRegionIndex]),
6332 0 : context_ptr->interpolated_stride << 1,
6333 : pu_height >> 1,
6334 : pu_width))
6335 0 : << 1
6336 0 : : nxm_sad_kernel(
6337 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6338 : context_ptr->sb_src_stride,
6339 0 : &(pos_h_buffer[searchRegionIndex]),
6340 : context_ptr->interpolated_stride,
6341 : pu_height,
6342 : pu_width);
6343 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
6344 0 : if (distortionTopPosition < *pBestSsd) {
6345 0 : *pBestSad = (uint32_t)
6346 0 : nxm_sad_kernel(
6347 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6348 : context_ptr->sb_src_stride,
6349 0 : &(pos_h_buffer[searchRegionIndex]),
6350 : context_ptr->interpolated_stride,
6351 : pu_height,
6352 : pu_width);
6353 0 : *pBestMV =
6354 0 : ((uint16_t)yMvHalf[2] << 16) | ((uint16_t)xMvHalf[2]);
6355 0 : *pBestSsd = (uint32_t)distortionTopPosition;
6356 : }
6357 : } else {
6358 0 : if (distortionTopPosition < *pBestSad) {
6359 0 : *pBestSad = (uint32_t)distortionTopPosition;
6360 0 : *pBestMV =
6361 0 : ((uint16_t)yMvHalf[2] << 16) | ((uint16_t)xMvHalf[2]);
6362 : }
6363 : }
6364 :
6365 : // B position
6366 0 : searchRegionIndex += (int16_t)context_ptr->interpolated_stride;
6367 0 : distortionBottomPosition =
6368 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
6369 0 : ? spatial_full_distortion_kernel(
6370 : context_ptr->sb_src_ptr,
6371 : puLcuBufferIndex,
6372 : context_ptr->sb_src_stride,
6373 : pos_h_buffer,
6374 : searchRegionIndex,
6375 : context_ptr->interpolated_stride,
6376 : pu_width,
6377 : pu_height)
6378 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
6379 0 : ? (nxm_sad_kernel(
6380 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6381 0 : context_ptr->sb_src_stride << 1,
6382 0 : &(pos_h_buffer[searchRegionIndex]),
6383 0 : context_ptr->interpolated_stride << 1,
6384 : pu_height >> 1,
6385 : pu_width))
6386 0 : << 1
6387 0 : : nxm_sad_kernel(
6388 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6389 : context_ptr->sb_src_stride,
6390 0 : &(pos_h_buffer[searchRegionIndex]),
6391 : context_ptr->interpolated_stride,
6392 : pu_height,
6393 : pu_width);
6394 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
6395 0 : if (distortionBottomPosition < *pBestSsd) {
6396 0 : *pBestSad = (uint32_t)
6397 0 : nxm_sad_kernel(
6398 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6399 : context_ptr->sb_src_stride,
6400 0 : &(pos_h_buffer[searchRegionIndex]),
6401 : context_ptr->interpolated_stride,
6402 : pu_height,
6403 : pu_width);
6404 0 : *pBestMV =
6405 0 : ((uint16_t)yMvHalf[3] << 16) | ((uint16_t)xMvHalf[3]);
6406 0 : *pBestSsd = (uint32_t)distortionBottomPosition;
6407 : }
6408 : } else {
6409 0 : if (distortionBottomPosition < *pBestSad) {
6410 0 : *pBestSad = (uint32_t)distortionBottomPosition;
6411 0 : *pBestMV =
6412 0 : ((uint16_t)yMvHalf[3] << 16) | ((uint16_t)xMvHalf[3]);
6413 : }
6414 : }
6415 :
6416 : // TL position
6417 0 : searchRegionIndex =
6418 0 : xSearchIndex +
6419 0 : (int16_t)context_ptr->interpolated_stride * ySearchIndex;
6420 0 : distortionTopLeftPosition =
6421 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
6422 0 : ? spatial_full_distortion_kernel(
6423 : context_ptr->sb_src_ptr,
6424 : puLcuBufferIndex,
6425 : context_ptr->sb_src_stride,
6426 : pos_j_buffer,
6427 : searchRegionIndex,
6428 : context_ptr->interpolated_stride,
6429 : pu_width,
6430 : pu_height)
6431 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
6432 0 : ? (nxm_sad_kernel(
6433 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6434 0 : context_ptr->sb_src_stride << 1,
6435 0 : &(pos_j_buffer[searchRegionIndex]),
6436 0 : context_ptr->interpolated_stride << 1,
6437 : pu_height >> 1,
6438 : pu_width))
6439 0 : << 1
6440 0 : : nxm_sad_kernel(
6441 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6442 : context_ptr->sb_src_stride,
6443 0 : &(pos_j_buffer[searchRegionIndex]),
6444 : context_ptr->interpolated_stride,
6445 : pu_height,
6446 : pu_width);
6447 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
6448 0 : if (distortionTopLeftPosition < *pBestSsd) {
6449 0 : *pBestSad = (uint32_t)
6450 0 : nxm_sad_kernel(
6451 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6452 : context_ptr->sb_src_stride,
6453 0 : &(pos_j_buffer[searchRegionIndex]),
6454 : context_ptr->interpolated_stride,
6455 : pu_height,
6456 : pu_width);
6457 0 : *pBestMV =
6458 0 : ((uint16_t)yMvHalf[4] << 16) | ((uint16_t)xMvHalf[4]);
6459 0 : *pBestSsd = (uint32_t)distortionTopLeftPosition;
6460 : }
6461 : } else {
6462 0 : if (distortionTopLeftPosition < *pBestSad) {
6463 0 : *pBestSad = (uint32_t)distortionTopLeftPosition;
6464 0 : *pBestMV =
6465 0 : ((uint16_t)yMvHalf[4] << 16) | ((uint16_t)xMvHalf[4]);
6466 : }
6467 : }
6468 :
6469 : // TR position
6470 0 : searchRegionIndex++;
6471 0 : distortionTopRightPosition =
6472 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
6473 0 : ? spatial_full_distortion_kernel(
6474 : context_ptr->sb_src_ptr,
6475 : puLcuBufferIndex,
6476 : context_ptr->sb_src_stride,
6477 : pos_j_buffer,
6478 : searchRegionIndex,
6479 : context_ptr->interpolated_stride,
6480 : pu_width,
6481 : pu_height)
6482 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
6483 0 : ? (nxm_sad_kernel(
6484 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6485 0 : context_ptr->sb_src_stride << 1,
6486 0 : &(pos_j_buffer[searchRegionIndex]),
6487 0 : context_ptr->interpolated_stride << 1,
6488 : pu_height >> 1,
6489 : pu_width))
6490 0 : << 1
6491 0 : : nxm_sad_kernel(
6492 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6493 : context_ptr->sb_src_stride,
6494 0 : &(pos_j_buffer[searchRegionIndex]),
6495 : context_ptr->interpolated_stride,
6496 : pu_height,
6497 : pu_width);
6498 :
6499 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
6500 0 : if (distortionTopRightPosition < *pBestSsd) {
6501 0 : *pBestSad = (uint32_t)
6502 0 : nxm_sad_kernel(
6503 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6504 : context_ptr->sb_src_stride,
6505 0 : &(pos_j_buffer[searchRegionIndex]),
6506 : context_ptr->interpolated_stride,
6507 : pu_height,
6508 : pu_width);
6509 0 : *pBestMV =
6510 0 : ((uint16_t)yMvHalf[5] << 16) | ((uint16_t)xMvHalf[5]);
6511 0 : *pBestSsd = (uint32_t)distortionTopRightPosition;
6512 : }
6513 : } else {
6514 0 : if (distortionTopRightPosition < *pBestSad) {
6515 0 : *pBestSad = (uint32_t)distortionTopRightPosition;
6516 0 : *pBestMV =
6517 0 : ((uint16_t)yMvHalf[5] << 16) | ((uint16_t)xMvHalf[5]);
6518 : }
6519 : }
6520 :
6521 : // BR position
6522 0 : searchRegionIndex += (int16_t)context_ptr->interpolated_stride;
6523 0 : distortionBottomRightPosition =
6524 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
6525 0 : ? spatial_full_distortion_kernel(
6526 : context_ptr->sb_src_ptr,
6527 : puLcuBufferIndex,
6528 : context_ptr->sb_src_stride,
6529 : pos_j_buffer,
6530 : searchRegionIndex,
6531 : context_ptr->interpolated_stride,
6532 : pu_width,
6533 : pu_height)
6534 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
6535 0 : ? (nxm_sad_kernel(
6536 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6537 0 : context_ptr->sb_src_stride << 1,
6538 0 : &(pos_j_buffer[searchRegionIndex]),
6539 0 : context_ptr->interpolated_stride << 1,
6540 : pu_height >> 1,
6541 : pu_width))
6542 0 : << 1
6543 0 : : nxm_sad_kernel(
6544 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6545 : context_ptr->sb_src_stride,
6546 0 : &(pos_j_buffer[searchRegionIndex]),
6547 : context_ptr->interpolated_stride,
6548 : pu_height,
6549 : pu_width);
6550 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
6551 0 : if (distortionBottomRightPosition < *pBestSsd) {
6552 0 : *pBestSad = (uint32_t)
6553 0 : nxm_sad_kernel(
6554 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6555 : context_ptr->sb_src_stride,
6556 0 : &(pos_j_buffer[searchRegionIndex]),
6557 : context_ptr->interpolated_stride,
6558 : pu_height,
6559 : pu_width);
6560 0 : *pBestMV =
6561 0 : ((uint16_t)yMvHalf[6] << 16) | ((uint16_t)xMvHalf[6]);
6562 0 : *pBestSsd = (uint32_t)distortionBottomRightPosition;
6563 : }
6564 : } else {
6565 0 : if (distortionBottomRightPosition < *pBestSad) {
6566 0 : *pBestSad = (uint32_t)distortionBottomRightPosition;
6567 0 : *pBestMV =
6568 0 : ((uint16_t)yMvHalf[6] << 16) | ((uint16_t)xMvHalf[6]);
6569 : }
6570 : }
6571 :
6572 : // BL position
6573 0 : searchRegionIndex--;
6574 0 : distortionBottomLeftPosition =
6575 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
6576 0 : ? spatial_full_distortion_kernel(
6577 : context_ptr->sb_src_ptr,
6578 : puLcuBufferIndex,
6579 : context_ptr->sb_src_stride,
6580 : pos_j_buffer,
6581 : searchRegionIndex,
6582 : context_ptr->interpolated_stride,
6583 : pu_width,
6584 : pu_height)
6585 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
6586 0 : ? (nxm_sad_kernel(
6587 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6588 0 : context_ptr->sb_src_stride << 1,
6589 0 : &(pos_j_buffer[searchRegionIndex]),
6590 0 : context_ptr->interpolated_stride << 1,
6591 : pu_height >> 1,
6592 : pu_width))
6593 0 : << 1
6594 0 : : (nxm_sad_kernel(
6595 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6596 : context_ptr->sb_src_stride,
6597 0 : &(pos_j_buffer[searchRegionIndex]),
6598 : context_ptr->interpolated_stride,
6599 : pu_height,
6600 : pu_width));
6601 :
6602 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
6603 0 : if (distortionBottomLeftPosition < *pBestSsd) {
6604 0 : *pBestSad = (uint32_t)(
6605 0 : nxm_sad_kernel(
6606 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
6607 : context_ptr->sb_src_stride,
6608 0 : &(pos_j_buffer[searchRegionIndex]),
6609 : context_ptr->interpolated_stride,
6610 : pu_height,
6611 : pu_width));
6612 0 : *pBestMV =
6613 0 : ((uint16_t)yMvHalf[7] << 16) | ((uint16_t)xMvHalf[7]);
6614 0 : *pBestSsd = (uint32_t)distortionBottomLeftPosition;
6615 : }
6616 : } else {
6617 0 : if (distortionBottomLeftPosition < *pBestSad) {
6618 0 : *pBestSad = (uint32_t)distortionBottomLeftPosition;
6619 0 : *pBestMV =
6620 0 : ((uint16_t)yMvHalf[7] << 16) | ((uint16_t)xMvHalf[7]);
6621 : }
6622 : }
6623 : }
6624 :
6625 0 : bestHalfSad =
6626 0 : MIN(distortionLeftPosition,
6627 : MIN(distortionRightPosition,
6628 : MIN(distortionTopPosition,
6629 : MIN(distortionBottomPosition,
6630 : MIN(distortionTopLeftPosition,
6631 : MIN(distortionTopRightPosition,
6632 : MIN(distortionBottomLeftPosition,
6633 : distortionBottomRightPosition)))))));
6634 :
6635 0 : if (bestHalfSad == distortionLeftPosition)
6636 0 : *psubPelDirection = LEFT_POSITION;
6637 0 : else if (bestHalfSad == distortionRightPosition)
6638 0 : *psubPelDirection = RIGHT_POSITION;
6639 0 : else if (bestHalfSad == distortionTopPosition)
6640 0 : *psubPelDirection = TOP_POSITION;
6641 0 : else if (bestHalfSad == distortionBottomPosition)
6642 0 : *psubPelDirection = BOTTOM_POSITION;
6643 0 : else if (bestHalfSad == distortionTopLeftPosition)
6644 0 : *psubPelDirection = TOP_LEFT_POSITION;
6645 0 : else if (bestHalfSad == distortionTopRightPosition)
6646 0 : *psubPelDirection = TOP_RIGHT_POSITION;
6647 0 : else if (bestHalfSad == distortionBottomLeftPosition)
6648 0 : *psubPelDirection = BOTTOM_LEFT_POSITION;
6649 0 : else if (bestHalfSad == distortionBottomRightPosition)
6650 0 : *psubPelDirection = BOTTOM_RIGHT_POSITION;
6651 0 : return;
6652 : }
6653 :
6654 : /*******************************************
6655 : * HalfPelSearch_LCU
6656 : * performs Half Pel refinement for the 85 PUs
6657 : *******************************************/
6658 0 : void HalfPelSearch_LCU(
6659 : SequenceControlSet
6660 : *sequence_control_set_ptr, // input parameter, Sequence control set Ptr
6661 : PictureParentControlSet *picture_control_set_ptr,
6662 : MeContext *context_ptr, // input/output parameter, ME context Ptr, used to
6663 : // get/update ME results
6664 : uint8_t *refBuffer, uint32_t ref_stride,
6665 : uint8_t *pos_b_buffer, // input parameter, position "b" interpolated search
6666 : // area Ptr
6667 : uint8_t *pos_h_buffer, // input parameter, position "h" interpolated search
6668 : // area Ptr
6669 : uint8_t *pos_j_buffer, // input parameter, position "j" interpolated search
6670 : // area Ptr
6671 : int16_t x_search_area_origin, // input parameter, search area origin in the
6672 : // horizontal direction, used to point to
6673 : // reference samples
6674 : int16_t y_search_area_origin, // input parameter, search area origin in the
6675 : // vertical direction, used to point to
6676 : // reference samples
6677 : EbBool disable8x8CuInMeFlag, EbBool enableHalfPel32x32,
6678 : EbBool enableHalfPel16x16, EbBool enableHalfPel8x8)
6679 : {
6680 : uint32_t idx;
6681 : uint32_t pu_index;
6682 : uint32_t puShiftXIndex;
6683 : uint32_t puShiftYIndex;
6684 : uint32_t puLcuBufferIndex;
6685 : uint32_t posbBufferIndex;
6686 : uint32_t poshBufferIndex;
6687 : uint32_t posjBufferIndex;
6688 :
6689 0 : if (context_ptr->fractional_search64x64)
6690 0 : PU_HalfPelRefinement(sequence_control_set_ptr,
6691 : context_ptr,
6692 : &(refBuffer[0]),
6693 : ref_stride,
6694 : context_ptr->p_best_ssd64x64,
6695 : 0,
6696 : &(pos_b_buffer[0]),
6697 : &(pos_h_buffer[0]),
6698 : &(pos_j_buffer[0]),
6699 : 64,
6700 : 64,
6701 : x_search_area_origin,
6702 : y_search_area_origin,
6703 : context_ptr->p_best_sad64x64,
6704 : context_ptr->p_best_mv64x64,
6705 : &context_ptr->psub_pel_direction64x64);
6706 :
6707 0 : if (enableHalfPel32x32) {
6708 : // 32x32 [4 partitions]
6709 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
6710 0 : puShiftXIndex = (pu_index & 0x01) << 5;
6711 0 : puShiftYIndex = (pu_index >> 1) << 5;
6712 :
6713 0 : puLcuBufferIndex =
6714 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
6715 0 : posbBufferIndex = puShiftXIndex +
6716 0 : puShiftYIndex * context_ptr->interpolated_stride;
6717 0 : poshBufferIndex = puShiftXIndex +
6718 0 : puShiftYIndex * context_ptr->interpolated_stride;
6719 0 : posjBufferIndex = puShiftXIndex +
6720 0 : puShiftYIndex * context_ptr->interpolated_stride;
6721 :
6722 0 : PU_HalfPelRefinement(
6723 : sequence_control_set_ptr,
6724 : context_ptr,
6725 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
6726 : ref_stride,
6727 0 : &context_ptr->p_best_ssd32x32[pu_index],
6728 : puLcuBufferIndex,
6729 : &(pos_b_buffer[posbBufferIndex]),
6730 : &(pos_h_buffer[poshBufferIndex]),
6731 : &(pos_j_buffer[posjBufferIndex]),
6732 : 32,
6733 : 32,
6734 : x_search_area_origin,
6735 : y_search_area_origin,
6736 0 : &context_ptr->p_best_sad32x32[pu_index],
6737 0 : &context_ptr->p_best_mv32x32[pu_index],
6738 : &context_ptr->psub_pel_direction32x32[pu_index]);
6739 : }
6740 : }
6741 0 : if (enableHalfPel16x16) {
6742 : // 16x16 [16 partitions]
6743 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
6744 0 : idx = tab16x16[pu_index];
6745 :
6746 0 : puShiftXIndex = (pu_index & 0x03) << 4;
6747 0 : puShiftYIndex = (pu_index >> 2) << 4;
6748 :
6749 0 : puLcuBufferIndex =
6750 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
6751 0 : posbBufferIndex = puShiftXIndex +
6752 0 : puShiftYIndex * context_ptr->interpolated_stride;
6753 0 : poshBufferIndex = puShiftXIndex +
6754 0 : puShiftYIndex * context_ptr->interpolated_stride;
6755 0 : posjBufferIndex = puShiftXIndex +
6756 0 : puShiftYIndex * context_ptr->interpolated_stride;
6757 :
6758 0 : PU_HalfPelRefinement(
6759 : sequence_control_set_ptr,
6760 : context_ptr,
6761 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
6762 : ref_stride,
6763 0 : &context_ptr->p_best_ssd16x16[idx],
6764 : puLcuBufferIndex,
6765 : &(pos_b_buffer[posbBufferIndex]),
6766 : &(pos_h_buffer[poshBufferIndex]),
6767 : &(pos_j_buffer[posjBufferIndex]),
6768 : 16,
6769 : 16,
6770 : x_search_area_origin,
6771 : y_search_area_origin,
6772 0 : &context_ptr->p_best_sad16x16[idx],
6773 0 : &context_ptr->p_best_mv16x16[idx],
6774 : &context_ptr->psub_pel_direction16x16[idx]);
6775 : }
6776 : }
6777 0 : if (enableHalfPel8x8) {
6778 : // 8x8 [64 partitions]
6779 0 : if (!disable8x8CuInMeFlag) {
6780 0 : for (pu_index = 0; pu_index < 64; ++pu_index) {
6781 0 : idx = tab8x8[pu_index]; // TODO bitwise this
6782 :
6783 0 : puShiftXIndex = (pu_index & 0x07) << 3;
6784 0 : puShiftYIndex = (pu_index >> 3) << 3;
6785 :
6786 0 : puLcuBufferIndex =
6787 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
6788 :
6789 0 : posbBufferIndex =
6790 : puShiftXIndex +
6791 0 : puShiftYIndex * context_ptr->interpolated_stride;
6792 0 : poshBufferIndex =
6793 : puShiftXIndex +
6794 0 : puShiftYIndex * context_ptr->interpolated_stride;
6795 0 : posjBufferIndex =
6796 : puShiftXIndex +
6797 0 : puShiftYIndex * context_ptr->interpolated_stride;
6798 :
6799 0 : PU_HalfPelRefinement(
6800 : sequence_control_set_ptr,
6801 : context_ptr,
6802 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
6803 : ref_stride,
6804 0 : &context_ptr->p_best_ssd8x8[idx],
6805 : puLcuBufferIndex,
6806 : &(pos_b_buffer[posbBufferIndex]),
6807 : &(pos_h_buffer[poshBufferIndex]),
6808 : &(pos_j_buffer[posjBufferIndex]),
6809 : 8,
6810 : 8,
6811 : x_search_area_origin,
6812 : y_search_area_origin,
6813 0 : &context_ptr->p_best_sad8x8[idx],
6814 0 : &context_ptr->p_best_mv8x8[idx],
6815 : &context_ptr->psub_pel_direction8x8[idx]);
6816 : }
6817 : }
6818 : }
6819 0 : if (picture_control_set_ptr->pic_depth_mode <= PIC_ALL_C_DEPTH_MODE) {
6820 : // 64x32
6821 0 : for (pu_index = 0; pu_index < 2; ++pu_index) {
6822 0 : puShiftXIndex = 0;
6823 0 : puShiftYIndex = pu_index << 5;
6824 :
6825 0 : puLcuBufferIndex =
6826 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
6827 :
6828 0 : posbBufferIndex = puShiftXIndex +
6829 0 : puShiftYIndex * context_ptr->interpolated_stride;
6830 0 : poshBufferIndex = puShiftXIndex +
6831 0 : puShiftYIndex * context_ptr->interpolated_stride;
6832 0 : posjBufferIndex = puShiftXIndex +
6833 0 : puShiftYIndex * context_ptr->interpolated_stride;
6834 :
6835 0 : PU_HalfPelRefinement(
6836 : sequence_control_set_ptr,
6837 : context_ptr,
6838 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
6839 : ref_stride,
6840 0 : &context_ptr->p_best_ssd64x32[pu_index],
6841 : puLcuBufferIndex,
6842 : &(pos_b_buffer[posbBufferIndex]),
6843 : &(pos_h_buffer[poshBufferIndex]),
6844 : &(pos_j_buffer[posjBufferIndex]),
6845 : 64,
6846 : 32,
6847 : x_search_area_origin,
6848 : y_search_area_origin,
6849 0 : &context_ptr->p_best_sad64x32[pu_index],
6850 0 : &context_ptr->p_best_mv64x32[pu_index],
6851 : &context_ptr->psub_pel_direction64x32[pu_index]);
6852 : }
6853 :
6854 : // 32x16
6855 0 : for (pu_index = 0; pu_index < 8; ++pu_index) {
6856 0 : idx = tab32x16[pu_index]; // TODO bitwise this
6857 :
6858 0 : puShiftXIndex = (pu_index & 0x01) << 5;
6859 0 : puShiftYIndex = (pu_index >> 1) << 4;
6860 :
6861 0 : puLcuBufferIndex =
6862 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
6863 :
6864 0 : posbBufferIndex = puShiftXIndex +
6865 0 : puShiftYIndex * context_ptr->interpolated_stride;
6866 0 : poshBufferIndex = puShiftXIndex +
6867 0 : puShiftYIndex * context_ptr->interpolated_stride;
6868 0 : posjBufferIndex = puShiftXIndex +
6869 0 : puShiftYIndex * context_ptr->interpolated_stride;
6870 :
6871 0 : PU_HalfPelRefinement(
6872 : sequence_control_set_ptr,
6873 : context_ptr,
6874 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
6875 : ref_stride,
6876 0 : &context_ptr->p_best_ssd32x16[idx],
6877 : puLcuBufferIndex,
6878 : &(pos_b_buffer[posbBufferIndex]),
6879 : &(pos_h_buffer[poshBufferIndex]),
6880 : &(pos_j_buffer[posjBufferIndex]),
6881 : 32,
6882 : 16,
6883 : x_search_area_origin,
6884 : y_search_area_origin,
6885 0 : &context_ptr->p_best_sad32x16[idx],
6886 0 : &context_ptr->p_best_mv32x16[idx],
6887 : &context_ptr->psub_pel_direction32x16[idx]);
6888 : }
6889 :
6890 : // 16x8
6891 0 : for (pu_index = 0; pu_index < 32; ++pu_index) {
6892 0 : idx = tab16x8[pu_index];
6893 :
6894 0 : puShiftXIndex = (pu_index & 0x03) << 4;
6895 0 : puShiftYIndex = (pu_index >> 2) << 3;
6896 :
6897 0 : puLcuBufferIndex =
6898 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
6899 :
6900 0 : posbBufferIndex = puShiftXIndex +
6901 0 : puShiftYIndex * context_ptr->interpolated_stride;
6902 0 : poshBufferIndex = puShiftXIndex +
6903 0 : puShiftYIndex * context_ptr->interpolated_stride;
6904 0 : posjBufferIndex = puShiftXIndex +
6905 0 : puShiftYIndex * context_ptr->interpolated_stride;
6906 :
6907 0 : PU_HalfPelRefinement(
6908 : sequence_control_set_ptr,
6909 : context_ptr,
6910 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
6911 : ref_stride,
6912 0 : &context_ptr->p_best_ssd16x8[idx],
6913 : puLcuBufferIndex,
6914 : &(pos_b_buffer[posbBufferIndex]),
6915 : &(pos_h_buffer[poshBufferIndex]),
6916 : &(pos_j_buffer[posjBufferIndex]),
6917 : 16,
6918 : 8,
6919 : x_search_area_origin,
6920 : y_search_area_origin,
6921 0 : &context_ptr->p_best_sad16x8[idx],
6922 0 : &context_ptr->p_best_mv16x8[idx],
6923 : &context_ptr->psub_pel_direction16x8[idx]);
6924 : }
6925 :
6926 : // 32x64
6927 0 : for (pu_index = 0; pu_index < 2; ++pu_index) {
6928 0 : puShiftXIndex = pu_index << 5;
6929 0 : puShiftYIndex = 0;
6930 :
6931 0 : puLcuBufferIndex =
6932 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
6933 :
6934 0 : posbBufferIndex = puShiftXIndex +
6935 0 : puShiftYIndex * context_ptr->interpolated_stride;
6936 0 : poshBufferIndex = puShiftXIndex +
6937 0 : puShiftYIndex * context_ptr->interpolated_stride;
6938 0 : posjBufferIndex = puShiftXIndex +
6939 0 : puShiftYIndex * context_ptr->interpolated_stride;
6940 :
6941 0 : PU_HalfPelRefinement(
6942 : sequence_control_set_ptr,
6943 : context_ptr,
6944 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
6945 : ref_stride,
6946 0 : &context_ptr->p_best_ssd32x64[pu_index],
6947 : puLcuBufferIndex,
6948 : &(pos_b_buffer[posbBufferIndex]),
6949 : &(pos_h_buffer[poshBufferIndex]),
6950 : &(pos_j_buffer[posjBufferIndex]),
6951 : 32,
6952 : 64,
6953 : x_search_area_origin,
6954 : y_search_area_origin,
6955 0 : &context_ptr->p_best_sad32x64[pu_index],
6956 0 : &context_ptr->p_best_mv32x64[pu_index],
6957 : &context_ptr->psub_pel_direction32x64[pu_index]);
6958 : }
6959 :
6960 : // 16x32
6961 0 : for (pu_index = 0; pu_index < 8; ++pu_index) {
6962 0 : idx = tab16x32[pu_index];
6963 :
6964 0 : puShiftXIndex = (pu_index & 0x03) << 4;
6965 0 : puShiftYIndex = (pu_index >> 2) << 5;
6966 :
6967 0 : puLcuBufferIndex =
6968 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
6969 :
6970 0 : posbBufferIndex = puShiftXIndex +
6971 0 : puShiftYIndex * context_ptr->interpolated_stride;
6972 0 : poshBufferIndex = puShiftXIndex +
6973 0 : puShiftYIndex * context_ptr->interpolated_stride;
6974 0 : posjBufferIndex = puShiftXIndex +
6975 0 : puShiftYIndex * context_ptr->interpolated_stride;
6976 :
6977 0 : PU_HalfPelRefinement(
6978 : sequence_control_set_ptr,
6979 : context_ptr,
6980 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
6981 : ref_stride,
6982 0 : &context_ptr->p_best_ssd16x32[idx],
6983 : puLcuBufferIndex,
6984 : &(pos_b_buffer[posbBufferIndex]),
6985 : &(pos_h_buffer[poshBufferIndex]),
6986 : &(pos_j_buffer[posjBufferIndex]),
6987 : 16,
6988 : 32,
6989 : x_search_area_origin,
6990 : y_search_area_origin,
6991 0 : &context_ptr->p_best_sad16x32[idx],
6992 0 : &context_ptr->p_best_mv16x32[idx],
6993 : &context_ptr->psub_pel_direction16x32[idx]);
6994 : }
6995 :
6996 : // 8x16
6997 0 : for (pu_index = 0; pu_index < 32; ++pu_index) {
6998 0 : idx = tab8x16[pu_index];
6999 :
7000 0 : puShiftXIndex = (pu_index & 0x07) << 3;
7001 0 : puShiftYIndex = (pu_index >> 3) << 4;
7002 :
7003 0 : puLcuBufferIndex =
7004 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
7005 :
7006 0 : posbBufferIndex = puShiftXIndex +
7007 0 : puShiftYIndex * context_ptr->interpolated_stride;
7008 0 : poshBufferIndex = puShiftXIndex +
7009 0 : puShiftYIndex * context_ptr->interpolated_stride;
7010 0 : posjBufferIndex = puShiftXIndex +
7011 0 : puShiftYIndex * context_ptr->interpolated_stride;
7012 :
7013 0 : PU_HalfPelRefinement(
7014 : sequence_control_set_ptr,
7015 : context_ptr,
7016 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
7017 : ref_stride,
7018 0 : &context_ptr->p_best_ssd8x16[idx],
7019 : puLcuBufferIndex,
7020 : &(pos_b_buffer[posbBufferIndex]),
7021 : &(pos_h_buffer[poshBufferIndex]),
7022 : &(pos_j_buffer[posjBufferIndex]),
7023 : 8,
7024 : 16,
7025 : x_search_area_origin,
7026 : y_search_area_origin,
7027 0 : &context_ptr->p_best_sad8x16[idx],
7028 0 : &context_ptr->p_best_mv8x16[idx],
7029 : &context_ptr->psub_pel_direction8x16[idx]);
7030 : }
7031 :
7032 : // 32x8
7033 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
7034 0 : idx = tab32x8[pu_index];
7035 :
7036 0 : puShiftXIndex = (pu_index & 0x01) << 5;
7037 0 : puShiftYIndex = (pu_index >> 1) << 3;
7038 :
7039 0 : puLcuBufferIndex =
7040 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
7041 :
7042 0 : posbBufferIndex = puShiftXIndex +
7043 0 : puShiftYIndex * context_ptr->interpolated_stride;
7044 0 : poshBufferIndex = puShiftXIndex +
7045 0 : puShiftYIndex * context_ptr->interpolated_stride;
7046 0 : posjBufferIndex = puShiftXIndex +
7047 0 : puShiftYIndex * context_ptr->interpolated_stride;
7048 :
7049 0 : PU_HalfPelRefinement(
7050 : sequence_control_set_ptr,
7051 : context_ptr,
7052 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
7053 : ref_stride,
7054 0 : &context_ptr->p_best_ssd32x8[idx],
7055 : puLcuBufferIndex,
7056 : &(pos_b_buffer[posbBufferIndex]),
7057 : &(pos_h_buffer[poshBufferIndex]),
7058 : &(pos_j_buffer[posjBufferIndex]),
7059 : 32,
7060 : 8,
7061 : x_search_area_origin,
7062 : y_search_area_origin,
7063 0 : &context_ptr->p_best_sad32x8[idx],
7064 0 : &context_ptr->p_best_mv32x8[idx],
7065 : &context_ptr->psub_pel_direction32x8[idx]);
7066 : }
7067 :
7068 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
7069 0 : idx = tab8x32[pu_index];
7070 :
7071 0 : puShiftXIndex = (pu_index & 0x07) << 3;
7072 0 : puShiftYIndex = (pu_index >> 3) << 5;
7073 :
7074 0 : puLcuBufferIndex =
7075 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
7076 0 : posbBufferIndex = puShiftXIndex +
7077 0 : puShiftYIndex * context_ptr->interpolated_stride;
7078 0 : poshBufferIndex = puShiftXIndex +
7079 0 : puShiftYIndex * context_ptr->interpolated_stride;
7080 0 : posjBufferIndex = puShiftXIndex +
7081 0 : puShiftYIndex * context_ptr->interpolated_stride;
7082 :
7083 0 : PU_HalfPelRefinement(
7084 : sequence_control_set_ptr,
7085 : context_ptr,
7086 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
7087 : ref_stride,
7088 0 : &context_ptr->p_best_ssd8x32[idx],
7089 : puLcuBufferIndex,
7090 : &(pos_b_buffer[posbBufferIndex]),
7091 : &(pos_h_buffer[poshBufferIndex]),
7092 : &(pos_j_buffer[posjBufferIndex]),
7093 : 8,
7094 : 32,
7095 : x_search_area_origin,
7096 : y_search_area_origin,
7097 0 : &context_ptr->p_best_sad8x32[idx],
7098 0 : &context_ptr->p_best_mv8x32[idx],
7099 : &context_ptr->psub_pel_direction8x32[idx]);
7100 : }
7101 :
7102 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
7103 0 : idx = pu_index;
7104 :
7105 0 : puShiftXIndex = 0;
7106 0 : puShiftYIndex = pu_index << 4;
7107 :
7108 0 : puLcuBufferIndex =
7109 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
7110 0 : posbBufferIndex = puShiftXIndex +
7111 0 : puShiftYIndex * context_ptr->interpolated_stride;
7112 0 : poshBufferIndex = puShiftXIndex +
7113 0 : puShiftYIndex * context_ptr->interpolated_stride;
7114 0 : posjBufferIndex = puShiftXIndex +
7115 0 : puShiftYIndex * context_ptr->interpolated_stride;
7116 :
7117 0 : PU_HalfPelRefinement(
7118 : sequence_control_set_ptr,
7119 : context_ptr,
7120 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
7121 : ref_stride,
7122 0 : &context_ptr->p_best_ssd64x16[idx],
7123 : puLcuBufferIndex,
7124 : &(pos_b_buffer[posbBufferIndex]),
7125 : &(pos_h_buffer[poshBufferIndex]),
7126 : &(pos_j_buffer[posjBufferIndex]),
7127 : 64,
7128 : 16,
7129 : x_search_area_origin,
7130 : y_search_area_origin,
7131 0 : &context_ptr->p_best_sad64x16[idx],
7132 0 : &context_ptr->p_best_mv64x16[idx],
7133 : &context_ptr->psub_pel_direction64x16[idx]);
7134 : }
7135 :
7136 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
7137 0 : idx = pu_index;
7138 :
7139 0 : puShiftXIndex = pu_index << 4;
7140 0 : puShiftYIndex = 0;
7141 :
7142 0 : puLcuBufferIndex =
7143 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
7144 0 : posbBufferIndex = puShiftXIndex +
7145 0 : puShiftYIndex * context_ptr->interpolated_stride;
7146 0 : poshBufferIndex = puShiftXIndex +
7147 0 : puShiftYIndex * context_ptr->interpolated_stride;
7148 0 : posjBufferIndex = puShiftXIndex +
7149 0 : puShiftYIndex * context_ptr->interpolated_stride;
7150 :
7151 0 : PU_HalfPelRefinement(
7152 : sequence_control_set_ptr,
7153 : context_ptr,
7154 0 : &(refBuffer[puShiftYIndex * ref_stride + puShiftXIndex]),
7155 : ref_stride,
7156 0 : &context_ptr->p_best_ssd16x64[idx],
7157 : puLcuBufferIndex,
7158 : &(pos_b_buffer[posbBufferIndex]),
7159 : &(pos_h_buffer[poshBufferIndex]),
7160 : &(pos_j_buffer[posjBufferIndex]),
7161 : 16,
7162 : 64,
7163 : x_search_area_origin,
7164 : y_search_area_origin,
7165 0 : &context_ptr->p_best_sad16x64[idx],
7166 0 : &context_ptr->p_best_mv16x64[idx],
7167 : &context_ptr->psub_pel_direction16x64[idx]);
7168 : }
7169 : }
7170 :
7171 0 : return;
7172 : }
/*******************************************
 * combined_averaging_ssd_c
 *   Reference C implementation of the sum of squared differences between a
 *   source block and the rounded average of two prediction blocks.
 *
 *   src / src_stride   : source samples and the distance between its rows
 *   ref1 / ref1_stride : first prediction and the distance between its rows
 *   ref2 / ref2_stride : second prediction and the distance between its rows
 *   height, width      : block dimensions in samples
 *
 *   Returns the SSD accumulated in 32 bits.
 *******************************************/
uint32_t combined_averaging_ssd_c(uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *ref1, ptrdiff_t ref1_stride,
                                  uint8_t *ref2, ptrdiff_t ref2_stride,
                                  uint32_t height, uint32_t width) {
    uint32_t total = 0;
    uint32_t row, col;

    for (row = 0; row < height; ++row) {
        for (col = 0; col < width; ++col) {
            // Rounded mean of the two predictions, truncated to 8 bits.
            const uint8_t mean = (ref1[col] + ref2[col] + 1) >> 1;
            const int64_t delta = (int64_t)(src[col]) - (mean);
            total += delta * delta;
        }
        // Advance all three planes to the next row.
        src += src_stride;
        ref1 += ref1_stride;
        ref2 += ref2_stride;
    }

    return total;
}
7195 : /*******************************************
7196 : * PU_QuarterPelRefinementOnTheFly
7197 : * performs Quarter Pel refinement for each PU
7198 : *******************************************/
7199 0 : static void PU_QuarterPelRefinementOnTheFly(
7200 : MeContext *context_ptr, // [IN] ME context Ptr, used to get SB Ptr
7201 : uint32_t *pBestSsd,
7202 : uint32_t
7203 : puLcuBufferIndex, // [IN] PU origin, used to point to source samples
7204 : uint8_t **buf1, // [IN]
7205 : uint32_t *buf1Stride,
7206 : uint8_t **buf2, // [IN]
7207 : uint32_t *buf2Stride,
7208 : uint32_t pu_width, // [IN] PU width
7209 : uint32_t pu_height, // [IN] PU height
7210 : int16_t
7211 : x_search_area_origin, // [IN] search area origin in the horizontal
7212 : // direction, used to point to reference samples
7213 : int16_t
7214 : y_search_area_origin, // [IN] search area origin in the vertical
7215 : // direction, used to point to reference samples
7216 : uint32_t *pBestSad, uint32_t *pBestMV,
7217 : uint8_t sub_pel_direction) {
7218 0 : int16_t x_mv = _MVXT(*pBestMV);
7219 0 : int16_t y_mv = _MVYT(*pBestMV);
7220 :
7221 0 : int16_t xSearchIndex = ((x_mv + 2) >> 2) - x_search_area_origin;
7222 0 : int16_t ySearchIndex = ((y_mv + 2) >> 2) - y_search_area_origin;
7223 :
7224 : uint64_t dist;
7225 :
7226 : EbBool validTL, validT, validTR, validR, validBR, validB, validBL, validL;
7227 :
7228 : int16_t xMvQuarter[8];
7229 : int16_t yMvQuarter[8];
7230 0 : int32_t searchRegionIndex1 = 0;
7231 0 : int32_t searchRegionIndex2 = 0;
7232 0 : if (context_ptr->full_quarter_pel_refinement) {
7233 0 : validTL = EB_TRUE;
7234 0 : validT = EB_TRUE;
7235 0 : validTR = EB_TRUE;
7236 0 : validR = EB_TRUE;
7237 0 : validBR = EB_TRUE;
7238 0 : validB = EB_TRUE;
7239 0 : validBL = EB_TRUE;
7240 0 : validL = EB_TRUE;
7241 : } else {
7242 0 : if ((y_mv & 2) + ((x_mv & 2) >> 1)) {
7243 0 : validTL = (EbBool)(sub_pel_direction == RIGHT_POSITION ||
7244 0 : sub_pel_direction == BOTTOM_RIGHT_POSITION ||
7245 : sub_pel_direction == BOTTOM_POSITION);
7246 0 : validT = (EbBool)(sub_pel_direction == BOTTOM_RIGHT_POSITION ||
7247 0 : sub_pel_direction == BOTTOM_POSITION ||
7248 : sub_pel_direction == BOTTOM_LEFT_POSITION);
7249 0 : validTR = (EbBool)(sub_pel_direction == BOTTOM_POSITION ||
7250 0 : sub_pel_direction == BOTTOM_LEFT_POSITION ||
7251 : sub_pel_direction == LEFT_POSITION);
7252 0 : validR = (EbBool)(sub_pel_direction == BOTTOM_LEFT_POSITION ||
7253 0 : sub_pel_direction == LEFT_POSITION ||
7254 : sub_pel_direction == TOP_LEFT_POSITION);
7255 0 : validBR = (EbBool)(sub_pel_direction == LEFT_POSITION ||
7256 0 : sub_pel_direction == TOP_LEFT_POSITION ||
7257 : sub_pel_direction == TOP_POSITION);
7258 0 : validB = (EbBool)(sub_pel_direction == TOP_LEFT_POSITION ||
7259 0 : sub_pel_direction == TOP_POSITION ||
7260 : sub_pel_direction == TOP_RIGHT_POSITION);
7261 0 : validBL = (EbBool)(sub_pel_direction == TOP_POSITION ||
7262 0 : sub_pel_direction == TOP_RIGHT_POSITION ||
7263 : sub_pel_direction == RIGHT_POSITION);
7264 0 : validL = (EbBool)(sub_pel_direction == TOP_RIGHT_POSITION ||
7265 0 : sub_pel_direction == RIGHT_POSITION ||
7266 : sub_pel_direction == BOTTOM_RIGHT_POSITION);
7267 : } else {
7268 0 : validTL = (EbBool)(sub_pel_direction == LEFT_POSITION ||
7269 0 : sub_pel_direction == TOP_LEFT_POSITION ||
7270 : sub_pel_direction == TOP_POSITION);
7271 0 : validT = (EbBool)(sub_pel_direction == TOP_LEFT_POSITION ||
7272 0 : sub_pel_direction == TOP_POSITION ||
7273 : sub_pel_direction == TOP_RIGHT_POSITION);
7274 0 : validTR = (EbBool)(sub_pel_direction == TOP_POSITION ||
7275 0 : sub_pel_direction == TOP_RIGHT_POSITION ||
7276 : sub_pel_direction == RIGHT_POSITION);
7277 0 : validR = (EbBool)(sub_pel_direction == TOP_RIGHT_POSITION ||
7278 0 : sub_pel_direction == RIGHT_POSITION ||
7279 : sub_pel_direction == BOTTOM_RIGHT_POSITION);
7280 0 : validBR = (EbBool)(sub_pel_direction == RIGHT_POSITION ||
7281 0 : sub_pel_direction == BOTTOM_RIGHT_POSITION ||
7282 : sub_pel_direction == BOTTOM_POSITION);
7283 0 : validB = (EbBool)(sub_pel_direction == BOTTOM_RIGHT_POSITION ||
7284 0 : sub_pel_direction == BOTTOM_POSITION ||
7285 : sub_pel_direction == BOTTOM_LEFT_POSITION);
7286 0 : validBL = (EbBool)(sub_pel_direction == BOTTOM_POSITION ||
7287 0 : sub_pel_direction == BOTTOM_LEFT_POSITION ||
7288 : sub_pel_direction == LEFT_POSITION);
7289 0 : validL = (EbBool)(sub_pel_direction == BOTTOM_LEFT_POSITION ||
7290 0 : sub_pel_direction == LEFT_POSITION ||
7291 : sub_pel_direction == TOP_LEFT_POSITION);
7292 : }
7293 : }
7294 0 : xMvQuarter[0] = x_mv - 1; // L position
7295 0 : xMvQuarter[1] = x_mv + 1; // R position
7296 0 : xMvQuarter[2] = x_mv; // T position
7297 0 : xMvQuarter[3] = x_mv; // B position
7298 0 : xMvQuarter[4] = x_mv - 1; // TL position
7299 0 : xMvQuarter[5] = x_mv + 1; // TR position
7300 0 : xMvQuarter[6] = x_mv + 1; // BR position
7301 0 : xMvQuarter[7] = x_mv - 1; // BL position
7302 :
7303 0 : yMvQuarter[0] = y_mv; // L position
7304 0 : yMvQuarter[1] = y_mv; // R position
7305 0 : yMvQuarter[2] = y_mv - 1; // T position
7306 0 : yMvQuarter[3] = y_mv + 1; // B position
7307 0 : yMvQuarter[4] = y_mv - 1; // TL position
7308 0 : yMvQuarter[5] = y_mv - 1; // TR position
7309 0 : yMvQuarter[6] = y_mv + 1; // BR position
7310 0 : yMvQuarter[7] = y_mv + 1; // BL position
7311 :
7312 : // Use SATD only when QP mod, and RC are OFF
7313 : // QP mod, and RC assume that ME distotion is always SAD.
7314 : // This problem might be solved by computing SAD for the best position after
7315 : // fractional search is done, or by considring the full pel resolution SAD.
7316 :
7317 : {
7318 : // L position
7319 0 : if (validL) {
7320 0 : searchRegionIndex1 = (int32_t)xSearchIndex +
7321 0 : (int32_t)buf1Stride[0] * (int32_t)ySearchIndex;
7322 0 : searchRegionIndex2 = (int32_t)xSearchIndex +
7323 0 : (int32_t)buf2Stride[0] * (int32_t)ySearchIndex;
7324 :
7325 0 : dist =
7326 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
7327 0 : ? combined_averaging_ssd(
7328 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7329 : BLOCK_SIZE_64,
7330 0 : buf1[0] + searchRegionIndex1,
7331 0 : buf1Stride[0],
7332 0 : buf2[0] + searchRegionIndex2,
7333 0 : buf2Stride[0],
7334 : pu_height,
7335 : pu_width)
7336 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
7337 0 : ? (nxm_sad_avg_kernel(
7338 : &(context_ptr
7339 0 : ->sb_buffer[puLcuBufferIndex]),
7340 : BLOCK_SIZE_64 << 1,
7341 0 : buf1[0] + searchRegionIndex1,
7342 0 : buf1Stride[0] << 1,
7343 0 : buf2[0] + searchRegionIndex2,
7344 0 : buf2Stride[0] << 1,
7345 : pu_height >> 1,
7346 : pu_width))
7347 0 : << 1
7348 0 : : nxm_sad_avg_kernel(
7349 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7350 : BLOCK_SIZE_64,
7351 0 : buf1[0] + searchRegionIndex1,
7352 : buf1Stride[0],
7353 0 : buf2[0] + searchRegionIndex2,
7354 : buf2Stride[0],
7355 : pu_height,
7356 : pu_width);
7357 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
7358 0 : if (dist < *pBestSsd) {
7359 0 : *pBestSad =
7360 0 : (uint32_t)nxm_sad_avg_kernel(
7361 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7362 : BLOCK_SIZE_64,
7363 0 : buf1[0] + searchRegionIndex1,
7364 : buf1Stride[0],
7365 0 : buf2[0] + searchRegionIndex2,
7366 : buf2Stride[0],
7367 : pu_height,
7368 : pu_width);
7369 0 : *pBestMV = ((uint16_t)yMvQuarter[0] << 16) |
7370 0 : ((uint16_t)xMvQuarter[0]);
7371 0 : *pBestSsd = (uint32_t)dist;
7372 : }
7373 : } else {
7374 0 : if (dist < *pBestSad) {
7375 0 : *pBestSad = (uint32_t)dist;
7376 0 : *pBestMV = ((uint16_t)yMvQuarter[0] << 16) |
7377 0 : ((uint16_t)xMvQuarter[0]);
7378 : }
7379 : }
7380 : }
7381 :
7382 : // R positions
7383 0 : if (validR) {
7384 0 : searchRegionIndex1 = (int32_t)xSearchIndex +
7385 0 : (int32_t)buf1Stride[1] * (int32_t)ySearchIndex;
7386 0 : searchRegionIndex2 = (int32_t)xSearchIndex +
7387 0 : (int32_t)buf2Stride[1] * (int32_t)ySearchIndex;
7388 0 : dist =
7389 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
7390 0 : ? combined_averaging_ssd(
7391 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7392 : BLOCK_SIZE_64,
7393 0 : buf1[1] + searchRegionIndex1,
7394 0 : buf1Stride[1],
7395 0 : buf2[1] + searchRegionIndex2,
7396 0 : buf2Stride[1],
7397 : pu_height,
7398 : pu_width)
7399 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
7400 0 : ? (nxm_sad_avg_kernel(
7401 : &(context_ptr
7402 0 : ->sb_buffer[puLcuBufferIndex]),
7403 : BLOCK_SIZE_64 << 1,
7404 0 : buf1[1] + searchRegionIndex1,
7405 0 : buf1Stride[1] << 1,
7406 0 : buf2[1] + searchRegionIndex2,
7407 0 : buf2Stride[1] << 1,
7408 : pu_height >> 1,
7409 : pu_width))
7410 0 : << 1
7411 0 : : nxm_sad_avg_kernel(
7412 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7413 : BLOCK_SIZE_64,
7414 0 : buf1[1] + searchRegionIndex1,
7415 0 : buf1Stride[1],
7416 0 : buf2[1] + searchRegionIndex2,
7417 0 : buf2Stride[1],
7418 : pu_height,
7419 : pu_width);
7420 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
7421 0 : if (dist < *pBestSsd) {
7422 0 : *pBestSad =
7423 0 : (uint32_t)nxm_sad_avg_kernel(
7424 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7425 : BLOCK_SIZE_64,
7426 0 : buf1[1] + searchRegionIndex1,
7427 0 : buf1Stride[1],
7428 0 : buf2[1] + searchRegionIndex2,
7429 0 : buf2Stride[1],
7430 : pu_height,
7431 : pu_width);
7432 0 : *pBestMV = ((uint16_t)yMvQuarter[1] << 16) |
7433 0 : ((uint16_t)xMvQuarter[1]);
7434 0 : *pBestSsd = (uint32_t)dist;
7435 : }
7436 : } else {
7437 0 : if (dist < *pBestSad) {
7438 0 : *pBestSad = (uint32_t)dist;
7439 0 : *pBestMV = ((uint16_t)yMvQuarter[1] << 16) |
7440 0 : ((uint16_t)xMvQuarter[1]);
7441 : }
7442 : }
7443 : }
7444 :
7445 : // T position
7446 0 : if (validT) {
7447 0 : searchRegionIndex1 = (int32_t)xSearchIndex +
7448 0 : (int32_t)buf1Stride[2] * (int32_t)ySearchIndex;
7449 0 : searchRegionIndex2 = (int32_t)xSearchIndex +
7450 0 : (int32_t)buf2Stride[2] * (int32_t)ySearchIndex;
7451 0 : dist =
7452 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
7453 0 : ? combined_averaging_ssd(
7454 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7455 : BLOCK_SIZE_64,
7456 0 : buf1[2] + searchRegionIndex1,
7457 0 : buf1Stride[2],
7458 0 : buf2[2] + searchRegionIndex2,
7459 0 : buf2Stride[2],
7460 : pu_height,
7461 : pu_width)
7462 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
7463 0 : ? (nxm_sad_avg_kernel(
7464 : &(context_ptr
7465 0 : ->sb_buffer[puLcuBufferIndex]),
7466 : BLOCK_SIZE_64 << 1,
7467 0 : buf1[2] + searchRegionIndex1,
7468 0 : buf1Stride[2] << 1,
7469 0 : buf2[2] + searchRegionIndex2,
7470 0 : buf2Stride[2] << 1,
7471 : pu_height >> 1,
7472 : pu_width))
7473 0 : << 1
7474 0 : : nxm_sad_avg_kernel(
7475 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7476 : BLOCK_SIZE_64,
7477 0 : buf1[2] + searchRegionIndex1,
7478 0 : buf1Stride[2],
7479 0 : buf2[2] + searchRegionIndex2,
7480 0 : buf2Stride[2],
7481 : pu_height,
7482 : pu_width);
7483 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
7484 0 : if (dist < *pBestSsd) {
7485 0 : *pBestSad =
7486 0 : (uint32_t)nxm_sad_avg_kernel(
7487 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7488 : BLOCK_SIZE_64,
7489 0 : buf1[2] + searchRegionIndex1,
7490 0 : buf1Stride[2],
7491 0 : buf2[2] + searchRegionIndex2,
7492 0 : buf2Stride[2],
7493 : pu_height,
7494 : pu_width);
7495 0 : *pBestMV = ((uint16_t)yMvQuarter[2] << 16) |
7496 0 : ((uint16_t)xMvQuarter[2]);
7497 0 : *pBestSsd = (uint32_t)dist;
7498 : }
7499 : } else {
7500 0 : if (dist < *pBestSad) {
7501 0 : *pBestSad = (uint32_t)dist;
7502 0 : *pBestMV = ((uint16_t)yMvQuarter[2] << 16) |
7503 0 : ((uint16_t)xMvQuarter[2]);
7504 : }
7505 : }
7506 : }
7507 :
7508 : // B position
7509 0 : if (validB) {
7510 0 : searchRegionIndex1 = (int32_t)xSearchIndex +
7511 0 : (int32_t)buf1Stride[3] * (int32_t)ySearchIndex;
7512 0 : searchRegionIndex2 = (int32_t)xSearchIndex +
7513 0 : (int32_t)buf2Stride[3] * (int32_t)ySearchIndex;
7514 0 : dist =
7515 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
7516 0 : ? combined_averaging_ssd(
7517 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7518 : BLOCK_SIZE_64,
7519 0 : buf1[3] + searchRegionIndex1,
7520 0 : buf1Stride[3],
7521 0 : buf2[3] + searchRegionIndex2,
7522 0 : buf2Stride[3],
7523 : pu_height,
7524 : pu_width)
7525 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
7526 0 : ? (nxm_sad_avg_kernel(
7527 : &(context_ptr
7528 0 : ->sb_buffer[puLcuBufferIndex]),
7529 : BLOCK_SIZE_64 << 1,
7530 0 : buf1[3] + searchRegionIndex1,
7531 0 : buf1Stride[3] << 1,
7532 0 : buf2[3] + searchRegionIndex2,
7533 0 : buf2Stride[3] << 1,
7534 : pu_height >> 1,
7535 : pu_width))
7536 0 : << 1
7537 0 : : nxm_sad_avg_kernel(
7538 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7539 : BLOCK_SIZE_64,
7540 0 : buf1[3] + searchRegionIndex1,
7541 0 : buf1Stride[3],
7542 0 : buf2[3] + searchRegionIndex2,
7543 0 : buf2Stride[3],
7544 : pu_height,
7545 : pu_width);
7546 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
7547 0 : if (dist < *pBestSsd) {
7548 0 : *pBestSad =
7549 0 : (uint32_t)nxm_sad_avg_kernel(
7550 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7551 : BLOCK_SIZE_64,
7552 0 : buf1[3] + searchRegionIndex1,
7553 0 : buf1Stride[3],
7554 0 : buf2[3] + searchRegionIndex2,
7555 0 : buf2Stride[3],
7556 : pu_height,
7557 : pu_width);
7558 0 : *pBestMV = ((uint16_t)yMvQuarter[3] << 16) |
7559 0 : ((uint16_t)xMvQuarter[3]);
7560 0 : *pBestSsd = (uint32_t)dist;
7561 : }
7562 : } else {
7563 0 : if (dist < *pBestSad) {
7564 0 : *pBestSad = (uint32_t)dist;
7565 0 : *pBestMV = ((uint16_t)yMvQuarter[3] << 16) |
7566 0 : ((uint16_t)xMvQuarter[3]);
7567 : }
7568 : }
7569 : }
7570 :
7571 : // TL position
7572 0 : if (validTL) {
7573 0 : searchRegionIndex1 = (int32_t)xSearchIndex +
7574 0 : (int32_t)buf1Stride[4] * (int32_t)ySearchIndex;
7575 0 : searchRegionIndex2 = (int32_t)xSearchIndex +
7576 0 : (int32_t)buf2Stride[4] * (int32_t)ySearchIndex;
7577 0 : dist =
7578 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
7579 0 : ? combined_averaging_ssd(
7580 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7581 : BLOCK_SIZE_64,
7582 0 : buf1[4] + searchRegionIndex1,
7583 0 : buf1Stride[4],
7584 0 : buf2[4] + searchRegionIndex2,
7585 0 : buf2Stride[4],
7586 : pu_height,
7587 : pu_width)
7588 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
7589 0 : ? (nxm_sad_avg_kernel(
7590 : &(context_ptr
7591 0 : ->sb_buffer[puLcuBufferIndex]),
7592 : BLOCK_SIZE_64 << 1,
7593 0 : buf1[4] + searchRegionIndex1,
7594 0 : buf1Stride[4] << 1,
7595 0 : buf2[4] + searchRegionIndex2,
7596 0 : buf2Stride[4] << 1,
7597 : pu_height >> 1,
7598 : pu_width))
7599 0 : << 1
7600 0 : : nxm_sad_avg_kernel(
7601 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7602 : BLOCK_SIZE_64,
7603 0 : buf1[4] + searchRegionIndex1,
7604 0 : buf1Stride[4],
7605 0 : buf2[4] + searchRegionIndex2,
7606 0 : buf2Stride[4],
7607 : pu_height,
7608 : pu_width);
7609 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
7610 0 : if (dist < *pBestSsd) {
7611 0 : *pBestSad =
7612 0 : (uint32_t)nxm_sad_avg_kernel(
7613 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7614 : BLOCK_SIZE_64,
7615 0 : buf1[4] + searchRegionIndex1,
7616 0 : buf1Stride[4],
7617 0 : buf2[4] + searchRegionIndex2,
7618 0 : buf2Stride[4],
7619 : pu_height,
7620 : pu_width);
7621 0 : *pBestMV = ((uint16_t)yMvQuarter[4] << 16) |
7622 0 : ((uint16_t)xMvQuarter[4]);
7623 0 : *pBestSsd = (uint32_t)dist;
7624 : }
7625 : } else {
7626 0 : if (dist < *pBestSad) {
7627 0 : *pBestSad = (uint32_t)dist;
7628 0 : *pBestMV = ((uint16_t)yMvQuarter[4] << 16) |
7629 0 : ((uint16_t)xMvQuarter[4]);
7630 : }
7631 : }
7632 : }
7633 :
7634 : // TR position
7635 0 : if (validTR) {
7636 0 : searchRegionIndex1 = (int32_t)xSearchIndex +
7637 0 : (int32_t)buf1Stride[5] * (int32_t)ySearchIndex;
7638 0 : searchRegionIndex2 = (int32_t)xSearchIndex +
7639 0 : (int32_t)buf2Stride[5] * (int32_t)ySearchIndex;
7640 0 : dist =
7641 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
7642 0 : ? combined_averaging_ssd(
7643 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7644 : BLOCK_SIZE_64,
7645 0 : buf1[5] + searchRegionIndex1,
7646 0 : buf1Stride[5],
7647 0 : buf2[5] + searchRegionIndex2,
7648 0 : buf2Stride[5],
7649 : pu_height,
7650 : pu_width)
7651 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
7652 0 : ? (nxm_sad_avg_kernel(
7653 : &(context_ptr
7654 0 : ->sb_buffer[puLcuBufferIndex]),
7655 : BLOCK_SIZE_64 << 1,
7656 0 : buf1[5] + searchRegionIndex1,
7657 0 : buf1Stride[5] << 1,
7658 0 : buf2[5] + searchRegionIndex2,
7659 0 : buf2Stride[5] << 1,
7660 : pu_height >> 1,
7661 : pu_width))
7662 0 : << 1
7663 0 : : nxm_sad_avg_kernel(
7664 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7665 : BLOCK_SIZE_64,
7666 0 : buf1[5] + searchRegionIndex1,
7667 0 : buf1Stride[5],
7668 0 : buf2[5] + searchRegionIndex2,
7669 0 : buf2Stride[5],
7670 : pu_height,
7671 : pu_width);
7672 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
7673 0 : if (dist < *pBestSsd) {
7674 0 : *pBestSad =
7675 0 : (uint32_t)nxm_sad_avg_kernel(
7676 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7677 : BLOCK_SIZE_64,
7678 0 : buf1[5] + searchRegionIndex1,
7679 0 : buf1Stride[5],
7680 0 : buf2[5] + searchRegionIndex2,
7681 0 : buf2Stride[5],
7682 : pu_height,
7683 : pu_width);
7684 0 : *pBestMV = ((uint16_t)yMvQuarter[5] << 16) |
7685 0 : ((uint16_t)xMvQuarter[5]);
7686 0 : *pBestSsd = (uint32_t)dist;
7687 : }
7688 : } else {
7689 0 : if (dist < *pBestSad) {
7690 0 : *pBestSad = (uint32_t)dist;
7691 0 : *pBestMV = ((uint16_t)yMvQuarter[5] << 16) |
7692 0 : ((uint16_t)xMvQuarter[5]);
7693 : }
7694 : }
7695 : }
7696 :
7697 : // BR position
7698 0 : if (validBR) {
7699 0 : searchRegionIndex1 = (int32_t)xSearchIndex +
7700 0 : (int32_t)buf1Stride[6] * (int32_t)ySearchIndex;
7701 0 : searchRegionIndex2 = (int32_t)xSearchIndex +
7702 0 : (int32_t)buf2Stride[6] * (int32_t)ySearchIndex;
7703 0 : dist =
7704 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
7705 0 : ? combined_averaging_ssd(
7706 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7707 : BLOCK_SIZE_64,
7708 0 : buf1[6] + searchRegionIndex1,
7709 0 : buf1Stride[6],
7710 0 : buf2[6] + searchRegionIndex2,
7711 0 : buf2Stride[6],
7712 : pu_height,
7713 : pu_width)
7714 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
7715 0 : ? (nxm_sad_avg_kernel(
7716 : &(context_ptr
7717 0 : ->sb_buffer[puLcuBufferIndex]),
7718 : BLOCK_SIZE_64 << 1,
7719 0 : buf1[6] + searchRegionIndex1,
7720 0 : buf1Stride[6] << 1,
7721 0 : buf2[6] + searchRegionIndex2,
7722 0 : buf2Stride[6] << 1,
7723 : pu_height >> 1,
7724 : pu_width))
7725 0 : << 1
7726 0 : : nxm_sad_avg_kernel(
7727 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7728 : BLOCK_SIZE_64,
7729 0 : buf1[6] + searchRegionIndex1,
7730 0 : buf1Stride[6],
7731 0 : buf2[6] + searchRegionIndex2,
7732 0 : buf2Stride[6],
7733 : pu_height,
7734 : pu_width);
7735 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
7736 0 : if (dist < *pBestSsd) {
7737 0 : *pBestSad =
7738 0 : (uint32_t)nxm_sad_avg_kernel(
7739 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7740 : BLOCK_SIZE_64,
7741 0 : buf1[6] + searchRegionIndex1,
7742 0 : buf1Stride[6],
7743 0 : buf2[6] + searchRegionIndex2,
7744 0 : buf2Stride[6],
7745 : pu_height,
7746 : pu_width);
7747 0 : *pBestMV = ((uint16_t)yMvQuarter[6] << 16) |
7748 0 : ((uint16_t)xMvQuarter[6]);
7749 0 : *pBestSsd = (uint32_t)dist;
7750 : }
7751 : } else {
7752 0 : if (dist < *pBestSad) {
7753 0 : *pBestSad = (uint32_t)dist;
7754 0 : *pBestMV = ((uint16_t)yMvQuarter[6] << 16) |
7755 0 : ((uint16_t)xMvQuarter[6]);
7756 : }
7757 : }
7758 : }
7759 :
7760 : // BL position
7761 0 : if (validBL) {
7762 0 : searchRegionIndex1 = (int32_t)xSearchIndex +
7763 0 : (int32_t)buf1Stride[7] * (int32_t)ySearchIndex;
7764 0 : searchRegionIndex2 = (int32_t)xSearchIndex +
7765 0 : (int32_t)buf2Stride[7] * (int32_t)ySearchIndex;
7766 0 : dist =
7767 0 : (context_ptr->fractional_search_method == SSD_SEARCH)
7768 0 : ? combined_averaging_ssd(
7769 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7770 : BLOCK_SIZE_64,
7771 0 : buf1[7] + searchRegionIndex1,
7772 0 : buf1Stride[7],
7773 0 : buf2[7] + searchRegionIndex2,
7774 0 : buf2Stride[7],
7775 : pu_height,
7776 : pu_width)
7777 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
7778 0 : ? (nxm_sad_avg_kernel(
7779 : &(context_ptr
7780 0 : ->sb_buffer[puLcuBufferIndex]),
7781 : BLOCK_SIZE_64 << 1,
7782 0 : buf1[7] + searchRegionIndex1,
7783 0 : buf1Stride[7] << 1,
7784 0 : buf2[7] + searchRegionIndex2,
7785 0 : buf2Stride[7] << 1,
7786 : pu_height >> 1,
7787 : pu_width))
7788 0 : << 1
7789 0 : : nxm_sad_avg_kernel(
7790 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7791 : BLOCK_SIZE_64,
7792 0 : buf1[7] + searchRegionIndex1,
7793 0 : buf1Stride[7],
7794 0 : buf2[7] + searchRegionIndex2,
7795 0 : buf2Stride[7],
7796 : pu_height,
7797 : pu_width);
7798 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
7799 0 : if (dist < *pBestSsd) {
7800 0 : *pBestSad =
7801 0 : (uint32_t)nxm_sad_avg_kernel(
7802 0 : &(context_ptr->sb_buffer[puLcuBufferIndex]),
7803 : BLOCK_SIZE_64,
7804 0 : buf1[7] + searchRegionIndex1,
7805 0 : buf1Stride[7],
7806 0 : buf2[7] + searchRegionIndex2,
7807 0 : buf2Stride[7],
7808 : pu_height,
7809 : pu_width);
7810 0 : *pBestMV = ((uint16_t)yMvQuarter[7] << 16) |
7811 0 : ((uint16_t)xMvQuarter[7]);
7812 0 : *pBestSsd = (uint32_t)dist;
7813 : }
7814 : } else {
7815 0 : if (dist < *pBestSad) {
7816 0 : *pBestSad = (uint32_t)dist;
7817 0 : *pBestMV = ((uint16_t)yMvQuarter[7] << 16) |
7818 0 : ((uint16_t)xMvQuarter[7]);
7819 : }
7820 : }
7821 : }
7822 : }
7823 :
7824 0 : return;
7825 : }
7826 :
7827 : /*******************************************
7828 : * SetQuarterPelRefinementInputsOnTheFly
7829 : * determine the 2 half pel buffers to do
7830 : averaging for Quarter Pel Refinement
7831 : *******************************************/
7832 0 : static void SetQuarterPelRefinementInputsOnTheFly(
7833 : uint8_t *pos_Full, //[IN] points to A
7834 : uint32_t FullStride, //[IN]
7835 : uint8_t *pos_b, //[IN] points to b
7836 : uint8_t *pos_h, //[IN] points to h
7837 : uint8_t *pos_j, //[IN] points to j
7838 : uint32_t Stride, //[IN]
7839 : int16_t x_mv, //[IN]
7840 : int16_t y_mv, //[IN]
7841 : uint8_t **buf1, //[OUT]
7842 : uint32_t *buf1Stride, //[OUT]
7843 : uint8_t **buf2, //[OUT]
7844 : uint32_t *buf2Stride //[OUT]
7845 : ) {
7846 0 : uint32_t quarterPelRefinementMethod = (y_mv & 2) + ((x_mv & 2) >> 1);
7847 :
7848 : // for each one of the 8 postions, we need to determine the 2 half pel
7849 : // buffers to do averaging
7850 :
7851 : // A a b c
7852 : // d e f g
7853 : // h i j k
7854 : // n p q r
7855 :
7856 0 : switch (quarterPelRefinementMethod) {
7857 0 : case EB_QUARTER_IN_FULL:
7858 :
7859 0 : /*c=b+A*/ buf1[0] = pos_b;
7860 0 : buf1Stride[0] = Stride;
7861 0 : buf2[0] = pos_Full;
7862 0 : buf2Stride[0] = FullStride;
7863 0 : /*a=A+b*/ buf1[1] = pos_Full;
7864 0 : buf1Stride[1] = FullStride;
7865 0 : buf2[1] = pos_b + 1;
7866 0 : buf2Stride[1] = Stride;
7867 0 : /*n=h+A*/ buf1[2] = pos_h;
7868 0 : buf1Stride[2] = Stride;
7869 0 : buf2[2] = pos_Full;
7870 0 : buf2Stride[2] = FullStride;
7871 0 : /*d=A+h*/ buf1[3] = pos_Full;
7872 0 : buf1Stride[3] = FullStride;
7873 0 : buf2[3] = pos_h + Stride;
7874 0 : buf2Stride[3] = Stride;
7875 0 : /*r=b+h*/ buf1[4] = pos_b;
7876 0 : buf1Stride[4] = Stride;
7877 0 : buf2[4] = pos_h;
7878 0 : buf2Stride[4] = Stride;
7879 0 : /*p=h+b*/ buf1[5] = pos_h;
7880 0 : buf1Stride[5] = Stride;
7881 0 : buf2[5] = pos_b + 1;
7882 0 : buf2Stride[5] = Stride;
7883 0 : /*e=h+b*/ buf1[6] = pos_h + Stride;
7884 0 : buf1Stride[6] = Stride;
7885 0 : buf2[6] = pos_b + 1;
7886 0 : buf2Stride[6] = Stride;
7887 0 : /*g=b+h*/ buf1[7] = pos_b;
7888 0 : buf1Stride[7] = Stride;
7889 0 : buf2[7] = pos_h + Stride;
7890 0 : buf2Stride[7] = Stride;
7891 :
7892 0 : break;
7893 :
7894 0 : case EB_QUARTER_IN_HALF_HORIZONTAL:
7895 :
7896 0 : /*a=A+b*/ buf1[0] = pos_Full - 1;
7897 0 : buf1Stride[0] = FullStride;
7898 0 : buf2[0] = pos_b;
7899 0 : buf2Stride[0] = Stride;
7900 0 : /*c=b+A*/ buf1[1] = pos_b;
7901 0 : buf1Stride[1] = Stride;
7902 0 : buf2[1] = pos_Full;
7903 0 : buf2Stride[1] = FullStride;
7904 0 : /*q=j+b*/ buf1[2] = pos_j;
7905 0 : buf1Stride[2] = Stride;
7906 0 : buf2[2] = pos_b;
7907 0 : buf2Stride[2] = Stride;
7908 0 : /*f=b+j*/ buf1[3] = pos_b;
7909 0 : buf1Stride[3] = Stride;
7910 0 : buf2[3] = pos_j + Stride;
7911 0 : buf2Stride[3] = Stride;
7912 0 : /*p=h+b*/ buf1[4] = pos_h - 1;
7913 0 : buf1Stride[4] = Stride;
7914 0 : buf2[4] = pos_b;
7915 0 : buf2Stride[4] = Stride;
7916 0 : /*r=b+h*/ buf1[5] = pos_b;
7917 0 : buf1Stride[5] = Stride;
7918 0 : buf2[5] = pos_h;
7919 0 : buf2Stride[5] = Stride;
7920 0 : /*g=b+h*/ buf1[6] = pos_b;
7921 0 : buf1Stride[6] = Stride;
7922 0 : buf2[6] = pos_h + Stride;
7923 0 : buf2Stride[6] = Stride;
7924 0 : /*e=h+b*/ buf1[7] = pos_h - 1 + Stride;
7925 0 : buf1Stride[7] = Stride;
7926 0 : buf2[7] = pos_b;
7927 0 : buf2Stride[7] = Stride;
7928 :
7929 0 : break;
7930 :
7931 0 : case EB_QUARTER_IN_HALF_VERTICAL:
7932 :
7933 0 : /*k=j+h*/ buf1[0] = pos_j;
7934 0 : buf1Stride[0] = Stride;
7935 0 : buf2[0] = pos_h;
7936 0 : buf2Stride[0] = Stride;
7937 0 : /*i=h+j*/ buf1[1] = pos_h;
7938 0 : buf1Stride[1] = Stride;
7939 0 : buf2[1] = pos_j + 1;
7940 0 : buf2Stride[1] = Stride;
7941 0 : /*d=A+h*/ buf1[2] = pos_Full - FullStride;
7942 0 : buf1Stride[2] = FullStride;
7943 0 : buf2[2] = pos_h;
7944 0 : buf2Stride[2] = Stride;
7945 0 : /*n=h+A*/ buf1[3] = pos_h;
7946 0 : buf1Stride[3] = Stride;
7947 0 : buf2[3] = pos_Full;
7948 0 : buf2Stride[3] = FullStride;
7949 0 : /*g=b+h*/ buf1[4] = pos_b - Stride;
7950 0 : buf1Stride[4] = Stride;
7951 0 : buf2[4] = pos_h;
7952 0 : buf2Stride[4] = Stride;
7953 0 : /*e=h+b*/ buf1[5] = pos_h;
7954 0 : buf1Stride[5] = Stride;
7955 0 : buf2[5] = pos_b + 1 - Stride;
7956 0 : buf2Stride[5] = Stride;
7957 0 : /*p=h+b*/ buf1[6] = pos_h;
7958 0 : buf1Stride[6] = Stride;
7959 0 : buf2[6] = pos_b + 1;
7960 0 : buf2Stride[6] = Stride;
7961 0 : /*r=b+h*/ buf1[7] = pos_b;
7962 0 : buf1Stride[7] = Stride;
7963 0 : buf2[7] = pos_h;
7964 0 : buf2Stride[7] = Stride;
7965 :
7966 0 : break;
7967 :
7968 0 : case EB_QUARTER_IN_HALF_DIAGONAL:
7969 :
7970 0 : /*i=h+j*/ buf1[0] = pos_h - 1;
7971 0 : buf1Stride[0] = Stride;
7972 0 : buf2[0] = pos_j;
7973 0 : buf2Stride[0] = Stride;
7974 0 : /*k=j+h*/ buf1[1] = pos_j;
7975 0 : buf1Stride[1] = Stride;
7976 0 : buf2[1] = pos_h;
7977 0 : buf2Stride[1] = Stride;
7978 0 : /*f=b+j*/ buf1[2] = pos_b - Stride;
7979 0 : buf1Stride[2] = Stride;
7980 0 : buf2[2] = pos_j;
7981 0 : buf2Stride[2] = Stride;
7982 0 : /*q=j+b*/ buf1[3] = pos_j;
7983 0 : buf1Stride[3] = Stride;
7984 0 : buf2[3] = pos_b;
7985 0 : buf2Stride[3] = Stride;
7986 0 : /*e=h+b*/ buf1[4] = pos_h - 1;
7987 0 : buf1Stride[4] = Stride;
7988 0 : buf2[4] = pos_b - Stride;
7989 0 : buf2Stride[4] = Stride;
7990 0 : /*g=b+h*/ buf1[5] = pos_b - Stride;
7991 0 : buf1Stride[5] = Stride;
7992 0 : buf2[5] = pos_h;
7993 0 : buf2Stride[5] = Stride;
7994 0 : /*r=b+h*/ buf1[6] = pos_b;
7995 0 : buf1Stride[6] = Stride;
7996 0 : buf2[6] = pos_h;
7997 0 : buf2Stride[6] = Stride;
7998 0 : /*p=h+b*/ buf1[7] = pos_h - 1;
7999 0 : buf1Stride[7] = Stride;
8000 0 : buf2[7] = pos_b;
8001 0 : buf2Stride[7] = Stride;
8002 :
8003 0 : break;
8004 :
8005 0 : default: break;
8006 : }
8007 :
8008 0 : return;
8009 : }
8010 :
8011 : /*******************************************
8012 : * QuarterPelSearch_LCU
8013 : * Performs quarter-pel refinement for the 85 square PUs (64x64, 32x32, 16x16, 8x8) and, when ext_block_flag is set, for the rectangular PUs as well
8014 : *******************************************/
8015 0 : static void QuarterPelSearch_LCU(
8016 : MeContext
8017 : *context_ptr, //[IN/OUT] ME context Ptr, used to get/update ME results
8018 : uint8_t *pos_Full, //[IN]
8019 : uint32_t FullStride, //[IN]
8020 : uint8_t *pos_b, //[IN]
8021 : uint8_t *pos_h, //[IN]
8022 : uint8_t *pos_j, //[IN]
8023 : int16_t
8024 : x_search_area_origin, //[IN] search area origin in the horizontal
8025 : // direction, used to point to reference samples
8026 : int16_t
8027 : y_search_area_origin, //[IN] search area origin in the vertical
8028 : // direction, used to point to reference samples
8029 : EbBool disable8x8CuInMeFlag,
8030 : EbBool enable_half_pel32x32, EbBool enable_half_pel16x16,
8031 : EbBool enable_half_pel8x8,
8032 : EbBool enableQuarterPel, EbBool ext_block_flag)
8033 : {
8034 : uint32_t pu_index;
8035 :
8036 : uint32_t puShiftXIndex;
8037 : uint32_t puShiftYIndex;
8038 :
8039 : uint32_t puLcuBufferIndex;
8040 :
8041 : // for each one of the 8 positions, we need to determine the 2 buffers to do
8042 : // averaging
8043 : uint8_t *buf1[8];
8044 : uint8_t *buf2[8];
8045 :
8046 : uint32_t buf1Stride[8];
8047 : uint32_t buf2Stride[8];
8048 :
8049 : int16_t x_mv, y_mv;
8050 : uint32_t nidx;
8051 :
8052 0 : if (context_ptr->fractional_search64x64) {
8053 0 : x_mv = _MVXT(*context_ptr->p_best_mv64x64);
8054 0 : y_mv = _MVYT(*context_ptr->p_best_mv64x64);
8055 :
8056 0 : SetQuarterPelRefinementInputsOnTheFly(pos_Full,
8057 : FullStride,
8058 : pos_b,
8059 : pos_h,
8060 : pos_j,
8061 : context_ptr->interpolated_stride,
8062 : x_mv,
8063 : y_mv,
8064 : buf1,
8065 : buf1Stride,
8066 : buf2,
8067 : buf2Stride);
8068 :
8069 0 : buf1[0] = buf1[0];
8070 0 : buf2[0] = buf2[0];
8071 0 : buf1[1] = buf1[1];
8072 0 : buf2[1] = buf2[1];
8073 0 : buf1[2] = buf1[2];
8074 0 : buf2[2] = buf2[2];
8075 0 : buf1[3] = buf1[3];
8076 0 : buf2[3] = buf2[3];
8077 0 : buf1[4] = buf1[4];
8078 0 : buf2[4] = buf2[4];
8079 0 : buf1[5] = buf1[5];
8080 0 : buf2[5] = buf2[5];
8081 0 : buf1[6] = buf1[6];
8082 0 : buf2[6] = buf2[6];
8083 0 : buf1[7] = buf1[7];
8084 0 : buf2[7] = buf2[7];
8085 :
8086 0 : PU_QuarterPelRefinementOnTheFly(context_ptr,
8087 : context_ptr->p_best_ssd64x64,
8088 : 0,
8089 : buf1,
8090 : buf1Stride,
8091 : buf2,
8092 : buf2Stride,
8093 : 64,
8094 : 64,
8095 : x_search_area_origin,
8096 : y_search_area_origin,
8097 : context_ptr->p_best_sad64x64,
8098 : context_ptr->p_best_mv64x64,
8099 0 : context_ptr->psub_pel_direction64x64);
8100 : }
8101 0 : if (enableQuarterPel && enable_half_pel32x32)
8102 : {
8103 : // 32x32 [4 partitions]
8104 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
8105 0 : x_mv = _MVXT(context_ptr->p_best_mv32x32[pu_index]);
8106 0 : y_mv = _MVYT(context_ptr->p_best_mv32x32[pu_index]);
8107 :
8108 0 : SetQuarterPelRefinementInputsOnTheFly(
8109 : pos_Full,
8110 : FullStride,
8111 : pos_b,
8112 : pos_h,
8113 : pos_j,
8114 : context_ptr->interpolated_stride,
8115 : x_mv,
8116 : y_mv,
8117 : buf1,
8118 : buf1Stride,
8119 : buf2,
8120 : buf2Stride);
8121 :
8122 0 : puShiftXIndex = (pu_index & 0x01) << 5;
8123 0 : puShiftYIndex = (pu_index >> 1) << 5;
8124 :
8125 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8126 :
8127 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8128 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8129 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8130 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8131 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8132 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8133 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8134 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8135 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8136 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8137 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8138 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8139 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8140 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8141 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8142 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8143 :
8144 0 : PU_QuarterPelRefinementOnTheFly(
8145 : context_ptr,
8146 0 : &context_ptr->p_best_ssd32x32[pu_index],
8147 : puLcuBufferIndex,
8148 : buf1,
8149 : buf1Stride,
8150 : buf2,
8151 : buf2Stride,
8152 : 32,
8153 : 32,
8154 : x_search_area_origin,
8155 : y_search_area_origin,
8156 0 : &context_ptr->p_best_sad32x32[pu_index],
8157 0 : &context_ptr->p_best_mv32x32[pu_index],
8158 0 : context_ptr->psub_pel_direction32x32[pu_index]);
8159 : }
8160 : }
8161 :
8162 0 : if (enableQuarterPel && enable_half_pel16x16)
8163 : {
8164 : // 16x16 [16 partitions]
8165 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
8166 0 : nidx = tab16x16[pu_index];
8167 :
8168 0 : x_mv = _MVXT(context_ptr->p_best_mv16x16[nidx]);
8169 0 : y_mv = _MVYT(context_ptr->p_best_mv16x16[nidx]);
8170 :
8171 0 : SetQuarterPelRefinementInputsOnTheFly(
8172 : pos_Full,
8173 : FullStride,
8174 : pos_b,
8175 : pos_h,
8176 : pos_j,
8177 : context_ptr->interpolated_stride,
8178 : x_mv,
8179 : y_mv,
8180 : buf1,
8181 : buf1Stride,
8182 : buf2,
8183 : buf2Stride);
8184 :
8185 0 : puShiftXIndex = (pu_index & 0x03) << 4;
8186 0 : puShiftYIndex = (pu_index >> 2) << 4;
8187 :
8188 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8189 :
8190 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8191 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8192 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8193 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8194 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8195 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8196 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8197 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8198 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8199 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8200 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8201 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8202 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8203 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8204 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8205 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8206 :
8207 0 : PU_QuarterPelRefinementOnTheFly(
8208 : context_ptr,
8209 0 : &context_ptr->p_best_ssd16x16[nidx],
8210 : puLcuBufferIndex,
8211 : buf1,
8212 : buf1Stride,
8213 : buf2,
8214 : buf2Stride,
8215 : 16,
8216 : 16,
8217 : x_search_area_origin,
8218 : y_search_area_origin,
8219 0 : &context_ptr->p_best_sad16x16[nidx],
8220 0 : &context_ptr->p_best_mv16x16[nidx],
8221 0 : context_ptr->psub_pel_direction16x16[nidx]);
8222 : }
8223 : }
8224 :
8225 0 : if (enableQuarterPel && enable_half_pel8x8)
8226 : {
8227 : // 8x8 [64 partitions]
8228 0 : if (!disable8x8CuInMeFlag) {
8229 0 : for (pu_index = 0; pu_index < 64; ++pu_index) {
8230 0 : nidx = tab8x8[pu_index];
8231 :
8232 0 : x_mv = _MVXT(context_ptr->p_best_mv8x8[nidx]);
8233 0 : y_mv = _MVYT(context_ptr->p_best_mv8x8[nidx]);
8234 :
8235 0 : SetQuarterPelRefinementInputsOnTheFly(
8236 : pos_Full,
8237 : FullStride,
8238 : pos_b,
8239 : pos_h,
8240 : pos_j,
8241 : context_ptr->interpolated_stride,
8242 : x_mv,
8243 : y_mv,
8244 : buf1,
8245 : buf1Stride,
8246 : buf2,
8247 : buf2Stride);
8248 :
8249 0 : puShiftXIndex = (pu_index & 0x07) << 3;
8250 0 : puShiftYIndex = (pu_index >> 3) << 3;
8251 :
8252 0 : puLcuBufferIndex =
8253 0 : puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8254 :
8255 0 : buf1[0] =
8256 0 : buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8257 0 : buf2[0] =
8258 0 : buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8259 0 : buf1[1] =
8260 0 : buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8261 0 : buf2[1] =
8262 0 : buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8263 0 : buf1[2] =
8264 0 : buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8265 0 : buf2[2] =
8266 0 : buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8267 0 : buf1[3] =
8268 0 : buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8269 0 : buf2[3] =
8270 0 : buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8271 0 : buf1[4] =
8272 0 : buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8273 0 : buf2[4] =
8274 0 : buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8275 0 : buf1[5] =
8276 0 : buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8277 0 : buf2[5] =
8278 0 : buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8279 0 : buf1[6] =
8280 0 : buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8281 0 : buf2[6] =
8282 0 : buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8283 0 : buf1[7] =
8284 0 : buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8285 0 : buf2[7] =
8286 0 : buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8287 :
8288 0 : PU_QuarterPelRefinementOnTheFly(
8289 : context_ptr,
8290 0 : &context_ptr->p_best_ssd8x8[nidx],
8291 : puLcuBufferIndex,
8292 : buf1,
8293 : buf1Stride,
8294 : buf2,
8295 : buf2Stride,
8296 : 8,
8297 : 8,
8298 : x_search_area_origin,
8299 : y_search_area_origin,
8300 0 : &context_ptr->p_best_sad8x8[nidx],
8301 0 : &context_ptr->p_best_mv8x8[nidx],
8302 0 : context_ptr->psub_pel_direction8x8[nidx]);
8303 : }
8304 : }
8305 : }
8306 :
8307 0 : if (ext_block_flag) {
8308 : // 64x32
8309 0 : for (pu_index = 0; pu_index < 2; ++pu_index) {
8310 0 : puShiftXIndex = 0;
8311 0 : puShiftYIndex = pu_index << 5;
8312 :
8313 0 : x_mv = _MVXT(context_ptr->p_best_mv64x32[pu_index]);
8314 0 : y_mv = _MVYT(context_ptr->p_best_mv64x32[pu_index]);
8315 :
8316 0 : SetQuarterPelRefinementInputsOnTheFly(
8317 : pos_Full,
8318 : FullStride,
8319 : pos_b,
8320 : pos_h,
8321 : pos_j,
8322 : context_ptr->interpolated_stride,
8323 : x_mv,
8324 : y_mv,
8325 : buf1,
8326 : buf1Stride,
8327 : buf2,
8328 : buf2Stride);
8329 :
8330 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8331 :
8332 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8333 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8334 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8335 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8336 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8337 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8338 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8339 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8340 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8341 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8342 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8343 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8344 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8345 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8346 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8347 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8348 :
8349 0 : PU_QuarterPelRefinementOnTheFly(
8350 : context_ptr,
8351 0 : &context_ptr->p_best_ssd64x32[pu_index],
8352 : puLcuBufferIndex,
8353 : buf1,
8354 : buf1Stride,
8355 : buf2,
8356 : buf2Stride,
8357 : 64,
8358 : 32,
8359 : x_search_area_origin,
8360 : y_search_area_origin,
8361 0 : &context_ptr->p_best_sad64x32[pu_index],
8362 0 : &context_ptr->p_best_mv64x32[pu_index],
8363 0 : context_ptr->psub_pel_direction64x32[pu_index]);
8364 : }
8365 :
8366 : // 32x16
8367 0 : for (pu_index = 0; pu_index < 8; ++pu_index) {
8368 0 : nidx = tab32x16[pu_index]; // TODO bitwise this
8369 :
8370 0 : puShiftXIndex = (pu_index & 0x01) << 5;
8371 0 : puShiftYIndex = (pu_index >> 1) << 4;
8372 :
8373 0 : x_mv = _MVXT(context_ptr->p_best_mv32x16[nidx]);
8374 0 : y_mv = _MVYT(context_ptr->p_best_mv32x16[nidx]);
8375 :
8376 0 : SetQuarterPelRefinementInputsOnTheFly(
8377 : pos_Full,
8378 : FullStride,
8379 : pos_b,
8380 : pos_h,
8381 : pos_j,
8382 : context_ptr->interpolated_stride,
8383 : x_mv,
8384 : y_mv,
8385 : buf1,
8386 : buf1Stride,
8387 : buf2,
8388 : buf2Stride);
8389 :
8390 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8391 :
8392 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8393 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8394 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8395 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8396 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8397 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8398 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8399 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8400 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8401 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8402 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8403 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8404 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8405 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8406 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8407 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8408 :
8409 0 : PU_QuarterPelRefinementOnTheFly(
8410 : context_ptr,
8411 0 : &context_ptr->p_best_ssd32x16[nidx],
8412 : puLcuBufferIndex,
8413 : buf1,
8414 : buf1Stride,
8415 : buf2,
8416 : buf2Stride,
8417 : 32,
8418 : 16,
8419 : x_search_area_origin,
8420 : y_search_area_origin,
8421 0 : &context_ptr->p_best_sad32x16[nidx],
8422 0 : &context_ptr->p_best_mv32x16[nidx],
8423 0 : context_ptr->psub_pel_direction32x16[nidx]);
8424 : }
8425 :
8426 : // 16x8
8427 0 : for (pu_index = 0; pu_index < 32; ++pu_index) {
8428 0 : nidx = tab16x8[pu_index];
8429 :
8430 0 : puShiftXIndex = (pu_index & 0x03) << 4;
8431 0 : puShiftYIndex = (pu_index >> 2) << 3;
8432 :
8433 0 : x_mv = _MVXT(context_ptr->p_best_mv16x8[nidx]);
8434 0 : y_mv = _MVYT(context_ptr->p_best_mv16x8[nidx]);
8435 :
8436 0 : SetQuarterPelRefinementInputsOnTheFly(
8437 : pos_Full,
8438 : FullStride,
8439 : pos_b,
8440 : pos_h,
8441 : pos_j,
8442 : context_ptr->interpolated_stride,
8443 : x_mv,
8444 : y_mv,
8445 : buf1,
8446 : buf1Stride,
8447 : buf2,
8448 : buf2Stride);
8449 :
8450 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8451 :
8452 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8453 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8454 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8455 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8456 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8457 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8458 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8459 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8460 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8461 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8462 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8463 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8464 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8465 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8466 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8467 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8468 :
8469 0 : PU_QuarterPelRefinementOnTheFly(
8470 : context_ptr,
8471 0 : &context_ptr->p_best_ssd16x8[nidx],
8472 : puLcuBufferIndex,
8473 : buf1,
8474 : buf1Stride,
8475 : buf2,
8476 : buf2Stride,
8477 : 16,
8478 : 8,
8479 : x_search_area_origin,
8480 : y_search_area_origin,
8481 0 : &context_ptr->p_best_sad16x8[nidx],
8482 0 : &context_ptr->p_best_mv16x8[nidx],
8483 0 : context_ptr->psub_pel_direction16x8[nidx]);
8484 : }
8485 :
8486 : // 32x64
8487 0 : for (pu_index = 0; pu_index < 2; ++pu_index) {
8488 0 : puShiftXIndex = pu_index << 5;
8489 0 : puShiftYIndex = 0;
8490 :
8491 0 : x_mv = _MVXT(context_ptr->p_best_mv32x64[pu_index]);
8492 0 : y_mv = _MVYT(context_ptr->p_best_mv32x64[pu_index]);
8493 :
8494 0 : SetQuarterPelRefinementInputsOnTheFly(
8495 : pos_Full,
8496 : FullStride,
8497 : pos_b,
8498 : pos_h,
8499 : pos_j,
8500 : context_ptr->interpolated_stride,
8501 : x_mv,
8502 : y_mv,
8503 : buf1,
8504 : buf1Stride,
8505 : buf2,
8506 : buf2Stride);
8507 :
8508 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8509 :
8510 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8511 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8512 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8513 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8514 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8515 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8516 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8517 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8518 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8519 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8520 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8521 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8522 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8523 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8524 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8525 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8526 :
8527 0 : PU_QuarterPelRefinementOnTheFly(
8528 : context_ptr,
8529 0 : &context_ptr->p_best_ssd32x64[pu_index],
8530 : puLcuBufferIndex,
8531 : buf1,
8532 : buf1Stride,
8533 : buf2,
8534 : buf2Stride,
8535 : 32,
8536 : 64,
8537 : x_search_area_origin,
8538 : y_search_area_origin,
8539 0 : &context_ptr->p_best_sad32x64[pu_index],
8540 0 : &context_ptr->p_best_mv32x64[pu_index],
8541 0 : context_ptr->psub_pel_direction32x64[pu_index]);
8542 : }
8543 :
8544 : // 16x32
8545 0 : for (pu_index = 0; pu_index < 8; ++pu_index) {
8546 0 : nidx = tab16x32[pu_index];
8547 :
8548 0 : puShiftXIndex = (pu_index & 0x03) << 4;
8549 0 : puShiftYIndex = (pu_index >> 2) << 5;
8550 :
8551 0 : x_mv = _MVXT(context_ptr->p_best_mv16x32[nidx]);
8552 0 : y_mv = _MVYT(context_ptr->p_best_mv16x32[nidx]);
8553 :
8554 0 : SetQuarterPelRefinementInputsOnTheFly(
8555 : pos_Full,
8556 : FullStride,
8557 : pos_b,
8558 : pos_h,
8559 : pos_j,
8560 : context_ptr->interpolated_stride,
8561 : x_mv,
8562 : y_mv,
8563 : buf1,
8564 : buf1Stride,
8565 : buf2,
8566 : buf2Stride);
8567 :
8568 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8569 :
8570 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8571 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8572 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8573 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8574 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8575 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8576 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8577 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8578 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8579 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8580 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8581 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8582 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8583 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8584 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8585 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8586 :
8587 0 : PU_QuarterPelRefinementOnTheFly(
8588 : context_ptr,
8589 0 : &context_ptr->p_best_ssd16x32[nidx],
8590 : puLcuBufferIndex,
8591 : buf1,
8592 : buf1Stride,
8593 : buf2,
8594 : buf2Stride,
8595 : 16,
8596 : 32,
8597 : x_search_area_origin,
8598 : y_search_area_origin,
8599 0 : &context_ptr->p_best_sad16x32[nidx],
8600 0 : &context_ptr->p_best_mv16x32[nidx],
8601 0 : context_ptr->psub_pel_direction16x32[nidx]);
8602 : }
8603 :
8604 : // 8x16
8605 0 : for (pu_index = 0; pu_index < 32; ++pu_index) {
8606 0 : nidx = tab8x16[pu_index];
8607 :
8608 0 : puShiftXIndex = (pu_index & 0x07) << 3;
8609 0 : puShiftYIndex = (pu_index >> 3) << 4;
8610 :
8611 0 : x_mv = _MVXT(context_ptr->p_best_mv8x16[nidx]);
8612 0 : y_mv = _MVYT(context_ptr->p_best_mv8x16[nidx]);
8613 :
8614 0 : SetQuarterPelRefinementInputsOnTheFly(
8615 : pos_Full,
8616 : FullStride,
8617 : pos_b,
8618 : pos_h,
8619 : pos_j,
8620 : context_ptr->interpolated_stride,
8621 : x_mv,
8622 : y_mv,
8623 : buf1,
8624 : buf1Stride,
8625 : buf2,
8626 : buf2Stride);
8627 :
8628 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8629 :
8630 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8631 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8632 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8633 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8634 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8635 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8636 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8637 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8638 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8639 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8640 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8641 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8642 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8643 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8644 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8645 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8646 :
8647 0 : PU_QuarterPelRefinementOnTheFly(
8648 : context_ptr,
8649 0 : &context_ptr->p_best_ssd8x16[nidx],
8650 : puLcuBufferIndex,
8651 : buf1,
8652 : buf1Stride,
8653 : buf2,
8654 : buf2Stride,
8655 : 8,
8656 : 16,
8657 : x_search_area_origin,
8658 : y_search_area_origin,
8659 0 : &context_ptr->p_best_sad8x16[nidx],
8660 0 : &context_ptr->p_best_mv8x16[nidx],
8661 0 : context_ptr->psub_pel_direction8x16[nidx]);
8662 : }
8663 :
8664 : // 32x8
8665 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
8666 0 : nidx = tab32x8[pu_index];
8667 :
8668 0 : puShiftXIndex = (pu_index & 0x01) << 5;
8669 0 : puShiftYIndex = (pu_index >> 1) << 3;
8670 :
8671 0 : x_mv = _MVXT(context_ptr->p_best_mv32x8[nidx]);
8672 0 : y_mv = _MVYT(context_ptr->p_best_mv32x8[nidx]);
8673 :
8674 0 : SetQuarterPelRefinementInputsOnTheFly(
8675 : pos_Full,
8676 : FullStride,
8677 : pos_b,
8678 : pos_h,
8679 : pos_j,
8680 : context_ptr->interpolated_stride,
8681 : x_mv,
8682 : y_mv,
8683 : buf1,
8684 : buf1Stride,
8685 : buf2,
8686 : buf2Stride);
8687 :
8688 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8689 :
8690 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8691 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8692 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8693 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8694 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8695 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8696 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8697 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8698 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8699 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8700 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8701 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8702 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8703 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8704 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8705 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8706 :
8707 0 : PU_QuarterPelRefinementOnTheFly(
8708 : context_ptr,
8709 0 : &context_ptr->p_best_ssd32x8[nidx],
8710 : puLcuBufferIndex,
8711 : buf1,
8712 : buf1Stride,
8713 : buf2,
8714 : buf2Stride,
8715 : 32,
8716 : 8,
8717 : x_search_area_origin,
8718 : y_search_area_origin,
8719 0 : &context_ptr->p_best_sad32x8[nidx],
8720 0 : &context_ptr->p_best_mv32x8[nidx],
8721 0 : context_ptr->psub_pel_direction32x8[nidx]);
8722 : }
8723 :
8724 : // 8x32
8725 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
8726 0 : nidx = tab8x32[pu_index];
8727 :
8728 0 : puShiftXIndex = (pu_index & 0x07) << 3;
8729 0 : puShiftYIndex = (pu_index >> 3) << 5;
8730 :
8731 0 : x_mv = _MVXT(context_ptr->p_best_mv8x32[nidx]);
8732 0 : y_mv = _MVYT(context_ptr->p_best_mv8x32[nidx]);
8733 :
8734 0 : SetQuarterPelRefinementInputsOnTheFly(
8735 : pos_Full,
8736 : FullStride,
8737 : pos_b,
8738 : pos_h,
8739 : pos_j,
8740 : context_ptr->interpolated_stride,
8741 : x_mv,
8742 : y_mv,
8743 : buf1,
8744 : buf1Stride,
8745 : buf2,
8746 : buf2Stride);
8747 :
8748 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8749 :
8750 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8751 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8752 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8753 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8754 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8755 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8756 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8757 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8758 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8759 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8760 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8761 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8762 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8763 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8764 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8765 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8766 :
8767 0 : PU_QuarterPelRefinementOnTheFly(
8768 : context_ptr,
8769 0 : &context_ptr->p_best_ssd8x32[nidx],
8770 : puLcuBufferIndex,
8771 : buf1,
8772 : buf1Stride,
8773 : buf2,
8774 : buf2Stride,
8775 : 8,
8776 : 32,
8777 : x_search_area_origin,
8778 : y_search_area_origin,
8779 0 : &context_ptr->p_best_sad8x32[nidx],
8780 0 : &context_ptr->p_best_mv8x32[nidx],
8781 0 : context_ptr->psub_pel_direction8x32[nidx]);
8782 : }
8783 :
8784 : // 64x16
8785 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
8786 0 : nidx = pu_index;
8787 :
8788 0 : puShiftXIndex = 0;
8789 0 : puShiftYIndex = pu_index << 4;
8790 :
8791 0 : x_mv = _MVXT(context_ptr->p_best_mv64x16[nidx]);
8792 0 : y_mv = _MVYT(context_ptr->p_best_mv64x16[nidx]);
8793 :
8794 0 : SetQuarterPelRefinementInputsOnTheFly(
8795 : pos_Full,
8796 : FullStride,
8797 : pos_b,
8798 : pos_h,
8799 : pos_j,
8800 : context_ptr->interpolated_stride,
8801 : x_mv,
8802 : y_mv,
8803 : buf1,
8804 : buf1Stride,
8805 : buf2,
8806 : buf2Stride);
8807 :
8808 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8809 :
8810 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8811 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8812 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8813 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8814 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8815 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8816 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8817 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8818 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8819 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8820 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8821 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8822 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8823 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8824 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8825 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8826 :
8827 0 : PU_QuarterPelRefinementOnTheFly(
8828 : context_ptr,
8829 0 : &context_ptr->p_best_ssd64x16[nidx],
8830 : puLcuBufferIndex,
8831 : buf1,
8832 : buf1Stride,
8833 : buf2,
8834 : buf2Stride,
8835 : 64,
8836 : 16,
8837 : x_search_area_origin,
8838 : y_search_area_origin,
8839 0 : &context_ptr->p_best_sad64x16[nidx],
8840 0 : &context_ptr->p_best_mv64x16[nidx],
8841 0 : context_ptr->psub_pel_direction64x16[nidx]);
8842 : }
8843 :
8844 : // 16x64
8845 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
8846 0 : nidx = pu_index;
8847 :
8848 0 : puShiftXIndex = pu_index << 4;
8849 0 : puShiftYIndex = 0;
8850 :
8851 0 : x_mv = _MVXT(context_ptr->p_best_mv16x64[nidx]);
8852 0 : y_mv = _MVYT(context_ptr->p_best_mv16x64[nidx]);
8853 :
8854 0 : SetQuarterPelRefinementInputsOnTheFly(
8855 : pos_Full,
8856 : FullStride,
8857 : pos_b,
8858 : pos_h,
8859 : pos_j,
8860 : context_ptr->interpolated_stride,
8861 : x_mv,
8862 : y_mv,
8863 : buf1,
8864 : buf1Stride,
8865 : buf2,
8866 : buf2Stride);
8867 :
8868 0 : puLcuBufferIndex = puShiftXIndex + puShiftYIndex * BLOCK_SIZE_64;
8869 :
8870 0 : buf1[0] = buf1[0] + puShiftXIndex + puShiftYIndex * buf1Stride[0];
8871 0 : buf2[0] = buf2[0] + puShiftXIndex + puShiftYIndex * buf2Stride[0];
8872 0 : buf1[1] = buf1[1] + puShiftXIndex + puShiftYIndex * buf1Stride[1];
8873 0 : buf2[1] = buf2[1] + puShiftXIndex + puShiftYIndex * buf2Stride[1];
8874 0 : buf1[2] = buf1[2] + puShiftXIndex + puShiftYIndex * buf1Stride[2];
8875 0 : buf2[2] = buf2[2] + puShiftXIndex + puShiftYIndex * buf2Stride[2];
8876 0 : buf1[3] = buf1[3] + puShiftXIndex + puShiftYIndex * buf1Stride[3];
8877 0 : buf2[3] = buf2[3] + puShiftXIndex + puShiftYIndex * buf2Stride[3];
8878 0 : buf1[4] = buf1[4] + puShiftXIndex + puShiftYIndex * buf1Stride[4];
8879 0 : buf2[4] = buf2[4] + puShiftXIndex + puShiftYIndex * buf2Stride[4];
8880 0 : buf1[5] = buf1[5] + puShiftXIndex + puShiftYIndex * buf1Stride[5];
8881 0 : buf2[5] = buf2[5] + puShiftXIndex + puShiftYIndex * buf2Stride[5];
8882 0 : buf1[6] = buf1[6] + puShiftXIndex + puShiftYIndex * buf1Stride[6];
8883 0 : buf2[6] = buf2[6] + puShiftXIndex + puShiftYIndex * buf2Stride[6];
8884 0 : buf1[7] = buf1[7] + puShiftXIndex + puShiftYIndex * buf1Stride[7];
8885 0 : buf2[7] = buf2[7] + puShiftXIndex + puShiftYIndex * buf2Stride[7];
8886 :
8887 0 : PU_QuarterPelRefinementOnTheFly(
8888 : context_ptr,
8889 0 : &context_ptr->p_best_ssd16x64[nidx],
8890 : puLcuBufferIndex,
8891 : buf1,
8892 : buf1Stride,
8893 : buf2,
8894 : buf2Stride,
8895 : 16,
8896 : 64,
8897 : x_search_area_origin,
8898 : y_search_area_origin,
8899 0 : &context_ptr->p_best_sad16x64[nidx],
8900 0 : &context_ptr->p_best_mv16x64[nidx],
8901 0 : context_ptr->psub_pel_direction16x64[nidx]);
8902 : }
8903 : }
8904 :
8905 0 : return;
8906 : }
8907 : #define QP_REF_OPT 1
8908 : /*******************************************
8909 : * quarter_pel_refinemnet_block
8910 : * performs Quarter Pel refinement for each block
8911 : *******************************************/
8912 0 : static void quarter_pel_refinemnet_block(
8913 : MeContext *context_ptr, // [IN] ME context Ptr, used to get SB Ptr
8914 : uint32_t *p_best_ssd,
8915 : uint32_t
8916 : src_block_index, // [IN] PU origin, used to point to source samples
8917 : uint8_t **buf1, // [IN]
8918 : uint32_t *buf1_stride,
8919 : uint8_t **buf2, // [IN]
8920 : uint32_t *buf2_stride,
8921 : uint32_t pu_width, // [IN] PU width
8922 : uint32_t pu_height, // [IN] PU height
8923 : int16_t
8924 : x_search_area_origin, // [IN] search area origin in the horizontal
8925 : // direction, used to point to reference samples
8926 : int16_t
8927 : y_search_area_origin, // [IN] search area origin in the vertical
8928 : // direction, used to point to reference samples
8929 : uint32_t candidate_mv, uint32_t *p_best_sad,
8930 : uint32_t *p_best_mv, uint16_t is_frac_candidate) {
8931 0 : int16_t x_mv = _MVXT(candidate_mv);
8932 0 : int16_t y_mv = _MVYT(candidate_mv);
8933 0 : int16_t search_Index_x = ((x_mv + 2) >> 2) - x_search_area_origin;
8934 0 : int16_t search_Index_y = ((y_mv + 2) >> 2) - y_search_area_origin;
8935 : uint64_t dist;
8936 : int16_t quarter_mv_x[8];
8937 : int16_t quarter_mv_y[8];
8938 0 : int32_t search_region_Index1 = 0;
8939 0 : int32_t search_region_Index2 = 0;
8940 0 : quarter_mv_x[0] = x_mv - 1; // L position
8941 0 : quarter_mv_x[1] = x_mv + 1; // R position
8942 0 : quarter_mv_x[2] = x_mv; // T position
8943 0 : quarter_mv_x[3] = x_mv; // B position
8944 0 : quarter_mv_x[4] = x_mv - 1; // TL position
8945 0 : quarter_mv_x[5] = x_mv + 1; // TR position
8946 0 : quarter_mv_x[6] = x_mv + 1; // BR position
8947 0 : quarter_mv_x[7] = x_mv - 1; // BL position
8948 0 : quarter_mv_y[0] = y_mv; // L position
8949 0 : quarter_mv_y[1] = y_mv; // R position
8950 0 : quarter_mv_y[2] = y_mv - 1; // T position
8951 0 : quarter_mv_y[3] = y_mv + 1; // B position
8952 0 : quarter_mv_y[4] = y_mv - 1; // TL position
8953 0 : quarter_mv_y[5] = y_mv - 1; // TR position
8954 0 : quarter_mv_y[6] = y_mv + 1; // BR position
8955 0 : quarter_mv_y[7] = y_mv + 1; // BL position
8956 : // L position
8957 0 : search_region_Index1 = (int32_t)search_Index_x +
8958 0 : (int32_t)buf1_stride[0] * (int32_t)search_Index_y;
8959 0 : search_region_Index2 = (int32_t)search_Index_x +
8960 0 : (int32_t)buf2_stride[0] * (int32_t)search_Index_y;
8961 0 : dist = (context_ptr->fractional_search_method == SSD_SEARCH)
8962 0 : ? combined_averaging_ssd(
8963 0 : &(context_ptr->sb_buffer[src_block_index]),
8964 : BLOCK_SIZE_64,
8965 0 : buf1[0] + search_region_Index1,
8966 0 : buf1_stride[0],
8967 0 : buf2[0] + search_region_Index2,
8968 0 : buf2_stride[0],
8969 : pu_height,
8970 : pu_width)
8971 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
8972 0 : ? (nxm_sad_avg_kernel(
8973 0 : &(context_ptr->sb_buffer[src_block_index]),
8974 : BLOCK_SIZE_64 << 1,
8975 0 : buf1[0] + search_region_Index1,
8976 0 : buf1_stride[0] << 1,
8977 0 : buf2[0] + search_region_Index2,
8978 0 : buf2_stride[0] << 1,
8979 : pu_height >> 1,
8980 : pu_width))
8981 0 : << 1
8982 0 : : nxm_sad_avg_kernel(
8983 0 : &(context_ptr->sb_buffer[src_block_index]),
8984 : BLOCK_SIZE_64,
8985 0 : buf1[0] + search_region_Index1,
8986 : buf1_stride[0],
8987 0 : buf2[0] + search_region_Index2,
8988 : buf2_stride[0],
8989 : pu_height,
8990 : pu_width);
8991 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
8992 0 : if (dist < *p_best_ssd) {
8993 0 : *p_best_sad = (uint32_t)
8994 0 : nxm_sad_avg_kernel(
8995 0 : &(context_ptr->sb_buffer[src_block_index]),
8996 : BLOCK_SIZE_64,
8997 0 : buf1[0] + search_region_Index1,
8998 : buf1_stride[0],
8999 0 : buf2[0] + search_region_Index2,
9000 : buf2_stride[0],
9001 : pu_height,
9002 : pu_width);
9003 0 : *p_best_mv =
9004 0 : ((uint16_t)quarter_mv_y[0] << 16) | ((uint16_t)quarter_mv_x[0]);
9005 0 : *p_best_ssd = (uint32_t)dist;
9006 : }
9007 : } else {
9008 0 : if (dist < *p_best_sad) {
9009 0 : *p_best_sad = (uint32_t)dist;
9010 0 : *p_best_mv =
9011 0 : ((uint16_t)quarter_mv_y[0] << 16) | ((uint16_t)quarter_mv_x[0]);
9012 : }
9013 : }
9014 : // R positions
9015 0 : search_region_Index1 = (int32_t)search_Index_x +
9016 0 : (int32_t)buf1_stride[1] * (int32_t)search_Index_y;
9017 0 : search_region_Index2 = (int32_t)search_Index_x +
9018 0 : (int32_t)buf2_stride[1] * (int32_t)search_Index_y;
9019 0 : dist = (context_ptr->fractional_search_method == SSD_SEARCH)
9020 0 : ? combined_averaging_ssd(
9021 0 : &(context_ptr->sb_buffer[src_block_index]),
9022 : BLOCK_SIZE_64,
9023 0 : buf1[1] + search_region_Index1,
9024 0 : buf1_stride[1],
9025 0 : buf2[1] + search_region_Index2,
9026 0 : buf2_stride[1],
9027 : pu_height,
9028 : pu_width)
9029 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
9030 0 : ? (nxm_sad_avg_kernel(
9031 0 : &(context_ptr->sb_buffer[src_block_index]),
9032 : BLOCK_SIZE_64 << 1,
9033 0 : buf1[1] + search_region_Index1,
9034 0 : buf1_stride[1] << 1,
9035 0 : buf2[1] + search_region_Index2,
9036 0 : buf2_stride[1] << 1,
9037 : pu_height >> 1,
9038 : pu_width))
9039 0 : << 1
9040 0 : : nxm_sad_avg_kernel(
9041 0 : &(context_ptr->sb_buffer[src_block_index]),
9042 : BLOCK_SIZE_64,
9043 0 : buf1[1] + search_region_Index1,
9044 0 : buf1_stride[1],
9045 0 : buf2[1] + search_region_Index2,
9046 0 : buf2_stride[1],
9047 : pu_height,
9048 : pu_width);
9049 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
9050 0 : if (dist < *p_best_ssd) {
9051 0 : *p_best_sad = (uint32_t)
9052 0 : nxm_sad_avg_kernel(
9053 0 : &(context_ptr->sb_buffer[src_block_index]),
9054 : BLOCK_SIZE_64,
9055 0 : buf1[1] + search_region_Index1,
9056 0 : buf1_stride[1],
9057 0 : buf2[1] + search_region_Index2,
9058 0 : buf2_stride[1],
9059 : pu_height,
9060 : pu_width);
9061 0 : *p_best_mv =
9062 0 : ((uint16_t)quarter_mv_y[1] << 16) | ((uint16_t)quarter_mv_x[1]);
9063 0 : *p_best_ssd = (uint32_t)dist;
9064 : }
9065 : } else {
9066 0 : if (dist < *p_best_sad) {
9067 0 : *p_best_sad = (uint32_t)dist;
9068 0 : *p_best_mv =
9069 0 : ((uint16_t)quarter_mv_y[1] << 16) | ((uint16_t)quarter_mv_x[1]);
9070 : }
9071 : }
9072 : // T position
9073 0 : search_region_Index1 = (int32_t)search_Index_x +
9074 0 : (int32_t)buf1_stride[2] * (int32_t)search_Index_y;
9075 0 : search_region_Index2 = (int32_t)search_Index_x +
9076 0 : (int32_t)buf2_stride[2] * (int32_t)search_Index_y;
9077 0 : dist = (context_ptr->fractional_search_method == SSD_SEARCH)
9078 0 : ? combined_averaging_ssd(
9079 0 : &(context_ptr->sb_buffer[src_block_index]),
9080 : BLOCK_SIZE_64,
9081 0 : buf1[2] + search_region_Index1,
9082 0 : buf1_stride[2],
9083 0 : buf2[2] + search_region_Index2,
9084 0 : buf2_stride[2],
9085 : pu_height,
9086 : pu_width)
9087 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
9088 0 : ? (nxm_sad_avg_kernel(
9089 0 : &(context_ptr->sb_buffer[src_block_index]),
9090 : BLOCK_SIZE_64 << 1,
9091 0 : buf1[2] + search_region_Index1,
9092 0 : buf1_stride[2] << 1,
9093 0 : buf2[2] + search_region_Index2,
9094 0 : buf2_stride[2] << 1,
9095 : pu_height >> 1,
9096 : pu_width))
9097 0 : << 1
9098 0 : : nxm_sad_avg_kernel(
9099 0 : &(context_ptr->sb_buffer[src_block_index]),
9100 : BLOCK_SIZE_64,
9101 0 : buf1[2] + search_region_Index1,
9102 0 : buf1_stride[2],
9103 0 : buf2[2] + search_region_Index2,
9104 0 : buf2_stride[2],
9105 : pu_height,
9106 : pu_width);
9107 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
9108 0 : if (dist < *p_best_ssd) {
9109 0 : *p_best_sad = (uint32_t)
9110 0 : nxm_sad_avg_kernel(
9111 0 : &(context_ptr->sb_buffer[src_block_index]),
9112 : BLOCK_SIZE_64,
9113 0 : buf1[2] + search_region_Index1,
9114 0 : buf1_stride[2],
9115 0 : buf2[2] + search_region_Index2,
9116 0 : buf2_stride[2],
9117 : pu_height,
9118 : pu_width);
9119 0 : *p_best_mv =
9120 0 : ((uint16_t)quarter_mv_y[2] << 16) | ((uint16_t)quarter_mv_x[2]);
9121 0 : *p_best_ssd = (uint32_t)dist;
9122 : }
9123 : } else {
9124 0 : if (dist < *p_best_sad) {
9125 0 : *p_best_sad = (uint32_t)dist;
9126 0 : *p_best_mv =
9127 0 : ((uint16_t)quarter_mv_y[2] << 16) | ((uint16_t)quarter_mv_x[2]);
9128 : }
9129 : }
9130 : // B position
9131 0 : search_region_Index1 = (int32_t)search_Index_x +
9132 0 : (int32_t)buf1_stride[3] * (int32_t)search_Index_y;
9133 0 : search_region_Index2 = (int32_t)search_Index_x +
9134 0 : (int32_t)buf2_stride[3] * (int32_t)search_Index_y;
9135 0 : dist = (context_ptr->fractional_search_method == SSD_SEARCH)
9136 0 : ? combined_averaging_ssd(
9137 0 : &(context_ptr->sb_buffer[src_block_index]),
9138 : BLOCK_SIZE_64,
9139 0 : buf1[3] + search_region_Index1,
9140 0 : buf1_stride[3],
9141 0 : buf2[3] + search_region_Index2,
9142 0 : buf2_stride[3],
9143 : pu_height,
9144 : pu_width)
9145 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
9146 0 : ? (nxm_sad_avg_kernel(
9147 0 : &(context_ptr->sb_buffer[src_block_index]),
9148 : BLOCK_SIZE_64 << 1,
9149 0 : buf1[3] + search_region_Index1,
9150 0 : buf1_stride[3] << 1,
9151 0 : buf2[3] + search_region_Index2,
9152 0 : buf2_stride[3] << 1,
9153 : pu_height >> 1,
9154 : pu_width))
9155 0 : << 1
9156 0 : : nxm_sad_avg_kernel(
9157 0 : &(context_ptr->sb_buffer[src_block_index]),
9158 : BLOCK_SIZE_64,
9159 0 : buf1[3] + search_region_Index1,
9160 0 : buf1_stride[3],
9161 0 : buf2[3] + search_region_Index2,
9162 0 : buf2_stride[3],
9163 : pu_height,
9164 : pu_width);
9165 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
9166 0 : if (dist < *p_best_ssd) {
9167 0 : *p_best_sad = (uint32_t)
9168 0 : nxm_sad_avg_kernel(
9169 0 : &(context_ptr->sb_buffer[src_block_index]),
9170 : BLOCK_SIZE_64,
9171 0 : buf1[3] + search_region_Index1,
9172 0 : buf1_stride[3],
9173 0 : buf2[3] + search_region_Index2,
9174 0 : buf2_stride[3],
9175 : pu_height,
9176 : pu_width);
9177 0 : *p_best_mv =
9178 0 : ((uint16_t)quarter_mv_y[3] << 16) | ((uint16_t)quarter_mv_x[3]);
9179 0 : *p_best_ssd = (uint32_t)dist;
9180 : }
9181 : } else {
9182 0 : if (dist < *p_best_sad) {
9183 0 : *p_best_sad = (uint32_t)dist;
9184 0 : *p_best_mv =
9185 0 : ((uint16_t)quarter_mv_y[3] << 16) | ((uint16_t)quarter_mv_x[3]);
9186 : }
9187 : }
9188 : // TL position
9189 : #if QP_REF_OPT
9190 0 : if (!is_frac_candidate) {
9191 : #endif
9192 0 : search_region_Index1 =
9193 0 : (int32_t)search_Index_x +
9194 0 : (int32_t)buf1_stride[4] * (int32_t)search_Index_y;
9195 0 : search_region_Index2 =
9196 0 : (int32_t)search_Index_x +
9197 0 : (int32_t)buf2_stride[4] * (int32_t)search_Index_y;
9198 0 : dist = (context_ptr->fractional_search_method == SSD_SEARCH)
9199 0 : ? combined_averaging_ssd(
9200 0 : &(context_ptr->sb_buffer[src_block_index]),
9201 : BLOCK_SIZE_64,
9202 0 : buf1[4] + search_region_Index1,
9203 0 : buf1_stride[4],
9204 0 : buf2[4] + search_region_Index2,
9205 0 : buf2_stride[4],
9206 : pu_height,
9207 : pu_width)
9208 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
9209 0 : ? (nxm_sad_avg_kernel(
9210 0 : &(context_ptr->sb_buffer[src_block_index]),
9211 : BLOCK_SIZE_64 << 1,
9212 0 : buf1[4] + search_region_Index1,
9213 0 : buf1_stride[4] << 1,
9214 0 : buf2[4] + search_region_Index2,
9215 0 : buf2_stride[4] << 1,
9216 : pu_height >> 1,
9217 : pu_width))
9218 0 : << 1
9219 0 : : nxm_sad_avg_kernel(
9220 0 : &(context_ptr->sb_buffer[src_block_index]),
9221 : BLOCK_SIZE_64,
9222 0 : buf1[4] + search_region_Index1,
9223 0 : buf1_stride[4],
9224 0 : buf2[4] + search_region_Index2,
9225 0 : buf2_stride[4],
9226 : pu_height,
9227 : pu_width);
9228 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
9229 0 : if (dist < *p_best_ssd) {
9230 0 : *p_best_sad = (uint32_t)nxm_sad_avg_kernel(
9231 0 : &(context_ptr->sb_buffer[src_block_index]),
9232 : BLOCK_SIZE_64,
9233 0 : buf1[4] + search_region_Index1,
9234 0 : buf1_stride[4],
9235 0 : buf2[4] + search_region_Index2,
9236 0 : buf2_stride[4],
9237 : pu_height,
9238 : pu_width);
9239 0 : *p_best_mv = ((uint16_t)quarter_mv_y[4] << 16) |
9240 0 : ((uint16_t)quarter_mv_x[4]);
9241 0 : *p_best_ssd = (uint32_t)dist;
9242 : }
9243 : } else {
9244 0 : if (dist < *p_best_sad) {
9245 0 : *p_best_sad = (uint32_t)dist;
9246 0 : *p_best_mv = ((uint16_t)quarter_mv_y[4] << 16) |
9247 0 : ((uint16_t)quarter_mv_x[4]);
9248 : }
9249 : }
9250 : // TR position
9251 0 : search_region_Index1 =
9252 0 : (int32_t)search_Index_x +
9253 0 : (int32_t)buf1_stride[5] * (int32_t)search_Index_y;
9254 0 : search_region_Index2 =
9255 0 : (int32_t)search_Index_x +
9256 0 : (int32_t)buf2_stride[5] * (int32_t)search_Index_y;
9257 0 : dist = (context_ptr->fractional_search_method == SSD_SEARCH)
9258 0 : ? combined_averaging_ssd(
9259 0 : &(context_ptr->sb_buffer[src_block_index]),
9260 : BLOCK_SIZE_64,
9261 0 : buf1[5] + search_region_Index1,
9262 0 : buf1_stride[5],
9263 0 : buf2[5] + search_region_Index2,
9264 0 : buf2_stride[5],
9265 : pu_height,
9266 : pu_width)
9267 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
9268 0 : ? (nxm_sad_avg_kernel(
9269 0 : &(context_ptr->sb_buffer[src_block_index]),
9270 : BLOCK_SIZE_64 << 1,
9271 0 : buf1[5] + search_region_Index1,
9272 0 : buf1_stride[5] << 1,
9273 0 : buf2[5] + search_region_Index2,
9274 0 : buf2_stride[5] << 1,
9275 : pu_height >> 1,
9276 : pu_width))
9277 0 : << 1
9278 0 : : nxm_sad_avg_kernel(
9279 0 : &(context_ptr->sb_buffer[src_block_index]),
9280 : BLOCK_SIZE_64,
9281 0 : buf1[5] + search_region_Index1,
9282 0 : buf1_stride[5],
9283 0 : buf2[5] + search_region_Index2,
9284 0 : buf2_stride[5],
9285 : pu_height,
9286 : pu_width);
9287 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
9288 0 : if (dist < *p_best_ssd) {
9289 0 : *p_best_sad = (uint32_t)nxm_sad_avg_kernel(
9290 0 : &(context_ptr->sb_buffer[src_block_index]),
9291 : BLOCK_SIZE_64,
9292 0 : buf1[5] + search_region_Index1,
9293 0 : buf1_stride[5],
9294 0 : buf2[5] + search_region_Index2,
9295 0 : buf2_stride[5],
9296 : pu_height,
9297 : pu_width);
9298 0 : *p_best_mv = ((uint16_t)quarter_mv_y[5] << 16) |
9299 0 : ((uint16_t)quarter_mv_x[5]);
9300 0 : *p_best_ssd = (uint32_t)dist;
9301 : }
9302 : } else {
9303 0 : if (dist < *p_best_sad) {
9304 0 : *p_best_sad = (uint32_t)dist;
9305 0 : *p_best_mv = ((uint16_t)quarter_mv_y[5] << 16) |
9306 0 : ((uint16_t)quarter_mv_x[5]);
9307 : }
9308 : }
9309 : // BR position
9310 0 : search_region_Index1 =
9311 0 : (int32_t)search_Index_x +
9312 0 : (int32_t)buf1_stride[6] * (int32_t)search_Index_y;
9313 0 : search_region_Index2 =
9314 0 : (int32_t)search_Index_x +
9315 0 : (int32_t)buf2_stride[6] * (int32_t)search_Index_y;
9316 0 : dist = (context_ptr->fractional_search_method == SSD_SEARCH)
9317 0 : ? combined_averaging_ssd(
9318 0 : &(context_ptr->sb_buffer[src_block_index]),
9319 : BLOCK_SIZE_64,
9320 0 : buf1[6] + search_region_Index1,
9321 0 : buf1_stride[6],
9322 0 : buf2[6] + search_region_Index2,
9323 0 : buf2_stride[6],
9324 : pu_height,
9325 : pu_width)
9326 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
9327 0 : ? (nxm_sad_avg_kernel(
9328 0 : &(context_ptr->sb_buffer[src_block_index]),
9329 : BLOCK_SIZE_64 << 1,
9330 0 : buf1[6] + search_region_Index1,
9331 0 : buf1_stride[6] << 1,
9332 0 : buf2[6] + search_region_Index2,
9333 0 : buf2_stride[6] << 1,
9334 : pu_height >> 1,
9335 : pu_width))
9336 0 : << 1
9337 0 : : nxm_sad_avg_kernel(
9338 0 : &(context_ptr->sb_buffer[src_block_index]),
9339 : BLOCK_SIZE_64,
9340 0 : buf1[6] + search_region_Index1,
9341 0 : buf1_stride[6],
9342 0 : buf2[6] + search_region_Index2,
9343 0 : buf2_stride[6],
9344 : pu_height,
9345 : pu_width);
9346 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
9347 0 : if (dist < *p_best_ssd) {
9348 0 : *p_best_sad = (uint32_t)nxm_sad_avg_kernel(
9349 0 : &(context_ptr->sb_buffer[src_block_index]),
9350 : BLOCK_SIZE_64,
9351 0 : buf1[6] + search_region_Index1,
9352 0 : buf1_stride[6],
9353 0 : buf2[6] + search_region_Index2,
9354 0 : buf2_stride[6],
9355 : pu_height,
9356 : pu_width);
9357 0 : *p_best_mv = ((uint16_t)quarter_mv_y[6] << 16) |
9358 0 : ((uint16_t)quarter_mv_x[6]);
9359 0 : *p_best_ssd = (uint32_t)dist;
9360 : }
9361 : } else {
9362 0 : if (dist < *p_best_sad) {
9363 0 : *p_best_sad = (uint32_t)dist;
9364 0 : *p_best_mv = ((uint16_t)quarter_mv_y[6] << 16) |
9365 0 : ((uint16_t)quarter_mv_x[6]);
9366 : }
9367 : }
9368 : // BL position
9369 0 : search_region_Index1 =
9370 0 : (int32_t)search_Index_x +
9371 0 : (int32_t)buf1_stride[7] * (int32_t)search_Index_y;
9372 0 : search_region_Index2 =
9373 0 : (int32_t)search_Index_x +
9374 0 : (int32_t)buf2_stride[7] * (int32_t)search_Index_y;
9375 0 : dist = (context_ptr->fractional_search_method == SSD_SEARCH)
9376 0 : ? combined_averaging_ssd(
9377 0 : &(context_ptr->sb_buffer[src_block_index]),
9378 : BLOCK_SIZE_64,
9379 0 : buf1[7] + search_region_Index1,
9380 0 : buf1_stride[7],
9381 0 : buf2[7] + search_region_Index2,
9382 0 : buf2_stride[7],
9383 : pu_height,
9384 : pu_width)
9385 0 : : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
9386 0 : ? (nxm_sad_avg_kernel(
9387 0 : &(context_ptr->sb_buffer[src_block_index]),
9388 : BLOCK_SIZE_64 << 1,
9389 0 : buf1[7] + search_region_Index1,
9390 0 : buf1_stride[7] << 1,
9391 0 : buf2[7] + search_region_Index2,
9392 0 : buf2_stride[7] << 1,
9393 : pu_height >> 1,
9394 : pu_width))
9395 0 : << 1
9396 0 : : nxm_sad_avg_kernel(
9397 0 : &(context_ptr->sb_buffer[src_block_index]),
9398 : BLOCK_SIZE_64,
9399 0 : buf1[7] + search_region_Index1,
9400 0 : buf1_stride[7],
9401 0 : buf2[7] + search_region_Index2,
9402 0 : buf2_stride[7],
9403 : pu_height,
9404 : pu_width);
9405 0 : if (context_ptr->fractional_search_method == SSD_SEARCH) {
9406 0 : if (dist < *p_best_ssd) {
9407 0 : *p_best_sad = (uint32_t)nxm_sad_avg_kernel(
9408 0 : &(context_ptr->sb_buffer[src_block_index]),
9409 : BLOCK_SIZE_64,
9410 0 : buf1[7] + search_region_Index1,
9411 0 : buf1_stride[7],
9412 0 : buf2[7] + search_region_Index2,
9413 0 : buf2_stride[7],
9414 : pu_height,
9415 : pu_width);
9416 0 : *p_best_mv = ((uint16_t)quarter_mv_y[7] << 16) |
9417 0 : ((uint16_t)quarter_mv_x[7]);
9418 0 : *p_best_ssd = (uint32_t)dist;
9419 : }
9420 : } else {
9421 0 : if (dist < *p_best_sad) {
9422 0 : *p_best_sad = (uint32_t)dist;
9423 0 : *p_best_mv = ((uint16_t)quarter_mv_y[7] << 16) |
9424 0 : ((uint16_t)quarter_mv_x[7]);
9425 : }
9426 : }
9427 : #if QP_REF_OPT
9428 : }
9429 : #endif
9430 0 : return;
9431 : }
9432 : /*******************************************
9433 : * quarter_pel_refinement_sb
9434 : * performs Quarter Pel refinement
9435 : *******************************************/
9436 0 : void quarter_pel_refinement_sb(
9437 : MeContext
9438 : *context_ptr, //[IN/OUT] ME context Ptr, used to get/update ME results
9439 : uint8_t *pos_full, //[IN]
9440 : uint32_t full_stride, //[IN]
9441 : uint8_t *pos_b, //[IN]
9442 : uint8_t *pos_h, //[IN]
9443 : uint8_t *pos_j, //[IN]
9444 : int16_t
9445 : x_search_area_origin, //[IN] search area origin in the horizontal
9446 : // direction, used to point to reference samples
9447 : int16_t
9448 : y_search_area_origin, //[IN] search area origin in the vertical
9449 : // direction, used to point to reference samples
9450 : uint32_t integer_mv)
9451 : {
9452 : uint32_t pu_index;
9453 : uint32_t block_index_shift_x;
9454 : uint32_t block_index_shift_y;
9455 : uint32_t src_block_index;
9456 : uint8_t *buf1[8];
9457 : uint8_t *buf2[8];
9458 : uint32_t buf1_stride[8];
9459 : uint32_t buf2_stride[8];
9460 : int16_t x_mv, y_mv;
9461 : uint32_t nidx;
9462 0 : int16_t int_x_mv = _MVXT(integer_mv);
9463 0 : int16_t int_y_mv = _MVYT(integer_mv);
9464 0 : int16_t int_xSearchIndex = ((int_x_mv + 2) >> 2) - x_search_area_origin;
9465 0 : int16_t int_ySearchIndex = ((int_y_mv + 2) >> 2) - y_search_area_origin;
9466 : int16_t x_best_mv;
9467 : int16_t y_best_mv;
9468 : int16_t best_xSearchIndex;
9469 : int16_t best_ySearchIndex;
9470 : int16_t dis_x;
9471 : int16_t dis_y;
9472 0 : int8_t skip_qp_pel = 0;
9473 : uint32_t testmv;
9474 : int16_t it;
9475 0 : int16_t num_qp_it = 2;
9476 0 : if (context_ptr->fractional_search64x64) {
9477 0 : x_best_mv = _MVXT(*context_ptr->p_best_full_pel_mv64x64);
9478 0 : y_best_mv = _MVYT(*context_ptr->p_best_full_pel_mv64x64);
9479 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
9480 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
9481 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
9482 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
9483 0 : skip_qp_pel = 0;
9484 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
9485 0 : skip_qp_pel = 1;
9486 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
9487 0 : skip_qp_pel = 1;
9488 0 : if (!skip_qp_pel) {
9489 0 : for (it = 0; it < num_qp_it; it++) {
9490 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
9491 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
9492 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
9493 0 : SetQuarterPelRefinementInputsOnTheFly(
9494 : pos_full,
9495 : full_stride,
9496 : pos_b,
9497 : pos_h,
9498 : pos_j,
9499 : context_ptr->interpolated_stride,
9500 : x_mv,
9501 : y_mv,
9502 : buf1,
9503 : buf1_stride,
9504 : buf2,
9505 : buf2_stride);
9506 0 : buf1[0] = buf1[0];
9507 0 : buf2[0] = buf2[0];
9508 0 : buf1[1] = buf1[1];
9509 0 : buf2[1] = buf2[1];
9510 0 : buf1[2] = buf1[2];
9511 0 : buf2[2] = buf2[2];
9512 0 : buf1[3] = buf1[3];
9513 0 : buf2[3] = buf2[3];
9514 0 : buf1[4] = buf1[4];
9515 0 : buf2[4] = buf2[4];
9516 0 : buf1[5] = buf1[5];
9517 0 : buf2[5] = buf2[5];
9518 0 : buf1[6] = buf1[6];
9519 0 : buf2[6] = buf2[6];
9520 0 : buf1[7] = buf1[7];
9521 0 : buf2[7] = buf2[7];
9522 0 : quarter_pel_refinemnet_block(context_ptr,
9523 : context_ptr->p_best_ssd64x64,
9524 : 0,
9525 : buf1,
9526 : buf1_stride,
9527 : buf2,
9528 : buf2_stride,
9529 : 64,
9530 : 64,
9531 : x_search_area_origin,
9532 : y_search_area_origin,
9533 : testmv,
9534 : context_ptr->p_best_sad64x64,
9535 : context_ptr->p_best_mv64x64,
9536 : it);
9537 : }
9538 : }
9539 : }
9540 : // 32x32 [4 partitions]
9541 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
9542 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv32x32[pu_index]);
9543 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv32x32[pu_index]);
9544 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
9545 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
9546 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
9547 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
9548 0 : skip_qp_pel = 0;
9549 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
9550 0 : skip_qp_pel = 1;
9551 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
9552 0 : skip_qp_pel = 1;
9553 0 : if (!skip_qp_pel) {
9554 0 : for (it = 0; it < num_qp_it; it++) {
9555 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
9556 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
9557 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
9558 0 : SetQuarterPelRefinementInputsOnTheFly(
9559 : pos_full,
9560 : full_stride,
9561 : pos_b,
9562 : pos_h,
9563 : pos_j,
9564 : context_ptr->interpolated_stride,
9565 : x_mv,
9566 : y_mv,
9567 : buf1,
9568 : buf1_stride,
9569 : buf2,
9570 : buf2_stride);
9571 0 : block_index_shift_x = (pu_index & 0x01) << 5;
9572 0 : block_index_shift_y = (pu_index >> 1) << 5;
9573 0 : src_block_index =
9574 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
9575 0 : buf1[0] = buf1[0] + block_index_shift_x +
9576 0 : block_index_shift_y * buf1_stride[0];
9577 0 : buf2[0] = buf2[0] + block_index_shift_x +
9578 0 : block_index_shift_y * buf2_stride[0];
9579 0 : buf1[1] = buf1[1] + block_index_shift_x +
9580 0 : block_index_shift_y * buf1_stride[1];
9581 0 : buf2[1] = buf2[1] + block_index_shift_x +
9582 0 : block_index_shift_y * buf2_stride[1];
9583 0 : buf1[2] = buf1[2] + block_index_shift_x +
9584 0 : block_index_shift_y * buf1_stride[2];
9585 0 : buf2[2] = buf2[2] + block_index_shift_x +
9586 0 : block_index_shift_y * buf2_stride[2];
9587 0 : buf1[3] = buf1[3] + block_index_shift_x +
9588 0 : block_index_shift_y * buf1_stride[3];
9589 0 : buf2[3] = buf2[3] + block_index_shift_x +
9590 0 : block_index_shift_y * buf2_stride[3];
9591 0 : buf1[4] = buf1[4] + block_index_shift_x +
9592 0 : block_index_shift_y * buf1_stride[4];
9593 0 : buf2[4] = buf2[4] + block_index_shift_x +
9594 0 : block_index_shift_y * buf2_stride[4];
9595 0 : buf1[5] = buf1[5] + block_index_shift_x +
9596 0 : block_index_shift_y * buf1_stride[5];
9597 0 : buf2[5] = buf2[5] + block_index_shift_x +
9598 0 : block_index_shift_y * buf2_stride[5];
9599 0 : buf1[6] = buf1[6] + block_index_shift_x +
9600 0 : block_index_shift_y * buf1_stride[6];
9601 0 : buf2[6] = buf2[6] + block_index_shift_x +
9602 0 : block_index_shift_y * buf2_stride[6];
9603 0 : buf1[7] = buf1[7] + block_index_shift_x +
9604 0 : block_index_shift_y * buf1_stride[7];
9605 0 : buf2[7] = buf2[7] + block_index_shift_x +
9606 0 : block_index_shift_y * buf2_stride[7];
9607 0 : quarter_pel_refinemnet_block(
9608 : context_ptr,
9609 0 : &context_ptr->p_best_ssd32x32[pu_index],
9610 : src_block_index,
9611 : buf1,
9612 : buf1_stride,
9613 : buf2,
9614 : buf2_stride,
9615 : 32,
9616 : 32,
9617 : x_search_area_origin,
9618 : y_search_area_origin,
9619 : testmv,
9620 0 : &context_ptr->p_best_sad32x32[pu_index],
9621 0 : &context_ptr->p_best_mv32x32[pu_index],
9622 : it);
9623 : }
9624 : }
9625 : }
9626 : // 16x16 [16 partitions]
9627 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
9628 0 : nidx = tab16x16[pu_index];
9629 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv16x16[nidx]);
9630 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv16x16[nidx]);
9631 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
9632 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
9633 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
9634 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
9635 0 : skip_qp_pel = 0;
9636 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
9637 0 : skip_qp_pel = 1;
9638 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
9639 0 : skip_qp_pel = 1;
9640 0 : if (!skip_qp_pel) {
9641 0 : for (it = 0; it < num_qp_it; it++) {
9642 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
9643 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
9644 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
9645 0 : SetQuarterPelRefinementInputsOnTheFly(
9646 : pos_full,
9647 : full_stride,
9648 : pos_b,
9649 : pos_h,
9650 : pos_j,
9651 : context_ptr->interpolated_stride,
9652 : x_mv,
9653 : y_mv,
9654 : buf1,
9655 : buf1_stride,
9656 : buf2,
9657 : buf2_stride);
9658 0 : block_index_shift_x = (pu_index & 0x03) << 4;
9659 0 : block_index_shift_y = (pu_index >> 2) << 4;
9660 0 : src_block_index =
9661 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
9662 0 : buf1[0] = buf1[0] + block_index_shift_x +
9663 0 : block_index_shift_y * buf1_stride[0];
9664 0 : buf2[0] = buf2[0] + block_index_shift_x +
9665 0 : block_index_shift_y * buf2_stride[0];
9666 0 : buf1[1] = buf1[1] + block_index_shift_x +
9667 0 : block_index_shift_y * buf1_stride[1];
9668 0 : buf2[1] = buf2[1] + block_index_shift_x +
9669 0 : block_index_shift_y * buf2_stride[1];
9670 0 : buf1[2] = buf1[2] + block_index_shift_x +
9671 0 : block_index_shift_y * buf1_stride[2];
9672 0 : buf2[2] = buf2[2] + block_index_shift_x +
9673 0 : block_index_shift_y * buf2_stride[2];
9674 0 : buf1[3] = buf1[3] + block_index_shift_x +
9675 0 : block_index_shift_y * buf1_stride[3];
9676 0 : buf2[3] = buf2[3] + block_index_shift_x +
9677 0 : block_index_shift_y * buf2_stride[3];
9678 0 : buf1[4] = buf1[4] + block_index_shift_x +
9679 0 : block_index_shift_y * buf1_stride[4];
9680 0 : buf2[4] = buf2[4] + block_index_shift_x +
9681 0 : block_index_shift_y * buf2_stride[4];
9682 0 : buf1[5] = buf1[5] + block_index_shift_x +
9683 0 : block_index_shift_y * buf1_stride[5];
9684 0 : buf2[5] = buf2[5] + block_index_shift_x +
9685 0 : block_index_shift_y * buf2_stride[5];
9686 0 : buf1[6] = buf1[6] + block_index_shift_x +
9687 0 : block_index_shift_y * buf1_stride[6];
9688 0 : buf2[6] = buf2[6] + block_index_shift_x +
9689 0 : block_index_shift_y * buf2_stride[6];
9690 0 : buf1[7] = buf1[7] + block_index_shift_x +
9691 0 : block_index_shift_y * buf1_stride[7];
9692 0 : buf2[7] = buf2[7] + block_index_shift_x +
9693 0 : block_index_shift_y * buf2_stride[7];
9694 0 : quarter_pel_refinemnet_block(
9695 : context_ptr,
9696 0 : &context_ptr->p_best_ssd16x16[nidx],
9697 : src_block_index,
9698 : buf1,
9699 : buf1_stride,
9700 : buf2,
9701 : buf2_stride,
9702 : 16,
9703 : 16,
9704 : x_search_area_origin,
9705 : y_search_area_origin,
9706 : testmv,
9707 0 : &context_ptr->p_best_sad16x16[nidx],
9708 0 : &context_ptr->p_best_mv16x16[nidx],
9709 : it);
9710 : }
9711 : }
9712 : }
9713 : // 8x8 [64 partitions]
9714 0 : for (pu_index = 0; pu_index < 64; ++pu_index) {
9715 0 : nidx = tab8x8[pu_index];
9716 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv8x8[nidx]);
9717 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv8x8[nidx]);
9718 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
9719 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
9720 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
9721 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
9722 0 : skip_qp_pel = 0;
9723 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
9724 0 : skip_qp_pel = 1;
9725 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
9726 0 : skip_qp_pel = 1;
9727 0 : if (!skip_qp_pel) {
9728 0 : for (it = 0; it < num_qp_it; it++) {
9729 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
9730 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
9731 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
9732 0 : SetQuarterPelRefinementInputsOnTheFly(
9733 : pos_full,
9734 : full_stride,
9735 : pos_b,
9736 : pos_h,
9737 : pos_j,
9738 : context_ptr->interpolated_stride,
9739 : x_mv,
9740 : y_mv,
9741 : buf1,
9742 : buf1_stride,
9743 : buf2,
9744 : buf2_stride);
9745 0 : block_index_shift_x = (pu_index & 0x07) << 3;
9746 0 : block_index_shift_y = (pu_index >> 3) << 3;
9747 0 : src_block_index =
9748 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
9749 0 : buf1[0] = buf1[0] + block_index_shift_x +
9750 0 : block_index_shift_y * buf1_stride[0];
9751 0 : buf2[0] = buf2[0] + block_index_shift_x +
9752 0 : block_index_shift_y * buf2_stride[0];
9753 0 : buf1[1] = buf1[1] + block_index_shift_x +
9754 0 : block_index_shift_y * buf1_stride[1];
9755 0 : buf2[1] = buf2[1] + block_index_shift_x +
9756 0 : block_index_shift_y * buf2_stride[1];
9757 0 : buf1[2] = buf1[2] + block_index_shift_x +
9758 0 : block_index_shift_y * buf1_stride[2];
9759 0 : buf2[2] = buf2[2] + block_index_shift_x +
9760 0 : block_index_shift_y * buf2_stride[2];
9761 0 : buf1[3] = buf1[3] + block_index_shift_x +
9762 0 : block_index_shift_y * buf1_stride[3];
9763 0 : buf2[3] = buf2[3] + block_index_shift_x +
9764 0 : block_index_shift_y * buf2_stride[3];
9765 0 : buf1[4] = buf1[4] + block_index_shift_x +
9766 0 : block_index_shift_y * buf1_stride[4];
9767 0 : buf2[4] = buf2[4] + block_index_shift_x +
9768 0 : block_index_shift_y * buf2_stride[4];
9769 0 : buf1[5] = buf1[5] + block_index_shift_x +
9770 0 : block_index_shift_y * buf1_stride[5];
9771 0 : buf2[5] = buf2[5] + block_index_shift_x +
9772 0 : block_index_shift_y * buf2_stride[5];
9773 0 : buf1[6] = buf1[6] + block_index_shift_x +
9774 0 : block_index_shift_y * buf1_stride[6];
9775 0 : buf2[6] = buf2[6] + block_index_shift_x +
9776 0 : block_index_shift_y * buf2_stride[6];
9777 0 : buf1[7] = buf1[7] + block_index_shift_x +
9778 0 : block_index_shift_y * buf1_stride[7];
9779 0 : buf2[7] = buf2[7] + block_index_shift_x +
9780 0 : block_index_shift_y * buf2_stride[7];
9781 0 : quarter_pel_refinemnet_block(context_ptr,
9782 0 : &context_ptr->p_best_ssd8x8[nidx],
9783 : src_block_index,
9784 : buf1,
9785 : buf1_stride,
9786 : buf2,
9787 : buf2_stride,
9788 : 8,
9789 : 8,
9790 : x_search_area_origin,
9791 : y_search_area_origin,
9792 : testmv,
9793 0 : &context_ptr->p_best_sad8x8[nidx],
9794 0 : &context_ptr->p_best_mv8x8[nidx],
9795 : it);
9796 : }
9797 : }
9798 : }
9799 : // 64x32
9800 0 : for (pu_index = 0; pu_index < 2; ++pu_index) {
9801 0 : block_index_shift_x = 0;
9802 0 : block_index_shift_y = pu_index << 5;
9803 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv64x32[pu_index]);
9804 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv64x32[pu_index]);
9805 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
9806 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
9807 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
9808 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
9809 0 : skip_qp_pel = 0;
9810 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
9811 0 : skip_qp_pel = 1;
9812 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
9813 0 : skip_qp_pel = 1;
9814 0 : if (!skip_qp_pel) {
9815 0 : for (it = 0; it < num_qp_it; it++) {
9816 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
9817 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
9818 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
9819 0 : SetQuarterPelRefinementInputsOnTheFly(
9820 : pos_full,
9821 : full_stride,
9822 : pos_b,
9823 : pos_h,
9824 : pos_j,
9825 : context_ptr->interpolated_stride,
9826 : x_mv,
9827 : y_mv,
9828 : buf1,
9829 : buf1_stride,
9830 : buf2,
9831 : buf2_stride);
9832 0 : src_block_index =
9833 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
9834 0 : buf1[0] = buf1[0] + block_index_shift_x +
9835 0 : block_index_shift_y * buf1_stride[0];
9836 0 : buf2[0] = buf2[0] + block_index_shift_x +
9837 0 : block_index_shift_y * buf2_stride[0];
9838 0 : buf1[1] = buf1[1] + block_index_shift_x +
9839 0 : block_index_shift_y * buf1_stride[1];
9840 0 : buf2[1] = buf2[1] + block_index_shift_x +
9841 0 : block_index_shift_y * buf2_stride[1];
9842 0 : buf1[2] = buf1[2] + block_index_shift_x +
9843 0 : block_index_shift_y * buf1_stride[2];
9844 0 : buf2[2] = buf2[2] + block_index_shift_x +
9845 0 : block_index_shift_y * buf2_stride[2];
9846 0 : buf1[3] = buf1[3] + block_index_shift_x +
9847 0 : block_index_shift_y * buf1_stride[3];
9848 0 : buf2[3] = buf2[3] + block_index_shift_x +
9849 0 : block_index_shift_y * buf2_stride[3];
9850 0 : buf1[4] = buf1[4] + block_index_shift_x +
9851 0 : block_index_shift_y * buf1_stride[4];
9852 0 : buf2[4] = buf2[4] + block_index_shift_x +
9853 0 : block_index_shift_y * buf2_stride[4];
9854 0 : buf1[5] = buf1[5] + block_index_shift_x +
9855 0 : block_index_shift_y * buf1_stride[5];
9856 0 : buf2[5] = buf2[5] + block_index_shift_x +
9857 0 : block_index_shift_y * buf2_stride[5];
9858 0 : buf1[6] = buf1[6] + block_index_shift_x +
9859 0 : block_index_shift_y * buf1_stride[6];
9860 0 : buf2[6] = buf2[6] + block_index_shift_x +
9861 0 : block_index_shift_y * buf2_stride[6];
9862 0 : buf1[7] = buf1[7] + block_index_shift_x +
9863 0 : block_index_shift_y * buf1_stride[7];
9864 0 : buf2[7] = buf2[7] + block_index_shift_x +
9865 0 : block_index_shift_y * buf2_stride[7];
9866 0 : quarter_pel_refinemnet_block(
9867 : context_ptr,
9868 0 : &context_ptr->p_best_ssd64x32[pu_index],
9869 : src_block_index,
9870 : buf1,
9871 : buf1_stride,
9872 : buf2,
9873 : buf2_stride,
9874 : 64,
9875 : 32,
9876 : x_search_area_origin,
9877 : y_search_area_origin,
9878 : testmv,
9879 0 : &context_ptr->p_best_sad64x32[pu_index],
9880 0 : &context_ptr->p_best_mv64x32[pu_index],
9881 : it);
9882 : }
9883 : }
9884 : }
9885 : // 32x16
9886 0 : for (pu_index = 0; pu_index < 8; ++pu_index) {
9887 0 : nidx = tab32x16[pu_index];
9888 0 : block_index_shift_x = (pu_index & 0x01) << 5;
9889 0 : block_index_shift_y = (pu_index >> 1) << 4;
9890 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv32x16[nidx]);
9891 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv32x16[nidx]);
9892 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
9893 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
9894 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
9895 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
9896 0 : skip_qp_pel = 0;
9897 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
9898 0 : skip_qp_pel = 1;
9899 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
9900 0 : skip_qp_pel = 1;
9901 0 : if (!skip_qp_pel) {
9902 0 : for (it = 0; it < num_qp_it; it++) {
9903 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
9904 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
9905 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
9906 0 : SetQuarterPelRefinementInputsOnTheFly(
9907 : pos_full,
9908 : full_stride,
9909 : pos_b,
9910 : pos_h,
9911 : pos_j,
9912 : context_ptr->interpolated_stride,
9913 : x_mv,
9914 : y_mv,
9915 : buf1,
9916 : buf1_stride,
9917 : buf2,
9918 : buf2_stride);
9919 0 : src_block_index =
9920 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
9921 0 : buf1[0] = buf1[0] + block_index_shift_x +
9922 0 : block_index_shift_y * buf1_stride[0];
9923 0 : buf2[0] = buf2[0] + block_index_shift_x +
9924 0 : block_index_shift_y * buf2_stride[0];
9925 0 : buf1[1] = buf1[1] + block_index_shift_x +
9926 0 : block_index_shift_y * buf1_stride[1];
9927 0 : buf2[1] = buf2[1] + block_index_shift_x +
9928 0 : block_index_shift_y * buf2_stride[1];
9929 0 : buf1[2] = buf1[2] + block_index_shift_x +
9930 0 : block_index_shift_y * buf1_stride[2];
9931 0 : buf2[2] = buf2[2] + block_index_shift_x +
9932 0 : block_index_shift_y * buf2_stride[2];
9933 0 : buf1[3] = buf1[3] + block_index_shift_x +
9934 0 : block_index_shift_y * buf1_stride[3];
9935 0 : buf2[3] = buf2[3] + block_index_shift_x +
9936 0 : block_index_shift_y * buf2_stride[3];
9937 0 : buf1[4] = buf1[4] + block_index_shift_x +
9938 0 : block_index_shift_y * buf1_stride[4];
9939 0 : buf2[4] = buf2[4] + block_index_shift_x +
9940 0 : block_index_shift_y * buf2_stride[4];
9941 0 : buf1[5] = buf1[5] + block_index_shift_x +
9942 0 : block_index_shift_y * buf1_stride[5];
9943 0 : buf2[5] = buf2[5] + block_index_shift_x +
9944 0 : block_index_shift_y * buf2_stride[5];
9945 0 : buf1[6] = buf1[6] + block_index_shift_x +
9946 0 : block_index_shift_y * buf1_stride[6];
9947 0 : buf2[6] = buf2[6] + block_index_shift_x +
9948 0 : block_index_shift_y * buf2_stride[6];
9949 0 : buf1[7] = buf1[7] + block_index_shift_x +
9950 0 : block_index_shift_y * buf1_stride[7];
9951 0 : buf2[7] = buf2[7] + block_index_shift_x +
9952 0 : block_index_shift_y * buf2_stride[7];
9953 0 : quarter_pel_refinemnet_block(
9954 : context_ptr,
9955 0 : &context_ptr->p_best_ssd32x16[nidx],
9956 : src_block_index,
9957 : buf1,
9958 : buf1_stride,
9959 : buf2,
9960 : buf2_stride,
9961 : 32,
9962 : 16,
9963 : x_search_area_origin,
9964 : y_search_area_origin,
9965 : testmv,
9966 0 : &context_ptr->p_best_sad32x16[nidx],
9967 0 : &context_ptr->p_best_mv32x16[nidx],
9968 : it);
9969 : }
9970 : }
9971 : }
9972 : // 16x8
9973 0 : for (pu_index = 0; pu_index < 32; ++pu_index) {
9974 0 : nidx = tab16x8[pu_index];
9975 0 : block_index_shift_x = (pu_index & 0x03) << 4;
9976 0 : block_index_shift_y = (pu_index >> 2) << 3;
9977 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv16x8[nidx]);
9978 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv16x8[nidx]);
9979 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
9980 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
9981 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
9982 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
9983 0 : skip_qp_pel = 0;
9984 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
9985 0 : skip_qp_pel = 1;
9986 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
9987 0 : skip_qp_pel = 1;
9988 0 : if (!skip_qp_pel) {
9989 0 : for (it = 0; it < num_qp_it; it++) {
9990 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
9991 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
9992 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
9993 0 : SetQuarterPelRefinementInputsOnTheFly(
9994 : pos_full,
9995 : full_stride,
9996 : pos_b,
9997 : pos_h,
9998 : pos_j,
9999 : context_ptr->interpolated_stride,
10000 : x_mv,
10001 : y_mv,
10002 : buf1,
10003 : buf1_stride,
10004 : buf2,
10005 : buf2_stride);
10006 0 : src_block_index =
10007 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
10008 0 : buf1[0] = buf1[0] + block_index_shift_x +
10009 0 : block_index_shift_y * buf1_stride[0];
10010 0 : buf2[0] = buf2[0] + block_index_shift_x +
10011 0 : block_index_shift_y * buf2_stride[0];
10012 0 : buf1[1] = buf1[1] + block_index_shift_x +
10013 0 : block_index_shift_y * buf1_stride[1];
10014 0 : buf2[1] = buf2[1] + block_index_shift_x +
10015 0 : block_index_shift_y * buf2_stride[1];
10016 0 : buf1[2] = buf1[2] + block_index_shift_x +
10017 0 : block_index_shift_y * buf1_stride[2];
10018 0 : buf2[2] = buf2[2] + block_index_shift_x +
10019 0 : block_index_shift_y * buf2_stride[2];
10020 0 : buf1[3] = buf1[3] + block_index_shift_x +
10021 0 : block_index_shift_y * buf1_stride[3];
10022 0 : buf2[3] = buf2[3] + block_index_shift_x +
10023 0 : block_index_shift_y * buf2_stride[3];
10024 0 : buf1[4] = buf1[4] + block_index_shift_x +
10025 0 : block_index_shift_y * buf1_stride[4];
10026 0 : buf2[4] = buf2[4] + block_index_shift_x +
10027 0 : block_index_shift_y * buf2_stride[4];
10028 0 : buf1[5] = buf1[5] + block_index_shift_x +
10029 0 : block_index_shift_y * buf1_stride[5];
10030 0 : buf2[5] = buf2[5] + block_index_shift_x +
10031 0 : block_index_shift_y * buf2_stride[5];
10032 0 : buf1[6] = buf1[6] + block_index_shift_x +
10033 0 : block_index_shift_y * buf1_stride[6];
10034 0 : buf2[6] = buf2[6] + block_index_shift_x +
10035 0 : block_index_shift_y * buf2_stride[6];
10036 0 : buf1[7] = buf1[7] + block_index_shift_x +
10037 0 : block_index_shift_y * buf1_stride[7];
10038 0 : buf2[7] = buf2[7] + block_index_shift_x +
10039 0 : block_index_shift_y * buf2_stride[7];
10040 0 : quarter_pel_refinemnet_block(context_ptr,
10041 0 : &context_ptr->p_best_ssd16x8[nidx],
10042 : src_block_index,
10043 : buf1,
10044 : buf1_stride,
10045 : buf2,
10046 : buf2_stride,
10047 : 16,
10048 : 8,
10049 : x_search_area_origin,
10050 : y_search_area_origin,
10051 : testmv,
10052 0 : &context_ptr->p_best_sad16x8[nidx],
10053 0 : &context_ptr->p_best_mv16x8[nidx],
10054 : it);
10055 : }
10056 : }
10057 : }
10058 : // 32x64
10059 0 : for (pu_index = 0; pu_index < 2; ++pu_index) {
10060 0 : block_index_shift_x = pu_index << 5;
10061 0 : block_index_shift_y = 0;
10062 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv32x64[pu_index]);
10063 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv32x64[pu_index]);
10064 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
10065 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
10066 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
10067 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
10068 0 : skip_qp_pel = 0;
10069 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
10070 0 : skip_qp_pel = 1;
10071 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
10072 0 : skip_qp_pel = 1;
10073 0 : if (!skip_qp_pel) {
10074 0 : for (it = 0; it < num_qp_it; it++) {
10075 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
10076 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
10077 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
10078 0 : SetQuarterPelRefinementInputsOnTheFly(
10079 : pos_full,
10080 : full_stride,
10081 : pos_b,
10082 : pos_h,
10083 : pos_j,
10084 : context_ptr->interpolated_stride,
10085 : x_mv,
10086 : y_mv,
10087 : buf1,
10088 : buf1_stride,
10089 : buf2,
10090 : buf2_stride);
10091 0 : src_block_index =
10092 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
10093 0 : buf1[0] = buf1[0] + block_index_shift_x +
10094 0 : block_index_shift_y * buf1_stride[0];
10095 0 : buf2[0] = buf2[0] + block_index_shift_x +
10096 0 : block_index_shift_y * buf2_stride[0];
10097 0 : buf1[1] = buf1[1] + block_index_shift_x +
10098 0 : block_index_shift_y * buf1_stride[1];
10099 0 : buf2[1] = buf2[1] + block_index_shift_x +
10100 0 : block_index_shift_y * buf2_stride[1];
10101 0 : buf1[2] = buf1[2] + block_index_shift_x +
10102 0 : block_index_shift_y * buf1_stride[2];
10103 0 : buf2[2] = buf2[2] + block_index_shift_x +
10104 0 : block_index_shift_y * buf2_stride[2];
10105 0 : buf1[3] = buf1[3] + block_index_shift_x +
10106 0 : block_index_shift_y * buf1_stride[3];
10107 0 : buf2[3] = buf2[3] + block_index_shift_x +
10108 0 : block_index_shift_y * buf2_stride[3];
10109 0 : buf1[4] = buf1[4] + block_index_shift_x +
10110 0 : block_index_shift_y * buf1_stride[4];
10111 0 : buf2[4] = buf2[4] + block_index_shift_x +
10112 0 : block_index_shift_y * buf2_stride[4];
10113 0 : buf1[5] = buf1[5] + block_index_shift_x +
10114 0 : block_index_shift_y * buf1_stride[5];
10115 0 : buf2[5] = buf2[5] + block_index_shift_x +
10116 0 : block_index_shift_y * buf2_stride[5];
10117 0 : buf1[6] = buf1[6] + block_index_shift_x +
10118 0 : block_index_shift_y * buf1_stride[6];
10119 0 : buf2[6] = buf2[6] + block_index_shift_x +
10120 0 : block_index_shift_y * buf2_stride[6];
10121 0 : buf1[7] = buf1[7] + block_index_shift_x +
10122 0 : block_index_shift_y * buf1_stride[7];
10123 0 : buf2[7] = buf2[7] + block_index_shift_x +
10124 0 : block_index_shift_y * buf2_stride[7];
10125 0 : quarter_pel_refinemnet_block(
10126 : context_ptr,
10127 0 : &context_ptr->p_best_ssd32x64[pu_index],
10128 : src_block_index,
10129 : buf1,
10130 : buf1_stride,
10131 : buf2,
10132 : buf2_stride,
10133 : 32,
10134 : 64,
10135 : x_search_area_origin,
10136 : y_search_area_origin,
10137 : testmv,
10138 0 : &context_ptr->p_best_sad32x64[pu_index],
10139 0 : &context_ptr->p_best_mv32x64[pu_index],
10140 : it);
10141 : }
10142 : }
10143 : }
10144 : // 16x32
10145 0 : for (pu_index = 0; pu_index < 8; ++pu_index) {
10146 0 : nidx = tab16x32[pu_index];
10147 0 : block_index_shift_x = (pu_index & 0x03) << 4;
10148 0 : block_index_shift_y = (pu_index >> 2) << 5;
10149 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv16x32[nidx]);
10150 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv16x32[nidx]);
10151 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
10152 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
10153 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
10154 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
10155 0 : skip_qp_pel = 0;
10156 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
10157 0 : skip_qp_pel = 1;
10158 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
10159 0 : skip_qp_pel = 1;
10160 0 : if (!skip_qp_pel) {
10161 0 : for (it = 0; it < num_qp_it; it++) {
10162 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
10163 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
10164 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
10165 0 : SetQuarterPelRefinementInputsOnTheFly(
10166 : pos_full,
10167 : full_stride,
10168 : pos_b,
10169 : pos_h,
10170 : pos_j,
10171 : context_ptr->interpolated_stride,
10172 : x_mv,
10173 : y_mv,
10174 : buf1,
10175 : buf1_stride,
10176 : buf2,
10177 : buf2_stride);
10178 0 : src_block_index =
10179 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
10180 0 : buf1[0] = buf1[0] + block_index_shift_x +
10181 0 : block_index_shift_y * buf1_stride[0];
10182 0 : buf2[0] = buf2[0] + block_index_shift_x +
10183 0 : block_index_shift_y * buf2_stride[0];
10184 0 : buf1[1] = buf1[1] + block_index_shift_x +
10185 0 : block_index_shift_y * buf1_stride[1];
10186 0 : buf2[1] = buf2[1] + block_index_shift_x +
10187 0 : block_index_shift_y * buf2_stride[1];
10188 0 : buf1[2] = buf1[2] + block_index_shift_x +
10189 0 : block_index_shift_y * buf1_stride[2];
10190 0 : buf2[2] = buf2[2] + block_index_shift_x +
10191 0 : block_index_shift_y * buf2_stride[2];
10192 0 : buf1[3] = buf1[3] + block_index_shift_x +
10193 0 : block_index_shift_y * buf1_stride[3];
10194 0 : buf2[3] = buf2[3] + block_index_shift_x +
10195 0 : block_index_shift_y * buf2_stride[3];
10196 0 : buf1[4] = buf1[4] + block_index_shift_x +
10197 0 : block_index_shift_y * buf1_stride[4];
10198 0 : buf2[4] = buf2[4] + block_index_shift_x +
10199 0 : block_index_shift_y * buf2_stride[4];
10200 0 : buf1[5] = buf1[5] + block_index_shift_x +
10201 0 : block_index_shift_y * buf1_stride[5];
10202 0 : buf2[5] = buf2[5] + block_index_shift_x +
10203 0 : block_index_shift_y * buf2_stride[5];
10204 0 : buf1[6] = buf1[6] + block_index_shift_x +
10205 0 : block_index_shift_y * buf1_stride[6];
10206 0 : buf2[6] = buf2[6] + block_index_shift_x +
10207 0 : block_index_shift_y * buf2_stride[6];
10208 0 : buf1[7] = buf1[7] + block_index_shift_x +
10209 0 : block_index_shift_y * buf1_stride[7];
10210 0 : buf2[7] = buf2[7] + block_index_shift_x +
10211 0 : block_index_shift_y * buf2_stride[7];
10212 0 : quarter_pel_refinemnet_block(
10213 : context_ptr,
10214 0 : &context_ptr->p_best_ssd16x32[nidx],
10215 : src_block_index,
10216 : buf1,
10217 : buf1_stride,
10218 : buf2,
10219 : buf2_stride,
10220 : 16,
10221 : 32,
10222 : x_search_area_origin,
10223 : y_search_area_origin,
10224 : testmv,
10225 0 : &context_ptr->p_best_sad16x32[nidx],
10226 0 : &context_ptr->p_best_mv16x32[nidx],
10227 : it);
10228 : }
10229 : }
10230 : }
10231 : // 8x16
10232 0 : for (pu_index = 0; pu_index < 32; ++pu_index) {
10233 0 : nidx = tab8x16[pu_index];
10234 0 : block_index_shift_x = (pu_index & 0x07) << 3;
10235 0 : block_index_shift_y = (pu_index >> 3) << 4;
10236 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv8x16[nidx]);
10237 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv8x16[nidx]);
10238 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
10239 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
10240 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
10241 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
10242 0 : skip_qp_pel = 0;
10243 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
10244 0 : skip_qp_pel = 1;
10245 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
10246 0 : skip_qp_pel = 1;
10247 0 : if (!skip_qp_pel) {
10248 0 : for (it = 0; it < num_qp_it; it++) {
10249 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
10250 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
10251 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
10252 0 : SetQuarterPelRefinementInputsOnTheFly(
10253 : pos_full,
10254 : full_stride,
10255 : pos_b,
10256 : pos_h,
10257 : pos_j,
10258 : context_ptr->interpolated_stride,
10259 : x_mv,
10260 : y_mv,
10261 : buf1,
10262 : buf1_stride,
10263 : buf2,
10264 : buf2_stride);
10265 0 : src_block_index =
10266 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
10267 0 : buf1[0] = buf1[0] + block_index_shift_x +
10268 0 : block_index_shift_y * buf1_stride[0];
10269 0 : buf2[0] = buf2[0] + block_index_shift_x +
10270 0 : block_index_shift_y * buf2_stride[0];
10271 0 : buf1[1] = buf1[1] + block_index_shift_x +
10272 0 : block_index_shift_y * buf1_stride[1];
10273 0 : buf2[1] = buf2[1] + block_index_shift_x +
10274 0 : block_index_shift_y * buf2_stride[1];
10275 0 : buf1[2] = buf1[2] + block_index_shift_x +
10276 0 : block_index_shift_y * buf1_stride[2];
10277 0 : buf2[2] = buf2[2] + block_index_shift_x +
10278 0 : block_index_shift_y * buf2_stride[2];
10279 0 : buf1[3] = buf1[3] + block_index_shift_x +
10280 0 : block_index_shift_y * buf1_stride[3];
10281 0 : buf2[3] = buf2[3] + block_index_shift_x +
10282 0 : block_index_shift_y * buf2_stride[3];
10283 0 : buf1[4] = buf1[4] + block_index_shift_x +
10284 0 : block_index_shift_y * buf1_stride[4];
10285 0 : buf2[4] = buf2[4] + block_index_shift_x +
10286 0 : block_index_shift_y * buf2_stride[4];
10287 0 : buf1[5] = buf1[5] + block_index_shift_x +
10288 0 : block_index_shift_y * buf1_stride[5];
10289 0 : buf2[5] = buf2[5] + block_index_shift_x +
10290 0 : block_index_shift_y * buf2_stride[5];
10291 0 : buf1[6] = buf1[6] + block_index_shift_x +
10292 0 : block_index_shift_y * buf1_stride[6];
10293 0 : buf2[6] = buf2[6] + block_index_shift_x +
10294 0 : block_index_shift_y * buf2_stride[6];
10295 0 : buf1[7] = buf1[7] + block_index_shift_x +
10296 0 : block_index_shift_y * buf1_stride[7];
10297 0 : buf2[7] = buf2[7] + block_index_shift_x +
10298 0 : block_index_shift_y * buf2_stride[7];
10299 0 : quarter_pel_refinemnet_block(context_ptr,
10300 0 : &context_ptr->p_best_ssd8x16[nidx],
10301 : src_block_index,
10302 : buf1,
10303 : buf1_stride,
10304 : buf2,
10305 : buf2_stride,
10306 : 8,
10307 : 16,
10308 : x_search_area_origin,
10309 : y_search_area_origin,
10310 : testmv,
10311 0 : &context_ptr->p_best_sad8x16[nidx],
10312 0 : &context_ptr->p_best_mv8x16[nidx],
10313 : it);
10314 : }
10315 : }
10316 : }
10317 : // 32x8
10318 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
10319 0 : nidx = tab32x8[pu_index];
10320 0 : block_index_shift_x = (pu_index & 0x01) << 5;
10321 0 : block_index_shift_y = (pu_index >> 1) << 3;
10322 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv32x8[nidx]);
10323 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv32x8[nidx]);
10324 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
10325 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
10326 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
10327 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
10328 0 : skip_qp_pel = 0;
10329 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
10330 0 : skip_qp_pel = 1;
10331 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
10332 0 : skip_qp_pel = 1;
10333 0 : if (!skip_qp_pel) {
10334 0 : for (it = 0; it < num_qp_it; it++) {
10335 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
10336 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
10337 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
10338 0 : SetQuarterPelRefinementInputsOnTheFly(
10339 : pos_full,
10340 : full_stride,
10341 : pos_b,
10342 : pos_h,
10343 : pos_j,
10344 : context_ptr->interpolated_stride,
10345 : x_mv,
10346 : y_mv,
10347 : buf1,
10348 : buf1_stride,
10349 : buf2,
10350 : buf2_stride);
10351 0 : src_block_index =
10352 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
10353 0 : buf1[0] = buf1[0] + block_index_shift_x +
10354 0 : block_index_shift_y * buf1_stride[0];
10355 0 : buf2[0] = buf2[0] + block_index_shift_x +
10356 0 : block_index_shift_y * buf2_stride[0];
10357 0 : buf1[1] = buf1[1] + block_index_shift_x +
10358 0 : block_index_shift_y * buf1_stride[1];
10359 0 : buf2[1] = buf2[1] + block_index_shift_x +
10360 0 : block_index_shift_y * buf2_stride[1];
10361 0 : buf1[2] = buf1[2] + block_index_shift_x +
10362 0 : block_index_shift_y * buf1_stride[2];
10363 0 : buf2[2] = buf2[2] + block_index_shift_x +
10364 0 : block_index_shift_y * buf2_stride[2];
10365 0 : buf1[3] = buf1[3] + block_index_shift_x +
10366 0 : block_index_shift_y * buf1_stride[3];
10367 0 : buf2[3] = buf2[3] + block_index_shift_x +
10368 0 : block_index_shift_y * buf2_stride[3];
10369 0 : buf1[4] = buf1[4] + block_index_shift_x +
10370 0 : block_index_shift_y * buf1_stride[4];
10371 0 : buf2[4] = buf2[4] + block_index_shift_x +
10372 0 : block_index_shift_y * buf2_stride[4];
10373 0 : buf1[5] = buf1[5] + block_index_shift_x +
10374 0 : block_index_shift_y * buf1_stride[5];
10375 0 : buf2[5] = buf2[5] + block_index_shift_x +
10376 0 : block_index_shift_y * buf2_stride[5];
10377 0 : buf1[6] = buf1[6] + block_index_shift_x +
10378 0 : block_index_shift_y * buf1_stride[6];
10379 0 : buf2[6] = buf2[6] + block_index_shift_x +
10380 0 : block_index_shift_y * buf2_stride[6];
10381 0 : buf1[7] = buf1[7] + block_index_shift_x +
10382 0 : block_index_shift_y * buf1_stride[7];
10383 0 : buf2[7] = buf2[7] + block_index_shift_x +
10384 0 : block_index_shift_y * buf2_stride[7];
10385 0 : quarter_pel_refinemnet_block(context_ptr,
10386 0 : &context_ptr->p_best_ssd32x8[nidx],
10387 : src_block_index,
10388 : buf1,
10389 : buf1_stride,
10390 : buf2,
10391 : buf2_stride,
10392 : 32,
10393 : 8,
10394 : x_search_area_origin,
10395 : y_search_area_origin,
10396 : testmv,
10397 0 : &context_ptr->p_best_sad32x8[nidx],
10398 0 : &context_ptr->p_best_mv32x8[nidx],
10399 : it);
10400 : }
10401 : }
10402 : }
10403 :
10404 : // 8x32
10405 0 : for (pu_index = 0; pu_index < 16; ++pu_index) {
10406 0 : nidx = tab8x32[pu_index];
10407 0 : block_index_shift_x = (pu_index & 0x07) << 3;
10408 0 : block_index_shift_y = (pu_index >> 3) << 5;
10409 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv8x32[nidx]);
10410 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv8x32[nidx]);
10411 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
10412 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
10413 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
10414 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
10415 0 : skip_qp_pel = 0;
10416 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
10417 0 : skip_qp_pel = 1;
10418 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
10419 0 : skip_qp_pel = 1;
10420 0 : if (!skip_qp_pel) {
10421 0 : for (it = 0; it < num_qp_it; it++) {
10422 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
10423 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
10424 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
10425 0 : SetQuarterPelRefinementInputsOnTheFly(
10426 : pos_full,
10427 : full_stride,
10428 : pos_b,
10429 : pos_h,
10430 : pos_j,
10431 : context_ptr->interpolated_stride,
10432 : x_mv,
10433 : y_mv,
10434 : buf1,
10435 : buf1_stride,
10436 : buf2,
10437 : buf2_stride);
10438 0 : src_block_index =
10439 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
10440 0 : buf1[0] = buf1[0] + block_index_shift_x +
10441 0 : block_index_shift_y * buf1_stride[0];
10442 0 : buf2[0] = buf2[0] + block_index_shift_x +
10443 0 : block_index_shift_y * buf2_stride[0];
10444 0 : buf1[1] = buf1[1] + block_index_shift_x +
10445 0 : block_index_shift_y * buf1_stride[1];
10446 0 : buf2[1] = buf2[1] + block_index_shift_x +
10447 0 : block_index_shift_y * buf2_stride[1];
10448 0 : buf1[2] = buf1[2] + block_index_shift_x +
10449 0 : block_index_shift_y * buf1_stride[2];
10450 0 : buf2[2] = buf2[2] + block_index_shift_x +
10451 0 : block_index_shift_y * buf2_stride[2];
10452 0 : buf1[3] = buf1[3] + block_index_shift_x +
10453 0 : block_index_shift_y * buf1_stride[3];
10454 0 : buf2[3] = buf2[3] + block_index_shift_x +
10455 0 : block_index_shift_y * buf2_stride[3];
10456 0 : buf1[4] = buf1[4] + block_index_shift_x +
10457 0 : block_index_shift_y * buf1_stride[4];
10458 0 : buf2[4] = buf2[4] + block_index_shift_x +
10459 0 : block_index_shift_y * buf2_stride[4];
10460 0 : buf1[5] = buf1[5] + block_index_shift_x +
10461 0 : block_index_shift_y * buf1_stride[5];
10462 0 : buf2[5] = buf2[5] + block_index_shift_x +
10463 0 : block_index_shift_y * buf2_stride[5];
10464 0 : buf1[6] = buf1[6] + block_index_shift_x +
10465 0 : block_index_shift_y * buf1_stride[6];
10466 0 : buf2[6] = buf2[6] + block_index_shift_x +
10467 0 : block_index_shift_y * buf2_stride[6];
10468 0 : buf1[7] = buf1[7] + block_index_shift_x +
10469 0 : block_index_shift_y * buf1_stride[7];
10470 0 : buf2[7] = buf2[7] + block_index_shift_x +
10471 0 : block_index_shift_y * buf2_stride[7];
10472 0 : quarter_pel_refinemnet_block(context_ptr,
10473 0 : &context_ptr->p_best_ssd8x32[nidx],
10474 : src_block_index,
10475 : buf1,
10476 : buf1_stride,
10477 : buf2,
10478 : buf2_stride,
10479 : 8,
10480 : 32,
10481 : x_search_area_origin,
10482 : y_search_area_origin,
10483 : testmv,
10484 0 : &context_ptr->p_best_sad8x32[nidx],
10485 0 : &context_ptr->p_best_mv8x32[nidx],
10486 : it);
10487 : }
10488 : }
10489 : }
10490 :
10491 : // 64x16
10492 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
10493 0 : nidx = pu_index;
10494 0 : block_index_shift_x = 0;
10495 0 : block_index_shift_y = pu_index << 4;
10496 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv64x16[nidx]);
10497 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv64x16[nidx]);
10498 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
10499 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
10500 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
10501 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
10502 0 : skip_qp_pel = 0;
10503 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
10504 0 : skip_qp_pel = 1;
10505 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
10506 0 : skip_qp_pel = 1;
10507 0 : if (!skip_qp_pel) {
10508 0 : for (it = 0; it < num_qp_it; it++) {
10509 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
10510 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
10511 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
10512 0 : SetQuarterPelRefinementInputsOnTheFly(
10513 : pos_full,
10514 : full_stride,
10515 : pos_b,
10516 : pos_h,
10517 : pos_j,
10518 : context_ptr->interpolated_stride,
10519 : x_mv,
10520 : y_mv,
10521 : buf1,
10522 : buf1_stride,
10523 : buf2,
10524 : buf2_stride);
10525 0 : src_block_index =
10526 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
10527 0 : buf1[0] = buf1[0] + block_index_shift_x +
10528 0 : block_index_shift_y * buf1_stride[0];
10529 0 : buf2[0] = buf2[0] + block_index_shift_x +
10530 0 : block_index_shift_y * buf2_stride[0];
10531 0 : buf1[1] = buf1[1] + block_index_shift_x +
10532 0 : block_index_shift_y * buf1_stride[1];
10533 0 : buf2[1] = buf2[1] + block_index_shift_x +
10534 0 : block_index_shift_y * buf2_stride[1];
10535 0 : buf1[2] = buf1[2] + block_index_shift_x +
10536 0 : block_index_shift_y * buf1_stride[2];
10537 0 : buf2[2] = buf2[2] + block_index_shift_x +
10538 0 : block_index_shift_y * buf2_stride[2];
10539 0 : buf1[3] = buf1[3] + block_index_shift_x +
10540 0 : block_index_shift_y * buf1_stride[3];
10541 0 : buf2[3] = buf2[3] + block_index_shift_x +
10542 0 : block_index_shift_y * buf2_stride[3];
10543 0 : buf1[4] = buf1[4] + block_index_shift_x +
10544 0 : block_index_shift_y * buf1_stride[4];
10545 0 : buf2[4] = buf2[4] + block_index_shift_x +
10546 0 : block_index_shift_y * buf2_stride[4];
10547 0 : buf1[5] = buf1[5] + block_index_shift_x +
10548 0 : block_index_shift_y * buf1_stride[5];
10549 0 : buf2[5] = buf2[5] + block_index_shift_x +
10550 0 : block_index_shift_y * buf2_stride[5];
10551 0 : buf1[6] = buf1[6] + block_index_shift_x +
10552 0 : block_index_shift_y * buf1_stride[6];
10553 0 : buf2[6] = buf2[6] + block_index_shift_x +
10554 0 : block_index_shift_y * buf2_stride[6];
10555 0 : buf1[7] = buf1[7] + block_index_shift_x +
10556 0 : block_index_shift_y * buf1_stride[7];
10557 0 : buf2[7] = buf2[7] + block_index_shift_x +
10558 0 : block_index_shift_y * buf2_stride[7];
10559 0 : quarter_pel_refinemnet_block(
10560 : context_ptr,
10561 0 : &context_ptr->p_best_ssd64x16[nidx],
10562 : src_block_index,
10563 : buf1,
10564 : buf1_stride,
10565 : buf2,
10566 : buf2_stride,
10567 : 64,
10568 : 16,
10569 : x_search_area_origin,
10570 : y_search_area_origin,
10571 : testmv,
10572 0 : &context_ptr->p_best_sad64x16[nidx],
10573 0 : &context_ptr->p_best_mv64x16[nidx],
10574 : it);
10575 : }
10576 : }
10577 : }
10578 : // 16x64
10579 0 : for (pu_index = 0; pu_index < 4; ++pu_index) {
10580 0 : nidx = pu_index;
10581 0 : block_index_shift_x = pu_index << 4;
10582 0 : block_index_shift_y = 0;
10583 0 : x_best_mv = _MVXT(context_ptr->p_best_full_pel_mv16x64[nidx]);
10584 0 : y_best_mv = _MVYT(context_ptr->p_best_full_pel_mv16x64[nidx]);
10585 0 : best_xSearchIndex = ((x_best_mv + 2) >> 2) - x_search_area_origin;
10586 0 : best_ySearchIndex = ((y_best_mv + 2) >> 2) - y_search_area_origin;
10587 0 : dis_x = ABS(int_xSearchIndex - best_xSearchIndex);
10588 0 : dis_y = ABS(int_ySearchIndex - best_ySearchIndex);
10589 0 : skip_qp_pel = 0;
10590 0 : if ((dis_x) > Q_PEL_SEARCH_WIND)
10591 0 : skip_qp_pel = 1;
10592 0 : if ((dis_y) > Q_PEL_SEARCH_WIND)
10593 0 : skip_qp_pel = 1;
10594 0 : if (!skip_qp_pel) {
10595 0 : for (it = 0; it < num_qp_it; it++) {
10596 0 : x_mv = (int16_t)_MVXT(integer_mv) + (2 * it);
10597 0 : y_mv = (int16_t)_MVYT(integer_mv) + (2 * it);
10598 0 : testmv = ((uint16_t)y_mv << 16) | ((uint16_t)x_mv);
10599 0 : SetQuarterPelRefinementInputsOnTheFly(
10600 : pos_full,
10601 : full_stride,
10602 : pos_b,
10603 : pos_h,
10604 : pos_j,
10605 : context_ptr->interpolated_stride,
10606 : x_mv,
10607 : y_mv,
10608 : buf1,
10609 : buf1_stride,
10610 : buf2,
10611 : buf2_stride);
10612 0 : src_block_index =
10613 0 : block_index_shift_x + block_index_shift_y * BLOCK_SIZE_64;
10614 0 : buf1[0] = buf1[0] + block_index_shift_x +
10615 0 : block_index_shift_y * buf1_stride[0];
10616 0 : buf2[0] = buf2[0] + block_index_shift_x +
10617 0 : block_index_shift_y * buf2_stride[0];
10618 0 : buf1[1] = buf1[1] + block_index_shift_x +
10619 0 : block_index_shift_y * buf1_stride[1];
10620 0 : buf2[1] = buf2[1] + block_index_shift_x +
10621 0 : block_index_shift_y * buf2_stride[1];
10622 0 : buf1[2] = buf1[2] + block_index_shift_x +
10623 0 : block_index_shift_y * buf1_stride[2];
10624 0 : buf2[2] = buf2[2] + block_index_shift_x +
10625 0 : block_index_shift_y * buf2_stride[2];
10626 0 : buf1[3] = buf1[3] + block_index_shift_x +
10627 0 : block_index_shift_y * buf1_stride[3];
10628 0 : buf2[3] = buf2[3] + block_index_shift_x +
10629 0 : block_index_shift_y * buf2_stride[3];
10630 0 : buf1[4] = buf1[4] + block_index_shift_x +
10631 0 : block_index_shift_y * buf1_stride[4];
10632 0 : buf2[4] = buf2[4] + block_index_shift_x +
10633 0 : block_index_shift_y * buf2_stride[4];
10634 0 : buf1[5] = buf1[5] + block_index_shift_x +
10635 0 : block_index_shift_y * buf1_stride[5];
10636 0 : buf2[5] = buf2[5] + block_index_shift_x +
10637 0 : block_index_shift_y * buf2_stride[5];
10638 0 : buf1[6] = buf1[6] + block_index_shift_x +
10639 0 : block_index_shift_y * buf1_stride[6];
10640 0 : buf2[6] = buf2[6] + block_index_shift_x +
10641 0 : block_index_shift_y * buf2_stride[6];
10642 0 : buf1[7] = buf1[7] + block_index_shift_x +
10643 0 : block_index_shift_y * buf1_stride[7];
10644 0 : buf2[7] = buf2[7] + block_index_shift_x +
10645 0 : block_index_shift_y * buf2_stride[7];
10646 0 : quarter_pel_refinemnet_block(
10647 : context_ptr,
10648 0 : &context_ptr->p_best_ssd16x64[nidx],
10649 : src_block_index,
10650 : buf1,
10651 : buf1_stride,
10652 : buf2,
10653 : buf2_stride,
10654 : 16,
10655 : 64,
10656 : x_search_area_origin,
10657 : y_search_area_origin,
10658 : testmv,
10659 0 : &context_ptr->p_best_sad16x64[nidx],
10660 0 : &context_ptr->p_best_mv16x64[nidx],
10661 : it);
10662 : }
10663 : }
10664 : }
10665 0 : return;
10666 : }
/*
 * HmeOneQuadrantLevel0
 * Level-0 HME SAD search over a single search window on the
 * sixteenth-resolution reference picture.
 *
 * Window size: hme_level0_total_search_area_width and
 * hme_level0_total_search_area_height, each multiplied by
 * searchAreaMultiplierX or searchAreaMultiplierY and divided by 100; the
 * width is then rounded up to a multiple of 16. The window is centred on
 * (xHmeSearchCenter, yHmeSearchCenter).
 *
 * Clamping: the SB position plus the window origin may not go below
 * -(sixteenthRefPicPtr->origin_x - 1) horizontally or
 * -(sixteenthRefPicPtr->origin_y - 1) vertically, may not exceed
 * width - 1 or height - 1 of the reference, and the window is cropped
 * (to no less than 1) so it does not extend past the reference width or height.
 *
 * Kernel choice: sad_loop_kernel_sparse when hme_search_type == HME_SPARSE,
 * otherwise sad_loop_kernel_hme_l0 when the final width is a multiple of 16,
 * otherwise sad_loop_kernel.
 *
 * When hme_search_method != FULL_SAD_SEARCH the kernel receives twice the
 * reference stride and half of sb_height, and the SAD it returns is doubled
 * afterwards. The source stride passed to the kernel is not doubled here.
 * NOTE(review): presumably the sixteenth SB buffer already holds only the
 * rows that are compared - confirm against the code that fills it.
 *
 * Outputs: level0BestSad, plus xLevel0SearchCenter and yLevel0SearchCenter,
 * which get the window origin added and are then multiplied by 4.
 */
10667 0 : void HmeOneQuadrantLevel0(
10668 : PictureParentControlSet *picture_control_set_ptr,
10669 : MeContext *context_ptr, // input/output parameter, ME context Ptr, used to
10670 : // get/update ME results
10671 : int16_t origin_x, // input parameter, SB position in the horizontal
10672 : // direction- sixteenth resolution
10673 : int16_t origin_y, // input parameter, SB position in the vertical
10674 : // direction- sixteenth resolution
10675 : uint32_t sb_width, // input parameter, SB width - sixteenth resolution
10676 : uint32_t sb_height, // input parameter, SB height - sixteenth resolution
10677 : int16_t xHmeSearchCenter, // input parameter, HME search center in the
10678 : // horizontal direction
10679 : int16_t yHmeSearchCenter, // input parameter, HME search center in the
10680 : // vertical direction
10681 : EbPictureBufferDesc *
10682 : sixteenthRefPicPtr, // input parameter, sixteenth reference Picture Ptr
10683 : uint64_t *level0BestSad, // output parameter, Level0 SAD at
10684 : // (searchRegionNumberInWidth,
10685 : // searchRegionNumberInHeight)
10686 : int16_t *xLevel0SearchCenter, // output parameter, Level0 xMV at
10687 : // (searchRegionNumberInWidth,
10688 : // searchRegionNumberInHeight)
10689 : int16_t *yLevel0SearchCenter, // output parameter, Level0 yMV at
10690 : // (searchRegionNumberInWidth,
10691 : // searchRegionNumberInHeight)
10692 : uint32_t searchAreaMultiplierX,
10693 : uint32_t searchAreaMultiplierY)
10694 : {
10695 : int16_t xTopLeftSearchRegion;
10696 : int16_t yTopLeftSearchRegion;
10697 : uint32_t searchRegionIndex;
10698 : int16_t x_search_area_origin;
10699 : int16_t y_search_area_origin;
10700 : int16_t xSearchRegionDistance;
10701 : int16_t ySearchRegionDistance;
10702 :
10703 : int16_t padWidth;
10704 : int16_t padHeight;
10705 :
10706 : (void)picture_control_set_ptr; // parameter is not otherwise used in this function
10707 : // Round up x_HME_L0 to be a multiple of 16
10708 0 : int16_t search_area_width =
10709 0 : (int16_t)((((((context_ptr->hme_level0_total_search_area_width *
10710 0 : searchAreaMultiplierX) /
10711 0 : 100))) +
10712 0 : 15) &
10713 : ~0x0F);
10714 0 : int16_t search_area_height =
10715 0 : (int16_t)(((context_ptr->hme_level0_total_search_area_height *
10716 0 : searchAreaMultiplierY) /
10717 : 100));
10718 0 : xSearchRegionDistance = xHmeSearchCenter;
10719 0 : ySearchRegionDistance = yHmeSearchCenter;
// Allowed excursion into the left/top padding: one sample less than the
// reference picture origin offset.
10720 0 : padWidth = (int16_t)(sixteenthRefPicPtr->origin_x) - 1;
10721 0 : padHeight = (int16_t)(sixteenthRefPicPtr->origin_y) - 1;
10722 :
// Centre the window on the search centre.
10723 0 : x_search_area_origin =
10724 0 : -(int16_t)(search_area_width >> 1) + xSearchRegionDistance;
10725 0 : y_search_area_origin =
10726 0 : -(int16_t)(search_area_height >> 1) + ySearchRegionDistance;
10727 :
10728 : // Correct the left edge of the Search Area if it is not on the reference
10729 : // Picture
10730 0 : x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth)
10731 0 : ? -padWidth - origin_x
10732 : : x_search_area_origin;
10733 :
// NOTE(review): x_search_area_origin was already clamped just above, so this
// condition looks unable to hold and the width stays unchanged - confirm intent.
10734 0 : search_area_width =
10735 0 : ((origin_x + x_search_area_origin) < -padWidth)
10736 0 : ? search_area_width -
10737 0 : (-padWidth - (origin_x + x_search_area_origin))
10738 : : search_area_width;
10739 :
10740 : // Correct the right edge of the Search Area if its not on the reference
10741 : // Picture
10742 0 : x_search_area_origin =
10743 0 : ((origin_x + x_search_area_origin) >
10744 0 : (int16_t)sixteenthRefPicPtr->width - 1)
10745 0 : ? x_search_area_origin - ((origin_x + x_search_area_origin) -
10746 0 : ((int16_t)sixteenthRefPicPtr->width - 1))
10747 : : x_search_area_origin;
10748 :
// Crop the width so the window ends at the reference width; never below 1.
10749 0 : search_area_width =
10750 0 : ((origin_x + x_search_area_origin + search_area_width) >
10751 0 : (int16_t)sixteenthRefPicPtr->width)
10752 0 : ? MAX(1,
10753 : search_area_width -
10754 : ((origin_x + x_search_area_origin + search_area_width) -
10755 : (int16_t)sixteenthRefPicPtr->width))
10756 : : search_area_width;
10757 :
10758 : // Round down x_HME to be a multiple of 16 as cropping already performed
10759 0 : search_area_width = (search_area_width < 16) ? search_area_width
10760 : : search_area_width & ~0x0F;
10761 : // Correct the top edge of the Search Area if it is not on the reference
10762 : // Picture
10763 0 : y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight)
10764 0 : ? -padHeight - origin_y
10765 : : y_search_area_origin;
10766 :
// NOTE(review): same pattern as the width above - the origin is already
// clamped, so this height adjustment looks like a no-op; confirm intent.
10767 0 : search_area_height =
10768 0 : ((origin_y + y_search_area_origin) < -padHeight)
10769 0 : ? search_area_height -
10770 0 : (-padHeight - (origin_y + y_search_area_origin))
10771 : : search_area_height;
10772 :
10773 : // Correct the bottom edge of the Search Area if its not on the reference
10774 : // Picture
10775 0 : y_search_area_origin =
10776 0 : ((origin_y + y_search_area_origin) >
10777 0 : (int16_t)sixteenthRefPicPtr->height - 1)
10778 0 : ? y_search_area_origin - ((origin_y + y_search_area_origin) -
10779 0 : ((int16_t)sixteenthRefPicPtr->height - 1))
10780 : : y_search_area_origin;
10781 :
// Crop the height so the window ends at the reference height; never below 1.
10782 0 : search_area_height =
10783 0 : (origin_y + y_search_area_origin + search_area_height >
10784 0 : (int16_t)sixteenthRefPicPtr->height)
10785 0 : ? MAX(1,
10786 : search_area_height -
10787 : ((origin_y + y_search_area_origin + search_area_height) -
10788 : (int16_t)sixteenthRefPicPtr->height))
10789 : : search_area_height;
10790 :
// Index of the window's top-left sample inside buffer_y.
10791 0 : xTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_x + origin_x) +
10792 : x_search_area_origin;
10793 0 : yTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_y + origin_y) +
10794 : y_search_area_origin;
10795 0 : searchRegionIndex = xTopLeftSearchRegion +
10796 0 : yTopLeftSearchRegion * sixteenthRefPicPtr->stride_y;
10797 :
10798 0 : if (context_ptr->hme_search_type == HME_SPARSE) {
10799 0 : sad_loop_kernel_sparse(
10800 : &context_ptr->sixteenth_sb_buffer[0],
10801 : context_ptr->sixteenth_sb_buffer_stride,
10802 0 : &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
10803 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
10804 0 : ? sixteenthRefPicPtr->stride_y
10805 0 : : sixteenthRefPicPtr->stride_y * 2,
10806 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
10807 : ? sb_height
10808 : : sb_height >> 1,
10809 : sb_width,
10810 : /* results */
10811 : level0BestSad,
10812 : xLevel0SearchCenter,
10813 : yLevel0SearchCenter,
10814 : /* range */
10815 0 : sixteenthRefPicPtr->stride_y,
10816 : search_area_width,
10817 : search_area_height);
10818 : } else {
10819 0 : if ((search_area_width & 15) == 0) {
10820 : // Only width equals 16 (LCU equals 64) is updated
10821 : // other width sizes work with the old code as the one
10822 : // in "sad_loop_kernel_sse4_1_intrin"
10823 0 : sad_loop_kernel_hme_l0(
10824 : &context_ptr->sixteenth_sb_buffer[0],
10825 : context_ptr->sixteenth_sb_buffer_stride,
10826 0 : &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
10827 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
10828 0 : ? sixteenthRefPicPtr->stride_y
10829 0 : : sixteenthRefPicPtr->stride_y * 2,
10830 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
10831 : ? sb_height
10832 : : sb_height >> 1,
10833 : sb_width,
10834 : /* results */
10835 : level0BestSad,
10836 : xLevel0SearchCenter,
10837 : yLevel0SearchCenter,
10838 : /* range */
10839 0 : sixteenthRefPicPtr->stride_y,
10840 : search_area_width,
10841 : search_area_height);
10842 : } else {
10843 : // Search width is not a multiple of 16: use the generic SAD loop kernel
10844 0 : sad_loop_kernel(
10845 : &context_ptr->sixteenth_sb_buffer[0],
10846 : context_ptr->sixteenth_sb_buffer_stride,
10847 0 : &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
10848 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
10849 0 : ? sixteenthRefPicPtr->stride_y
10850 0 : : sixteenthRefPicPtr->stride_y * 2,
10851 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
10852 : ? sb_height
10853 : : sb_height >> 1,
10854 : sb_width,
10855 : /* results */
10856 : level0BestSad,
10857 : xLevel0SearchCenter,
10858 : yLevel0SearchCenter,
10859 : /* range */
10860 0 : sixteenthRefPicPtr->stride_y,
10861 : search_area_width,
10862 : search_area_height);
10863 : }
10864 : }
10865 :
10866 0 : *level0BestSad =
10867 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
10868 : ? *level0BestSad
10869 0 : : *level0BestSad *
10870 : 2; // Multiply by 2 because considered only every other line
// The centre values written by the kernel get the window origin added before scaling.
10871 0 : *xLevel0SearchCenter += x_search_area_origin;
10872 0 : *xLevel0SearchCenter *=
10873 : 4; // Multiply by 4 because operating on 1/4 resolution
10874 0 : *yLevel0SearchCenter += y_search_area_origin;
10875 0 : *yLevel0SearchCenter *=
10876 : 4; // Multiply by 4 because operating on 1/4 resolution
10877 :
10878 0 : return;
10879 : }
10880 :
/*
 * HmeLevel0
 * Level-0 HME SAD search for one search region, selected by
 * (searchRegionNumberInWidth, searchRegionNumberInHeight), on the
 * sixteenth-resolution reference picture.
 *
 * Region size: hme_level0_search_area_in_width_array[searchRegionNumberInWidth]
 * and hme_level0_search_area_in_height_array[searchRegionNumberInHeight], each
 * multiplied by searchAreaMultiplierX or searchAreaMultiplierY and divided by
 * 100; the width is then rounded up to a multiple of 16.
 *
 * Region position: starts from (xHmeSearchCenter, yHmeSearchCenter), advances
 * by the scaled sizes of all regions with a lower index, and is then moved
 * back by half of the scaled total search area
 * (hme_level0_total_search_area_width / hme_level0_total_search_area_height).
 *
 * The region is then clamped and cropped against the reference picture:
 * left/top limit is -(reference origin_x - 1) / -(reference origin_y - 1),
 * right/bottom limit is the reference width / height, minimum size 1.
 *
 * Kernel choice: when sb_width is a multiple of 8 or equals 4,
 * sad_loop_kernel_hme_l0 is used if the final search width is a multiple of
 * 16 and sad_loop_kernel otherwise; for any other sb_width sad_loop_kernel_c
 * is used.
 *
 * When hme_search_method != FULL_SAD_SEARCH the kernel receives twice the
 * reference stride and half of sb_height, and the SAD it returns is doubled
 * afterwards. The source stride passed to the kernel is not doubled here.
 * NOTE(review): presumably the sixteenth SB buffer already holds only the
 * rows that are compared - confirm against the code that fills it.
 *
 * Outputs: level0BestSad, plus xLevel0SearchCenter and yLevel0SearchCenter,
 * which get the region origin added and are then multiplied by 4.
 */
10881 0 : void HmeLevel0(
10882 : PictureParentControlSet *picture_control_set_ptr,
10883 : MeContext *context_ptr, // input/output parameter, ME context Ptr, used to
10884 : // get/update ME results
10885 : int16_t origin_x, // input parameter, SB position in the horizontal
10886 : // direction- sixteenth resolution
10887 : int16_t origin_y, // input parameter, SB position in the vertical
10888 : // direction- sixteenth resolution
10889 : uint32_t sb_width, // input parameter, SB width - sixteenth resolution
10890 : uint32_t sb_height, // input parameter, SB height - sixteenth resolution
10891 : int16_t xHmeSearchCenter, // input parameter, HME search center in the
10892 : // horizontal direction
10893 : int16_t yHmeSearchCenter, // input parameter, HME search center in the
10894 : // vertical direction
10895 : EbPictureBufferDesc *
10896 : sixteenthRefPicPtr, // input parameter, sixteenth reference Picture Ptr
10897 : uint32_t searchRegionNumberInWidth, // input parameter, search region
10898 : // number in the horizontal direction
10899 : uint32_t searchRegionNumberInHeight, // input parameter, search region
10900 : // number in the vertical direction
10901 : uint64_t *level0BestSad, // output parameter, Level0 SAD at
10902 : // (searchRegionNumberInWidth,
10903 : // searchRegionNumberInHeight)
10904 : int16_t *xLevel0SearchCenter, // output parameter, Level0 xMV at
10905 : // (searchRegionNumberInWidth,
10906 : // searchRegionNumberInHeight)
10907 : int16_t *yLevel0SearchCenter, // output parameter, Level0 yMV at
10908 : // (searchRegionNumberInWidth,
10909 : // searchRegionNumberInHeight)
10910 : uint32_t searchAreaMultiplierX,
10911 : uint32_t searchAreaMultiplierY)
10912 : {
10913 : int16_t xTopLeftSearchRegion;
10914 : int16_t yTopLeftSearchRegion;
10915 : uint32_t searchRegionIndex;
10916 : int16_t x_search_area_origin;
10917 : int16_t y_search_area_origin;
10918 : int16_t xSearchRegionDistance;
10919 : int16_t ySearchRegionDistance;
10920 :
10921 : int16_t padWidth;
10922 : int16_t padHeight;
10923 :
10924 : // The search region size is scaled below by searchAreaMultiplierX/Y (then divided by 100);
10925 : (void)picture_control_set_ptr; // this parameter is not otherwise used in this function
10926 : // Round up x_HME_L0 to be a multiple of 16
10927 0 : int16_t search_area_width =
10928 : (int16_t)((((((context_ptr->hme_level0_search_area_in_width_array
10929 0 : [searchRegionNumberInWidth] *
10930 0 : searchAreaMultiplierX) /
10931 0 : 100))) +
10932 0 : 15) &
10933 : ~0x0F);
10934 0 : int16_t search_area_height =
10935 : (int16_t)(((context_ptr->hme_level0_search_area_in_height_array
10936 0 : [searchRegionNumberInHeight] *
10937 0 : searchAreaMultiplierY) /
10938 : 100));
10939 :
10940 0 : xSearchRegionDistance = xHmeSearchCenter;
10941 0 : ySearchRegionDistance = yHmeSearchCenter;
// Allowed excursion into the left/top padding: one sample less than the
// reference picture origin offset.
10942 0 : padWidth = (int16_t)(sixteenthRefPicPtr->origin_x) - 1;
10943 0 : padHeight = (int16_t)(sixteenthRefPicPtr->origin_y) - 1;
10944 :
// Accumulate the scaled widths of every region with a lower horizontal index.
// The loop consumes searchRegionNumberInWidth (it is 0 afterwards).
10945 0 : while (searchRegionNumberInWidth) {
10946 0 : searchRegionNumberInWidth--;
10947 0 : xSearchRegionDistance +=
10948 0 : (int16_t)(((context_ptr->hme_level0_search_area_in_width_array
10949 0 : [searchRegionNumberInWidth] *
10950 0 : searchAreaMultiplierX) /
10951 : 100));
10952 : }
10953 :
// Same accumulation for the vertical direction; searchRegionNumberInHeight
// is 0 afterwards.
10954 0 : while (searchRegionNumberInHeight) {
10955 0 : searchRegionNumberInHeight--;
10956 0 : ySearchRegionDistance +=
10957 0 : (int16_t)(((context_ptr->hme_level0_search_area_in_height_array
10958 0 : [searchRegionNumberInHeight] *
10959 0 : searchAreaMultiplierY) /
10960 : 100));
10961 : }
// Region origin = accumulated distance minus half of the scaled total search area.
10962 0 : x_search_area_origin =
10963 0 : -(int16_t)((((context_ptr->hme_level0_total_search_area_width *
10964 0 : searchAreaMultiplierX) /
10965 0 : 100)) >>
10966 0 : 1) +
10967 : xSearchRegionDistance;
10968 0 : y_search_area_origin =
10969 0 : -(int16_t)((((context_ptr->hme_level0_total_search_area_height *
10970 0 : searchAreaMultiplierY) /
10971 0 : 100)) >>
10972 0 : 1) +
10973 : ySearchRegionDistance;
10974 :
10975 : // Correct the left edge of the Search Area if it is not on the reference
10976 : // Picture
10977 0 : x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth)
10978 0 : ? -padWidth - origin_x
10979 : : x_search_area_origin;
10980 :
// NOTE(review): x_search_area_origin was already clamped just above, so this
// condition looks unable to hold and the width stays unchanged - confirm intent.
10981 0 : search_area_width =
10982 0 : ((origin_x + x_search_area_origin) < -padWidth)
10983 0 : ? search_area_width -
10984 0 : (-padWidth - (origin_x + x_search_area_origin))
10985 : : search_area_width;
10986 :
10987 : // Correct the right edge of the Search Area if its not on the reference
10988 : // Picture
10989 0 : x_search_area_origin =
10990 0 : ((origin_x + x_search_area_origin) >
10991 0 : (int16_t)sixteenthRefPicPtr->width - 1)
10992 0 : ? x_search_area_origin - ((origin_x + x_search_area_origin) -
10993 0 : ((int16_t)sixteenthRefPicPtr->width - 1))
10994 : : x_search_area_origin;
10995 :
// Crop the width so the region ends at the reference width; never below 1.
10996 0 : search_area_width =
10997 0 : ((origin_x + x_search_area_origin + search_area_width) >
10998 0 : (int16_t)sixteenthRefPicPtr->width)
10999 0 : ? MAX(1,
11000 : search_area_width -
11001 : ((origin_x + x_search_area_origin + search_area_width) -
11002 : (int16_t)sixteenthRefPicPtr->width))
11003 : : search_area_width;
11004 :
11005 : // Round down x_HME to be a multiple of 16 as cropping already performed
11006 0 : search_area_width = (search_area_width < 16) ? search_area_width
11007 : : search_area_width & ~0x0F;
11008 :
11009 : // Correct the top edge of the Search Area if it is not on the reference
11010 : // Picture
11011 0 : y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight)
11012 0 : ? -padHeight - origin_y
11013 : : y_search_area_origin;
11014 :
// NOTE(review): same pattern as the width above - the origin is already
// clamped, so this height adjustment looks like a no-op; confirm intent.
11015 0 : search_area_height =
11016 0 : ((origin_y + y_search_area_origin) < -padHeight)
11017 0 : ? search_area_height -
11018 0 : (-padHeight - (origin_y + y_search_area_origin))
11019 : : search_area_height;
11020 :
11021 : // Correct the bottom edge of the Search Area if its not on the reference
11022 : // Picture
11023 0 : y_search_area_origin =
11024 0 : ((origin_y + y_search_area_origin) >
11025 0 : (int16_t)sixteenthRefPicPtr->height - 1)
11026 0 : ? y_search_area_origin - ((origin_y + y_search_area_origin) -
11027 0 : ((int16_t)sixteenthRefPicPtr->height - 1))
11028 : : y_search_area_origin;
11029 :
// Crop the height so the region ends at the reference height; never below 1.
11030 0 : search_area_height =
11031 0 : (origin_y + y_search_area_origin + search_area_height >
11032 0 : (int16_t)sixteenthRefPicPtr->height)
11033 0 : ? MAX(1,
11034 : search_area_height -
11035 : ((origin_y + y_search_area_origin + search_area_height) -
11036 : (int16_t)sixteenthRefPicPtr->height))
11037 : : search_area_height;
11038 :
// Index of the region's top-left sample inside buffer_y.
11039 0 : xTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_x + origin_x) +
11040 : x_search_area_origin;
11041 0 : yTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_y + origin_y) +
11042 : y_search_area_origin;
11043 0 : searchRegionIndex = xTopLeftSearchRegion +
11044 0 : yTopLeftSearchRegion * sixteenthRefPicPtr->stride_y;
11045 :
11046 0 : if (((sb_width & 7) == 0) || (sb_width == 4)) {
11047 0 : if ((search_area_width & 15) == 0) {
11048 : // Only width equals 16 (LCU equals 64) is updated
11049 : // other width sizes work with the old code as the one
11050 : // in "sad_loop_kernel_sse4_1_intrin"
11051 0 : sad_loop_kernel_hme_l0(
11052 : &context_ptr->sixteenth_sb_buffer[0],
11053 : context_ptr->sixteenth_sb_buffer_stride,
11054 0 : &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
11055 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11056 0 : ? sixteenthRefPicPtr->stride_y
11057 0 : : sixteenthRefPicPtr->stride_y * 2,
11058 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11059 : ? sb_height
11060 : : sb_height >> 1,
11061 : sb_width,
11062 : /* results */
11063 : level0BestSad,
11064 : xLevel0SearchCenter,
11065 : yLevel0SearchCenter,
11066 : /* range */
11067 0 : sixteenthRefPicPtr->stride_y,
11068 : search_area_width,
11069 : search_area_height);
11070 : } else {
11071 : // Search width is not a multiple of 16: use the generic SAD loop kernel
11072 0 : sad_loop_kernel(
11073 : &context_ptr->sixteenth_sb_buffer[0],
11074 : context_ptr->sixteenth_sb_buffer_stride,
11075 0 : &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
11076 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11077 0 : ? sixteenthRefPicPtr->stride_y
11078 0 : : sixteenthRefPicPtr->stride_y * 2,
11079 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11080 : ? sb_height
11081 : : sb_height >> 1,
11082 : sb_width,
11083 : /* results */
11084 : level0BestSad,
11085 : xLevel0SearchCenter,
11086 : yLevel0SearchCenter,
11087 : /* range */
11088 0 : sixteenthRefPicPtr->stride_y,
11089 : search_area_width,
11090 : search_area_height);
11091 : }
11092 : } else {
// sb_width is neither a multiple of 8 nor equal to 4: call the _c kernel directly.
11093 0 : sad_loop_kernel_c(&context_ptr->sixteenth_sb_buffer[0],
11094 : context_ptr->sixteenth_sb_buffer_stride,
11095 0 : &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
11096 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11097 0 : ? sixteenthRefPicPtr->stride_y
11098 0 : : sixteenthRefPicPtr->stride_y * 2,
11099 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11100 : ? sb_height
11101 : : sb_height >> 1,
11102 : sb_width,
11103 : /* results */
11104 : level0BestSad,
11105 : xLevel0SearchCenter,
11106 : yLevel0SearchCenter,
11107 : /* range */
11108 0 : sixteenthRefPicPtr->stride_y,
11109 : search_area_width,
11110 : search_area_height);
11111 : }
11112 :
11113 0 : *level0BestSad =
11114 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11115 : ? *level0BestSad
11116 0 : : *level0BestSad *
11117 : 2; // Multiply by 2 because considered only every other line
// The centre values written by the kernel get the region origin added before scaling.
11118 0 : *xLevel0SearchCenter += x_search_area_origin;
11119 0 : *xLevel0SearchCenter *=
11120 : 4; // Multiply by 4 because operating on 1/4 resolution
11121 0 : *yLevel0SearchCenter += y_search_area_origin;
11122 0 : *yLevel0SearchCenter *=
11123 : 4; // Multiply by 4 because operating on 1/4 resolution
11124 :
11125 0 : return;
11126 : }
11127 :
/*
 * HmeLevel1
 * Level-1 HME SAD search on the quarter-resolution reference picture.
 *
 * Window size: hmeLevel1SearchAreaInWidth rounded up to a multiple of 8, by
 * hmeLevel1SearchAreaInHeight. The window is centred on
 * (xLevel0SearchCenter, yLevel0SearchCenter).
 *
 * The window is clamped and cropped against the reference picture:
 * left/top limit is -(reference origin_x - 1) / -(reference origin_y - 1),
 * right/bottom limit is the reference width / height, minimum size 1. After
 * horizontal cropping the width is rounded down to a multiple of 8 unless it
 * is already below 8.
 *
 * Kernel choice: sad_loop_kernel when sb_width is a multiple of 8 or equals
 * 4, otherwise sad_loop_kernel_c.
 *
 * When hme_search_method != FULL_SAD_SEARCH both the source stride and the
 * reference stride passed to the kernel are doubled and sb_height is halved;
 * the SAD returned is doubled afterwards.
 *
 * Outputs: level1BestSad, plus xLevel1SearchCenter and yLevel1SearchCenter,
 * which get the window origin added and are then multiplied by 2.
 */
11128 0 : void HmeLevel1(
11129 : MeContext *context_ptr, // input/output parameter, ME context Ptr, used to
11130 : // get/update ME results
11131 : int16_t origin_x, // input parameter, SB position in the horizontal
11132 : // direction - quarter resolution
11133 : int16_t origin_y, // input parameter, SB position in the vertical direction
11134 : // - quarter resolution
11135 : uint32_t sb_width, // input parameter, SB width - quarter resolution
11136 : uint32_t sb_height, // input parameter, SB height - quarter resolution
11137 : EbPictureBufferDesc
11138 : *quarterRefPicPtr, // input parameter, quarter reference Picture Ptr
11139 : int16_t hmeLevel1SearchAreaInWidth, // input parameter, hme level 1 search
11140 : // area in width
11141 : int16_t hmeLevel1SearchAreaInHeight, // input parameter, hme level 1 search
11142 : // area in height
11143 : int16_t xLevel0SearchCenter, // input parameter, best Level0 xMV at
11144 : // (searchRegionNumberInWidth,
11145 : // searchRegionNumberInHeight)
11146 : int16_t yLevel0SearchCenter, // input parameter, best Level0 yMV at
11147 : // (searchRegionNumberInWidth,
11148 : // searchRegionNumberInHeight)
11149 : uint64_t *level1BestSad, // output parameter, Level1 SAD at
11150 : // (searchRegionNumberInWidth,
11151 : // searchRegionNumberInHeight)
11152 : int16_t *xLevel1SearchCenter, // output parameter, Level1 xMV at
11153 : // (searchRegionNumberInWidth,
11154 : // searchRegionNumberInHeight)
11155 : int16_t *yLevel1SearchCenter // output parameter, Level1 yMV at
11156 : // (searchRegionNumberInWidth,
11157 : // searchRegionNumberInHeight)
11158 : ) {
11159 : int16_t xTopLeftSearchRegion;
11160 : int16_t yTopLeftSearchRegion;
11161 : uint32_t searchRegionIndex;
11162 : // Round up the Level1 search area width to be a multiple of 8
11163 0 : int16_t search_area_width =
11164 0 : (int16_t)((hmeLevel1SearchAreaInWidth + 7) & ~0x07);
11165 0 : int16_t search_area_height = hmeLevel1SearchAreaInHeight;
11166 :
11167 : int16_t x_search_area_origin;
11168 : int16_t y_search_area_origin;
11169 :
// Allowed excursion into the left/top padding: one sample less than the
// reference picture origin offset.
11170 0 : int16_t padWidth = (int16_t)(quarterRefPicPtr->origin_x) - 1;
11171 0 : int16_t padHeight = (int16_t)(quarterRefPicPtr->origin_y) - 1;
11172 :
// Centre the window on the Level0 search centre.
11173 0 : x_search_area_origin = -(search_area_width >> 1) + xLevel0SearchCenter;
11174 0 : y_search_area_origin = -(search_area_height >> 1) + yLevel0SearchCenter;
11175 :
11176 : // Correct the left edge of the Search Area if it is not on the reference
11177 : // Picture
11178 0 : x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth)
11179 0 : ? -padWidth - origin_x
11180 : : x_search_area_origin;
11181 :
// NOTE(review): x_search_area_origin was already clamped just above, so this
// condition looks unable to hold and the width stays unchanged - confirm intent.
11182 0 : search_area_width =
11183 0 : ((origin_x + x_search_area_origin) < -padWidth)
11184 0 : ? search_area_width -
11185 0 : (-padWidth - (origin_x + x_search_area_origin))
11186 : : search_area_width;
11187 : // Correct the right edge of the Search Area if its not on the reference
11188 : // Picture
11189 0 : x_search_area_origin =
11190 0 : ((origin_x + x_search_area_origin) >
11191 0 : (int16_t)quarterRefPicPtr->width - 1)
11192 0 : ? x_search_area_origin - ((origin_x + x_search_area_origin) -
11193 0 : ((int16_t)quarterRefPicPtr->width - 1))
11194 : : x_search_area_origin;
11195 :
// Crop the width so the window ends at the reference width; never below 1.
11196 0 : search_area_width =
11197 0 : ((origin_x + x_search_area_origin + search_area_width) >
11198 0 : (int16_t)quarterRefPicPtr->width)
11199 0 : ? MAX(1,
11200 : search_area_width -
11201 : ((origin_x + x_search_area_origin + search_area_width) -
11202 : (int16_t)quarterRefPicPtr->width))
11203 : : search_area_width;
11204 :
11205 : // Constrain x_HME_L1 to be a multiple of 8 (round down as cropping already
11206 : // performed)
11207 0 : search_area_width =
11208 : (search_area_width < 8) ? search_area_width : search_area_width & ~0x07;
11209 : // Correct the top edge of the Search Area if it is not on the reference
11210 : // Picture
11211 0 : y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight)
11212 0 : ? -padHeight - origin_y
11213 : : y_search_area_origin;
11214 :
// NOTE(review): same pattern as the width above - the origin is already
// clamped, so this height adjustment looks like a no-op; confirm intent.
11215 0 : search_area_height =
11216 0 : ((origin_y + y_search_area_origin) < -padHeight)
11217 0 : ? search_area_height -
11218 0 : (-padHeight - (origin_y + y_search_area_origin))
11219 : : search_area_height;
11220 :
11221 : // Correct the bottom edge of the Search Area if its not on the reference
11222 : // Picture
11223 0 : y_search_area_origin =
11224 0 : ((origin_y + y_search_area_origin) >
11225 0 : (int16_t)quarterRefPicPtr->height - 1)
11226 0 : ? y_search_area_origin - ((origin_y + y_search_area_origin) -
11227 0 : ((int16_t)quarterRefPicPtr->height - 1))
11228 : : y_search_area_origin;
11229 :
// Crop the height so the window ends at the reference height; never below 1.
11230 0 : search_area_height =
11231 0 : (origin_y + y_search_area_origin + search_area_height >
11232 0 : (int16_t)quarterRefPicPtr->height)
11233 0 : ? MAX(1,
11234 : search_area_height -
11235 : ((origin_y + y_search_area_origin + search_area_height) -
11236 : (int16_t)quarterRefPicPtr->height))
11237 : : search_area_height;
11238 :
11239 : // Move to the top left of the search region
11240 0 : xTopLeftSearchRegion =
11241 0 : ((int16_t)quarterRefPicPtr->origin_x + origin_x) + x_search_area_origin;
11242 0 : yTopLeftSearchRegion =
11243 0 : ((int16_t)quarterRefPicPtr->origin_y + origin_y) + y_search_area_origin;
11244 0 : searchRegionIndex = xTopLeftSearchRegion +
11245 0 : yTopLeftSearchRegion * quarterRefPicPtr->stride_y;
11246 :
11247 0 : if (((sb_width & 7) == 0) || (sb_width == 4)) {
11248 : // Search with sad_loop_kernel; results go to the level1 outputs
11249 0 : sad_loop_kernel(
11250 : &context_ptr->quarter_sb_buffer[0],
11251 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11252 : ? context_ptr->quarter_sb_buffer_stride
11253 0 : : context_ptr->quarter_sb_buffer_stride * 2,
11254 0 : &quarterRefPicPtr->buffer_y[searchRegionIndex],
11255 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11256 0 : ? quarterRefPicPtr->stride_y
11257 0 : : quarterRefPicPtr->stride_y * 2,
11258 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11259 : ? sb_height
11260 : : sb_height >> 1,
11261 : sb_width,
11262 : /* results */
11263 : level1BestSad,
11264 : xLevel1SearchCenter,
11265 : yLevel1SearchCenter,
11266 : /* range */
11267 0 : quarterRefPicPtr->stride_y,
11268 : search_area_width,
11269 : search_area_height);
11270 : } else {
// sb_width is neither a multiple of 8 nor equal to 4: call the _c kernel directly.
11271 0 : sad_loop_kernel_c(&context_ptr->quarter_sb_buffer[0],
11272 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11273 : ? context_ptr->quarter_sb_buffer_stride
11274 0 : : context_ptr->quarter_sb_buffer_stride * 2,
11275 0 : &quarterRefPicPtr->buffer_y[searchRegionIndex],
11276 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11277 0 : ? quarterRefPicPtr->stride_y
11278 0 : : quarterRefPicPtr->stride_y * 2,
11279 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11280 : ? sb_height
11281 : : sb_height >> 1,
11282 : sb_width,
11283 : /* results */
11284 : level1BestSad,
11285 : xLevel1SearchCenter,
11286 : yLevel1SearchCenter,
11287 : /* range */
11288 0 : quarterRefPicPtr->stride_y,
11289 : search_area_width,
11290 : search_area_height);
11291 : }
11292 :
11293 0 : *level1BestSad =
11294 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11295 : ? *level1BestSad
11296 0 : : *level1BestSad *
11297 : 2; // Multiply by 2 because considered only every other line
// The centre values written by the kernel get the window origin added before scaling.
11298 0 : *xLevel1SearchCenter += x_search_area_origin;
11299 0 : *xLevel1SearchCenter *=
11300 : 2; // Multiply by 2 because operating on 1/2 resolution
11301 0 : *yLevel1SearchCenter += y_search_area_origin;
11302 0 : *yLevel1SearchCenter *=
11303 : 2; // Multiply by 2 because operating on 1/2 resolution
11304 :
11305 0 : return;
11306 : }
11307 :
11308 0 : void HmeLevel2(
11309 : PictureParentControlSet
11310 : *picture_control_set_ptr, // input parameter, Picture control set Ptr
11311 : MeContext *context_ptr, // input/output parameter, ME context Ptr, used to
11312 : // get/update ME results
11313 : int16_t
11314 : origin_x, // input parameter, SB position in the horizontal direction
11315 : int16_t origin_y, // input parameter, SB position in the vertical direction
11316 : uint32_t sb_width, // input parameter, SB pwidth - full resolution
11317 : uint32_t sb_height, // input parameter, SB height - full resolution
11318 : EbPictureBufferDesc *refPicPtr, // input parameter, reference Picture Ptr
11319 : uint32_t searchRegionNumberInWidth, // input parameter, search region
11320 : // number in the horizontal direction
11321 : uint32_t searchRegionNumberInHeight, // input parameter, search region
11322 : // number in the vertical direction
11323 : int16_t xLevel1SearchCenter, // input parameter, best Level1 xMV
11324 : // at(searchRegionNumberInWidth,
11325 : // searchRegionNumberInHeight)
11326 : int16_t yLevel1SearchCenter, // input parameter, best Level1 yMV
11327 : // at(searchRegionNumberInWidth,
11328 : // searchRegionNumberInHeight)
11329 : uint64_t *level2BestSad, // output parameter, Level2 SAD at
11330 : // (searchRegionNumberInWidth,
11331 : // searchRegionNumberInHeight)
11332 : int16_t *xLevel2SearchCenter, // output parameter, Level2 xMV at
11333 : // (searchRegionNumberInWidth,
11334 : // searchRegionNumberInHeight)
11335 : int16_t *yLevel2SearchCenter // output parameter, Level2 yMV at
11336 : // (searchRegionNumberInWidth,
11337 : // searchRegionNumberInHeight)
11338 : ) {
11339 : int16_t xTopLeftSearchRegion;
11340 : int16_t yTopLeftSearchRegion;
11341 : uint32_t searchRegionIndex;
11342 :
11343 : // round the search region width to nearest multiple of 8 if it is less than
11344 : // 8 or non multiple of 8 SAD calculation performance is the same for
11345 : // searchregion width from 1 to 8
11346 : (void)picture_control_set_ptr;
11347 0 : int16_t hmeLevel2SearchAreaInWidth =
11348 : (int16_t)context_ptr
11349 0 : ->hme_level2_search_area_in_width_array[searchRegionNumberInWidth];
11350 : // Round up x_HME_L0 to be a multiple of 8
11351 0 : int16_t search_area_width =
11352 0 : (int16_t)((hmeLevel2SearchAreaInWidth + 7) & ~0x07);
11353 0 : int16_t search_area_height =
11354 : (int16_t)context_ptr->hme_level2_search_area_in_height_array
11355 0 : [searchRegionNumberInHeight];
11356 : int16_t x_search_area_origin;
11357 : int16_t y_search_area_origin;
11358 :
11359 0 : int16_t padWidth = (int16_t)BLOCK_SIZE_64 - 1;
11360 0 : int16_t padHeight = (int16_t)BLOCK_SIZE_64 - 1;
11361 :
11362 0 : x_search_area_origin = -(search_area_width >> 1) + xLevel1SearchCenter;
11363 0 : y_search_area_origin = -(search_area_height >> 1) + yLevel1SearchCenter;
11364 :
11365 : // Correct the left edge of the Search Area if it is not on the reference
11366 : // Picture
11367 0 : x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth)
11368 0 : ? -padWidth - origin_x
11369 : : x_search_area_origin;
11370 :
11371 0 : search_area_width =
11372 0 : ((origin_x + x_search_area_origin) < -padWidth)
11373 0 : ? search_area_width -
11374 0 : (-padWidth - (origin_x + x_search_area_origin))
11375 : : search_area_width;
11376 :
11377 : // Correct the right edge of the Search Area if its not on the reference
11378 : // Picture
11379 0 : x_search_area_origin =
11380 0 : ((origin_x + x_search_area_origin) > (int16_t)refPicPtr->width - 1)
11381 0 : ? x_search_area_origin - ((origin_x + x_search_area_origin) -
11382 0 : ((int16_t)refPicPtr->width - 1))
11383 : : x_search_area_origin;
11384 :
11385 0 : search_area_width =
11386 0 : ((origin_x + x_search_area_origin + search_area_width) >
11387 0 : (int16_t)refPicPtr->width)
11388 0 : ? MAX(1,
11389 : search_area_width -
11390 : ((origin_x + x_search_area_origin + search_area_width) -
11391 : (int16_t)refPicPtr->width))
11392 : : search_area_width;
11393 :
11394 : // Constrain x_HME_L1 to be a multiple of 8 (round down as cropping already
11395 : // performed)
11396 0 : search_area_width =
11397 : (search_area_width < 8) ? search_area_width : search_area_width & ~0x07;
11398 : // Correct the top edge of the Search Area if it is not on the reference
11399 : // Picture
11400 0 : y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight)
11401 0 : ? -padHeight - origin_y
11402 : : y_search_area_origin;
11403 :
11404 0 : search_area_height =
11405 0 : ((origin_y + y_search_area_origin) < -padHeight)
11406 0 : ? search_area_height -
11407 0 : (-padHeight - (origin_y + y_search_area_origin))
11408 : : search_area_height;
11409 :
11410 : // Correct the bottom edge of the Search Area if its not on the reference
11411 : // Picture
11412 0 : y_search_area_origin =
11413 0 : ((origin_y + y_search_area_origin) > (int16_t)refPicPtr->height - 1)
11414 0 : ? y_search_area_origin - ((origin_y + y_search_area_origin) -
11415 0 : ((int16_t)refPicPtr->height - 1))
11416 : : y_search_area_origin;
11417 :
11418 0 : search_area_height =
11419 0 : (origin_y + y_search_area_origin + search_area_height >
11420 0 : (int16_t)refPicPtr->height)
11421 0 : ? MAX(1,
11422 : search_area_height -
11423 : ((origin_y + y_search_area_origin + search_area_height) -
11424 : (int16_t)refPicPtr->height))
11425 : : search_area_height;
11426 :
11427 : // Move to the top left of the search region
11428 0 : xTopLeftSearchRegion =
11429 0 : ((int16_t)refPicPtr->origin_x + origin_x) + x_search_area_origin;
11430 0 : yTopLeftSearchRegion =
11431 0 : ((int16_t)refPicPtr->origin_y + origin_y) + y_search_area_origin;
11432 0 : searchRegionIndex =
11433 0 : xTopLeftSearchRegion + yTopLeftSearchRegion * refPicPtr->stride_y;
11434 0 : if ((((sb_width & 7) == 0) && (sb_width != 40) && (sb_width != 56))) {
11435 : // Put the first search location into level0 results
11436 0 : sad_loop_kernel(
11437 : context_ptr->sb_src_ptr,
11438 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11439 : ? context_ptr->sb_src_stride
11440 0 : : context_ptr->sb_src_stride * 2,
11441 0 : &refPicPtr->buffer_y[searchRegionIndex],
11442 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11443 0 : ? refPicPtr->stride_y
11444 0 : : refPicPtr->stride_y * 2,
11445 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11446 : ? sb_height
11447 : : sb_height >> 1,
11448 : sb_width,
11449 : /* results */
11450 : level2BestSad,
11451 : xLevel2SearchCenter,
11452 : yLevel2SearchCenter,
11453 : /* range */
11454 0 : refPicPtr->stride_y,
11455 : search_area_width,
11456 : search_area_height);
11457 : } else {
11458 : // Put the first search location into level0 results
11459 0 : sad_loop_kernel_c(context_ptr->sb_src_ptr,
11460 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11461 : ? context_ptr->sb_src_stride
11462 0 : : context_ptr->sb_src_stride * 2,
11463 0 : &refPicPtr->buffer_y[searchRegionIndex],
11464 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11465 0 : ? refPicPtr->stride_y
11466 0 : : refPicPtr->stride_y * 2,
11467 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11468 : ? sb_height
11469 : : sb_height >> 1,
11470 : sb_width,
11471 : /* results */
11472 : level2BestSad,
11473 : xLevel2SearchCenter,
11474 : yLevel2SearchCenter,
11475 : /* range */
11476 0 : refPicPtr->stride_y,
11477 : search_area_width,
11478 : search_area_height);
11479 : }
11480 :
11481 0 : *level2BestSad =
11482 0 : (context_ptr->hme_search_method == FULL_SAD_SEARCH)
11483 : ? *level2BestSad
11484 0 : : *level2BestSad *
11485 : 2; // Multiply by 2 because considered only ever other line
11486 0 : *xLevel2SearchCenter += x_search_area_origin;
11487 0 : *yLevel2SearchCenter += y_search_area_origin;
11488 :
11489 0 : return;
11490 : }
11491 :
11492 0 : static void SelectBuffer(
11493 : uint32_t pu_index, //[IN]
11494 : uint8_t fracPosition, //[IN]
11495 : uint32_t pu_width, //[IN] Refrence picture list index
11496 : uint32_t pu_height, //[IN] Refrence picture index in the list
11497 : uint8_t *pos_Full, //[IN]
11498 : uint8_t *pos_b, //[IN]
11499 : uint8_t *pos_h, //[IN]
11500 : uint8_t *pos_j, //[IN]
11501 : uint32_t refHalfStride, //[IN]
11502 : uint32_t refBufferFullStride,
11503 : uint8_t **dst_ptr, //[OUT]
11504 : uint32_t *DstPtrStride) //[OUT]
11505 : {
11506 : (void)pu_width;
11507 : (void)pu_height;
11508 :
11509 0 : uint32_t puShiftXIndex = pu_search_index_map[pu_index][0];
11510 0 : uint32_t puShiftYIndex = pu_search_index_map[pu_index][1];
11511 0 : uint32_t ref_stride = refHalfStride;
11512 :
11513 : // for each one of the 8 positions, we need to determine the 2 buffers to do
11514 : // averaging
11515 0 : uint8_t *buf1 = pos_Full;
11516 :
11517 0 : switch (fracPosition) {
11518 0 : case 0: // integer
11519 0 : buf1 = pos_Full;
11520 0 : ref_stride = refBufferFullStride;
11521 0 : break;
11522 0 : case 2: // b
11523 0 : buf1 = pos_b;
11524 0 : break;
11525 0 : case 8: // h
11526 0 : buf1 = pos_h;
11527 0 : break;
11528 0 : case 10: // j
11529 0 : buf1 = pos_j;
11530 0 : break;
11531 0 : default: break;
11532 : }
11533 :
11534 0 : buf1 = buf1 + puShiftXIndex + puShiftYIndex * ref_stride;
11535 :
11536 0 : *dst_ptr = buf1;
11537 0 : *DstPtrStride = ref_stride;
11538 :
11539 0 : return;
11540 : }
11541 :
11542 0 : static void QuarterPelCompensation(
11543 : uint32_t pu_index, //[IN]
11544 : uint8_t fracPosition, //[IN]
11545 : uint32_t pu_width, //[IN] Refrence picture list index
11546 : uint32_t pu_height, //[IN] Refrence picture index in the list
11547 : uint8_t *pos_Full, //[IN]
11548 : uint8_t *pos_b, //[IN]
11549 : uint8_t *pos_h, //[IN]
11550 : uint8_t *pos_j, //[IN]
11551 : uint32_t refHalfStride, //[IN]
11552 : uint32_t refBufferFullStride,
11553 : uint8_t *Dst, //[IN]
11554 : uint32_t DstStride) { //[IN]
11555 :
11556 0 : uint32_t puShiftXIndex = pu_search_index_map[pu_index][0];
11557 0 : uint32_t puShiftYIndex = pu_search_index_map[pu_index][1];
11558 0 : uint32_t refStride1 = refHalfStride;
11559 0 : uint32_t refStride2 = refHalfStride;
11560 :
11561 : // for each one of the 8 positions, we need to determine the 2 buffers to do
11562 : // averaging
11563 0 : uint8_t *buf1 = pos_Full;
11564 0 : uint8_t *buf2 = pos_Full;
11565 :
11566 0 : switch (fracPosition) {
11567 0 : case 1: // a
11568 0 : buf1 = pos_Full;
11569 0 : buf2 = pos_b;
11570 0 : refStride1 = refBufferFullStride;
11571 0 : break;
11572 :
11573 0 : case 3: // c
11574 0 : buf1 = pos_b;
11575 0 : buf2 = pos_Full + 1;
11576 0 : refStride2 = refBufferFullStride;
11577 0 : break;
11578 :
11579 0 : case 4: // d
11580 0 : buf1 = pos_Full;
11581 0 : buf2 = pos_h;
11582 0 : refStride1 = refBufferFullStride;
11583 0 : break;
11584 :
11585 0 : case 5: // e
11586 0 : buf1 = pos_b;
11587 0 : buf2 = pos_h;
11588 0 : break;
11589 :
11590 0 : case 6: // f
11591 0 : buf1 = pos_b;
11592 0 : buf2 = pos_j;
11593 0 : break;
11594 :
11595 0 : case 7: // g
11596 0 : buf1 = pos_b;
11597 0 : buf2 = pos_h + 1;
11598 0 : break;
11599 :
11600 0 : case 9: // i
11601 0 : buf1 = pos_h;
11602 0 : buf2 = pos_j;
11603 0 : break;
11604 :
11605 0 : case 11: // k
11606 0 : buf1 = pos_j;
11607 0 : buf2 = pos_h + 1;
11608 0 : break;
11609 :
11610 0 : case 12: // L
11611 0 : buf1 = pos_h;
11612 0 : buf2 = pos_Full + refBufferFullStride;
11613 0 : refStride2 = refBufferFullStride;
11614 0 : break;
11615 :
11616 0 : case 13: // m
11617 0 : buf1 = pos_h;
11618 0 : buf2 = pos_b + refHalfStride;
11619 0 : break;
11620 :
11621 0 : case 14: // n
11622 0 : buf1 = pos_j;
11623 0 : buf2 = pos_b + refHalfStride;
11624 0 : break;
11625 0 : case 15: // 0
11626 0 : buf1 = pos_h + 1;
11627 0 : buf2 = pos_b + refHalfStride;
11628 0 : break;
11629 0 : default: break;
11630 : }
11631 :
11632 0 : buf1 = buf1 + puShiftXIndex + puShiftYIndex * refStride1;
11633 0 : buf2 = buf2 + puShiftXIndex + puShiftYIndex * refStride2;
11634 :
11635 0 : picture_average_kernel(buf1,
11636 : refStride1,
11637 : buf2,
11638 : refStride2,
11639 : Dst,
11640 : DstStride,
11641 : pu_width,
11642 : pu_height);
11643 :
11644 0 : return;
11645 : }
11646 :
11647 : // TODO: Alt-refs - change previous SelectBuffer and QuarterPelCompensation to
11648 : // be applicable for both chroma and luma
11649 0 : static void select_buffer(
11650 : uint32_t pu_index, //[IN]
11651 : EbBool chroma,
11652 : uint8_t fracPosition, //[IN]
11653 : uint32_t pu_width, //[IN] Refrence picture list index
11654 : uint32_t pu_height, //[IN] Refrence picture index in the list
11655 : uint8_t *pos_Full, //[IN]
11656 : uint8_t *pos_b, //[IN]
11657 : uint8_t *pos_h, //[IN]
11658 : uint8_t *pos_j, //[IN]
11659 : uint32_t refHalfStride, //[IN]
11660 : uint32_t refBufferFullStride,
11661 : uint8_t **dst_ptr, //[OUT]
11662 : uint32_t *DstPtrStride) //[OUT]
11663 : {
11664 : (void)pu_width;
11665 : (void)pu_height;
11666 :
11667 : uint32_t puShiftXIndex;
11668 : uint32_t puShiftYIndex;
11669 :
11670 0 : if (chroma == EB_TRUE) {
11671 0 : puShiftXIndex = (pu_search_index_map[pu_index][0]) >> 1;
11672 0 : puShiftYIndex = (pu_search_index_map[pu_index][1]) >> 1;
11673 : } else {
11674 0 : puShiftXIndex = pu_search_index_map[pu_index][0];
11675 0 : puShiftYIndex = pu_search_index_map[pu_index][1];
11676 : }
11677 :
11678 0 : uint32_t ref_stride = refHalfStride;
11679 :
11680 : // for each one of the 8 positions, we need to determine the 2 buffers to do
11681 : // averaging
11682 0 : uint8_t *buf1 = pos_Full;
11683 :
11684 0 : switch (fracPosition) {
11685 0 : case 0: // integer
11686 0 : buf1 = pos_Full;
11687 0 : ref_stride = refBufferFullStride;
11688 0 : break;
11689 0 : case 2: // b
11690 0 : buf1 = pos_b;
11691 0 : break;
11692 0 : case 8: // h
11693 0 : buf1 = pos_h;
11694 0 : break;
11695 0 : case 10: // j
11696 0 : buf1 = pos_j;
11697 0 : break;
11698 0 : default: break;
11699 : }
11700 :
11701 0 : buf1 = buf1 + puShiftXIndex + puShiftYIndex * ref_stride;
11702 :
11703 0 : *dst_ptr = buf1;
11704 0 : *DstPtrStride = ref_stride;
11705 :
11706 0 : return;
11707 : }
11708 :
11709 : // TODO: Alt-refs - change previous SelectBuffer and QuarterPelCompensation to
11710 : // be applicable for both chroma and luma
11711 0 : static void quarter_pel_compensation(
11712 : uint32_t pu_index, //[IN]
11713 : EbBool chroma,
11714 : uint8_t fracPosition, //[IN]
11715 : uint32_t pu_width, //[IN] Refrence picture list index
11716 : uint32_t pu_height, //[IN] Refrence picture index in the list
11717 : uint8_t *pos_Full, //[IN]
11718 : uint8_t *pos_b, //[IN]
11719 : uint8_t *pos_h, //[IN]
11720 : uint8_t *pos_j, //[IN]
11721 : uint32_t refHalfStride, //[IN]
11722 : uint32_t refBufferFullStride,
11723 : uint8_t *Dst, //[IN]
11724 : uint32_t DstStride) { //[IN]
11725 : uint32_t puShiftXIndex;
11726 : uint32_t puShiftYIndex;
11727 :
11728 0 : if (chroma == EB_TRUE) {
11729 0 : puShiftXIndex = (pu_search_index_map[pu_index][0]) >> 1;
11730 0 : puShiftYIndex = (pu_search_index_map[pu_index][1]) >> 1;
11731 : } else {
11732 0 : puShiftXIndex = pu_search_index_map[pu_index][0];
11733 0 : puShiftYIndex = pu_search_index_map[pu_index][1];
11734 : }
11735 :
11736 0 : uint32_t refStride1 = refHalfStride;
11737 0 : uint32_t refStride2 = refHalfStride;
11738 :
11739 : // for each one of the 8 positions, we need to determine the 2 buffers to do
11740 : // averaging
11741 0 : uint8_t *buf1 = pos_Full;
11742 0 : uint8_t *buf2 = pos_Full;
11743 :
11744 0 : switch (fracPosition) {
11745 0 : case 1: // a
11746 0 : buf1 = pos_Full;
11747 0 : buf2 = pos_b;
11748 0 : refStride1 = refBufferFullStride;
11749 0 : break;
11750 :
11751 0 : case 3: // c
11752 0 : buf1 = pos_b;
11753 0 : buf2 = pos_Full + 1;
11754 0 : refStride2 = refBufferFullStride;
11755 0 : break;
11756 :
11757 0 : case 4: // d
11758 0 : buf1 = pos_Full;
11759 0 : buf2 = pos_h;
11760 0 : refStride1 = refBufferFullStride;
11761 0 : break;
11762 :
11763 0 : case 5: // e
11764 0 : buf1 = pos_b;
11765 0 : buf2 = pos_h;
11766 0 : break;
11767 :
11768 0 : case 6: // f
11769 0 : buf1 = pos_b;
11770 0 : buf2 = pos_j;
11771 0 : break;
11772 :
11773 0 : case 7: // g
11774 0 : buf1 = pos_b;
11775 0 : buf2 = pos_h + 1;
11776 0 : break;
11777 :
11778 0 : case 9: // i
11779 0 : buf1 = pos_h;
11780 0 : buf2 = pos_j;
11781 0 : break;
11782 :
11783 0 : case 11: // k
11784 0 : buf1 = pos_j;
11785 0 : buf2 = pos_h + 1;
11786 0 : break;
11787 :
11788 0 : case 12: // L
11789 0 : buf1 = pos_h;
11790 0 : buf2 = pos_Full + refBufferFullStride;
11791 0 : refStride2 = refBufferFullStride;
11792 0 : break;
11793 :
11794 0 : case 13: // m
11795 0 : buf1 = pos_h;
11796 0 : buf2 = pos_b + refHalfStride;
11797 0 : break;
11798 :
11799 0 : case 14: // n
11800 0 : buf1 = pos_j;
11801 0 : buf2 = pos_b + refHalfStride;
11802 0 : break;
11803 0 : case 15: // 0
11804 0 : buf1 = pos_h + 1;
11805 0 : buf2 = pos_b + refHalfStride;
11806 0 : break;
11807 0 : default: break;
11808 : }
11809 :
11810 0 : buf1 = buf1 + puShiftXIndex + puShiftYIndex * refStride1;
11811 0 : buf2 = buf2 + puShiftXIndex + puShiftYIndex * refStride2;
11812 :
11813 0 : picture_average_kernel(buf1,
11814 : refStride1,
11815 : buf2,
11816 : refStride2,
11817 : Dst,
11818 : DstStride,
11819 : pu_width,
11820 : pu_height);
11821 :
11822 0 : return;
11823 : }
11824 :
/*******************************************************************************
 * Requirement: pu_width = 8, 16, 24, 32, 48 or 64
 * Requirement: pu_height % 2 = 0
 * Requirement: skip = 0 or 1
 * Requirement (x86 only): temp_buf % 16 = 0
 * Requirement (x86 only): (dst->buffer_y + dstLumaIndex) % 16 = 0 when
 *                         pu_width % 16 = 0
 * Requirement (x86 only): (dst->bufferCb + dstChromaIndex) % 16 = 0 when
 *                         pu_width % 32 = 0
 * Requirement (x86 only): (dst->bufferCr + dstChromaIndex) % 16 = 0 when
 *                         pu_width % 32 = 0
 * Requirement (x86 only): dst->stride_y % 16 = 0 when pu_width % 16 = 0
 * Requirement (x86 only): dst->chromaStride % 16 = 0 when pu_width % 32 = 0
 *******************************************************************************/
11837 0 : void uni_pred_averaging(uint32_t pu_index, EbBool chroma, uint8_t firstFracPos,
11838 : uint32_t pu_width, uint32_t pu_height,
11839 : uint8_t *firstRefInteger, uint8_t *firstRefPosB,
11840 : uint8_t *firstRefPosH, uint8_t *firstRefPosJ,
11841 : uint32_t refBufferStride,
11842 : uint32_t refBufferFullList0Stride,
11843 : uint8_t *firstRefTempDst, uint8_t **comp_blk_ptr,
11844 : uint32_t *comp_blk_ptr_stride)
11845 : {
11846 : // Buffer Selection and quater-pel compensation on the fly
11847 0 : if (sub_position_type[firstFracPos] != 2) {
11848 0 : select_buffer(pu_index,
11849 : chroma,
11850 : firstFracPos,
11851 : pu_width,
11852 : pu_height,
11853 : firstRefInteger,
11854 : firstRefPosB,
11855 : firstRefPosH,
11856 : firstRefPosJ,
11857 : refBufferStride,
11858 : refBufferFullList0Stride,
11859 : comp_blk_ptr,
11860 : comp_blk_ptr_stride);
11861 : } else {
11862 0 : quarter_pel_compensation(pu_index,
11863 : chroma,
11864 : firstFracPos,
11865 : pu_width,
11866 : pu_height,
11867 : firstRefInteger,
11868 : firstRefPosB,
11869 : firstRefPosH,
11870 : firstRefPosJ,
11871 : refBufferStride,
11872 : refBufferFullList0Stride,
11873 : firstRefTempDst,
11874 : BLOCK_SIZE_64);
11875 :
11876 0 : *comp_blk_ptr = firstRefTempDst;
11877 0 : *comp_blk_ptr_stride = BLOCK_SIZE_64;
11878 : }
11879 0 : }
11880 :
/*******************************************************************************
 * Requirement: pu_width = 8, 16, 24, 32, 48 or 64
 * Requirement: pu_height % 2 = 0
 * Requirement: skip = 0 or 1
 * Requirement (x86 only): temp_buf % 16 = 0
 * Requirement (x86 only): (dst->buffer_y + dst_luma_index) % 16 = 0 when
 *                         pu_width % 16 = 0
 * Requirement (x86 only): (dst->buffer_cb + dst_chroma_index) % 16 = 0 when
 *                         pu_width % 32 = 0
 * Requirement (x86 only): (dst->buffer_cr + dst_chroma_index) % 16 = 0 when
 *                         pu_width % 32 = 0
 * Requirement (x86 only): dst->stride_y % 16 = 0 when pu_width % 16 = 0
 * Requirement (x86 only): dst->chromaStride % 16 = 0 when pu_width % 32 = 0
 *******************************************************************************/
11893 0 : uint32_t BiPredAverging(
11894 : MeContext *context_ptr, MePredUnit *me_candidate, uint32_t pu_index,
11895 : uint8_t *sourcePic, uint32_t lumaStride, uint8_t firstFracPos,
11896 : uint8_t secondFracPos, uint32_t pu_width, uint32_t pu_height,
11897 : uint8_t *firstRefInteger, uint8_t *firstRefPosB, uint8_t *firstRefPosH,
11898 : uint8_t *firstRefPosJ, uint8_t *secondRefInteger, uint8_t *secondRefPosB,
11899 : uint8_t *secondRefPosH, uint8_t *secondRefPosJ, uint32_t refBufferStride,
11900 : uint32_t refBufferFullList0Stride, uint32_t refBufferFullList1Stride,
11901 : uint8_t *firstRefTempDst, uint8_t *secondRefTempDst)
11902 : {
11903 : uint8_t *ptrList0, *ptrList1;
11904 : uint32_t ptrList0Stride, ptrList1Stride;
11905 :
11906 : // Buffer Selection and quater-pel compensation on the fly
11907 0 : if (sub_position_type[firstFracPos] != 2) {
11908 0 : SelectBuffer(pu_index,
11909 : firstFracPos,
11910 : pu_width,
11911 : pu_height,
11912 : firstRefInteger,
11913 : firstRefPosB,
11914 : firstRefPosH,
11915 : firstRefPosJ,
11916 : refBufferStride,
11917 : refBufferFullList0Stride,
11918 : &ptrList0,
11919 : &ptrList0Stride);
11920 : } else {
11921 0 : QuarterPelCompensation(pu_index,
11922 : firstFracPos,
11923 : pu_width,
11924 : pu_height,
11925 : firstRefInteger,
11926 : firstRefPosB,
11927 : firstRefPosH,
11928 : firstRefPosJ,
11929 : refBufferStride,
11930 : refBufferFullList0Stride,
11931 : firstRefTempDst,
11932 : BLOCK_SIZE_64);
11933 :
11934 0 : ptrList0 = firstRefTempDst;
11935 0 : ptrList0Stride = BLOCK_SIZE_64;
11936 : }
11937 :
11938 0 : if (sub_position_type[secondFracPos] != 2) {
11939 0 : SelectBuffer(pu_index,
11940 : secondFracPos,
11941 : pu_width,
11942 : pu_height,
11943 : secondRefInteger,
11944 : secondRefPosB,
11945 : secondRefPosH,
11946 : secondRefPosJ,
11947 : refBufferStride,
11948 : refBufferFullList1Stride,
11949 : &ptrList1,
11950 : &ptrList1Stride);
11951 : } else {
11952 : // uni-prediction List1 luma
11953 : // doing the luma interpolation
11954 0 : QuarterPelCompensation(pu_index,
11955 : secondFracPos,
11956 : pu_width,
11957 : pu_height,
11958 : secondRefInteger,
11959 : secondRefPosB,
11960 : secondRefPosH,
11961 : secondRefPosJ,
11962 : refBufferStride,
11963 : refBufferFullList1Stride,
11964 : secondRefTempDst,
11965 : BLOCK_SIZE_64);
11966 :
11967 0 : ptrList1 = secondRefTempDst;
11968 0 : ptrList1Stride = BLOCK_SIZE_64;
11969 : }
11970 :
11971 : // bi-pred luma
11972 0 : me_candidate->distortion =
11973 0 : (context_ptr->fractional_search_method == SUB_SAD_SEARCH)
11974 0 : ? nxm_sad_avg_kernel(
11975 : sourcePic,
11976 : lumaStride << 1,
11977 : ptrList0,
11978 : ptrList0Stride << 1,
11979 : ptrList1,
11980 : ptrList1Stride << 1,
11981 : pu_height >> 1,
11982 : pu_width)
11983 : << 1
11984 0 : : nxm_sad_avg_kernel(
11985 : sourcePic,
11986 : lumaStride,
11987 : ptrList0,
11988 : ptrList0Stride,
11989 : ptrList1,
11990 : ptrList1Stride,
11991 : pu_height,
11992 : pu_width);
11993 :
11994 0 : return me_candidate->distortion;
11995 : }
11996 :
/*******************************************
 * BiPredictionCompensation
 *   performs compensation for the List 0 and
 *   List 1 candidates and then computes the
 *   average
 *******************************************/
12003 0 : EbErrorType BiPredictionCompensation(MeContext *context_ptr, uint32_t pu_index,
12004 : MePredUnit *me_candidate,
12005 : uint32_t firstList,
12006 : uint8_t first_list_ref_pic_idx,
12007 : uint32_t firstRefMv, uint32_t secondList,
12008 : uint8_t second_list_ref_pic_idx,
12009 : uint32_t secondRefMv)
12010 : {
12011 0 : EbErrorType return_error = EB_ErrorNone;
12012 :
12013 : int16_t firstRefPosX;
12014 : int16_t firstRefPosY;
12015 : int16_t firstRefIntegPosx;
12016 : int16_t firstRefIntegPosy;
12017 : uint8_t firstRefFracPosx;
12018 : uint8_t firstRefFracPosy;
12019 : uint8_t firstRefFracPos;
12020 : int32_t xfirstSearchIndex;
12021 : int32_t yfirstSearchIndex;
12022 : int32_t firstSearchRegionIndexPosInteg;
12023 : int32_t firstSearchRegionIndexPosb;
12024 : int32_t firstSearchRegionIndexPosh;
12025 : int32_t firstSearchRegionIndexPosj;
12026 :
12027 : int16_t secondRefPosX;
12028 : int16_t secondRefPosY;
12029 : int16_t secondRefIntegPosx;
12030 : int16_t secondRefIntegPosy;
12031 : uint8_t secondRefFracPosx;
12032 : uint8_t secondRefFracPosy;
12033 : uint8_t secondRefFracPos;
12034 : int32_t xsecondSearchIndex;
12035 : int32_t ysecondSearchIndex;
12036 : int32_t secondSearchRegionIndexPosInteg;
12037 : int32_t secondSearchRegionIndexPosb;
12038 : int32_t secondSearchRegionIndexPosh;
12039 : int32_t secondSearchRegionIndexPosj;
12040 :
12041 0 : uint32_t puShiftXIndex = pu_search_index_map[pu_index][0];
12042 0 : uint32_t puShiftYIndex = pu_search_index_map[pu_index][1];
12043 :
12044 0 : const uint32_t puLcuBufferIndex =
12045 0 : puShiftXIndex + puShiftYIndex * context_ptr->sb_src_stride;
12046 :
12047 0 : me_candidate->prediction_direction = BI_PRED;
12048 :
12049 : // First refrence
12050 : // Set Candidate information
12051 0 : firstRefPosX = _MVXT(firstRefMv);
12052 0 : firstRefPosY = _MVYT(firstRefMv);
12053 0 : me_candidate->ref_index[0] = (uint8_t)first_list_ref_pic_idx;
12054 0 : me_candidate->ref0_list = (uint8_t)firstList;
12055 :
12056 0 : firstRefIntegPosx = (firstRefPosX >> 2);
12057 0 : firstRefIntegPosy = (firstRefPosY >> 2);
12058 0 : firstRefFracPosx = (uint8_t)firstRefPosX & 0x03;
12059 0 : firstRefFracPosy = (uint8_t)firstRefPosY & 0x03;
12060 :
12061 0 : firstRefFracPos = (uint8_t)(firstRefFracPosx + (firstRefFracPosy << 2));
12062 0 : xfirstSearchIndex =
12063 0 : (int32_t)firstRefIntegPosx -
12064 0 : context_ptr->x_search_area_origin[firstList][first_list_ref_pic_idx];
12065 0 : yfirstSearchIndex =
12066 0 : (int32_t)firstRefIntegPosy -
12067 0 : context_ptr->y_search_area_origin[firstList][first_list_ref_pic_idx];
12068 0 : firstSearchRegionIndexPosInteg =
12069 0 : (int32_t)(xfirstSearchIndex + (ME_FILTER_TAP >> 1)) +
12070 0 : (int32_t)context_ptr
12071 0 : ->interpolated_full_stride[firstList][first_list_ref_pic_idx] *
12072 0 : (int32_t)(yfirstSearchIndex + (ME_FILTER_TAP >> 1));
12073 :
12074 0 : firstSearchRegionIndexPosb =
12075 0 : (int32_t)(xfirstSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
12076 0 : (int32_t)context_ptr->interpolated_stride *
12077 0 : (int32_t)(yfirstSearchIndex + (ME_FILTER_TAP >> 1));
12078 0 : firstSearchRegionIndexPosh =
12079 0 : (int32_t)(xfirstSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
12080 0 : (int32_t)context_ptr->interpolated_stride *
12081 0 : (int32_t)(yfirstSearchIndex + (ME_FILTER_TAP >> 1) - 1);
12082 0 : firstSearchRegionIndexPosj =
12083 0 : (int32_t)(xfirstSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
12084 0 : (int32_t)context_ptr->interpolated_stride *
12085 0 : (int32_t)(yfirstSearchIndex + (ME_FILTER_TAP >> 1) - 1);
12086 :
12087 : // Second refrence
12088 :
12089 : // Set Candidate information
12090 0 : secondRefPosX = _MVXT(secondRefMv);
12091 0 : secondRefPosY = _MVYT(secondRefMv);
12092 0 : me_candidate->ref_index[1] = (uint8_t)second_list_ref_pic_idx;
12093 0 : me_candidate->ref1_list = (uint8_t)secondList;
12094 0 : secondRefIntegPosx = (secondRefPosX >> 2);
12095 0 : secondRefIntegPosy = (secondRefPosY >> 2);
12096 0 : secondRefFracPosx = (uint8_t)secondRefPosX & 0x03;
12097 0 : secondRefFracPosy = (uint8_t)secondRefPosY & 0x03;
12098 :
12099 0 : secondRefFracPos = (uint8_t)(secondRefFracPosx + (secondRefFracPosy << 2));
12100 0 : xsecondSearchIndex =
12101 0 : secondRefIntegPosx -
12102 0 : context_ptr->x_search_area_origin[secondList][second_list_ref_pic_idx];
12103 0 : ysecondSearchIndex =
12104 0 : secondRefIntegPosy -
12105 0 : context_ptr->y_search_area_origin[secondList][second_list_ref_pic_idx];
12106 0 : secondSearchRegionIndexPosInteg =
12107 0 : (int32_t)(xsecondSearchIndex + (ME_FILTER_TAP >> 1)) +
12108 0 : (int32_t)
12109 : context_ptr->interpolated_full_stride[secondList]
12110 0 : [second_list_ref_pic_idx] *
12111 0 : (int32_t)(ysecondSearchIndex + (ME_FILTER_TAP >> 1));
12112 0 : secondSearchRegionIndexPosb =
12113 0 : (int32_t)(xsecondSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
12114 0 : (int32_t)context_ptr->interpolated_stride *
12115 0 : (int32_t)(ysecondSearchIndex + (ME_FILTER_TAP >> 1));
12116 0 : secondSearchRegionIndexPosh =
12117 0 : (int32_t)(xsecondSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
12118 0 : (int32_t)context_ptr->interpolated_stride *
12119 0 : (int32_t)(ysecondSearchIndex + (ME_FILTER_TAP >> 1) - 1);
12120 0 : secondSearchRegionIndexPosj =
12121 0 : (int32_t)(xsecondSearchIndex + (ME_FILTER_TAP >> 1) - 1) +
12122 0 : (int32_t)context_ptr->interpolated_stride *
12123 0 : (int32_t)(ysecondSearchIndex + (ME_FILTER_TAP >> 1) - 1);
12124 :
12125 : uint32_t nIndex;
12126 :
12127 0 : if (pu_index > 200)
12128 0 : nIndex = pu_index;
12129 0 : else if (pu_index > 184)
12130 0 : nIndex = tab8x32[pu_index - 185] + 185;
12131 0 : else if (pu_index > 168)
12132 0 : nIndex = tab32x8[pu_index - 169] + 169;
12133 0 : else if (pu_index > 136)
12134 0 : nIndex = tab8x16[pu_index - 137] + 137;
12135 0 : else if (pu_index > 128)
12136 0 : nIndex = tab16x32[pu_index - 129] + 129;
12137 0 : else if (pu_index > 126)
12138 0 : nIndex = pu_index;
12139 0 : else if (pu_index > 94)
12140 0 : nIndex = tab16x8[pu_index - 95] + 95;
12141 0 : else if (pu_index > 86)
12142 0 : nIndex = tab32x16[pu_index - 87] + 87;
12143 0 : else if (pu_index > 84)
12144 0 : nIndex = pu_index;
12145 0 : else if (pu_index > 20)
12146 0 : nIndex = tab8x8[pu_index - 21] + 21;
12147 0 : else if (pu_index > 4)
12148 0 : nIndex = tab16x16[pu_index - 5] + 5;
12149 : else
12150 0 : nIndex = pu_index;
12151 0 : context_ptr->p_sb_bipred_sad[nIndex] =
12152 :
12153 0 : BiPredAverging(
12154 : context_ptr,
12155 : me_candidate,
12156 : pu_index,
12157 0 : &(context_ptr->sb_src_ptr[puLcuBufferIndex]),
12158 : context_ptr->sb_src_stride,
12159 : firstRefFracPos,
12160 : secondRefFracPos,
12161 : partition_width[pu_index],
12162 : partition_height[pu_index],
12163 0 : &(context_ptr->integer_buffer_ptr[firstList][first_list_ref_pic_idx]
12164 : [firstSearchRegionIndexPosInteg]),
12165 0 : &(context_ptr->pos_b_buffer[firstList][first_list_ref_pic_idx]
12166 : [firstSearchRegionIndexPosb]),
12167 0 : &(context_ptr->pos_h_buffer[firstList][first_list_ref_pic_idx]
12168 : [firstSearchRegionIndexPosh]),
12169 0 : &(context_ptr->pos_j_buffer[firstList][first_list_ref_pic_idx]
12170 : [firstSearchRegionIndexPosj]),
12171 : &(context_ptr
12172 0 : ->integer_buffer_ptr[secondList][second_list_ref_pic_idx]
12173 : [secondSearchRegionIndexPosInteg]),
12174 0 : &(context_ptr->pos_b_buffer[secondList][second_list_ref_pic_idx]
12175 : [secondSearchRegionIndexPosb]),
12176 0 : &(context_ptr->pos_h_buffer[secondList][second_list_ref_pic_idx]
12177 : [secondSearchRegionIndexPosh]),
12178 0 : &(context_ptr->pos_j_buffer[secondList][second_list_ref_pic_idx]
12179 : [secondSearchRegionIndexPosj]),
12180 : context_ptr->interpolated_stride,
12181 : context_ptr
12182 : ->interpolated_full_stride[firstList][first_list_ref_pic_idx],
12183 : context_ptr
12184 : ->interpolated_full_stride[secondList][second_list_ref_pic_idx],
12185 : &(context_ptr->one_d_intermediate_results_buf0[0]),
12186 : &(context_ptr->one_d_intermediate_results_buf1[0]));
12187 :
12188 0 : return return_error;
12189 : }
12190 :
12191 0 : uint8_t skip_bi_pred(
12192 : PictureParentControlSet *picture_control_set_ptr,
12193 : uint8_t ref_type,
12194 : uint8_t ref_type_table[7]) {
12195 :
12196 0 : if (!picture_control_set_ptr->prune_unipred_at_me)
12197 0 : return 1;
12198 :
12199 0 : uint8_t allow_cand = 0;
12200 : uint8_t ref_idx;
12201 0 : for (ref_idx = 0; ref_idx < PRUNE_REF_ME_TH; ref_idx++) {
12202 0 : if (ref_type == ref_type_table[ref_idx])
12203 0 : allow_cand = 1;
12204 : }
12205 0 : return allow_cand;
12206 : }
12207 :
12208 : /*******************************************
12209 : * BiPredictionSearch
12210 : * performs Bi-Prediction Search (LCU)
12211 : *******************************************/
12212 : // This function enables all 16 Bipred candidates when MRP is ON
12213 0 : EbErrorType BiPredictionSearch(
12214 : SequenceControlSet *sequence_control_set_ptr,
12215 : MeContext *context_ptr, uint32_t pu_index, uint8_t candidateIndex,
12216 : uint32_t activeRefPicFirstLisNum, uint32_t activeRefPicSecondLisNum,
12217 : uint8_t *total_me_candidate_index,
12218 : uint8_t ref_type_table[7],
12219 : PictureParentControlSet *picture_control_set_ptr) {
12220 0 : EbErrorType return_error = EB_ErrorNone;
12221 :
12222 : uint32_t firstListRefPictdx;
12223 : uint32_t secondListRefPictdx;
12224 :
12225 : (void)picture_control_set_ptr;
12226 :
12227 : uint32_t nIndex;
12228 :
12229 0 : if (pu_index > 200)
12230 0 : nIndex = pu_index;
12231 0 : else if (pu_index > 184)
12232 0 : nIndex = tab8x32[pu_index - 185] + 185;
12233 0 : else if (pu_index > 168)
12234 0 : nIndex = tab32x8[pu_index - 169] + 169;
12235 0 : else if (pu_index > 136)
12236 0 : nIndex = tab8x16[pu_index - 137] + 137;
12237 0 : else if (pu_index > 128)
12238 0 : nIndex = tab16x32[pu_index - 129] + 129;
12239 0 : else if (pu_index > 126)
12240 0 : nIndex = pu_index;
12241 0 : else if (pu_index > 94)
12242 0 : nIndex = tab16x8[pu_index - 95] + 95;
12243 0 : else if (pu_index > 86)
12244 0 : nIndex = tab32x16[pu_index - 87] + 87;
12245 0 : else if (pu_index > 84)
12246 0 : nIndex = pu_index;
12247 0 : else if (pu_index > 20)
12248 0 : nIndex = tab8x8[pu_index - 21] + 21;
12249 0 : else if (pu_index > 4)
12250 0 : nIndex = tab16x16[pu_index - 5] + 5;
12251 : else
12252 0 : nIndex = pu_index;
12253 : // NM: Inter list bipred.
12254 : //(LAST,BWD) , (LAST,ALT) and (LAST,ALT2)
12255 : //(LAST2,BWD), (LAST2,ALT) and (LAST2,ALT2)
12256 : //(LAST3,BWD), (LAST3,ALT) and (LAST3,ALT2)
12257 : //(GOLD,BWD) , (GOLD,ALT) and (GOLD,ALT2)
12258 0 : for (firstListRefPictdx = 0; firstListRefPictdx < activeRefPicFirstLisNum;
12259 0 : firstListRefPictdx++) {
12260 0 : for (secondListRefPictdx = 0;
12261 : secondListRefPictdx < activeRefPicSecondLisNum;
12262 0 : secondListRefPictdx++) {
12263 : {
12264 0 : uint8_t to_inject_ref_type_0 = svt_get_ref_frame_type(REF_LIST_0, firstListRefPictdx);
12265 0 : uint8_t to_inject_ref_type_1 = svt_get_ref_frame_type(REF_LIST_1, secondListRefPictdx);
12266 0 : uint8_t add_bi = skip_bi_pred(
12267 : picture_control_set_ptr,
12268 : to_inject_ref_type_0,
12269 : ref_type_table);
12270 0 : add_bi += skip_bi_pred(
12271 : picture_control_set_ptr,
12272 : to_inject_ref_type_1,
12273 : ref_type_table);
12274 :
12275 0 : if (add_bi) {
12276 0 : BiPredictionCompensation(
12277 : context_ptr,
12278 : pu_index,
12279 0 : &(context_ptr->me_candidate[candidateIndex].pu[pu_index]),
12280 : REFERENCE_PIC_LIST_0,
12281 : firstListRefPictdx,
12282 : context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_0]
12283 : [firstListRefPictdx][nIndex],
12284 : REFERENCE_PIC_LIST_1,
12285 : secondListRefPictdx,
12286 : context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_1]
12287 : [secondListRefPictdx][nIndex]);
12288 :
12289 0 : candidateIndex++;
12290 : }
12291 : }
12292 : }
12293 : }
12294 :
12295 0 : if (sequence_control_set_ptr->mrp_mode == 0)
12296 : {
12297 : // NM: Within list 0 bipred: (LAST,LAST2) (LAST,LAST3) (LAST,GOLD)
12298 0 : for (firstListRefPictdx = 1;
12299 : firstListRefPictdx < activeRefPicFirstLisNum;
12300 0 : firstListRefPictdx++) {
12301 0 : uint8_t to_inject_ref_type_0 = svt_get_ref_frame_type(REF_LIST_0, firstListRefPictdx);
12302 0 : uint8_t add_bi = skip_bi_pred(
12303 : picture_control_set_ptr,
12304 : to_inject_ref_type_0,
12305 : ref_type_table);
12306 0 : if (add_bi) {
12307 0 : BiPredictionCompensation(
12308 : context_ptr,
12309 : pu_index,
12310 0 : &(context_ptr->me_candidate[candidateIndex].pu[pu_index]),
12311 : REFERENCE_PIC_LIST_0,
12312 : 0,
12313 : context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_0][0][nIndex],
12314 : REFERENCE_PIC_LIST_0,
12315 : firstListRefPictdx,
12316 : context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_0]
12317 : [firstListRefPictdx][nIndex]);
12318 :
12319 0 : candidateIndex++;
12320 : }
12321 : }
12322 : // NM: Within list 1 bipred: (BWD, ALT)
12323 0 : for (secondListRefPictdx = 1;
12324 0 : secondListRefPictdx < MIN(activeRefPicSecondLisNum, 1);
12325 0 : secondListRefPictdx++) {
12326 0 : uint8_t to_inject_ref_type_0 = svt_get_ref_frame_type(REF_LIST_0, firstListRefPictdx);
12327 0 : uint8_t add_bi = skip_bi_pred(
12328 : picture_control_set_ptr,
12329 : to_inject_ref_type_0,
12330 : ref_type_table);
12331 0 : if (add_bi) {
12332 0 : BiPredictionCompensation(
12333 : context_ptr,
12334 : pu_index,
12335 0 : &(context_ptr->me_candidate[candidateIndex].pu[pu_index]),
12336 : REFERENCE_PIC_LIST_1,
12337 : 0,
12338 : context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_1][0][nIndex],
12339 : REFERENCE_PIC_LIST_1,
12340 : secondListRefPictdx,
12341 : context_ptr->p_sb_best_mv[REFERENCE_PIC_LIST_1]
12342 : [secondListRefPictdx][nIndex]);
12343 :
12344 0 : candidateIndex++;
12345 : }
12346 : }
12347 : }
12348 0 : *total_me_candidate_index = candidateIndex;
12349 :
12350 0 : return return_error;
12351 : }
12352 :
12353 : // Nader - to be replaced by loock-up table
12354 : /*******************************************
12355 : * get_me_info_index
12356 : * search the correct index of the motion
12357 : * info that corresponds to the input
12358 : * md candidate
12359 : *******************************************/
12360 0 : uint32_t get_me_info_index(uint32_t max_me_block, const BlockGeom *blk_geom,
12361 : uint32_t geom_offset_x, uint32_t geom_offset_y) {
12362 : // search for motion info
12363 : uint32_t block_index;
12364 0 : uint32_t me_info_index = 0xFFFFFFF;
12365 :
12366 0 : for (block_index = 0; block_index < max_me_block; block_index++) {
12367 0 : if ((blk_geom->bwidth == partition_width[block_index]) &&
12368 0 : (blk_geom->bheight == partition_height[block_index]) &&
12369 0 : ((blk_geom->origin_x - geom_offset_x) ==
12370 0 : pu_search_index_map[block_index][0]) &&
12371 0 : ((blk_geom->origin_y - geom_offset_y) ==
12372 0 : pu_search_index_map[block_index][1])) {
12373 0 : me_info_index = block_index;
12374 0 : break;
12375 : }
12376 : }
12377 0 : return me_info_index;
12378 : }
12379 :
12380 : // Nader - to be replaced by loock-up table
12381 : /*******************************************
12382 : * get_me_info_index
12383 : * search the correct index of the motion
12384 : * info that corresponds to the input
12385 : * md candidate
12386 : *******************************************/
12387 0 : uint32_t get_in_loop_me_info_index(uint32_t max_me_block, uint8_t is_128_sb,
12388 : const BlockGeom *blk_geom) {
12389 : // search for motion info
12390 : uint32_t block_index;
12391 0 : uint32_t me_info_index = 0xFFFFFFF;
12392 0 : if (is_128_sb) {
12393 0 : for (block_index = 0; block_index < max_me_block; block_index++) {
12394 0 : if (blk_geom->bwidth ==
12395 0 : in_loop_me_block_width_128_sb[block_index] &&
12396 0 : blk_geom->bheight ==
12397 0 : in_loop_me_block_height_128_sb[block_index] &&
12398 0 : blk_geom->origin_x ==
12399 0 : in_loop_me_block_index_128_sb[block_index][0] &&
12400 0 : blk_geom->origin_y ==
12401 0 : in_loop_me_block_index_128_sb[block_index][1]) {
12402 0 : me_info_index = block_index;
12403 0 : break;
12404 : }
12405 : }
12406 : } else {
12407 0 : for (block_index = 0; block_index < max_me_block; block_index++) {
12408 0 : if (blk_geom->bwidth == in_loop_me_block_width[block_index] &&
12409 0 : blk_geom->bheight == in_loop_me_block_height[block_index] &&
12410 0 : blk_geom->origin_x == in_loop_me_block_index[block_index][0] &&
12411 0 : blk_geom->origin_y == in_loop_me_block_index[block_index][1]) {
12412 0 : me_info_index = block_index;
12413 0 : break;
12414 : }
12415 : }
12416 : }
12417 :
12418 0 : return me_info_index;
12419 : }
12420 :
/*
 * NSET_CAND
 *   Stores a distortion value and a prediction direction in entry `num` of
 *   (mePuResult)->distortion_direction[].
 *
 *   Both assignments are grouped in a do { } while (0) so that the macro
 *   behaves as a single statement, e.g. under an unbraced `if`.  The
 *   trailing semicolon of the original expansion is kept on purpose: call
 *   sites written with or without their own semicolon keep compiling.
 */
#define NSET_CAND(mePuResult, num, dist, dir)                          \
    do {                                                               \
        (mePuResult)->distortion_direction[(num)].distortion = (dist); \
        (mePuResult)->distortion_direction[(num)].direction  = (dir);  \
    } while (0);
12424 :
12425 0 : int8_t sort_3_elements(uint32_t a, uint32_t b, uint32_t c) {
12426 0 : uint8_t sortCode = 0;
12427 0 : if (a <= b && a <= c) {
12428 0 : if (b <= c)
12429 0 : sortCode = a_b_c;
12430 : else
12431 0 : sortCode = a_c_b;
12432 0 : } else if (b <= a && b <= c) {
12433 0 : if (a <= c)
12434 0 : sortCode = b_a_c;
12435 : else
12436 0 : sortCode = b_c_a;
12437 0 : } else if (a <= b)
12438 0 : sortCode = c_a_b;
12439 : else
12440 0 : sortCode = c_b_a;
12441 0 : return sortCode;
12442 : }
12443 :
12444 0 : EbErrorType CheckZeroZeroCenter(EbPictureBufferDesc *refPicPtr,
12445 : MeContext *context_ptr, uint32_t sb_origin_x,
12446 : uint32_t sb_origin_y, uint32_t sb_width,
12447 : uint32_t sb_height, int16_t *x_search_center,
12448 : int16_t *y_search_center)
12449 :
12450 : {
12451 0 : EbErrorType return_error = EB_ErrorNone;
12452 : uint32_t searchRegionIndex, zeroMvSad, hmeMvSad, hmeMvdRate;
12453 : uint64_t hmeMvCost, zeroMvCost, searchCenterCost;
12454 0 : int16_t origin_x = (int16_t)sb_origin_x;
12455 0 : int16_t origin_y = (int16_t)sb_origin_y;
12456 0 : uint32_t subsampleSad = 1;
12457 0 : int16_t pad_width = (int16_t)BLOCK_SIZE_64 - 1;
12458 0 : int16_t pad_height = (int16_t)BLOCK_SIZE_64 - 1;
12459 :
12460 0 : searchRegionIndex =
12461 0 : (int16_t)refPicPtr->origin_x + origin_x +
12462 0 : ((int16_t)refPicPtr->origin_y + origin_y) * refPicPtr->stride_y;
12463 :
12464 0 : zeroMvSad = nxm_sad_kernel(
12465 0 : context_ptr->sb_src_ptr,
12466 0 : context_ptr->sb_src_stride << subsampleSad,
12467 0 : &(refPicPtr->buffer_y[searchRegionIndex]),
12468 0 : refPicPtr->stride_y << subsampleSad,
12469 : sb_height >> subsampleSad,
12470 : sb_width);
12471 :
12472 0 : zeroMvSad = zeroMvSad << subsampleSad;
12473 :
12474 : // FIX
12475 : // Correct the left edge of the Search Area if it is not on the reference
12476 : // Picture
12477 0 : *x_search_center = ((origin_x + *x_search_center) < -pad_width)
12478 0 : ? -pad_width - origin_x
12479 : : *x_search_center;
12480 : // Correct the right edge of the Search Area if its not on the reference
12481 : // Picture
12482 0 : *x_search_center =
12483 0 : ((origin_x + *x_search_center) > (int16_t)refPicPtr->width - 1)
12484 0 : ? *x_search_center - ((origin_x + *x_search_center) -
12485 0 : ((int16_t)refPicPtr->width - 1))
12486 : : *x_search_center;
12487 : // Correct the top edge of the Search Area if it is not on the reference
12488 : // Picture
12489 0 : *y_search_center = ((origin_y + *y_search_center) < -pad_height)
12490 0 : ? -pad_height - origin_y
12491 : : *y_search_center;
12492 : // Correct the bottom edge of the Search Area if its not on the reference
12493 : // Picture
12494 0 : *y_search_center =
12495 0 : ((origin_y + *y_search_center) > (int16_t)refPicPtr->height - 1)
12496 0 : ? *y_search_center - ((origin_y + *y_search_center) -
12497 0 : ((int16_t)refPicPtr->height - 1))
12498 : : *y_search_center;
12499 : ///
12500 :
12501 0 : zeroMvCost = zeroMvSad << COST_PRECISION;
12502 0 : searchRegionIndex =
12503 0 : (int16_t)(refPicPtr->origin_x + origin_x) + *x_search_center +
12504 0 : ((int16_t)(refPicPtr->origin_y + origin_y) + *y_search_center) *
12505 0 : refPicPtr->stride_y;
12506 :
12507 0 : hmeMvSad = nxm_sad_kernel(
12508 0 : context_ptr->sb_src_ptr,
12509 0 : context_ptr->sb_src_stride << subsampleSad,
12510 0 : &(refPicPtr->buffer_y[searchRegionIndex]),
12511 0 : refPicPtr->stride_y << subsampleSad,
12512 : sb_height >> subsampleSad,
12513 : sb_width);
12514 :
12515 0 : hmeMvSad = hmeMvSad << subsampleSad;
12516 :
12517 0 : hmeMvdRate = 0;
12518 : // AMIR use AV1 rate estimation functions
12519 : // MeGetMvdFractionBits(
12520 : // ABS(*x_search_center << 2),
12521 : // ABS(*y_search_center << 2),
12522 : // context_ptr->mvd_bits_array,
12523 : // &hmeMvdRate);
12524 :
12525 0 : hmeMvCost = (hmeMvSad << COST_PRECISION) +
12526 0 : (((context_ptr->lambda * hmeMvdRate) + MD_OFFSET) >> MD_SHIFT);
12527 0 : searchCenterCost = MIN(zeroMvCost, hmeMvCost);
12528 :
12529 0 : *x_search_center = (searchCenterCost == zeroMvCost) ? 0 : *x_search_center;
12530 0 : *y_search_center = (searchCenterCost == zeroMvCost) ? 0 : *y_search_center;
12531 :
12532 0 : return return_error;
12533 : }
12534 :
12535 0 : EbErrorType suPelEnable(MeContext *context_ptr,
12536 : PictureParentControlSet *picture_control_set_ptr,
12537 : uint32_t listIndex, uint32_t refPicIndex,
12538 : EbBool *enableHalfPel32x32, EbBool *enableHalfPel16x16,
12539 : EbBool *enableHalfPel8x8) {
12540 0 : EbErrorType return_error = EB_ErrorNone;
12541 :
12542 0 : uint32_t mvMag32x32 = 0;
12543 0 : uint32_t mvMag16x16 = 0;
12544 0 : uint32_t mvMag8x8 = 0;
12545 0 : uint32_t avgSad32x32 = 0;
12546 0 : uint32_t avgSad16x16 = 0;
12547 0 : uint32_t avgSad8x8 = 0;
12548 0 : uint32_t avgMvx32x32 = 0;
12549 0 : uint32_t avgMvy32x32 = 0;
12550 0 : uint32_t avgMvx16x16 = 0;
12551 0 : uint32_t avgMvy16x16 = 0;
12552 0 : uint32_t avgMvx8x8 = 0;
12553 0 : uint32_t avgMvy8x8 = 0;
12554 :
12555 0 : avgMvx32x32 = (_MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12556 0 : [ME_TIER_ZERO_PU_32x32_0]) +
12557 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12558 0 : [ME_TIER_ZERO_PU_32x32_1]) +
12559 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12560 0 : [ME_TIER_ZERO_PU_32x32_2]) +
12561 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12562 0 : [ME_TIER_ZERO_PU_32x32_3])) >>
12563 : 2;
12564 0 : avgMvy32x32 = (_MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12565 0 : [ME_TIER_ZERO_PU_32x32_0]) +
12566 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12567 0 : [ME_TIER_ZERO_PU_32x32_1]) +
12568 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12569 0 : [ME_TIER_ZERO_PU_32x32_2]) +
12570 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12571 0 : [ME_TIER_ZERO_PU_32x32_3])) >>
12572 : 2;
12573 0 : mvMag32x32 = SQR(avgMvx32x32) + SQR(avgMvy32x32);
12574 :
12575 0 : avgMvx16x16 =
12576 0 : (_MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12577 0 : [ME_TIER_ZERO_PU_16x16_0]) +
12578 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12579 0 : [ME_TIER_ZERO_PU_16x16_1]) +
12580 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12581 0 : [ME_TIER_ZERO_PU_16x16_2]) +
12582 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12583 0 : [ME_TIER_ZERO_PU_16x16_3]) +
12584 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12585 0 : [ME_TIER_ZERO_PU_16x16_4]) +
12586 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12587 0 : [ME_TIER_ZERO_PU_16x16_5]) +
12588 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12589 0 : [ME_TIER_ZERO_PU_16x16_6]) +
12590 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12591 0 : [ME_TIER_ZERO_PU_16x16_7]) +
12592 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12593 0 : [ME_TIER_ZERO_PU_16x16_8]) +
12594 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12595 0 : [ME_TIER_ZERO_PU_16x16_9]) +
12596 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12597 0 : [ME_TIER_ZERO_PU_16x16_10]) +
12598 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12599 0 : [ME_TIER_ZERO_PU_16x16_11]) +
12600 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12601 0 : [ME_TIER_ZERO_PU_16x16_12]) +
12602 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12603 0 : [ME_TIER_ZERO_PU_16x16_13]) +
12604 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12605 0 : [ME_TIER_ZERO_PU_16x16_14]) +
12606 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12607 0 : [ME_TIER_ZERO_PU_16x16_15])) >>
12608 : 4;
12609 0 : avgMvy16x16 =
12610 0 : (_MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12611 0 : [ME_TIER_ZERO_PU_16x16_0]) +
12612 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12613 0 : [ME_TIER_ZERO_PU_16x16_1]) +
12614 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12615 0 : [ME_TIER_ZERO_PU_16x16_2]) +
12616 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12617 0 : [ME_TIER_ZERO_PU_16x16_3]) +
12618 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12619 0 : [ME_TIER_ZERO_PU_16x16_4]) +
12620 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12621 0 : [ME_TIER_ZERO_PU_16x16_5]) +
12622 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12623 0 : [ME_TIER_ZERO_PU_16x16_6]) +
12624 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12625 0 : [ME_TIER_ZERO_PU_16x16_7]) +
12626 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12627 0 : [ME_TIER_ZERO_PU_16x16_8]) +
12628 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12629 0 : [ME_TIER_ZERO_PU_16x16_9]) +
12630 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12631 0 : [ME_TIER_ZERO_PU_16x16_10]) +
12632 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12633 0 : [ME_TIER_ZERO_PU_16x16_11]) +
12634 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12635 0 : [ME_TIER_ZERO_PU_16x16_12]) +
12636 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12637 0 : [ME_TIER_ZERO_PU_16x16_13]) +
12638 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12639 0 : [ME_TIER_ZERO_PU_16x16_14]) +
12640 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12641 0 : [ME_TIER_ZERO_PU_16x16_15])) >>
12642 : 4;
12643 0 : mvMag16x16 = SQR(avgMvx16x16) + SQR(avgMvy16x16);
12644 :
12645 0 : avgMvx8x8 = (_MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12646 0 : [ME_TIER_ZERO_PU_8x8_0]) +
12647 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12648 0 : [ME_TIER_ZERO_PU_8x8_1]) +
12649 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12650 0 : [ME_TIER_ZERO_PU_8x8_2]) +
12651 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12652 0 : [ME_TIER_ZERO_PU_8x8_3]) +
12653 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12654 0 : [ME_TIER_ZERO_PU_8x8_4]) +
12655 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12656 0 : [ME_TIER_ZERO_PU_8x8_5]) +
12657 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12658 0 : [ME_TIER_ZERO_PU_8x8_6]) +
12659 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12660 0 : [ME_TIER_ZERO_PU_8x8_7]) +
12661 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12662 0 : [ME_TIER_ZERO_PU_8x8_8]) +
12663 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12664 0 : [ME_TIER_ZERO_PU_8x8_9]) +
12665 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12666 0 : [ME_TIER_ZERO_PU_8x8_10]) +
12667 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12668 0 : [ME_TIER_ZERO_PU_8x8_11]) +
12669 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12670 0 : [ME_TIER_ZERO_PU_8x8_12]) +
12671 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12672 0 : [ME_TIER_ZERO_PU_8x8_13]) +
12673 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12674 0 : [ME_TIER_ZERO_PU_8x8_14]) +
12675 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12676 0 : [ME_TIER_ZERO_PU_8x8_15]) +
12677 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12678 0 : [ME_TIER_ZERO_PU_8x8_16]) +
12679 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12680 0 : [ME_TIER_ZERO_PU_8x8_17]) +
12681 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12682 0 : [ME_TIER_ZERO_PU_8x8_18]) +
12683 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12684 0 : [ME_TIER_ZERO_PU_8x8_19]) +
12685 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12686 0 : [ME_TIER_ZERO_PU_8x8_20]) +
12687 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12688 0 : [ME_TIER_ZERO_PU_8x8_21]) +
12689 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12690 0 : [ME_TIER_ZERO_PU_8x8_22]) +
12691 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12692 0 : [ME_TIER_ZERO_PU_8x8_23]) +
12693 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12694 0 : [ME_TIER_ZERO_PU_8x8_24]) +
12695 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12696 0 : [ME_TIER_ZERO_PU_8x8_25]) +
12697 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12698 0 : [ME_TIER_ZERO_PU_8x8_26]) +
12699 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12700 0 : [ME_TIER_ZERO_PU_8x8_27]) +
12701 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12702 0 : [ME_TIER_ZERO_PU_8x8_28]) +
12703 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12704 0 : [ME_TIER_ZERO_PU_8x8_29]) +
12705 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12706 0 : [ME_TIER_ZERO_PU_8x8_30]) +
12707 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12708 0 : [ME_TIER_ZERO_PU_8x8_31]) +
12709 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12710 0 : [ME_TIER_ZERO_PU_8x8_32]) +
12711 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12712 0 : [ME_TIER_ZERO_PU_8x8_33]) +
12713 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12714 0 : [ME_TIER_ZERO_PU_8x8_34]) +
12715 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12716 0 : [ME_TIER_ZERO_PU_8x8_35]) +
12717 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12718 0 : [ME_TIER_ZERO_PU_8x8_36]) +
12719 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12720 0 : [ME_TIER_ZERO_PU_8x8_37]) +
12721 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12722 0 : [ME_TIER_ZERO_PU_8x8_38]) +
12723 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12724 0 : [ME_TIER_ZERO_PU_8x8_39]) +
12725 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12726 0 : [ME_TIER_ZERO_PU_8x8_40]) +
12727 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12728 0 : [ME_TIER_ZERO_PU_8x8_41]) +
12729 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12730 0 : [ME_TIER_ZERO_PU_8x8_42]) +
12731 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12732 0 : [ME_TIER_ZERO_PU_8x8_43]) +
12733 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12734 0 : [ME_TIER_ZERO_PU_8x8_44]) +
12735 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12736 0 : [ME_TIER_ZERO_PU_8x8_45]) +
12737 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12738 0 : [ME_TIER_ZERO_PU_8x8_46]) +
12739 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12740 0 : [ME_TIER_ZERO_PU_8x8_47]) +
12741 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12742 0 : [ME_TIER_ZERO_PU_8x8_48]) +
12743 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12744 0 : [ME_TIER_ZERO_PU_8x8_49]) +
12745 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12746 0 : [ME_TIER_ZERO_PU_8x8_50]) +
12747 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12748 0 : [ME_TIER_ZERO_PU_8x8_51]) +
12749 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12750 0 : [ME_TIER_ZERO_PU_8x8_52]) +
12751 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12752 0 : [ME_TIER_ZERO_PU_8x8_53]) +
12753 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12754 0 : [ME_TIER_ZERO_PU_8x8_54]) +
12755 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12756 0 : [ME_TIER_ZERO_PU_8x8_55]) +
12757 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12758 0 : [ME_TIER_ZERO_PU_8x8_56]) +
12759 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12760 0 : [ME_TIER_ZERO_PU_8x8_57]) +
12761 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12762 0 : [ME_TIER_ZERO_PU_8x8_58]) +
12763 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12764 0 : [ME_TIER_ZERO_PU_8x8_59]) +
12765 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12766 0 : [ME_TIER_ZERO_PU_8x8_60]) +
12767 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12768 0 : [ME_TIER_ZERO_PU_8x8_61]) +
12769 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12770 0 : [ME_TIER_ZERO_PU_8x8_62]) +
12771 0 : _MVXT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12772 0 : [ME_TIER_ZERO_PU_8x8_63])) >>
12773 : 6;
12774 0 : avgMvy8x8 = (_MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12775 0 : [ME_TIER_ZERO_PU_8x8_0]) +
12776 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12777 0 : [ME_TIER_ZERO_PU_8x8_1]) +
12778 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12779 0 : [ME_TIER_ZERO_PU_8x8_2]) +
12780 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12781 0 : [ME_TIER_ZERO_PU_8x8_3]) +
12782 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12783 0 : [ME_TIER_ZERO_PU_8x8_4]) +
12784 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12785 0 : [ME_TIER_ZERO_PU_8x8_5]) +
12786 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12787 0 : [ME_TIER_ZERO_PU_8x8_6]) +
12788 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12789 0 : [ME_TIER_ZERO_PU_8x8_7]) +
12790 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12791 0 : [ME_TIER_ZERO_PU_8x8_8]) +
12792 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12793 0 : [ME_TIER_ZERO_PU_8x8_9]) +
12794 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12795 0 : [ME_TIER_ZERO_PU_8x8_10]) +
12796 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12797 0 : [ME_TIER_ZERO_PU_8x8_11]) +
12798 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12799 0 : [ME_TIER_ZERO_PU_8x8_12]) +
12800 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12801 0 : [ME_TIER_ZERO_PU_8x8_13]) +
12802 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12803 0 : [ME_TIER_ZERO_PU_8x8_14]) +
12804 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12805 0 : [ME_TIER_ZERO_PU_8x8_15]) +
12806 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12807 0 : [ME_TIER_ZERO_PU_8x8_16]) +
12808 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12809 0 : [ME_TIER_ZERO_PU_8x8_17]) +
12810 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12811 0 : [ME_TIER_ZERO_PU_8x8_18]) +
12812 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12813 0 : [ME_TIER_ZERO_PU_8x8_19]) +
12814 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12815 0 : [ME_TIER_ZERO_PU_8x8_20]) +
12816 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12817 0 : [ME_TIER_ZERO_PU_8x8_21]) +
12818 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12819 0 : [ME_TIER_ZERO_PU_8x8_22]) +
12820 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12821 0 : [ME_TIER_ZERO_PU_8x8_23]) +
12822 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12823 0 : [ME_TIER_ZERO_PU_8x8_24]) +
12824 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12825 0 : [ME_TIER_ZERO_PU_8x8_25]) +
12826 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12827 0 : [ME_TIER_ZERO_PU_8x8_26]) +
12828 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12829 0 : [ME_TIER_ZERO_PU_8x8_27]) +
12830 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12831 0 : [ME_TIER_ZERO_PU_8x8_28]) +
12832 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12833 0 : [ME_TIER_ZERO_PU_8x8_29]) +
12834 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12835 0 : [ME_TIER_ZERO_PU_8x8_30]) +
12836 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12837 0 : [ME_TIER_ZERO_PU_8x8_31]) +
12838 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12839 0 : [ME_TIER_ZERO_PU_8x8_32]) +
12840 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12841 0 : [ME_TIER_ZERO_PU_8x8_33]) +
12842 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12843 0 : [ME_TIER_ZERO_PU_8x8_34]) +
12844 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12845 0 : [ME_TIER_ZERO_PU_8x8_35]) +
12846 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12847 0 : [ME_TIER_ZERO_PU_8x8_36]) +
12848 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12849 0 : [ME_TIER_ZERO_PU_8x8_37]) +
12850 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12851 0 : [ME_TIER_ZERO_PU_8x8_38]) +
12852 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12853 0 : [ME_TIER_ZERO_PU_8x8_39]) +
12854 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12855 0 : [ME_TIER_ZERO_PU_8x8_40]) +
12856 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12857 0 : [ME_TIER_ZERO_PU_8x8_41]) +
12858 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12859 0 : [ME_TIER_ZERO_PU_8x8_42]) +
12860 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12861 0 : [ME_TIER_ZERO_PU_8x8_43]) +
12862 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12863 0 : [ME_TIER_ZERO_PU_8x8_44]) +
12864 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12865 0 : [ME_TIER_ZERO_PU_8x8_45]) +
12866 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12867 0 : [ME_TIER_ZERO_PU_8x8_46]) +
12868 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12869 0 : [ME_TIER_ZERO_PU_8x8_47]) +
12870 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12871 0 : [ME_TIER_ZERO_PU_8x8_48]) +
12872 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12873 0 : [ME_TIER_ZERO_PU_8x8_49]) +
12874 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12875 0 : [ME_TIER_ZERO_PU_8x8_50]) +
12876 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12877 0 : [ME_TIER_ZERO_PU_8x8_51]) +
12878 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12879 0 : [ME_TIER_ZERO_PU_8x8_52]) +
12880 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12881 0 : [ME_TIER_ZERO_PU_8x8_53]) +
12882 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12883 0 : [ME_TIER_ZERO_PU_8x8_54]) +
12884 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12885 0 : [ME_TIER_ZERO_PU_8x8_55]) +
12886 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12887 0 : [ME_TIER_ZERO_PU_8x8_56]) +
12888 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12889 0 : [ME_TIER_ZERO_PU_8x8_57]) +
12890 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12891 0 : [ME_TIER_ZERO_PU_8x8_58]) +
12892 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12893 0 : [ME_TIER_ZERO_PU_8x8_59]) +
12894 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12895 0 : [ME_TIER_ZERO_PU_8x8_60]) +
12896 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12897 0 : [ME_TIER_ZERO_PU_8x8_61]) +
12898 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12899 0 : [ME_TIER_ZERO_PU_8x8_62]) +
12900 0 : _MVYT(context_ptr->p_sb_best_mv[listIndex][refPicIndex]
12901 0 : [ME_TIER_ZERO_PU_8x8_63])) >>
12902 : 6;
12903 0 : mvMag8x8 = SQR(avgMvx8x8) + SQR(avgMvy8x8);
12904 :
12905 0 : avgSad32x32 =
12906 : (context_ptr
12907 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_32x32_0] +
12908 : context_ptr
12909 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_32x32_1] +
12910 : context_ptr
12911 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_32x32_2] +
12912 : context_ptr->p_sb_best_sad[listIndex][refPicIndex]
12913 0 : [ME_TIER_ZERO_PU_32x32_3]) >>
12914 : 2;
12915 0 : avgSad16x16 =
12916 : (context_ptr
12917 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_0] +
12918 : context_ptr
12919 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_1] +
12920 : context_ptr
12921 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_2] +
12922 : context_ptr
12923 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_3] +
12924 : context_ptr
12925 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_4] +
12926 : context_ptr
12927 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_5] +
12928 : context_ptr
12929 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_6] +
12930 : context_ptr
12931 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_7] +
12932 : context_ptr
12933 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_8] +
12934 : context_ptr
12935 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_9] +
12936 : context_ptr
12937 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_10] +
12938 : context_ptr
12939 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_11] +
12940 : context_ptr
12941 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_12] +
12942 : context_ptr
12943 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_13] +
12944 : context_ptr
12945 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_16x16_14] +
12946 : context_ptr->p_sb_best_sad[listIndex][refPicIndex]
12947 0 : [ME_TIER_ZERO_PU_16x16_15]) >>
12948 : 4;
12949 0 : avgSad8x8 =
12950 : (context_ptr
12951 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_0] +
12952 : context_ptr
12953 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_1] +
12954 : context_ptr
12955 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_2] +
12956 : context_ptr
12957 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_3] +
12958 : context_ptr
12959 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_4] +
12960 : context_ptr
12961 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_5] +
12962 : context_ptr
12963 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_6] +
12964 : context_ptr
12965 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_7] +
12966 : context_ptr
12967 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_8] +
12968 : context_ptr
12969 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_9] +
12970 : context_ptr
12971 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_10] +
12972 : context_ptr
12973 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_11] +
12974 : context_ptr
12975 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_12] +
12976 : context_ptr
12977 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_13] +
12978 : context_ptr
12979 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_14] +
12980 : context_ptr
12981 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_15] +
12982 : context_ptr
12983 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_16] +
12984 : context_ptr
12985 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_17] +
12986 : context_ptr
12987 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_18] +
12988 : context_ptr
12989 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_19] +
12990 : context_ptr
12991 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_20] +
12992 : context_ptr
12993 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_21] +
12994 : context_ptr
12995 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_22] +
12996 : context_ptr
12997 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_23] +
12998 : context_ptr
12999 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_24] +
13000 : context_ptr
13001 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_25] +
13002 : context_ptr
13003 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_26] +
13004 : context_ptr
13005 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_27] +
13006 : context_ptr
13007 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_28] +
13008 : context_ptr
13009 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_29] +
13010 : context_ptr
13011 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_30] +
13012 : context_ptr
13013 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_31] +
13014 : context_ptr
13015 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_32] +
13016 : context_ptr
13017 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_33] +
13018 : context_ptr
13019 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_34] +
13020 : context_ptr
13021 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_35] +
13022 : context_ptr
13023 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_36] +
13024 : context_ptr
13025 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_37] +
13026 : context_ptr
13027 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_38] +
13028 : context_ptr
13029 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_39] +
13030 : context_ptr
13031 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_40] +
13032 : context_ptr
13033 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_41] +
13034 : context_ptr
13035 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_42] +
13036 : context_ptr
13037 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_43] +
13038 : context_ptr
13039 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_44] +
13040 : context_ptr
13041 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_45] +
13042 : context_ptr
13043 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_46] +
13044 : context_ptr
13045 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_47] +
13046 : context_ptr
13047 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_48] +
13048 : context_ptr
13049 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_49] +
13050 : context_ptr
13051 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_50] +
13052 : context_ptr
13053 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_51] +
13054 : context_ptr
13055 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_52] +
13056 : context_ptr
13057 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_53] +
13058 : context_ptr
13059 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_54] +
13060 : context_ptr
13061 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_55] +
13062 : context_ptr
13063 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_56] +
13064 : context_ptr
13065 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_57] +
13066 : context_ptr
13067 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_58] +
13068 : context_ptr
13069 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_59] +
13070 : context_ptr
13071 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_60] +
13072 : context_ptr
13073 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_61] +
13074 : context_ptr
13075 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_62] +
13076 : context_ptr
13077 0 : ->p_sb_best_sad[listIndex][refPicIndex][ME_TIER_ZERO_PU_8x8_63]) >>
13078 : 6;
13079 :
13080 0 : if (picture_control_set_ptr->temporal_layer_index == 0) {
13081 : // 32x32
13082 0 : if ((mvMag32x32 < SQR(48)) && (avgSad32x32 < 32 * 32 * 6))
13083 0 : *enableHalfPel32x32 = EB_TRUE; // CLASS_0
13084 0 : else if ((mvMag32x32 < SQR(48)) && !(avgSad32x32 < 32 * 32 * 6))
13085 0 : *enableHalfPel32x32 = EB_FALSE; // CLASS_1
13086 0 : else if (!(mvMag32x32 < SQR(48)) && (avgSad32x32 < 32 * 32 * 6))
13087 0 : *enableHalfPel32x32 = EB_TRUE; // CLASS_2
13088 : else
13089 0 : *enableHalfPel32x32 = EB_FALSE; // CLASS_3
13090 : // 16x16
13091 0 : if ((mvMag16x16 < SQR(48)) && (avgSad16x16 < 16 * 16 * 2))
13092 0 : *enableHalfPel16x16 = EB_FALSE; // CLASS_0
13093 0 : else if ((mvMag16x16 < SQR(48)) && !(avgSad16x16 < 16 * 16 * 2))
13094 0 : *enableHalfPel16x16 = EB_TRUE; // CLASS_1
13095 0 : else if (!(mvMag16x16 < SQR(48)) && (avgSad16x16 < 16 * 16 * 2))
13096 0 : *enableHalfPel16x16 = EB_FALSE; // CLASS_2
13097 : else
13098 0 : *enableHalfPel16x16 = EB_TRUE; // CLASS_3
13099 : // 8x8
13100 0 : if ((mvMag8x8 < SQR(48)) && (avgSad8x8 < 8 * 8 * 2))
13101 0 : *enableHalfPel8x8 = EB_FALSE; // CLASS_0
13102 0 : else if ((mvMag8x8 < SQR(48)) && !(avgSad8x8 < 8 * 8 * 2))
13103 0 : *enableHalfPel8x8 = EB_TRUE; // CLASS_1
13104 0 : else if (!(mvMag8x8 < SQR(48)) && (avgSad8x8 < 8 * 8 * 2))
13105 0 : *enableHalfPel8x8 = EB_FALSE; // CLASS_2
13106 : else
13107 0 : *enableHalfPel8x8 = EB_TRUE; // CLASS_3
13108 : }
13109 :
13110 0 : else if (picture_control_set_ptr->temporal_layer_index == 1) {
13111 : // 32x32
13112 0 : if ((mvMag32x32 < SQR(32)) && (avgSad32x32 < 32 * 32 * 6))
13113 0 : *enableHalfPel32x32 = EB_TRUE; // CLASS_0
13114 0 : else if ((mvMag32x32 < SQR(32)) && !(avgSad32x32 < 32 * 32 * 6))
13115 0 : *enableHalfPel32x32 = EB_FALSE; // CLASS_1
13116 0 : else if (!(mvMag32x32 < SQR(32)) && (avgSad32x32 < 32 * 32 * 6))
13117 0 : *enableHalfPel32x32 = EB_TRUE; // CLASS_2
13118 : else
13119 0 : *enableHalfPel32x32 = EB_TRUE; // CLASS_3
13120 : // 16x16
13121 0 : if ((mvMag16x16 < SQR(32)) && (avgSad16x16 < 16 * 16 * 2))
13122 0 : *enableHalfPel16x16 = EB_FALSE; // CLASS_0
13123 0 : else if ((mvMag16x16 < SQR(32)) && !(avgSad16x16 < 16 * 16 * 2))
13124 0 : *enableHalfPel16x16 = EB_TRUE; // CLASS_1
13125 0 : else if (!(mvMag16x16 < SQR(32)) && (avgSad16x16 < 16 * 16 * 2))
13126 0 : *enableHalfPel16x16 = EB_FALSE; // CLASS_2
13127 : else
13128 0 : *enableHalfPel16x16 = EB_TRUE; // CLASS_3
13129 : // 8x8
13130 0 : if ((mvMag8x8 < SQR(32)) && (avgSad8x8 < 8 * 8 * 2))
13131 0 : *enableHalfPel8x8 = EB_FALSE; // CLASS_0
13132 0 : else if ((mvMag8x8 < SQR(32)) && !(avgSad8x8 < 8 * 8 * 2))
13133 0 : *enableHalfPel8x8 = EB_TRUE; // CLASS_1
13134 0 : else if (!(mvMag8x8 < SQR(32)) && (avgSad8x8 < 8 * 8 * 2))
13135 0 : *enableHalfPel8x8 = EB_FALSE; // CLASS_2
13136 : else
13137 0 : *enableHalfPel8x8 = EB_TRUE; // CLASS_3
13138 0 : } else if (picture_control_set_ptr->temporal_layer_index == 2) {
13139 : // 32x32
13140 0 : if ((mvMag32x32 < SQR(80)) && (avgSad32x32 < 32 * 32 * 6))
13141 0 : *enableHalfPel32x32 = EB_TRUE; // CLASS_0
13142 0 : else if ((mvMag32x32 < SQR(80)) && !(avgSad32x32 < 32 * 32 * 6))
13143 0 : *enableHalfPel32x32 = EB_FALSE; // CLASS_1
13144 0 : else if (!(mvMag32x32 < SQR(80)) && (avgSad32x32 < 32 * 32 * 6))
13145 0 : *enableHalfPel32x32 = EB_TRUE; // CLASS_2
13146 : else
13147 0 : *enableHalfPel32x32 = EB_FALSE; // CLASS_3
13148 : // 16x16
13149 0 : if ((mvMag16x16 < SQR(80)) && (avgSad16x16 < 16 * 16 * 2))
13150 0 : *enableHalfPel16x16 = EB_FALSE; // CLASS_0
13151 0 : else if ((mvMag16x16 < SQR(80)) && !(avgSad16x16 < 16 * 16 * 2))
13152 0 : *enableHalfPel16x16 = EB_TRUE; // CLASS_1
13153 0 : else if (!(mvMag16x16 < SQR(80)) && (avgSad16x16 < 16 * 16 * 2))
13154 0 : *enableHalfPel16x16 = EB_FALSE; // CLASS_2
13155 : else
13156 0 : *enableHalfPel16x16 = EB_TRUE; // CLASS_3
13157 : // 8x8
13158 0 : if ((mvMag8x8 < SQR(80)) && (avgSad8x8 < 8 * 8 * 2))
13159 0 : *enableHalfPel8x8 = EB_FALSE; // CLASS_0
13160 0 : else if ((mvMag8x8 < SQR(80)) && !(avgSad8x8 < 8 * 8 * 2))
13161 0 : *enableHalfPel8x8 = EB_TRUE; // CLASS_1
13162 0 : else if (!(mvMag8x8 < SQR(80)) && (avgSad8x8 < 8 * 8 * 2))
13163 0 : *enableHalfPel8x8 = EB_FALSE; // CLASS_2
13164 : else
13165 0 : *enableHalfPel8x8 = EB_TRUE; // CLASS_3
13166 : } else {
13167 : // 32x32
13168 0 : if ((mvMag32x32 < SQR(48)) && (avgSad32x32 < 32 * 32 * 6))
13169 0 : *enableHalfPel32x32 = EB_TRUE; // CLASS_0
13170 0 : else if ((mvMag32x32 < SQR(48)) && !(avgSad32x32 < 32 * 32 * 6))
13171 0 : *enableHalfPel32x32 = EB_TRUE; // CLASS_1
13172 0 : else if (!(mvMag32x32 < SQR(48)) && (avgSad32x32 < 32 * 32 * 6))
13173 0 : *enableHalfPel32x32 = EB_TRUE; // CLASS_2
13174 : else
13175 0 : *enableHalfPel32x32 = EB_FALSE; // CLASS_3
13176 : // 16x16
13177 0 : if ((mvMag16x16 < SQR(48)) && (avgSad16x16 < 16 * 16 * 2))
13178 0 : *enableHalfPel16x16 = EB_FALSE; // CLASS_0
13179 0 : else if ((mvMag16x16 < SQR(48)) && !(avgSad16x16 < 16 * 16 * 2))
13180 0 : *enableHalfPel16x16 = EB_TRUE; // CLASS_1
13181 0 : else if (!(mvMag16x16 < SQR(48)) && (avgSad16x16 < 16 * 16 * 2))
13182 0 : *enableHalfPel16x16 = EB_FALSE; // CLASS_2
13183 : else
13184 0 : *enableHalfPel16x16 = EB_TRUE; // CLASS_3
13185 : // 8x8
13186 0 : if ((mvMag8x8 < SQR(48)) && (avgSad8x8 < 8 * 8 * 2))
13187 0 : *enableHalfPel8x8 = EB_FALSE; // CLASS_0
13188 0 : else if ((mvMag8x8 < SQR(48)) && !(avgSad8x8 < 8 * 8 * 2))
13189 0 : *enableHalfPel8x8 = EB_TRUE; // CLASS_1
13190 0 : else if (!(mvMag8x8 < SQR(48)) && (avgSad8x8 < 8 * 8 * 2))
13191 0 : *enableHalfPel8x8 = EB_FALSE; // CLASS_2
13192 : else
13193 0 : *enableHalfPel8x8 = EB_FALSE; // EB_TRUE; //CLASS_3
13194 : }
13195 :
13196 0 : return return_error;
13197 : }
13198 :
13199 0 : static void hme_mv_center_check(EbPictureBufferDesc *ref_pic_ptr,
13200 : MeContext *context_ptr, int16_t *xsc,
13201 : int16_t *ysc, uint32_t list_index,
13202 : int16_t origin_x, int16_t origin_y,
13203 : uint32_t sb_width, uint32_t sb_height)
13204 : {
13205 : // Search for (-srx/2, 0), (+srx/2, 0), (0, -sry/2), (0, +sry/2),
13206 : /*
13207 : |------------C-------------|
13208 : |--------------------------|
13209 : |--------------------------|
13210 : A 0 B
13211 : |--------------------------|
13212 : |--------------------------|
13213 : |------------D-------------|
13214 : */
13215 : uint32_t search_region_index;
13216 0 : int16_t search_center_x = *xsc;
13217 0 : int16_t search_center_y = *ysc;
13218 : uint64_t best_cost;
13219 0 : uint64_t direct_mv_cost = 0xFFFFFFFFFFFFF;
13220 0 : uint8_t sparce_scale = 1;
13221 0 : int16_t pad_width = (int16_t)BLOCK_SIZE_64 - 1;
13222 0 : int16_t pad_height = (int16_t)BLOCK_SIZE_64 - 1;
13223 : // O pos
13224 :
13225 0 : search_region_index =
13226 0 : (int16_t)ref_pic_ptr->origin_x + origin_x +
13227 0 : ((int16_t)ref_pic_ptr->origin_y + origin_y) * ref_pic_ptr->stride_y;
13228 :
13229 0 : uint32_t sub_sampled_sad = 1;
13230 0 : uint64_t zero_mv_sad = nxm_sad_kernel(
13231 0 : context_ptr->sb_src_ptr,
13232 0 : context_ptr->sb_src_stride << sub_sampled_sad,
13233 0 : &(ref_pic_ptr->buffer_y[search_region_index]),
13234 0 : ref_pic_ptr->stride_y << sub_sampled_sad,
13235 : sb_height >> sub_sampled_sad,
13236 : sb_width);
13237 :
13238 0 : zero_mv_sad = zero_mv_sad << sub_sampled_sad;
13239 :
13240 0 : uint64_t zero_mv_cost = zero_mv_sad << COST_PRECISION;
13241 :
13242 : // A pos
13243 0 : search_center_x =
13244 0 : 0 - (context_ptr->hme_level0_total_search_area_width * sparce_scale);
13245 0 : search_center_y = 0;
13246 :
13247 : // Correct the left edge of the Search Area if it is not on the reference
13248 : // Picture
13249 0 : search_center_x = ((origin_x + search_center_x) < -pad_width)
13250 0 : ? -pad_width - origin_x
13251 : : search_center_x;
13252 : // Correct the right edge of the Search Area if its not on the reference
13253 : // Picture
13254 0 : search_center_x =
13255 0 : ((origin_x + search_center_x) > (int16_t)ref_pic_ptr->width - 1)
13256 0 : ? search_center_x - ((origin_x + search_center_x) -
13257 0 : ((int16_t)ref_pic_ptr->width - 1))
13258 : : search_center_x;
13259 : // Correct the top edge of the Search Area if it is not on the reference
13260 : // Picture
13261 0 : search_center_y = ((origin_y + search_center_y) < -pad_height)
13262 0 : ? -pad_height - origin_y
13263 : : search_center_y;
13264 :
13265 : // Correct the bottom edge of the Search Area if its not on the reference
13266 : // Picture
13267 0 : search_center_y =
13268 0 : ((origin_y + search_center_y) > (int16_t)ref_pic_ptr->height - 1)
13269 0 : ? search_center_y - ((origin_y + search_center_y) -
13270 0 : ((int16_t)ref_pic_ptr->height - 1))
13271 : : search_center_y;
13272 :
13273 0 : uint64_t mv_a_sad = nxm_sad_kernel(
13274 0 : context_ptr->sb_src_ptr,
13275 0 : context_ptr->sb_src_stride << sub_sampled_sad,
13276 0 : &(ref_pic_ptr->buffer_y[search_region_index]),
13277 0 : ref_pic_ptr->stride_y << sub_sampled_sad,
13278 : sb_height >> sub_sampled_sad,
13279 : sb_width);
13280 :
13281 0 : mv_a_sad = mv_a_sad << sub_sampled_sad;
13282 :
13283 0 : uint64_t mv_a_cost = mv_a_sad << COST_PRECISION;
13284 :
13285 : // B pos
13286 0 : search_center_x =
13287 0 : (context_ptr->hme_level0_total_search_area_width * sparce_scale);
13288 0 : search_center_y = 0;
13289 : ///////////////// correct
13290 : // Correct the left edge of the Search Area if it is not on the reference
13291 : // Picture
13292 0 : search_center_x = ((origin_x + search_center_x) < -pad_width)
13293 0 : ? -pad_width - origin_x
13294 : : search_center_x;
13295 : // Correct the right edge of the Search Area if its not on the reference
13296 : // Picture
13297 0 : search_center_x =
13298 0 : ((origin_x + search_center_x) > (int16_t)ref_pic_ptr->width - 1)
13299 0 : ? search_center_x - ((origin_x + search_center_x) -
13300 0 : ((int16_t)ref_pic_ptr->width - 1))
13301 : : search_center_x;
13302 : // Correct the top edge of the Search Area if it is not on the reference
13303 : // Picture
13304 0 : search_center_y = ((origin_y + search_center_y) < -pad_height)
13305 0 : ? -pad_height - origin_y
13306 : : search_center_y;
13307 : // Correct the bottom edge of the Search Area if its not on the reference
13308 : // Picture
13309 0 : search_center_y =
13310 0 : ((origin_y + search_center_y) > (int16_t)ref_pic_ptr->height - 1)
13311 0 : ? search_center_y - ((origin_y + search_center_y) -
13312 0 : ((int16_t)ref_pic_ptr->height - 1))
13313 : : search_center_y;
13314 :
13315 0 : search_region_index =
13316 0 : (int16_t)(ref_pic_ptr->origin_x + origin_x) + search_center_x +
13317 0 : ((int16_t)(ref_pic_ptr->origin_y + origin_y) + search_center_y) *
13318 0 : ref_pic_ptr->stride_y;
13319 :
13320 0 : uint64_t mv_b_sad = nxm_sad_kernel(
13321 0 : context_ptr->sb_src_ptr,
13322 0 : context_ptr->sb_src_stride << sub_sampled_sad,
13323 0 : &(ref_pic_ptr->buffer_y[search_region_index]),
13324 0 : ref_pic_ptr->stride_y << sub_sampled_sad,
13325 : sb_height >> sub_sampled_sad,
13326 : sb_width);
13327 :
13328 0 : mv_b_sad = mv_b_sad << sub_sampled_sad;
13329 :
13330 0 : uint64_t mv_b_cost = mv_b_sad << COST_PRECISION;
13331 : // C pos
13332 0 : search_center_x = 0;
13333 0 : search_center_y =
13334 0 : 0 - (context_ptr->hme_level0_total_search_area_height * sparce_scale);
13335 : ///////////////// correct
13336 : // Correct the left edge of the Search Area if it is not on the reference
13337 : // Picture
13338 0 : search_center_x = ((origin_x + search_center_x) < -pad_width)
13339 0 : ? -pad_width - origin_x
13340 : : search_center_x;
13341 :
13342 : // Correct the right edge of the Search Area if its not on the reference
13343 : // Picture
13344 0 : search_center_x =
13345 0 : ((origin_x + search_center_x) > (int16_t)ref_pic_ptr->width - 1)
13346 0 : ? search_center_x - ((origin_x + search_center_x) -
13347 0 : ((int16_t)ref_pic_ptr->width - 1))
13348 : : search_center_x;
13349 :
13350 : // Correct the top edge of the Search Area if it is not on the reference
13351 : // Picture
13352 0 : search_center_y = ((origin_y + search_center_y) < -pad_height)
13353 0 : ? -pad_height - origin_y
13354 : : search_center_y;
13355 :
13356 : // Correct the bottom edge of the Search Area if its not on the reference
13357 : // Picture
13358 0 : search_center_y =
13359 0 : ((origin_y + search_center_y) > (int16_t)ref_pic_ptr->height - 1)
13360 0 : ? search_center_y - ((origin_y + search_center_y) -
13361 0 : ((int16_t)ref_pic_ptr->height - 1))
13362 : : search_center_y;
13363 :
13364 0 : search_region_index =
13365 0 : (int16_t)(ref_pic_ptr->origin_x + origin_x) + search_center_x +
13366 0 : ((int16_t)(ref_pic_ptr->origin_y + origin_y) + search_center_y) *
13367 0 : ref_pic_ptr->stride_y;
13368 :
13369 0 : uint64_t mv_c_sad = nxm_sad_kernel(
13370 0 : context_ptr->sb_src_ptr,
13371 0 : context_ptr->sb_src_stride << sub_sampled_sad,
13372 0 : &(ref_pic_ptr->buffer_y[search_region_index]),
13373 0 : ref_pic_ptr->stride_y << sub_sampled_sad,
13374 : sb_height >> sub_sampled_sad,
13375 : sb_width);
13376 :
13377 0 : mv_c_sad = mv_c_sad << sub_sampled_sad;
13378 :
13379 0 : uint64_t mv_c_cost = mv_c_sad << COST_PRECISION;
13380 :
13381 : // D pos
13382 0 : search_center_x = 0;
13383 0 : search_center_y =
13384 0 : (context_ptr->hme_level0_total_search_area_height * sparce_scale);
13385 : // Correct the left edge of the Search Area if it is not on the reference
13386 : // Picture
13387 0 : search_center_x = ((origin_x + search_center_x) < -pad_width)
13388 0 : ? -pad_width - origin_x
13389 : : search_center_x;
13390 : // Correct the right edge of the Search Area if its not on the reference
13391 : // Picture
13392 0 : search_center_x =
13393 0 : ((origin_x + search_center_x) > (int16_t)ref_pic_ptr->width - 1)
13394 0 : ? search_center_x - ((origin_x + search_center_x) -
13395 0 : ((int16_t)ref_pic_ptr->width - 1))
13396 : : search_center_x;
13397 : // Correct the top edge of the Search Area if it is not on the reference
13398 : // Picture
13399 0 : search_center_y = ((origin_y + search_center_y) < -pad_height)
13400 0 : ? -pad_height - origin_y
13401 : : search_center_y;
13402 : // Correct the bottom edge of the Search Area if its not on the reference
13403 : // Picture
13404 0 : search_center_y =
13405 0 : ((origin_y + search_center_y) > (int16_t)ref_pic_ptr->height - 1)
13406 0 : ? search_center_y - ((origin_y + search_center_y) -
13407 0 : ((int16_t)ref_pic_ptr->height - 1))
13408 : : search_center_y;
13409 0 : search_region_index =
13410 0 : (int16_t)(ref_pic_ptr->origin_x + origin_x) + search_center_x +
13411 0 : ((int16_t)(ref_pic_ptr->origin_y + origin_y) + search_center_y) *
13412 0 : ref_pic_ptr->stride_y;
13413 0 : uint64_t mv_d_sad = nxm_sad_kernel(
13414 0 : context_ptr->sb_src_ptr,
13415 0 : context_ptr->sb_src_stride << sub_sampled_sad,
13416 0 : &(ref_pic_ptr->buffer_y[search_region_index]),
13417 0 : ref_pic_ptr->stride_y << sub_sampled_sad,
13418 : sb_height >> sub_sampled_sad,
13419 : sb_width);
13420 :
13421 0 : mv_d_sad = mv_d_sad << sub_sampled_sad;
13422 :
13423 0 : uint64_t mv_d_cost = mv_d_sad << COST_PRECISION;
13424 :
13425 0 : if (list_index == 1) {
13426 0 : search_center_x =
13427 0 : list_index ? 0 - (_MVXT(context_ptr->p_sb_best_mv[0][0][0]) >> 2)
13428 : : 0;
13429 0 : search_center_y =
13430 0 : list_index ? 0 - (_MVYT(context_ptr->p_sb_best_mv[0][0][0]) >> 2)
13431 : : 0;
13432 : ///////////////// correct
13433 : // Correct the left edge of the Search Area if it is not on the
13434 : // reference Picture
13435 0 : search_center_x = ((origin_x + search_center_x) < -pad_width)
13436 0 : ? -pad_width - origin_x
13437 : : search_center_x;
13438 : // Correct the right edge of the Search Area if its not on the reference
13439 : // Picture
13440 0 : search_center_x =
13441 0 : ((origin_x + search_center_x) > (int16_t)ref_pic_ptr->width - 1)
13442 0 : ? search_center_x - ((origin_x + search_center_x) -
13443 0 : ((int16_t)ref_pic_ptr->width - 1))
13444 : : search_center_x;
13445 : // Correct the top edge of the Search Area if it is not on the reference
13446 : // Picture
13447 0 : search_center_y = ((origin_y + search_center_y) < -pad_height)
13448 0 : ? -pad_height - origin_y
13449 : : search_center_y;
13450 : // Correct the bottom edge of the Search Area if its not on the
13451 : // reference Picture
13452 0 : search_center_y =
13453 0 : ((origin_y + search_center_y) > (int16_t)ref_pic_ptr->height - 1)
13454 0 : ? search_center_y - ((origin_y + search_center_y) -
13455 0 : ((int16_t)ref_pic_ptr->height - 1))
13456 : : search_center_y;
13457 :
13458 0 : search_region_index =
13459 0 : (int16_t)(ref_pic_ptr->origin_x + origin_x) + search_center_x +
13460 0 : ((int16_t)(ref_pic_ptr->origin_y + origin_y) + search_center_y) *
13461 0 : ref_pic_ptr->stride_y;
13462 :
13463 0 : uint64_t direct_mv_sad = nxm_sad_kernel(
13464 0 : context_ptr->sb_src_ptr,
13465 0 : context_ptr->sb_src_stride << sub_sampled_sad,
13466 0 : &(ref_pic_ptr->buffer_y[search_region_index]),
13467 0 : ref_pic_ptr->stride_y << sub_sampled_sad,
13468 : sb_height >> sub_sampled_sad,
13469 : sb_width);
13470 :
13471 0 : direct_mv_sad = direct_mv_sad << sub_sampled_sad;
13472 :
13473 0 : direct_mv_cost = (direct_mv_sad << COST_PRECISION);
13474 : }
13475 :
13476 0 : best_cost = MIN(
13477 : zero_mv_cost,
13478 : MIN(mv_a_cost,
13479 : MIN(mv_b_cost, MIN(mv_c_cost, MIN(mv_d_cost, direct_mv_cost)))));
13480 :
13481 0 : if (best_cost == zero_mv_cost) {
13482 0 : search_center_x = 0;
13483 0 : search_center_y = 0;
13484 0 : } else if (best_cost == mv_a_cost) {
13485 0 : search_center_x = 0 - (context_ptr->hme_level0_total_search_area_width *
13486 : sparce_scale);
13487 0 : search_center_y = 0;
13488 0 : } else if (best_cost == mv_b_cost) {
13489 0 : search_center_x =
13490 0 : (context_ptr->hme_level0_total_search_area_width * sparce_scale);
13491 0 : search_center_y = 0;
13492 0 : } else if (best_cost == mv_c_cost) {
13493 0 : search_center_x = 0;
13494 0 : search_center_y =
13495 : 0 -
13496 0 : (context_ptr->hme_level0_total_search_area_height * sparce_scale);
13497 0 : } else if (best_cost == direct_mv_cost) {
13498 0 : search_center_x =
13499 0 : list_index ? 0 - (_MVXT(context_ptr->p_sb_best_mv[0][0][0]) >> 2)
13500 : : 0;
13501 0 : search_center_y =
13502 0 : list_index ? 0 - (_MVYT(context_ptr->p_sb_best_mv[0][0][0]) >> 2)
13503 : : 0;
13504 0 : } else if (best_cost == mv_d_cost) {
13505 0 : search_center_x = 0;
13506 0 : search_center_y =
13507 0 : (context_ptr->hme_level0_total_search_area_height * sparce_scale);
13508 : }
13509 :
13510 : else
13511 0 : SVT_LOG("error no center selected");
13512 0 : *xsc = search_center_x;
13513 0 : *ysc = search_center_y;
13514 0 : }
13515 :
13516 0 : void SwapMeCandidate(MePredUnit *a, MePredUnit *b) {
13517 : MePredUnit tempPtr;
13518 0 : tempPtr = *a;
13519 0 : *a = *b;
13520 0 : *b = tempPtr;
13521 0 : }
13522 :
13523 : /*******************************************
13524 : * motion_estimate_lcu
13525 : * performs ME (LCU)
13526 : *******************************************/
13527 0 : EbErrorType motion_estimate_lcu(
13528 : PictureParentControlSet *picture_control_set_ptr, // input parameter, Picture Control Set Ptr
13529 : uint32_t sb_index, // input parameter, SB Index
13530 : uint32_t sb_origin_x, // input parameter, SB Origin X
13531 : uint32_t sb_origin_y, // input parameter, SB Origin X
13532 : MeContext *context_ptr, // input parameter, ME Context Ptr, used to store decimated/interpolated LCU/SR
13533 : EbPictureBufferDesc *input_ptr) // input parameter, source Picture Ptr
13534 :
13535 : {
13536 0 : EbErrorType return_error = EB_ErrorNone;
13537 :
13538 0 : SequenceControlSet *sequence_control_set_ptr =
13539 : (SequenceControlSet *)picture_control_set_ptr
13540 0 : ->sequence_control_set_wrapper_ptr->object_ptr;
13541 :
13542 : int16_t xTopLeftSearchRegion;
13543 : int16_t yTopLeftSearchRegion;
13544 : uint32_t searchRegionIndex;
13545 :
13546 0 : int16_t picture_width =
13547 : (int16_t)((SequenceControlSet *)picture_control_set_ptr
13548 0 : ->sequence_control_set_wrapper_ptr->object_ptr)
13549 0 : ->seq_header.max_frame_width;
13550 0 : int16_t picture_height =
13551 : (int16_t)((SequenceControlSet *)picture_control_set_ptr
13552 0 : ->sequence_control_set_wrapper_ptr->object_ptr)
13553 0 : ->seq_header.max_frame_height;
13554 0 : uint32_t sb_width = (input_ptr->width - sb_origin_x) < BLOCK_SIZE_64
13555 : ? input_ptr->width - sb_origin_x
13556 : : BLOCK_SIZE_64;
13557 0 : uint32_t sb_height = (input_ptr->height - sb_origin_y) < BLOCK_SIZE_64
13558 : ? input_ptr->height - sb_origin_y
13559 : : BLOCK_SIZE_64;
13560 :
13561 0 : int16_t padWidth = (int16_t)BLOCK_SIZE_64 - 1;
13562 0 : int16_t padHeight = (int16_t)BLOCK_SIZE_64 - 1;
13563 : int16_t search_area_width;
13564 : int16_t search_area_height;
13565 : int16_t x_search_area_origin;
13566 : int16_t y_search_area_origin;
13567 0 : int16_t origin_x = (int16_t)sb_origin_x;
13568 0 : int16_t origin_y = (int16_t)sb_origin_y;
13569 :
13570 : // HME
13571 0 : uint32_t searchRegionNumberInWidth = 0;
13572 0 : uint32_t searchRegionNumberInHeight = 0;
13573 : int16_t xHmeLevel0SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
13574 : [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
13575 : int16_t yHmeLevel0SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
13576 : [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
13577 : uint64_t hmeLevel0Sad[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
13578 : [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
13579 : int16_t xHmeLevel1SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
13580 : [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
13581 : int16_t yHmeLevel1SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
13582 : [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
13583 : uint64_t hmeLevel1Sad[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
13584 : [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
13585 : int16_t xHmeLevel2SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
13586 : [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
13587 : int16_t yHmeLevel2SearchCenter[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
13588 : [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
13589 : uint64_t hmeLevel2Sad[EB_HME_SEARCH_AREA_COLUMN_MAX_COUNT]
13590 : [EB_HME_SEARCH_AREA_ROW_MAX_COUNT];
13591 :
13592 : // Hierarchical ME Search Center
13593 0 : int16_t xHmeSearchCenter = 0;
13594 0 : int16_t yHmeSearchCenter = 0;
13595 :
13596 : // Final ME Search Center
13597 0 : int16_t x_search_center = 0;
13598 0 : int16_t y_search_center = 0;
13599 :
13600 : // Search Center SADs
13601 0 : uint64_t hmeMvSad = 0;
13602 :
13603 : uint32_t pu_index;
13604 :
13605 0 : uint32_t max_number_of_pus_per_sb =
13606 0 : picture_control_set_ptr->max_number_of_pus_per_sb;
13607 :
13608 : uint32_t numOfListToSearch;
13609 : uint32_t listIndex;
13610 0 : uint8_t candidateIndex = 0;
13611 0 : uint8_t total_me_candidate_index = 0;
13612 : EbPaReferenceObject
13613 : *referenceObject; // input parameter, reference Object Ptr
13614 :
13615 : uint8_t ref_pic_index;
13616 : uint8_t num_of_ref_pic_to_search;
13617 0 : uint8_t candidate_index = 0;
13618 0 : uint32_t next_candidate_index = 0;
13619 :
13620 : MePredUnit *me_candidate;
13621 : EbPictureBufferDesc *refPicPtr;
13622 : EbPictureBufferDesc *quarterRefPicPtr;
13623 : EbPictureBufferDesc *sixteenthRefPicPtr;
13624 :
13625 0 : int16_t tempXHmeSearchCenter = 0;
13626 0 : int16_t tempYHmeSearchCenter = 0;
13627 :
13628 : uint32_t numQuadInWidth;
13629 : uint32_t totalMeQuad;
13630 : uint32_t quadIndex;
13631 : uint32_t nextQuadIndex;
13632 : uint64_t tempXHmeSad;
13633 :
13634 0 : uint64_t ref0Poc = 0;
13635 0 : uint64_t ref1Poc = 0;
13636 :
13637 : uint64_t i;
13638 :
13639 : int16_t hmeLevel1SearchAreaInWidth;
13640 : int16_t hmeLevel1SearchAreaInHeight;
13641 : // Configure HME level 0, level 1 and level 2 from static config parameters
13642 0 : EbBool enable_hme_level0_flag =
13643 : context_ptr->enable_hme_level0_flag;
13644 0 : EbBool enable_hme_level1_flag =
13645 : context_ptr->enable_hme_level1_flag;
13646 0 : EbBool enable_hme_level2_flag =
13647 : context_ptr->enable_hme_level2_flag;
13648 :
13649 0 : EbBool enableHalfPel32x32 = EB_FALSE;
13650 0 : EbBool enableHalfPel16x16 = EB_FALSE;
13651 0 : EbBool enableHalfPel8x8 = EB_FALSE;
13652 0 : EbBool enableQuarterPel = EB_FALSE;
13653 0 : EbBool oneQuadrantHME = EB_FALSE;
13654 :
13655 0 : oneQuadrantHME =
13656 0 : sequence_control_set_ptr->input_resolution < INPUT_SIZE_4K_RANGE
13657 : ? 0
13658 : : oneQuadrantHME;
13659 :
13660 0 : numOfListToSearch = (picture_control_set_ptr->slice_type == P_SLICE)
13661 : ? (uint32_t)REF_LIST_0
13662 0 : : (uint32_t)REF_LIST_1;
13663 :
13664 0 : EbBool is_nsq_table_used =
13665 0 : (picture_control_set_ptr->pic_depth_mode <= PIC_ALL_C_DEPTH_MODE &&
13666 0 : picture_control_set_ptr->nsq_search_level >= NSQ_SEARCH_LEVEL1 &&
13667 0 : picture_control_set_ptr->nsq_search_level < NSQ_SEARCH_FULL)
13668 : ? EB_TRUE
13669 0 : : EB_FALSE;
13670 :
13671 0 : is_nsq_table_used = picture_control_set_ptr->enc_mode == ENC_M0 ? EB_FALSE : is_nsq_table_used;
13672 :
13673 0 : if (context_ptr->me_alt_ref == EB_TRUE)
13674 0 : numOfListToSearch = 0;
13675 :
13676 : // Uni-Prediction motion estimation loop
13677 : // List Loop
13678 0 : for (listIndex = REF_LIST_0; listIndex <= numOfListToSearch; ++listIndex) {
13679 :
13680 0 : if (context_ptr->me_alt_ref == EB_TRUE) {
13681 0 : num_of_ref_pic_to_search = 1;
13682 : } else {
13683 0 : num_of_ref_pic_to_search =
13684 0 : (picture_control_set_ptr->slice_type == P_SLICE)
13685 : ? picture_control_set_ptr->ref_list0_count
13686 : : (listIndex == REF_LIST_0)
13687 : ? picture_control_set_ptr->ref_list0_count
13688 : : picture_control_set_ptr->ref_list1_count;
13689 :
13690 0 : referenceObject = (EbPaReferenceObject *)picture_control_set_ptr
13691 0 : ->ref_pa_pic_ptr_array[0][0]
13692 : ->object_ptr;
13693 0 : ref0Poc = picture_control_set_ptr->ref_pic_poc_array[0][0];
13694 : }
13695 :
13696 : // Ref Picture Loop
13697 0 : for (ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search;
13698 0 : ++ref_pic_index)
13699 : {
13700 0 : if (context_ptr->me_alt_ref == EB_TRUE) {
13701 0 : referenceObject =
13702 : (EbPaReferenceObject *)context_ptr->alt_ref_reference_ptr;
13703 : } else {
13704 0 : if (numOfListToSearch) {
13705 0 : referenceObject =
13706 : (EbPaReferenceObject *)picture_control_set_ptr
13707 0 : ->ref_pa_pic_ptr_array[1][0]
13708 : ->object_ptr;
13709 0 : ref1Poc = picture_control_set_ptr->ref_pic_poc_array[1][0];
13710 : }
13711 :
13712 0 : referenceObject =
13713 : (EbPaReferenceObject *)picture_control_set_ptr
13714 0 : ->ref_pa_pic_ptr_array[listIndex][ref_pic_index]
13715 : ->object_ptr;
13716 : }
13717 :
13718 0 : refPicPtr = (EbPictureBufferDesc*)referenceObject->input_padded_picture_ptr;
13719 : // Set 1/4 and 1/16 ME reference buffer(s); filtered or decimated
13720 0 : quarterRefPicPtr = (sequence_control_set_ptr->down_sampling_method_me_search == ME_FILTERED_DOWNSAMPLED) ?
13721 0 : (EbPictureBufferDesc*)referenceObject->quarter_filtered_picture_ptr :
13722 : (EbPictureBufferDesc*)referenceObject->quarter_decimated_picture_ptr;
13723 :
13724 0 : sixteenthRefPicPtr = (sequence_control_set_ptr->down_sampling_method_me_search == ME_FILTERED_DOWNSAMPLED) ?
13725 0 : (EbPictureBufferDesc*)referenceObject->sixteenth_filtered_picture_ptr:
13726 : (EbPictureBufferDesc*)referenceObject->sixteenth_decimated_picture_ptr;
13727 0 : if (picture_control_set_ptr->temporal_layer_index > 0 ||
13728 : listIndex == 0) {
13729 : // A - The MV center for Tier0 search could be either (0,0), or
13730 : // HME A - Set HME MV Center
13731 0 : if (context_ptr->update_hme_search_center_flag)
13732 0 : hme_mv_center_check(refPicPtr,
13733 : context_ptr,
13734 : &x_search_center,
13735 : &y_search_center,
13736 : listIndex,
13737 : origin_x,
13738 : origin_y,
13739 : sb_width,
13740 : sb_height);
13741 : else {
13742 0 : x_search_center = 0;
13743 0 : y_search_center = 0;
13744 : }
13745 : // B - NO HME in boundaries
13746 : // C - Skip HME
13747 :
13748 0 : if (context_ptr->enable_hme_flag &&
13749 :
13750 : /*B*/ sb_height ==
13751 : BLOCK_SIZE_64) { //(searchCenterSad >
13752 : // sequence_control_set_ptr->static_config.skipTier0HmeTh))
13753 : //{
13754 0 : while (searchRegionNumberInHeight <
13755 0 : context_ptr->number_hme_search_region_in_height) {
13756 0 : while (searchRegionNumberInWidth <
13757 0 : context_ptr->number_hme_search_region_in_width) {
13758 : xHmeLevel0SearchCenter[searchRegionNumberInWidth]
13759 0 : [searchRegionNumberInHeight] =
13760 : x_search_center;
13761 : yHmeLevel0SearchCenter[searchRegionNumberInWidth]
13762 0 : [searchRegionNumberInHeight] =
13763 : y_search_center;
13764 :
13765 : xHmeLevel1SearchCenter[searchRegionNumberInWidth]
13766 0 : [searchRegionNumberInHeight] =
13767 : x_search_center;
13768 : yHmeLevel1SearchCenter[searchRegionNumberInWidth]
13769 0 : [searchRegionNumberInHeight] =
13770 : y_search_center;
13771 :
13772 : xHmeLevel2SearchCenter[searchRegionNumberInWidth]
13773 0 : [searchRegionNumberInHeight] =
13774 : x_search_center;
13775 : yHmeLevel2SearchCenter[searchRegionNumberInWidth]
13776 0 : [searchRegionNumberInHeight] =
13777 : y_search_center;
13778 :
13779 0 : searchRegionNumberInWidth++;
13780 : }
13781 0 : searchRegionNumberInWidth = 0;
13782 0 : searchRegionNumberInHeight++;
13783 : }
13784 :
13785 : // HME: Level0 search
13786 :
13787 0 : if (enable_hme_level0_flag) {
13788 0 : if (oneQuadrantHME && !enable_hme_level1_flag &&
13789 : !enable_hme_level2_flag) {
13790 0 : searchRegionNumberInHeight = 0;
13791 0 : searchRegionNumberInWidth = 0;
13792 :
13793 0 : HmeOneQuadrantLevel0(
13794 : picture_control_set_ptr,
13795 : context_ptr,
13796 : origin_x >> 2,
13797 : origin_y >> 2,
13798 : sb_width >> 2,
13799 : sb_height >> 2,
13800 : x_search_center >> 2,
13801 : y_search_center >> 2,
13802 : sixteenthRefPicPtr,
13803 : &(hmeLevel0Sad[searchRegionNumberInWidth]
13804 : [searchRegionNumberInHeight]),
13805 : &(xHmeLevel0SearchCenter
13806 : [searchRegionNumberInWidth]
13807 : [searchRegionNumberInHeight]),
13808 : &(yHmeLevel0SearchCenter
13809 : [searchRegionNumberInWidth]
13810 : [searchRegionNumberInHeight]),
13811 : hme_level_0_search_area_multiplier_x
13812 : [picture_control_set_ptr
13813 0 : ->hierarchical_levels]
13814 0 : [picture_control_set_ptr
13815 0 : ->temporal_layer_index],
13816 : hme_level_0_search_area_multiplier_y
13817 : [picture_control_set_ptr
13818 0 : ->hierarchical_levels]
13819 0 : [picture_control_set_ptr
13820 0 : ->temporal_layer_index]);
13821 : } else {
13822 0 : searchRegionNumberInHeight = 0;
13823 0 : searchRegionNumberInWidth = 0;
13824 : {
13825 0 : while (
13826 : searchRegionNumberInHeight <
13827 : context_ptr
13828 0 : ->number_hme_search_region_in_height) {
13829 0 : while (
13830 : searchRegionNumberInWidth <
13831 : context_ptr
13832 0 : ->number_hme_search_region_in_width) {
13833 0 : HmeLevel0(
13834 : picture_control_set_ptr,
13835 : context_ptr,
13836 : origin_x >> 2,
13837 : origin_y >> 2,
13838 : sb_width >> 2,
13839 : sb_height >> 2,
13840 : x_search_center >> 2,
13841 : y_search_center >> 2,
13842 : sixteenthRefPicPtr,
13843 : searchRegionNumberInWidth,
13844 : searchRegionNumberInHeight,
13845 : &(hmeLevel0Sad
13846 : [searchRegionNumberInWidth]
13847 : [searchRegionNumberInHeight]),
13848 : &(xHmeLevel0SearchCenter
13849 : [searchRegionNumberInWidth]
13850 : [searchRegionNumberInHeight]),
13851 : &(yHmeLevel0SearchCenter
13852 : [searchRegionNumberInWidth]
13853 : [searchRegionNumberInHeight]),
13854 : hme_level_0_search_area_multiplier_x
13855 : [picture_control_set_ptr
13856 0 : ->hierarchical_levels]
13857 0 : [picture_control_set_ptr
13858 0 : ->temporal_layer_index],
13859 : hme_level_0_search_area_multiplier_y
13860 : [picture_control_set_ptr
13861 0 : ->hierarchical_levels]
13862 0 : [picture_control_set_ptr
13863 0 : ->temporal_layer_index]);
13864 :
13865 0 : searchRegionNumberInWidth++;
13866 : }
13867 0 : searchRegionNumberInWidth = 0;
13868 0 : searchRegionNumberInHeight++;
13869 : }
13870 : }
13871 : }
13872 : }
13873 :
13874 : // HME: Level1 search
13875 0 : if (enable_hme_level1_flag) {
13876 0 : searchRegionNumberInHeight = 0;
13877 0 : searchRegionNumberInWidth = 0;
13878 :
13879 : {
13880 0 : while (searchRegionNumberInHeight <
13881 : context_ptr
13882 0 : ->number_hme_search_region_in_height) {
13883 0 : while (
13884 : searchRegionNumberInWidth <
13885 : context_ptr
13886 0 : ->number_hme_search_region_in_width) {
13887 : // When HME level 0 has been disabled,
13888 : // increase the search area width and height
13889 : // for level 1 to (32x12) for Gold only
13890 :
13891 0 : hmeLevel1SearchAreaInWidth =
13892 : (int16_t)context_ptr
13893 : ->hme_level1_search_area_in_width_array
13894 0 : [searchRegionNumberInWidth];
13895 0 : hmeLevel1SearchAreaInHeight =
13896 : (int16_t)context_ptr
13897 : ->hme_level1_search_area_in_height_array
13898 0 : [searchRegionNumberInHeight];
13899 :
13900 0 : HmeLevel1(
13901 : context_ptr,
13902 : origin_x >> 1,
13903 : origin_y >> 1,
13904 : sb_width >> 1,
13905 : sb_height >> 1,
13906 : quarterRefPicPtr,
13907 : hmeLevel1SearchAreaInWidth,
13908 : hmeLevel1SearchAreaInHeight,
13909 : xHmeLevel0SearchCenter
13910 : [searchRegionNumberInWidth]
13911 0 : [searchRegionNumberInHeight] >>
13912 : 1,
13913 : yHmeLevel0SearchCenter
13914 : [searchRegionNumberInWidth]
13915 0 : [searchRegionNumberInHeight] >>
13916 : 1,
13917 : &(hmeLevel1Sad
13918 : [searchRegionNumberInWidth]
13919 : [searchRegionNumberInHeight]),
13920 : &(xHmeLevel1SearchCenter
13921 : [searchRegionNumberInWidth]
13922 : [searchRegionNumberInHeight]),
13923 : &(yHmeLevel1SearchCenter
13924 : [searchRegionNumberInWidth]
13925 : [searchRegionNumberInHeight]));
13926 :
13927 0 : searchRegionNumberInWidth++;
13928 : }
13929 0 : searchRegionNumberInWidth = 0;
13930 0 : searchRegionNumberInHeight++;
13931 : }
13932 : }
13933 : }
13934 :
13935 : // HME: Level2 search
13936 0 : if (enable_hme_level2_flag) {
13937 0 : searchRegionNumberInHeight = 0;
13938 0 : searchRegionNumberInWidth = 0;
13939 :
13940 : {
13941 0 : while (searchRegionNumberInHeight <
13942 : context_ptr
13943 0 : ->number_hme_search_region_in_height) {
13944 0 : while (
13945 : searchRegionNumberInWidth <
13946 : context_ptr
13947 0 : ->number_hme_search_region_in_width) {
13948 0 : HmeLevel2(
13949 : picture_control_set_ptr,
13950 : context_ptr,
13951 : origin_x,
13952 : origin_y,
13953 : sb_width,
13954 : sb_height,
13955 : refPicPtr,
13956 : searchRegionNumberInWidth,
13957 : searchRegionNumberInHeight,
13958 : xHmeLevel1SearchCenter
13959 : [searchRegionNumberInWidth]
13960 0 : [searchRegionNumberInHeight],
13961 : yHmeLevel1SearchCenter
13962 : [searchRegionNumberInWidth]
13963 0 : [searchRegionNumberInHeight],
13964 : &(hmeLevel2Sad
13965 : [searchRegionNumberInWidth]
13966 : [searchRegionNumberInHeight]),
13967 : &(xHmeLevel2SearchCenter
13968 : [searchRegionNumberInWidth]
13969 : [searchRegionNumberInHeight]),
13970 : &(yHmeLevel2SearchCenter
13971 : [searchRegionNumberInWidth]
13972 : [searchRegionNumberInHeight]));
13973 :
13974 0 : searchRegionNumberInWidth++;
13975 : }
13976 0 : searchRegionNumberInWidth = 0;
13977 0 : searchRegionNumberInHeight++;
13978 : }
13979 : }
13980 : }
13981 :
13982 : // Hierarchical ME - Search Center
13983 0 : if (enable_hme_level0_flag && !enable_hme_level1_flag &&
13984 : !enable_hme_level2_flag) {
13985 0 : if (oneQuadrantHME) {
13986 0 : xHmeSearchCenter = xHmeLevel0SearchCenter[0][0];
13987 0 : yHmeSearchCenter = yHmeLevel0SearchCenter[0][0];
13988 0 : hmeMvSad = hmeLevel0Sad[0][0];
13989 : } else {
13990 0 : xHmeSearchCenter = xHmeLevel0SearchCenter[0][0];
13991 0 : yHmeSearchCenter = yHmeLevel0SearchCenter[0][0];
13992 0 : hmeMvSad = hmeLevel0Sad[0][0];
13993 :
13994 0 : searchRegionNumberInWidth = 1;
13995 0 : searchRegionNumberInHeight = 0;
13996 :
13997 0 : while (searchRegionNumberInHeight <
13998 : context_ptr
13999 0 : ->number_hme_search_region_in_height) {
14000 0 : while (
14001 : searchRegionNumberInWidth <
14002 : context_ptr
14003 0 : ->number_hme_search_region_in_width) {
14004 0 : xHmeSearchCenter =
14005 : (hmeLevel0Sad
14006 : [searchRegionNumberInWidth]
14007 0 : [searchRegionNumberInHeight] <
14008 : hmeMvSad)
14009 : ? xHmeLevel0SearchCenter
14010 : [searchRegionNumberInWidth]
14011 : [searchRegionNumberInHeight]
14012 : : xHmeSearchCenter;
14013 0 : yHmeSearchCenter =
14014 : (hmeLevel0Sad
14015 : [searchRegionNumberInWidth]
14016 0 : [searchRegionNumberInHeight] <
14017 : hmeMvSad)
14018 : ? yHmeLevel0SearchCenter
14019 : [searchRegionNumberInWidth]
14020 : [searchRegionNumberInHeight]
14021 : : yHmeSearchCenter;
14022 0 : hmeMvSad =
14023 : (hmeLevel0Sad
14024 : [searchRegionNumberInWidth]
14025 0 : [searchRegionNumberInHeight] <
14026 : hmeMvSad)
14027 : ? hmeLevel0Sad
14028 : [searchRegionNumberInWidth]
14029 : [searchRegionNumberInHeight]
14030 : : hmeMvSad;
14031 0 : searchRegionNumberInWidth++;
14032 : }
14033 0 : searchRegionNumberInWidth = 0;
14034 0 : searchRegionNumberInHeight++;
14035 : }
14036 : }
14037 : }
14038 :
14039 0 : if (enable_hme_level1_flag && !enable_hme_level2_flag) {
14040 0 : xHmeSearchCenter = xHmeLevel1SearchCenter[0][0];
14041 0 : yHmeSearchCenter = yHmeLevel1SearchCenter[0][0];
14042 0 : hmeMvSad = hmeLevel1Sad[0][0];
14043 :
14044 0 : searchRegionNumberInWidth = 1;
14045 0 : searchRegionNumberInHeight = 0;
14046 :
14047 0 : while (
14048 : searchRegionNumberInHeight <
14049 0 : context_ptr->number_hme_search_region_in_height) {
14050 0 : while (searchRegionNumberInWidth <
14051 : context_ptr
14052 0 : ->number_hme_search_region_in_width) {
14053 0 : xHmeSearchCenter =
14054 : (hmeLevel1Sad[searchRegionNumberInWidth]
14055 0 : [searchRegionNumberInHeight] <
14056 : hmeMvSad)
14057 : ? xHmeLevel1SearchCenter
14058 : [searchRegionNumberInWidth]
14059 : [searchRegionNumberInHeight]
14060 : : xHmeSearchCenter;
14061 0 : yHmeSearchCenter =
14062 : (hmeLevel1Sad[searchRegionNumberInWidth]
14063 0 : [searchRegionNumberInHeight] <
14064 : hmeMvSad)
14065 : ? yHmeLevel1SearchCenter
14066 : [searchRegionNumberInWidth]
14067 : [searchRegionNumberInHeight]
14068 : : yHmeSearchCenter;
14069 0 : hmeMvSad =
14070 : (hmeLevel1Sad[searchRegionNumberInWidth]
14071 0 : [searchRegionNumberInHeight] <
14072 : hmeMvSad)
14073 : ? hmeLevel1Sad
14074 : [searchRegionNumberInWidth]
14075 : [searchRegionNumberInHeight]
14076 : : hmeMvSad;
14077 0 : searchRegionNumberInWidth++;
14078 : }
14079 0 : searchRegionNumberInWidth = 0;
14080 0 : searchRegionNumberInHeight++;
14081 : }
14082 : }
14083 :
14084 0 : if (enable_hme_level2_flag) {
14085 0 : xHmeSearchCenter = xHmeLevel2SearchCenter[0][0];
14086 0 : yHmeSearchCenter = yHmeLevel2SearchCenter[0][0];
14087 0 : hmeMvSad = hmeLevel2Sad[0][0];
14088 :
14089 0 : searchRegionNumberInWidth = 1;
14090 0 : searchRegionNumberInHeight = 0;
14091 :
14092 0 : while (
14093 : searchRegionNumberInHeight <
14094 0 : context_ptr->number_hme_search_region_in_height) {
14095 0 : while (searchRegionNumberInWidth <
14096 : context_ptr
14097 0 : ->number_hme_search_region_in_width) {
14098 0 : xHmeSearchCenter =
14099 : (hmeLevel2Sad[searchRegionNumberInWidth]
14100 0 : [searchRegionNumberInHeight] <
14101 : hmeMvSad)
14102 : ? xHmeLevel2SearchCenter
14103 : [searchRegionNumberInWidth]
14104 : [searchRegionNumberInHeight]
14105 : : xHmeSearchCenter;
14106 0 : yHmeSearchCenter =
14107 : (hmeLevel2Sad[searchRegionNumberInWidth]
14108 0 : [searchRegionNumberInHeight] <
14109 : hmeMvSad)
14110 : ? yHmeLevel2SearchCenter
14111 : [searchRegionNumberInWidth]
14112 : [searchRegionNumberInHeight]
14113 : : yHmeSearchCenter;
14114 0 : hmeMvSad =
14115 : (hmeLevel2Sad[searchRegionNumberInWidth]
14116 0 : [searchRegionNumberInHeight] <
14117 : hmeMvSad)
14118 : ? hmeLevel2Sad
14119 : [searchRegionNumberInWidth]
14120 : [searchRegionNumberInHeight]
14121 : : hmeMvSad;
14122 0 : searchRegionNumberInWidth++;
14123 : }
14124 0 : searchRegionNumberInWidth = 0;
14125 0 : searchRegionNumberInHeight++;
14126 : }
14127 :
14128 0 : numQuadInWidth =
14129 0 : context_ptr->number_hme_search_region_in_width;
14130 0 : totalMeQuad =
14131 0 : context_ptr->number_hme_search_region_in_height *
14132 0 : context_ptr->number_hme_search_region_in_width;
14133 :
14134 0 : if ((ref0Poc == ref1Poc) && (listIndex == 1) &&
14135 : (totalMeQuad > 1)) {
14136 0 : for (quadIndex = 0; quadIndex < totalMeQuad - 1;
14137 0 : ++quadIndex) {
14138 0 : for (nextQuadIndex = quadIndex + 1;
14139 : nextQuadIndex < totalMeQuad;
14140 0 : ++nextQuadIndex) {
14141 0 : if (hmeLevel2Sad[quadIndex / numQuadInWidth]
14142 0 : [quadIndex %
14143 : numQuadInWidth] >
14144 0 : hmeLevel2Sad[nextQuadIndex /
14145 : numQuadInWidth]
14146 0 : [nextQuadIndex %
14147 : numQuadInWidth]) {
14148 0 : tempXHmeSearchCenter =
14149 : xHmeLevel2SearchCenter
14150 0 : [quadIndex / numQuadInWidth]
14151 0 : [quadIndex % numQuadInWidth];
14152 0 : tempYHmeSearchCenter =
14153 : yHmeLevel2SearchCenter
14154 0 : [quadIndex / numQuadInWidth]
14155 0 : [quadIndex % numQuadInWidth];
14156 0 : tempXHmeSad =
14157 0 : hmeLevel2Sad[quadIndex /
14158 : numQuadInWidth]
14159 0 : [quadIndex %
14160 : numQuadInWidth];
14161 :
14162 : xHmeLevel2SearchCenter
14163 0 : [quadIndex / numQuadInWidth]
14164 0 : [quadIndex % numQuadInWidth] =
14165 : xHmeLevel2SearchCenter
14166 0 : [nextQuadIndex /
14167 : numQuadInWidth]
14168 0 : [nextQuadIndex %
14169 : numQuadInWidth];
14170 : yHmeLevel2SearchCenter
14171 0 : [quadIndex / numQuadInWidth]
14172 0 : [quadIndex % numQuadInWidth] =
14173 : yHmeLevel2SearchCenter
14174 0 : [nextQuadIndex /
14175 : numQuadInWidth]
14176 0 : [nextQuadIndex %
14177 : numQuadInWidth];
14178 : hmeLevel2Sad
14179 0 : [quadIndex / numQuadInWidth]
14180 0 : [quadIndex % numQuadInWidth] =
14181 0 : hmeLevel2Sad[nextQuadIndex /
14182 : numQuadInWidth]
14183 0 : [nextQuadIndex %
14184 : numQuadInWidth];
14185 :
14186 : xHmeLevel2SearchCenter
14187 0 : [nextQuadIndex / numQuadInWidth]
14188 0 : [nextQuadIndex % numQuadInWidth] =
14189 : tempXHmeSearchCenter;
14190 : yHmeLevel2SearchCenter
14191 0 : [nextQuadIndex / numQuadInWidth]
14192 0 : [nextQuadIndex % numQuadInWidth] =
14193 : tempYHmeSearchCenter;
14194 0 : hmeLevel2Sad[nextQuadIndex /
14195 : numQuadInWidth]
14196 0 : [nextQuadIndex %
14197 0 : numQuadInWidth] =
14198 : tempXHmeSad;
14199 : }
14200 : }
14201 : }
14202 :
14203 0 : xHmeSearchCenter = xHmeLevel2SearchCenter[0][1];
14204 0 : yHmeSearchCenter = yHmeLevel2SearchCenter[0][1];
14205 : }
14206 : }
14207 :
14208 0 : x_search_center = xHmeSearchCenter;
14209 0 : y_search_center = yHmeSearchCenter;
14210 : }
14211 : }
14212 :
14213 : else {
14214 0 : x_search_center = 0;
14215 0 : y_search_center = 0;
14216 : }
14217 : // Constrain x_ME to be a multiple of 8 (round up)
14218 0 : search_area_width = (context_ptr->search_area_width + 7) & ~0x07;
14219 0 : search_area_height = context_ptr->search_area_height;
14220 0 : if ((x_search_center != 0 || y_search_center != 0) &&
14221 0 : (picture_control_set_ptr->is_used_as_reference_flag ==
14222 : EB_TRUE)) {
14223 0 : CheckZeroZeroCenter(refPicPtr,
14224 : context_ptr,
14225 : sb_origin_x,
14226 : sb_origin_y,
14227 : sb_width,
14228 : sb_height,
14229 : &x_search_center,
14230 : &y_search_center);
14231 : }
14232 0 : x_search_area_origin = x_search_center - (search_area_width >> 1);
14233 0 : y_search_area_origin = y_search_center - (search_area_height >> 1);
14234 :
14235 0 : if(sequence_control_set_ptr->static_config.unrestricted_motion_vector == 0)
14236 : {
14237 0 : int tile_start_x = sequence_control_set_ptr->sb_params_array[sb_index].tile_start_x;
14238 0 : int tile_end_x = sequence_control_set_ptr->sb_params_array[sb_index].tile_end_x;
14239 :
14240 : // Correct the left edge of the Search Area if it is not on the
14241 : // reference Picture
14242 0 : x_search_area_origin =
14243 0 : ((origin_x + x_search_area_origin) < tile_start_x)
14244 0 : ? tile_start_x - origin_x
14245 : : x_search_area_origin;
14246 :
14247 0 : search_area_width =
14248 0 : ((origin_x + x_search_area_origin) < tile_start_x)
14249 0 : ? search_area_width - (tile_start_x - (origin_x + x_search_area_origin))
14250 : : search_area_width;
14251 :
14252 : // Correct the right edge of the Search Area if its not on the
14253 : // reference Picture
14254 0 : x_search_area_origin =
14255 0 : ((origin_x + x_search_area_origin) > tile_end_x - 1)
14256 0 : ? x_search_area_origin - ((origin_x + x_search_area_origin) - (tile_end_x - 1))
14257 : : x_search_area_origin;
14258 :
14259 0 : search_area_width =
14260 0 : ((origin_x + x_search_area_origin + search_area_width) > tile_end_x)
14261 0 : ? MAX(1, search_area_width - ((origin_x + x_search_area_origin + search_area_width) - tile_end_x))
14262 : : search_area_width;
14263 :
14264 : // Constrain x_ME to be a multiple of 8 (round down as cropping
14265 : // already performed)
14266 0 : search_area_width = (search_area_width < 8)
14267 : ? search_area_width
14268 : : search_area_width & ~0x07;
14269 : } else {
14270 : // Correct the left edge of the Search Area if it is not on the
14271 : // reference Picture
14272 0 : x_search_area_origin =
14273 0 : ((origin_x + x_search_area_origin) < -padWidth)
14274 0 : ? -padWidth - origin_x
14275 : : x_search_area_origin;
14276 :
14277 0 : search_area_width =
14278 0 : ((origin_x + x_search_area_origin) < -padWidth)
14279 0 : ? search_area_width -
14280 0 : (-padWidth - (origin_x + x_search_area_origin))
14281 : : search_area_width;
14282 :
14283 : // Correct the right edge of the Search Area if its not on the
14284 : // reference Picture
14285 0 : x_search_area_origin =
14286 0 : ((origin_x + x_search_area_origin) > picture_width - 1)
14287 0 : ? x_search_area_origin -
14288 0 : ((origin_x + x_search_area_origin) -
14289 : (picture_width - 1))
14290 : : x_search_area_origin;
14291 :
14292 0 : search_area_width =
14293 0 : ((origin_x + x_search_area_origin + search_area_width) >
14294 : picture_width)
14295 0 : ? MAX(1,
14296 : search_area_width -
14297 : ((origin_x + x_search_area_origin +
14298 : search_area_width) -
14299 : picture_width))
14300 : : search_area_width;
14301 :
14302 : // Constrain x_ME to be a multiple of 8 (round down as cropping
14303 : // already performed)
14304 0 : search_area_width = (search_area_width < 8)
14305 : ? search_area_width
14306 : : search_area_width & ~0x07;
14307 : }
14308 :
14309 0 : if(sequence_control_set_ptr->static_config.unrestricted_motion_vector == 0)
14310 : {
14311 0 : int tile_start_y = sequence_control_set_ptr->sb_params_array[sb_index].tile_start_y;
14312 0 : int tile_end_y = sequence_control_set_ptr->sb_params_array[sb_index].tile_end_y;
14313 :
14314 : // Correct the top edge of the Search Area if it is not on the
14315 : // reference Picture
14316 0 : y_search_area_origin =
14317 0 : ((origin_y + y_search_area_origin) < tile_start_y)
14318 0 : ? tile_start_y - origin_y
14319 : : y_search_area_origin;
14320 :
14321 0 : search_area_height =
14322 0 : ((origin_y + y_search_area_origin) < tile_start_y)
14323 0 : ? search_area_height - (tile_start_y - (origin_y + y_search_area_origin))
14324 : : search_area_height;
14325 :
14326 : // Correct the bottom edge of the Search Area if its not on the
14327 : // reference Picture
14328 0 : y_search_area_origin =
14329 0 : ((origin_y + y_search_area_origin) > tile_end_y - 1)
14330 0 : ? y_search_area_origin - ((origin_y + y_search_area_origin) - (tile_end_y - 1))
14331 : : y_search_area_origin;
14332 :
14333 0 : search_area_height =
14334 0 : (origin_y + y_search_area_origin + search_area_height > tile_end_y)
14335 0 : ? MAX(1, search_area_height - ((origin_y + y_search_area_origin + search_area_height) - tile_end_y))
14336 : : search_area_height;
14337 : } else {
14338 : // Correct the top edge of the Search Area if it is not on the
14339 : // reference Picture
14340 0 : y_search_area_origin =
14341 0 : ((origin_y + y_search_area_origin) < -padHeight)
14342 0 : ? -padHeight - origin_y
14343 : : y_search_area_origin;
14344 :
14345 0 : search_area_height =
14346 0 : ((origin_y + y_search_area_origin) < -padHeight)
14347 0 : ? search_area_height -
14348 0 : (-padHeight - (origin_y + y_search_area_origin))
14349 : : search_area_height;
14350 :
14351 : // Correct the bottom edge of the Search Area if its not on the
14352 : // reference Picture
14353 0 : y_search_area_origin =
14354 0 : ((origin_y + y_search_area_origin) > picture_height - 1)
14355 0 : ? y_search_area_origin -
14356 0 : ((origin_y + y_search_area_origin) -
14357 : (picture_height - 1))
14358 : : y_search_area_origin;
14359 :
14360 0 : search_area_height =
14361 0 : (origin_y + y_search_area_origin + search_area_height >
14362 : picture_height)
14363 0 : ? MAX(1,
14364 : search_area_height -
14365 : ((origin_y + y_search_area_origin +
14366 : search_area_height) -
14367 : picture_height))
14368 : : search_area_height;
14369 : }
14370 0 : context_ptr->x_search_area_origin[listIndex][ref_pic_index] =
14371 : x_search_area_origin;
14372 0 : context_ptr->y_search_area_origin[listIndex][ref_pic_index] =
14373 : y_search_area_origin;
14374 :
14375 0 : context_ptr->adj_search_area_width = search_area_width;
14376 0 : context_ptr->adj_search_area_height = search_area_height;
14377 :
14378 0 : xTopLeftSearchRegion =
14379 0 : (int16_t)(refPicPtr->origin_x + sb_origin_x) -
14380 0 : (ME_FILTER_TAP >> 1) + x_search_area_origin;
14381 0 : yTopLeftSearchRegion =
14382 0 : (int16_t)(refPicPtr->origin_y + sb_origin_y) -
14383 0 : (ME_FILTER_TAP >> 1) + y_search_area_origin;
14384 0 : searchRegionIndex = (xTopLeftSearchRegion) +
14385 0 : (yTopLeftSearchRegion)*refPicPtr->stride_y;
14386 0 : context_ptr->integer_buffer_ptr[listIndex][ref_pic_index] =
14387 0 : &(refPicPtr->buffer_y[searchRegionIndex]);
14388 0 : context_ptr->interpolated_full_stride[listIndex][ref_pic_index] =
14389 0 : refPicPtr->stride_y;
14390 :
14391 : // Move to the top left of the search region
14392 0 : xTopLeftSearchRegion =
14393 0 : (int16_t)(refPicPtr->origin_x + sb_origin_x) +
14394 : x_search_area_origin;
14395 0 : yTopLeftSearchRegion =
14396 0 : (int16_t)(refPicPtr->origin_y + sb_origin_y) +
14397 : y_search_area_origin;
14398 0 : searchRegionIndex = xTopLeftSearchRegion +
14399 0 : yTopLeftSearchRegion * refPicPtr->stride_y;
14400 :
14401 : {
14402 : {
14403 0 : if (picture_control_set_ptr->pic_depth_mode <=
14404 : PIC_ALL_C_DEPTH_MODE) {
14405 0 : initialize_buffer_32bits(
14406 : context_ptr
14407 0 : ->p_sb_best_sad[listIndex][ref_pic_index],
14408 : 52,
14409 : 1,
14410 : MAX_SAD_VALUE);
14411 :
14412 0 : context_ptr->p_best_sad64x64 = &(
14413 : context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
14414 0 : [ME_TIER_ZERO_PU_64x64]);
14415 0 : context_ptr->p_best_sad32x32 =
14416 0 : &(context_ptr
14417 : ->p_sb_best_sad[listIndex][ref_pic_index]
14418 0 : [ME_TIER_ZERO_PU_32x32_0]);
14419 0 : context_ptr->p_best_sad16x16 =
14420 0 : &(context_ptr
14421 : ->p_sb_best_sad[listIndex][ref_pic_index]
14422 0 : [ME_TIER_ZERO_PU_16x16_0]);
14423 0 : context_ptr->p_best_sad8x8 = &(
14424 : context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
14425 0 : [ME_TIER_ZERO_PU_8x8_0]);
14426 0 : context_ptr->p_best_sad64x32 =
14427 0 : &(context_ptr
14428 : ->p_sb_best_sad[listIndex][ref_pic_index]
14429 0 : [ME_TIER_ZERO_PU_64x32_0]);
14430 0 : context_ptr->p_best_sad32x16 =
14431 0 : &(context_ptr
14432 : ->p_sb_best_sad[listIndex][ref_pic_index]
14433 0 : [ME_TIER_ZERO_PU_32x16_0]);
14434 0 : context_ptr->p_best_sad16x8 = &(
14435 : context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
14436 0 : [ME_TIER_ZERO_PU_16x8_0]);
14437 0 : context_ptr->p_best_sad32x64 =
14438 0 : &(context_ptr
14439 : ->p_sb_best_sad[listIndex][ref_pic_index]
14440 0 : [ME_TIER_ZERO_PU_32x64_0]);
14441 0 : context_ptr->p_best_sad16x32 =
14442 0 : &(context_ptr
14443 : ->p_sb_best_sad[listIndex][ref_pic_index]
14444 0 : [ME_TIER_ZERO_PU_16x32_0]);
14445 0 : context_ptr->p_best_sad8x16 = &(
14446 : context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
14447 0 : [ME_TIER_ZERO_PU_8x16_0]);
14448 0 : context_ptr->p_best_sad32x8 = &(
14449 : context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
14450 0 : [ME_TIER_ZERO_PU_32x8_0]);
14451 0 : context_ptr->p_best_sad8x32 = &(
14452 : context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
14453 0 : [ME_TIER_ZERO_PU_8x32_0]);
14454 0 : context_ptr->p_best_sad64x16 =
14455 0 : &(context_ptr
14456 : ->p_sb_best_sad[listIndex][ref_pic_index]
14457 0 : [ME_TIER_ZERO_PU_64x16_0]);
14458 0 : context_ptr->p_best_sad16x64 =
14459 0 : &(context_ptr
14460 : ->p_sb_best_sad[listIndex][ref_pic_index]
14461 0 : [ME_TIER_ZERO_PU_16x64_0]);
14462 :
14463 0 : context_ptr->p_best_mv64x64 = &(
14464 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14465 0 : [ME_TIER_ZERO_PU_64x64]);
14466 0 : context_ptr->p_best_mv32x32 = &(
14467 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14468 0 : [ME_TIER_ZERO_PU_32x32_0]);
14469 0 : context_ptr->p_best_mv16x16 = &(
14470 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14471 0 : [ME_TIER_ZERO_PU_16x16_0]);
14472 0 : context_ptr->p_best_mv8x8 = &(
14473 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14474 0 : [ME_TIER_ZERO_PU_8x8_0]);
14475 0 : context_ptr->p_best_mv64x32 = &(
14476 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14477 0 : [ME_TIER_ZERO_PU_64x32_0]);
14478 0 : context_ptr->p_best_mv32x16 = &(
14479 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14480 0 : [ME_TIER_ZERO_PU_32x16_0]);
14481 0 : context_ptr->p_best_mv16x8 = &(
14482 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14483 0 : [ME_TIER_ZERO_PU_16x8_0]);
14484 0 : context_ptr->p_best_mv32x64 = &(
14485 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14486 0 : [ME_TIER_ZERO_PU_32x64_0]);
14487 0 : context_ptr->p_best_mv16x32 = &(
14488 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14489 0 : [ME_TIER_ZERO_PU_16x32_0]);
14490 0 : context_ptr->p_best_mv8x16 = &(
14491 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14492 0 : [ME_TIER_ZERO_PU_8x16_0]);
14493 0 : context_ptr->p_best_mv32x8 = &(
14494 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14495 0 : [ME_TIER_ZERO_PU_32x8_0]);
14496 0 : context_ptr->p_best_mv8x32 = &(
14497 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14498 0 : [ME_TIER_ZERO_PU_8x32_0]);
14499 0 : context_ptr->p_best_mv64x16 = &(
14500 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14501 0 : [ME_TIER_ZERO_PU_64x16_0]);
14502 0 : context_ptr->p_best_mv16x64 = &(
14503 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14504 0 : [ME_TIER_ZERO_PU_16x64_0]);
14505 :
14506 0 : context_ptr->p_best_ssd64x64 = &(
14507 : context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
14508 0 : [ME_TIER_ZERO_PU_64x64]);
14509 0 : context_ptr->p_best_ssd32x32 =
14510 0 : &(context_ptr
14511 : ->p_sb_best_ssd[listIndex][ref_pic_index]
14512 0 : [ME_TIER_ZERO_PU_32x32_0]);
14513 0 : context_ptr->p_best_ssd16x16 =
14514 0 : &(context_ptr
14515 : ->p_sb_best_ssd[listIndex][ref_pic_index]
14516 0 : [ME_TIER_ZERO_PU_16x16_0]);
14517 0 : context_ptr->p_best_ssd8x8 = &(
14518 : context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
14519 0 : [ME_TIER_ZERO_PU_8x8_0]);
14520 0 : context_ptr->p_best_ssd64x32 =
14521 0 : &(context_ptr
14522 : ->p_sb_best_ssd[listIndex][ref_pic_index]
14523 0 : [ME_TIER_ZERO_PU_64x32_0]);
14524 0 : context_ptr->p_best_ssd32x16 =
14525 0 : &(context_ptr
14526 : ->p_sb_best_ssd[listIndex][ref_pic_index]
14527 0 : [ME_TIER_ZERO_PU_32x16_0]);
14528 0 : context_ptr->p_best_ssd16x8 = &(
14529 : context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
14530 0 : [ME_TIER_ZERO_PU_16x8_0]);
14531 0 : context_ptr->p_best_ssd32x64 =
14532 0 : &(context_ptr
14533 : ->p_sb_best_ssd[listIndex][ref_pic_index]
14534 0 : [ME_TIER_ZERO_PU_32x64_0]);
14535 0 : context_ptr->p_best_ssd16x32 =
14536 0 : &(context_ptr
14537 : ->p_sb_best_ssd[listIndex][ref_pic_index]
14538 0 : [ME_TIER_ZERO_PU_16x32_0]);
14539 0 : context_ptr->p_best_ssd8x16 = &(
14540 : context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
14541 0 : [ME_TIER_ZERO_PU_8x16_0]);
14542 0 : context_ptr->p_best_ssd32x8 = &(
14543 : context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
14544 0 : [ME_TIER_ZERO_PU_32x8_0]);
14545 0 : context_ptr->p_best_ssd8x32 = &(
14546 : context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
14547 0 : [ME_TIER_ZERO_PU_8x32_0]);
14548 0 : context_ptr->p_best_ssd64x16 =
14549 0 : &(context_ptr
14550 : ->p_sb_best_ssd[listIndex][ref_pic_index]
14551 0 : [ME_TIER_ZERO_PU_64x16_0]);
14552 0 : context_ptr->p_best_ssd16x64 =
14553 0 : &(context_ptr
14554 : ->p_sb_best_ssd[listIndex][ref_pic_index]
14555 0 : [ME_TIER_ZERO_PU_16x64_0]);
14556 :
14557 0 : open_loop_me_fullpel_search_sblock(context_ptr,
14558 : listIndex,
14559 : ref_pic_index,
14560 : x_search_area_origin,
14561 : y_search_area_origin,
14562 : search_area_width,
14563 : search_area_height);
14564 0 : context_ptr->full_quarter_pel_refinement = 0;
14565 :
14566 0 : if (context_ptr->half_pel_mode ==
14567 : EX_HP_MODE) {
14568 : // Move to the top left of the search region
14569 0 : xTopLeftSearchRegion =
14570 0 : (int16_t)(refPicPtr->origin_x + sb_origin_x) +
14571 : x_search_area_origin;
14572 0 : yTopLeftSearchRegion =
14573 0 : (int16_t)(refPicPtr->origin_y + sb_origin_y) +
14574 : y_search_area_origin;
14575 0 : searchRegionIndex =
14576 0 : xTopLeftSearchRegion +
14577 0 : yTopLeftSearchRegion * refPicPtr->stride_y;
14578 : // Interpolate the search region for Half-Pel
14579 : // Refinements H - AVC Style
14580 0 : InterpolateSearchRegionAVC(
14581 : context_ptr,
14582 : listIndex,
14583 : ref_pic_index,
14584 : context_ptr->integer_buffer_ptr[listIndex]
14585 0 : [ref_pic_index] +
14586 0 : (ME_FILTER_TAP >> 1) +
14587 0 : ((ME_FILTER_TAP >> 1) *
14588 : context_ptr->interpolated_full_stride
14589 0 : [listIndex][ref_pic_index]),
14590 : context_ptr
14591 : ->interpolated_full_stride[listIndex]
14592 : [ref_pic_index],
14593 0 : (uint32_t)search_area_width +
14594 : (BLOCK_SIZE_64 - 1),
14595 0 : (uint32_t)search_area_height +
14596 : (BLOCK_SIZE_64 - 1),
14597 : 8);
14598 :
14599 0 : initialize_buffer_32bits(
14600 : context_ptr
14601 0 : ->p_sb_best_ssd[listIndex][ref_pic_index],
14602 : 52,
14603 : 1,
14604 : MAX_SSE_VALUE);
14605 0 : memcpy(context_ptr
14606 : ->p_sb_best_full_pel_mv[listIndex]
14607 0 : [ref_pic_index],
14608 : context_ptr
14609 0 : ->p_sb_best_mv[listIndex][ref_pic_index],
14610 : MAX_ME_PU_COUNT * sizeof(uint32_t));
14611 0 : context_ptr->full_quarter_pel_refinement = 1;
14612 0 : context_ptr->p_best_full_pel_mv64x64 =
14613 0 : &(context_ptr->p_sb_best_full_pel_mv
14614 : [listIndex][ref_pic_index]
14615 0 : [ME_TIER_ZERO_PU_64x64]);
14616 0 : context_ptr->p_best_full_pel_mv32x32 =
14617 0 : &(context_ptr->p_sb_best_full_pel_mv
14618 : [listIndex][ref_pic_index]
14619 0 : [ME_TIER_ZERO_PU_32x32_0]);
14620 0 : context_ptr->p_best_full_pel_mv16x16 =
14621 0 : &(context_ptr->p_sb_best_full_pel_mv
14622 : [listIndex][ref_pic_index]
14623 0 : [ME_TIER_ZERO_PU_16x16_0]);
14624 0 : context_ptr->p_best_full_pel_mv8x8 =
14625 0 : &(context_ptr->p_sb_best_full_pel_mv
14626 : [listIndex][ref_pic_index]
14627 0 : [ME_TIER_ZERO_PU_8x8_0]);
14628 0 : context_ptr->p_best_full_pel_mv64x32 =
14629 0 : &(context_ptr->p_sb_best_full_pel_mv
14630 : [listIndex][ref_pic_index]
14631 0 : [ME_TIER_ZERO_PU_64x32_0]);
14632 0 : context_ptr->p_best_full_pel_mv32x16 =
14633 0 : &(context_ptr->p_sb_best_full_pel_mv
14634 : [listIndex][ref_pic_index]
14635 0 : [ME_TIER_ZERO_PU_32x16_0]);
14636 0 : context_ptr->p_best_full_pel_mv16x8 =
14637 0 : &(context_ptr->p_sb_best_full_pel_mv
14638 : [listIndex][ref_pic_index]
14639 0 : [ME_TIER_ZERO_PU_16x8_0]);
14640 0 : context_ptr->p_best_full_pel_mv32x64 =
14641 0 : &(context_ptr->p_sb_best_full_pel_mv
14642 : [listIndex][ref_pic_index]
14643 0 : [ME_TIER_ZERO_PU_32x64_0]);
14644 0 : context_ptr->p_best_full_pel_mv16x32 =
14645 0 : &(context_ptr->p_sb_best_full_pel_mv
14646 : [listIndex][ref_pic_index]
14647 0 : [ME_TIER_ZERO_PU_16x32_0]);
14648 0 : context_ptr->p_best_full_pel_mv8x16 =
14649 0 : &(context_ptr->p_sb_best_full_pel_mv
14650 : [listIndex][ref_pic_index]
14651 0 : [ME_TIER_ZERO_PU_8x16_0]);
14652 0 : context_ptr->p_best_full_pel_mv32x8 =
14653 0 : &(context_ptr->p_sb_best_full_pel_mv
14654 : [listIndex][ref_pic_index]
14655 0 : [ME_TIER_ZERO_PU_32x8_0]);
14656 0 : context_ptr->p_best_full_pel_mv8x32 =
14657 0 : &(context_ptr->p_sb_best_full_pel_mv
14658 : [listIndex][ref_pic_index]
14659 0 : [ME_TIER_ZERO_PU_8x32_0]);
14660 0 : context_ptr->p_best_full_pel_mv64x16 =
14661 0 : &(context_ptr->p_sb_best_full_pel_mv
14662 : [listIndex][ref_pic_index]
14663 0 : [ME_TIER_ZERO_PU_64x16_0]);
14664 0 : context_ptr->p_best_full_pel_mv16x64 =
14665 0 : &(context_ptr->p_sb_best_full_pel_mv
14666 : [listIndex][ref_pic_index]
14667 0 : [ME_TIER_ZERO_PU_16x64_0]);
14668 : // half-Pel search
14669 0 : open_loop_me_half_pel_search_sblock(
14670 : picture_control_set_ptr,
14671 : context_ptr,
14672 : listIndex,
14673 : ref_pic_index,
14674 : x_search_area_origin,
14675 : y_search_area_origin,
14676 : search_area_width,
14677 : search_area_height);
14678 : }
14679 :
14680 0 : if (context_ptr->quarter_pel_mode ==
14681 : EX_QP_MODE) {
14682 : // Quarter-Pel search
14683 0 : memcpy(context_ptr
14684 : ->p_sb_best_full_pel_mv[listIndex]
14685 0 : [ref_pic_index],
14686 : context_ptr
14687 0 : ->p_sb_best_mv[listIndex][ref_pic_index],
14688 : MAX_ME_PU_COUNT * sizeof(uint32_t));
14689 0 : open_loop_me_quarter_pel_search_sblock(
14690 : context_ptr,
14691 : listIndex,
14692 : ref_pic_index,
14693 : x_search_area_origin,
14694 : y_search_area_origin,
14695 : search_area_width,
14696 : search_area_height);
14697 : }
14698 : } else {
14699 0 : initialize_buffer_32bits(
14700 : context_ptr
14701 0 : ->p_sb_best_sad[listIndex][ref_pic_index],
14702 : 21,
14703 : 1,
14704 : MAX_SAD_VALUE);
14705 :
14706 0 : context_ptr->full_quarter_pel_refinement = 0;
14707 :
14708 0 : context_ptr->p_best_sad64x64 = &(
14709 : context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
14710 0 : [ME_TIER_ZERO_PU_64x64]);
14711 0 : context_ptr->p_best_sad32x32 =
14712 0 : &(context_ptr
14713 : ->p_sb_best_sad[listIndex][ref_pic_index]
14714 0 : [ME_TIER_ZERO_PU_32x32_0]);
14715 0 : context_ptr->p_best_sad16x16 =
14716 0 : &(context_ptr
14717 : ->p_sb_best_sad[listIndex][ref_pic_index]
14718 0 : [ME_TIER_ZERO_PU_16x16_0]);
14719 0 : context_ptr->p_best_sad8x8 = &(
14720 : context_ptr->p_sb_best_sad[listIndex][ref_pic_index]
14721 0 : [ME_TIER_ZERO_PU_8x8_0]);
14722 :
14723 0 : context_ptr->p_best_mv64x64 = &(
14724 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14725 0 : [ME_TIER_ZERO_PU_64x64]);
14726 0 : context_ptr->p_best_mv32x32 = &(
14727 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14728 0 : [ME_TIER_ZERO_PU_32x32_0]);
14729 0 : context_ptr->p_best_mv16x16 = &(
14730 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14731 0 : [ME_TIER_ZERO_PU_16x16_0]);
14732 0 : context_ptr->p_best_mv8x8 = &(
14733 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index]
14734 0 : [ME_TIER_ZERO_PU_8x8_0]);
14735 :
14736 0 : context_ptr->p_best_ssd64x64 = &(
14737 : context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
14738 0 : [ME_TIER_ZERO_PU_64x64]);
14739 0 : context_ptr->p_best_ssd32x32 =
14740 0 : &(context_ptr
14741 : ->p_sb_best_ssd[listIndex][ref_pic_index]
14742 0 : [ME_TIER_ZERO_PU_32x32_0]);
14743 0 : context_ptr->p_best_ssd16x16 =
14744 0 : &(context_ptr
14745 : ->p_sb_best_ssd[listIndex][ref_pic_index]
14746 0 : [ME_TIER_ZERO_PU_16x16_0]);
14747 0 : context_ptr->p_best_ssd8x8 = &(
14748 : context_ptr->p_sb_best_ssd[listIndex][ref_pic_index]
14749 0 : [ME_TIER_ZERO_PU_8x8_0]);
14750 0 : FullPelSearch_LCU(context_ptr,
14751 : listIndex,
14752 : ref_pic_index,
14753 : x_search_area_origin,
14754 : y_search_area_origin,
14755 : search_area_width,
14756 : search_area_height);
14757 : }
14758 : }
14759 :
14760 0 : if (context_ptr->fractional_search_model == 0) {
14761 0 : enableHalfPel32x32 = EB_TRUE;
14762 0 : enableHalfPel16x16 = EB_TRUE;
14763 0 : enableHalfPel8x8 = EB_TRUE;
14764 0 : enableQuarterPel = EB_TRUE;
14765 0 : } else if (context_ptr->fractional_search_model == 1) {
14766 0 : suPelEnable(context_ptr,
14767 : picture_control_set_ptr,
14768 : listIndex,
14769 : 0,
14770 : &enableHalfPel32x32,
14771 : &enableHalfPel16x16,
14772 : &enableHalfPel8x8);
14773 0 : enableQuarterPel = EB_TRUE;
14774 : } else {
14775 0 : enableHalfPel32x32 = EB_FALSE;
14776 0 : enableHalfPel16x16 = EB_FALSE;
14777 0 : enableHalfPel8x8 = EB_FALSE;
14778 0 : enableQuarterPel = EB_FALSE;
14779 : }
14780 0 : if (enableHalfPel32x32 || enableHalfPel16x16 ||
14781 0 : enableHalfPel8x8 || enableQuarterPel) {
14782 : // if((picture_control_set_ptr->is_used_as_reference_flag ==
14783 : // EB_TRUE)) {
14784 : // Move to the top left of the search region
14785 0 : xTopLeftSearchRegion =
14786 0 : (int16_t)(refPicPtr->origin_x + sb_origin_x) +
14787 : x_search_area_origin;
14788 0 : yTopLeftSearchRegion =
14789 0 : (int16_t)(refPicPtr->origin_y + sb_origin_y) +
14790 : y_search_area_origin;
14791 0 : searchRegionIndex =
14792 0 : xTopLeftSearchRegion +
14793 0 : yTopLeftSearchRegion * refPicPtr->stride_y;
14794 :
14795 : // Interpolate the search region for Half-Pel Refinements
14796 : // H - AVC Style
14797 :
14798 0 : if (context_ptr->half_pel_mode ==
14799 : REFINMENT_HP_MODE) {
14800 0 : InterpolateSearchRegionAVC(
14801 : context_ptr,
14802 : listIndex,
14803 : ref_pic_index,
14804 : context_ptr->integer_buffer_ptr[listIndex]
14805 0 : [ref_pic_index] +
14806 0 : (ME_FILTER_TAP >> 1) +
14807 0 : ((ME_FILTER_TAP >> 1) *
14808 : context_ptr
14809 : ->interpolated_full_stride[listIndex]
14810 0 : [ref_pic_index]),
14811 : context_ptr
14812 : ->interpolated_full_stride[listIndex]
14813 : [ref_pic_index],
14814 0 : (uint32_t)search_area_width + (BLOCK_SIZE_64 - 1),
14815 0 : (uint32_t)search_area_height + (BLOCK_SIZE_64 - 1),
14816 : 8);
14817 :
14818 : // Half-Pel Refinement [8 search positions]
14819 0 : HalfPelSearch_LCU(
14820 : sequence_control_set_ptr,
14821 : picture_control_set_ptr,
14822 : context_ptr,
14823 : #if M0_HIGH_PRECISION_INTERPOLATION
14824 : context_ptr->integer_buffer_ptr[listIndex]
14825 : [ref_pic_index] +
14826 : (ME_FILTER_PAD_DISTANCE >> 1) +
14827 : ((ME_FILTER_PAD_DISTANCE >> 1) *
14828 : context_ptr
14829 : ->interpolated_full_stride[listIndex]
14830 : [ref_pic_index]),
14831 : context_ptr
14832 : ->interpolated_full_stride[listIndex]
14833 : [ref_pic_index],
14834 : &(context_ptr->pos_b_buffer
14835 : [listIndex][ref_pic_index]
14836 : [(ME_FILTER_PAD_DISTANCE >> 1) *
14837 : context_ptr->interpolated_stride]),
14838 : #else
14839 : context_ptr->integer_buffer_ptr[listIndex]
14840 0 : [ref_pic_index] +
14841 0 : (ME_FILTER_TAP >> 1) +
14842 0 : ((ME_FILTER_TAP >> 1) *
14843 : context_ptr
14844 : ->interpolated_full_stride[listIndex]
14845 0 : [ref_pic_index]),
14846 : context_ptr
14847 : ->interpolated_full_stride[listIndex]
14848 : [ref_pic_index],
14849 : &(context_ptr->pos_b_buffer
14850 0 : [listIndex][ref_pic_index]
14851 0 : [(ME_FILTER_TAP >> 1) *
14852 0 : context_ptr->interpolated_stride]),
14853 : #endif
14854 : &(context_ptr
14855 0 : ->pos_h_buffer[listIndex][ref_pic_index][1]),
14856 : &(context_ptr
14857 : ->pos_j_buffer[listIndex][ref_pic_index][0]),
14858 : x_search_area_origin,
14859 : y_search_area_origin,
14860 0 : picture_control_set_ptr->cu8x8_mode ==
14861 : CU_8x8_MODE_1,
14862 : enableHalfPel32x32,
14863 : enableHalfPel16x16,
14864 : enableHalfPel8x8);
14865 : }
14866 :
14867 0 : if (context_ptr->quarter_pel_mode ==
14868 : REFINMENT_QP_MODE) {
14869 : // Quarter-Pel Refinement [8 search positions]
14870 0 : QuarterPelSearch_LCU(
14871 : context_ptr,
14872 : #if M0_HIGH_PRECISION_INTERPOLATION
14873 : context_ptr->integer_buffer_ptr[listIndex]
14874 : [ref_pic_index] +
14875 : (ME_FILTER_PAD_DISTANCE >> 1) +
14876 : ((ME_FILTER_PAD_DISTANCE >> 1) *
14877 : context_ptr
14878 : ->interpolated_full_stride[listIndex]
14879 : [ref_pic_index]),
14880 : context_ptr
14881 : ->interpolated_full_stride[listIndex]
14882 : [ref_pic_index],
14883 : &(context_ptr->pos_b_buffer
14884 : [listIndex][ref_pic_index]
14885 : [(ME_FILTER_PAD_DISTANCE >> 1) *
14886 : context_ptr
14887 : ->interpolated_stride]), // points to b
14888 : // position of
14889 : // the figure
14890 : // above
14891 : #else
14892 : context_ptr->integer_buffer_ptr[listIndex]
14893 0 : [ref_pic_index] +
14894 0 : (ME_FILTER_TAP >> 1) +
14895 0 : ((ME_FILTER_TAP >> 1) *
14896 : context_ptr
14897 : ->interpolated_full_stride[listIndex]
14898 0 : [ref_pic_index]),
14899 : context_ptr
14900 : ->interpolated_full_stride[listIndex]
14901 : [ref_pic_index],
14902 : &(context_ptr->pos_b_buffer
14903 0 : [listIndex][ref_pic_index]
14904 0 : [(ME_FILTER_TAP >> 1) *
14905 : context_ptr
14906 0 : ->interpolated_stride]), // points to b
14907 : // position of
14908 : // the figure
14909 : // above
14910 : #endif
14911 : &(context_ptr
14912 0 : ->pos_h_buffer[listIndex][ref_pic_index]
14913 : [1]), // points to h position
14914 : // of the figure above
14915 : &(context_ptr
14916 : ->pos_j_buffer[listIndex][ref_pic_index]
14917 : [0]), // points to j position
14918 : // of the figure above
14919 : x_search_area_origin,
14920 : y_search_area_origin,
14921 0 : picture_control_set_ptr->cu8x8_mode ==
14922 : CU_8x8_MODE_1,
14923 : enableHalfPel32x32,
14924 : enableHalfPel16x16,
14925 : enableHalfPel8x8,
14926 : enableQuarterPel,
14927 0 : picture_control_set_ptr->pic_depth_mode <=
14928 : PIC_ALL_C_DEPTH_MODE);
14929 : }
14930 : }
14931 0 : if (is_nsq_table_used && ref_pic_index == 0) {
14932 0 : context_ptr->p_best_nsq64x64 =
14933 0 : &(context_ptr->p_sb_best_nsq[listIndex][0]
14934 : [ME_TIER_ZERO_PU_64x64]);
14935 0 : context_ptr->p_best_nsq32x32 =
14936 0 : &(context_ptr->p_sb_best_nsq[listIndex][0]
14937 : [ME_TIER_ZERO_PU_32x32_0]);
14938 0 : context_ptr->p_best_nsq16x16 =
14939 0 : &(context_ptr->p_sb_best_nsq[listIndex][0]
14940 : [ME_TIER_ZERO_PU_16x16_0]);
14941 0 : context_ptr->p_best_nsq8x8 =
14942 0 : &(context_ptr->p_sb_best_nsq[listIndex][0]
14943 : [ME_TIER_ZERO_PU_8x8_0]);
14944 0 : nsq_get_analysis_results_block(context_ptr);
14945 : }
14946 : }
14947 : }
14948 : }
14949 :
14950 0 : if (context_ptr->me_alt_ref == EB_FALSE) {
14951 :
14952 : // Bi-Prediction motion estimation loop
14953 0 : for (pu_index = 0; pu_index < max_number_of_pus_per_sb; ++pu_index) {
14954 0 : candidateIndex = 0;
14955 :
14956 : uint32_t nIdx;
14957 :
14958 0 : if (pu_index > 200)
14959 0 : nIdx = pu_index;
14960 0 : else if (pu_index > 184)
14961 0 : nIdx = tab8x32[pu_index - 185] + 185;
14962 0 : else if (pu_index > 168)
14963 0 : nIdx = tab32x8[pu_index - 169] + 169;
14964 0 : else if (pu_index > 136)
14965 0 : nIdx = tab8x16[pu_index - 137] + 137;
14966 0 : else if (pu_index > 128)
14967 0 : nIdx = tab16x32[pu_index - 129] + 129;
14968 0 : else if (pu_index > 126)
14969 0 : nIdx = pu_index;
14970 0 : else if (pu_index > 94)
14971 0 : nIdx = tab16x8[pu_index - 95] + 95;
14972 0 : else if (pu_index > 86)
14973 0 : nIdx = tab32x16[pu_index - 87] + 87;
14974 0 : else if (pu_index > 84)
14975 0 : nIdx = pu_index;
14976 0 : else if (pu_index > 20)
14977 0 : nIdx = tab8x8[pu_index - 21] + 21;
14978 0 : else if (pu_index > 4)
14979 0 : nIdx = tab16x16[pu_index - 5] + 5;
14980 : else
14981 0 : nIdx = pu_index;
14982 0 : for (listIndex = REF_LIST_0; listIndex <= numOfListToSearch;
14983 0 : ++listIndex) {
14984 0 : num_of_ref_pic_to_search =
14985 0 : (picture_control_set_ptr->slice_type == P_SLICE)
14986 : ? picture_control_set_ptr->ref_list0_count
14987 : : (listIndex == REF_LIST_0)
14988 : ? picture_control_set_ptr->ref_list0_count
14989 : : picture_control_set_ptr->ref_list1_count;
14990 :
14991 : // Ref Picture Loop
14992 0 : for (ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search;
14993 0 : ++ref_pic_index) {
14994 0 : me_candidate =
14995 0 : &(context_ptr->me_candidate[candidateIndex].pu[pu_index]);
14996 0 : me_candidate->prediction_direction = listIndex;
14997 0 : me_candidate->ref_index[listIndex] = ref_pic_index;
14998 0 : me_candidate->ref0_list = me_candidate->prediction_direction == 0 ? listIndex : 24;
14999 0 : me_candidate->ref1_list = me_candidate->prediction_direction == 1 ? listIndex : 24;
15000 0 : me_candidate->distortion =
15001 0 : context_ptr->p_sb_best_sad[listIndex][ref_pic_index][nIdx];
15002 0 : candidateIndex++;
15003 : }
15004 : }
15005 :
15006 0 : total_me_candidate_index = candidateIndex;
15007 : uint8_t ref_type_table[7];
15008 0 : if (picture_control_set_ptr->prune_unipred_at_me) {
15009 : // Sorting of the ME candidates
15010 0 : for (candidate_index = 0;
15011 0 : candidate_index < total_me_candidate_index - 1;
15012 0 : ++candidate_index) {
15013 0 : for (next_candidate_index = candidate_index + 1;
15014 0 : next_candidate_index < total_me_candidate_index;
15015 0 : ++next_candidate_index) {
15016 0 : if (context_ptr->me_candidate[candidate_index]
15017 : .pu[pu_index]
15018 0 : .distortion >
15019 0 : context_ptr->me_candidate[next_candidate_index]
15020 : .pu[pu_index]
15021 0 : .distortion) {
15022 0 : SwapMeCandidate(
15023 0 : &(context_ptr->me_candidate[candidate_index]
15024 : .pu[pu_index]),
15025 0 : &(context_ptr->me_candidate[next_candidate_index]
15026 : .pu[pu_index]));
15027 : }
15028 : }
15029 : }
15030 0 : for (candidate_index = 0;
15031 0 : candidate_index < total_me_candidate_index;
15032 0 : ++candidate_index) {
15033 :
15034 0 : me_candidate =
15035 0 : &(context_ptr->me_candidate[candidate_index].pu[pu_index]);
15036 :
15037 0 : if (me_candidate->prediction_direction == 0)
15038 0 : ref_type_table[candidate_index] = svt_get_ref_frame_type(me_candidate->ref0_list, me_candidate->ref_index[0]);
15039 : else
15040 0 : ref_type_table[candidate_index] = svt_get_ref_frame_type(me_candidate->ref1_list, me_candidate->ref_index[1]);
15041 :
15042 : }
15043 : }
15044 0 : if (numOfListToSearch) {
15045 0 : if (picture_control_set_ptr->cu8x8_mode == CU_8x8_MODE_0 ||
15046 0 : pu_index < 21 ||
15047 0 : (picture_control_set_ptr->pic_depth_mode <=
15048 : PIC_ALL_C_DEPTH_MODE)) {
15049 0 : BiPredictionSearch(
15050 : sequence_control_set_ptr,
15051 : context_ptr,
15052 : pu_index,
15053 : candidateIndex,
15054 0 : picture_control_set_ptr->ref_list0_count,
15055 0 : picture_control_set_ptr->ref_list1_count,
15056 : &total_me_candidate_index,
15057 : ref_type_table,
15058 : picture_control_set_ptr);
15059 : }
15060 : }
15061 :
15062 : // Sorting of the ME candidates
15063 0 : for (candidate_index = 0;
15064 0 : candidate_index < total_me_candidate_index - 1;
15065 0 : ++candidate_index) {
15066 0 : for (next_candidate_index = candidate_index + 1;
15067 0 : next_candidate_index < total_me_candidate_index;
15068 0 : ++next_candidate_index) {
15069 0 : if (context_ptr->me_candidate[candidate_index]
15070 : .pu[pu_index]
15071 0 : .distortion >
15072 0 : context_ptr->me_candidate[next_candidate_index]
15073 : .pu[pu_index]
15074 0 : .distortion) {
15075 0 : SwapMeCandidate(
15076 0 : &(context_ptr->me_candidate[candidate_index]
15077 : .pu[pu_index]),
15078 0 : &(context_ptr->me_candidate[next_candidate_index]
15079 : .pu[pu_index]));
15080 : }
15081 : }
15082 : }
15083 :
15084 0 : MeLcuResults *mePuResult =
15085 0 : picture_control_set_ptr->me_results[sb_index];
15086 0 : mePuResult->total_me_candidate_index[pu_index] =
15087 : total_me_candidate_index;
15088 :
15089 0 : uint8_t l0_nsq =
15090 : is_nsq_table_used ? context_ptr->p_sb_best_nsq[0][0][nIdx] : 0;
15091 0 : uint8_t l1_nsq =
15092 : is_nsq_table_used ? context_ptr->p_sb_best_nsq[1][0][nIdx] : 0;
15093 0 : mePuResult->me_nsq_0[pu_index] = l0_nsq;
15094 0 : mePuResult->me_nsq_1[pu_index] = l1_nsq;
15095 :
15096 0 : mePuResult->total_me_candidate_index[pu_index] =
15097 0 : MIN(total_me_candidate_index, ME_RES_CAND_MRP_MODE_0);
15098 : // Assigning the ME candidates to the ME results buffer
15099 0 : for (candidateIndex = 0; candidateIndex < total_me_candidate_index;
15100 0 : ++candidateIndex) {
15101 0 : me_candidate =
15102 0 : &(context_ptr->me_candidate[candidateIndex].pu[pu_index]);
15103 0 : picture_control_set_ptr->me_results[sb_index]
15104 0 : ->me_candidate[pu_index][candidateIndex]
15105 0 : .distortion = me_candidate->distortion;
15106 0 : picture_control_set_ptr->me_results[sb_index]
15107 0 : ->me_candidate[pu_index][candidateIndex]
15108 0 : .direction = me_candidate->prediction_direction;
15109 0 : picture_control_set_ptr->me_results[sb_index]
15110 0 : ->me_candidate[pu_index][candidateIndex]
15111 0 : .ref_idx_l0 = me_candidate->ref_index[0];
15112 0 : picture_control_set_ptr->me_results[sb_index]
15113 0 : ->me_candidate[pu_index][candidateIndex]
15114 0 : .ref_idx_l1 = me_candidate->ref_index[1];
15115 0 : picture_control_set_ptr->me_results[sb_index]
15116 0 : ->me_candidate[pu_index][candidateIndex]
15117 0 : .ref0_list = me_candidate->ref0_list;
15118 0 : picture_control_set_ptr->me_results[sb_index]
15119 0 : ->me_candidate[pu_index][candidateIndex]
15120 0 : .ref1_list = me_candidate->ref1_list;
15121 : }
15122 :
15123 0 : for (listIndex = REF_LIST_0; listIndex <= numOfListToSearch;
15124 0 : ++listIndex) {
15125 0 : num_of_ref_pic_to_search =
15126 0 : (picture_control_set_ptr->slice_type == P_SLICE)
15127 : ? picture_control_set_ptr->ref_list0_count
15128 : : (listIndex == REF_LIST_0)
15129 : ? picture_control_set_ptr->ref_list0_count
15130 : : picture_control_set_ptr->ref_list1_count;
15131 :
15132 : // Ref Picture Loop
15133 0 : for (ref_pic_index = 0; ref_pic_index < num_of_ref_pic_to_search;
15134 0 : ++ref_pic_index) {
15135 0 : picture_control_set_ptr->me_results[sb_index]
15136 0 : ->me_mv_array[pu_index]
15137 0 : [((listIndex &&
15138 0 : sequence_control_set_ptr->mrp_mode == 0)
15139 : ? 4
15140 0 : : listIndex ? 2 : 0) +
15141 : ref_pic_index]
15142 0 : .x_mv = _MVXT(
15143 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index][nIdx]);
15144 0 : picture_control_set_ptr->me_results[sb_index]
15145 0 : ->me_mv_array[pu_index]
15146 0 : [((listIndex &&
15147 0 : sequence_control_set_ptr->mrp_mode == 0)
15148 : ? 4
15149 0 : : listIndex ? 2 : 0) +
15150 : ref_pic_index]
15151 0 : .y_mv = _MVYT(
15152 : context_ptr->p_sb_best_mv[listIndex][ref_pic_index][nIdx]);
15153 : }
15154 : }
15155 : }
15156 : {
15157 : // Compute the sum of the distortion of all 16 16x16 (best) blocks
15158 : // in the LCU
15159 0 : picture_control_set_ptr->rc_me_distortion[sb_index] = 0;
15160 0 : for (i = 0; i < 16; i++)
15161 0 : picture_control_set_ptr->rc_me_distortion[sb_index] +=
15162 0 : picture_control_set_ptr->me_results[sb_index]
15163 0 : ->me_candidate[5 + i][0]
15164 0 : .distortion;
15165 : }
15166 :
15167 : }
15168 :
15169 0 : return return_error;
15170 : }
15171 :
15172 : /*******************************************
15173 : * SixteenthDecimatedSearch
15174 : * Performs a SAD search on the 1/16 decimated reference picture.
 : *
 : * Builds a search window of search_area_width x search_area_height
 : * centred on (origin_x, origin_y), adjusts it against the edges of
 : * sixteenthRefPicPtr, then hands the window to a SAD loop kernel.
 : *
 : * context_ptr          supplies sixteenth_sb_buffer and its stride
 : * origin_x, origin_y   block position; added to the reference
 : *                      picture's origin_x / origin_y offsets
 : * sb_width, sb_height  block dimensions forwarded to the kernel
 : * sixteenthRefPicPtr   reference picture that is searched
 : * search_area_width,
 : * search_area_height   requested window size before edge adjustment
 : *
 : * Returns best_sad as filled in by the kernel. The search centre
 : * outputs are collected but not used after the call.
 : *
 : * Both strides are passed doubled and the block height halved,
 : * presumably so that only every other row is compared - TODO
 : * confirm against the kernel's parameter order.
 : *
 : * NOTE(review): for the left and top edges the origin is clamped
 : * first and the size adjustment then re-tests the same condition
 : * with the already clamped origin, so that adjustment looks like it
 : * can never apply - confirm whether the window is meant to shrink.
15175 : *******************************************/
15176 0 : uint64_t SixteenthDecimatedSearch(MeContext *context_ptr, int16_t origin_x,
15177 : int16_t origin_y, uint32_t sb_width,
15178 : uint32_t sb_height,
15179 : EbPictureBufferDesc *sixteenthRefPicPtr,
15180 : int16_t search_area_width,
15181 : int16_t search_area_height)
15182 : {
15183 : int16_t xTopLeftSearchRegion;
15184 : int16_t yTopLeftSearchRegion;
15185 : uint32_t searchRegionIndex;
15186 : int16_t x_search_area_origin;
15187 : int16_t y_search_area_origin;
15188 : // Left/top limits: one sample less than the picture's origin offsets
15189 0 : int16_t padWidth = (int16_t)(sixteenthRefPicPtr->origin_x) - 1;
15190 0 : int16_t padHeight = (int16_t)(sixteenthRefPicPtr->origin_y) - 1;
15191 :
15192 : uint64_t best_sad;
15193 : int16_t x_search_center;
15194 : int16_t y_search_center;
15195 : // Start with the window centred on the block position
15196 0 : x_search_area_origin = -(search_area_width >> 1);
15197 0 : y_search_area_origin = -(search_area_height >> 1);
15198 :
15199 : // Correct the left edge of the search area if it is not on the reference
15200 : // picture (limit is -padWidth relative to the picture origin)
15201 0 : x_search_area_origin = ((origin_x + x_search_area_origin) < -padWidth)
15202 0 : ? -padWidth - origin_x
15203 : : x_search_area_origin;
15204 : // NOTE(review): condition below is evaluated with the clamped origin
15205 0 : search_area_width =
15206 0 : ((origin_x + x_search_area_origin) < -padWidth)
15207 0 : ? search_area_width -
15208 0 : (-padWidth - (origin_x + x_search_area_origin))
15209 : : search_area_width;
15210 :
15211 : // Correct the right edge of the search area if it is not on the reference
15212 : // picture: pull the origin back to the last column (width - 1)
15213 0 : x_search_area_origin =
15214 0 : ((origin_x + x_search_area_origin) >
15215 0 : (int16_t)sixteenthRefPicPtr->width - 1)
15216 0 : ? x_search_area_origin - ((origin_x + x_search_area_origin) -
15217 0 : ((int16_t)sixteenthRefPicPtr->width - 1))
15218 : : x_search_area_origin;
15219 : // Trim the part of the window past the picture width, keeping at least 1
15220 0 : search_area_width =
15221 0 : ((origin_x + x_search_area_origin + search_area_width) >
15222 0 : (int16_t)sixteenthRefPicPtr->width)
15223 0 : ? MAX(1,
15224 : search_area_width -
15225 : ((origin_x + x_search_area_origin + search_area_width) -
15226 : (int16_t)sixteenthRefPicPtr->width))
15227 : : search_area_width;
15228 :
15229 : // Correct the top edge of the search area if it is not on the reference
15230 : // picture (limit is -padHeight relative to the picture origin)
15231 0 : y_search_area_origin = ((origin_y + y_search_area_origin) < -padHeight)
15232 0 : ? -padHeight - origin_y
15233 : : y_search_area_origin;
15234 : // NOTE(review): condition below is evaluated with the clamped origin
15235 0 : search_area_height =
15236 0 : ((origin_y + y_search_area_origin) < -padHeight)
15237 0 : ? search_area_height -
15238 0 : (-padHeight - (origin_y + y_search_area_origin))
15239 : : search_area_height;
15240 :
15241 : // Correct the bottom edge of the search area if it is not on the reference
15242 : // picture: pull the origin back to the last row (height - 1)
15243 0 : y_search_area_origin =
15244 0 : ((origin_y + y_search_area_origin) >
15245 0 : (int16_t)sixteenthRefPicPtr->height - 1)
15246 0 : ? y_search_area_origin - ((origin_y + y_search_area_origin) -
15247 0 : ((int16_t)sixteenthRefPicPtr->height - 1))
15248 : : y_search_area_origin;
15249 : // Trim the part of the window past the picture height, keeping at least 1
15250 0 : search_area_height =
15251 0 : (origin_y + y_search_area_origin + search_area_height >
15252 0 : (int16_t)sixteenthRefPicPtr->height)
15253 0 : ? MAX(1,
15254 : search_area_height -
15255 : ((origin_y + y_search_area_origin + search_area_height) -
15256 : (int16_t)sixteenthRefPicPtr->height))
15257 : : search_area_height;
15258 : // Convert the window's top-left corner into an offset inside buffer_y
15259 0 : xTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_x + origin_x) +
15260 : x_search_area_origin;
15261 0 : yTopLeftSearchRegion = ((int16_t)sixteenthRefPicPtr->origin_y + origin_y) +
15262 : y_search_area_origin;
15263 0 : searchRegionIndex = xTopLeftSearchRegion +
15264 0 : yTopLeftSearchRegion * sixteenthRefPicPtr->stride_y;
15265 :
15266 0 : if ((search_area_width & 15) == 0) {
15267 : // Search-area widths that are a multiple of 16 go to
15268 : // sad_loop_kernel_hme_l0; every other width uses sad_loop_kernel below
15269 : // (original note: old code as the one in "sad_loop_kernel_sse4_1_intrin")
15270 0 : sad_loop_kernel_hme_l0(
15271 : &context_ptr->sixteenth_sb_buffer[0],
15272 0 : context_ptr->sixteenth_sb_buffer_stride * 2,
15273 0 : &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
15274 0 : sixteenthRefPicPtr->stride_y * 2,
15275 : sb_height >> 1,
15276 : sb_width,
15277 : /* results */
15278 : &best_sad,
15279 : &x_search_center,
15280 : &y_search_center,
15281 : /* range */
15282 0 : sixteenthRefPicPtr->stride_y,
15283 : search_area_width,
15284 : search_area_height);
15285 : } else {
15286 : // Put the first search location into level0 results
15287 0 : sad_loop_kernel(
15288 : &context_ptr->sixteenth_sb_buffer[0],
15289 0 : context_ptr->sixteenth_sb_buffer_stride * 2,
15290 0 : &sixteenthRefPicPtr->buffer_y[searchRegionIndex],
15291 0 : sixteenthRefPicPtr->stride_y * 2,
15292 : sb_height >> 1,
15293 : sb_width,
15294 : /* results */
15295 : &best_sad,
15296 : &x_search_center,
15297 : &y_search_center,
15298 : /* range */
15299 0 : sixteenthRefPicPtr->stride_y,
15300 : search_area_width,
15301 : search_area_height);
15302 : }
15303 : // Only the SAD is reported; the search centre outputs are dropped here
15304 0 : return (best_sad);
15305 : }
15306 :
15307 : /*******************************************
15308 : * IsComplexLcu
15309 : * Returns EB_TRUE if the SB has a high spatial & temporal complexity.
 : *
 : * All three of the following must hold:
 : *  - lcuCollocatedSad >= sb_width * sb_height *
 : *    IS_COMPLEX_LCU_ZZ_SAD_FACTOR_TH
 : *  - at least half (availableLcusCount >> 1) of the counted SBs in
 : *    the 3x3 neighbourhood have a 64x64 variance above
 : *    IS_COMPLEX_LCU_VARIANCE_TH
 : *  - the 64x64 variance of this SB is above
 : *    IS_COMPLEX_LCU_FLAT_VARIANCE_TH in previousParentPcs,
 : *    currentParentPcs and plusOneParentPcs, and the current value
 : *    deviates from both the previous and the plus-one value by at
 : *    least IS_COMPLEX_LCU_VARIANCE_DEVIATION_TH percent
 : *
 : * lcuAdrr             index of the SB in the variance arrays
 : * pictureWidthInLcus  row pitch used to reach the SBs above / below
 : * sb_origin_x/y       SB position, used to decide which neighbours
 : *                     exist inside enhanced_picture_ptr
 : *
 : * NOTE(review): the current SB is added to availableLcusCount only
 : * when its own variance is high, whereas every neighbour is counted
 : * as soon as it exists - confirm that this asymmetry is intended.
15310 : *******************************************/
15311 0 : EbBool IsComplexLcu(PictureParentControlSet *previousParentPcs,
15312 : PictureParentControlSet *currentParentPcs,
15313 : PictureParentControlSet *plusOneParentPcs,
15314 : uint32_t pictureWidthInLcus, uint32_t lcuAdrr,
15315 : uint32_t sb_origin_x, uint32_t sb_origin_y,
15316 : uint32_t sb_width, uint32_t sb_height,
15317 : uint32_t lcuCollocatedSad) {
15318 0 : uint32_t availableLcusCount = 0;
15319 0 : uint32_t highVarianceLcusCount = 0;
15320 :
15321 : // Check the variance of the current SB (both counters move together here)
15322 0 : if ((currentParentPcs->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >
15323 : IS_COMPLEX_LCU_VARIANCE_TH) {
15324 0 : availableLcusCount++;
15325 0 : highVarianceLcusCount++;
15326 : }
15327 :
15328 : // Check the variance of the left SB if available
15329 0 : if (sb_origin_x != 0) {
15330 0 : availableLcusCount++;
15331 0 : if ((currentParentPcs->variance[lcuAdrr - 1][ME_TIER_ZERO_PU_64x64]) >
15332 : IS_COMPLEX_LCU_VARIANCE_TH)
15333 0 : highVarianceLcusCount++;
15334 : }
15335 :
15336 : // Check the variance of the right SB if available
15337 0 : if ((sb_origin_x + BLOCK_SIZE_64) <
15338 0 : currentParentPcs->enhanced_picture_ptr->width) {
15339 0 : availableLcusCount++;
15340 0 : if ((currentParentPcs->variance[lcuAdrr + 1][ME_TIER_ZERO_PU_64x64]) >
15341 : IS_COMPLEX_LCU_VARIANCE_TH)
15342 0 : highVarianceLcusCount++;
15343 : }
15344 :
15345 : // Check the variance of the top SB if available
15346 0 : if (sb_origin_y != 0) {
15347 0 : availableLcusCount++;
15348 0 : if ((currentParentPcs->variance[lcuAdrr - pictureWidthInLcus]
15349 0 : [ME_TIER_ZERO_PU_64x64]) >
15350 : IS_COMPLEX_LCU_VARIANCE_TH)
15351 0 : highVarianceLcusCount++;
15352 : }
15353 :
15354 : // Check the variance of the bottom SB if available
15355 0 : if ((sb_origin_y + BLOCK_SIZE_64) <
15356 0 : currentParentPcs->enhanced_picture_ptr->height) {
15357 0 : availableLcusCount++;
15358 0 : if ((currentParentPcs->variance[lcuAdrr + pictureWidthInLcus]
15359 0 : [ME_TIER_ZERO_PU_64x64]) >
15360 : IS_COMPLEX_LCU_VARIANCE_TH)
15361 0 : highVarianceLcusCount++;
15362 : }
15363 :
15364 : // Check the variance of the top-left SB if available
15365 0 : if ((sb_origin_x >= BLOCK_SIZE_64) && (sb_origin_y >= BLOCK_SIZE_64)) {
15366 0 : availableLcusCount++;
15367 0 : if ((currentParentPcs->variance[lcuAdrr - pictureWidthInLcus - 1]
15368 0 : [ME_TIER_ZERO_PU_64x64]) >
15369 : IS_COMPLEX_LCU_VARIANCE_TH)
15370 0 : highVarianceLcusCount++;
15371 : }
15372 :
15373 : // Check the variance of the top-right SB if available
15374 0 : if ((sb_origin_x <
15375 0 : currentParentPcs->enhanced_picture_ptr->width - BLOCK_SIZE_64) &&
15376 : (sb_origin_y >= BLOCK_SIZE_64)) {
15377 0 : availableLcusCount++;
15378 0 : if ((currentParentPcs->variance[lcuAdrr - pictureWidthInLcus + 1]
15379 0 : [ME_TIER_ZERO_PU_64x64]) >
15380 : IS_COMPLEX_LCU_VARIANCE_TH)
15381 0 : highVarianceLcusCount++;
15382 : }
15383 :
15384 : // Check the variance of the bottom-left SB if available
15385 0 : if ((sb_origin_x >= BLOCK_SIZE_64) &&
15386 : (sb_origin_y <
15387 0 : currentParentPcs->enhanced_picture_ptr->height - BLOCK_SIZE_64)) {
15388 0 : availableLcusCount++;
15389 0 : if ((currentParentPcs->variance[lcuAdrr + pictureWidthInLcus - 1]
15390 0 : [ME_TIER_ZERO_PU_64x64]) >
15391 : IS_COMPLEX_LCU_VARIANCE_TH)
15392 0 : highVarianceLcusCount++;
15393 : }
15394 :
15395 : // Check the variance of the bottom-right SB if available
15396 0 : if ((sb_origin_x <
15397 0 : currentParentPcs->enhanced_picture_ptr->width - BLOCK_SIZE_64) &&
15398 : (sb_origin_y <
15399 0 : currentParentPcs->enhanced_picture_ptr->height - BLOCK_SIZE_64)) {
15400 0 : availableLcusCount++;
15401 0 : if ((currentParentPcs->variance[lcuAdrr + pictureWidthInLcus + 1]
15402 0 : [ME_TIER_ZERO_PU_64x64]) >
15403 : IS_COMPLEX_LCU_VARIANCE_TH)
15404 0 : highVarianceLcusCount++;
15405 : }
15406 : // Temporal check: this SB's variance in the previous / current / plus-one PCS
15407 0 : EbBool varianceFluctuateFlag = EB_FALSE;
15408 : // Only evaluated when all three variances exceed the flat threshold
15409 0 : if ((previousParentPcs->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >
15410 0 : IS_COMPLEX_LCU_FLAT_VARIANCE_TH &&
15411 0 : (currentParentPcs->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >
15412 0 : IS_COMPLEX_LCU_FLAT_VARIANCE_TH &&
15413 0 : (plusOneParentPcs->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >
15414 : IS_COMPLEX_LCU_FLAT_VARIANCE_TH) {
15415 0 : varianceFluctuateFlag = (EbBool)( // |cur - prev| * 100 / prev and |cur - next| * 100 / next
15416 0 : (((ABS((int32_t)currentParentPcs
15417 : ->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64] -
15418 : (int32_t)previousParentPcs
15419 0 : ->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) *
15420 0 : 100) /
15421 0 : (int32_t)previousParentPcs
15422 0 : ->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >=
15423 0 : IS_COMPLEX_LCU_VARIANCE_DEVIATION_TH) &&
15424 0 : (((ABS((int32_t)currentParentPcs
15425 : ->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64] -
15426 : (int32_t)plusOneParentPcs
15427 0 : ->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) *
15428 0 : 100) /
15429 0 : (int32_t)
15430 0 : plusOneParentPcs->variance[lcuAdrr][ME_TIER_ZERO_PU_64x64]) >=
15431 : IS_COMPLEX_LCU_VARIANCE_DEVIATION_TH));
15432 : }
15433 : // Complex only when the SAD, neighbourhood-variance and fluctuation tests all pass
15434 0 : if (lcuCollocatedSad >=
15435 0 : ((sb_width * sb_height) * IS_COMPLEX_LCU_ZZ_SAD_FACTOR_TH) &&
15436 0 : highVarianceLcusCount >= (availableLcusCount >> 1) &&
15437 : varianceFluctuateFlag) {
15438 0 : return EB_TRUE;
15439 : }
15440 :
15441 0 : return EB_FALSE;
15442 : }
15443 :
/*******************************************
 * open_loop_intra_search_sb
 *   Open-loop intra search for one superblock (SB).
 *
 *   Walks block indices 0 .. CU_MAX_COUNT-1. For every block flagged valid
 *   in sb_params->raster_scan_cu_validity it:
 *     1. fills the above/left neighbor arrays from the source picture,
 *     2. runs intra_prediction_open_loop() for each mode in
 *        [DC_PRED, intra_mode_end] (directional modes are additionally
 *        swept over a set of angle deltas, or skipped altogether),
 *     3. measures the SAD between the source luma block and
 *        context_ptr->me_context_ptr->sb_buffer,
 *     4. stores distortion / intra_mode / angle_delta / valid_distortion
 *        for each candidate in ois_sb_results[sb_index], together with the
 *        index of the lowest-SAD candidate and the candidate count.
 *
 *   picture_control_set_ptr : parent PCS; supplies the sequence control
 *                             set, the OIS result storage and the fields
 *                             that steer the search (intra_pred_mode,
 *                             slice_type, temporal_layer_index,
 *                             is_used_as_reference_flag).
 *   sb_index                : index into sb_params_array and ois_sb_results.
 *   context_ptr             : ME context handed to
 *                             intra_prediction_open_loop(); its
 *                             me_context_ptr->sb_buffer is the SAD reference.
 *                             NOTE(review): presumably the prediction is
 *                             written into sb_buffer by
 *                             intra_prediction_open_loop() - confirm.
 *   input_ptr               : source picture (buffer_y, stride_y,
 *                             origin_x, origin_y are read).
 *
 *   Returns return_error, which is initialized to EB_ErrorNone and never
 *   modified in this function.
 *******************************************/
15444 0 : EbErrorType open_loop_intra_search_sb(
15445 : PictureParentControlSet *picture_control_set_ptr, uint32_t sb_index,
15446 : MotionEstimationContext_t *context_ptr, EbPictureBufferDesc *input_ptr)
15447 : {
15448 0 : EbErrorType return_error = EB_ErrorNone;
15449 0 : SequenceControlSet *sequence_control_set_ptr =
15450 : (SequenceControlSet *)picture_control_set_ptr
15451 0 : ->sequence_control_set_wrapper_ptr->object_ptr;
15452 :
15453 : uint32_t cu_origin_x;
15454 : uint32_t cu_origin_y;
15455 0 : uint32_t pa_blk_index = 0;
// is_16_bit only exists when PAETH_HBD is 0; in that configuration it caps
// the last tested mode at SMOOTH_H_PRED for bit depths above 8.
15456 : #if !PAETH_HBD
15457 : uint8_t is_16_bit =
15458 : (sequence_control_set_ptr->static_config.encoder_bit_depth > EB_8BIT);
15459 : #endif
15460 0 : SbParams *sb_params = &sequence_control_set_ptr->sb_params_array[sb_index];
15461 0 : OisSbResults *ois_sb_results_ptr =
15462 0 : picture_control_set_ptr->ois_sb_results[sb_index];
15463 : uint8_t *above_row;
15464 : uint8_t *left_col;
15465 :
// Scratch storage for the neighbor samples; above_row/left_col point 16
// bytes into these buffers (see below).
15466 : DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
15467 : DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
15468 0 : while (pa_blk_index < CU_MAX_COUNT) {
15469 : const CodedUnitStats *blk_stats_ptr;
15470 0 : blk_stats_ptr = get_coded_unit_stats(pa_blk_index);
15471 0 : uint8_t bsize = blk_stats_ptr->size;
// Square transform size matching the block: 8, 16, 32, otherwise 64.
15472 0 : TxSize tx_size =
15473 : bsize == 8
15474 : ? TX_8X8
15475 0 : : bsize == 16 ? TX_16X16 : bsize == 32 ? TX_32X32 : TX_64X64;
// Blocks not flagged valid for this SB are skipped; their OIS entries are
// left untouched.
15476 0 : if (sb_params->raster_scan_cu_validity
15477 0 : [md_scan_to_raster_scan[pa_blk_index]]) {
15478 0 : OisCandidate *ois_blk_ptr =
15479 : ois_sb_results_ptr->ois_candidate_array[pa_blk_index];
// Block position in picture coordinates (SB origin + offset inside the SB).
15480 0 : cu_origin_x = sb_params->origin_x + blk_stats_ptr->origin_x;
15481 0 : cu_origin_y = sb_params->origin_y + blk_stats_ptr->origin_y;
15482 0 : above_row = above_data + 16;
15483 0 : left_col = left_data + 16;
15484 :
15485 : // Fill Neighbor Arrays
// The fill starts one sample before above_row/left_col.
// NOTE(review): presumably that extra slot holds the top-left sample - confirm.
15486 0 : update_neighbor_samples_array_open_loop(above_row - 1,
15487 : left_col - 1,
15488 : input_ptr,
15489 0 : input_ptr->stride_y,
15490 : cu_origin_x,
15491 : cu_origin_y,
15492 : bsize,
15493 : bsize);
15494 : uint8_t ois_intra_mode;
// Number of candidates written so far for this block; also the write index.
15495 0 : uint8_t ois_intra_count = 0;
// Stays 0 if no candidate's SAD is below the initial threshold.
15496 0 : uint8_t best_intra_ois_index = 0;
// 64 * 64 * 255 is the largest possible SAD of a 64x64 block of 8-bit samples.
15497 0 : uint32_t best_intra_ois_distortion = 64 * 64 * 255;
15498 0 : uint8_t intra_mode_start = DC_PRED;
15499 : #if PAETH_HBD
15500 0 : uint8_t intra_mode_end = PAETH_PRED;
15501 : #else
15502 : uint8_t intra_mode_end = is_16_bit ? SMOOTH_H_PRED : PAETH_PRED;
15503 : #endif
// Defaults for the angle sweep. angle_delta_shift, angle_delta_candidate_count
// and disable_angular_prediction are all reassigned by every branch of the
// intra_pred_mode selection below.
15504 0 : uint8_t angle_delta_counter = 0;
15505 0 : uint8_t angle_delta_shift = 1;
15506 0 : EbBool use_angle_delta = (bsize >= 8);
15507 0 : uint8_t angle_delta_candidate_count = use_angle_delta ? 7 : 1;
15508 0 : uint8_t disable_angular_prediction = 0;
// intra_pred_mode 5: pictures not used as reference test DC_PRED only;
// directional modes are dropped for temporal layers > 0 or bsize > 16,
// otherwise they are swept over 5 angle deltas (-2..2).
15509 0 : if (picture_control_set_ptr->intra_pred_mode == 5) {
15510 0 : intra_mode_end =
15511 0 : (picture_control_set_ptr->is_used_as_reference_flag == 0)
15512 : ? DC_PRED
15513 : : intra_mode_end;
15514 0 : disable_angular_prediction =
15515 0 : picture_control_set_ptr->temporal_layer_index > 0
15516 : ? 1
15517 0 : : (bsize > 16) ? 1 : 0;
15518 0 : angle_delta_candidate_count =
15519 : disable_angular_prediction ? 1 : use_angle_delta ? 5 : 1;
15520 0 : angle_delta_shift = 1;
15521 : }
// intra_pred_mode 6: same mode range and angular gating as mode 5, but
// directional modes are only tested at angle delta 0.
15522 0 : else if (picture_control_set_ptr->intra_pred_mode == 6) {
15523 0 : intra_mode_end =
15524 0 : (picture_control_set_ptr->is_used_as_reference_flag == 0)
15525 : ? DC_PRED
15526 : : intra_mode_end;
15527 0 : disable_angular_prediction =
15528 0 : picture_control_set_ptr->temporal_layer_index > 0
15529 : ? 1
15530 0 : : (bsize > 16) ? 1 : 0;
15531 0 : angle_delta_candidate_count = 1;
15532 0 : angle_delta_shift = 1;
15533 : } else {
// All other intra_pred_mode values: the search depth depends on slice type
// and temporal layer.
// I_SLICE: full mode range, 5 angle deltas (-2..2).
15534 0 : if (picture_control_set_ptr->slice_type == I_SLICE) {
15535 : #if PAETH_HBD
15536 0 : intra_mode_end = /*is_16_bit ? SMOOTH_H_PRED :*/ PAETH_PRED;
15537 : #else
15538 : intra_mode_end = is_16_bit ? SMOOTH_H_PRED : PAETH_PRED;
15539 : #endif
15540 0 : angle_delta_candidate_count = use_angle_delta ? 5 : 1;
15541 0 : disable_angular_prediction = 0;
15542 0 : angle_delta_shift = 1;
// Non-I slice on temporal layer 0: full mode range; blocks up to 16x16 test
// two angle deltas (-3 and 0, from count 2 with shift 3), larger blocks
// test delta 0 only.
15543 0 : } else if (picture_control_set_ptr->temporal_layer_index == 0) {
15544 : #if PAETH_HBD
15545 0 : intra_mode_end = /*is_16_bit ? SMOOTH_H_PRED :*/ PAETH_PRED;
15546 : #else
15547 : intra_mode_end = is_16_bit ? SMOOTH_H_PRED : PAETH_PRED;
15548 : #endif
15549 0 : angle_delta_candidate_count =
15550 : (bsize > 16) ? 1 : use_angle_delta ? 2 : 1;
15551 0 : disable_angular_prediction = 0;
15552 0 : angle_delta_shift = 3;
15553 : } else {
// Remaining pictures: DC_PRED only.
15554 0 : intra_mode_end = DC_PRED;
15555 0 : disable_angular_prediction = 1;
15556 0 : angle_delta_candidate_count = 1;
15557 0 : angle_delta_shift = 1;
15558 : }
15559 : }
// Evaluate every mode in [intra_mode_start, intra_mode_end].
15560 0 : for (ois_intra_mode = intra_mode_start;
15561 : ois_intra_mode <= intra_mode_end;
15562 0 : ++ois_intra_mode) {
15563 0 : if (av1_is_directional_mode((PredictionMode)ois_intra_mode)) {
// When angular prediction is disabled, directional modes produce no
// candidate at all.
15564 0 : if (!disable_angular_prediction) {
15565 0 : for (angle_delta_counter = 0;
15566 : angle_delta_counter < angle_delta_candidate_count;
15567 0 : ++angle_delta_counter) {
// Delta is 0 for a single candidate; otherwise
// (counter - count/2) * shift, so the sweep reaches further below 0 than above
// when count is even.
15568 0 : int32_t angle_delta =
15569 0 : angle_delta_shift *
15570 : (angle_delta_candidate_count == 1
15571 : ? 0
15572 0 : : angle_delta_counter -
15573 0 : (angle_delta_candidate_count >> 1));
// Prediction angle = base angle of the mode + delta * ANGLE_STEP.
15574 0 : int32_t p_angle =
15575 0 : mode_to_angle_map[(
15576 0 : PredictionMode)ois_intra_mode] +
15577 0 : angle_delta * ANGLE_STEP;
15578 : // PRED
15579 0 : intra_prediction_open_loop(p_angle,
15580 : ois_intra_mode,
15581 : cu_origin_x,
15582 : cu_origin_y,
15583 : tx_size,
15584 : above_row,
15585 : left_col,
15586 : context_ptr);
15587 : // Distortion
// SAD of the bsize x bsize source luma block (picture origin offset applied)
// against sb_buffer, which is read with a stride of BLOCK_SIZE_64.
15588 0 : ois_blk_ptr[ois_intra_count].distortion =
15589 0 : (uint32_t)nxm_sad_kernel( // Always SAD without weighting
15590 0 : &(input_ptr->buffer_y
15591 0 : [(input_ptr->origin_y +
15592 0 : cu_origin_y) *
15593 0 : input_ptr->stride_y +
15594 0 : (input_ptr->origin_x +
15595 : cu_origin_x)]),
15596 0 : input_ptr->stride_y,
15597 0 : &(context_ptr->me_context_ptr
15598 : ->sb_buffer[0]),
15599 : BLOCK_SIZE_64,
15600 : bsize,
15601 : bsize);
15602 : // keep track of best SAD
// Strict '<': on equal SAD the earliest candidate is kept.
15603 0 : if (ois_blk_ptr[ois_intra_count].distortion <
15604 : best_intra_ois_distortion) {
15605 0 : best_intra_ois_index = ois_intra_count;
15606 0 : best_intra_ois_distortion =
15607 0 : ois_blk_ptr[ois_intra_count].distortion;
15608 : }
15609 0 : ois_blk_ptr[ois_intra_count].intra_mode =
15610 : ois_intra_mode;
15611 0 : ois_blk_ptr[ois_intra_count].valid_distortion =
15612 : EB_TRUE;
// Last field written for this candidate; the post-increment advances to the
// next candidate slot.
15613 0 : ois_blk_ptr[ois_intra_count++].angle_delta =
15614 : angle_delta;
15615 : }
15616 : }
15617 : } else {
// Non-directional mode: one candidate, predicted with angle 0 and recorded
// with angle_delta 0.
15618 : // PRED
15619 0 : intra_prediction_open_loop(0,
15620 : ois_intra_mode,
15621 : cu_origin_x,
15622 : cu_origin_y,
15623 : tx_size,
15624 : above_row,
15625 : left_col,
15626 : context_ptr);
15627 : // Distortion
15628 0 : ois_blk_ptr[ois_intra_count]
15629 0 : .distortion = (uint32_t)nxm_sad_kernel( // Always SAD without weighting
15630 0 : &(input_ptr->buffer_y
15631 0 : [(input_ptr->origin_y + cu_origin_y) *
15632 0 : input_ptr->stride_y +
15633 0 : (input_ptr->origin_x + cu_origin_x)]),
15634 0 : input_ptr->stride_y,
15635 0 : &(context_ptr->me_context_ptr->sb_buffer[0]),
15636 : BLOCK_SIZE_64,
15637 : bsize,
15638 : bsize);
15639 : // keep track of best SAD
15640 0 : if (ois_blk_ptr[ois_intra_count].distortion <
15641 : best_intra_ois_distortion) {
15642 0 : best_intra_ois_index = ois_intra_count;
15643 0 : best_intra_ois_distortion =
15644 0 : ois_blk_ptr[ois_intra_count].distortion;
15645 : }
15646 0 : ois_blk_ptr[ois_intra_count].intra_mode = ois_intra_mode;
15647 0 : ois_blk_ptr[ois_intra_count].valid_distortion = EB_TRUE;
15648 0 : ois_blk_ptr[ois_intra_count++].angle_delta = 0;
15649 : }
15650 : }
// Publish the per-block summary: index of the lowest-SAD candidate and the
// number of candidates recorded.
15651 0 : ois_sb_results_ptr->best_distortion_index[pa_blk_index] =
15652 : best_intra_ois_index;
15653 0 : ois_sb_results_ptr->total_ois_intra_candidate[pa_blk_index] =
15654 : ois_intra_count;
15655 : }
15656 0 : pa_blk_index++;
15657 : }
15658 0 : return return_error;
15659 : }
|