/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <assert.h>
#include <math.h>

#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"

#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/blend.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"

#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
#include "av1/common/txb_common.h"
#if CONFIG_WARPED_MOTION
#include "av1/common/warped_motion.h"
#endif  // CONFIG_WARPED_MOTION

#include "av1/encoder/aq_variance.h"
#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
#if CONFIG_PALETTE
#include "av1/encoder/palette.h"
#endif  // CONFIG_PALETTE
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/tokenize.h"
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
#endif  // CONFIG_PVQ
#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
#endif  // CONFIG_PVQ || CONFIG_DAALA_DIST
#if CONFIG_DUAL_FILTER
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
};
#endif  // CONFIG_DUAL_FILTER
#if CONFIG_EXT_REFS

#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) |   \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |   \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                     \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |   \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                     \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |   \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                     \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |   \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))

#else  // !CONFIG_EXT_REFS

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))

#endif  // CONFIG_EXT_REFS

#if CONFIG_EXT_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
#else  // !CONFIG_EXT_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
#endif  // CONFIG_EXT_REFS
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8

#if CONFIG_EXT_INTRA
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671,    // vert
                                  -7.7051, -3.2234, -3.6193, 3.4533 };  // horz
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;

struct rdcost_block_args {
  const AV1_COMP *cpi;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
  RD_STATS rd_stats;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
};
#define LAST_NEW_MV_INDEX 6
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
  { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },

  { DC_PRED, { INTRA_FRAME, NONE_FRAME } },

  { NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },

  { NEARMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },

  { ZEROMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { ZEROMV, { LAST2_FRAME, NONE_FRAME } },
  { ZEROMV, { LAST3_FRAME, NONE_FRAME } },
  { ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
  { ZEROMV, { ALTREF_FRAME, NONE_FRAME } },

  // TODO(zoeliu): May need to reconsider the order of the modes to check.
#if CONFIG_EXT_INTER
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif  // CONFIG_EXT_REFS

#else  // CONFIG_EXT_INTER

  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif  // CONFIG_EXT_REFS
#endif  // CONFIG_EXT_INTER

  { TM_PRED, { INTRA_FRAME, NONE_FRAME } },

#if CONFIG_ALT_INTRA
  { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
#if CONFIG_SMOOTH_HV
  { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
#endif  // CONFIG_SMOOTH_HV
#endif  // CONFIG_ALT_INTRA

#if CONFIG_EXT_INTER
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },

#if CONFIG_EXT_REFS
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS

  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },

#if CONFIG_EXT_REFS
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif  // CONFIG_EXT_REFS

#else  // CONFIG_EXT_INTER

  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },

#if CONFIG_EXT_REFS
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif  // CONFIG_EXT_REFS

  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },

#if CONFIG_EXT_REFS
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif  // CONFIG_EXT_REFS

#endif  // CONFIG_EXT_INTER

  { H_PRED, { INTRA_FRAME, NONE_FRAME } },
  { V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D45_PRED, { INTRA_FRAME, NONE_FRAME } },

#if CONFIG_EXT_INTER
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },

#if CONFIG_EXT_REFS
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
#endif  // CONFIG_EXT_REFS

  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },

#if CONFIG_EXT_REFS
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
#endif  // CONFIG_EXT_REFS

  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
#endif  // CONFIG_EXT_INTER
};
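// write_uniform_cost() below returns the cost, in the units used by
// av1_cost_bit(), of signalling v with a (near-)uniform code over n symbols:
// the first (1 << l) - n symbols take l - 1 bits and the remaining symbols
// take l bits, i.e. either (l - 1) or l times the cost of an even coin flip.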
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
static INLINE int write_uniform_cost(int n, int v) {
  const int l = get_unsigned_bits(n);
  const int m = (1 << l) - n;
  if (l == 0) return 0;
  if (v < m)
    return (l - 1) * av1_cost_bit(128, 0);
  else
    return l * av1_cost_bit(128, 0);
}
#endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

#if CONFIG_DAALA_DIST
static int od_compute_var_4x4(od_coeff *x, int stride) {
  int sum;
  int s2;
  int i;
  sum = 0;
  s2 = 0;
  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < 4; j++) {
      int t;

      t = x[i * stride + j];
      sum += t;
      s2 += t * t;
    }
  }
  // TODO(yushin): Check whether any changes are required for high bit depth.
  return (s2 - (sum * sum >> 4)) >> 4;
}
/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
   the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
   is applied both horizontally and vertically. For X=5, the filter is
   a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
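/* With the default OD_DIST_LP_MID of 5, the separable kernel is therefore
   [1 5 1]/7 along rows and then columns. The code below applies the
   un-normalized [1 5 1] taps and divides the squared error by
   OD_DIST_LP_NORM^4 afterwards to restore the unit DC response. */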
static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
                                  od_coeff *y, od_coeff *e_lp, int stride) {
  double sum;
  int min_var;
  double mean_var;
  double var_stat;
  double activity;
  double calibration;
  int i;
  int j;
  double vardist;

  vardist = 0;
  OD_ASSERT(qm != OD_FLAT_QM);
  min_var = INT_MAX;
  mean_var = 0;
  for (i = 0; i < 3; i++) {
    for (j = 0; j < 3; j++) {
      int varx;
      int vary;
      varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
      vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
      min_var = OD_MINI(min_var, varx);
      mean_var += 1. / (1 + varx);
      /* The cast to (double) is to avoid an overflow before the sqrt.*/
      vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
    }
  }
  /* We use a different variance statistic depending on whether activity
     masking is used, since the harmonic mean appeared slightly worse with
     masking off. The calibration constant just ensures that we preserve the
     rate compared to activity=1. */
  if (use_activity_masking) {
    var_stat = 9. / mean_var;
  } else {
    calibration = 1.62;
    var_stat = min_var;
  }
  /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
     activity masking constant. */
  activity = calibration * pow(.25 + var_stat, -1. / 6);
  sum = 0;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++)
      sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
  }
  /* Normalize the filter to unit DC response. */
  sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
               OD_DIST_LP_NORM);
  return activity * activity * (sum + vardist);
}
// Note : Inputs x and y are in a pixel domain
static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
                              od_coeff *y, int bsize_w, int bsize_h,
                              int qindex) {
  int i;
  double sum;
  sum = 0;

  assert(bsize_w >= 8 && bsize_h >= 8);

  if (qm == OD_FLAT_QM) {
    for (i = 0; i < bsize_w * bsize_h; i++) {
      double tmp;
      tmp = x[i] - y[i];
      sum += tmp * tmp;
    }
  } else {
    int j;
    DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
    DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
    DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
    int mid = OD_DIST_LP_MID;
    for (i = 0; i < bsize_h; i++) {
      for (j = 0; j < bsize_w; j++) {
        e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
      }
    }
    for (i = 0; i < bsize_h; i++) {
      tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
      tmp[i * bsize_w + bsize_w - 1] =
          mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
      for (j = 1; j < bsize_w - 1; j++) {
        tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
                               e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
      }
    }
    for (j = 0; j < bsize_w; j++) {
      e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
      e_lp[(bsize_h - 1) * bsize_w + j] =
          mid * tmp[(bsize_h - 1) * bsize_w + j] +
          2 * tmp[(bsize_h - 2) * bsize_w + j];
    }
    for (i = 1; i < bsize_h - 1; i++) {
      for (j = 0; j < bsize_w; j++) {
        e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
                                tmp[(i - 1) * bsize_w + j] +
                                tmp[(i + 1) * bsize_w + j];
      }
    }
    for (i = 0; i < bsize_h; i += 8) {
      for (j = 0; j < bsize_w; j += 8) {
        sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
                                   &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
                                   bsize_w);
      }
    }
    /* Scale according to linear regression against SSE, for 8x8 blocks. */
    if (activity_masking) {
      sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
             (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
    } else {
      sum *= qindex >= 128
                 ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
                 : qindex <= 43
                       ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
                       : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
    }
  }
  return sum;
}
int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
                       int dst_stride, int bsw, int bsh, int qm,
                       int use_activity_masking, int qindex) {
  int i, j;
  int64_t d;
  DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
  DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);

  assert(qm == OD_HVS_QM);

  for (j = 0; j < bsh; j++)
    for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];

  for (j = 0; j < bsh; j++)
    for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];

  d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
                               qindex);
  return d;
}
#endif  // CONFIG_DAALA_DIST
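// get_energy_distribution_fine() splits the prediction error of a block into
// a 4x4 grid of sub-blocks and reports, in hordist[0..2]/verdist[0..2], the
// fraction of the total squared error that falls in the first three column
// and row bands (the fourth follows by subtraction). These distributions feed
// the ADST/FLIPADST pruning classifier below.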
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                                         const uint8_t *src, int src_stride,
                                         const uint8_t *dst, int dst_stride,
                                         double *hordist, double *verdist) {
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

  const int f_index = bsize - BLOCK_16X16;
  if (f_index < 0) {
    const int w_shift = bw == 8 ? 1 : 2;
    const int h_shift = bh == 8 ? 1 : 2;
#if CONFIG_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
          esq[index] +=
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
        }
    } else {
#endif  // CONFIG_HIGHBITDEPTH

      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
        }
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH
  } else {
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[1]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[2]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[3]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[5]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[6]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[7]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[9]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[10]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[11]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[13]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[14]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[15]);
  }

  double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
                 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
                 esq[12] + esq[13] + esq[14] + esq[15];
  if (total > 0) {
    const double e_recip = 1.0 / total;
    hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
    hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
    hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
    verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
    verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
    verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
  } else {
    hordist[0] = verdist[0] = 0.25;
    hordist[1] = verdist[1] = 0.25;
    hordist[2] = verdist[2] = 0.25;
  }
}
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                            const uint8_t *src, int src_stride,
                            const uint8_t *dst, int dst_stride) {
  int prune_bitmask = 0;
  double svm_proj_h = 0, svm_proj_v = 0;
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);

  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << ADST_1D;

  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

#if CONFIG_EXT_TX
static void get_horver_correlation(const int16_t *diff, int stride, int w,
                                   int h, double *hcorr, double *vcorr) {
  // Returns hor/ver correlation coefficient
  const int num = (h - 1) * (w - 1);
  const double num_r = 1.0 / num;
  int i, j;
  int64_t xy_sum = 0, xz_sum = 0;
  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;

  for (i = 1; i < h; ++i) {
    for (j = 1; j < w; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      xy_sum += x * y;
      xz_sum += x * z;
      x_sum += x;
      y_sum += y;
      z_sum += z;
      x2_sum += x * x;
      y2_sum += y * y;
      z2_sum += z * z;
    }
  }
  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
  if (x_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  }
  if (x_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  }
}
int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
  double hcorr, vcorr;
  int prune_bitmask = 0;
  get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);

  if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << IDTX_1D;
  else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << DCT_1D;

  if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << (IDTX_1D + 8);
  else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}
// Performance drop: 0.5%, Speed improvement: 24%
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                             MACROBLOCK *x, const MACROBLOCKD *xd,
                             int adst_flipadst, int dct_idtx) {
  int prune = 0;

  if (adst_flipadst) {
    const struct macroblock_plane *const p = &x->plane[0];
    const struct macroblockd_plane *const pd = &xd->plane[0];
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride);
  }
  if (dct_idtx) {
    av1_subtract_plane(x, bsize, 0);
    const struct macroblock_plane *const p = &x->plane[0];
    const int bw = 4 << (b_width_log2_lookup[bsize]);
    const int bh = 4 << (b_height_log2_lookup[bsize]);
    prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
  }

  return prune;
}
#endif  // CONFIG_EXT_TX
// Performance drop: 0.3%, Speed improvement: 5%
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                             const MACROBLOCK *x, const MACROBLOCKD *xd) {
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
                          pd->dst.stride);
}
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
                          const MACROBLOCKD *const xd, int tx_set) {
#if CONFIG_EXT_TX
  const int *tx_set_1D = tx_set >= 0 ? ext_tx_used_inter_1D[tx_set] : NULL;
#else
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
#endif  // CONFIG_EXT_TX

  switch (cpi->sf.tx_type_search.prune_mode) {
    case NO_PRUNE: return 0; break;
    case PRUNE_ONE:
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
        return 0;
      return prune_one_for_sby(cpi, bsize, x, xd);
      break;
#if CONFIG_EXT_TX
    case PRUNE_TWO:
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
      if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
      break;
#endif  // CONFIG_EXT_TX
  }
  assert(0);
  return 0;
}
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  return !(((prune >> vtx_tab[tx_type]) & 1) |
           ((prune >> (htx_tab[tx_type] + 8)) & 1));
#else
  // temporary to avoid compiler warnings
  (void)tx_type;
  (void)prune;
  return 1;
#endif  // CONFIG_EXT_TX
}
static void model_rd_from_sse(const AV1_COMP *const cpi,
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift =
#if CONFIG_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
#endif  // CONFIG_HIGHBITDEPTH
                                                    3;

  // Fast approximation of the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
      *rate = (int)((square_error * (280 - quantizer)) >>
                    (16 - AV1_PROB_COST_SHIFT));
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant[1] >> dequant_shift, rate, dist);
  }
}
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
    const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#else
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
#endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2

    unsigned int sse;
    int rate;
    int64_t dist;

#if CONFIG_CB4X4
    if (x->skip_chroma_rd && plane) continue;
#endif  // CONFIG_CB4X4

    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);

    if (plane == 0) x->pred_sse[ref] = sse;

    total_sse += sse;

    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *skip_txfm_sb = total_sse == 0;
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum;
}
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

#if CONFIG_HIGHBITDEPTH
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
#endif  // CONFIG_HIGHBITDEPTH
#if CONFIG_PVQ
// Without PVQ, av1_block_error_c() returns two kinds of errors,
// 1) reconstruction (i.e. decoded) error and
// 2) Squared sum of transformed residue (i.e. 'coeff')
// However, if PVQ is enabled, coeff does not keep the transformed residue
// but instead a transformed original is kept.
// Hence, a new parameter, the ref vector (i.e. transformed predicted signal),
// is required to derive the residue signal,
// i.e. coeff - ref = residue (all transformed).
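// In other words, under PVQ the two outputs below become
//   error = sum_i (coeff[i] - dqcoeff[i])^2   (reconstruction error)
//   *ssz  = sum_i (coeff[i] - ref[i])^2       (prediction residue energy)
// which is what av1_block_error2_c()/av1_highbd_block_error2_c() compute.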
#if CONFIG_HIGHBITDEPTH
static int64_t av1_highbd_block_error2_c(const tran_low_t *coeff,
                                         const tran_low_t *dqcoeff,
                                         const tran_low_t *ref,
                                         intptr_t block_size, int64_t *ssz,
                                         int bd) {
  int64_t error;
  int64_t sqcoeff;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  // For high bit depth, throw away ssz until a 32-bit version of
  // av1_block_error_fp is written.
  int64_t ssz_trash;
  error = av1_block_error(coeff, dqcoeff, block_size, &ssz_trash);
  // prediction residue^2 = (orig - ref)^2
  sqcoeff = av1_block_error(coeff, ref, block_size, &ssz_trash);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;
  *ssz = sqcoeff;
  return error;
}
#else
// TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
// a separate function that does not do the extra computations for ssz.
static int64_t av1_block_error2_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  const tran_low_t *ref, intptr_t block_size,
                                  int64_t *ssz) {
  int64_t error;
  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  error = av1_block_error_fp(coeff, dqcoeff, block_size);
  // prediction residue^2 = (orig - ref)^2
  *ssz = av1_block_error_fp(coeff, ref, block_size);
  return error;
}
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_PVQ
#if !CONFIG_PVQ || CONFIG_VAR_TX
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
 * decide whether to include the cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * is non-zero). */
#if !CONFIG_LV_MAP
static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
                       int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
                       const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const uint16_t *band_count = &band_count_table[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  const int tx_size_ctx = txsize_sqr_map[tx_size];
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
  uint8_t token_cache[MAX_TX_SQUARE];
  int pt = combine_entropy_contexts(*a, *l);
  int c, cost;
  const int16_t *scan = scan_order->scan;
  const int16_t *nb = scan_order->neighbors;
  const int ref = is_inter_block(mbmi);
  aom_prob *blockz_probs =
      cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref];

#if CONFIG_HIGHBITDEPTH
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
#else
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
#endif  // CONFIG_HIGHBITDEPTH

#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
  // Check for consistency of tx_size with mode info
  assert(tx_size == get_tx_size(plane, xd));
#endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX

  if (eob == 0) {
    // block zero
    cost = av1_cost_bit(blockz_probs[pt], 0);
  } else {
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
      cost = av1_get_token_cost(v, &prev_t, cat6_bits);
      cost += (*token_costs)[!prev_t][pt][prev_t];

      token_cache[0] = av1_pt_energy_class[prev_t];
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
        cost += av1_get_token_cost(v, &t, cat6_bits);
        cost += (*token_costs)[!t][!prev_t][t];
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
      }

      // eob token
      cost += (*token_costs)[0][!prev_t][EOB_TOKEN];

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
      cost = av1_get_token_cost(v, &tok, cat6_bits);
      cost += (*token_costs)[!tok][pt][tok];

      token_cache[0] = av1_pt_energy_class[tok];
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
        cost += av1_get_token_cost(v, &tok, cat6_bits);
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!tok][pt][tok];
        token_cache[rc] = av1_pt_energy_class[tok];
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
      }

      // eob token
      pt = get_coef_context(nb, token_cache, c);
      cost += (*token_costs)[0][pt][EOB_TOKEN];
    }
  }

  return cost;
}
#endif  // !CONFIG_LV_MAP
int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
                    int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
                    const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
                    int use_fast_coef_costing) {
#if !CONFIG_LV_MAP
  const AV1_COMMON *const cm = &cpi->common;
  return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
                     use_fast_coef_costing);
#else   // !CONFIG_LV_MAP
  (void)use_fast_coef_costing;
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const BLOCK_SIZE bsize = mbmi->sb_type;
#if CONFIG_CB4X4
#if CONFIG_CHROMA_2X2
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
#else
  const BLOCK_SIZE plane_bsize =
      AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#endif  // CONFIG_CHROMA_2X2
#else   // CONFIG_CB4X4
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
#endif  // CONFIG_CB4X4

  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
  return av1_cost_coeffs_txb(cpi, x, plane, block, &txb_ctx);
#endif  // !CONFIG_LV_MAP
}
#endif  // !CONFIG_PVQ || CONFIG_VAR_TX
// Get transform block visible dimensions cropped to the MI units.
static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
                               BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
                               BLOCK_SIZE tx_bsize, int *width, int *height,
                               int *visible_width, int *visible_height) {
#if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
  assert(tx_bsize <= plane_bsize);
#endif  // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
  int txb_height = block_size_high[tx_bsize];
  int txb_width = block_size_wide[tx_bsize];
  const int block_height = block_size_high[plane_bsize];
  const int block_width = block_size_wide[plane_bsize];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  // TODO(aconverse@google.com): Investigate using crop_width/height here
  // rather than the MI size.
  const int block_rows =
      (xd->mb_to_bottom_edge >= 0)
          ? block_height
          : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
  const int block_cols =
      (xd->mb_to_right_edge >= 0)
          ? block_width
          : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
  const int tx_unit_size = tx_size_wide_log2[0];
  if (width) *width = txb_width;
  if (height) *height = txb_height;
  *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
  *visible_height =
      clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
}
// Compute the pixel domain sum square error on all visible 4x4s in the
// transform block.
static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
                          int plane, const uint8_t *src, const int src_stride,
                          const uint8_t *dst, const int dst_stride, int blk_row,
                          int blk_col, const BLOCK_SIZE plane_bsize,
                          const BLOCK_SIZE tx_bsize) {
  int txb_rows, txb_cols, visible_rows, visible_cols;
  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
                     &txb_cols, &txb_rows, &visible_cols, &visible_rows);
  assert(visible_rows > 0);
  assert(visible_cols > 0);
#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
  if ((txb_rows == visible_rows && txb_cols == visible_cols) &&
      tx_bsize < BLOCK_SIZES) {
#else
  if (txb_rows == visible_rows && txb_cols == visible_cols) {
#endif
    unsigned sse;
    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
    return sse;
  }
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint64_t sse = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
                                           visible_cols, visible_rows);
    return (unsigned int)ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
  }
#endif  // CONFIG_HIGHBITDEPTH
  unsigned sse = aom_sse_odd_size(src, src_stride, dst, dst_stride,
                                  visible_cols, visible_rows);
  return sse;
}
// Compute the sum of squares on all visible 4x4s in the transform block.
static int64_t sum_squares_visible(const MACROBLOCKD *xd, int plane,
                                   const int16_t *diff, const int diff_stride,
                                   int blk_row, int blk_col,
                                   const BLOCK_SIZE plane_bsize,
                                   const BLOCK_SIZE tx_bsize) {
  int visible_rows, visible_cols;
  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
                     NULL, &visible_cols, &visible_rows);
  return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
}
void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
                    BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
                    TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
                    OUTPUT_STATUS output_status) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
#if CONFIG_DAALA_DIST
  int qm = OD_HVS_QM;
  int use_activity_masking = 0;
#if CONFIG_PVQ
  use_activity_masking = x->daala_enc.use_activity_masking;
#endif  // CONFIG_PVQ
  struct macroblockd_plane *const pd = &xd->plane[plane];
#else   // CONFIG_DAALA_DIST
  const struct macroblockd_plane *const pd = &xd->plane[plane];
#endif  // CONFIG_DAALA_DIST

  if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) {
    // Transform domain distortion computation is more efficient as it does
    // not involve an inverse transform, but it is less accurate.
    const int buffer_length = tx_size_2d[tx_size];
    int64_t this_sse;
    int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
    tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_PVQ
    tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);

#if CONFIG_HIGHBITDEPTH
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
    *out_dist = av1_highbd_block_error2_c(coeff, dqcoeff, ref_coeff,
                                          buffer_length, &this_sse, bd) >>
                shift;
#else
    *out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
                                   &this_sse) >>
                shift;
#endif  // CONFIG_HIGHBITDEPTH
#elif CONFIG_HIGHBITDEPTH
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
    *out_dist =
        av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
        shift;
#else
    *out_dist =
        av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
#endif  // CONFIG_PVQ
    *out_sse = this_sse >> shift;
  } else {
    const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
#if !CONFIG_PVQ || CONFIG_DAALA_DIST
    const int bsw = block_size_wide[tx_bsize];
    const int bsh = block_size_high[tx_bsize];
#endif
    const int src_stride = x->plane[plane].src.stride;
    const int dst_stride = xd->plane[plane].dst.stride;
    // Scale the transform block index to pixel unit.
    const int src_idx = (blk_row * src_stride + blk_col)
                        << tx_size_wide_log2[0];
    const int dst_idx = (blk_row * dst_stride + blk_col)
                        << tx_size_wide_log2[0];
    const uint8_t *src = &x->plane[plane].src.buf[src_idx];
    const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
    const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    const uint16_t eob = p->eobs[block];

    assert(cpi != NULL);
    assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);

#if CONFIG_DAALA_DIST
    if (plane == 0 && bsw >= 8 && bsh >= 8) {
      if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
        const int pred_stride = block_size_wide[plane_bsize];
        const int pred_idx = (blk_row * pred_stride + blk_col)
                             << tx_size_wide_log2[0];
        const int16_t *pred = &pd->pred[pred_idx];
        int i, j;
        DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);

        for (j = 0; j < bsh; j++)
          for (i = 0; i < bsw; i++)
            pred8[j * bsw + i] = pred[j * pred_stride + i];
        *out_sse = av1_daala_dist(src, src_stride, pred8, bsw, bsw, bsh, qm,
                                  use_activity_masking, x->qindex);
      } else {
        *out_sse = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
                                  qm, use_activity_masking, x->qindex);
      }
    } else
#endif  // CONFIG_DAALA_DIST
    {
      const int diff_stride = block_size_wide[plane_bsize];
      const int diff_idx = (blk_row * diff_stride + blk_col)
                           << tx_size_wide_log2[0];
      const int16_t *diff = &p->src_diff[diff_idx];
      *out_sse = sum_squares_visible(xd, plane, diff, diff_stride, blk_row,
                                     blk_col, plane_bsize, tx_bsize);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
        *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2);
#endif  // CONFIG_HIGHBITDEPTH
    }

    if (eob) {
      if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
#if CONFIG_DAALA_DIST
        if (plane == 0 && bsw >= 8 && bsh >= 8)
          *out_dist = av1_daala_dist(src, src_stride, dst, dst_stride, bsw,
                                     bsh, qm, use_activity_masking, x->qindex);
        else
#endif  // CONFIG_DAALA_DIST
          *out_dist = pixel_sse(cpi, xd, plane, src, src_stride, dst,
                                dst_stride, blk_row, blk_col, plane_bsize,
                                tx_bsize);
      } else {
#if CONFIG_HIGHBITDEPTH
        uint8_t *recon;
        DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);

        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
          recon = CONVERT_TO_BYTEPTR(recon16);
        else
          recon = (uint8_t *)recon16;
#else
        DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
#endif  // CONFIG_HIGHBITDEPTH

#if !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
                                   NULL, 0, bsw, bsh, xd->bd);
        } else {
#endif  // CONFIG_HIGHBITDEPTH
          aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL,
                            0, bsw, bsh);
#if CONFIG_HIGHBITDEPTH
        }
#endif  // CONFIG_HIGHBITDEPTH
#endif  // !CONFIG_PVQ

        const PLANE_TYPE plane_type = get_plane_type(plane);
        TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);

        av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, recon,
                                    MAX_TX_SIZE, eob);

#if CONFIG_DAALA_DIST
        if (plane == 0 && bsw >= 8 && bsh >= 8) {
          *out_dist = av1_daala_dist(src, src_stride, recon, MAX_TX_SIZE, bsw,
                                     bsh, qm, use_activity_masking, x->qindex);
        } else {
          // Save decoded pixels for inter block in pd->pred to avoid
          // block_8x8_rd_txfm_daala_dist() needing to produce them
          // by calling av1_inverse_transform_block() again.
          const int pred_stride = block_size_wide[plane_bsize];
          const int pred_idx = (blk_row * pred_stride + blk_col)
                               << tx_size_wide_log2[0];
          int16_t *pred = &pd->pred[pred_idx];
          int i, j;

          for (j = 0; j < bsh; j++)
            for (i = 0; i < bsw; i++)
              pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];

          *out_dist =
              pixel_sse(cpi, xd, plane, src, src_stride, recon, MAX_TX_SIZE,
                        blk_row, blk_col, plane_bsize, tx_bsize);
        }
#endif  // CONFIG_DAALA_DIST
      }
    } else {
      *out_dist = *out_sse;
    }
  }
}
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const AV1_COMP *cpi = args->cpi;
  ENTROPY_CONTEXT *a = args->t_above + blk_col;
  ENTROPY_CONTEXT *l = args->t_left + blk_row;
  const AV1_COMMON *cm = &cpi->common;
  int64_t rd1, rd2, rd;
  RD_STATS this_rd_stats;

  assert(tx_size == get_tx_size(plane, xd));

  av1_init_rd_stats(&this_rd_stats);

  if (args->exit_early) return;

  if (!is_inter_block(mbmi)) {
#if CONFIG_CFL
#if CONFIG_EC_ADAPT
    FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
#else
    FRAME_CONTEXT *const ec_ctx = cm->fc;
#endif  // CONFIG_EC_ADAPT

    av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
                                           blk_row, tx_size, plane_bsize);
#else
    av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
#endif  // CONFIG_CFL
#if CONFIG_DPCM_INTRA
    const int block_raster_idx =
        av1_block_index_to_raster_order(tx_size, block);
    const PREDICTION_MODE mode =
        (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
    TX_TYPE tx_type = get_tx_type((plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
                                  xd, block, tx_size);
    if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
      int8_t skip;
      av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
                                  plane_bsize, tx_size, tx_type, a, l, &skip);
      av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                     tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
                     OUTPUT_HAS_DECODED_PIXELS);
      goto CALCULATE_RD;
    }
#endif  // CONFIG_DPCM_INTRA
    av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
  }

#if !CONFIG_TXK_SEL
  // full forward transform and quantization
  const int coeff_ctx = combine_entropy_contexts(*a, *l);
  av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                  coeff_ctx, AV1_XFORM_QUANT_FP);
  av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx);

  if (!is_inter_block(mbmi)) {
    struct macroblock_plane *const p = &x->plane[plane];
    av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
                                       p->eobs[block]);
    av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                   tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
                   OUTPUT_HAS_DECODED_PIXELS);
  } else {
    av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                   tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
                   OUTPUT_HAS_PREDICTED_PIXELS);
  }
#if CONFIG_CFL
  if (plane == AOM_PLANE_Y && x->cfl_store_y) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    const int dst_stride = pd->dst.stride;
    uint8_t *dst =
        &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
    cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
  }
#endif  // CONFIG_CFL
#if CONFIG_DPCM_INTRA
CALCULATE_RD : {}
#endif  // CONFIG_DPCM_INTRA
  rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }
#if !CONFIG_PVQ
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const SCAN_ORDER *scan_order =
      get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
  this_rd_stats.rate =
      av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l,
                      args->use_fast_coef_costing);
#else   // !CONFIG_PVQ
  this_rd_stats.rate = x->rate;
#endif  // !CONFIG_PVQ
#else   // !CONFIG_TXK_SEL
  av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
                      tx_size, a, l, args->use_fast_coef_costing,
                      &this_rd_stats);
#endif  // !CONFIG_TXK_SEL

#if !CONFIG_PVQ
#if CONFIG_RD_DEBUG
  av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
                            this_rd_stats.rate);
#endif  // CONFIG_RD_DEBUG
  av1_set_txb_context(x, plane, block, tx_size, a, l);
#endif  // !CONFIG_PVQ

  rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = AOMMIN(rd1, rd2);

#if CONFIG_DAALA_DIST
  if (plane == 0 && plane_bsize >= BLOCK_8X8 &&
      (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) {
    this_rd_stats.dist = 0;
    this_rd_stats.sse = 0;
    x->rate_4x4[block] = this_rd_stats.rate;
  }
#endif  // CONFIG_DAALA_DIST

#if !CONFIG_PVQ
  this_rd_stats.skip &= !x->plane[plane].eobs[block];
#else
  this_rd_stats.skip &= x->pvq_skip[plane];
#endif  // !CONFIG_PVQ
  av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);

  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }
}
#if CONFIG_DAALA_DIST
static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
                                         int blk_col, BLOCK_SIZE plane_bsize,
                                         TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd, rd1, rd2;
  RD_STATS this_rd_stats;
  int qm = OD_HVS_QM;
  int use_activity_masking = 0;

  assert(plane_bsize >= BLOCK_8X8);
#if CONFIG_PVQ
  use_activity_masking = x->daala_enc.use_activity_masking;
#endif  // CONFIG_PVQ
  av1_init_rd_stats(&this_rd_stats);

  if (args->exit_early) return;

  {
    const struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];

    const int src_stride = p->src.stride;
    const int dst_stride = pd->dst.stride;
    const int diff_stride = block_size_wide[plane_bsize];

    const uint8_t *src =
        &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
    const uint8_t *dst =
        &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];

    unsigned int tmp1, tmp2;
    int qindex = x->qindex;
    const int pred_stride = block_size_wide[plane_bsize];
    const int pred_idx = (blk_row * pred_stride + blk_col)
                         << tx_size_wide_log2[0];
    int16_t *pred = &pd->pred[pred_idx];
    int i, j;
    const int tx_blk_size = 8;

    DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);

    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        pred8[j * tx_blk_size + i] = pred[j * diff_stride + i];

    tmp1 = av1_daala_dist(src, src_stride, pred8, tx_blk_size, 8, 8, qm,
                          use_activity_masking, qindex);
    tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, qm,
                          use_activity_masking, qindex);

    if (!is_inter_block(mbmi)) {
      this_rd_stats.sse = (int64_t)tmp1 * 16;
      this_rd_stats.dist = (int64_t)tmp2 * 16;
    } else {
      // For inter mode, the decoded pixels are provided in pd->pred,
      // while the predicted pixels are in dst.
      this_rd_stats.sse = (int64_t)tmp2 * 16;
      this_rd_stats.dist = (int64_t)tmp1 * 16;
    }
  }

  rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  {
    const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
    const uint8_t txw_unit = tx_size_wide_unit[tx_size];
    const uint8_t txh_unit = tx_size_high_unit[tx_size];
    const int step = txw_unit * txh_unit;
    int offset_h = tx_size_high_unit[TX_4X4];
    // The rate of the current 8x8 block is the sum of four 4x4 blocks in it.
    this_rd_stats.rate =
        x->rate_4x4[block - max_blocks_wide * offset_h - step] +
        x->rate_4x4[block - max_blocks_wide * offset_h] +
        x->rate_4x4[block - step] + x->rate_4x4[block];
  }
  rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
  rd = AOMMIN(rd1, rd2);

  args->rd_stats.dist += this_rd_stats.dist;
  args->rd_stats.sse += this_rd_stats.sse;

  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }
}
#endif  // CONFIG_DAALA_DIST
static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
                             RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args;
  av1_zero(args);
  args.x = x;
  args.cpi = cpi;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;
  av1_init_rd_stats(&args.rd_stats);

  if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;

  av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

#if CONFIG_DAALA_DIST
  if (plane == 0 && bsize >= BLOCK_8X8 &&
      (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
    av1_foreach_8x8_transformed_block_in_yplane(
        xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
  else
#endif  // CONFIG_DAALA_DIST
    av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
                                           &args);

  if (args.exit_early) {
    av1_invalid_rd_stats(rd_stats);
  } else {
    *rd_stats = args.rd_stats;
  }
}

#if CONFIG_SUPERTX
#if CONFIG_SUPERTX
void av1_txfm_rd_in_plane_supertx(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,
                                  int64_t *distortion, int *skippable,
                                  int64_t *sse, int64_t ref_best_rd, int plane,
                                  BLOCK_SIZE bsize, TX_SIZE tx_size,
                                  int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args;
  av1_zero(args);
  args.cpi = cpi;
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;

#if CONFIG_EXT_TX
  assert(tx_size < TX_SIZES);
#endif  // CONFIG_EXT_TX

  if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;

  av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), tx_size,
                &args);

  if (args.exit_early) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.rd_stats.dist;
    *rate = args.rd_stats.rate;
    *sse = args.rd_stats.sse;
    *skippable = !x->plane[plane].eobs[0];
  }
}
#endif  // CONFIG_SUPERTX
static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
                        BLOCK_SIZE bsize, TX_SIZE tx_size) {
  const AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  const int tx_select =
      cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;

  if (tx_select) {
    const int is_inter = is_inter_block(mbmi);
    const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
                                     : intra_tx_size_cat_lookup[bsize];
    const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
    const int depth = tx_size_to_depth(coded_tx_size);
    const int tx_size_ctx = get_tx_size_context(xd);
    int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
    if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
      r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob,
                                tx_size == quarter_txsize_lookup[bsize]);
#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
    return r_tx_size;
  } else {
    return 0;
  }
}
// TODO(angiebird): use this function whenever it's possible
int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
                     BLOCK_SIZE bsize, int plane, TX_SIZE tx_size,
                     TX_TYPE tx_type) {
  if (plane > 0) return 0;

  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int is_inter = is_inter_block(mbmi);
#if CONFIG_EXT_TX
  const AV1_COMMON *cm = &cpi->common;
  if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
      !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    const int ext_tx_set =
        get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
    if (is_inter) {
      if (ext_tx_set > 0)
        return cpi
            ->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]][tx_type];
    } else {
      if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
        return cpi->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
                                       [mbmi->mode][tx_type];
    }
  }
#else
  if (tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
      !FIXED_TX_TYPE) {
    if (is_inter) {
      return cpi->inter_tx_type_costs[tx_size][tx_type];
    } else {
      return cpi->intra_tx_type_costs[tx_size]
                                     [intra_mode_to_tx_type_context[mbmi->mode]]
                                     [tx_type];
    }
  }
#endif  // CONFIG_EXT_TX
  return 0;
}
static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
                        RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
                        TX_TYPE tx_type, int tx_size) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd = INT64_MAX;
  aom_prob skip_prob = av1_get_skip_prob(cm, xd);
  int s0, s1;
  const int is_inter = is_inter_block(mbmi);
  const int tx_select =
      cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;

  const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size);

  assert(skip_prob > 0);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
  assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX

  s0 = av1_cost_bit(skip_prob, 0);
  s1 = av1_cost_bit(skip_prob, 1);

  mbmi->tx_type = tx_type;
  mbmi->tx_size = tx_size;
  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, tx_size,
                   cpi->sf.use_fast_coef_costing);
  if (rd_stats->rate == INT_MAX) return INT64_MAX;
#if !CONFIG_TXK_SEL
  int plane = 0;
  rd_stats->rate += av1_tx_type_cost(cpi, xd, bs, plane, tx_size, tx_type);
#endif

  if (rd_stats->skip) {
    if (is_inter) {
      rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
    } else {
      rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select,
                  rd_stats->sse);
    }
  } else {
    rd = RDCOST(x->rdmult, x->rddiv,
                rd_stats->rate + s0 + r_tx_size * tx_select, rd_stats->dist);
  }

  if (tx_select) rd_stats->rate += r_tx_size;

  if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
      !(rd_stats->skip))
    rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));

  return rd;
}
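/* Illustrative sketch (not part of the encoder; excluded from the build):
 * the skip / no-skip trade-off that txfm_yrd() evaluates.  s0 and s1 stand
 * for the signalling cost of "not skipped" / "skipped"; the cost model is a
 * simplified stand-in for RDCOST() and every name below is hypothetical. */
#if 0
#include <stdint.h>

static int64_t toy_yrd(int rate_coeffs, int64_t dist, int64_t sse, int s0,
                       int s1, int r_tx_size, int tx_select, int64_t lambda) {
  // Cost of coding the coefficients, the "not skipped" flag and, when the
  // transform size is signalled, the tx-size bits.
  const int64_t rd_coded =
      (int64_t)(rate_coeffs + s0 + r_tx_size * tx_select) * lambda + dist;
  // Cost of skipping: only the "skipped" flag is transmitted and the
  // distortion equals the prediction SSE.
  const int64_t rd_skip = (int64_t)s1 * lambda + sse;
  return rd_coded < rd_skip ? rd_coded : rd_skip;
}
#endif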
static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
                            TX_TYPE tx_type, TX_SIZE tx_size) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  const int is_inter = is_inter_block(mbmi);
  int prune = 0;
  if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
    // passing -1 in for tx_type indicates that all 1D
    // transforms should be considered for pruning
    prune = prune_tx_types(cpi, bs, x, xd, -1);

  if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
  if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
    return 1;
  if (!is_inter && x->use_default_intra_tx_type &&
      tx_type != get_default_tx_type(0, xd, 0, tx_size))
    return 1;
  if (is_inter && x->use_default_inter_tx_type &&
      tx_type != get_default_tx_type(0, xd, 0, tx_size))
    return 1;
  if (max_tx_size >= TX_32X32 && tx_size == TX_4X4) return 1;
#if CONFIG_EXT_TX
  const AV1_COMMON *const cm = &cpi->common;
  const int ext_tx_set =
      get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
  if (is_inter) {
    if (!ext_tx_used_inter[ext_tx_set][tx_type]) return 1;
    if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
      if (!do_tx_type_search(tx_type, prune)) return 1;
    }
  } else {
    if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
      if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) return 1;
    }
    if (!ext_tx_used_intra[ext_tx_set][tx_type]) return 1;
  }
#else   // CONFIG_EXT_TX
  if (tx_size >= TX_32X32 && tx_type != DCT_DCT) return 1;
  if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
      !do_tx_type_search(tx_type, prune))
    return 1;
#endif  // CONFIG_EXT_TX
  return 0;
}
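/* Illustrative sketch (not part of the encoder; excluded from the build):
 * one plausible way a pruning result can gate transform types, in the
 * spirit of the do_tx_type_search()/prune checks above.  The bitmask
 * layout and helper below are hypothetical, not the library's actual
 * scheme. */
#if 0
static int toy_tx_type_allowed(int tx_type, unsigned prune_mask) {
  // A set bit marks a transform type that the pruning pass rejected.
  return (prune_mask & (1u << tx_type)) == 0;
}
#endif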
#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
                                   MACROBLOCK *x, int *r, int64_t *d, int *s,
                                   int64_t *sse, int64_t ref_best_rd) {
  RD_STATS rd_stats;
  int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs, DCT_DCT,
                        max_txsize_lookup[bs]);
  *r = rd_stats.rate;
  *d = rd_stats.dist;
  *s = rd_stats.skip;
  *sse = rd_stats.sse;
  return rd;
}
#endif  // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
1940 static void choose_largest_tx_size(const AV1_COMP
*const cpi
, MACROBLOCK
*x
,
1941 RD_STATS
*rd_stats
, int64_t ref_best_rd
,
1943 const AV1_COMMON
*const cm
= &cpi
->common
;
1944 MACROBLOCKD
*const xd
= &x
->e_mbd
;
1945 MB_MODE_INFO
*const mbmi
= &xd
->mi
[0]->mbmi
;
1946 TX_TYPE tx_type
, best_tx_type
= DCT_DCT
;
1947 int64_t this_rd
, best_rd
= INT64_MAX
;
1948 aom_prob skip_prob
= av1_get_skip_prob(cm
, xd
);
1949 int s0
= av1_cost_bit(skip_prob
, 0);
1950 int s1
= av1_cost_bit(skip_prob
, 1);
1951 const int is_inter
= is_inter_block(mbmi
);
1953 const int plane
= 0;
1956 #endif // CONFIG_EXT_TX
1957 av1_invalid_rd_stats(rd_stats
);
1959 mbmi
->tx_size
= tx_size_from_tx_mode(bs
, cm
->tx_mode
, is_inter
);
1961 mbmi
->min_tx_size
= get_min_tx_size(mbmi
->tx_size
);
1962 #endif // CONFIG_VAR_TX
1965 get_ext_tx_set(mbmi
->tx_size
, bs
, is_inter
, cm
->reduced_tx_set_used
);
1966 #endif // CONFIG_EXT_TX
1968 if (is_inter
&& cpi
->sf
.tx_type_search
.prune_mode
> NO_PRUNE
)
1970 prune
= prune_tx_types(cpi
, bs
, x
, xd
, ext_tx_set
);
1972 prune
= prune_tx_types(cpi
, bs
, x
, xd
, 0);
1973 #endif // CONFIG_EXT_TX
1975 if (get_ext_tx_types(mbmi
->tx_size
, bs
, is_inter
, cm
->reduced_tx_set_used
) >
1977 !xd
->lossless
[mbmi
->segment_id
]) {
1979 od_rollback_buffer pre_buf
, post_buf
;
1981 od_encode_checkpoint(&x
->daala_enc
, &pre_buf
);
1982 od_encode_checkpoint(&x
->daala_enc
, &post_buf
);
1983 #endif // CONFIG_PVQ
1985 for (tx_type
= DCT_DCT
; tx_type
< TX_TYPES
; ++tx_type
) {
1986 RD_STATS this_rd_stats
;
1988 if (x
->use_default_inter_tx_type
&&
1989 tx_type
!= get_default_tx_type(0, xd
, 0, mbmi
->tx_size
))
1991 if (!ext_tx_used_inter
[ext_tx_set
][tx_type
]) continue;
1992 if (cpi
->sf
.tx_type_search
.prune_mode
> NO_PRUNE
) {
1993 if (!do_tx_type_search(tx_type
, prune
)) continue;
1996 if (x
->use_default_intra_tx_type
&&
1997 tx_type
!= get_default_tx_type(0, xd
, 0, mbmi
->tx_size
))
1999 if (!ALLOW_INTRA_EXT_TX
&& bs
>= BLOCK_8X8
) {
2000 if (tx_type
!= intra_mode_to_tx_type_context
[mbmi
->mode
]) continue;
2002 if (!ext_tx_used_intra
[ext_tx_set
][tx_type
]) continue;
2005 mbmi
->tx_type
= tx_type
;
2007 txfm_rd_in_plane(x
, cpi
, &this_rd_stats
, ref_best_rd
, 0, bs
,
2008 mbmi
->tx_size
, cpi
->sf
.use_fast_coef_costing
);
2010 od_encode_rollback(&x
->daala_enc
, &pre_buf
);
2011 #endif // CONFIG_PVQ
2012 if (this_rd_stats
.rate
== INT_MAX
) continue;
2013 av1_tx_type_cost(cpi
, xd
, bs
, plane
, mbmi
->tx_size
, tx_type
);
2015 if (this_rd_stats
.skip
)
2016 this_rd
= RDCOST(x
->rdmult
, x
->rddiv
, s1
, this_rd_stats
.sse
);
2018 this_rd
= RDCOST(x
->rdmult
, x
->rddiv
, this_rd_stats
.rate
+ s0
,
2019 this_rd_stats
.dist
);
2020 if (is_inter_block(mbmi
) && !xd
->lossless
[mbmi
->segment_id
] &&
2021 !this_rd_stats
.skip
)
2023 AOMMIN(this_rd
, RDCOST(x
->rdmult
, x
->rddiv
, s1
, this_rd_stats
.sse
));
2025 if (this_rd
< best_rd
) {
2027 best_tx_type
= mbmi
->tx_type
;
2028 *rd_stats
= this_rd_stats
;
2030 od_encode_checkpoint(&x
->daala_enc
, &post_buf
);
2031 #endif // CONFIG_PVQ
2035 od_encode_rollback(&x
->daala_enc
, &post_buf
);
2036 #endif // CONFIG_PVQ
2038 mbmi
->tx_type
= DCT_DCT
;
2039 txfm_rd_in_plane(x
, cpi
, rd_stats
, ref_best_rd
, 0, bs
, mbmi
->tx_size
,
2040 cpi
->sf
.use_fast_coef_costing
);
2042 #else // CONFIG_EXT_TX
2043 if (mbmi
->tx_size
< TX_32X32
&& !xd
->lossless
[mbmi
->segment_id
]) {
2044 for (tx_type
= 0; tx_type
< TX_TYPES
; ++tx_type
) {
2045 RD_STATS this_rd_stats
;
2046 if (!is_inter
&& x
->use_default_intra_tx_type
&&
2047 tx_type
!= get_default_tx_type(0, xd
, 0, mbmi
->tx_size
))
2049 if (is_inter
&& x
->use_default_inter_tx_type
&&
2050 tx_type
!= get_default_tx_type(0, xd
, 0, mbmi
->tx_size
))
2052 mbmi
->tx_type
= tx_type
;
2053 txfm_rd_in_plane(x
, cpi
, &this_rd_stats
, ref_best_rd
, 0, bs
,
2054 mbmi
->tx_size
, cpi
->sf
.use_fast_coef_costing
);
2055 if (this_rd_stats
.rate
== INT_MAX
) continue;
2057 av1_tx_type_cost(cpi
, xd
, bs
, plane
, mbmi
->tx_size
, tx_type
);
2059 if (cpi
->sf
.tx_type_search
.prune_mode
> NO_PRUNE
&&
2060 !do_tx_type_search(tx_type
, prune
))
2063 if (this_rd_stats
.skip
)
2064 this_rd
= RDCOST(x
->rdmult
, x
->rddiv
, s1
, this_rd_stats
.sse
);
2066 this_rd
= RDCOST(x
->rdmult
, x
->rddiv
, this_rd_stats
.rate
+ s0
,
2067 this_rd_stats
.dist
);
2068 if (is_inter
&& !xd
->lossless
[mbmi
->segment_id
] && !this_rd_stats
.skip
)
2070 AOMMIN(this_rd
, RDCOST(x
->rdmult
, x
->rddiv
, s1
, this_rd_stats
.sse
));
2072 if (this_rd
< best_rd
) {
2074 best_tx_type
= mbmi
->tx_type
;
2075 *rd_stats
= this_rd_stats
;
2079 mbmi
->tx_type
= DCT_DCT
;
2080 txfm_rd_in_plane(x
, cpi
, rd_stats
, ref_best_rd
, 0, bs
, mbmi
->tx_size
,
2081 cpi
->sf
.use_fast_coef_costing
);
2083 #endif // CONFIG_EXT_TX
2084 mbmi
->tx_type
= best_tx_type
;
static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    RD_STATS *rd_stats, int64_t ref_best_rd,
                                    BLOCK_SIZE bs) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = TX_4X4;
  mbmi->tx_type = DCT_DCT;
#if CONFIG_VAR_TX
  mbmi->min_tx_size = get_min_tx_size(TX_4X4);
#endif  // CONFIG_VAR_TX

  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);
}

#if CONFIG_TXK_SEL || CONFIG_VAR_TX
static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
  int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
  return num_blk;
}
#endif  // CONFIG_TXK_SEL || CONFIG_VAR_TX
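/* Illustrative sketch (not part of the encoder; excluded from the build):
 * the arithmetic in bsize_to_num_blk() counts 4x4 units in a block.  With
 * tx_size_wide_log2[0] == 2 (a 4-pixel-wide TX_4X4), a 16x16 block has
 * num_pels_log2 == 8, so num_blk == 1 << (8 - 4) == 16.  The constant and
 * helper below restate that example with hypothetical names. */
#if 0
enum { TOY_TX_4X4_WIDE_LOG2 = 2 };

static int toy_num_4x4_blocks(int num_pels_log2) {
  return 1 << (num_pels_log2 - 2 * TOY_TX_4X4_WIDE_LOG2);
}
// toy_num_4x4_blocks(8) == 16 for a 16x16 block,
// toy_num_4x4_blocks(6) == 4 for an 8x8 block.
#endif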
2110 static void choose_tx_size_type_from_rd(const AV1_COMP
*const cpi
,
2111 MACROBLOCK
*x
, RD_STATS
*rd_stats
,
2112 int64_t ref_best_rd
, BLOCK_SIZE bs
) {
2113 const AV1_COMMON
*const cm
= &cpi
->common
;
2114 MACROBLOCKD
*const xd
= &x
->e_mbd
;
2115 MB_MODE_INFO
*const mbmi
= &xd
->mi
[0]->mbmi
;
2116 int64_t rd
= INT64_MAX
;
2118 int start_tx
, end_tx
;
2119 int64_t best_rd
= INT64_MAX
, last_rd
= INT64_MAX
;
2120 const TX_SIZE max_tx_size
= max_txsize_lookup
[bs
];
2121 TX_SIZE best_tx_size
= max_tx_size
;
2122 TX_TYPE best_tx_type
= DCT_DCT
;
2124 TX_TYPE best_txk_type
[MAX_SB_SQUARE
/ (TX_SIZE_W_MIN
* TX_SIZE_H_MIN
)];
2125 const int num_blk
= bsize_to_num_blk(bs
);
2126 #endif // CONFIG_TXK_SEL
2127 const int tx_select
= cm
->tx_mode
== TX_MODE_SELECT
;
2128 const int is_inter
= is_inter_block(mbmi
);
2130 od_rollback_buffer buf
;
2131 od_encode_checkpoint(&x
->daala_enc
, &buf
);
2132 #endif // CONFIG_PVQ
2134 av1_invalid_rd_stats(rd_stats
);
2136 #if CONFIG_EXT_TX && CONFIG_RECT_TX
2137 int evaluate_rect_tx
= 0;
2139 evaluate_rect_tx
= is_rect_tx_allowed(xd
, mbmi
);
2141 const TX_SIZE chosen_tx_size
=
2142 tx_size_from_tx_mode(bs
, cm
->tx_mode
, is_inter
);
2143 evaluate_rect_tx
= is_rect_tx(chosen_tx_size
);
2144 assert(IMPLIES(evaluate_rect_tx
, is_rect_tx_allowed(xd
, mbmi
)));
2146 if (evaluate_rect_tx
) {
2147 TX_TYPE tx_start
= DCT_DCT
;
2148 TX_TYPE tx_end
= TX_TYPES
;
2150 // The tx_type becomes dummy when lv_map is on. The tx_type search will be
2151 // performed in av1_search_txk_type()
2152 tx_end
= DCT_DCT
+ 1;
2155 for (tx_type
= tx_start
; tx_type
< tx_end
; ++tx_type
) {
2156 if (mbmi
->ref_mv_idx
> 0 && tx_type
!= DCT_DCT
) continue;
2157 const TX_SIZE rect_tx_size
= max_txsize_rect_lookup
[bs
];
2158 RD_STATS this_rd_stats
;
2160 get_ext_tx_set(rect_tx_size
, bs
, is_inter
, cm
->reduced_tx_set_used
);
2161 if ((is_inter
&& ext_tx_used_inter
[ext_tx_set
][tx_type
]) ||
2162 (!is_inter
&& ext_tx_used_intra
[ext_tx_set
][tx_type
])) {
2163 rd
= txfm_yrd(cpi
, x
, &this_rd_stats
, ref_best_rd
, bs
, tx_type
,
2167 memcpy(best_txk_type
, mbmi
->txk_type
,
2168 sizeof(best_txk_type
[0]) * num_blk
);
2170 best_tx_type
= tx_type
;
2171 best_tx_size
= rect_tx_size
;
2173 *rd_stats
= this_rd_stats
;
2176 #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2177 const int is_inter
= is_inter_block(mbmi
);
2178 if (mbmi
->sb_type
< BLOCK_8X8
&& is_inter
) break;
2179 #endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2183 #if CONFIG_RECT_TX_EXT
2185 int evaluate_quarter_tx
= 0;
2186 if (is_quarter_tx_allowed(xd
, mbmi
, is_inter
)) {
2188 evaluate_quarter_tx
= 1;
2190 const TX_SIZE chosen_tx_size
=
2191 tx_size_from_tx_mode(bs
, cm
->tx_mode
, is_inter
);
2192 evaluate_quarter_tx
= chosen_tx_size
== quarter_txsize_lookup
[bs
];
2195 if (evaluate_quarter_tx
) {
2196 TX_TYPE tx_start
= DCT_DCT
;
2197 TX_TYPE tx_end
= TX_TYPES
;
2199 // The tx_type becomes dummy when lv_map is on. The tx_type search will be
2200 // performed in av1_search_txk_type()
2201 tx_end
= DCT_DCT
+ 1;
2204 for (tx_type
= tx_start
; tx_type
< tx_end
; ++tx_type
) {
2205 if (mbmi
->ref_mv_idx
> 0 && tx_type
!= DCT_DCT
) continue;
2206 const TX_SIZE tx_size
= quarter_txsize_lookup
[bs
];
2207 RD_STATS this_rd_stats
;
2209 get_ext_tx_set(tx_size
, bs
, is_inter
, cm
->reduced_tx_set_used
);
2210 if ((is_inter
&& ext_tx_used_inter
[ext_tx_set
][tx_type
]) ||
2211 (!is_inter
&& ext_tx_used_intra
[ext_tx_set
][tx_type
])) {
2213 txfm_yrd(cpi
, x
, &this_rd_stats
, ref_best_rd
, bs
, tx_type
, tx_size
);
2216 memcpy(best_txk_type
, mbmi
->txk_type
,
2217 sizeof(best_txk_type
[0]) * num_blk
);
2219 best_tx_type
= tx_type
;
2220 best_tx_size
= tx_size
;
2222 *rd_stats
= this_rd_stats
;
2225 #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2226 const int is_inter
= is_inter_block(mbmi
);
2227 if (mbmi
->sb_type
< BLOCK_8X8
&& is_inter
) break;
2228 #endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2231 #endif // CONFIG_RECT_TX_EXT
2232 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2235 start_tx
= max_tx_size
;
2236 end_tx
= (max_tx_size
>= TX_32X32
) ? TX_8X8
: TX_4X4
;
2238 const TX_SIZE chosen_tx_size
=
2239 tx_size_from_tx_mode(bs
, cm
->tx_mode
, is_inter
);
2240 start_tx
= chosen_tx_size
;
2241 end_tx
= chosen_tx_size
;
2244 last_rd
= INT64_MAX
;
2245 for (n
= start_tx
; n
>= end_tx
; --n
) {
2246 #if CONFIG_EXT_TX && CONFIG_RECT_TX
2247 if (is_rect_tx(n
)) break;
2248 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2249 TX_TYPE tx_start
= DCT_DCT
;
2250 TX_TYPE tx_end
= TX_TYPES
;
2252 // The tx_type becomes dummy when lv_map is on. The tx_type search will be
2253 // performed in av1_search_txk_type()
2254 tx_end
= DCT_DCT
+ 1;
2257 for (tx_type
= tx_start
; tx_type
< tx_end
; ++tx_type
) {
2258 RD_STATS this_rd_stats
;
2259 if (skip_txfm_search(cpi
, x
, bs
, tx_type
, n
)) continue;
2260 rd
= txfm_yrd(cpi
, x
, &this_rd_stats
, ref_best_rd
, bs
, tx_type
, n
);
2262 od_encode_rollback(&x
->daala_enc
, &buf
);
2263 #endif // CONFIG_PVQ
2264 // Early termination in transform size search.
2265 if (cpi
->sf
.tx_size_search_breakout
&&
2267 (this_rd_stats
.skip
== 1 && tx_type
!= DCT_DCT
&& n
< start_tx
) ||
2268 (n
< (int)max_tx_size
&& rd
> last_rd
)))
2274 memcpy(best_txk_type
, mbmi
->txk_type
,
2275 sizeof(best_txk_type
[0]) * num_blk
);
2277 best_tx_type
= tx_type
;
2280 *rd_stats
= this_rd_stats
;
2282 #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2283 const int is_inter
= is_inter_block(mbmi
);
2284 if (mbmi
->sb_type
< BLOCK_8X8
&& is_inter
) break;
2285 #endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2288 mbmi
->tx_size
= best_tx_size
;
2289 mbmi
->tx_type
= best_tx_type
;
2291 memcpy(mbmi
->txk_type
, best_txk_type
, sizeof(best_txk_type
[0]) * num_blk
);
2295 mbmi
->min_tx_size
= get_min_tx_size(mbmi
->tx_size
);
2296 #endif // CONFIG_VAR_TX
2299 if (mbmi
->tx_size
>= TX_32X32
) assert(mbmi
->tx_type
== DCT_DCT
);
2300 #endif // !CONFIG_EXT_TX
2302 if (best_rd
!= INT64_MAX
) {
2303 txfm_yrd(cpi
, x
, rd_stats
, ref_best_rd
, bs
, best_tx_type
, best_tx_size
);
2305 #endif // CONFIG_PVQ
static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
                            RD_STATS *rd_stats, BLOCK_SIZE bs,
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  av1_init_rd_stats(rd_stats);

  assert(bs == xd->mi[0]->mbmi.sb_type);

  if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
  } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
    choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
  } else {
    choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
  }
}

static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED && best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED && best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED && best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED && best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}
2342 // Model based RD estimation for luma intra blocks.
2343 static int64_t intra_model_yrd(const AV1_COMP
*const cpi
, MACROBLOCK
*const x
,
2344 BLOCK_SIZE bsize
, int mode_cost
) {
2345 MACROBLOCKD
*const xd
= &x
->e_mbd
;
2346 MB_MODE_INFO
*const mbmi
= &xd
->mi
[0]->mbmi
;
2347 assert(!is_inter_block(mbmi
));
2348 RD_STATS this_rd_stats
;
2350 int64_t temp_sse
, this_rd
;
2351 const TX_SIZE tx_size
= tx_size_from_tx_mode(bsize
, cpi
->common
.tx_mode
, 0);
2352 const int stepr
= tx_size_high_unit
[tx_size
];
2353 const int stepc
= tx_size_wide_unit
[tx_size
];
2354 const int max_blocks_wide
= max_block_wide(xd
, bsize
, 0);
2355 const int max_blocks_high
= max_block_high(xd
, bsize
, 0);
2356 mbmi
->tx_size
= tx_size
;
2358 const int step
= stepr
* stepc
;
2360 for (row
= 0; row
< max_blocks_high
; row
+= stepr
) {
2361 for (col
= 0; col
< max_blocks_wide
; col
+= stepc
) {
2363 const struct macroblockd_plane
*const pd
= &xd
->plane
[0];
2364 const BLOCK_SIZE plane_bsize
= get_plane_block_size(bsize
, pd
);
2367 FRAME_CONTEXT
*const ec_ctx
= xd
->tile_ctx
;
2369 FRAME_CONTEXT
*const ec_ctx
= cpi
->common
.fc
;
2370 #endif // CONFIG_EC_ADAPT
2372 av1_predict_intra_block_encoder_facade(x
, ec_ctx
, 0, block
, col
, row
,
2373 tx_size
, plane_bsize
);
2375 av1_predict_intra_block_facade(xd
, 0, block
, col
, row
, tx_size
);
2381 model_rd_for_sb(cpi
, bsize
, x
, xd
, 0, 0, &this_rd_stats
.rate
,
2382 &this_rd_stats
.dist
, &this_rd_stats
.skip
, &temp_sse
);
2383 #if CONFIG_EXT_INTRA
2384 if (av1_is_directional_mode(mbmi
->mode
, bsize
)) {
2385 mode_cost
+= write_uniform_cost(2 * MAX_ANGLE_DELTA
+ 1,
2386 MAX_ANGLE_DELTA
+ mbmi
->angle_delta
[0]);
2388 #endif // CONFIG_EXT_INTRA
2389 #if CONFIG_FILTER_INTRA
2390 if (mbmi
->mode
== DC_PRED
) {
2391 const aom_prob prob
= cpi
->common
.fc
->filter_intra_probs
[0];
2392 if (mbmi
->filter_intra_mode_info
.use_filter_intra_mode
[0]) {
2393 const int mode
= mbmi
->filter_intra_mode_info
.filter_intra_mode
[0];
2394 mode_cost
+= (av1_cost_bit(prob
, 1) +
2395 write_uniform_cost(FILTER_INTRA_MODES
, mode
));
2397 mode_cost
+= av1_cost_bit(prob
, 0);
2400 #endif // CONFIG_FILTER_INTRA
2401 this_rd
= RDCOST(x
->rdmult
, x
->rddiv
, this_rd_stats
.rate
+ mode_cost
,
2402 this_rd_stats
.dist
);
// Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
// new_height'. Extra rows and columns are filled in by copying the last valid
// row/column.
static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
                                     int orig_height, int new_width,
                                     int new_height) {
  int j;
  assert(new_width >= orig_width);
  assert(new_height >= orig_height);
  if (new_width == orig_width && new_height == orig_height) return;

  for (j = orig_height - 1; j >= 0; --j) {
    memmove(color_map + j * new_width, color_map + j * orig_width, orig_width);
    // Copy last column to extra columns.
    memset(color_map + j * new_width + orig_width,
           color_map[j * new_width + orig_width - 1], new_width - orig_width);
  }
  // Copy last row to extra rows.
  for (j = orig_height; j < new_height; ++j) {
    memcpy(color_map + j * new_width,
           color_map + (orig_height - 1) * new_width, new_width);
  }
}
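/* Illustrative sketch (not part of the encoder; excluded from the build):
 * what extend_palette_color_map() produces for a small input.  The helper
 * name is hypothetical; the buffer must already be large enough for the
 * extended size, which is why rows are moved from the bottom up. */
#if 0
static void toy_extend_example(void) {
  // A 2x2 index map packed at the start of a buffer large enough for 4x4.
  uint8_t map[4 * 4] = { 0, 1, 2, 3 };
  extend_palette_color_map(map, /*orig_width=*/2, /*orig_height=*/2,
                           /*new_width=*/4, /*new_height=*/4);
  // map now holds, row by row: 0 1 1 1 / 2 3 3 3 / 2 3 3 3 / 2 3 3 3,
  // i.e. the last valid column and row have been replicated.
}
#endif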
#if CONFIG_PALETTE_DELTA_ENCODING
// Bias toward using colors in the cache.
// TODO(huisu): Try other schemes to improve compression.
static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
                                    int n_colors, int stride,
                                    float *centroids) {
  if (n_cache <= 0) return;
  for (int i = 0; i < n_colors * stride; i += stride) {
    float min_diff = fabsf(centroids[i] - color_cache[0]);
    int idx = 0;
    for (int j = 1; j < n_cache; ++j) {
      float this_diff = fabsf(centroids[i] - color_cache[j]);
      if (this_diff < min_diff) {
        min_diff = this_diff;
        idx = j;
      }
    }
    if (min_diff < 1.5) centroids[i] = color_cache[idx];
  }
}
#endif  // CONFIG_PALETTE_DELTA_ENCODING
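/* Illustrative sketch (not part of the encoder; excluded from the build,
 * and only meaningful when CONFIG_PALETTE_DELTA_ENCODING is enabled): the
 * effect of optimize_palette_colors() above.  Each k-means centroid is
 * snapped to the nearest cached color when it is within 1.5, biasing the
 * palette toward colors that are cheap to code with delta encoding.  The
 * values and helper name are made up for the example. */
#if 0
static void toy_palette_cache_example(void) {
  uint16_t cache[3] = { 10, 64, 200 };
  float centroids[4] = { 11.2f, 63.9f, 130.0f, 201.4f };
  optimize_palette_colors(cache, /*n_cache=*/3, /*n_colors=*/4, /*stride=*/1,
                          centroids);
  // centroids[0] -> 10, centroids[1] -> 64, centroids[3] -> 200 (all within
  // 1.5 of a cached color); centroids[2] stays 130.0f.
}
#endif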
2453 static int rd_pick_palette_intra_sby(const AV1_COMP
*const cpi
, MACROBLOCK
*x
,
2454 BLOCK_SIZE bsize
, int palette_ctx
,
2455 int dc_mode_cost
, MB_MODE_INFO
*best_mbmi
,
2456 uint8_t *best_palette_color_map
,
2457 int64_t *best_rd
, int64_t *best_model_rd
,
2458 int *rate
, int *rate_tokenonly
,
2459 int64_t *distortion
, int *skippable
) {
2460 int rate_overhead
= 0;
2461 MACROBLOCKD
*const xd
= &x
->e_mbd
;
2462 MODE_INFO
*const mic
= xd
->mi
[0];
2463 MB_MODE_INFO
*const mbmi
= &mic
->mbmi
;
2464 assert(!is_inter_block(mbmi
));
2465 int this_rate
, colors
, n
;
2466 const int src_stride
= x
->plane
[0].src
.stride
;
2467 const uint8_t *const src
= x
->plane
[0].src
.buf
;
2468 uint8_t *const color_map
= xd
->plane
[0].color_index_map
;
2469 int block_width
, block_height
, rows
, cols
;
2470 av1_get_block_dimensions(bsize
, 0, xd
, &block_width
, &block_height
, &rows
,
2473 assert(cpi
->common
.allow_screen_content_tools
);
2475 #if CONFIG_HIGHBITDEPTH
2476 if (cpi
->common
.use_highbitdepth
)
2477 colors
= av1_count_colors_highbd(src
, src_stride
, rows
, cols
,
2478 cpi
->common
.bit_depth
);
2480 #endif // CONFIG_HIGHBITDEPTH
2481 colors
= av1_count_colors(src
, src_stride
, rows
, cols
);
2482 #if CONFIG_FILTER_INTRA
2483 mbmi
->filter_intra_mode_info
.use_filter_intra_mode
[0] = 0;
2484 #endif // CONFIG_FILTER_INTRA
2486 if (colors
> 1 && colors
<= 64) {
2487 int r
, c
, i
, j
, k
, palette_mode_cost
;
2488 const int max_itr
= 50;
2489 uint8_t color_order
[PALETTE_MAX_SIZE
];
2490 float *const data
= x
->palette_buffer
->kmeans_data_buf
;
2491 float centroids
[PALETTE_MAX_SIZE
];
2493 RD_STATS tokenonly_rd_stats
;
2494 int64_t this_rd
, this_model_rd
;
2495 PALETTE_MODE_INFO
*const pmi
= &mbmi
->palette_mode_info
;
2496 #if CONFIG_HIGHBITDEPTH
2497 uint16_t *src16
= CONVERT_TO_SHORTPTR(src
);
2498 if (cpi
->common
.use_highbitdepth
)
2501 #endif // CONFIG_HIGHBITDEPTH
2504 #if CONFIG_HIGHBITDEPTH
2505 if (cpi
->common
.use_highbitdepth
) {
2506 for (r
= 0; r
< rows
; ++r
) {
2507 for (c
= 0; c
< cols
; ++c
) {
2508 val
= src16
[r
* src_stride
+ c
];
2509 data
[r
* cols
+ c
] = val
;
2517 #endif // CONFIG_HIGHBITDEPTH
2518 for (r
= 0; r
< rows
; ++r
) {
2519 for (c
= 0; c
< cols
; ++c
) {
2520 val
= src
[r
* src_stride
+ c
];
2521 data
[r
* cols
+ c
] = val
;
2528 #if CONFIG_HIGHBITDEPTH
2530 #endif // CONFIG_HIGHBITDEPTH
2532 mbmi
->mode
= DC_PRED
;
2533 #if CONFIG_FILTER_INTRA
2534 mbmi
->filter_intra_mode_info
.use_filter_intra_mode
[0] = 0;
2535 #endif // CONFIG_FILTER_INTRA
2537 if (rows
* cols
> PALETTE_MAX_BLOCK_SIZE
) return 0;
2539 #if CONFIG_PALETTE_DELTA_ENCODING
2540 const MODE_INFO
*above_mi
= xd
->above_mi
;
2541 const MODE_INFO
*left_mi
= xd
->left_mi
;
2542 uint16_t color_cache
[2 * PALETTE_MAX_SIZE
];
2544 av1_get_palette_cache(above_mi
, left_mi
, 0, color_cache
);
2545 #endif // CONFIG_PALETTE_DELTA_ENCODING
2547 for (n
= colors
> PALETTE_MAX_SIZE
? PALETTE_MAX_SIZE
: colors
; n
>= 2;
2549 if (colors
== PALETTE_MIN_SIZE
) {
2550 // Special case: These colors automatically become the centroids.
2551 assert(colors
== n
);
2552 assert(colors
== 2);
2557 for (i
= 0; i
< n
; ++i
) {
2558 centroids
[i
] = lb
+ (2 * i
+ 1) * (ub
- lb
) / n
/ 2;
2560 av1_k_means(data
, centroids
, color_map
, rows
* cols
, n
, 1, max_itr
);
2561 #if CONFIG_PALETTE_DELTA_ENCODING
2562 optimize_palette_colors(color_cache
, n_cache
, n
, 1, centroids
);
2563 #endif // CONFIG_PALETTE_DELTA_ENCODING
2564 k
= av1_remove_duplicates(centroids
, n
);
2565 if (k
< PALETTE_MIN_SIZE
) {
2566 // Too few unique colors to create a palette. And DC_PRED will work
2567 // well for that case anyway. So skip.
2572 #if CONFIG_HIGHBITDEPTH
2573 if (cpi
->common
.use_highbitdepth
)
2574 for (i
= 0; i
< k
; ++i
)
2575 pmi
->palette_colors
[i
] =
2576 clip_pixel_highbd((int)centroids
[i
], cpi
->common
.bit_depth
);
2578 #endif // CONFIG_HIGHBITDEPTH
2579 for (i
= 0; i
< k
; ++i
)
2580 pmi
->palette_colors
[i
] = clip_pixel((int)centroids
[i
]);
2581 pmi
->palette_size
[0] = k
;
2583 av1_calc_indices(data
, centroids
, color_map
, rows
* cols
, k
, 1);
2584 extend_palette_color_map(color_map
, cols
, rows
, block_width
,
2588 cpi
->palette_y_size_cost
[bsize
- BLOCK_8X8
][k
- PALETTE_MIN_SIZE
] +
2589 write_uniform_cost(k
, color_map
[0]) +
2591 av1_default_palette_y_mode_prob
[bsize
- BLOCK_8X8
][palette_ctx
],
2593 palette_mode_cost
+= av1_palette_color_cost_y(pmi
,
2594 #if CONFIG_PALETTE_DELTA_ENCODING
2595 color_cache
, n_cache
,
2596 #endif // CONFIG_PALETTE_DELTA_ENCODING
2597 cpi
->common
.bit_depth
);
2598 for (i
= 0; i
< rows
; ++i
) {
2599 for (j
= (i
== 0 ? 1 : 0); j
< cols
; ++j
) {
2601 const int color_ctx
= av1_get_palette_color_index_context(
2602 color_map
, block_width
, i
, j
, k
, color_order
, &color_idx
);
2603 assert(color_idx
>= 0 && color_idx
< k
);
2604 palette_mode_cost
+= cpi
->palette_y_color_cost
[k
- PALETTE_MIN_SIZE
]
2605 [color_ctx
][color_idx
];
2608 this_model_rd
= intra_model_yrd(cpi
, x
, bsize
, palette_mode_cost
);
2609 if (*best_model_rd
!= INT64_MAX
&&
2610 this_model_rd
> *best_model_rd
+ (*best_model_rd
>> 1))
2612 if (this_model_rd
< *best_model_rd
) *best_model_rd
= this_model_rd
;
2613 super_block_yrd(cpi
, x
, &tokenonly_rd_stats
, bsize
, *best_rd
);
2614 if (tokenonly_rd_stats
.rate
== INT_MAX
) continue;
2615 this_rate
= tokenonly_rd_stats
.rate
+ palette_mode_cost
;
2616 this_rd
= RDCOST(x
->rdmult
, x
->rddiv
, this_rate
, tokenonly_rd_stats
.dist
);
2617 if (!xd
->lossless
[mbmi
->segment_id
] && mbmi
->sb_type
>= BLOCK_8X8
) {
2618 tokenonly_rd_stats
.rate
-= tx_size_cost(cpi
, x
, bsize
, mbmi
->tx_size
);
2620 if (this_rd
< *best_rd
) {
2622 memcpy(best_palette_color_map
, color_map
,
2623 block_width
* block_height
* sizeof(color_map
[0]));
2625 rate_overhead
= this_rate
- tokenonly_rd_stats
.rate
;
2626 if (rate
) *rate
= this_rate
;
2627 if (rate_tokenonly
) *rate_tokenonly
= tokenonly_rd_stats
.rate
;
2628 if (distortion
) *distortion
= tokenonly_rd_stats
.dist
;
2629 if (skippable
) *skippable
= tokenonly_rd_stats
.skip
;
2634 if (best_mbmi
->palette_mode_info
.palette_size
[0] > 0) {
2635 memcpy(color_map
, best_palette_color_map
,
2636 rows
* cols
* sizeof(best_palette_color_map
[0]));
2639 return rate_overhead
;
2641 #endif // CONFIG_PALETTE
2643 static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
2644 const AV1_COMP
*const cpi
, MACROBLOCK
*x
, int row
, int col
,
2645 PREDICTION_MODE
*best_mode
, const int *bmode_costs
, ENTROPY_CONTEXT
*a
,
2646 ENTROPY_CONTEXT
*l
, int *bestrate
, int *bestratey
, int64_t *bestdistortion
,
2647 BLOCK_SIZE bsize
, TX_SIZE tx_size
, int *y_skip
, int64_t rd_thresh
) {
2648 const AV1_COMMON
*const cm
= &cpi
->common
;
2649 PREDICTION_MODE mode
;
2650 MACROBLOCKD
*const xd
= &x
->e_mbd
;
2651 assert(!is_inter_block(&xd
->mi
[0]->mbmi
));
2652 int64_t best_rd
= rd_thresh
;
2653 struct macroblock_plane
*p
= &x
->plane
[0];
2654 struct macroblockd_plane
*pd
= &xd
->plane
[0];
2655 const int src_stride
= p
->src
.stride
;
2656 const int dst_stride
= pd
->dst
.stride
;
2657 const uint8_t *src_init
= &p
->src
.buf
[row
* 4 * src_stride
+ col
* 4];
2658 uint8_t *dst_init
= &pd
->dst
.buf
[row
* 4 * dst_stride
+ col
* 4];
2659 #if CONFIG_CHROMA_2X2
// TODO(jingning): This is a temporary change. The whole function should be
// removed when cb4x4 is enabled.
2662 ENTROPY_CONTEXT ta
[4], tempa
[4];
2663 ENTROPY_CONTEXT tl
[4], templ
[4];
2665 ENTROPY_CONTEXT ta
[2], tempa
[2];
2666 ENTROPY_CONTEXT tl
[2], templ
[2];
2667 #endif // CONFIG_CHROMA_2X2
2669 const int pred_width_in_4x4_blocks
= num_4x4_blocks_wide_lookup
[bsize
];
2670 const int pred_height_in_4x4_blocks
= num_4x4_blocks_high_lookup
[bsize
];
2671 const int tx_width_unit
= tx_size_wide_unit
[tx_size
];
2672 const int tx_height_unit
= tx_size_high_unit
[tx_size
];
2673 const int pred_block_width
= block_size_wide
[bsize
];
2674 const int pred_block_height
= block_size_high
[bsize
];
2675 const int tx_width
= tx_size_wide
[tx_size
];
2676 const int tx_height
= tx_size_high
[tx_size
];
2677 const int pred_width_in_transform_blocks
= pred_block_width
/ tx_width
;
2678 const int pred_height_in_transform_blocks
= pred_block_height
/ tx_height
;
2680 int best_can_skip
= 0;
2681 uint8_t best_dst
[8 * 8];
2682 #if CONFIG_HIGHBITDEPTH
2683 uint16_t best_dst16
[8 * 8];
2684 #endif // CONFIG_HIGHBITDEPTH
2685 const int is_lossless
= xd
->lossless
[xd
->mi
[0]->mbmi
.segment_id
];
2686 #if CONFIG_EXT_TX && CONFIG_RECT_TX
2687 const int sub_bsize
= bsize
;
2689 const int sub_bsize
= BLOCK_4X4
;
2690 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2693 od_rollback_buffer pre_buf
, post_buf
;
2694 od_encode_checkpoint(&x
->daala_enc
, &pre_buf
);
2695 od_encode_checkpoint(&x
->daala_enc
, &post_buf
);
2696 #endif // CONFIG_PVQ
2698 assert(bsize
< BLOCK_8X8
);
2699 assert(tx_width
< 8 || tx_height
< 8);
2700 #if CONFIG_EXT_TX && CONFIG_RECT_TX
2702 assert(tx_width
== 4 && tx_height
== 4);
2704 assert(tx_width
== pred_block_width
&& tx_height
== pred_block_height
);
2706 assert(tx_width
== 4 && tx_height
== 4);
2707 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2709 memcpy(ta
, a
, pred_width_in_transform_blocks
* sizeof(a
[0]));
2710 memcpy(tl
, l
, pred_height_in_transform_blocks
* sizeof(l
[0]));
2712 xd
->mi
[0]->mbmi
.tx_size
= tx_size
;
2715 xd
->mi
[0]->mbmi
.palette_mode_info
.palette_size
[0] = 0;
2716 #endif // CONFIG_PALETTE
2718 #if CONFIG_HIGHBITDEPTH
2719 if (xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
) {
2721 od_encode_checkpoint(&x
->daala_enc
, &pre_buf
);
2723 for (mode
= DC_PRED
; mode
<= TM_PRED
; ++mode
) {
2726 int64_t distortion
= 0;
2727 int rate
= bmode_costs
[mode
];
2730 if (!(cpi
->sf
.intra_y_mode_mask
[txsize_sqr_up_map
[tx_size
]] &
2734 // Only do the oblique modes if the best so far is
2735 // one of the neighboring directional modes
2736 if (cpi
->sf
.mode_search_skip_flags
& FLAG_SKIP_INTRA_DIRMISMATCH
) {
2737 if (conditional_skipintra(mode
, *best_mode
)) continue;
2740 memcpy(tempa
, ta
, pred_width_in_transform_blocks
* sizeof(ta
[0]));
2741 memcpy(templ
, tl
, pred_height_in_transform_blocks
* sizeof(tl
[0]));
2743 for (idy
= 0; idy
< pred_height_in_transform_blocks
; ++idy
) {
2744 for (idx
= 0; idx
< pred_width_in_transform_blocks
; ++idx
) {
2745 const int block_raster_idx
= (row
+ idy
) * 2 + (col
+ idx
);
2747 av1_raster_order_to_block_index(tx_size
, block_raster_idx
);
2748 const uint8_t *const src
= &src_init
[idx
* 4 + idy
* 4 * src_stride
];
2749 uint8_t *const dst
= &dst_init
[idx
* 4 + idy
* 4 * dst_stride
];
2751 int16_t *const src_diff
= av1_raster_block_offset_int16(
2752 BLOCK_8X8
, block_raster_idx
, p
->src_diff
);
2756 assert(IMPLIES(tx_size
== TX_4X8
|| tx_size
== TX_8X4
,
2757 idx
== 0 && idy
== 0));
2758 assert(IMPLIES(tx_size
== TX_4X8
|| tx_size
== TX_8X4
,
2759 block
== 0 || block
== 2));
2760 xd
->mi
[0]->bmi
[block_raster_idx
].as_mode
= mode
;
2761 av1_predict_intra_block(
2762 xd
, pd
->width
, pd
->height
, txsize_to_bsize
[tx_size
], mode
, dst
,
2763 dst_stride
, dst
, dst_stride
, col
+ idx
, row
+ idy
, 0);
2765 aom_highbd_subtract_block(tx_height
, tx_width
, src_diff
, 8, src
,
2766 src_stride
, dst
, dst_stride
, xd
->bd
);
2769 TX_TYPE tx_type
= get_tx_type(PLANE_TYPE_Y
, xd
, block
, tx_size
);
2770 const SCAN_ORDER
*scan_order
= get_scan(cm
, tx_size
, tx_type
, 0);
2771 const int coeff_ctx
=
2772 combine_entropy_contexts(tempa
[idx
], templ
[idy
]);
2774 av1_xform_quant(cm
, x
, 0, block
, row
+ idy
, col
+ idx
, BLOCK_8X8
,
2775 tx_size
, coeff_ctx
, AV1_XFORM_QUANT_FP
);
2776 ratey
+= av1_cost_coeffs(cpi
, x
, 0, block
, tx_size
, scan_order
,
2777 tempa
+ idx
, templ
+ idy
,
2778 cpi
->sf
.use_fast_coef_costing
);
2779 skip
= (p
->eobs
[block
] == 0);
2784 if (tx_size
== TX_8X4
) {
2785 tempa
[idx
+ 1] = tempa
[idx
];
2786 } else if (tx_size
== TX_4X8
) {
2787 templ
[idy
+ 1] = templ
[idy
];
2789 #endif // CONFIG_EXT_TX
2793 av1_xform_quant(cm
, x
, 0, block
, row
+ idy
, col
+ idx
, BLOCK_8X8
,
2794 tx_size
, coeff_ctx
, AV1_XFORM_QUANT_B
);
2797 skip
= x
->pvq_skip
[0];
2802 if (RDCOST(x
->rdmult
, x
->rddiv
, ratey
, distortion
) >= best_rd
)
2807 av1_inverse_transform_block(xd
, BLOCK_OFFSET(pd
->dqcoeff
, block
),
2808 DCT_DCT
, tx_size
, dst
, dst_stride
,
2813 TX_TYPE tx_type
= get_tx_type(PLANE_TYPE_Y
, xd
, block
, tx_size
);
2814 const SCAN_ORDER
*scan_order
= get_scan(cm
, tx_size
, tx_type
, 0);
2815 const int coeff_ctx
=
2816 combine_entropy_contexts(tempa
[idx
], templ
[idy
]);
2818 av1_xform_quant(cm
, x
, 0, block
, row
+ idy
, col
+ idx
, BLOCK_8X8
,
2819 tx_size
, coeff_ctx
, AV1_XFORM_QUANT_FP
);
2820 av1_optimize_b(cm
, x
, 0, block
, tx_size
, coeff_ctx
);
2821 ratey
+= av1_cost_coeffs(cpi
, x
, 0, block
, tx_size
, scan_order
,
2822 tempa
+ idx
, templ
+ idy
,
2823 cpi
->sf
.use_fast_coef_costing
);
2824 skip
= (p
->eobs
[block
] == 0);
2829 if (tx_size
== TX_8X4
) {
2830 tempa
[idx
+ 1] = tempa
[idx
];
2831 } else if (tx_size
== TX_4X8
) {
2832 templ
[idy
+ 1] = templ
[idy
];
2834 #endif // CONFIG_EXT_TX
2838 av1_xform_quant(cm
, x
, 0, block
, row
+ idy
, col
+ idx
, BLOCK_8X8
,
2839 tx_size
, coeff_ctx
, AV1_XFORM_QUANT_FP
);
2841 skip
= x
->pvq_skip
[0];
2849 av1_inverse_transform_block(xd
, BLOCK_OFFSET(pd
->dqcoeff
, block
),
2850 tx_type
, tx_size
, dst
, dst_stride
,
2852 cpi
->fn_ptr
[sub_bsize
].vf(src
, src_stride
, dst
, dst_stride
, &tmp
);
2853 dist
= (int64_t)tmp
<< 4;
2855 if (RDCOST(x
->rdmult
, x
->rddiv
, ratey
, distortion
) >= best_rd
)
2862 this_rd
= RDCOST(x
->rdmult
, x
->rddiv
, rate
, distortion
);
2864 if (this_rd
< best_rd
) {
2867 *bestdistortion
= distortion
;
2869 best_can_skip
= can_skip
;
2871 memcpy(a
, tempa
, pred_width_in_transform_blocks
* sizeof(tempa
[0]));
2872 memcpy(l
, templ
, pred_height_in_transform_blocks
* sizeof(templ
[0]));
2874 od_encode_checkpoint(&x
->daala_enc
, &post_buf
);
2876 for (idy
= 0; idy
< pred_height_in_transform_blocks
* 4; ++idy
) {
2877 memcpy(best_dst16
+ idy
* 8,
2878 CONVERT_TO_SHORTPTR(dst_init
+ idy
* dst_stride
),
2879 pred_width_in_transform_blocks
* 4 * sizeof(uint16_t));
2884 od_encode_rollback(&x
->daala_enc
, &pre_buf
);
2888 if (best_rd
>= rd_thresh
) return best_rd
;
2891 od_encode_rollback(&x
->daala_enc
, &post_buf
);
2894 if (y_skip
) *y_skip
&= best_can_skip
;
2896 for (idy
= 0; idy
< pred_height_in_transform_blocks
* 4; ++idy
) {
2897 memcpy(CONVERT_TO_SHORTPTR(dst_init
+ idy
* dst_stride
),
2898 best_dst16
+ idy
* 8,
2899 pred_width_in_transform_blocks
* 4 * sizeof(uint16_t));
2904 #endif // CONFIG_HIGHBITDEPTH
2907 od_encode_checkpoint(&x
->daala_enc
, &pre_buf
);
2908 #endif // CONFIG_PVQ
2910 for (mode
= DC_PRED
; mode
<= TM_PRED
; ++mode
) {
2913 int64_t distortion
= 0;
2914 int rate
= bmode_costs
[mode
];
2917 if (!(cpi
->sf
.intra_y_mode_mask
[txsize_sqr_up_map
[tx_size
]] &
2922 // Only do the oblique modes if the best so far is
2923 // one of the neighboring directional modes
2924 if (cpi
->sf
.mode_search_skip_flags
& FLAG_SKIP_INTRA_DIRMISMATCH
) {
2925 if (conditional_skipintra(mode
, *best_mode
)) continue;
2928 memcpy(tempa
, ta
, pred_width_in_transform_blocks
* sizeof(ta
[0]));
2929 memcpy(templ
, tl
, pred_height_in_transform_blocks
* sizeof(tl
[0]));
2931 for (idy
= 0; idy
< pred_height_in_4x4_blocks
; idy
+= tx_height_unit
) {
2932 for (idx
= 0; idx
< pred_width_in_4x4_blocks
; idx
+= tx_width_unit
) {
2933 const int block_raster_idx
= (row
+ idy
) * 2 + (col
+ idx
);
2934 int block
= av1_raster_order_to_block_index(tx_size
, block_raster_idx
);
2935 const uint8_t *const src
= &src_init
[idx
* 4 + idy
* 4 * src_stride
];
2936 uint8_t *const dst
= &dst_init
[idx
* 4 + idy
* 4 * dst_stride
];
2938 int16_t *const src_diff
= av1_raster_block_offset_int16(
2939 BLOCK_8X8
, block_raster_idx
, p
->src_diff
);
2940 #endif // !CONFIG_PVQ
2943 assert(IMPLIES(tx_size
== TX_4X8
|| tx_size
== TX_8X4
,
2944 idx
== 0 && idy
== 0));
2945 assert(IMPLIES(tx_size
== TX_4X8
|| tx_size
== TX_8X4
,
2946 block
== 0 || block
== 2));
2947 xd
->mi
[0]->bmi
[block_raster_idx
].as_mode
= mode
;
2948 av1_predict_intra_block(xd
, pd
->width
, pd
->height
,
2949 txsize_to_bsize
[tx_size
], mode
, dst
, dst_stride
,
2952 2 * (col
+ idx
), 2 * (row
+ idy
),
2954 col
+ idx
, row
+ idy
,
2955 #endif // CONFIG_CB4X4
2958 aom_subtract_block(tx_height
, tx_width
, src_diff
, 8, src
, src_stride
,
2960 #endif // !CONFIG_PVQ
2962 TX_TYPE tx_type
= get_tx_type(PLANE_TYPE_Y
, xd
, block
, tx_size
);
2963 const SCAN_ORDER
*scan_order
= get_scan(cm
, tx_size
, tx_type
, 0);
2964 const int coeff_ctx
= combine_entropy_contexts(tempa
[idx
], templ
[idy
]);
2967 #endif // CONFIG_CB4X4
2969 const AV1_XFORM_QUANT xform_quant
=
2970 is_lossless
? AV1_XFORM_QUANT_B
: AV1_XFORM_QUANT_FP
;
2971 av1_xform_quant(cm
, x
, 0, block
,
2973 2 * (row
+ idy
), 2 * (col
+ idx
),
2975 row
+ idy
, col
+ idx
,
2976 #endif // CONFIG_CB4X4
2977 BLOCK_8X8
, tx_size
, coeff_ctx
, xform_quant
);
2979 av1_optimize_b(cm
, x
, 0, block
, tx_size
, coeff_ctx
);
2982 av1_cost_coeffs(cpi
, x
, 0, block
, tx_size
, scan_order
, tempa
+ idx
,
2983 templ
+ idy
, cpi
->sf
.use_fast_coef_costing
);
2984 skip
= (p
->eobs
[block
] == 0);
2989 if (tx_size
== TX_8X4
) {
2990 tempa
[idx
+ 1] = tempa
[idx
];
2991 } else if (tx_size
== TX_4X8
) {
2992 templ
[idy
+ 1] = templ
[idy
];
2994 #endif // CONFIG_EXT_TX
2998 av1_xform_quant(cm
, x
, 0, block
,
3000 2 * (row
+ idy
), 2 * (col
+ idx
),
3002 row
+ idy
, col
+ idx
,
3003 #endif // CONFIG_CB4X4
3004 BLOCK_8X8
, tx_size
, coeff_ctx
, AV1_XFORM_QUANT_FP
);
3007 skip
= x
->pvq_skip
[0];
3011 #endif // !CONFIG_PVQ
3013 if (!is_lossless
) { // To use the pixel domain distortion, we need to
3014 // calculate inverse txfm *before* calculating RD
3015 // cost. Compared to calculating the distortion in
3016 // the frequency domain, the overhead of encoding
3020 #endif // CONFIG_PVQ
3021 av1_inverse_transform_block(xd
, BLOCK_OFFSET(pd
->dqcoeff
, block
),
3022 tx_type
, tx_size
, dst
, dst_stride
,
3025 cpi
->fn_ptr
[sub_bsize
].vf(src
, src_stride
, dst
, dst_stride
, &tmp
);
3026 const int64_t dist
= (int64_t)tmp
<< 4;
3030 if (RDCOST(x
->rdmult
, x
->rddiv
, ratey
, distortion
) >= best_rd
)
3033 if (is_lossless
) { // Calculate inverse txfm *after* RD cost.
3036 #endif // CONFIG_PVQ
3037 av1_inverse_transform_block(xd
, BLOCK_OFFSET(pd
->dqcoeff
, block
),
3038 DCT_DCT
, tx_size
, dst
, dst_stride
,
3045 this_rd
= RDCOST(x
->rdmult
, x
->rddiv
, rate
, distortion
);
3047 if (this_rd
< best_rd
) {
3050 *bestdistortion
= distortion
;
3052 best_can_skip
= can_skip
;
3054 memcpy(a
, tempa
, pred_width_in_transform_blocks
* sizeof(tempa
[0]));
3055 memcpy(l
, templ
, pred_height_in_transform_blocks
* sizeof(templ
[0]));
3057 od_encode_checkpoint(&x
->daala_enc
, &post_buf
);
3058 #endif // CONFIG_PVQ
3059 for (idy
= 0; idy
< pred_height_in_transform_blocks
* 4; ++idy
)
3060 memcpy(best_dst
+ idy
* 8, dst_init
+ idy
* dst_stride
,
3061 pred_width_in_transform_blocks
* 4);
3065 od_encode_rollback(&x
->daala_enc
, &pre_buf
);
3066 #endif // CONFIG_PVQ
3067 } // mode decision loop
3069 if (best_rd
>= rd_thresh
) return best_rd
;
3072 od_encode_rollback(&x
->daala_enc
, &post_buf
);
3073 #endif // CONFIG_PVQ
3075 if (y_skip
) *y_skip
&= best_can_skip
;
3077 for (idy
= 0; idy
< pred_height_in_transform_blocks
* 4; ++idy
)
3078 memcpy(dst_init
+ idy
* dst_stride
, best_dst
+ idy
* 8,
3079 pred_width_in_transform_blocks
* 4);
3084 static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP
*const cpi
,
3085 MACROBLOCK
*mb
, int *rate
,
3086 int *rate_y
, int64_t *distortion
,
3087 int *y_skip
, int64_t best_rd
) {
3088 const MACROBLOCKD
*const xd
= &mb
->e_mbd
;
3089 MODE_INFO
*const mic
= xd
->mi
[0];
3090 const MODE_INFO
*above_mi
= xd
->above_mi
;
3091 const MODE_INFO
*left_mi
= xd
->left_mi
;
3092 MB_MODE_INFO
*const mbmi
= &mic
->mbmi
;
3093 assert(!is_inter_block(mbmi
));
3094 const BLOCK_SIZE bsize
= mbmi
->sb_type
;
3095 const int pred_width_in_4x4_blocks
= num_4x4_blocks_wide_lookup
[bsize
];
3096 const int pred_height_in_4x4_blocks
= num_4x4_blocks_high_lookup
[bsize
];
3099 int64_t total_distortion
= 0;
3101 int64_t total_rd
= 0;
3102 const int *bmode_costs
= cpi
->mbmode_cost
[0];
3103 const int is_lossless
= xd
->lossless
[mbmi
->segment_id
];
3104 #if CONFIG_EXT_TX && CONFIG_RECT_TX
3105 const TX_SIZE tx_size
= is_lossless
? TX_4X4
: max_txsize_rect_lookup
[bsize
];
3107 const TX_SIZE tx_size
= TX_4X4
;
3108 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
3110 #if CONFIG_EXT_INTRA
3111 #if CONFIG_INTRA_INTERP
3112 mbmi
->intra_filter
= INTRA_FILTER_LINEAR
;
3113 #endif // CONFIG_INTRA_INTERP
3114 #endif // CONFIG_EXT_INTRA
3115 #if CONFIG_FILTER_INTRA
3116 mbmi
->filter_intra_mode_info
.use_filter_intra_mode
[0] = 0;
3117 #endif // CONFIG_FILTER_INTRA
3119 // TODO(any): Add search of the tx_type to improve rd performance at the
3120 // expense of speed.
3121 mbmi
->tx_type
= DCT_DCT
;
3122 mbmi
->tx_size
= tx_size
;
3124 if (y_skip
) *y_skip
= 1;
3126 // Pick modes for each prediction sub-block (of size 4x4, 4x8, or 8x4) in this
3127 // 8x8 coding block.
3128 for (idy
= 0; idy
< 2; idy
+= pred_height_in_4x4_blocks
) {
3129 for (idx
= 0; idx
< 2; idx
+= pred_width_in_4x4_blocks
) {
3130 PREDICTION_MODE best_mode
= DC_PRED
;
3131 int r
= INT_MAX
, ry
= INT_MAX
;
3132 int64_t d
= INT64_MAX
, this_rd
= INT64_MAX
;
3134 const int pred_block_idx
= idy
* 2 + idx
;
3135 if (cpi
->common
.frame_type
== KEY_FRAME
) {
3136 const PREDICTION_MODE A
=
3137 av1_above_block_mode(mic
, above_mi
, pred_block_idx
);
3138 const PREDICTION_MODE L
=
3139 av1_left_block_mode(mic
, left_mi
, pred_block_idx
);
3141 bmode_costs
= cpi
->y_mode_costs
[A
][L
];
3143 this_rd
= rd_pick_intra_sub_8x8_y_subblock_mode(
3144 cpi
, mb
, idy
, idx
, &best_mode
, bmode_costs
,
3145 xd
->plane
[0].above_context
+ idx
, xd
->plane
[0].left_context
+ idy
, &r
,
3146 &ry
, &d
, bsize
, tx_size
, y_skip
, best_rd
- total_rd
);
3147 #if !CONFIG_DAALA_DIST
3148 if (this_rd
>= best_rd
- total_rd
) return INT64_MAX
;
3149 #endif // !CONFIG_DAALA_DIST
3150 total_rd
+= this_rd
;
3152 total_distortion
+= d
;
3155 mic
->bmi
[pred_block_idx
].as_mode
= best_mode
;
3156 for (j
= 1; j
< pred_height_in_4x4_blocks
; ++j
)
3157 mic
->bmi
[pred_block_idx
+ j
* 2].as_mode
= best_mode
;
3158 for (j
= 1; j
< pred_width_in_4x4_blocks
; ++j
)
3159 mic
->bmi
[pred_block_idx
+ j
].as_mode
= best_mode
;
3161 if (total_rd
>= best_rd
) return INT64_MAX
;
3164 mbmi
->mode
= mic
->bmi
[3].as_mode
;
3166 #if CONFIG_DAALA_DIST
3168 const struct macroblock_plane
*p
= &mb
->plane
[0];
3169 const struct macroblockd_plane
*pd
= &xd
->plane
[0];
3170 const int src_stride
= p
->src
.stride
;
3171 const int dst_stride
= pd
->dst
.stride
;
3172 uint8_t *src
= p
->src
.buf
;
3173 uint8_t *dst
= pd
->dst
.buf
;
3174 int use_activity_masking
= 0;
3178 use_activity_masking
= mb
->daala_enc
.use_activity_masking
;
3179 #endif // CONFIG_PVQ
3180 // Daala-defined distortion computed for the block of 8x8 pixels
3181 total_distortion
= av1_daala_dist(src
, src_stride
, dst
, dst_stride
, 8, 8,
3182 qm
, use_activity_masking
, mb
->qindex
)
3185 #endif // CONFIG_DAALA_DIST
3186 // Add in the cost of the transform type
3188 int rate_tx_type
= 0;
3190 if (get_ext_tx_types(tx_size
, bsize
, 0, cpi
->common
.reduced_tx_set_used
) >
3193 get_ext_tx_set(tx_size
, bsize
, 0, cpi
->common
.reduced_tx_set_used
);
3194 rate_tx_type
= cpi
->intra_tx_type_costs
[eset
][txsize_sqr_map
[tx_size
]]
3195 [mbmi
->mode
][mbmi
->tx_type
];
3199 cpi
->intra_tx_type_costs
[txsize_sqr_map
[tx_size
]]
3200 [intra_mode_to_tx_type_context
[mbmi
->mode
]]
3202 #endif // CONFIG_EXT_TX
3203 assert(mbmi
->tx_size
== tx_size
);
3204 cost
+= rate_tx_type
;
3205 tot_rate_y
+= rate_tx_type
;
3209 *rate_y
= tot_rate_y
;
3210 *distortion
= total_distortion
;
3212 return RDCOST(mb
->rdmult
, mb
->rddiv
, cost
, total_distortion
);
3215 #if CONFIG_FILTER_INTRA
// Return 1 if a filter intra mode is selected; return 0 otherwise.
3217 static int rd_pick_filter_intra_sby(const AV1_COMP
*const cpi
, MACROBLOCK
*x
,
3218 int *rate
, int *rate_tokenonly
,
3219 int64_t *distortion
, int *skippable
,
3220 BLOCK_SIZE bsize
, int mode_cost
,
3221 int64_t *best_rd
, int64_t *best_model_rd
,
3222 uint16_t skip_mask
) {
3223 MACROBLOCKD
*const xd
= &x
->e_mbd
;
3224 MODE_INFO
*const mic
= xd
->mi
[0];
3225 MB_MODE_INFO
*mbmi
= &mic
->mbmi
;
3226 int filter_intra_selected_flag
= 0;
3227 FILTER_INTRA_MODE mode
;
3228 TX_SIZE best_tx_size
= TX_4X4
;
3229 FILTER_INTRA_MODE_INFO filter_intra_mode_info
;
3230 TX_TYPE best_tx_type
;
3232 av1_zero(filter_intra_mode_info
);
3233 mbmi
->filter_intra_mode_info
.use_filter_intra_mode
[0] = 1;
3234 mbmi
->mode
= DC_PRED
;
3236 mbmi
->palette_mode_info
.palette_size
[0] = 0;
3237 #endif // CONFIG_PALETTE
3239 for (mode
= 0; mode
< FILTER_INTRA_MODES
; ++mode
) {
3241 int64_t this_rd
, this_model_rd
;
3242 RD_STATS tokenonly_rd_stats
;
3243 if (skip_mask
& (1 << mode
)) continue;
3244 mbmi
->filter_intra_mode_info
.filter_intra_mode
[0] = mode
;
3245 this_model_rd
= intra_model_yrd(cpi
, x
, bsize
, mode_cost
);
3246 if (*best_model_rd
!= INT64_MAX
&&
3247 this_model_rd
> *best_model_rd
+ (*best_model_rd
>> 1))
3249 if (this_model_rd
< *best_model_rd
) *best_model_rd
= this_model_rd
;
3250 super_block_yrd(cpi
, x
, &tokenonly_rd_stats
, bsize
, *best_rd
);
3251 if (tokenonly_rd_stats
.rate
== INT_MAX
) continue;
3252 this_rate
= tokenonly_rd_stats
.rate
+
3253 av1_cost_bit(cpi
->common
.fc
->filter_intra_probs
[0], 1) +
3254 write_uniform_cost(FILTER_INTRA_MODES
, mode
) + mode_cost
;
3255 this_rd
= RDCOST(x
->rdmult
, x
->rddiv
, this_rate
, tokenonly_rd_stats
.dist
);
3257 if (this_rd
< *best_rd
) {
3259 best_tx_size
= mic
->mbmi
.tx_size
;
3260 filter_intra_mode_info
= mbmi
->filter_intra_mode_info
;
3261 best_tx_type
= mic
->mbmi
.tx_type
;
3263 *rate_tokenonly
= tokenonly_rd_stats
.rate
;
3264 *distortion
= tokenonly_rd_stats
.dist
;
3265 *skippable
= tokenonly_rd_stats
.skip
;
3266 filter_intra_selected_flag
= 1;
3270 if (filter_intra_selected_flag
) {
3271 mbmi
->mode
= DC_PRED
;
3272 mbmi
->tx_size
= best_tx_size
;
3273 mbmi
->filter_intra_mode_info
.use_filter_intra_mode
[0] =
3274 filter_intra_mode_info
.use_filter_intra_mode
[0];
3275 mbmi
->filter_intra_mode_info
.filter_intra_mode
[0] =
3276 filter_intra_mode_info
.filter_intra_mode
[0];
3277 mbmi
->tx_type
= best_tx_type
;
3283 #endif // CONFIG_FILTER_INTRA
3285 #if CONFIG_EXT_INTRA
// Run the RD calculation with the given luma intra prediction angle, and
// return the RD cost. Update the best mode info if the RD cost is the best
// so far.
3288 static int64_t calc_rd_given_intra_angle(
3289 const AV1_COMP
*const cpi
, MACROBLOCK
*x
, BLOCK_SIZE bsize
, int mode_cost
,
3290 int64_t best_rd_in
, int8_t angle_delta
, int max_angle_delta
, int *rate
,
3291 RD_STATS
*rd_stats
, int *best_angle_delta
, TX_SIZE
*best_tx_size
,
3292 TX_TYPE
*best_tx_type
,
3293 #if CONFIG_INTRA_INTERP
3294 INTRA_FILTER
*best_filter
,
3295 #endif // CONFIG_INTRA_INTERP
3296 int64_t *best_rd
, int64_t *best_model_rd
) {
3298 RD_STATS tokenonly_rd_stats
;
3299 int64_t this_rd
, this_model_rd
;
3300 MB_MODE_INFO
*mbmi
= &x
->e_mbd
.mi
[0]->mbmi
;
3301 assert(!is_inter_block(mbmi
));
3303 mbmi
->angle_delta
[0] = angle_delta
;
3304 this_model_rd
= intra_model_yrd(cpi
, x
, bsize
, mode_cost
);
3305 if (*best_model_rd
!= INT64_MAX
&&
3306 this_model_rd
> *best_model_rd
+ (*best_model_rd
>> 1))
3308 if (this_model_rd
< *best_model_rd
) *best_model_rd
= this_model_rd
;
3309 super_block_yrd(cpi
, x
, &tokenonly_rd_stats
, bsize
, best_rd_in
);
3310 if (tokenonly_rd_stats
.rate
== INT_MAX
) return INT64_MAX
;
3312 this_rate
= tokenonly_rd_stats
.rate
+ mode_cost
+
3313 write_uniform_cost(2 * max_angle_delta
+ 1,
3314 mbmi
->angle_delta
[0] + max_angle_delta
);
3315 this_rd
= RDCOST(x
->rdmult
, x
->rddiv
, this_rate
, tokenonly_rd_stats
.dist
);
3317 if (this_rd
< *best_rd
) {
3319 *best_angle_delta
= mbmi
->angle_delta
[0];
3320 *best_tx_size
= mbmi
->tx_size
;
3321 #if CONFIG_INTRA_INTERP
3322 *best_filter
= mbmi
->intra_filter
;
3323 #endif // CONFIG_INTRA_INTERP
3324 *best_tx_type
= mbmi
->tx_type
;
3326 rd_stats
->rate
= tokenonly_rd_stats
.rate
;
3327 rd_stats
->dist
= tokenonly_rd_stats
.dist
;
3328 rd_stats
->skip
= tokenonly_rd_stats
.skip
;
3333 // With given luma directional intra prediction mode, pick the best angle delta
3334 // Return the RD cost corresponding to the best angle delta.
3335 static int64_t rd_pick_intra_angle_sby(const AV1_COMP
*const cpi
, MACROBLOCK
*x
,
3336 int *rate
, RD_STATS
*rd_stats
,
3337 BLOCK_SIZE bsize
, int mode_cost
,
3339 int64_t *best_model_rd
) {
3340 MACROBLOCKD
*const xd
= &x
->e_mbd
;
3341 MODE_INFO
*const mic
= xd
->mi
[0];
3342 MB_MODE_INFO
*mbmi
= &mic
->mbmi
;
3343 assert(!is_inter_block(mbmi
));
3344 int i
, angle_delta
, best_angle_delta
= 0;
3346 #if CONFIG_INTRA_INTERP
3348 const int intra_filter_ctx
= av1_get_pred_context_intra_interp(xd
);
3349 INTRA_FILTER filter
, best_filter
= INTRA_FILTER_LINEAR
;
3350 #endif // CONFIG_INTRA_INTERP
3351 int64_t this_rd
, best_rd_in
, rd_cost
[2 * (MAX_ANGLE_DELTA
+ 2)];
3352 TX_SIZE best_tx_size
= mic
->mbmi
.tx_size
;
3353 TX_TYPE best_tx_type
= mbmi
->tx_type
;
3355 for (i
= 0; i
< 2 * (MAX_ANGLE_DELTA
+ 2); ++i
) rd_cost
[i
] = INT64_MAX
;
3357 for (angle_delta
= 0; angle_delta
<= MAX_ANGLE_DELTA
; angle_delta
+= 2) {
3358 #if CONFIG_INTRA_INTERP
3359 for (filter
= INTRA_FILTER_LINEAR
; filter
< INTRA_FILTERS
; ++filter
) {
3360 if (FILTER_FAST_SEARCH
&& filter
!= INTRA_FILTER_LINEAR
) continue;
3361 mic
->mbmi
.intra_filter
= filter
;
3362 #endif // CONFIG_INTRA_INTERP
3363 for (i
= 0; i
< 2; ++i
) {
3364 best_rd_in
= (best_rd
== INT64_MAX
)
3366 : (best_rd
+ (best_rd
>> (first_try
? 3 : 5)));
3367 this_rd
= calc_rd_given_intra_angle(
3369 #if CONFIG_INTRA_INTERP
3370 mode_cost
+ cpi
->intra_filter_cost
[intra_filter_ctx
][filter
],
3373 #endif // CONFIG_INTRA_INTERP
3374 best_rd_in
, (1 - 2 * i
) * angle_delta
, MAX_ANGLE_DELTA
, rate
,
3375 rd_stats
, &best_angle_delta
, &best_tx_size
, &best_tx_type
,
3376 #if CONFIG_INTRA_INTERP
3378 #endif // CONFIG_INTRA_INTERP
3379 &best_rd
, best_model_rd
);
3380 rd_cost
[2 * angle_delta
+ i
] = this_rd
;
3381 if (first_try
&& this_rd
== INT64_MAX
) return best_rd
;
3383 if (angle_delta
== 0) {
3384 rd_cost
[1] = this_rd
;
3388 #if CONFIG_INTRA_INTERP
3390 #endif // CONFIG_INTRA_INTERP
3393 assert(best_rd
!= INT64_MAX
);
3394 for (angle_delta
= 1; angle_delta
<= MAX_ANGLE_DELTA
; angle_delta
+= 2) {
3396 #if CONFIG_INTRA_INTERP
3397 for (filter
= INTRA_FILTER_LINEAR
; filter
< INTRA_FILTERS
; ++filter
) {
3398 if (FILTER_FAST_SEARCH
&& filter
!= INTRA_FILTER_LINEAR
) continue;
3399 mic
->mbmi
.intra_filter
= filter
;
3400 #endif // CONFIG_INTRA_INTERP
3401 for (i
= 0; i
< 2; ++i
) {
3402 int skip_search
= 0;
3403 rd_thresh
= best_rd
+ (best_rd
>> 5);
3404 if (rd_cost
[2 * (angle_delta
+ 1) + i
] > rd_thresh
&&
3405 rd_cost
[2 * (angle_delta
- 1) + i
] > rd_thresh
)
3408 calc_rd_given_intra_angle(
3410 #if CONFIG_INTRA_INTERP
3411 mode_cost
+ cpi
->intra_filter_cost
[intra_filter_ctx
][filter
],
3414 #endif // CONFIG_INTRA_INTERP
3415 best_rd
, (1 - 2 * i
) * angle_delta
, MAX_ANGLE_DELTA
, rate
,
3416 rd_stats
, &best_angle_delta
, &best_tx_size
, &best_tx_type
,
3417 #if CONFIG_INTRA_INTERP
3419 #endif // CONFIG_INTRA_INTERP
3420 &best_rd
, best_model_rd
);
3423 #if CONFIG_INTRA_INTERP
3425 #endif // CONFIG_INTRA_INTERP
3428 #if CONFIG_INTRA_INTERP
3429 if (FILTER_FAST_SEARCH
&& rd_stats
->rate
< INT_MAX
) {
3430 p_angle
= mode_to_angle_map
[mbmi
->mode
] + best_angle_delta
* ANGLE_STEP
;
3431 if (av1_is_intra_filter_switchable(p_angle
)) {
3432 for (filter
= INTRA_FILTER_LINEAR
+ 1; filter
< INTRA_FILTERS
; ++filter
) {
3433 mic
->mbmi
.intra_filter
= filter
;
3434 this_rd
= calc_rd_given_intra_angle(
3436 mode_cost
+ cpi
->intra_filter_cost
[intra_filter_ctx
][filter
],
3437 best_rd
, best_angle_delta
, MAX_ANGLE_DELTA
, rate
, rd_stats
,
3438 &best_angle_delta
, &best_tx_size
, &best_tx_type
, &best_filter
,
3439 &best_rd
, best_model_rd
);
3443 #endif // CONFIG_INTRA_INTERP
3445 mbmi
->tx_size
= best_tx_size
;
3446 mbmi
->angle_delta
[0] = best_angle_delta
;
3447 #if CONFIG_INTRA_INTERP
3448 mic
->mbmi
.intra_filter
= best_filter
;
3449 #endif // CONFIG_INTRA_INTERP
3450 mbmi
->tx_type
= best_tx_type
;
// Indices are sign, integer, and fractional part of the gradient value
static const uint8_t gradient_to_angle_bin[2][7][16] = {
  {
      { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
  },
  {
      { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
      { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
  },
};

/* clang-format off */
static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
  0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
#if CONFIG_ALT_INTRA
  0,
#endif  // CONFIG_ALT_INTRA
};
/* clang-format on */
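
// angle_estimation() below builds a histogram of local gradient directions:
// for every pixel it takes the horizontal and vertical differences (dx, dy),
// maps their ratio through gradient_to_angle_bin[] into one of the
// DIRECTIONAL_MODES bins, and accumulates the squared gradient magnitude as
// the bin weight. Directional prediction modes whose neighborhood of bins
// carries only a small share of the total weight are flagged in
// directional_mode_skip_mask so the mode search can skip them.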
static void angle_estimation(const uint8_t *src, int src_stride, int rows,
                             int cols, BLOCK_SIZE bsize,
                             uint8_t *directional_mode_skip_mask) {
  memset(directional_mode_skip_mask, 0,
         INTRA_MODES * sizeof(*directional_mode_skip_mask));
  // Sub-8x8 blocks do not use extra directions.
  if (bsize < BLOCK_8X8) return;
  uint64_t hist[DIRECTIONAL_MODES];
  memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
  src += src_stride;
  int r, c, dx, dy;
  for (r = 1; r < rows; ++r) {
    for (c = 1; c < cols; ++c) {
      dx = src[c] - src[c - 1];
      dy = src[c] - src[c - src_stride];
      int index;
      const int temp = dx * dx + dy * dy;
      if (dy == 0) {
        index = 2;
      } else {
        const int sn = (dx > 0) ^ (dy > 0);
        dx = abs(dx);
        dy = abs(dy);
        const int remd = (dx % dy) * 16 / dy;
        const int quot = dx / dy;
        index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
      }
      hist[index] += temp;
    }
    src += src_stride;
  }

  int i;
  uint64_t hist_sum = 0;
  for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
  for (i = 0; i < INTRA_MODES; ++i) {
    if (av1_is_directional_mode(i, bsize)) {
      const uint8_t angle_bin = mode_to_angle_bin[i];
      uint64_t score = 2 * hist[angle_bin];
      int weight = 2;
      if (angle_bin > 0) {
        score += hist[angle_bin - 1];
        ++weight;
      }
      if (angle_bin < DIRECTIONAL_MODES - 1) {
        score += hist[angle_bin + 1];
        ++weight;
      }
      if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
        directional_mode_skip_mask[i] = 1;
    }
  }
}
#if CONFIG_HIGHBITDEPTH
static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
                                    int rows, int cols, BLOCK_SIZE bsize,
                                    uint8_t *directional_mode_skip_mask) {
  memset(directional_mode_skip_mask, 0,
         INTRA_MODES * sizeof(*directional_mode_skip_mask));
  // Sub-8x8 blocks do not use extra directions.
  if (bsize < BLOCK_8X8) return;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint64_t hist[DIRECTIONAL_MODES];
  memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
  src += src_stride;
  int r, c, dx, dy;
  for (r = 1; r < rows; ++r) {
    for (c = 1; c < cols; ++c) {
      dx = src[c] - src[c - 1];
      dy = src[c] - src[c - src_stride];
      int index;
      const int temp = dx * dx + dy * dy;
      if (dy == 0) {
        index = 2;
      } else {
        const int sn = (dx > 0) ^ (dy > 0);
        dx = abs(dx);
        dy = abs(dy);
        const int remd = (dx % dy) * 16 / dy;
        const int quot = dx / dy;
        index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
      }
      hist[index] += temp;
    }
    src += src_stride;
  }

  int i;
  uint64_t hist_sum = 0;
  for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
  for (i = 0; i < INTRA_MODES; ++i) {
    if (av1_is_directional_mode(i, bsize)) {
      const uint8_t angle_bin = mode_to_angle_bin[i];
      uint64_t score = 2 * hist[angle_bin];
      int weight = 2;
      if (angle_bin > 0) {
        score += hist[angle_bin - 1];
        ++weight;
      }
      if (angle_bin < DIRECTIONAL_MODES - 1) {
        score += hist[angle_bin + 1];
        ++weight;
      }
      if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
        directional_mode_skip_mask[i] = 1;
    }
  }
}
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_EXT_INTRA
// This function is used only for intra_only frames
static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize, int64_t best_rd) {
  uint8_t mode_idx;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_model_rd = INT64_MAX;
#if CONFIG_EXT_INTRA
  const int rows = block_size_high[bsize];
  const int cols = block_size_wide[bsize];
#if CONFIG_INTRA_INTERP
  const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
#endif  // CONFIG_INTRA_INTERP
  int is_directional_mode;
  uint8_t directional_mode_skip_mask[INTRA_MODES];
  const int src_stride = x->plane[0].src.stride;
  const uint8_t *src = x->plane[0].src.buf;
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  int beat_best_rd = 0;
  uint16_t filter_intra_mode_skip_mask = (1 << FILTER_INTRA_MODES) - 1;
#endif  // CONFIG_FILTER_INTRA
  const int *bmode_costs;
#if CONFIG_PALETTE
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  uint8_t *best_palette_color_map =
      cpi->common.allow_screen_content_tools
          ? x->palette_buffer->best_palette_color_map
          : NULL;
  int palette_y_mode_ctx = 0;
  const int try_palette =
      cpi->common.allow_screen_content_tools && bsize >= BLOCK_8X8;
#endif  // CONFIG_PALETTE
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = av1_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
  const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
#if CONFIG_PVQ
  od_rollback_buffer pre_buf, post_buf;

  od_encode_checkpoint(&x->daala_enc, &pre_buf);
  od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif  // CONFIG_PVQ
  bmode_costs = cpi->y_mode_costs[A][L];

#if CONFIG_EXT_INTRA
  mbmi->angle_delta[0] = 0;
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    highbd_angle_estimation(src, src_stride, rows, cols, bsize,
                            directional_mode_skip_mask);
  else
#endif  // CONFIG_HIGHBITDEPTH
    angle_estimation(src, src_stride, rows, cols, bsize,
                     directional_mode_skip_mask);
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif  // CONFIG_FILTER_INTRA
#if CONFIG_PALETTE
  pmi->palette_size[0] = 0;
  if (above_mi)
    palette_y_mode_ctx +=
        (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
  if (left_mi)
    palette_y_mode_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
#endif  // CONFIG_PALETTE

  if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
    x->use_default_intra_tx_type = 1;
  else
    x->use_default_intra_tx_type = 0;

  /* Y Search for intra prediction mode */
  for (mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
    RD_STATS this_rd_stats;
    int this_rate, this_rate_tokenonly, s;
    int64_t this_distortion, this_rd, this_model_rd;
    if (mode_idx == FINAL_MODE_SEARCH) {
      if (x->use_default_intra_tx_type == 0) break;
      mbmi->mode = best_mbmi.mode;
      x->use_default_intra_tx_type = 0;
    } else {
      mbmi->mode = mode_idx;
    }
#if CONFIG_PVQ
    od_encode_rollback(&x->daala_enc, &pre_buf);
#endif  // CONFIG_PVQ
#if CONFIG_EXT_INTRA
    mbmi->angle_delta[0] = 0;
#endif  // CONFIG_EXT_INTRA
    this_model_rd = intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode]);
    if (best_model_rd != INT64_MAX &&
        this_model_rd > best_model_rd + (best_model_rd >> 1))
      continue;
    if (this_model_rd < best_model_rd) best_model_rd = this_model_rd;
#if CONFIG_EXT_INTRA
    is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
    if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
    if (is_directional_mode) {
      this_rd_stats.rate = INT_MAX;
      rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
                              bmode_costs[mbmi->mode], best_rd, &best_model_rd);
    } else {
      super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
    }
#else
    super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
#endif  // CONFIG_EXT_INTRA
    this_rate_tokenonly = this_rd_stats.rate;
    this_distortion = this_rd_stats.dist;
    s = this_rd_stats.skip;

    if (this_rate_tokenonly == INT_MAX) continue;

    this_rate = this_rate_tokenonly + bmode_costs[mbmi->mode];

    if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
      // super_block_yrd above includes the cost of the tx_size in the
      // tokenonly rate, but for intra blocks, tx_size is always coded
      // (prediction granularity), so we account for it in the full rate,
      // not the tokenonly rate.
      this_rate_tokenonly -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
    }
#if CONFIG_PALETTE
    if (try_palette && mbmi->mode == DC_PRED) {
      this_rate +=
          av1_cost_bit(av1_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                      [palette_y_mode_ctx],
                       0);
    }
#endif  // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
    if (mbmi->mode == DC_PRED)
      this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 0);
#endif  // CONFIG_FILTER_INTRA
#if CONFIG_EXT_INTRA
    if (is_directional_mode) {
#if CONFIG_INTRA_INTERP
      const int p_angle =
          mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
      if (av1_is_intra_filter_switchable(p_angle))
        this_rate +=
            cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
#endif  // CONFIG_INTRA_INTERP
      this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                      MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
    }
#endif  // CONFIG_EXT_INTRA
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
#if CONFIG_FILTER_INTRA
    if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) {
      filter_intra_mode_skip_mask ^= (1 << mbmi->mode);
    }
#endif  // CONFIG_FILTER_INTRA

    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
#if CONFIG_FILTER_INTRA
      beat_best_rd = 1;
#endif  // CONFIG_FILTER_INTRA
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
#if CONFIG_PVQ
      od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif  // CONFIG_PVQ
    }
  }

#if CONFIG_PVQ
  od_encode_rollback(&x->daala_enc, &post_buf);
#endif  // CONFIG_PVQ

  // Perform one extra txfm_rd_in_plane() call, this time with the best value
  // so we can store reconstructed luma values
  RD_STATS this_rd_stats;
  txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize,
                   mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing);

#if CONFIG_PALETTE
  if (try_palette) {
    rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
                              bmode_costs[DC_PRED], &best_mbmi,
                              best_palette_color_map, &best_rd, &best_model_rd,
                              rate, rate_tokenonly, distortion, skippable);
  }
#endif  // CONFIG_PALETTE

#if CONFIG_FILTER_INTRA
  if (beat_best_rd) {
    if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
                                 skippable, bsize, bmode_costs[DC_PRED],
                                 &best_rd, &best_model_rd,
                                 filter_intra_mode_skip_mask)) {
      best_mbmi = *mbmi;
    }
  }
#endif  // CONFIG_FILTER_INTRA

  *mbmi = best_mbmi;
  return best_rd;
}
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
                            RD_STATS *rd_stats, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
  int plane;
  int is_cost_valid = 1;
  av1_init_rd_stats(rd_stats);

  if (ref_best_rd < 0) is_cost_valid = 0;

#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
  if (x->skip_chroma_rd) return is_cost_valid;

  bsize = scale_chroma_bsize(bsize, xd->plane[1].subsampling_x,
                             xd->plane[1].subsampling_y);
#endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2

#if !CONFIG_PVQ
  if (is_inter_block(mbmi) && is_cost_valid) {
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      av1_subtract_plane(x, bsize, plane);
  }
#endif  // !CONFIG_PVQ

  if (is_cost_valid) {
    for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
      RD_STATS pn_rd_stats;
      txfm_rd_in_plane(x, cpi, &pn_rd_stats, ref_best_rd, plane, bsize,
                       uv_tx_size, cpi->sf.use_fast_coef_costing);
      if (pn_rd_stats.rate == INT_MAX) {
        is_cost_valid = 0;
        break;
      }
      av1_merge_rd_stats(rd_stats, &pn_rd_stats);
      if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >
              ref_best_rd &&
          RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse) > ref_best_rd) {
        is_cost_valid = 0;
        break;
      }
    }
  }

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }

  return is_cost_valid;
}
#if CONFIG_VAR_TX
// FIXME crop these calls
static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
                               TX_SIZE tx_size) {
  return aom_sum_squares_2d_i16(diff, diff_stride, tx_size_wide[tx_size],
                                tx_size_high[tx_size]);
}
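
// av1_tx_block_rd_b() measures the RD contribution of a single transform
// block: it forward-transforms and quantizes the residual, reconstructs the
// block into a scratch buffer, and accumulates the SSE and reconstruction
// distortion (each scaled by 16), the coefficient rate, and the skip flag
// into rd_stats. Blocks that extend past the frame edge are measured
// piecewise over the visible area only.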
void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
                       int blk_row, int blk_col, int plane, int block,
                       int plane_bsize, const ENTROPY_CONTEXT *a,
                       const ENTROPY_CONTEXT *l, RD_STATS *rd_stats) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t tmp;
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  PLANE_TYPE plane_type = get_plane_type(plane);
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const SCAN_ORDER *const scan_order =
      get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
  BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
  int bh = block_size_high[txm_bsize];
  int bw = block_size_wide[txm_bsize];
  int txb_h = tx_size_high_unit[tx_size];
  int txb_w = tx_size_wide_unit[tx_size];

  int src_stride = p->src.stride;
  uint8_t *src =
      &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
  uint8_t *dst =
      &pd->dst
           .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
#if CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
  uint8_t *rec_buffer;
#else
  DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
#endif  // CONFIG_HIGHBITDEPTH
  int max_blocks_high = block_size_high[plane_bsize];
  int max_blocks_wide = block_size_wide[plane_bsize];
  const int diff_stride = max_blocks_wide;
  const int16_t *diff =
      &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
  int txb_coeff_cost;

  assert(tx_size < TX_SIZES_ALL);

  if (xd->mb_to_bottom_edge < 0)
    max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);

  max_blocks_high >>= tx_size_wide_log2[0];
  max_blocks_wide >>= tx_size_wide_log2[0];

  int coeff_ctx = get_entropy_context(tx_size, a, l);

  av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                  coeff_ctx, AV1_XFORM_QUANT_FP);

  av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx);

  // TODO(any): Use av1_dist_block to compute distortion
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
    aom_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL,
                             0, NULL, 0, bw, bh, xd->bd);
  } else {
    rec_buffer = (uint8_t *)rec_buffer16;
    aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0,
                      NULL, 0, bw, bh);
  }
#else
  aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL,
                    0, bw, bh);
#endif  // CONFIG_HIGHBITDEPTH

  if (blk_row + txb_h > max_blocks_high || blk_col + txb_w > max_blocks_wide) {
    int idx, idy;
    int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
    int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
    tmp = 0;
    for (idy = 0; idy < blocks_height; ++idy) {
      for (idx = 0; idx < blocks_width; ++idx) {
        const int16_t *d =
            diff + ((idy * diff_stride + idx) << tx_size_wide_log2[0]);
        tmp += sum_squares_2d(d, diff_stride, 0);
      }
    }
  } else {
    tmp = sum_squares_2d(diff, diff_stride, tx_size);
  }

#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
#endif  // CONFIG_HIGHBITDEPTH
  rd_stats->sse += tmp * 16;
  const int eob = p->eobs[block];

  av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, rec_buffer,
                              MAX_TX_SIZE, eob);
  if (eob > 0) {
    if (txb_w + blk_col > max_blocks_wide ||
        txb_h + blk_row > max_blocks_high) {
      int idx, idy;
      unsigned int this_dist;
      int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
      int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
      tmp = 0;
      for (idy = 0; idy < blocks_height; ++idy) {
        for (idx = 0; idx < blocks_width; ++idx) {
          uint8_t *const s =
              src + ((idy * src_stride + idx) << tx_size_wide_log2[0]);
          uint8_t *const r =
              rec_buffer + ((idy * MAX_TX_SIZE + idx) << tx_size_wide_log2[0]);
          cpi->fn_ptr[0].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
          tmp += this_dist;
        }
      }
    } else {
      uint32_t this_dist;
      cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE,
                                &this_dist);
      tmp = this_dist;
    }
  }
  rd_stats->dist += tmp * 16;
  txb_coeff_cost =
      av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l, 0);
  rd_stats->rate += txb_coeff_cost;
  rd_stats->skip &= (eob == 0);

#if CONFIG_RD_DEBUG
  av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
                            txb_coeff_cost);
#endif  // CONFIG_RD_DEBUG
}
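
// select_tx_block() performs the recursive transform-size search used by
// VAR_TX: it first evaluates the block coded with the current tx_size
// (falling back to the cheaper all-zero coding when that wins), then, if the
// depth limit allows, recurses into the four sub-transform blocks and keeps
// whichever of the two alternatives gives the lower RD cost, updating the
// entropy and txfm-partition contexts to match the chosen split.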
static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                            int blk_col, int plane, int block, int block32,
                            TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize,
                            ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                            TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                            RD_STATS *rd_stats, int64_t ref_best_rd,
                            int *is_cost_valid, RD_STATS *rd_stats_stack) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  TX_SIZE(*const inter_tx_size)
  [MAX_MIB_SIZE] =
      (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
  int64_t this_rd = INT64_MAX;
  ENTROPY_CONTEXT *pta = ta + blk_col;
  ENTROPY_CONTEXT *ptl = tl + blk_row;
  int coeff_ctx, i;
  int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
                                   mbmi->sb_type, tx_size);
  int64_t sum_rd = INT64_MAX;
  int tmp_eob = 0;
  int zero_blk_rate;
  RD_STATS sum_rd_stats;
  const int tx_size_ctx = txsize_sqr_map[tx_size];

  av1_init_rd_stats(&sum_rd_stats);

  assert(tx_size < TX_SIZES_ALL);

  if (ref_best_rd < 0) {
    *is_cost_valid = 0;
    return;
  }

  coeff_ctx = get_entropy_context(tx_size, pta, ptl);

  av1_init_rd_stats(rd_stats);

  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
                                [coeff_ctx][EOB_TOKEN];

  if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
    inter_tx_size[0][0] = tx_size;

    if (tx_size == TX_32X32 && mbmi->tx_type != DCT_DCT &&
        rd_stats_stack[block32].rate != INT_MAX) {
      *rd_stats = rd_stats_stack[block32];
      p->eobs[block] = !rd_stats->skip;
      x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
    } else {
      av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                        plane_bsize, pta, ptl, rd_stats);
      if (tx_size == TX_32X32) {
        rd_stats_stack[block32] = *rd_stats;
      }
    }

    if ((RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >=
             RDCOST(x->rdmult, x->rddiv, zero_blk_rate, rd_stats->sse) ||
         rd_stats->skip == 1) &&
        !xd->lossless[mbmi->segment_id]) {
#if CONFIG_RD_DEBUG
      av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
                                zero_blk_rate - rd_stats->rate);
#endif  // CONFIG_RD_DEBUG
      rd_stats->rate = zero_blk_rate;
      rd_stats->dist = rd_stats->sse;
      rd_stats->skip = 1;
      x->blk_skip[plane][blk_row * bw + blk_col] = 1;
      p->eobs[block] = 0;
    } else {
      x->blk_skip[plane][blk_row * bw + blk_col] = 0;
      rd_stats->skip = 0;
    }

    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate +=
          av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
    this_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
    tmp_eob = p->eobs[block];
  }

  if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    const int bsl = tx_size_wide_unit[sub_txs];
    int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
    RD_STATS this_rd_stats;
    int this_cost_valid = 1;
    int64_t tmp_rd = 0;

    sum_rd_stats.rate =
        av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);

    assert(tx_size < TX_SIZES_ALL);

    for (i = 0; i < 4 && this_cost_valid; ++i) {
      int offsetr = blk_row + (i >> 1) * bsl;
      int offsetc = blk_col + (i & 0x01) * bsl;

      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;

      select_tx_block(cpi, x, offsetr, offsetc, plane, block, block32, sub_txs,
                      depth + 1, plane_bsize, ta, tl, tx_above, tx_left,
                      &this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid,
                      rd_stats_stack);

      av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);

      tmp_rd =
          RDCOST(x->rdmult, x->rddiv, sum_rd_stats.rate, sum_rd_stats.dist);
      if (this_rd < tmp_rd) break;
      block += sub_step;
    }
    if (this_cost_valid) sum_rd = tmp_rd;
  }

  if (this_rd < sum_rd) {
    int idx, idy;
    for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0);
    for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0);
    txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
                          tx_size);
    inter_tx_size[0][0] = tx_size;
    for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
      for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
        inter_tx_size[idy][idx] = tx_size;
    mbmi->tx_size = tx_size;
    if (this_rd == INT64_MAX) *is_cost_valid = 0;
    x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
  } else {
    *rd_stats = sum_rd_stats;
    if (sum_rd == INT64_MAX) *is_cost_valid = 0;
  }
}
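
// inter_block_yrd() tiles the luma plane with blocks of the largest
// (rectangular) transform size allowed for this block size and runs the
// recursive select_tx_block() search on each tile, accumulating the per-tile
// RD stats and invalidating the result once the running cost exceeds
// ref_best_rd.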
static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
                            RD_STATS *rd_stats, BLOCK_SIZE bsize,
                            int64_t ref_best_rd, RD_STATS *rd_stats_stack) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

  if (is_cost_valid) {
    const struct macroblockd_plane *const pd = &xd->plane[0];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
    const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
    const int bh = tx_size_high_unit[max_tx_size];
    const int bw = tx_size_wide_unit[max_tx_size];
    int idx, idy;
    int block = 0;
    int block32 = 0;
    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
    ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
    ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
    TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
    TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];

    RD_STATS pn_rd_stats;
    av1_init_rd_stats(&pn_rd_stats);

    av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
    memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
    memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);

    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bw) {
        select_tx_block(cpi, x, idy, idx, 0, block, block32, max_tx_size,
                        mi_height != mi_width, plane_bsize, ctxa, ctxl,
                        tx_above, tx_left, &pn_rd_stats, ref_best_rd - this_rd,
                        &is_cost_valid, rd_stats_stack);
        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
        this_rd += AOMMIN(
            RDCOST(x->rdmult, x->rddiv, pn_rd_stats.rate, pn_rd_stats.dist),
            RDCOST(x->rdmult, x->rddiv, 0, pn_rd_stats.sse));
        block += step;
        ++block32;
      }
    }
  }

  this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
                   RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
  if (this_rd > ref_best_rd) is_cost_valid = 0;

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }
}
static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t ref_best_rd, TX_TYPE tx_type,
                                       RD_STATS *rd_stats_stack) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int is_inter = is_inter_block(mbmi);
  aom_prob skip_prob = av1_get_skip_prob(cm, xd);
  int s0 = av1_cost_bit(skip_prob, 0);
  int s1 = av1_cost_bit(skip_prob, 1);
  int64_t rd;
  int row, col;
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);

  mbmi->tx_type = tx_type;
  inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, rd_stats_stack);
  mbmi->min_tx_size = get_min_tx_size(mbmi->inter_tx_size[0][0]);

  if (rd_stats->rate == INT_MAX) return INT64_MAX;

  for (row = 0; row < max_blocks_high / 2; ++row)
    for (col = 0; col < max_blocks_wide / 2; ++col)
      mbmi->min_tx_size = AOMMIN(
          mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col]));

#if CONFIG_EXT_TX
  if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter,
                       cm->reduced_tx_set_used) > 1 &&
      !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    const int ext_tx_set = get_ext_tx_set(mbmi->min_tx_size, bsize, is_inter,
                                          cm->reduced_tx_set_used);
    if (is_inter) {
      if (ext_tx_set > 0)
        rd_stats->rate +=
            cpi->inter_tx_type_costs[ext_tx_set]
                                    [txsize_sqr_map[mbmi->min_tx_size]]
                                    [mbmi->tx_type];
    } else {
      if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
        rd_stats->rate +=
            cpi->intra_tx_type_costs[ext_tx_set][mbmi->min_tx_size][mbmi->mode]
                                    [mbmi->tx_type];
    }
  }
#else   // CONFIG_EXT_TX
  if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id])
    rd_stats->rate +=
        cpi->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
#endif  // CONFIG_EXT_TX

  if (rd_stats->skip)
    rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
  else
    rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate + s0, rd_stats->dist);

  if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
      !(rd_stats->skip))
    rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));

  return rd;
}
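
// select_tx_type_yrd() loops over the candidate transform types, runs the
// recursive size search for each via select_tx_size_fix_type(), and keeps the
// type/size combination with the lowest RD cost. rd_stats_stack caches the
// per-32x32-block results so later tx_type iterations can reuse them instead
// of recomputing the full search for those blocks.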
static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
                               RD_STATS *rd_stats, BLOCK_SIZE bsize,
                               int64_t ref_best_rd) {
  const AV1_COMMON *cm = &cpi->common;
  const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd = INT64_MAX;
  int64_t best_rd = INT64_MAX;
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  const int is_inter = is_inter_block(mbmi);
  TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
  TX_SIZE best_tx = max_txsize_lookup[bsize];
  TX_SIZE best_min_tx_size = TX_SIZES_ALL;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
  const int n4 = bsize_to_num_blk(bsize);
  int idx, idy;
  int prune = 0;
  const int count32 =
      1 << (2 * (cm->mib_size_log2 - mi_width_log2_lookup[BLOCK_32X32]));
#if CONFIG_EXT_PARTITION
  RD_STATS rd_stats_stack[16];
#else
  RD_STATS rd_stats_stack[4];
#endif  // CONFIG_EXT_PARTITION
#if CONFIG_EXT_TX
  const int ext_tx_set =
      get_ext_tx_set(max_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
#endif  // CONFIG_EXT_TX

  if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
#if CONFIG_EXT_TX
    prune = prune_tx_types(cpi, bsize, x, xd, ext_tx_set);
#else
    prune = prune_tx_types(cpi, bsize, x, xd, 0);
#endif  // CONFIG_EXT_TX

  av1_invalid_rd_stats(rd_stats);

  for (idx = 0; idx < count32; ++idx)
    av1_invalid_rd_stats(&rd_stats_stack[idx]);

  for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
    RD_STATS this_rd_stats;
    av1_init_rd_stats(&this_rd_stats);
#if CONFIG_EXT_TX
    if (is_inter) {
      if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
      if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
        if (!do_tx_type_search(tx_type, prune)) continue;
      }
    } else {
      if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
        if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
      }
      if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
    }
#else   // CONFIG_EXT_TX
    if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
        !do_tx_type_search(tx_type, prune))
      continue;
#endif  // CONFIG_EXT_TX
    if (is_inter && x->use_default_inter_tx_type &&
        tx_type != get_default_tx_type(0, xd, 0, max_tx_size))
      continue;

    if (xd->lossless[mbmi->segment_id])
      if (tx_type != DCT_DCT) continue;

    rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd,
                                 tx_type, rd_stats_stack);

    if (rd < best_rd) {
      best_rd = rd;
      *rd_stats = this_rd_stats;
      best_tx_type = mbmi->tx_type;
      best_tx = mbmi->tx_size;
      best_min_tx_size = mbmi->min_tx_size;
      memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
      for (idy = 0; idy < xd->n8_h; ++idy)
        for (idx = 0; idx < xd->n8_w; ++idx)
          best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
    }
  }

  mbmi->tx_type = best_tx_type;
  for (idy = 0; idy < xd->n8_h; ++idy)
    for (idx = 0; idx < xd->n8_w; ++idx)
      mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
  mbmi->tx_size = best_tx;
  mbmi->min_tx_size = best_min_tx_size;
  memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
}
static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                        int blk_col, int plane, int block, TX_SIZE tx_size,
                        BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
                        ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  TX_SIZE plane_tx_size;
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);

  assert(tx_size < TX_SIZES_ALL);

  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  plane_tx_size =
      plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
            : mbmi->inter_tx_size[tx_row][tx_col];

  if (tx_size == plane_tx_size) {
    int i;
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
    av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                      plane_bsize, ta, tl, rd_stats);

    for (i = 0; i < tx_size_wide_unit[tx_size]; ++i)
      ta[i] = !(p->eobs[block] == 0);
    for (i = 0; i < tx_size_high_unit[tx_size]; ++i)
      tl[i] = !(p->eobs[block] == 0);
  } else {
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    const int bsl = tx_size_wide_unit[sub_txs];
    int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
    int i;

    assert(bsl > 0);

    for (i = 0; i < 4; ++i) {
      int offsetr = blk_row + (i >> 1) * bsl;
      int offsetc = blk_col + (i & 0x01) * bsl;

      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;

      tx_block_rd(cpi, x, offsetr, offsetc, plane, block, sub_txs, plane_bsize,
                  above_ctx, left_ctx, rd_stats);
      block += step;
    }
  }
}
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
                            RD_STATS *rd_stats, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int plane;
  int is_cost_valid = 1;
  int64_t this_rd;

  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
  if (x->skip_chroma_rd) return is_cost_valid;
  bsize = scale_chroma_bsize(mbmi->sb_type, xd->plane[1].subsampling_x,
                             xd->plane[1].subsampling_y);
#endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2

#if CONFIG_EXT_TX && CONFIG_RECT_TX
  if (is_rect_tx(mbmi->tx_size)) {
    return super_block_uvrd(cpi, x, rd_stats, bsize, ref_best_rd);
  }
#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX

  if (is_inter_block(mbmi) && is_cost_valid) {
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      av1_subtract_plane(x, bsize, plane);
  }

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
    const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
    const int bh = tx_size_high_unit[max_tx_size];
    const int bw = tx_size_wide_unit[max_tx_size];
    int idx, idy;
    int block = 0;
    const int step = bh * bw;
    ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
    ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
    RD_STATS pn_rd_stats;
    av1_init_rd_stats(&pn_rd_stats);

    av1_get_entropy_contexts(bsize, 0, pd, ta, tl);

    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bw) {
        tx_block_rd(cpi, x, idy, idx, plane, block, max_tx_size, plane_bsize,
                    ta, tl, &pn_rd_stats);
        block += step;
      }
    }

    if (pn_rd_stats.rate == INT_MAX) {
      is_cost_valid = 0;
      break;
    }

    av1_merge_rd_stats(rd_stats, &pn_rd_stats);

    this_rd =
        AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
               RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));

    if (this_rd > ref_best_rd) {
      is_cost_valid = 0;
      break;
    }
  }

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }

  return is_cost_valid;
}
#endif  // CONFIG_VAR_TX
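
// The chroma palette search below counts the distinct U and V sample values,
// then, for each candidate palette size n, runs a 2-D k-means over the (U, V)
// pairs of the block, builds the color-index map, and computes the full rate
// (palette size, colors, and per-pixel index tokens) plus the token-only
// distortion, keeping the palette with the lowest RD cost.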
#if CONFIG_PALETTE
static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int dc_mode_cost,
                                       uint8_t *best_palette_color_map,
                                       MB_MODE_INFO *const best_mbmi,
                                       int64_t *best_rd, int *rate,
                                       int *rate_tokenonly, int64_t *distortion,
                                       int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  int this_rate;
  int64_t this_rd;
  int colors_u, colors_v, colors;
  const int src_stride = x->plane[1].src.stride;
  const uint8_t *const src_u = x->plane[1].src.buf;
  const uint8_t *const src_v = x->plane[2].src.buf;
  uint8_t *const color_map = xd->plane[1].color_index_map;
  RD_STATS tokenonly_rd_stats;
  int plane_block_width, plane_block_height, rows, cols;
  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                           &plane_block_height, &rows, &cols);
  if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return;

  mbmi->uv_mode = DC_PRED;
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA

#if CONFIG_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth) {
    colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols,
                                       cpi->common.bit_depth);
    colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols,
                                       cpi->common.bit_depth);
  } else {
#endif  // CONFIG_HIGHBITDEPTH
    colors_u = av1_count_colors(src_u, src_stride, rows, cols);
    colors_v = av1_count_colors(src_v, src_stride, rows, cols);
#if CONFIG_HIGHBITDEPTH
  }
#endif  // CONFIG_HIGHBITDEPTH

#if CONFIG_PALETTE_DELTA_ENCODING
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  uint16_t color_cache[2 * PALETTE_MAX_SIZE];
  const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache);
#endif  // CONFIG_PALETTE_DELTA_ENCODING

  colors = colors_u > colors_v ? colors_u : colors_v;
  if (colors > 1 && colors <= 64) {
    int r, c, n, i, j;
    const int max_itr = 50;
    uint8_t color_order[PALETTE_MAX_SIZE];
    float lb_u, ub_u, val_u;
    float lb_v, ub_v, val_v;
    float *const data = x->palette_buffer->kmeans_data_buf;
    float centroids[2 * PALETTE_MAX_SIZE];

#if CONFIG_HIGHBITDEPTH
    uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
    uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
    if (cpi->common.use_highbitdepth) {
      lb_u = src_u16[0];
      ub_u = src_u16[0];
      lb_v = src_v16[0];
      ub_v = src_v16[0];
    } else {
#endif  // CONFIG_HIGHBITDEPTH
      lb_u = src_u[0];
      ub_u = src_u[0];
      lb_v = src_v[0];
      ub_v = src_v[0];
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH

    for (r = 0; r < rows; ++r) {
      for (c = 0; c < cols; ++c) {
#if CONFIG_HIGHBITDEPTH
        if (cpi->common.use_highbitdepth) {
          val_u = src_u16[r * src_stride + c];
          val_v = src_v16[r * src_stride + c];
          data[(r * cols + c) * 2] = val_u;
          data[(r * cols + c) * 2 + 1] = val_v;
        } else {
#endif  // CONFIG_HIGHBITDEPTH
          val_u = src_u[r * src_stride + c];
          val_v = src_v[r * src_stride + c];
          data[(r * cols + c) * 2] = val_u;
          data[(r * cols + c) * 2 + 1] = val_v;
#if CONFIG_HIGHBITDEPTH
        }
#endif  // CONFIG_HIGHBITDEPTH
        if (val_u < lb_u)
          lb_u = val_u;
        else if (val_u > ub_u)
          ub_u = val_u;
        if (val_v < lb_v)
          lb_v = val_v;
        else if (val_v > ub_v)
          ub_v = val_v;
      }
    }

    for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
         --n) {
      for (i = 0; i < n; ++i) {
        centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
        centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
      }
      av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
#if CONFIG_PALETTE_DELTA_ENCODING
      optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
      // Sort the U channel colors in ascending order.
      for (i = 0; i < 2 * (n - 1); i += 2) {
        int min_idx = i;
        float min_val = centroids[i];
        for (j = i + 2; j < 2 * n; j += 2)
          if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
        if (min_idx != i) {
          float temp_u = centroids[i], temp_v = centroids[i + 1];
          centroids[i] = centroids[min_idx];
          centroids[i + 1] = centroids[min_idx + 1];
          centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
        }
      }
      av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);
#endif  // CONFIG_PALETTE_DELTA_ENCODING
      extend_palette_color_map(color_map, cols, rows, plane_block_width,
                               plane_block_height);
      pmi->palette_size[1] = n;
      for (i = 1; i < 3; ++i) {
        for (j = 0; j < n; ++j) {
#if CONFIG_HIGHBITDEPTH
          if (cpi->common.use_highbitdepth)
            pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
                (int)centroids[j * 2 + i - 1], cpi->common.bit_depth);
          else
#endif  // CONFIG_HIGHBITDEPTH
            pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
                clip_pixel((int)centroids[j * 2 + i - 1]);
        }
      }

      super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
      if (tokenonly_rd_stats.rate == INT_MAX) continue;
      this_rate =
          tokenonly_rd_stats.rate + dc_mode_cost +
          cpi->palette_uv_size_cost[bsize - BLOCK_8X8][n - PALETTE_MIN_SIZE] +
          write_uniform_cost(n, color_map[0]) +
          av1_cost_bit(
              av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 1);
      this_rate += av1_palette_color_cost_uv(pmi,
#if CONFIG_PALETTE_DELTA_ENCODING
                                             color_cache, n_cache,
#endif  // CONFIG_PALETTE_DELTA_ENCODING
                                             cpi->common.bit_depth);
      for (i = 0; i < rows; ++i) {
        for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
          int color_idx;
          const int color_ctx = av1_get_palette_color_index_context(
              color_map, plane_block_width, i, j, n, color_order, &color_idx);
          assert(color_idx >= 0 && color_idx < n);
          this_rate += cpi->palette_uv_color_cost[n - PALETTE_MIN_SIZE]
                                                 [color_ctx][color_idx];
        }
      }

      this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        *best_mbmi = *mbmi;
        memcpy(best_palette_color_map, color_map,
               plane_block_width * plane_block_height *
                   sizeof(best_palette_color_map[0]));
        *rate = this_rate;
        *distortion = tokenonly_rd_stats.dist;
        *rate_tokenonly = tokenonly_rd_stats.rate;
        *skippable = tokenonly_rd_stats.skip;
      }
    }
  }
  if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
    memcpy(color_map, best_palette_color_map,
           rows * cols * sizeof(best_palette_color_map[0]));
  }
}
#endif  // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
// Return 1 if a filter intra mode is selected; return 0 otherwise.
static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
                                     int *rate, int *rate_tokenonly,
                                     int64_t *distortion, int *skippable,
                                     BLOCK_SIZE bsize, int64_t *best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  int filter_intra_selected_flag = 0;
  int this_rate;
  int64_t this_rd;
  FILTER_INTRA_MODE mode;
  FILTER_INTRA_MODE_INFO filter_intra_mode_info;
  RD_STATS tokenonly_rd_stats;

  av1_zero(filter_intra_mode_info);
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 1;
  mbmi->uv_mode = DC_PRED;
#if CONFIG_PALETTE
  mbmi->palette_mode_info.palette_size[1] = 0;
#endif  // CONFIG_PALETTE

  for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
    mbmi->filter_intra_mode_info.filter_intra_mode[1] = mode;
    if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd))
      continue;

    this_rate = tokenonly_rd_stats.rate +
                av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 1) +
                cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
                write_uniform_cost(FILTER_INTRA_MODES, mode);
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
    if (this_rd < *best_rd) {
      *best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
      filter_intra_mode_info = mbmi->filter_intra_mode_info;
      filter_intra_selected_flag = 1;
    }
  }

  if (filter_intra_selected_flag) {
    mbmi->uv_mode = DC_PRED;
    mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
        filter_intra_mode_info.use_filter_intra_mode[1];
    mbmi->filter_intra_mode_info.filter_intra_mode[1] =
        filter_intra_mode_info.filter_intra_mode[1];
    return 1;
  } else {
    return 0;
  }
}
#endif  // CONFIG_FILTER_INTRA
#if CONFIG_EXT_INTRA
// Run RD calculation with given chroma intra prediction angle, and return
// the RD cost. Update the best mode info if the RD cost is the best so far.
static int64_t pick_intra_angle_routine_sbuv(
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
    int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
    int *best_angle_delta, int64_t *best_rd) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  int this_rate;
  int64_t this_rd;
  RD_STATS tokenonly_rd_stats;

  if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
    return INT64_MAX;
  this_rate = tokenonly_rd_stats.rate + rate_overhead;
  this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
  if (this_rd < *best_rd) {
    *best_rd = this_rd;
    *best_angle_delta = mbmi->angle_delta[1];
    *rate = this_rate;
    rd_stats->rate = tokenonly_rd_stats.rate;
    rd_stats->dist = tokenonly_rd_stats.dist;
    rd_stats->skip = tokenonly_rd_stats.skip;
  }
  return this_rd;
}
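
// The chroma angle-delta search mirrors the luma one: even deltas (0, 2, ...)
// are evaluated first in both signs, and an odd delta is only tried when one
// of its even neighbors produced an RD cost close to the current best, which
// prunes most of the 2 * MAX_ANGLE_DELTA + 1 candidates.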
// With given chroma directional intra prediction mode, pick the best angle
// delta. Return true if a RD cost that is smaller than the input one is found.
static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    BLOCK_SIZE bsize, int rate_overhead,
                                    int64_t best_rd, int *rate,
                                    RD_STATS *rd_stats) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  int i, angle_delta, best_angle_delta = 0;
  int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];

  rd_stats->rate = INT_MAX;
  rd_stats->skip = 0;
  rd_stats->dist = INT64_MAX;
  for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;

  for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    for (i = 0; i < 2; ++i) {
      best_rd_in = (best_rd == INT64_MAX)
                       ? INT64_MAX
                       : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
      mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
      this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
                                              best_rd_in, rate, rd_stats,
                                              &best_angle_delta, &best_rd);
      rd_cost[2 * angle_delta + i] = this_rd;
      if (angle_delta == 0) {
        if (this_rd == INT64_MAX) return 0;
        rd_cost[1] = this_rd;
        break;
      }
    }
  }

  assert(best_rd != INT64_MAX);
  for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    int64_t rd_thresh;
    for (i = 0; i < 2; ++i) {
      int skip_search = 0;
      rd_thresh = best_rd + (best_rd >> 5);
      if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
          rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
        skip_search = 1;
      if (!skip_search) {
        mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
        pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                      rate, rd_stats, &best_angle_delta,
                                      &best_rd);
      }
    }
  }

  mbmi->angle_delta[1] = best_angle_delta;
  return rd_stats->rate != INT_MAX;
}
#endif  // CONFIG_EXT_INTRA
static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
  mbmi->uv_mode = DC_PRED;
#if CONFIG_PALETTE
  mbmi->palette_mode_info.palette_size[1] = 0;
#endif  // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA
}
static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  MB_MODE_INFO best_mbmi = *mbmi;
  PREDICTION_MODE mode;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate;
  RD_STATS tokenonly_rd_stats;
#if CONFIG_PVQ
  od_rollback_buffer buf;
  od_encode_checkpoint(&x->daala_enc, &buf);
#endif  // CONFIG_PVQ
#if CONFIG_PALETTE
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  uint8_t *best_palette_color_map = NULL;
#endif  // CONFIG_PALETTE

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
#if CONFIG_EXT_INTRA
    const int is_directional_mode =
        av1_is_directional_mode(mode, mbmi->sb_type);
#endif  // CONFIG_EXT_INTRA
    if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
          (1 << mode)))
      continue;

    mbmi->uv_mode = mode;
#if CONFIG_EXT_INTRA
    mbmi->angle_delta[1] = 0;
    if (is_directional_mode) {
      const int rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
                                write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                    &this_rate, &tokenonly_rd_stats))
        continue;
    } else {
#endif  // CONFIG_EXT_INTRA
      if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
#if CONFIG_PVQ
        od_encode_rollback(&x->daala_enc, &buf);
#endif  // CONFIG_PVQ
        continue;
      }
#if CONFIG_EXT_INTRA
    }
#endif  // CONFIG_EXT_INTRA
    this_rate =
        tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode];

#if CONFIG_EXT_INTRA
    if (is_directional_mode) {
      this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                      MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
    }
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
    if (mbmi->sb_type >= BLOCK_8X8 && mode == DC_PRED)
      this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 0);
#endif  // CONFIG_FILTER_INTRA
#if CONFIG_PALETTE
    if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8 &&
        mode == DC_PRED)
      this_rate += av1_cost_bit(
          av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
#endif  // CONFIG_PALETTE

#if CONFIG_PVQ
    od_encode_rollback(&x->daala_enc, &buf);
#endif  // CONFIG_PVQ
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);

    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
    }
  }

#if CONFIG_PALETTE
  if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8) {
    best_palette_color_map = x->palette_buffer->best_palette_color_map;
    rd_pick_palette_intra_sbuv(cpi, x,
                               cpi->intra_uv_mode_cost[mbmi->mode][DC_PRED],
                               best_palette_color_map, &best_mbmi, &best_rd,
                               rate, rate_tokenonly, distortion, skippable);
  }
#endif  // CONFIG_PALETTE

#if CONFIG_FILTER_INTRA
  if (mbmi->sb_type >= BLOCK_8X8) {
    if (rd_pick_filter_intra_sbuv(cpi, x, rate, rate_tokenonly, distortion,
                                  skippable, bsize, &best_rd))
      best_mbmi = *mbmi;
  }
#endif  // CONFIG_FILTER_INTRA

  *mbmi = best_mbmi;
  // Make sure we actually chose a mode
  assert(best_rd < INT64_MAX);
  return best_rd;
}
static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
                                 PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
                                 TX_SIZE max_tx_size, int *rate_uv,
                                 int *rate_uv_tokenonly, int64_t *dist_uv,
                                 int *skip_uv, PREDICTION_MODE *mode_uv) {
  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  (void)ctx;
  init_sbuv_mode(&x->e_mbd.mi[0]->mbmi);
#if CONFIG_CB4X4
#if CONFIG_CHROMA_2X2
  rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                          bsize, max_tx_size);
#else
  if (x->skip_chroma_rd) {
    *rate_uv = 0;
    *rate_uv_tokenonly = 0;
    *dist_uv = 0;
    *skip_uv = 1;
    *mode_uv = DC_PRED;
    return;
  }
  BLOCK_SIZE bs = scale_chroma_bsize(bsize, x->e_mbd.plane[1].subsampling_x,
                                     x->e_mbd.plane[1].subsampling_y);
  rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                          bs, max_tx_size);
#endif  // CONFIG_CHROMA_2X2
#else
  rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                          bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
#endif  // CONFIG_CB4X4
  *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
}
static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
                       int16_t mode_context) {
#if CONFIG_EXT_INTER
  if (is_inter_compound_mode(mode)) {
    return cpi
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }
#endif  // CONFIG_EXT_INTER

  int mode_cost = 0;
  int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
  int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET);

  assert(is_inter_mode(mode));

  if (mode == NEWMV) {
    mode_cost = cpi->newmv_mode_cost[mode_ctx][0];
    return mode_cost;
  } else {
    mode_cost = cpi->newmv_mode_cost[mode_ctx][1];
    mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;

    if (is_all_zero_mv) return mode_cost;

    if (mode == ZEROMV) {
      mode_cost += cpi->zeromv_mode_cost[mode_ctx][0];
      return mode_cost;
    } else {
      mode_cost += cpi->zeromv_mode_cost[mode_ctx][1];
      mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;

      if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;
      if (mode_context & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;
      if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;

      mode_cost += cpi->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
      return mode_cost;
    }
  }
}
#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
                                             COMPOUND_TYPE comp_type) {
  (void)bsize;
  switch (comp_type) {
    case COMPOUND_AVERAGE: return 0;
#if CONFIG_WEDGE
    case COMPOUND_WEDGE: return get_interinter_wedge_bits(bsize);
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
    case COMPOUND_SEG: return 1;
#endif  // CONFIG_COMPOUND_SEGMENT
    default: assert(0); return 0;
  }
}
#endif  // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  int_mv pred_mv[2];
#if CONFIG_EXT_INTER
  int_mv ref_mv[2];
#endif  // CONFIG_EXT_INTER

#if CONFIG_CHROMA_2X2
  ENTROPY_CONTEXT ta[4];
  ENTROPY_CONTEXT tl[4];
#else
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
#endif  // CONFIG_CHROMA_2X2
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv[2];
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  PREDICTION_MODE modes[4];
#if CONFIG_EXT_INTER
  SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
#else
  SEG_RDSTAT rdstat[4][INTER_MODES];
#endif  // CONFIG_EXT_INTER
  int mvthresh;
} BEST_SEG_INFO;
static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
  return (mv->row >> 3) < mv_limits->row_min ||
         (mv->row >> 3) > mv_limits->row_max ||
         (mv->col >> 3) < mv_limits->col_min ||
         (mv->col >> 3) > mv_limits->col_max;
}
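
// check_best_zero_mv() is a pruning helper: when the candidate mode would
// code the same zero (or global) motion vector as a cheaper
// NEARESTMV/NEARMV/ZEROMV alternative, it returns 0 so the caller can drop
// the redundant candidate.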
// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const AV1_COMP *const cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
#if CONFIG_EXT_INTER
    const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
#endif  // CONFIG_EXT_INTER
    int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
    const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
    int mi_row, int mi_col) {
  int_mv zeromv[2];
  int comp_pred_mode = ref_frames[1] > INTRA_FRAME;
  int cur_frm;
  for (cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
#if CONFIG_GLOBAL_MOTION
    if (this_mode == ZEROMV
#if CONFIG_EXT_INTER
        || this_mode == ZERO_ZEROMV
#endif  // CONFIG_EXT_INTER
        )
      zeromv[cur_frm].as_int =
          gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
                               cpi->common.allow_high_precision_mv, bsize,
                               mi_col, mi_row, block)
              .as_int;
    else
#endif  // CONFIG_GLOBAL_MOTION
      zeromv[cur_frm].as_int = 0;
  }
#if !CONFIG_EXT_INTER
  assert(ref_frames[1] != INTRA_FRAME);  // Just sanity check
#endif  // !CONFIG_EXT_INTER
  if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
      (ref_frames[1] <= INTRA_FRAME ||
       frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) {
    int16_t rfc =
        av1_mode_context_analyzer(mode_context, ref_frames, bsize, block);
    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == ZEROMV);
      if (ref_frames[1] <= INTRA_FRAME) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
          return 0;
      }
    }
  }
#if CONFIG_EXT_INTER
  else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
            this_mode == ZERO_ZEROMV) &&
           frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
           frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
    int16_t rfc = compound_mode_context[ref_frames[0]];
    int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, rfc);
    int c5 = cost_mv_ref(cpi, NEAR_NEARMV, rfc);

    if (this_mode == NEAREST_NEARESTMV) {
      if (c2 > c3) return 0;
    } else if (this_mode == NEAR_NEARMV) {
      if (c5 > c3) return 0;
    } else {
      assert(this_mode == ZERO_ZEROMV);
      if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
           frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
          (c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 &&
           frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0))
        return 0;
    }
  }
#endif  // CONFIG_EXT_INTER
  return 1;
}
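
// joint_motion_search() refines the motion vectors of a compound prediction:
// each reference is searched against a prediction formed from the other
// (currently fixed) reference so the pair of vectors is optimized jointly
// rather than independently. Scaled reference frames are temporarily swapped
// in so the search can run at the coded frame resolution.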

static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
                                int mi_col,
#if CONFIG_EXT_INTER
                                int_mv *ref_mv_sub8x8[2], const uint8_t *mask,
                                int mask_stride,
#endif  // CONFIG_EXT_INTER
                                int *rate_mv, const int block) {
  const AV1_COMMON *const cm = &cpi->common;
  const int pw = block_size_wide[bsize];
  const int ph = block_size_high[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  // This function should only ever be called for compound modes
  assert(has_second_ref(mbmi));
  const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
  int_mv ref_mv[2];
  int ite, ref;
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[4] = {
    mbmi->interp_filter[0], mbmi->interp_filter[1], mbmi->interp_filter[2],
    mbmi->interp_filter[3],
  };
#else
  const InterpFilter interp_filter = mbmi->interp_filter;
#endif  // CONFIG_DUAL_FILTER
  struct scale_factors sf;
  struct macroblockd_plane *const pd = &xd->plane[0];
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
  // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
  const int ic = block & 1;
  const int ir = (block - ic) >> 1;
  const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
  const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
#if CONFIG_GLOBAL_MOTION
  int is_global[2];
  for (ref = 0; ref < 2; ++ref) {
    WarpedMotionParams *const wm =
        &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
    is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
  }
#endif  // CONFIG_GLOBAL_MOTION
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION

  // Do joint motion search in compound mode to get more accurate mv.
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  int last_besterr[2] = { INT_MAX, INT_MAX };
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    av1_get_scaled_ref_frame(cpi, refs[0]),
    av1_get_scaled_ref_frame(cpi, refs[1])
  };

  // Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
  uint8_t *second_pred;
#else
  DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif  // CONFIG_HIGHBITDEPTH

#if CONFIG_EXT_INTER && CONFIG_CB4X4
  (void)ref_mv_sub8x8;
#endif  // CONFIG_EXT_INTER && CONFIG_CB4X4

  for (ref = 0; ref < 2; ++ref) {
#if CONFIG_EXT_INTER && !CONFIG_CB4X4
    if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
      ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
    else
#endif  // CONFIG_EXT_INTER && !CONFIG_CB4X4
      ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];

    if (scaled_ref_frame[ref]) {
      int i;
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (i = 0; i < MAX_MB_PLANE; i++)
        backup_yv12[ref][i] = xd->plane[i].pre[ref];
      av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                           NULL);
    }
  }

  // Since we have scaled the reference frames to match the size of the current
  // frame we must use a unit scaling factor during mode selection.
#if CONFIG_HIGHBITDEPTH
  av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height, cm->use_highbitdepth);
#else
  av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height);
#endif  // CONFIG_HIGHBITDEPTH

  // Allow joint search multiple times iteratively for each reference frame
  // and break out of the search loop if it couldn't find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    int bestsme = INT_MAX;
    int sadpb = x->sadperbit16;
    MV *const best_mv = &x->best_mv.as_mv;
    int search_range = 3;

    MvLimits tmp_mv_limits = x->mv_limits;
    int id = ite % 2;  // Even iterations search in the first reference frame,
                       // odd iterations search in the second. The predictor
                       // found for the 'other' reference frame is factored in.
    const int plane = 0;
    ConvolveParams conv_params = get_conv_params(0, plane);
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    WarpTypesAllowed warp_types;
#if CONFIG_GLOBAL_MOTION
    warp_types.global_warp_allowed = is_global[!id];
#endif  // CONFIG_GLOBAL_MOTION
#if CONFIG_WARPED_MOTION
    warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
#endif  // CONFIG_WARPED_MOTION
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[plane].pre[0];
    ref_yv12[1] = xd->plane[plane].pre[1];

#if CONFIG_DUAL_FILTER
    // reload the filter types
    interp_filter[0] =
        (id == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0];
    interp_filter[1] =
        (id == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1];
#endif  // CONFIG_DUAL_FILTER

    // Get the prediction block from the 'other' reference frame.
#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
      av1_highbd_build_inter_predictor(
          ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
          &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
          &warp_types, p_col, p_row,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
          plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
    } else {
      second_pred = (uint8_t *)second_pred_alloc_16;
#endif  // CONFIG_HIGHBITDEPTH
      av1_build_inter_predictor(
          ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
          &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
          &warp_types, p_col, p_row, plane, !id,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
          MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH

    // Do compound motion search on the current reference frame.
    if (id) xd->plane[plane].pre[0] = ref_yv12[id];
    av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);

    // Use the mv result from the single mode as mv predictor.
    *best_mv = frame_mv[refs[id]].as_mv;

    best_mv->col >>= 3;
    best_mv->row >>= 3;

    av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);

    // Small-range full-pixel motion search.
    bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
                                       &cpi->fn_ptr[bsize],
#if CONFIG_EXT_INTER
                                       mask, mask_stride, id,
#endif  // CONFIG_EXT_INTER
                                       &ref_mv[id].as_mv, second_pred);
    if (bestsme < INT_MAX) {
#if CONFIG_EXT_INTER
      if (mask)
        bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
                                          second_pred, mask, mask_stride, id,
                                          &cpi->fn_ptr[bsize], 1);
      else
#endif  // CONFIG_EXT_INTER
        bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
                                        second_pred, &cpi->fn_ptr[bsize], 1);
    }

    x->mv_limits = tmp_mv_limits;

    if (bestsme < INT_MAX) {
      int dis; /* TODO: use dis in distortion calculation later. */
      unsigned int sse;
      if (cpi->sf.use_upsampled_references) {
        // Use up-sampled reference frames.
        struct buf_2d backup_pred = pd->pre[0];
        const YV12_BUFFER_CONFIG *upsampled_ref =
            get_upsampled_ref(cpi, refs[id]);

        // Set pred for Y plane
        setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
                         upsampled_ref->y_crop_width,
                         upsampled_ref->y_crop_height, upsampled_ref->y_stride,
                         (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
                         pd->subsampling_y);

        // If bsize < BLOCK_8X8, adjust pred pointer for this block
#if !CONFIG_CB4X4
        if (bsize < BLOCK_8X8)
          pd->pre[0].buf =
              &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
                                                       pd->pre[0].stride))
                              << 3];
#endif  // !CONFIG_CB4X4

        bestsme = cpi->find_fractional_mv_step(
            x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
            x->errorperbit, &cpi->fn_ptr[bsize], 0,
            cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
            &dis, &sse, second_pred,
#if CONFIG_EXT_INTER
            mask, mask_stride, id,
#endif  // CONFIG_EXT_INTER
            pw, ph, 1);

        // Restore the reference frames.
        pd->pre[0] = backup_pred;
      } else {
        bestsme = cpi->find_fractional_mv_step(
            x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
            x->errorperbit, &cpi->fn_ptr[bsize], 0,
            cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
            &dis, &sse, second_pred,
#if CONFIG_EXT_INTER
            mask, mask_stride, id,
#endif  // CONFIG_EXT_INTER
            pw, ph, 0);
      }
    }

    // Restore the pointer to the first (possibly scaled) prediction buffer.
    if (id) xd->plane[plane].pre[0] = ref_yv12[0];

    if (bestsme < last_besterr[id]) {
      frame_mv[refs[id]].as_mv = *best_mv;
      last_besterr[id] = bestsme;
    } else {
      break;
    }
  }

  *rate_mv = 0;

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref]) {
      // Restore the prediction frame pointers to their unscaled versions.
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[ref] = backup_yv12[ref][i];
    }
    av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
#if CONFIG_EXT_INTER && !CONFIG_CB4X4
    if (bsize >= BLOCK_8X8)
#endif  // CONFIG_EXT_INTER && !CONFIG_CB4X4
      *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                  &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                  x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
#if CONFIG_EXT_INTER && !CONFIG_CB4X4
    else
      *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                  &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
                                  x->mvcost, MV_COST_WEIGHT);
#endif  // CONFIG_EXT_INTER && !CONFIG_CB4X4
  }
}
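
// Fills per-reference-frame bit-cost tables for single and compound
// prediction from the frame-level context probabilities, and reports the
// compound-mode probability through comp_mode_p. When the segment pins the
// reference frame (SEG_LVL_REF_FRAME), all reference costs are zeroed since
// no reference-frame bits are coded for such blocks.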

static void estimate_ref_frame_costs(const AV1_COMMON *cm,
                                     const MACROBLOCKD *xd, int segment_id,
                                     unsigned int *ref_costs_single,
                                     unsigned int *ref_costs_comp,
                                     aom_prob *comp_mode_p) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    memset(ref_costs_single, 0,
           TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
    memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
    *comp_mode_p = 128;
  } else {
    aom_prob intra_inter_p = av1_get_intra_inter_prob(cm, xd);
    aom_prob comp_inter_p = 128;

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      comp_inter_p = av1_get_reference_mode_prob(cm, xd);
      *comp_mode_p = comp_inter_p;
    } else {
      *comp_mode_p = 128;
    }

    ref_costs_single[INTRA_FRAME] = av1_cost_bit(intra_inter_p, 0);

    if (cm->reference_mode != COMPOUND_REFERENCE) {
      aom_prob ref_single_p1 = av1_get_pred_prob_single_ref_p1(cm, xd);
      aom_prob ref_single_p2 = av1_get_pred_prob_single_ref_p2(cm, xd);
#if CONFIG_EXT_REFS
      aom_prob ref_single_p3 = av1_get_pred_prob_single_ref_p3(cm, xd);
      aom_prob ref_single_p4 = av1_get_pred_prob_single_ref_p4(cm, xd);
      aom_prob ref_single_p5 = av1_get_pred_prob_single_ref_p5(cm, xd);
#endif  // CONFIG_EXT_REFS

      unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);

      ref_costs_single[LAST_FRAME] =
#if CONFIG_EXT_REFS
          ref_costs_single[LAST2_FRAME] = ref_costs_single[LAST3_FRAME] =
              ref_costs_single[BWDREF_FRAME] =
#endif  // CONFIG_EXT_REFS
                  ref_costs_single[GOLDEN_FRAME] =
                      ref_costs_single[ALTREF_FRAME] = base_cost;

#if CONFIG_EXT_REFS
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);

      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p3, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p3, 0);
      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p3, 1);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p3, 1);

      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);

      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p4, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p4, 1);

      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);
#else
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);

      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
#endif  // CONFIG_EXT_REFS
    } else {
      ref_costs_single[LAST_FRAME] = 512;
#if CONFIG_EXT_REFS
      ref_costs_single[LAST2_FRAME] = 512;
      ref_costs_single[LAST3_FRAME] = 512;
      ref_costs_single[BWDREF_FRAME] = 512;
#endif  // CONFIG_EXT_REFS
      ref_costs_single[GOLDEN_FRAME] = 512;
      ref_costs_single[ALTREF_FRAME] = 512;
    }

    if (cm->reference_mode != SINGLE_REFERENCE) {
      aom_prob ref_comp_p = av1_get_pred_prob_comp_ref_p(cm, xd);
#if CONFIG_EXT_REFS
      aom_prob ref_comp_p1 = av1_get_pred_prob_comp_ref_p1(cm, xd);
      aom_prob ref_comp_p2 = av1_get_pred_prob_comp_ref_p2(cm, xd);
      aom_prob bwdref_comp_p = av1_get_pred_prob_comp_bwdref_p(cm, xd);
#endif  // CONFIG_EXT_REFS

      unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);

      ref_costs_comp[LAST_FRAME] =
#if CONFIG_EXT_REFS
          ref_costs_comp[LAST2_FRAME] = ref_costs_comp[LAST3_FRAME] =
#endif  // CONFIG_EXT_REFS
              ref_costs_comp[GOLDEN_FRAME] = base_cost;

#if CONFIG_EXT_REFS
      ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF_FRAME] = 0;
#endif  // CONFIG_EXT_REFS

#if CONFIG_EXT_REFS
      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);

      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1);
      ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0);

      ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);

      // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
      //               more bit.
      ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
#else
      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
#endif  // CONFIG_EXT_REFS
    } else {
      ref_costs_comp[LAST_FRAME] = 512;
#if CONFIG_EXT_REFS
      ref_costs_comp[LAST2_FRAME] = 512;
      ref_costs_comp[LAST3_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME] = 512;
      ref_costs_comp[ALTREF_FRAME] = 512;
#endif  // CONFIG_EXT_REFS
      ref_costs_comp[GOLDEN_FRAME] = 512;
    }
  }
}

static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                                 int mode_index,
                                 int64_t comp_pred_diff[REFERENCE_MODES],
                                 int skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->skip = x->skip;
  ctx->skippable = skippable;
  ctx->best_mode_index = mode_index;
  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
  ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
  ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
}
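
// Per-reference setup for the inter mode search: points the prediction
// planes at the (possibly scaled) reference buffer, gathers and orders the
// candidate motion vectors for this reference, and derives the
// NEARESTMV/NEARMV predictors used by the mode loop.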

static void setup_buffer_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, int mi_row, int mi_col,
    int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
    int_mv frame_near_mv[TOTAL_REFS_PER_FRAME],
    struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
  const AV1_COMMON *cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  assert(yv12 != NULL);

  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  // use the UV scaling factors.
  av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);

  // Gets an initial list of candidate vectors from neighbours and orders them
  av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
                   mbmi_ext->ref_mv_stack[ref_frame],
#if CONFIG_EXT_INTER
                   mbmi_ext->compound_mode_context,
#endif  // CONFIG_EXT_INTER
                   candidates, mi_row, mi_col, NULL, NULL,
                   mbmi_ext->mode_context);

  // Candidate refinement carried out at encoder and decoder
  av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
                        &frame_nearest_mv[ref_frame],
                        &frame_near_mv[ref_frame]);

  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the centre point for subsequent searches.
  // The current implementation doesn't support scaling.
#if CONFIG_CB4X4
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
              block_size);
#else
  if (!av1_is_scaled(sf) && block_size >= BLOCK_8X8)
    av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
                block_size);
#endif  // CONFIG_CB4X4
}
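
// Single-reference motion search: a full-pixel search seeded from the stored
// MV predictors (with an adaptive first-step size), followed by sub-pixel
// refinement, optionally against up-sampled references. With
// CONFIG_MOTION_VAR the OBMC_CAUSAL case uses the dedicated OBMC search
// routines instead.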

static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
#if CONFIG_EXT_INTER
                                 int ref_idx,
#endif  // CONFIG_EXT_INTER
                                 int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const AV1_COMMON *cm = &cpi->common;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
  int bestsme = INT_MAX;
  int step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
#if CONFIG_EXT_INTER
  int ref = mbmi->ref_frame[ref_idx];
#else
  int ref = mbmi->ref_frame[0];
  int ref_idx = 0;
#endif  // CONFIG_EXT_INTER
  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;

  MvLimits tmp_mv_limits = x->mv_limits;
  int cost_list[5];

  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[ref_idx];

    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  av1_set_mv_search_range(&x->mv_limits, &ref_mv);

  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc.
  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    // Take wtd average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param =
        (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
        2;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
    int boffset =
        2 * (b_width_log2_lookup[cm->sb_size] -
             AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
    step_param = AOMMAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5) step_param += 2;

    // prev_mv_sad is not setup for dynamically scaled frames.
    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
      int i;
      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
          x->pred_mv[ref].row = 0;
          x->pred_mv[ref].col = 0;
          x->best_mv.as_int = INVALID_MV;

          if (scaled_ref_frame) {
            int j;
            for (j = 0; j < MAX_MB_PLANE; ++j)
              xd->plane[j].pre[ref_idx] = backup_yv12[j];
          }
          return;
        }
      }
    }
  }

  av1_set_mv_search_range(&x->mv_limits, &ref_mv);

#if CONFIG_MOTION_VAR
  if (mbmi->motion_mode != SIMPLE_TRANSLATION)
    mvp_full = mbmi->mv[0].as_mv;
  else
#endif  // CONFIG_MOTION_VAR
    mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;

#if CONFIG_MOTION_VAR
  switch (mbmi->motion_mode) {
    case SIMPLE_TRANSLATION:
#endif  // CONFIG_MOTION_VAR
      bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                      sadpb, cond_cost_list(cpi, cost_list),
                                      &ref_mv, INT_MAX, 1);
#if CONFIG_MOTION_VAR
      break;
    case OBMC_CAUSAL:
      bestsme = av1_obmc_full_pixel_diamond(
          cpi, x, &mvp_full, step_param, sadpb,
          MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
          &(x->best_mv.as_mv), 0);
      break;
    default: assert("Invalid motion mode!\n");
  }
#endif  // CONFIG_MOTION_VAR

  x->mv_limits = tmp_mv_limits;

  if (bestsme < INT_MAX) {
    int dis; /* TODO: use dis in distortion calculation later. */
#if CONFIG_MOTION_VAR
    switch (mbmi->motion_mode) {
      case SIMPLE_TRANSLATION:
#endif  // CONFIG_MOTION_VAR
        if (cpi->sf.use_upsampled_references) {
          int best_mv_var;
          const int try_second = x->second_best_mv.as_int != INVALID_MV &&
                                 x->second_best_mv.as_int != x->best_mv.as_int;
          const int pw = block_size_wide[bsize];
          const int ph = block_size_high[bsize];
          // Use up-sampled reference frames.
          struct macroblockd_plane *const pd = &xd->plane[0];
          struct buf_2d backup_pred = pd->pre[ref_idx];
          const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);

          // Set pred for Y plane
          setup_pred_plane(
              &pd->pre[ref_idx], bsize, upsampled_ref->y_buffer,
              upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
              upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL,
              pd->subsampling_x, pd->subsampling_y);

          best_mv_var = cpi->find_fractional_mv_step(
              x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
              &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
#if CONFIG_EXT_INTER
              NULL, 0, 0,
#endif  // CONFIG_EXT_INTER
              pw, ph, 1);

          if (try_second) {
            const int minc =
                AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
            const int maxc =
                AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
            const int minr =
                AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
            const int maxr =
                AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
            int this_var;
            MV best_mv = x->best_mv.as_mv;

            x->best_mv = x->second_best_mv;
            if (x->best_mv.as_mv.row * 8 <= maxr &&
                x->best_mv.as_mv.row * 8 >= minr &&
                x->best_mv.as_mv.col * 8 <= maxc &&
                x->best_mv.as_mv.col * 8 >= minc) {
              this_var = cpi->find_fractional_mv_step(
                  x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
                  &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
                  cpi->sf.mv.subpel_iters_per_step,
                  cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
                  &dis, &x->pred_sse[ref], NULL,
#if CONFIG_EXT_INTER
                  NULL, 0, 0,
#endif  // CONFIG_EXT_INTER
                  pw, ph, 1);
              if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
              x->best_mv.as_mv = best_mv;
            }
          }

          // Restore the reference frames.
          pd->pre[ref_idx] = backup_pred;
        } else {
          cpi->find_fractional_mv_step(
              x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
              &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
#if CONFIG_EXT_INTER
              NULL, 0, 0,
#endif  // CONFIG_EXT_INTER
              0, 0, 0);
        }
#if CONFIG_MOTION_VAR
        break;
      case OBMC_CAUSAL:
        av1_find_best_obmc_sub_pixel_tree_up(
            cpi, x, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv,
            cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
            cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
            x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0,
            cpi->sf.use_upsampled_references);
        break;
      default: assert("Invalid motion mode!\n");
    }
#endif  // CONFIG_MOTION_VAR
  }
  *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
                             x->mvcost, MV_COST_WEIGHT);

#if CONFIG_MOTION_VAR
  if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
#else
  if (cpi->sf.adaptive_motion_search)
#endif  // CONFIG_MOTION_VAR
    x->pred_mv[ref] = x->best_mv.as_mv;

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[ref_idx] = backup_yv12[i];
  }
}

static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst) {
  int i;
  for (i = 0; i < MAX_MB_PLANE; i++) {
    xd->plane[i].dst.buf = dst.plane[i];
    xd->plane[i].dst.stride = dst.stride[i];
  }
}
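
// Builds the inter prediction for the *other* reference of a compound pair at
// its current motion vector. The result is used as the fixed companion block
// when compound_single_motion_search() refines one side of the pair.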

#if CONFIG_EXT_INTER
static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
                                    BLOCK_SIZE bsize, const MV *other_mv,
                                    int mi_row, int mi_col, const int block,
                                    int ref_idx, uint8_t *second_pred) {
  const AV1_COMMON *const cm = &cpi->common;
  const int pw = block_size_wide[bsize];
  const int ph = block_size_high[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int other_ref = mbmi->ref_frame[!ref_idx];
#if CONFIG_DUAL_FILTER
  InterpFilter interp_filter[2] = {
    (ref_idx == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0],
    (ref_idx == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1]
  };
#else
  const InterpFilter interp_filter = mbmi->interp_filter;
#endif  // CONFIG_DUAL_FILTER
  struct scale_factors sf;
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
  struct macroblockd_plane *const pd = &xd->plane[0];
  // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
  const int ic = block & 1;
  const int ir = (block - ic) >> 1;
  const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
  const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
#if CONFIG_GLOBAL_MOTION
  WarpedMotionParams *const wm = &xd->global_motion[other_ref];
  int is_global = is_global_mv_block(xd->mi[0], block, wm->wmtype);
#endif  // CONFIG_GLOBAL_MOTION
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION

  // This function should only ever be called for compound modes
  assert(has_second_ref(mbmi));

  struct buf_2d backup_yv12[MAX_MB_PLANE];
  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, other_ref);

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[!ref_idx];
    av1_setup_pre_planes(xd, !ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  // Since we have scaled the reference frames to match the size of the current
  // frame we must use a unit scaling factor during mode selection.
#if CONFIG_HIGHBITDEPTH
  av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height, cm->use_highbitdepth);
#else
  av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height);
#endif  // CONFIG_HIGHBITDEPTH

  struct buf_2d ref_yv12;

  const int plane = 0;
  ConvolveParams conv_params = get_conv_params(0, plane);
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
  WarpTypesAllowed warp_types;
#if CONFIG_GLOBAL_MOTION
  warp_types.global_warp_allowed = is_global;
#endif  // CONFIG_GLOBAL_MOTION
#if CONFIG_WARPED_MOTION
  warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
#endif  // CONFIG_WARPED_MOTION
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION

  // Initialized here because of compiler problem in Visual Studio.
  ref_yv12 = xd->plane[plane].pre[!ref_idx];

  // Get the prediction block from the 'other' reference frame.
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    av1_highbd_build_inter_predictor(
        ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
        0, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
        &warp_types, p_col, p_row,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
        plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
  } else {
#endif  // CONFIG_HIGHBITDEPTH
    av1_build_inter_predictor(
        ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
        &conv_params, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
        &warp_types, p_col, p_row, plane, !ref_idx,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
        MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
#if CONFIG_HIGHBITDEPTH
  }
#endif  // CONFIG_HIGHBITDEPTH

  if (scaled_ref_frame) {
    // Restore the prediction frame pointers to their unscaled versions.
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[!ref_idx] = backup_yv12[i];
  }
}

// Search for the best mv for one component of a compound,
// given that the other component is fixed.
static void compound_single_motion_search(
    const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv,
    int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int *rate_mv, const int block, int ref_idx) {
  const int pw = block_size_wide[bsize];
  const int ph = block_size_high[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int ref = mbmi->ref_frame[ref_idx];
  int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
  struct macroblockd_plane *const pd = &xd->plane[0];

  struct buf_2d backup_yv12[MAX_MB_PLANE];
  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref);

  // Check that this is either an interinter or an interintra block
  assert(has_second_ref(mbmi) ||
         (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[ref_idx];
    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  struct buf_2d orig_yv12;
  int bestsme = INT_MAX;
  int sadpb = x->sadperbit16;
  MV *const best_mv = &x->best_mv.as_mv;
  int search_range = 3;

  MvLimits tmp_mv_limits = x->mv_limits;

  // Initialized here because of compiler problem in Visual Studio.
  if (ref_idx) {
    orig_yv12 = pd->pre[0];
    pd->pre[0] = pd->pre[ref_idx];
  }

  // Do compound motion search on the current reference frame.
  av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);

  // Use the mv result from the single mode as mv predictor.
  *best_mv = *this_mv;

  best_mv->col >>= 3;
  best_mv->row >>= 3;

  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);

  // Small-range full-pixel motion search.
  bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
                                     &cpi->fn_ptr[bsize], mask, mask_stride,
                                     ref_idx, &ref_mv.as_mv, second_pred);
  if (bestsme < INT_MAX) {
    if (mask)
      bestsme =
          av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
                                  mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
    else
      bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
                                      &cpi->fn_ptr[bsize], 1);
  }

  x->mv_limits = tmp_mv_limits;

  if (bestsme < INT_MAX) {
    int dis; /* TODO: use dis in distortion calculation later. */
    unsigned int sse;
    if (cpi->sf.use_upsampled_references) {
      // Use up-sampled reference frames.
      struct buf_2d backup_pred = pd->pre[0];
      const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);

      // Set pred for Y plane
      setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
                       upsampled_ref->y_crop_width,
                       upsampled_ref->y_crop_height, upsampled_ref->y_stride,
                       (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
                       pd->subsampling_y);

      // If bsize < BLOCK_8X8, adjust pred pointer for this block
#if !CONFIG_CB4X4
      if (bsize < BLOCK_8X8)
        pd->pre[0].buf =
            &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
                                                     pd->pre[0].stride))
                            << 3];
#endif  // !CONFIG_CB4X4

      bestsme = cpi->find_fractional_mv_step(
          x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
          &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
          x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
          mask_stride, ref_idx, pw, ph, 1);

      // Restore the reference frames.
      pd->pre[0] = backup_pred;
    } else {
      bestsme = cpi->find_fractional_mv_step(
          x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
          &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
          x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
          mask_stride, ref_idx, pw, ph, 0);
    }
  }

  // Restore the pointer to the first (possibly scaled) prediction buffer.
  if (ref_idx) pd->pre[0] = orig_yv12;

  if (bestsme < INT_MAX) *this_mv = *best_mv;

  *rate_mv = 0;

  if (scaled_ref_frame) {
    // Restore the prediction frame pointers to their unscaled versions.
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[ref_idx] = backup_yv12[i];
  }

  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
  *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
                              x->mvcost, MV_COST_WEIGHT);
}

// Wrapper for compound_single_motion_search, for the common case
// where the second prediction is also an inter mode.
static void compound_single_motion_search_interinter(
    const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
    int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
    const int block, int ref_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

  // This function should only ever be called for compound modes
  assert(has_second_ref(mbmi));

  // Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
  uint8_t *second_pred;
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
  else
    second_pred = (uint8_t *)second_pred_alloc_16;
#else
  DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif  // CONFIG_HIGHBITDEPTH

  MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
  const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;

  build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
                          ref_idx, second_pred);

  compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
                                second_pred, mask, mask_stride, rate_mv, block,
                                ref_idx);
}
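
// Runs the masked (wedge / segment) motion search for a compound block:
// 'which' selects whether only component 0, only component 1, or both
// components (via a joint search) are refined.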

#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
static void do_masked_motion_search_indexed(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
    int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
  // NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  BLOCK_SIZE sb_type = mbmi->sb_type;
  const uint8_t *mask;
  const int mask_stride = block_size_wide[bsize];

  mask = av1_get_compound_type_mask(comp_data, sb_type);

  int_mv frame_mv[TOTAL_REFS_PER_FRAME];
  MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
  assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4);

  frame_mv[rf[0]].as_int = cur_mv[0].as_int;
  frame_mv[rf[1]].as_int = cur_mv[1].as_int;
  if (which == 0 || which == 1) {
    compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row,
                                             mi_col, mask, mask_stride, rate_mv,
                                             0, which);
  } else if (which == 2) {
    joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask,
                        mask_stride, rate_mv, 0);
  }
  tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
  tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
}
#endif  // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
#endif  // CONFIG_EXT_INTER

// In some situations we want to discount the apparent cost of a new motion
// vector. Where there is a subtle motion field and especially where there is
// low spatial complexity then it can be hard to cover the cost of a new motion
// vector in a single block, even if that motion vector reduces distortion.
// However, once established that vector may be usable through the nearest and
// near mv modes to reduce distortion in subsequent blocks and also improve
// visual quality.
static int discount_newmv_test(const AV1_COMP *const cpi, int this_mode,
                               int_mv this_mv,
                               int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
                               int ref_frame) {
  return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
          (this_mv.as_int != 0) &&
          ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
           (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
          ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
           (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
}
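
// Note: a non-zero return tells the caller that this NEWMV candidate may be
// establishing a useful motion field entry, so callers may charge it a
// reduced mode/MV-reference cost when comparing modes.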

#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)

// TODO(jingning): this mv clamping function should be block size dependent.
static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
  clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
           xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
           xd->mb_to_top_edge - LEFT_TOP_MARGIN,
           xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}

#if CONFIG_EXT_INTER
#if CONFIG_WEDGE
static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
                               const BLOCK_SIZE bsize, const uint8_t *pred0,
                               int stride0, const uint8_t *pred1, int stride1) {
  const struct macroblock_plane *const p = &x->plane[0];
  const uint8_t *src = p->src.buf;
  int src_stride = p->src.stride;
  const int f_index = bsize - BLOCK_8X8;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  uint32_t esq[2][4];
  int64_t tl, br;

#if CONFIG_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    pred0 = CONVERT_TO_BYTEPTR(pred0);
    pred1 = CONVERT_TO_BYTEPTR(pred1);
  }
#endif  // CONFIG_HIGHBITDEPTH

  cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
  cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0,
                          &esq[0][1]);
  cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
                          pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
  cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
                          pred0 + bh / 2 * stride0 + bw / 2, stride0,
                          &esq[0][3]);
  cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
  cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1,
                          &esq[1][1]);
  cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
                          pred1 + bh / 2 * stride1, stride0, &esq[1][2]);
  cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
                          pred1 + bh / 2 * stride1 + bw / 2, stride0,
                          &esq[1][3]);

  tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
       (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
  br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
       (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
  return (tl + br > 0);
}
#endif  // CONFIG_WEDGE
#endif  // CONFIG_EXT_INTER
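
// Note: estimate_wedge_sign() above is a fast heuristic. It compares
// quadrant-wise prediction errors of the two predictors against the source
// to guess which predictor should cover which side of the wedge, so the full
// two-sign search in pick_wedge() can be skipped when
// sf.fast_wedge_sign_estimate is enabled.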

#if !CONFIG_DUAL_FILTER
static InterpFilter predict_interp_filter(
    const AV1_COMP *cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize,
    const int mi_row, const int mi_col,
    InterpFilter (*single_filter)[TOTAL_REFS_PER_FRAME]) {
  InterpFilter best_filter = SWITCHABLE;
  const AV1_COMMON *cm = &cpi->common;
  const MACROBLOCKD *xd = &x->e_mbd;
  int bsl = mi_width_log2_lookup[bsize];
  int pred_filter_search =
      cpi->sf.cb_pred_filter_search
          ? (((mi_row + mi_col) >> bsl) +
             get_chessboard_index(cm->current_video_frame)) &
                0x1
          : 0;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int is_comp_pred = has_second_ref(mbmi);
  const int this_mode = mbmi->mode;
  int refs[2] = { mbmi->ref_frame[0],
                  (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };

  if (pred_filter_search) {
    InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
    if (xd->up_available) af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
    if (xd->left_available) lf = xd->mi[-1]->mbmi.interp_filter;

#if CONFIG_EXT_INTER
    if ((this_mode != NEWMV && this_mode != NEW_NEWMV) || (af == lf))
#else
    if ((this_mode != NEWMV) || (af == lf))
#endif  // CONFIG_EXT_INTER
      best_filter = af;
  }
  if (is_comp_pred) {
    if (cpi->sf.adaptive_mode_search) {
#if CONFIG_EXT_INTER
      switch (this_mode) {
        case NEAREST_NEARESTMV:
          if (single_filter[NEARESTMV][refs[0]] ==
              single_filter[NEARESTMV][refs[1]])
            best_filter = single_filter[NEARESTMV][refs[0]];
          break;
        case NEAR_NEARMV:
          if (single_filter[NEARMV][refs[0]] == single_filter[NEARMV][refs[1]])
            best_filter = single_filter[NEARMV][refs[0]];
          break;
        case ZERO_ZEROMV:
          if (single_filter[ZEROMV][refs[0]] == single_filter[ZEROMV][refs[1]])
            best_filter = single_filter[ZEROMV][refs[0]];
          break;
        case NEW_NEWMV:
          if (single_filter[NEWMV][refs[0]] == single_filter[NEWMV][refs[1]])
            best_filter = single_filter[NEWMV][refs[0]];
          break;
        case NEAREST_NEWMV:
          if (single_filter[NEARESTMV][refs[0]] ==
              single_filter[NEWMV][refs[1]])
            best_filter = single_filter[NEARESTMV][refs[0]];
          break;
        case NEAR_NEWMV:
          if (single_filter[NEARMV][refs[0]] == single_filter[NEWMV][refs[1]])
            best_filter = single_filter[NEARMV][refs[0]];
          break;
        case NEW_NEARESTMV:
          if (single_filter[NEWMV][refs[0]] ==
              single_filter[NEARESTMV][refs[1]])
            best_filter = single_filter[NEWMV][refs[0]];
          break;
        case NEW_NEARMV:
          if (single_filter[NEWMV][refs[0]] == single_filter[NEARMV][refs[1]])
            best_filter = single_filter[NEWMV][refs[0]];
          break;
        default:
          if (single_filter[this_mode][refs[0]] ==
              single_filter[this_mode][refs[1]])
            best_filter = single_filter[this_mode][refs[0]];
          break;
      }
#else
      if (single_filter[this_mode][refs[0]] ==
          single_filter[this_mode][refs[1]])
        best_filter = single_filter[this_mode][refs[0]];
#endif  // CONFIG_EXT_INTER
    }
  }
  if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
    best_filter = EIGHTTAP_REGULAR;
  }
  return best_filter;
}
#endif  // !CONFIG_DUAL_FILTER

#if CONFIG_EXT_INTER
// Choose the best wedge index and sign
#if CONFIG_WEDGE
static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
                          const BLOCK_SIZE bsize, const uint8_t *const p0,
                          const uint8_t *const p1, int *const best_wedge_sign,
                          int *const best_wedge_index) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;
  int rate;
  int64_t dist;
  int64_t rd, best_rd = INT64_MAX;
  int wedge_index;
  int wedge_sign;
  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
  const uint8_t *mask;
  uint64_t sse;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH

  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);

  int64_t sign_limit;

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif  // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  sign_limit = ((int64_t)aom_sum_squares_i16(r0, N) -
                (int64_t)aom_sum_squares_i16(r1, N)) *
               (1 << WEDGE_WEIGHT_BITS) / 2;

  if (N < 64)
    av1_wedge_compute_delta_squares_c(ds, r0, r1, N);
  else
    av1_wedge_compute_delta_squares(ds, r0, r1, N);

  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);

    // TODO(jingning): Make sse2 functions support N = 16 case
    if (N < 64)
      wedge_sign = av1_wedge_sign_from_residuals_c(ds, mask, N, sign_limit);
    else
      wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);

    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    if (N < 64)
      sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
    else
      sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd = RDCOST(x->rdmult, x->rddiv, rate, dist);

    if (rd < best_rd) {
      *best_wedge_index = wedge_index;
      *best_wedge_sign = wedge_sign;
      best_rd = rd;
    }
  }

  return best_rd;
}
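
// Note: pick_wedge() ranks candidate wedges purely with the
// model_rd_from_sse() rate-distortion approximation; the winning mask is
// re-costed more accurately by the caller (see
// build_and_cost_compound_type()).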

// Choose the best wedge index for the specified sign
static int64_t pick_wedge_fixed_sign(
    const AV1_COMP *const cpi, const MACROBLOCK *const x,
    const BLOCK_SIZE bsize, const uint8_t *const p0, const uint8_t *const p1,
    const int wedge_sign, int *const best_wedge_index) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;
  int rate;
  int64_t dist;
  int64_t rd, best_rd = INT64_MAX;
  int wedge_index;
  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
  const uint8_t *mask;
  uint64_t sse;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH

  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif  // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    if (N < 64)
      sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
    else
      sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd = RDCOST(x->rdmult, x->rddiv, rate, dist);

    if (rd < best_rd) {
      *best_wedge_index = wedge_index;
      best_rd = rd;
    }
  }

  return best_rd;
}

static int64_t pick_interinter_wedge(const AV1_COMP *const cpi,
                                     MACROBLOCK *const x,
                                     const BLOCK_SIZE bsize,
                                     const uint8_t *const p0,
                                     const uint8_t *const p1) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int bw = block_size_wide[bsize];

  int64_t rd;
  int wedge_index = -1;
  int wedge_sign = 0;

  assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
  assert(cpi->common.allow_masked_compound);

  if (cpi->sf.fast_wedge_sign_estimate) {
    wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
    rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, wedge_sign, &wedge_index);
  } else {
    rd = pick_wedge(cpi, x, bsize, p0, p1, &wedge_sign, &wedge_index);
  }

  mbmi->wedge_sign = wedge_sign;
  mbmi->wedge_index = wedge_index;
  return rd;
}
#endif  // CONFIG_WEDGE

#if CONFIG_COMPOUND_SEGMENT
static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
                                   MACROBLOCK *const x, const BLOCK_SIZE bsize,
                                   const uint8_t *const p0,
                                   const uint8_t *const p1) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;
  int rate;
  uint64_t sse;
  int64_t dist;
  int64_t rd0;
  SEG_MASK_TYPE cur_mask_type;
  int64_t best_rd = INT64_MAX;
  SEG_MASK_TYPE best_mask_type = 0;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif  // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  // try each mask type and its inverse
  for (cur_mask_type = 0; cur_mask_type < SEG_MASK_TYPES; cur_mask_type++) {
    // build mask and inverse
#if CONFIG_HIGHBITDEPTH
    if (hbd)
      build_compound_seg_mask_highbd(
          xd->seg_mask, cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw,
          CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
    else
#endif  // CONFIG_HIGHBITDEPTH
      build_compound_seg_mask(xd->seg_mask, cur_mask_type, p0, bw, p1, bw,
                              bsize, bh, bw);

    // compute rd for mask
    sse = av1_wedge_sse_from_residuals(r1, d10, xd->seg_mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd0 = RDCOST(x->rdmult, x->rddiv, rate, dist);

    if (rd0 < best_rd) {
      best_mask_type = cur_mask_type;
      best_rd = rd0;
    }
  }

  mbmi->mask_type = best_mask_type;
#if CONFIG_HIGHBITDEPTH
  if (hbd)
    build_compound_seg_mask_highbd(
        xd->seg_mask, mbmi->mask_type, CONVERT_TO_BYTEPTR(p0), bw,
        CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
  else
#endif  // CONFIG_HIGHBITDEPTH
    build_compound_seg_mask(xd->seg_mask, mbmi->mask_type, p0, bw, p1, bw,
                            bsize, bh, bw);

  return best_rd;
}
#endif  // CONFIG_COMPOUND_SEGMENT

#if CONFIG_WEDGE && CONFIG_INTERINTRA
static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
                                     const MACROBLOCK *const x,
                                     const BLOCK_SIZE bsize,
                                     const uint8_t *const p0,
                                     const uint8_t *const p1) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  int64_t rd;
  int wedge_index = -1;

  assert(is_interintra_wedge_used(bsize));
  assert(cpi->common.allow_interintra_compound);

  rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &wedge_index);

  mbmi->interintra_wedge_sign = 0;
  mbmi->interintra_wedge_index = wedge_index;
  return rd;
}
#endif  // CONFIG_WEDGE && CONFIG_INTERINTRA

#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    const BLOCK_SIZE bsize,
                                    const uint8_t *const p0,
                                    const uint8_t *const p1) {
  const COMPOUND_TYPE compound_type =
      x->e_mbd.mi[0]->mbmi.interinter_compound_type;
  switch (compound_type) {
#if CONFIG_WEDGE
    case COMPOUND_WEDGE: return pick_interinter_wedge(cpi, x, bsize, p0, p1);
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
    case COMPOUND_SEG: return pick_interinter_seg(cpi, x, bsize, p0, p1);
#endif  // CONFIG_COMPOUND_SEGMENT
    default: assert(0); return 0;
  }
}
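
// Re-runs the motion search for whichever components of the chosen compound
// mode carry a NEWMV, using the block's current compound mask, and returns
// the updated motion vector rate.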

static int interinter_compound_motion_search(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const BLOCK_SIZE bsize, const int this_mode, int mi_row, int mi_col) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int_mv tmp_mv[2];
  int tmp_rate_mv = 0;
  const INTERINTER_COMPOUND_DATA compound_data = {
#if CONFIG_WEDGE
    mbmi->wedge_index,
    mbmi->wedge_sign,
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
    mbmi->mask_type,
    xd->seg_mask,
#endif  // CONFIG_COMPOUND_SEGMENT
    mbmi->interinter_compound_type
  };

  if (this_mode == NEW_NEWMV) {
    do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
    mbmi->mv[0].as_int = tmp_mv[0].as_int;
    mbmi->mv[1].as_int = tmp_mv[1].as_int;
  } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
    do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
    mbmi->mv[0].as_int = tmp_mv[0].as_int;
  } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
    do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
    mbmi->mv[1].as_int = tmp_mv[1].as_int;
  }
  return tmp_rate_mv;
}

static int64_t build_and_cost_compound_type(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const BLOCK_SIZE bsize, const int this_mode, int rs2, int rate_mv,
    BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, uint8_t **preds1,
    int *strides, int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int rate_sum;
  int64_t dist_sum;
  int64_t best_rd_cur = INT64_MAX;
  int64_t rd = INT64_MAX;
  int tmp_skip_txfm_sb;
  int64_t tmp_skip_sse_sb;
  const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type;

  best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1);
  best_rd_cur += RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv, 0);

  if (have_newmv_in_inter_mode(this_mode) &&
      use_masked_motion_search(compound_type)) {
    *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
                                                     this_mode, mi_row, mi_col);
    av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
    model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
                    &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
    rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
    if (rd >= best_rd_cur) {
      mbmi->mv[0].as_int = cur_mv[0].as_int;
      mbmi->mv[1].as_int = cur_mv[1].as_int;
      *out_rate_mv = rate_mv;
      av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
#if CONFIG_SUPERTX
                                               0, 0,
#endif  // CONFIG_SUPERTX
                                               preds0, strides, preds1,
                                               strides);
    }
    av1_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    if (rd != INT64_MAX)
      rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
    best_rd_cur = rd;
  } else {
    av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
#if CONFIG_SUPERTX
                                             0, 0,
#endif  // CONFIG_SUPERTX
                                             preds0, strides, preds1, strides);
    av1_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    if (rd != INT64_MAX)
      rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
    best_rd_cur = rd;
  }
  return best_rd_cur;
}
#endif  // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
#endif  // CONFIG_EXT_INTER
typedef struct {
#if CONFIG_MOTION_VAR
  // Inter prediction buffers and respective strides
  uint8_t *above_pred_buf[MAX_MB_PLANE];
  int above_pred_stride[MAX_MB_PLANE];
  uint8_t *left_pred_buf[MAX_MB_PLANE];
  int left_pred_stride[MAX_MB_PLANE];
#endif  // CONFIG_MOTION_VAR
  int_mv *single_newmv;
#if CONFIG_EXT_INTER
  // Pointer to array of motion vectors to use for each ref and their rates
  // Should point to first of 2 arrays in 2D array
  int *single_newmv_rate;
  // Pointer to array of predicted rate-distortion
  // Should point to first of 2 arrays in 2D array
  int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME];
#endif  // CONFIG_EXT_INTER
  InterpFilter single_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
} HandleInterModeArgs;
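
// handle_newmv(): for NEWMV-class modes, run the single or joint motion
// search, fill in frame_mv for the current mode, accumulate the MV rate and
// apply the new-MV rate discount where discount_newmv_test() allows it.
// Returns INT64_MAX when no valid motion vector is found.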
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize,
                            int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
                            const int mi_row, const int mi_col,
                            int *const rate_mv, int_mv *const single_newmv,
                            HandleInterModeArgs *const args) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
#if CONFIG_EXT_INTER
  const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
#endif  // CONFIG_EXT_INTER
  int_mv *const frame_mv = mode_mv[this_mode];
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
#if CONFIG_EXT_INTER
    for (i = 0; i < 2; ++i) {
      single_newmv[refs[i]].as_int = args->single_newmv[refs[i]].as_int;
    if (this_mode == NEW_NEWMV) {
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL,
        for (i = 0; i < 2; ++i) {
          av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
          *rate_mv += av1_mv_bit_cost(
              &frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        frame_mv[refs[0]].as_int =
            mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
        compound_single_motion_search_interinter(
            cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
        av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
        *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &mbmi_ext->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        frame_mv[refs[1]].as_int =
            mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
        compound_single_motion_search_interinter(
            cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
        av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
        *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                   &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
    // Initialize mv using single prediction mode result.
    frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
    frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
    if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
      joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, rate_mv, 0);
      for (i = 0; i < 2; ++i) {
        av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
        *rate_mv += av1_mv_bit_cost(&frame_mv[refs[i]].as_mv,
                                    &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
#endif  // CONFIG_EXT_INTER
#if CONFIG_EXT_INTER
    if (is_comp_interintra_pred) {
      x->best_mv = args->single_newmv[refs[0]];
      *rate_mv = args->single_newmv_rate[refs[0]];
      single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
      args->single_newmv[refs[0]] = x->best_mv;
      args->single_newmv_rate[refs[0]] = *rate_mv;
    single_motion_search(cpi, x, bsize, mi_row, mi_col, rate_mv);
    single_newmv[refs[0]] = x->best_mv;
#endif  // CONFIG_EXT_INTER
    if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
    frame_mv[refs[0]] = x->best_mv;
    xd->mi[0]->bmi[0].as_mv[0] = x->best_mv;
    // Estimate the rate implications of a new mv but discount this
    // under certain circumstances where we want to help initiate a weak
    // motion field, where the distortion gain for a single block may not
    // be enough to overcome the cost of a new mv.
    if (discount_newmv_test(cpi, this_mode, x->best_mv, mode_mv, refs[0])) {
      *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
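
// interpolation_filter_search(): start from the default filter, then, when
// the frame-level filter is SWITCHABLE and a search is worthwhile, evaluate
// the (dual) filter combinations with model_rd_for_sb() and keep the lowest
// RD choice, tracking which destination buffer holds the best prediction.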
int64_t interpolation_filter_search(
    MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
    int mi_row, int mi_col, const BUFFER_SET *const tmp_dst,
    BUFFER_SET *const orig_dst,
    InterpFilter (*const single_filter)[TOTAL_REFS_PER_FRAME],
    int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb,
    int64_t *const skip_sse_sb) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  (void)single_filter;
  InterpFilter assign_filter = SWITCHABLE;

  if (cm->interp_filter == SWITCHABLE) {
#if !CONFIG_DUAL_FILTER
    assign_filter = av1_is_interp_needed(xd)
                        ? predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
                        : cm->interp_filter;
#endif  // !CONFIG_DUAL_FILTER
    assign_filter = cm->interp_filter;

  set_default_interp_filters(mbmi, assign_filter);

  *switchable_rate = av1_get_switchable_rate(cpi, xd);
  av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
  model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
                  skip_txfm_sb, skip_sse_sb);
  *rd = RDCOST(x->rdmult, x->rddiv, *switchable_rate + tmp_rate, tmp_dist);

  if (assign_filter == SWITCHABLE) {
    // do interp_filter search
    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd)) {
#if CONFIG_DUAL_FILTER
      const int filter_set_size = DUAL_FILTER_SET_SIZE;
      const int filter_set_size = SWITCHABLE_FILTERS;
#endif  // CONFIG_DUAL_FILTER
      int best_in_temp = 0;
#if CONFIG_DUAL_FILTER
      InterpFilter best_filter[4];
      av1_copy(best_filter, mbmi->interp_filter);
      InterpFilter best_filter = mbmi->interp_filter;
#endif  // CONFIG_DUAL_FILTER
      restore_dst_buf(xd, *tmp_dst);
      // EIGHTTAP_REGULAR mode is calculated beforehand
      for (i = 1; i < filter_set_size; ++i) {
        int tmp_skip_sb = 0;
        int64_t tmp_skip_sse = INT64_MAX;
#if CONFIG_DUAL_FILTER
        mbmi->interp_filter[0] = filter_sets[i][0];
        mbmi->interp_filter[1] = filter_sets[i][1];
        mbmi->interp_filter[2] = filter_sets[i][0];
        mbmi->interp_filter[3] = filter_sets[i][1];
        mbmi->interp_filter = (InterpFilter)i;
#endif  // CONFIG_DUAL_FILTER
        tmp_rs = av1_get_switchable_rate(cpi, xd);
        av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
        model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                        &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
        tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist);

        *switchable_rate = av1_get_switchable_rate(cpi, xd);
#if CONFIG_DUAL_FILTER
        av1_copy(best_filter, mbmi->interp_filter);
        best_filter = mbmi->interp_filter;
#endif  // CONFIG_DUAL_FILTER
        *skip_txfm_sb = tmp_skip_sb;
        *skip_sse_sb = tmp_skip_sse;
        best_in_temp = !best_in_temp;
        restore_dst_buf(xd, *orig_dst);
        restore_dst_buf(xd, *tmp_dst);
      restore_dst_buf(xd, *tmp_dst);
      restore_dst_buf(xd, *orig_dst);
#if CONFIG_DUAL_FILTER
      av1_copy(mbmi->interp_filter, best_filter);
      mbmi->interp_filter = best_filter;
#endif  // CONFIG_DUAL_FILTER
#if CONFIG_DUAL_FILTER
    for (i = 0; i < 4; ++i)
      assert(mbmi->interp_filter[i] == EIGHTTAP_REGULAR);
    assert(mbmi->interp_filter == EIGHTTAP_REGULAR);
#endif  // CONFIG_DUAL_FILTER
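
// motion_mode_rd(): evaluate the allowed motion modes (simple translation,
// OBMC, warped motion) for the current inter mode, computing the full
// luma/chroma RD for each candidate and keeping the best; restores the
// original destination buffers before returning.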
// TODO(afergs): Refactor the MBMI references in here - there's four
// TODO(afergs): Refactor optional args - add them to a struct or remove
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
    RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
    int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
    int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
    const int *refs, int rate_mv,
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    int_mv *const single_newmv,
#if CONFIG_EXT_INTER
    int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi,
#if CONFIG_MOTION_VAR
#endif  // CONFIG_MOTION_VAR
#endif  // CONFIG_EXT_INTER
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    int rs, int *skip_txfm_sb, int64_t *skip_sse_sb, BUFFER_SET *orig_dst) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
  MOTION_MODE motion_mode, last_motion_mode_allowed;
  int rate2_nocoeff = 0, best_xskip, best_disable_skip = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  MB_MODE_INFO base_mbmi, best_mbmi;
  uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
#endif  // CONFIG_VAR_TX
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION

#if CONFIG_WARPED_MOTION
  int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
#endif  // CONFIG_WARPED_MOTION

#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
  av1_invalid_rd_stats(&best_rd_stats);
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7109 if (cm
->interp_filter
== SWITCHABLE
) rd_stats
->rate
+= rs
;
7110 #if CONFIG_WARPED_MOTION
7111 aom_clear_system_state();
7112 mbmi
->num_proj_ref
[0] = findSamples(cm
, xd
, mi_row
, mi_col
, pts
, pts_inref
);
7113 #if CONFIG_EXT_INTER
7114 best_bmc_mbmi
->num_proj_ref
[0] = mbmi
->num_proj_ref
[0];
7115 #endif // CONFIG_EXT_INTER
7116 #endif // CONFIG_WARPED_MOTION
7117 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7118 rate2_nocoeff
= rd_stats
->rate
;
7119 last_motion_mode_allowed
= motion_mode_allowed(
7120 #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
7121 0, xd
->global_motion
,
7122 #endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
7125 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7127 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7128 int64_t best_rd
= INT64_MAX
;
7129 for (motion_mode
= SIMPLE_TRANSLATION
;
7130 motion_mode
<= last_motion_mode_allowed
; motion_mode
++) {
7131 int64_t tmp_rd
= INT64_MAX
;
7134 #if CONFIG_EXT_INTER
7136 motion_mode
!= SIMPLE_TRANSLATION
? rate2_bmc_nocoeff
: rate2_nocoeff
;
7138 int tmp_rate2
= rate2_nocoeff
;
7139 #endif // CONFIG_EXT_INTER
7142 mbmi
->motion_mode
= motion_mode
;
7143 #if CONFIG_MOTION_VAR
7144 if (mbmi
->motion_mode
== OBMC_CAUSAL
) {
7145 #if CONFIG_EXT_INTER
7146 *mbmi
= *best_bmc_mbmi
;
7147 mbmi
->motion_mode
= OBMC_CAUSAL
;
7148 #endif // CONFIG_EXT_INTER
7149 if (!is_comp_pred
&& have_newmv_in_inter_mode(this_mode
)) {
7150 int tmp_rate_mv
= 0;
7152 single_motion_search(cpi
, x
, bsize
, mi_row
, mi_col
,
7153 #if CONFIG_EXT_INTER
7155 #endif // CONFIG_EXT_INTER
7157 mbmi
->mv
[0].as_int
= x
->best_mv
.as_int
;
7158 if (discount_newmv_test(cpi
, this_mode
, mbmi
->mv
[0], mode_mv
,
7160 tmp_rate_mv
= AOMMAX((tmp_rate_mv
/ NEW_MV_DISCOUNT_FACTOR
), 1);
7162 #if CONFIG_EXT_INTER
7163 tmp_rate2
= rate2_bmc_nocoeff
- rate_mv_bmc
+ tmp_rate_mv
;
7165 tmp_rate2
= rate2_nocoeff
- rate_mv
+ tmp_rate_mv
;
7166 #endif // CONFIG_EXT_INTER
7167 #if CONFIG_DUAL_FILTER
7168 if (!has_subpel_mv_component(xd
->mi
[0], xd
, 0))
7169 mbmi
->interp_filter
[0] = EIGHTTAP_REGULAR
;
7170 if (!has_subpel_mv_component(xd
->mi
[0], xd
, 1))
7171 mbmi
->interp_filter
[1] = EIGHTTAP_REGULAR
;
7172 #endif // CONFIG_DUAL_FILTER
7173 av1_build_inter_predictors_sb(cm
, xd
, mi_row
, mi_col
, orig_dst
, bsize
);
7174 #if CONFIG_EXT_INTER
7176 av1_build_inter_predictors_sb(cm
, xd
, mi_row
, mi_col
, orig_dst
, bsize
);
7177 #endif // CONFIG_EXT_INTER
7179 av1_build_obmc_inter_prediction(
7180 cm
, xd
, mi_row
, mi_col
, args
->above_pred_buf
, args
->above_pred_stride
,
7181 args
->left_pred_buf
, args
->left_pred_stride
);
7182 model_rd_for_sb(cpi
, bsize
, x
, xd
, 0, MAX_MB_PLANE
- 1, &tmp_rate
,
7183 &tmp_dist
, skip_txfm_sb
, skip_sse_sb
);
7185 #endif // CONFIG_MOTION_VAR
7187 #if CONFIG_WARPED_MOTION
7188 if (mbmi
->motion_mode
== WARPED_CAUSAL
) {
7189 #if CONFIG_EXT_INTER
7190 *mbmi
= *best_bmc_mbmi
;
7191 mbmi
->motion_mode
= WARPED_CAUSAL
;
7192 #endif // CONFIG_EXT_INTER
7193 mbmi
->wm_params
[0].wmtype
= DEFAULT_WMTYPE
;
7194 #if CONFIG_DUAL_FILTER
7195 for (int dir
= 0; dir
< 4; ++dir
)
7196 mbmi
->interp_filter
[dir
] = cm
->interp_filter
== SWITCHABLE
7198 : cm
->interp_filter
;
7200 mbmi
->interp_filter
= cm
->interp_filter
== SWITCHABLE
? EIGHTTAP_REGULAR
7201 : cm
->interp_filter
;
7202 #endif // CONFIG_DUAL_FILTER
7204 if (!find_projection(mbmi
->num_proj_ref
[0], pts
, pts_inref
, bsize
,
7205 mbmi
->mv
[0].as_mv
.row
, mbmi
->mv
[0].as_mv
.col
,
7206 &mbmi
->wm_params
[0], mi_row
, mi_col
)) {
7207 // Refine MV for NEWMV mode
7208 if (!is_comp_pred
&& have_newmv_in_inter_mode(this_mode
)) {
7209 int tmp_rate_mv
= 0;
7210 const int_mv mv0
= mbmi
->mv
[0];
7211 WarpedMotionParams wm_params0
= mbmi
->wm_params
[0];
7213 // Refine MV in a small range.
7214 av1_refine_warped_mv(cpi
, x
, bsize
, mi_row
, mi_col
, pts
, pts_inref
);
7216 // Keep the refined MV and WM parameters.
7217 if (mv0
.as_int
!= mbmi
->mv
[0].as_int
) {
7218 const int ref
= refs
[0];
7219 const MV ref_mv
= x
->mbmi_ext
->ref_mvs
[ref
][0].as_mv
;
7222 av1_mv_bit_cost(&mbmi
->mv
[0].as_mv
, &ref_mv
, x
->nmvjointcost
,
7223 x
->mvcost
, MV_COST_WEIGHT
);
7225 if (cpi
->sf
.adaptive_motion_search
)
7226 x
->pred_mv
[ref
] = mbmi
->mv
[0].as_mv
;
7228 single_newmv
[ref
] = mbmi
->mv
[0];
7230 if (discount_newmv_test(cpi
, this_mode
, mbmi
->mv
[0], mode_mv
,
7232 tmp_rate_mv
= AOMMAX((tmp_rate_mv
/ NEW_MV_DISCOUNT_FACTOR
), 1);
7234 #if CONFIG_EXT_INTER
7235 tmp_rate2
= rate2_bmc_nocoeff
- rate_mv_bmc
+ tmp_rate_mv
;
7237 tmp_rate2
= rate2_nocoeff
- rate_mv
+ tmp_rate_mv
;
7238 #endif // CONFIG_EXT_INTER
7239 #if CONFIG_DUAL_FILTER
7240 if (!has_subpel_mv_component(xd
->mi
[0], xd
, 0))
7241 mbmi
->interp_filter
[0] = EIGHTTAP_REGULAR
;
7242 if (!has_subpel_mv_component(xd
->mi
[0], xd
, 1))
7243 mbmi
->interp_filter
[1] = EIGHTTAP_REGULAR
;
7244 #endif // CONFIG_DUAL_FILTER
7246 // Restore the old MV and WM parameters.
7248 mbmi
->wm_params
[0] = wm_params0
;
7252 av1_build_inter_predictors_sb(cm
, xd
, mi_row
, mi_col
, NULL
, bsize
);
7253 model_rd_for_sb(cpi
, bsize
, x
, xd
, 0, MAX_MB_PLANE
- 1, &tmp_rate
,
7254 &tmp_dist
, skip_txfm_sb
, skip_sse_sb
);
7259 #endif // CONFIG_WARPED_MOTION
7265 rd_stats
->rate
= tmp_rate2
;
7266 if (last_motion_mode_allowed
> SIMPLE_TRANSLATION
) {
7267 #if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7268 if (last_motion_mode_allowed
== WARPED_CAUSAL
)
7269 #endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7270 rd_stats
->rate
+= cpi
->motion_mode_cost
[bsize
][mbmi
->motion_mode
];
7271 #if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7273 rd_stats
->rate
+= cpi
->motion_mode_cost1
[bsize
][mbmi
->motion_mode
];
7274 #endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7276 #if CONFIG_WARPED_MOTION
7277 if (mbmi
->motion_mode
== WARPED_CAUSAL
) {
7278 rd_stats
->rate
-= rs
;
7280 #endif // CONFIG_WARPED_MOTION
7281 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7282 if (!*skip_txfm_sb
) {
7283 int64_t rdcosty
= INT64_MAX
;
7284 int is_cost_valid_uv
= 0;
7286 // cost and distortion
7287 av1_subtract_plane(x
, bsize
, 0);
7289 if (cm
->tx_mode
== TX_MODE_SELECT
&& !xd
->lossless
[mbmi
->segment_id
]) {
7290 select_tx_type_yrd(cpi
, x
, rd_stats_y
, bsize
, ref_best_rd
);
7293 super_block_yrd(cpi
, x
, rd_stats_y
, bsize
, ref_best_rd
);
7294 for (idy
= 0; idy
< xd
->n8_h
; ++idy
)
7295 for (idx
= 0; idx
< xd
->n8_w
; ++idx
)
7296 mbmi
->inter_tx_size
[idy
][idx
] = mbmi
->tx_size
;
7297 memset(x
->blk_skip
[0], rd_stats_y
->skip
,
7298 sizeof(uint8_t) * xd
->n8_h
* xd
->n8_w
* 4);
7301 /* clang-format off */
7302 super_block_yrd(cpi
, x
, rd_stats_y
, bsize
, ref_best_rd
);
7303 /* clang-format on */
7304 #endif // CONFIG_VAR_TX
7306 if (rd_stats_y
->rate
== INT_MAX
) {
7307 av1_invalid_rd_stats(rd_stats
);
7308 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7309 if (mbmi
->motion_mode
!= SIMPLE_TRANSLATION
) {
7312 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7313 restore_dst_buf(xd
, *orig_dst
);
7315 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7317 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7320 av1_merge_rd_stats(rd_stats
, rd_stats_y
);
7322 rdcosty
= RDCOST(x
->rdmult
, x
->rddiv
, rd_stats
->rate
, rd_stats
->dist
);
7323 rdcosty
= AOMMIN(rdcosty
, RDCOST(x
->rdmult
, x
->rddiv
, 0, rd_stats
->sse
));
7324 /* clang-format off */
7327 inter_block_uvrd(cpi
, x
, rd_stats_uv
, bsize
, ref_best_rd
- rdcosty
);
7330 super_block_uvrd(cpi
, x
, rd_stats_uv
, bsize
, ref_best_rd
- rdcosty
);
7331 #endif // CONFIG_VAR_TX
7332 if (!is_cost_valid_uv
) {
7333 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7336 restore_dst_buf(xd
, *orig_dst
);
7338 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7340 /* clang-format on */
7341 av1_merge_rd_stats(rd_stats
, rd_stats_uv
);
7343 // record transform block coefficient cost
7344 // TODO(angiebird): So far rd_debug tool only detects discrepancy of
7345 // coefficient cost. Therefore, it is fine to copy rd_stats into mbmi
7346 // here because we already collect the coefficient cost. Move this part to
7347 // other place when we need to compare non-coefficient cost.
7348 mbmi
->rd_stats
= *rd_stats
;
7349 #endif // CONFIG_RD_DEBUG
7350 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7351 if (rd_stats
->skip
) {
7352 rd_stats
->rate
-= rd_stats_uv
->rate
+ rd_stats_y
->rate
;
7353 rd_stats_y
->rate
= 0;
7354 rd_stats_uv
->rate
= 0;
7355 rd_stats
->rate
+= av1_cost_bit(av1_get_skip_prob(cm
, xd
), 1);
7357 // here mbmi->skip temporarily plays a role as what this_skip2 does
7358 } else if (!xd
->lossless
[mbmi
->segment_id
] &&
7359 (RDCOST(x
->rdmult
, x
->rddiv
,
7360 rd_stats_y
->rate
+ rd_stats_uv
->rate
+
7361 av1_cost_bit(av1_get_skip_prob(cm
, xd
), 0),
7363 RDCOST(x
->rdmult
, x
->rddiv
,
7364 av1_cost_bit(av1_get_skip_prob(cm
, xd
), 1),
7366 rd_stats
->rate
-= rd_stats_uv
->rate
+ rd_stats_y
->rate
;
7367 rd_stats
->rate
+= av1_cost_bit(av1_get_skip_prob(cm
, xd
), 1);
7368 rd_stats
->dist
= rd_stats
->sse
;
7369 rd_stats_y
->rate
= 0;
7370 rd_stats_uv
->rate
= 0;
7373 rd_stats
->rate
+= av1_cost_bit(av1_get_skip_prob(cm
, xd
), 0);
7377 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7381 mbmi
->tx_size
= tx_size_from_tx_mode(bsize
, cm
->tx_mode
, 1);
7383 // The cost of skip bit needs to be added.
7384 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7386 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7387 rd_stats
->rate
+= av1_cost_bit(av1_get_skip_prob(cm
, xd
), 1);
7389 rd_stats
->dist
= *skip_sse_sb
;
7390 rd_stats
->sse
= *skip_sse_sb
;
7391 rd_stats_y
->rate
= 0;
7392 rd_stats_uv
->rate
= 0;
7396 #if CONFIG_GLOBAL_MOTION
7397 if (this_mode
== ZEROMV
7398 #if CONFIG_EXT_INTER
7399 || this_mode
== ZERO_ZEROMV
7400 #endif // CONFIG_EXT_INTER
7402 if (is_nontrans_global_motion(xd
)) {
7403 rd_stats
->rate
-= rs
;
7404 #if CONFIG_DUAL_FILTER
7405 mbmi
->interp_filter
[0] = cm
->interp_filter
== SWITCHABLE
7407 : cm
->interp_filter
;
7408 mbmi
->interp_filter
[1] = cm
->interp_filter
== SWITCHABLE
7410 : cm
->interp_filter
;
7412 mbmi
->interp_filter
= cm
->interp_filter
== SWITCHABLE
7414 : cm
->interp_filter
;
7415 #endif // CONFIG_DUAL_FILTER
7418 #endif // CONFIG_GLOBAL_MOTION
7420 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7421 tmp_rd
= RDCOST(x
->rdmult
, x
->rddiv
, rd_stats
->rate
, rd_stats
->dist
);
7422 if (mbmi
->motion_mode
== SIMPLE_TRANSLATION
|| (tmp_rd
< best_rd
)) {
7425 best_rd_stats
= *rd_stats
;
7426 best_rd_stats_y
= *rd_stats_y
;
7427 best_rd_stats_uv
= *rd_stats_uv
;
7429 for (int i
= 0; i
< MAX_MB_PLANE
; ++i
)
7430 memcpy(best_blk_skip
[i
], x
->blk_skip
[i
],
7431 sizeof(uint8_t) * xd
->n8_h
* xd
->n8_w
* 4);
7432 #endif // CONFIG_VAR_TX
7433 best_xskip
= x
->skip
;
7434 best_disable_skip
= *disable_skip
;
7438 if (best_rd
== INT64_MAX
) {
7439 av1_invalid_rd_stats(rd_stats
);
7440 restore_dst_buf(xd
, *orig_dst
);
7444 *rd_stats
= best_rd_stats
;
7445 *rd_stats_y
= best_rd_stats_y
;
7446 *rd_stats_uv
= best_rd_stats_uv
;
7448 for (int i
= 0; i
< MAX_MB_PLANE
; ++i
)
7449 memcpy(x
->blk_skip
[i
], best_blk_skip
[i
],
7450 sizeof(uint8_t) * xd
->n8_h
* xd
->n8_w
* 4);
7451 #endif // CONFIG_VAR_TX
7452 x
->skip
= best_xskip
;
7453 *disable_skip
= best_disable_skip
;
7454 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7456 restore_dst_buf(xd
, *orig_dst
);
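
// handle_inter_mode(): top-level RD evaluation for a single inter mode. It
// runs the NEWMV motion search, the interpolation filter search, the
// compound type and inter-intra searches (when enabled), then hands off to
// motion_mode_rd() for the final motion mode decision.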
static int64_t handle_inter_mode(
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
    RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
    int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
    int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const int is_comp_pred = has_second_ref(mbmi);
  const int this_mode = mbmi->mode;
  int_mv *frame_mv = mode_mv[this_mode];
  int refs[2] = { mbmi->ref_frame[0],
                  (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
#if CONFIG_EXT_INTER
  int pred_exists = 1;
#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
  const int bw = block_size_wide[bsize];
#endif  // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
  int_mv single_newmv[TOTAL_REFS_PER_FRAME];
#if CONFIG_INTERINTRA
  const unsigned int *const interintra_mode_cost =
      cpi->interintra_mode_cost[size_group_lookup[bsize]];
#endif  // CONFIG_INTERINTRA
  const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
  uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  int_mv *const single_newmv = args->single_newmv;
#endif  // CONFIG_EXT_INTER
#if CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
  DECLARE_ALIGNED(16, uint8_t, tmp_buf_[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif  // CONFIG_HIGHBITDEPTH
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_EXT_INTER
  int rate2_bmc_nocoeff;
  MB_MODE_INFO best_bmc_mbmi;
#if CONFIG_MOTION_VAR
#endif  // CONFIG_MOTION_VAR
#endif  // CONFIG_EXT_INTER
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
  int64_t rd = INT64_MAX;
  BUFFER_SET orig_dst, tmp_dst;
  int skip_txfm_sb = 0;
  int64_t skip_sse_sb = INT64_MAX;
7518 #if CONFIG_EXT_INTER
7519 #if CONFIG_INTERINTRA
7520 int compmode_interintra_cost
= 0;
7521 mbmi
->use_wedge_interintra
= 0;
7523 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7524 int compmode_interinter_cost
= 0;
7525 mbmi
->interinter_compound_type
= COMPOUND_AVERAGE
;
7528 #if CONFIG_INTERINTRA
7529 if (!cm
->allow_interintra_compound
&& is_comp_interintra_pred
)
7531 #endif // CONFIG_INTERINTRA
7533 // is_comp_interintra_pred implies !is_comp_pred
7534 assert(!is_comp_interintra_pred
|| (!is_comp_pred
));
7535 // is_comp_interintra_pred implies is_interintra_allowed(mbmi->sb_type)
7536 assert(!is_comp_interintra_pred
|| is_interintra_allowed(mbmi
));
7537 #endif // CONFIG_EXT_INTER
7539 #if CONFIG_EXT_INTER
7541 mode_ctx
= mbmi_ext
->compound_mode_context
[refs
[0]];
7543 #endif // CONFIG_EXT_INTER
7544 mode_ctx
= av1_mode_context_analyzer(mbmi_ext
->mode_context
,
7545 mbmi
->ref_frame
, bsize
, -1);
7547 #if CONFIG_HIGHBITDEPTH
7548 if (xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
)
7549 tmp_buf
= CONVERT_TO_BYTEPTR(tmp_buf_
);
7551 #endif // CONFIG_HIGHBITDEPTH
7553 // Make sure that we didn't leave the plane destination buffers set
7554 // to tmp_buf at the end of the last iteration
7555 assert(xd
->plane
[0].dst
.buf
!= tmp_buf
);
7557 #if CONFIG_WARPED_MOTION
7558 mbmi
->num_proj_ref
[0] = 0;
7559 mbmi
->num_proj_ref
[1] = 0;
7560 #endif // CONFIG_WARPED_MOTION
7563 if (frame_mv
[refs
[0]].as_int
== INVALID_MV
||
7564 frame_mv
[refs
[1]].as_int
== INVALID_MV
)
7568 mbmi
->motion_mode
= SIMPLE_TRANSLATION
;
7569 if (have_newmv_in_inter_mode(this_mode
)) {
7570 const int64_t ret_val
= handle_newmv(cpi
, x
, bsize
, mode_mv
, mi_row
, mi_col
,
7571 &rate_mv
, single_newmv
, args
);
7575 rd_stats
->rate
+= rate_mv
;
7577 for (i
= 0; i
< is_comp_pred
+ 1; ++i
) {
7578 cur_mv
[i
] = frame_mv
[refs
[i
]];
7579 // Clip "next_nearest" so that it does not extend to far out of image
7580 if (this_mode
!= NEWMV
) clamp_mv2(&cur_mv
[i
].as_mv
, xd
);
7581 if (mv_check_bounds(&x
->mv_limits
, &cur_mv
[i
].as_mv
)) return INT64_MAX
;
7582 mbmi
->mv
[i
].as_int
= cur_mv
[i
].as_int
;
7585 #if CONFIG_EXT_INTER
7586 if (this_mode
== NEAREST_NEARESTMV
)
7588 if (this_mode
== NEARESTMV
&& is_comp_pred
)
7589 #endif // CONFIG_EXT_INTER
7591 #if !CONFIG_EXT_INTER
7592 uint8_t ref_frame_type
= av1_ref_frame_type(mbmi
->ref_frame
);
7593 #endif // !CONFIG_EXT_INTER
7594 if (mbmi_ext
->ref_mv_count
[ref_frame_type
] > 0) {
7595 cur_mv
[0] = mbmi_ext
->ref_mv_stack
[ref_frame_type
][0].this_mv
;
7596 cur_mv
[1] = mbmi_ext
->ref_mv_stack
[ref_frame_type
][0].comp_mv
;
7598 for (i
= 0; i
< 2; ++i
) {
7599 clamp_mv2(&cur_mv
[i
].as_mv
, xd
);
7600 if (mv_check_bounds(&x
->mv_limits
, &cur_mv
[i
].as_mv
)) return INT64_MAX
;
7601 mbmi
->mv
[i
].as_int
= cur_mv
[i
].as_int
;
7606 #if CONFIG_EXT_INTER
7607 if (mbmi_ext
->ref_mv_count
[ref_frame_type
] > 0) {
7608 if (this_mode
== NEAREST_NEWMV
) {
7609 cur_mv
[0] = mbmi_ext
->ref_mv_stack
[ref_frame_type
][0].this_mv
;
7611 lower_mv_precision(&cur_mv
[0].as_mv
, cm
->allow_high_precision_mv
);
7612 clamp_mv2(&cur_mv
[0].as_mv
, xd
);
7613 if (mv_check_bounds(&x
->mv_limits
, &cur_mv
[0].as_mv
)) return INT64_MAX
;
7614 mbmi
->mv
[0].as_int
= cur_mv
[0].as_int
;
7617 if (this_mode
== NEW_NEARESTMV
) {
7618 cur_mv
[1] = mbmi_ext
->ref_mv_stack
[ref_frame_type
][0].comp_mv
;
7620 lower_mv_precision(&cur_mv
[1].as_mv
, cm
->allow_high_precision_mv
);
7621 clamp_mv2(&cur_mv
[1].as_mv
, xd
);
7622 if (mv_check_bounds(&x
->mv_limits
, &cur_mv
[1].as_mv
)) return INT64_MAX
;
7623 mbmi
->mv
[1].as_int
= cur_mv
[1].as_int
;
7627 if (mbmi_ext
->ref_mv_count
[ref_frame_type
] > 1) {
7628 int ref_mv_idx
= mbmi
->ref_mv_idx
+ 1;
7629 if (this_mode
== NEAR_NEWMV
|| this_mode
== NEAR_NEARMV
) {
7630 cur_mv
[0] = mbmi_ext
->ref_mv_stack
[ref_frame_type
][ref_mv_idx
].this_mv
;
7632 lower_mv_precision(&cur_mv
[0].as_mv
, cm
->allow_high_precision_mv
);
7633 clamp_mv2(&cur_mv
[0].as_mv
, xd
);
7634 if (mv_check_bounds(&x
->mv_limits
, &cur_mv
[0].as_mv
)) return INT64_MAX
;
7635 mbmi
->mv
[0].as_int
= cur_mv
[0].as_int
;
7638 if (this_mode
== NEW_NEARMV
|| this_mode
== NEAR_NEARMV
) {
7639 cur_mv
[1] = mbmi_ext
->ref_mv_stack
[ref_frame_type
][ref_mv_idx
].comp_mv
;
7641 lower_mv_precision(&cur_mv
[1].as_mv
, cm
->allow_high_precision_mv
);
7642 clamp_mv2(&cur_mv
[1].as_mv
, xd
);
7643 if (mv_check_bounds(&x
->mv_limits
, &cur_mv
[1].as_mv
)) return INT64_MAX
;
7644 mbmi
->mv
[1].as_int
= cur_mv
[1].as_int
;
7648 if (this_mode
== NEARMV
&& is_comp_pred
) {
7649 uint8_t ref_frame_type
= av1_ref_frame_type(mbmi
->ref_frame
);
7650 if (mbmi_ext
->ref_mv_count
[ref_frame_type
] > 1) {
7651 int ref_mv_idx
= mbmi
->ref_mv_idx
+ 1;
7652 cur_mv
[0] = mbmi_ext
->ref_mv_stack
[ref_frame_type
][ref_mv_idx
].this_mv
;
7653 cur_mv
[1] = mbmi_ext
->ref_mv_stack
[ref_frame_type
][ref_mv_idx
].comp_mv
;
7655 for (i
= 0; i
< 2; ++i
) {
7656 clamp_mv2(&cur_mv
[i
].as_mv
, xd
);
7657 if (mv_check_bounds(&x
->mv_limits
, &cur_mv
[i
].as_mv
)) return INT64_MAX
;
7658 mbmi
->mv
[i
].as_int
= cur_mv
[i
].as_int
;
7662 #endif // CONFIG_EXT_INTER
7664 // do first prediction into the destination buffer. Do the next
7665 // prediction into a temporary buffer. Then keep track of which one
7666 // of these currently holds the best predictor, and use the other
7667 // one for future predictions. In the end, copy from tmp_buf to
7668 // dst if necessary.
7669 for (i
= 0; i
< MAX_MB_PLANE
; i
++) {
7670 tmp_dst
.plane
[i
] = tmp_buf
+ i
* MAX_SB_SQUARE
;
7671 tmp_dst
.stride
[i
] = MAX_SB_SIZE
;
7673 for (i
= 0; i
< MAX_MB_PLANE
; i
++) {
7674 orig_dst
.plane
[i
] = xd
->plane
[i
].dst
.buf
;
7675 orig_dst
.stride
[i
] = xd
->plane
[i
].dst
.stride
;
7678 // We don't include the cost of the second reference here, because there
7679 // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
7680 // words if you present them in that order, the second one is always known
7681 // if the first is known.
7683 // Under some circumstances we discount the cost of new mv mode to encourage
7684 // initiation of a motion field.
7685 if (discount_newmv_test(cpi
, this_mode
, frame_mv
[refs
[0]], mode_mv
,
7687 #if CONFIG_EXT_INTER
7689 AOMMIN(cost_mv_ref(cpi
, this_mode
, mode_ctx
),
7690 cost_mv_ref(cpi
, is_comp_pred
? NEAREST_NEARESTMV
: NEARESTMV
,
7693 rd_stats
->rate
+= AOMMIN(cost_mv_ref(cpi
, this_mode
, mode_ctx
),
7694 cost_mv_ref(cpi
, NEARESTMV
, mode_ctx
));
7695 #endif // CONFIG_EXT_INTER
7697 rd_stats
->rate
+= cost_mv_ref(cpi
, this_mode
, mode_ctx
);
7700 if (RDCOST(x
->rdmult
, x
->rddiv
, rd_stats
->rate
, 0) > ref_best_rd
&&
7701 #if CONFIG_EXT_INTER
7702 mbmi
->mode
!= NEARESTMV
&& mbmi
->mode
!= NEAREST_NEARESTMV
7704 mbmi
->mode
!= NEARESTMV
7705 #endif // CONFIG_EXT_INTER
7709 int64_t ret_val
= interpolation_filter_search(
7710 x
, cpi
, bsize
, mi_row
, mi_col
, &tmp_dst
, &orig_dst
, args
->single_filter
,
7711 &rd
, &rs
, &skip_txfm_sb
, &skip_sse_sb
);
7712 if (ret_val
!= 0) return ret_val
;
7714 #if CONFIG_EXT_INTER
7715 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7716 best_bmc_mbmi
= *mbmi
;
7717 rate2_bmc_nocoeff
= rd_stats
->rate
;
7718 if (cm
->interp_filter
== SWITCHABLE
) rate2_bmc_nocoeff
+= rs
;
7719 #if CONFIG_MOTION_VAR
7720 rate_mv_bmc
= rate_mv
;
7721 #endif // CONFIG_MOTION_VAR
7722 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7724 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7728 int64_t best_rd_compound
= INT64_MAX
, best_rd_cur
= INT64_MAX
;
7729 INTERINTER_COMPOUND_DATA best_compound_data
;
7731 int best_tmp_rate_mv
= rate_mv
;
7732 int tmp_skip_txfm_sb
;
7733 int64_t tmp_skip_sse_sb
;
7734 int compound_type_cost
[COMPOUND_TYPES
];
7735 uint8_t pred0
[2 * MAX_SB_SQUARE
];
7736 uint8_t pred1
[2 * MAX_SB_SQUARE
];
7737 uint8_t *preds0
[1] = { pred0
};
7738 uint8_t *preds1
[1] = { pred1
};
7739 int strides
[1] = { bw
};
7741 int masked_compound_used
= is_any_masked_compound_used(bsize
);
7742 #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
7743 masked_compound_used
= masked_compound_used
&& cm
->allow_masked_compound
;
7744 #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
7745 COMPOUND_TYPE cur_type
;
7747 best_mv
[0].as_int
= cur_mv
[0].as_int
;
7748 best_mv
[1].as_int
= cur_mv
[1].as_int
;
7749 memset(&best_compound_data
, 0, sizeof(best_compound_data
));
7750 #if CONFIG_COMPOUND_SEGMENT
7751 uint8_t tmp_mask_buf
[2 * MAX_SB_SQUARE
];
7752 best_compound_data
.seg_mask
= tmp_mask_buf
;
7753 #endif // CONFIG_COMPOUND_SEGMENT
7755 if (masked_compound_used
) {
7756 av1_cost_tokens(compound_type_cost
, cm
->fc
->compound_type_prob
[bsize
],
7757 av1_compound_type_tree
);
7758 // get inter predictors to use for masked compound modes
7759 av1_build_inter_predictors_for_planes_single_buf(
7760 xd
, bsize
, 0, 0, mi_row
, mi_col
, 0, preds0
, strides
);
7761 av1_build_inter_predictors_for_planes_single_buf(
7762 xd
, bsize
, 0, 0, mi_row
, mi_col
, 1, preds1
, strides
);
7765 for (cur_type
= COMPOUND_AVERAGE
; cur_type
< COMPOUND_TYPES
; cur_type
++) {
7766 if (cur_type
!= COMPOUND_AVERAGE
&& !masked_compound_used
) break;
7767 if (!is_interinter_compound_used(cur_type
, bsize
)) break;
7768 tmp_rate_mv
= rate_mv
;
7769 best_rd_cur
= INT64_MAX
;
7770 mbmi
->interinter_compound_type
= cur_type
;
7771 rs2
= av1_cost_literal(get_interinter_compound_type_bits(
7772 bsize
, mbmi
->interinter_compound_type
)) +
7773 (masked_compound_used
7774 ? compound_type_cost
[mbmi
->interinter_compound_type
]
7778 case COMPOUND_AVERAGE
:
7779 av1_build_inter_predictors_sby(cm
, xd
, mi_row
, mi_col
, &orig_dst
,
7781 av1_subtract_plane(x
, bsize
, 0);
7782 rd
= estimate_yrd_for_sb(cpi
, bsize
, x
, &rate_sum
, &dist_sum
,
7783 &tmp_skip_txfm_sb
, &tmp_skip_sse_sb
,
7785 if (rd
!= INT64_MAX
)
7787 RDCOST(x
->rdmult
, x
->rddiv
, rs2
+ rate_mv
+ rate_sum
, dist_sum
);
7788 best_rd_compound
= best_rd_cur
;
7791 case COMPOUND_WEDGE
:
7792 if (x
->source_variance
> cpi
->sf
.disable_wedge_search_var_thresh
&&
7793 best_rd_compound
/ 3 < ref_best_rd
) {
7794 best_rd_cur
= build_and_cost_compound_type(
7795 cpi
, x
, cur_mv
, bsize
, this_mode
, rs2
, rate_mv
, &orig_dst
,
7796 &tmp_rate_mv
, preds0
, preds1
, strides
, mi_row
, mi_col
);
7799 #endif // CONFIG_WEDGE
7800 #if CONFIG_COMPOUND_SEGMENT
7802 if (x
->source_variance
> cpi
->sf
.disable_wedge_search_var_thresh
&&
7803 best_rd_compound
/ 3 < ref_best_rd
) {
7804 best_rd_cur
= build_and_cost_compound_type(
7805 cpi
, x
, cur_mv
, bsize
, this_mode
, rs2
, rate_mv
, &orig_dst
,
7806 &tmp_rate_mv
, preds0
, preds1
, strides
, mi_row
, mi_col
);
7809 #endif // CONFIG_COMPOUND_SEGMENT
7810 default: assert(0); return 0;
7813 if (best_rd_cur
< best_rd_compound
) {
7814 best_rd_compound
= best_rd_cur
;
7816 best_compound_data
.wedge_index
= mbmi
->wedge_index
;
7817 best_compound_data
.wedge_sign
= mbmi
->wedge_sign
;
7818 #endif // CONFIG_WEDGE
7819 #if CONFIG_COMPOUND_SEGMENT
7820 best_compound_data
.mask_type
= mbmi
->mask_type
;
7821 memcpy(best_compound_data
.seg_mask
, xd
->seg_mask
,
7822 2 * MAX_SB_SQUARE
* sizeof(uint8_t));
7823 #endif // CONFIG_COMPOUND_SEGMENT
7824 best_compound_data
.interinter_compound_type
=
7825 mbmi
->interinter_compound_type
;
7826 if (have_newmv_in_inter_mode(this_mode
)) {
7827 if (use_masked_motion_search(cur_type
)) {
7828 best_tmp_rate_mv
= tmp_rate_mv
;
7829 best_mv
[0].as_int
= mbmi
->mv
[0].as_int
;
7830 best_mv
[1].as_int
= mbmi
->mv
[1].as_int
;
7832 best_mv
[0].as_int
= cur_mv
[0].as_int
;
7833 best_mv
[1].as_int
= cur_mv
[1].as_int
;
7837 // reset to original mvs for next iteration
7838 mbmi
->mv
[0].as_int
= cur_mv
[0].as_int
;
7839 mbmi
->mv
[1].as_int
= cur_mv
[1].as_int
;
7842 mbmi
->wedge_index
= best_compound_data
.wedge_index
;
7843 mbmi
->wedge_sign
= best_compound_data
.wedge_sign
;
7844 #endif // CONFIG_WEDGE
7845 #if CONFIG_COMPOUND_SEGMENT
7846 mbmi
->mask_type
= best_compound_data
.mask_type
;
7847 memcpy(xd
->seg_mask
, best_compound_data
.seg_mask
,
7848 2 * MAX_SB_SQUARE
* sizeof(uint8_t));
7849 #endif // CONFIG_COMPOUND_SEGMENT
7850 mbmi
->interinter_compound_type
=
7851 best_compound_data
.interinter_compound_type
;
7852 if (have_newmv_in_inter_mode(this_mode
)) {
7853 mbmi
->mv
[0].as_int
= best_mv
[0].as_int
;
7854 mbmi
->mv
[1].as_int
= best_mv
[1].as_int
;
7855 xd
->mi
[0]->bmi
[0].as_mv
[0].as_int
= mbmi
->mv
[0].as_int
;
7856 xd
->mi
[0]->bmi
[0].as_mv
[1].as_int
= mbmi
->mv
[1].as_int
;
7857 if (use_masked_motion_search(mbmi
->interinter_compound_type
)) {
7858 rd_stats
->rate
+= best_tmp_rate_mv
- rate_mv
;
7859 rate_mv
= best_tmp_rate_mv
;
7863 if (ref_best_rd
< INT64_MAX
&& best_rd_compound
/ 3 > ref_best_rd
) {
7864 restore_dst_buf(xd
, orig_dst
);
7870 compmode_interinter_cost
=
7871 av1_cost_literal(get_interinter_compound_type_bits(
7872 bsize
, mbmi
->interinter_compound_type
)) +
7873 (masked_compound_used
7874 ? compound_type_cost
[mbmi
->interinter_compound_type
]
7877 #endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7879 #if CONFIG_INTERINTRA
7880 if (is_comp_interintra_pred
) {
7881 INTERINTRA_MODE best_interintra_mode
= II_DC_PRED
;
7882 int64_t best_interintra_rd
= INT64_MAX
;
7883 int rmode
, rate_sum
;
7886 int tmp_rate_mv
= 0;
7887 int tmp_skip_txfm_sb
;
7888 int64_t tmp_skip_sse_sb
;
7889 DECLARE_ALIGNED(16, uint8_t, intrapred_
[2 * MAX_SB_SQUARE
]);
7892 #if CONFIG_HIGHBITDEPTH
7893 if (xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
)
7894 intrapred
= CONVERT_TO_BYTEPTR(intrapred_
);
7896 #endif // CONFIG_HIGHBITDEPTH
7897 intrapred
= intrapred_
;
7899 mbmi
->ref_frame
[1] = NONE_FRAME
;
7900 for (j
= 0; j
< MAX_MB_PLANE
; j
++) {
7901 xd
->plane
[j
].dst
.buf
= tmp_buf
+ j
* MAX_SB_SQUARE
;
7902 xd
->plane
[j
].dst
.stride
= bw
;
7904 av1_build_inter_predictors_sby(cm
, xd
, mi_row
, mi_col
, &orig_dst
, bsize
);
7905 restore_dst_buf(xd
, orig_dst
);
7906 mbmi
->ref_frame
[1] = INTRA_FRAME
;
7907 mbmi
->use_wedge_interintra
= 0;
7909 for (j
= 0; j
< INTERINTRA_MODES
; ++j
) {
7910 mbmi
->interintra_mode
= (INTERINTRA_MODE
)j
;
7911 rmode
= interintra_mode_cost
[mbmi
->interintra_mode
];
7912 av1_build_intra_predictors_for_interintra(xd
, bsize
, 0, &orig_dst
,
7914 av1_combine_interintra(xd
, bsize
, 0, tmp_buf
, bw
, intrapred
, bw
);
7915 model_rd_for_sb(cpi
, bsize
, x
, xd
, 0, 0, &rate_sum
, &dist_sum
,
7916 &tmp_skip_txfm_sb
, &tmp_skip_sse_sb
);
7918 RDCOST(x
->rdmult
, x
->rddiv
, tmp_rate_mv
+ rate_sum
+ rmode
, dist_sum
);
7919 if (rd
< best_interintra_rd
) {
7920 best_interintra_rd
= rd
;
7921 best_interintra_mode
= mbmi
->interintra_mode
;
7924 mbmi
->interintra_mode
= best_interintra_mode
;
7925 rmode
= interintra_mode_cost
[mbmi
->interintra_mode
];
7926 av1_build_intra_predictors_for_interintra(xd
, bsize
, 0, &orig_dst
,
7928 av1_combine_interintra(xd
, bsize
, 0, tmp_buf
, bw
, intrapred
, bw
);
7929 av1_subtract_plane(x
, bsize
, 0);
7930 rd
= estimate_yrd_for_sb(cpi
, bsize
, x
, &rate_sum
, &dist_sum
,
7931 &tmp_skip_txfm_sb
, &tmp_skip_sse_sb
, INT64_MAX
);
7932 if (rd
!= INT64_MAX
)
7933 rd
= RDCOST(x
->rdmult
, x
->rddiv
, rate_mv
+ rmode
+ rate_sum
, dist_sum
);
7934 best_interintra_rd
= rd
;
7936 if (ref_best_rd
< INT64_MAX
&& best_interintra_rd
> 2 * ref_best_rd
) {
7937 // Don't need to call restore_dst_buf here
7941 if (is_interintra_wedge_used(bsize
)) {
7942 int64_t best_interintra_rd_nowedge
= INT64_MAX
;
7943 int64_t best_interintra_rd_wedge
= INT64_MAX
;
7945 int rwedge
= av1_cost_bit(cm
->fc
->wedge_interintra_prob
[bsize
], 0);
7946 if (rd
!= INT64_MAX
)
7947 rd
= RDCOST(x
->rdmult
, x
->rddiv
, rmode
+ rate_mv
+ rwedge
+ rate_sum
,
7949 best_interintra_rd_nowedge
= best_interintra_rd
;
7951 // Disable wedge search if source variance is small
7952 if (x
->source_variance
> cpi
->sf
.disable_wedge_search_var_thresh
) {
7953 mbmi
->use_wedge_interintra
= 1;
7955 rwedge
= av1_cost_literal(get_interintra_wedge_bits(bsize
)) +
7956 av1_cost_bit(cm
->fc
->wedge_interintra_prob
[bsize
], 1);
7958 best_interintra_rd_wedge
=
7959 pick_interintra_wedge(cpi
, x
, bsize
, intrapred_
, tmp_buf_
);
7961 best_interintra_rd_wedge
+=
7962 RDCOST(x
->rdmult
, x
->rddiv
, rmode
+ rate_mv
+ rwedge
, 0);
7963 // Refine motion vector.
7964 if (have_newmv_in_inter_mode(this_mode
)) {
7965 // get negative of mask
7966 const uint8_t *mask
= av1_get_contiguous_soft_mask(
7967 mbmi
->interintra_wedge_index
, 1, bsize
);
7968 tmp_mv
.as_int
= x
->mbmi_ext
->ref_mvs
[refs
[0]][0].as_int
;
7969 compound_single_motion_search(cpi
, x
, bsize
, &tmp_mv
.as_mv
, mi_row
,
7970 mi_col
, intrapred
, mask
, bw
,
7971 &tmp_rate_mv
, 0, 0);
7972 mbmi
->mv
[0].as_int
= tmp_mv
.as_int
;
7973 av1_build_inter_predictors_sby(cm
, xd
, mi_row
, mi_col
, &orig_dst
,
7975 model_rd_for_sb(cpi
, bsize
, x
, xd
, 0, 0, &rate_sum
, &dist_sum
,
7976 &tmp_skip_txfm_sb
, &tmp_skip_sse_sb
);
7977 rd
= RDCOST(x
->rdmult
, x
->rddiv
,
7978 rmode
+ tmp_rate_mv
+ rwedge
+ rate_sum
, dist_sum
);
7979 if (rd
>= best_interintra_rd_wedge
) {
7980 tmp_mv
.as_int
= cur_mv
[0].as_int
;
7981 tmp_rate_mv
= rate_mv
;
7984 tmp_mv
.as_int
= cur_mv
[0].as_int
;
7985 tmp_rate_mv
= rate_mv
;
7986 av1_combine_interintra(xd
, bsize
, 0, tmp_buf
, bw
, intrapred
, bw
);
7988 // Evaluate closer to true rd
7989 av1_subtract_plane(x
, bsize
, 0);
7991 estimate_yrd_for_sb(cpi
, bsize
, x
, &rate_sum
, &dist_sum
,
7992 &tmp_skip_txfm_sb
, &tmp_skip_sse_sb
, INT64_MAX
);
7993 if (rd
!= INT64_MAX
)
7994 rd
= RDCOST(x
->rdmult
, x
->rddiv
,
7995 rmode
+ tmp_rate_mv
+ rwedge
+ rate_sum
, dist_sum
);
7996 best_interintra_rd_wedge
= rd
;
7997 if (best_interintra_rd_wedge
< best_interintra_rd_nowedge
) {
7998 mbmi
->use_wedge_interintra
= 1;
7999 mbmi
->mv
[0].as_int
= tmp_mv
.as_int
;
8000 rd_stats
->rate
+= tmp_rate_mv
- rate_mv
;
8001 rate_mv
= tmp_rate_mv
;
8003 mbmi
->use_wedge_interintra
= 0;
8004 mbmi
->mv
[0].as_int
= cur_mv
[0].as_int
;
8007 mbmi
->use_wedge_interintra
= 0;
8010 #endif // CONFIG_WEDGE
8013 compmode_interintra_cost
=
8014 av1_cost_bit(cm
->fc
->interintra_prob
[size_group_lookup
[bsize
]], 1) +
8015 interintra_mode_cost
[mbmi
->interintra_mode
];
8016 if (is_interintra_wedge_used(bsize
)) {
8017 compmode_interintra_cost
+= av1_cost_bit(
8018 cm
->fc
->wedge_interintra_prob
[bsize
], mbmi
->use_wedge_interintra
);
8019 if (mbmi
->use_wedge_interintra
) {
8020 compmode_interintra_cost
+=
8021 av1_cost_literal(get_interintra_wedge_bits(bsize
));
8024 } else if (is_interintra_allowed(mbmi
)) {
8025 compmode_interintra_cost
=
8026 av1_cost_bit(cm
->fc
->interintra_prob
[size_group_lookup
[bsize
]], 0);
8028 #endif // CONFIG_INTERINTRA
8030 if (pred_exists
== 0) {
8033 av1_build_inter_predictors_sb(cm
, xd
, mi_row
, mi_col
, &orig_dst
, bsize
);
8034 model_rd_for_sb(cpi
, bsize
, x
, xd
, 0, MAX_MB_PLANE
- 1, &tmp_rate
,
8035 &tmp_dist
, &skip_txfm_sb
, &skip_sse_sb
);
8036 rd
= RDCOST(x
->rdmult
, x
->rddiv
, rs
+ tmp_rate
, tmp_dist
);
8038 #endif // CONFIG_EXT_INTER
8041 #if CONFIG_DUAL_FILTER
8042 args
->single_filter
[this_mode
][refs
[0]] = mbmi
->interp_filter
[0];
8044 args
->single_filter
[this_mode
][refs
[0]] = mbmi
->interp_filter
;
8045 #endif // CONFIG_DUAL_FILTER
8047 #if CONFIG_EXT_INTER
8048 if (args
->modelled_rd
!= NULL
) {
8050 const int mode0
= compound_ref0_mode(this_mode
);
8051 const int mode1
= compound_ref1_mode(this_mode
);
8052 const int64_t mrd
= AOMMIN(args
->modelled_rd
[mode0
][refs
[0]],
8053 args
->modelled_rd
[mode1
][refs
[1]]);
8054 if (rd
/ 4 * 3 > mrd
&& ref_best_rd
< INT64_MAX
) {
8055 restore_dst_buf(xd
, orig_dst
);
8058 } else if (!is_comp_interintra_pred
) {
8059 args
->modelled_rd
[this_mode
][refs
[0]] = rd
;
8062 #endif // CONFIG_EXT_INTER
8064 if (cpi
->sf
.use_rd_breakout
&& ref_best_rd
< INT64_MAX
) {
8065 // if current pred_error modeled rd is substantially more than the best
8066 // so far, do not bother doing full rd
8067 if (rd
/ 2 > ref_best_rd
) {
8068 restore_dst_buf(xd
, orig_dst
);
8073 #if CONFIG_EXT_INTER
8074 #if CONFIG_INTERINTRA
8075 rd_stats
->rate
+= compmode_interintra_cost
;
8076 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
8077 rate2_bmc_nocoeff
+= compmode_interintra_cost
;
8080 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
8081 rd_stats
->rate
+= compmode_interinter_cost
;
8085 ret_val
= motion_mode_rd(cpi
, x
, bsize
, rd_stats
, rd_stats_y
, rd_stats_uv
,
8086 disable_skip
, mode_mv
, mi_row
, mi_col
, args
,
8087 ref_best_rd
, refs
, rate_mv
,
8088 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
8090 #if CONFIG_EXT_INTER
8091 rate2_bmc_nocoeff
, &best_bmc_mbmi
,
8092 #if CONFIG_MOTION_VAR
8094 #endif // CONFIG_MOTION_VAR
8095 #endif // CONFIG_EXT_INTER
8096 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
8097 rs
, &skip_txfm_sb
, &skip_sse_sb
, &orig_dst
);
8098 if (ret_val
!= 0) return ret_val
;
8100 return 0; // The rate-distortion cost will be re-calculated by caller.
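
// rd_pick_intrabc_mode_sb(): when screen content tools are enabled, search
// an intra block copy (DV) candidate within the already-coded part of the
// tile, build the predictor with BILINEAR filtering and keep it if either
// the skip or no-skip RD cost beats the current best intra cost.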
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       RD_STATS *rd_cost, BLOCK_SIZE bsize,
  const AV1_COMMON *const cm = &cpi->common;
  if (bsize < BLOCK_8X8 || !cm->allow_screen_content_tools) return INT64_MAX;

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MODE_INFO *const mi = xd->mi[0];
  const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
  const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row / MAX_MIB_SIZE;
  const int sb_col = mi_col / MAX_MIB_SIZE;

  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
                   mbmi_ext->ref_mv_stack[ref_frame],
#if CONFIG_EXT_INTER
                   mbmi_ext->compound_mode_context,
#endif  // CONFIG_EXT_INTER
                   candidates, mi_row, mi_col, NULL, NULL,
                   mbmi_ext->mode_context);

  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv);

  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) av1_find_ref_dv(&dv_ref, mi_row, mi_col);
  mbmi_ext->ref_mvs[INTRA_FRAME][0] = dv_ref;

  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL);
  for (int i = 0; i < MAX_MB_PLANE; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];

  enum IntrabcMotionDirection {
    IBC_MOTION_DIRECTIONS

  MB_MODE_INFO *mbmi = &mi->mbmi;
  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdcost = *rd_cost;
  int best_skip = x->skip;

  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    const MvLimits tmp_mv_limits = x->mv_limits;
      case IBC_MOTION_ABOVE:
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        x->mv_limits.row_max = (sb_row * MAX_MIB_SIZE - mi_row) * MI_SIZE - h;
      case IBC_MOTION_LEFT:
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (sb_col * MAX_MIB_SIZE - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * MAX_MIB_SIZE, tile->mi_row_end);
        x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;

    assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
    assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
    assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
    assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
    av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);

    if (x->mv_limits.col_max < x->mv_limits.col_min ||
        x->mv_limits.row_max < x->mv_limits.row_min) {
      x->mv_limits = tmp_mv_limits;

    int step_param = cpi->mv_step_param;
    MV mvp_full = dv_ref.as_mv;
    int sadpb = x->sadperbit16;
    int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                        sadpb, cond_cost_list(cpi, cost_list),
                                        &dv_ref.as_mv, INT_MAX, 1);

    x->mv_limits = tmp_mv_limits;
    if (bestsme == INT_MAX) continue;
    mvp_full = x->best_mv.as_mv;
    MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 };
    if (mv_check_bounds(&x->mv_limits, &dv)) continue;
    if (!is_dv_valid(dv, tile, mi_row, mi_col, bsize)) continue;
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = DC_PRED;
    mbmi->mv[0].as_mv = dv;
#if CONFIG_DUAL_FILTER
    for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR;
    mbmi->interp_filter = BILINEAR;
    av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);

    int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost,
                                  x->mvcost, MV_COST_WEIGHT);
    const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0);
    const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0);
    const int rate_mode =
        cpi->y_mode_costs[A][L][DC_PRED] + av1_cost_bit(INTRABC_PROB, 1);

    RD_STATS rd_stats, rd_stats_uv;
    av1_subtract_plane(x, bsize, 0);
    super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
    super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
    mbmi->rd_stats = rd_stats;

    // TODO(aconverse@google.com): Evaluate allowing VAR TX on intrabc blocks
    const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
    const int height = block_size_high[bsize] >> tx_size_high_log2[0];
    for (idy = 0; idy < height; ++idy)
      for (idx = 0; idx < width; ++idx)
        mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
    mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
#endif  // CONFIG_VAR_TX

    const aom_prob skip_prob = av1_get_skip_prob(cm, xd);

    RD_STATS rdc_noskip;
    av1_init_rd_stats(&rdc_noskip);
        rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0);
    rdc_noskip.dist = rd_stats.dist;
        RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist);
    if (rdc_noskip.rdcost < best_rd) {
      best_rd = rdc_noskip.rdcost;
      best_skip = x->skip;
      best_rdcost = rdc_noskip;

    av1_init_rd_stats(&rdc_skip);
    rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1);
    rdc_skip.dist = rd_stats.sse;
    rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist);
    if (rdc_skip.rdcost < best_rd) {
      best_rd = rdc_skip.rdcost;
      best_skip = x->skip;
      best_rdcost = rdc_skip;

  *rd_cost = best_rdcost;
  x->skip = best_skip;
#endif  // CONFIG_INTRABC
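
// av1_rd_pick_intra_mode_sb(): pick the best luma intra mode (sub-8x8 or
// full-block), then the chroma mode for the matching max UV transform size,
// combine the rates with the skip flag cost and, when intra block copy is
// allowed, let rd_pick_intrabc_mode_sb() try to improve on the result.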
void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                               RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = xd->plane;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t dist_y = 0, dist_uv = 0;
  TX_SIZE max_uv_tx_size;
  const int unify_bsize = CONFIG_CB4X4;

  xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
  xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
  xd->mi[0]->mbmi.use_intrabc = 0;
  xd->mi[0]->mbmi.mv[0].as_int = 0;
#endif  // CONFIG_INTRABC

  const int64_t intra_yrd =
      (bsize >= BLOCK_8X8 || unify_bsize)
          ? rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                   &y_skip, bsize, best_rd)
          : rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                         &dist_y, &y_skip, best_rd);

  if (intra_yrd < best_rd) {
    max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size]
                                     [pd[1].subsampling_x][pd[1].subsampling_y];
    init_sbuv_mode(&xd->mi[0]->mbmi);
    if (!x->skip_chroma_rd)
      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                              &uv_skip, bsize, max_uv_tx_size);
    rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                            &uv_skip, AOMMAX(BLOCK_8X8, bsize), max_uv_tx_size);
#endif  // CONFIG_CB4X4

    if (y_skip && uv_skip) {
      rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                      av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
      rd_cost->dist = dist_y + dist_uv;
          rate_y + rate_uv + av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
      rd_cost->dist = dist_y + dist_uv;
    rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
#if CONFIG_DAALA_DIST && CONFIG_CB4X4
    rd_cost->dist_y = dist_y;
    rd_cost->rate = INT_MAX;

  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
    ctx->skip = x->skip;  // FIXME where is the proper place to set this?!
    assert(rd_cost->rate != INT_MAX);
    rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);

  if (rd_cost->rate == INT_MAX) return;

  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
8358 // Do we have an internal image edge (e.g. formatting bars).
8359 int av1_internal_image_edge(const AV1_COMP
*cpi
) {
8360 return (cpi
->oxcf
.pass
== 2) &&
8361 ((cpi
->twopass
.this_frame_stats
.inactive_zone_rows
> 0) ||
8362 (cpi
->twopass
.this_frame_stats
.inactive_zone_cols
> 0));
8365 // Checks to see if a super block is on a horizontal image edge.
8366 // In most cases this is the "real" edge unless there are formatting
8367 // bars embedded in the stream.
8368 int av1_active_h_edge(const AV1_COMP
*cpi
, int mi_row
, int mi_step
) {
8370 int bottom_edge
= cpi
->common
.mi_rows
;
8371 int is_active_h_edge
= 0;
8373 // For two pass account for any formatting bars detected.
8374 if (cpi
->oxcf
.pass
== 2) {
8375 const TWO_PASS
*const twopass
= &cpi
->twopass
;
8377 // The inactive region is specified in MBs not mi units.
8378 // The image edge is in the following MB row.
8379 top_edge
+= (int)(twopass
->this_frame_stats
.inactive_zone_rows
* 2);
8381 bottom_edge
-= (int)(twopass
->this_frame_stats
.inactive_zone_rows
* 2);
8382 bottom_edge
= AOMMAX(top_edge
, bottom_edge
);
8385 if (((top_edge
>= mi_row
) && (top_edge
< (mi_row
+ mi_step
))) ||
8386 ((bottom_edge
>= mi_row
) && (bottom_edge
< (mi_row
+ mi_step
)))) {
8387 is_active_h_edge
= 1;
8389 return is_active_h_edge
;
// Checks to see if a super block is on a vertical image edge.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
  int left_edge = 0;
  int right_edge = cpi->common.mi_cols;
  int is_active_v_edge = 0;

  // For two pass account for any formatting bars detected.
  if (cpi->oxcf.pass == 2) {
    const TWO_PASS *const twopass = &cpi->twopass;

    // The inactive region is specified in MBs not mi units.
    // The image edge is in the following MB row.
    left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);

    right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
    right_edge = AOMMAX(left_edge, right_edge);
  }

  if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
      ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
    is_active_v_edge = 1;
  }
  return is_active_v_edge;
}
// Checks to see if a super block is at the edge of the active image.
// In most cases this is the "real" edge unless there are formatting
// bars embedded in the stream.
int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
  return av1_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
         av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
}
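
// The helper below rebuilds the chroma palette color index map for the current
// block: it copies the source U/V samples into the k-means data buffer, loads
// the previously chosen palette colors as fixed centroids, re-runs the index
// assignment, and extends the map to the full plane block dimensions.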
#if CONFIG_PALETTE
static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  int src_stride = x->plane[1].src.stride;
  const uint8_t *const src_u = x->plane[1].src.buf;
  const uint8_t *const src_v = x->plane[2].src.buf;
  float *const data = x->palette_buffer->kmeans_data_buf;
  float centroids[2 * PALETTE_MAX_SIZE];
  uint8_t *const color_map = xd->plane[1].color_index_map;
  int r, c;
#if CONFIG_HIGHBITDEPTH
  const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
  const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
#endif  // CONFIG_HIGHBITDEPTH
  int plane_block_width, plane_block_height, rows, cols;
  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                           &plane_block_height, &rows, &cols);

  for (r = 0; r < rows; ++r) {
    for (c = 0; c < cols; ++c) {
#if CONFIG_HIGHBITDEPTH
      if (cpi->common.use_highbitdepth) {
        data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
        data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
      } else {
#endif  // CONFIG_HIGHBITDEPTH
        data[(r * cols + c) * 2] = src_u[r * src_stride + c];
        data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c];
#if CONFIG_HIGHBITDEPTH
      }
#endif  // CONFIG_HIGHBITDEPTH
    }
  }

  for (r = 1; r < 3; ++r) {
    for (c = 0; c < pmi->palette_size[1]; ++c) {
      centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c];
    }
  }

  av1_calc_indices(data, centroids, color_map, rows * cols,
                   pmi->palette_size[1], 2);
  extend_palette_color_map(color_map, cols, rows, plane_block_width,
                           plane_block_height);
}
#endif  // CONFIG_PALETTE
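
// The next helper evaluates DC_PRED with the filter-intra tools as one more
// candidate during the inter-frame mode search and, if its rate-distortion
// cost beats the current best, updates the best-mode bookkeeping passed in
// through the pointer arguments.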
#if CONFIG_FILTER_INTRA
static void pick_filter_intra_interframe(
    const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
    BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_uv_intra,
    int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
    PREDICTION_MODE *mode_uv, FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
#if CONFIG_EXT_INTRA
    int8_t *uv_angle_delta,
#endif  // CONFIG_EXT_INTRA
#if CONFIG_PALETTE
    PALETTE_MODE_INFO *pmi_uv, int palette_ctx,
#endif  // CONFIG_PALETTE
    int skip_mask, unsigned int *ref_costs_single, int64_t *best_rd,
    int64_t *best_intra_rd, PREDICTION_MODE *best_intra_mode,
    int *best_mode_index, int *best_skip2, int *best_mode_skippable,
#if CONFIG_SUPERTX
    int *returnrate_nocoef,
#endif  // CONFIG_SUPERTX
    int64_t *best_pred_rd, MB_MODE_INFO *best_mbmode, RD_STATS *rd_cost) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if CONFIG_PALETTE
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
#endif  // CONFIG_PALETTE
  int rate2 = 0, rate_y = INT_MAX, skippable = 0, rate_uv, rate_dummy, i;
  int dc_mode_index;
  const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
  int64_t distortion2 = 0, distortion_y = 0, this_rd = *best_rd;
  int64_t distortion_uv, model_rd = INT64_MAX;
  TX_SIZE uv_tx;

  for (i = 0; i < MAX_MODES; ++i)
    if (av1_mode_order[i].mode == DC_PRED &&
        av1_mode_order[i].ref_frame[0] == INTRA_FRAME)
      break;
  dc_mode_index = i;
  assert(i < MAX_MODES);

  // TODO(huisu): use skip_mask for further speedup.
  mbmi->mode = DC_PRED;
  mbmi->uv_mode = DC_PRED;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  if (!rd_pick_filter_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
                                &skippable, bsize, intra_mode_cost[mbmi->mode],
                                &this_rd, &model_rd, 0)) {
    return;
  }
  if (rate_y == INT_MAX) return;

  uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
                          [xd->plane[1].subsampling_y];
  if (rate_uv_intra[uv_tx] == INT_MAX) {
    choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
                         &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
                         &skip_uv[uv_tx], &mode_uv[uv_tx]);
#if CONFIG_PALETTE
    if (cm->allow_screen_content_tools) pmi_uv[uv_tx] = *pmi;
#endif  // CONFIG_PALETTE
    filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
#if CONFIG_EXT_INTRA
    uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
#endif  // CONFIG_EXT_INTRA
  }

  rate_uv = rate_uv_tokenonly[uv_tx];
  distortion_uv = dist_uv[uv_tx];
  skippable = skippable && skip_uv[uv_tx];
  mbmi->uv_mode = mode_uv[uv_tx];
#if CONFIG_PALETTE
  if (cm->allow_screen_content_tools) {
    pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
    memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
           pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
           2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
  }
#endif  // CONFIG_PALETTE
#if CONFIG_EXT_INTRA
  mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
#endif  // CONFIG_EXT_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
      filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
  if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
    mbmi->filter_intra_mode_info.filter_intra_mode[1] =
        filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
  }

  rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
          cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
#if CONFIG_PALETTE
  if (cpi->common.allow_screen_content_tools && mbmi->mode == DC_PRED &&
      bsize >= BLOCK_8X8)
    rate2 += av1_cost_bit(
        av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
#endif  // CONFIG_PALETTE

  if (!xd->lossless[mbmi->segment_id]) {
    // super_block_yrd above includes the cost of the tx_size in the
    // tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
  }

  rate2 += av1_cost_bit(cm->fc->filter_intra_probs[0],
                        mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
  rate2 += write_uniform_cost(
      FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]);
#if CONFIG_EXT_INTRA
  if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
    rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
  }
#endif  // CONFIG_EXT_INTRA
  if (mbmi->mode == DC_PRED) {
    rate2 +=
        av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
                     mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
    if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1])
      rate2 +=
          write_uniform_cost(FILTER_INTRA_MODES,
                             mbmi->filter_intra_mode_info.filter_intra_mode[1]);
  }
  distortion2 = distortion_y + distortion_uv;
  av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, 0, 0, mi_row,
                               mi_col);

  rate2 += ref_costs_single[INTRA_FRAME];

  if (skippable) {
    rate2 -= (rate_y + rate_uv);
    rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
  } else {
    rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
  }
  this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

  if (this_rd < *best_intra_rd) {
    *best_intra_rd = this_rd;
    *best_intra_mode = mbmi->mode;
  }
  for (i = 0; i < REFERENCE_MODES; ++i)
    best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);

  if (this_rd < *best_rd) {
    *best_mode_index = dc_mode_index;
    mbmi->mv[0].as_int = 0;
    rd_cost->rate = rate2;
#if CONFIG_SUPERTX
    if (x->skip)
      *returnrate_nocoef = rate2;
    else
      *returnrate_nocoef = rate2 - rate_y - rate_uv;
    *returnrate_nocoef -= av1_cost_bit(av1_get_skip_prob(cm, xd), skippable);
    *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
                                       mbmi->ref_frame[0] != INTRA_FRAME);
#endif  // CONFIG_SUPERTX
    rd_cost->dist = distortion2;
    rd_cost->rdcost = this_rd;
    *best_rd = this_rd;
    *best_mbmode = *mbmi;
    *best_skip2 = 0;
    *best_mode_skippable = skippable;
  }
}
#endif  // CONFIG_FILTER_INTRA
#if CONFIG_MOTION_VAR
static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
                                      const MACROBLOCKD *xd, int mi_row,
                                      int mi_col, const uint8_t *above,
                                      int above_stride, const uint8_t *left,
                                      int left_stride);
#endif  // CONFIG_MOTION_VAR
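
// Top-level rate-distortion mode search for an inter frame block: set up the
// per-reference buffers and skip masks, loop over av1_mode_order evaluating
// intra and inter candidates, then optionally revisit the transform type and
// palette choices for the winning mode.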
void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
                               MACROBLOCK *x, int mi_row, int mi_col,
                               RD_STATS *rd_cost,
#if CONFIG_SUPERTX
                               int *returnrate_nocoef,
#endif  // CONFIG_SUPERTX
                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                               int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const RD_OPT *const rd_opt = &cpi->rd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if CONFIG_PALETTE
  const int try_palette =
      cpi->common.allow_screen_content_tools && bsize >= BLOCK_8X8;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
#endif  // CONFIG_PALETTE
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const struct segmentation *const seg = &cm->seg;
  PREDICTION_MODE this_mode;
  MV_REFERENCE_FRAME ref_frame, second_ref_frame;
  unsigned char segment_id = mbmi->segment_id;
  int comp_pred, i, k;
  int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
  struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
  int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
#if CONFIG_EXT_INTER
  int single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
  int64_t modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
#endif  // CONFIG_EXT_INTER
  static const int flag_list[TOTAL_REFS_PER_FRAME] = {
    0,
    AOM_LAST_FLAG,
#if CONFIG_EXT_REFS
    AOM_LAST2_FLAG,
    AOM_LAST3_FLAG,
#endif  // CONFIG_EXT_REFS
    AOM_GOLD_FLAG,
#if CONFIG_EXT_REFS
    AOM_BWD_FLAG,
#endif  // CONFIG_EXT_REFS
    AOM_ALT_FLAG,
  };
  int64_t best_rd = best_rd_so_far;
  int best_rate_y = INT_MAX, best_rate_uv = INT_MAX;
  int64_t best_pred_diff[REFERENCE_MODES];
  int64_t best_pred_rd[REFERENCE_MODES];
  MB_MODE_INFO best_mbmode;
  int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
  int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
  int best_mode_skippable = 0;
  int midx, best_mode_index = -1;
  unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
  unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
  aom_prob comp_mode_p;
  int64_t best_intra_rd = INT64_MAX;
  unsigned int best_pred_sse = UINT_MAX;
  PREDICTION_MODE best_intra_mode = DC_PRED;
  int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
  int64_t dist_uvs[TX_SIZES_ALL];
  int skip_uvs[TX_SIZES_ALL];
  PREDICTION_MODE mode_uv[TX_SIZES_ALL];
#if CONFIG_PALETTE
  PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
#endif  // CONFIG_PALETTE
#if CONFIG_EXT_INTRA
  int8_t uv_angle_delta[TX_SIZES_ALL];
  int is_directional_mode, angle_stats_ready = 0;
  uint8_t directional_mode_skip_mask[INTRA_MODES];
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  int8_t dc_skipped = 1;
  FILTER_INTRA_MODE_INFO filter_intra_mode_info_uv[TX_SIZES_ALL];
#endif  // CONFIG_FILTER_INTRA
  const int intra_cost_penalty = av1_get_intra_cost_penalty(
      cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
  const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
  int best_skip2 = 0;
  uint8_t ref_frame_skip_mask[2] = { 0 };
  uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
  MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
  int64_t best_single_inter_rd = INT64_MAX;
#endif  // CONFIG_EXT_INTER && CONFIG_INTERINTRA
  int mode_skip_start = sf->mode_skip_start + 1;
  const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
  const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
  int64_t mode_threshold[MAX_MODES];
  int *mode_map = tile_data->mode_map[bsize];
  const int mode_search_skip_flags = sf->mode_search_skip_flags;
#if CONFIG_PVQ
  od_rollback_buffer pre_buf;
#endif  // CONFIG_PVQ

  HandleInterModeArgs args = {
#if CONFIG_MOTION_VAR
    { NULL },
    { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
    { NULL },
    { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
#endif  // CONFIG_MOTION_VAR
#if CONFIG_EXT_INTER
    NULL,
    NULL,
    NULL,
#else   // CONFIG_EXT_INTER
    NULL,
#endif  // CONFIG_EXT_INTER
    { { 0 } },
  };

#if CONFIG_PALETTE || CONFIG_EXT_INTRA
  const int rows = block_size_high[bsize];
  const int cols = block_size_wide[bsize];
#endif  // CONFIG_PALETTE || CONFIG_EXT_INTRA
#if CONFIG_PALETTE
  int palette_ctx = 0;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
#endif  // CONFIG_PALETTE
#if CONFIG_MOTION_VAR
  int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
  int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
  int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
  int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };

#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    int len = sizeof(uint16_t);
    args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
    args.above_pred_buf[1] =
        CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
    args.above_pred_buf[2] =
        CONVERT_TO_BYTEPTR(x->above_pred_buf + 2 * MAX_SB_SQUARE * len);
    args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
    args.left_pred_buf[1] =
        CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
    args.left_pred_buf[2] =
        CONVERT_TO_BYTEPTR(x->left_pred_buf + 2 * MAX_SB_SQUARE * len);
  } else {
#endif  // CONFIG_HIGHBITDEPTH
    args.above_pred_buf[0] = x->above_pred_buf;
    args.above_pred_buf[1] = x->above_pred_buf + MAX_SB_SQUARE;
    args.above_pred_buf[2] = x->above_pred_buf + 2 * MAX_SB_SQUARE;
    args.left_pred_buf[0] = x->left_pred_buf;
    args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE;
    args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE;
#if CONFIG_HIGHBITDEPTH
  }
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_MOTION_VAR
  av1_zero(best_mbmode);

#if CONFIG_PALETTE
  if (above_mi)
    palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
  if (left_mi)
    palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
#endif  // CONFIG_PALETTE

  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
  for (i = 0; i < TX_SIZES_ALL; i++) rate_uv_intra[i] = INT_MAX;
  for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
  for (i = 0; i < MB_MODE_COUNT; ++i) {
    for (k = 0; k < TOTAL_REFS_PER_FRAME; ++k) {
      args.single_filter[i][k] = SWITCHABLE;
    }
  }

  rd_cost->rate = INT_MAX;
#if CONFIG_SUPERTX
  *returnrate_nocoef = INT_MAX;
#endif  // CONFIG_SUPERTX

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    x->mbmi_ext->mode_context[ref_frame] = 0;
#if CONFIG_EXT_INTER
    x->mbmi_ext->compound_mode_context[ref_frame] = 0;
#endif  // CONFIG_EXT_INTER
    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
      assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
      setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
                         frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
    }
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
#if CONFIG_GLOBAL_MOTION
    frame_mv[ZEROMV][ref_frame].as_int =
        gm_get_motion_vector(&cm->global_motion[ref_frame],
                             cm->allow_high_precision_mv, bsize, mi_col, mi_row,
                             0)
            .as_int;
#else   // CONFIG_GLOBAL_MOTION
    frame_mv[ZEROMV][ref_frame].as_int = 0;
#endif  // CONFIG_GLOBAL_MOTION
#if CONFIG_EXT_INTER
    frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
#if CONFIG_GLOBAL_MOTION
    frame_mv[ZERO_ZEROMV][ref_frame].as_int =
        gm_get_motion_vector(&cm->global_motion[ref_frame],
                             cm->allow_high_precision_mv, bsize, mi_col, mi_row,
                             0)
            .as_int;
#else   // CONFIG_GLOBAL_MOTION
    frame_mv[ZERO_ZEROMV][ref_frame].as_int = 0;
#endif  // CONFIG_GLOBAL_MOTION
#endif  // CONFIG_EXT_INTER
  }

  for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
    MODE_INFO *const mi = xd->mi[0];
    int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
    x->mbmi_ext->mode_context[ref_frame] = 0;
    av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
                     mbmi_ext->ref_mv_stack[ref_frame],
#if CONFIG_EXT_INTER
                     mbmi_ext->compound_mode_context,
#endif  // CONFIG_EXT_INTER
                     candidates, mi_row, mi_col, NULL, NULL,
                     mbmi_ext->mode_context);
    if (mbmi_ext->ref_mv_count[ref_frame] < 2) {
      MV_REFERENCE_FRAME rf[2];
      av1_set_ref_frame(rf, ref_frame);
      if (mbmi_ext->ref_mvs[rf[0]][0].as_int !=
              frame_mv[ZEROMV][rf[0]].as_int ||
          mbmi_ext->ref_mvs[rf[0]][1].as_int !=
              frame_mv[ZEROMV][rf[0]].as_int ||
          mbmi_ext->ref_mvs[rf[1]][0].as_int !=
              frame_mv[ZEROMV][rf[1]].as_int ||
          mbmi_ext->ref_mvs[rf[1]][1].as_int != frame_mv[ZEROMV][rf[1]].as_int)
        mbmi_ext->mode_context[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET);
    }
  }

#if CONFIG_MOTION_VAR
  av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);

  if (check_num_overlappable_neighbors(mbmi) &&
      is_motion_variation_allowed_bsize(bsize)) {
    av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
                                        args.above_pred_buf, dst_width1,
                                        dst_height1, args.above_pred_stride);
    av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
                                       args.left_pred_buf, dst_width2,
                                       dst_height2, args.left_pred_stride);
    av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
                         mi_col);
    calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0],
                              args.above_pred_stride[0], args.left_pred_buf[0],
                              args.left_pred_stride[0]);
  }
#endif  // CONFIG_MOTION_VAR
  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
      // Skip checking missing references in both single and compound reference
      // modes. Note that a mode will be skipped iff both reference frames
      // are masked out.
#if CONFIG_EXT_REFS
      if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) {
        ref_frame_skip_mask[0] |= (1 << ref_frame);
        ref_frame_skip_mask[1] |= ((1 << ref_frame) | 0x01);
      } else {
#endif  // CONFIG_EXT_REFS
        ref_frame_skip_mask[0] |= (1 << ref_frame);
        ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
#if CONFIG_EXT_REFS
      }
#endif  // CONFIG_EXT_REFS
    } else {
      for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
        // Skip fixed mv modes for poor references
        if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
          mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
          break;
        }
      }
    }
    // If the segment reference frame feature is enabled....
    // then do nothing if the current ref frame is not allowed..
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      ref_frame_skip_mask[0] |= (1 << ref_frame);
      ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    }
  }

  // Disable this drop out case if the ref frame
  // segment level feature is enabled for this segment. This is to
  // prevent the possibility that we end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
      int_mv zeromv;
      ref_frame_skip_mask[0] = (1 << LAST_FRAME) |
#if CONFIG_EXT_REFS
                               (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
                               (1 << BWDREF_FRAME) |
#endif  // CONFIG_EXT_REFS
                               (1 << GOLDEN_FRAME);
      ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
      // TODO(zoeliu): To further explore whether following needs to be done for
      //               BWDREF_FRAME as well.
      mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
#if CONFIG_GLOBAL_MOTION
      zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME],
                                           cm->allow_high_precision_mv, bsize,
                                           mi_col, mi_row, 0)
                          .as_int;
#else   // CONFIG_GLOBAL_MOTION
      zeromv.as_int = 0;
#endif  // CONFIG_GLOBAL_MOTION
      if (frame_mv[NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
        mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
      if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
        mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
#if CONFIG_EXT_INTER
      if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
        mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
      if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
        mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
#endif  // CONFIG_EXT_INTER
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (sf->alt_ref_search_fp) {
      assert(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
      mode_skip_mask[ALTREF_FRAME] = 0;
      ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
      ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
    }
  }

  if (sf->alt_ref_search_fp)
    if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
      if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
        mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;

  if (sf->adaptive_mode_search) {
    if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
        cpi->rc.frames_since_golden >= 3)
      if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
        mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
  }

  if (bsize > sf->max_intra_bsize) {
    ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
    ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
  }

  mode_skip_mask[INTRA_FRAME] |=
      ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);

  for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
  for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
    mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;

  midx = sf->schedule_mode_search ? mode_skip_start : 0;
  while (midx > 4) {
    uint8_t end_pos = 0;
    for (i = 5; i < midx; ++i) {
      if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
        uint8_t tmp = mode_map[i];
        mode_map[i] = mode_map[i - 1];
        mode_map[i - 1] = tmp;
        end_pos = i;
      }
    }
    midx = end_pos;
  }

  if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
    x->use_default_intra_tx_type = 1;
  else
    x->use_default_intra_tx_type = 0;

  if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
    x->use_default_inter_tx_type = 1;
  else
    x->use_default_inter_tx_type = 0;
#if CONFIG_PVQ
  od_encode_checkpoint(&x->daala_enc, &pre_buf);
#endif  // CONFIG_PVQ
#if CONFIG_EXT_INTER
  for (i = 0; i < MB_MODE_COUNT; ++i)
    for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
      modelled_rd[i][ref_frame] = INT64_MAX;
#endif  // CONFIG_EXT_INTER
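
  // Main candidate loop: each entry of av1_mode_order pairs a prediction mode
  // with one or two reference frames; candidates that survive the skip masks
  // are fully evaluated and the lowest RD cost found so far is tracked.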
  for (midx = 0; midx < MAX_MODES; ++midx) {
    int mode_index;
    int mode_excluded = 0;
    int64_t this_rd = INT64_MAX;
    int disable_skip = 0;
    int compmode_cost = 0;
    int rate2 = 0, rate_y = 0, rate_uv = 0;
    int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
#if CONFIG_DAALA_DIST && CONFIG_CB4X4
    int64_t distortion2_y = 0;
    int64_t total_sse_y = INT64_MAX;
#endif
    int skippable = 0;
    int this_skip2 = 0;
    int64_t total_sse = INT64_MAX;
    uint8_t ref_frame_type;
#if CONFIG_PVQ
    od_encode_rollback(&x->daala_enc, &pre_buf);
#endif  // CONFIG_PVQ
    mode_index = mode_map[midx];
    this_mode = av1_mode_order[mode_index].mode;
    ref_frame = av1_mode_order[mode_index].ref_frame[0];
    second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
    mbmi->ref_mv_idx = 0;

#if CONFIG_EXT_INTER
    if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
      // Mode must by compatible
      if (!is_interintra_allowed_mode(this_mode)) continue;
      if (!is_interintra_allowed_bsize(bsize)) continue;
    }

    if (is_inter_compound_mode(this_mode)) {
      frame_mv[this_mode][ref_frame].as_int =
          frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
      frame_mv[this_mode][second_ref_frame].as_int =
          frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
    }
#endif  // CONFIG_EXT_INTER

    // Look at the reference frame of the best mode so far and set the
    // skip mask to look at a subset of the remaining modes.
    if (midx == mode_skip_start && best_mode_index >= 0) {
      switch (best_mbmode.ref_frame[0]) {
        case INTRA_FRAME: break;
        case LAST_FRAME:
          ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
          ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
          break;
#if CONFIG_EXT_REFS
        case LAST2_FRAME:
          ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK;
          ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
          break;
        case LAST3_FRAME:
          ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK;
          ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
          break;
#endif  // CONFIG_EXT_REFS
        case GOLDEN_FRAME:
          ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
          ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
          break;
#if CONFIG_EXT_REFS
        case BWDREF_FRAME:
          ref_frame_skip_mask[0] |= BWDREF_FRAME_MODE_MASK;
          ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
          break;
#endif  // CONFIG_EXT_REFS
        case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALTREF_FRAME_MODE_MASK;
#if CONFIG_EXT_REFS
          ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
#endif  // CONFIG_EXT_REFS
          break;
        case NONE_FRAME:
        case TOTAL_REFS_PER_FRAME:
          assert(0 && "Invalid Reference frame");
          break;
      }
    }

    if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
        (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
      continue;

    if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;

    // Test best rd so far against threshold for trying this mode.
    if (best_mode_skippable && sf->schedule_mode_search)
      mode_threshold[mode_index] <<= 1;

    if (best_rd < mode_threshold[mode_index]) continue;

    // This is only used in motion vector unit test.
    if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;

#if CONFIG_ONE_SIDED_COMPOUND  // Changes LL bitstream
#if CONFIG_EXT_REFS
    if (cpi->oxcf.pass == 0) {
      // Complexity-compression trade-offs
      // if (ref_frame == ALTREF_FRAME) continue;
      // if (ref_frame == BWDREF_FRAME) continue;
      if (second_ref_frame == ALTREF_FRAME) continue;
      // if (second_ref_frame == BWDREF_FRAME) continue;
    }
#endif  // CONFIG_EXT_REFS
#endif  // CONFIG_ONE_SIDED_COMPOUND
    comp_pred = second_ref_frame > INTRA_FRAME;
    if (comp_pred) {
      if (!cpi->allow_comp_inter_inter) continue;

      // Skip compound inter modes if ARF is not available.
      if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;

      // Do not allow compound prediction if the segment level reference frame
      // feature is in use as in this case there can only be one reference.
      if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;

      if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
          best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
        continue;

      mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
    } else {
      if (ref_frame != INTRA_FRAME)
        mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
    }

    if (ref_frame == INTRA_FRAME) {
      if (sf->adaptive_mode_search)
        if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
          continue;

      if (this_mode != DC_PRED) {
        // Disable intra modes other than DC_PRED for blocks with low variance
        // Threshold for intra skipping based on source variance
        // TODO(debargha): Specialize the threshold for super block sizes
        const unsigned int skip_intra_var_thresh = 64;
        if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
            x->source_variance < skip_intra_var_thresh)
          continue;
        // Only search the oblique modes if the best so far is
        // one of the neighboring directional modes
        if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
            (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
          if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
            continue;
        }
        if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
          if (conditional_skipintra(this_mode, best_intra_mode)) continue;
        }
      }
#if CONFIG_GLOBAL_MOTION
    } else if (cm->global_motion[ref_frame].wmtype == IDENTITY &&
               (!comp_pred ||
                cm->global_motion[second_ref_frame].wmtype == IDENTITY)) {
#else   // CONFIG_GLOBAL_MOTION
    } else {
#endif  // CONFIG_GLOBAL_MOTION
      const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
      if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
#if CONFIG_EXT_INTER
                              mbmi_ext->compound_mode_context,
#endif  // CONFIG_EXT_INTER
                              frame_mv, this_mode, ref_frames, bsize, -1,
                              mi_row, mi_col))
        continue;
    }

    mbmi->mode = this_mode;
    mbmi->uv_mode = DC_PRED;
    mbmi->ref_frame[0] = ref_frame;
    mbmi->ref_frame[1] = second_ref_frame;
#if CONFIG_PALETTE
    pmi->palette_size[0] = 0;
    pmi->palette_size[1] = 0;
#endif  // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
    mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
    mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA
    // Evaluate all sub-pel filters irrespective of whether we can use
    // them for this frame.

    set_default_interp_filters(mbmi, cm->interp_filter);

    mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;

    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
    mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
#endif  // CONFIG_EXT_INTER && CONFIG_INTERINTRA
    if (ref_frame == INTRA_FRAME) {
      RD_STATS rd_stats_y;
      TX_SIZE uv_tx;
      struct macroblockd_plane *const pd = &xd->plane[1];
#if CONFIG_EXT_INTRA
      is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
      if (is_directional_mode) {
        int rate_dummy;
        int64_t model_rd = INT64_MAX;
        if (!angle_stats_ready) {
          const int src_stride = x->plane[0].src.stride;
          const uint8_t *src = x->plane[0].src.buf;
#if CONFIG_HIGHBITDEPTH
          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
            highbd_angle_estimation(src, src_stride, rows, cols, bsize,
                                    directional_mode_skip_mask);
          else
#endif  // CONFIG_HIGHBITDEPTH
            angle_estimation(src, src_stride, rows, cols, bsize,
                             directional_mode_skip_mask);
          angle_stats_ready = 1;
        }
        if (directional_mode_skip_mask[mbmi->mode]) continue;
        rd_stats_y.rate = INT_MAX;
        rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rd_stats_y, bsize,
                                intra_mode_cost[mbmi->mode], best_rd,
                                &model_rd);
      } else {
        mbmi->angle_delta[0] = 0;
        super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
      }
#else
      super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
#endif  // CONFIG_EXT_INTRA
      rate_y = rd_stats_y.rate;
      distortion_y = rd_stats_y.dist;
      skippable = rd_stats_y.skip;

      if (rate_y == INT_MAX) continue;

#if CONFIG_FILTER_INTRA
      if (mbmi->mode == DC_PRED) dc_skipped = 0;
#endif  // CONFIG_FILTER_INTRA

      uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][pd->subsampling_x]
                              [pd->subsampling_y];
      if (rate_uv_intra[uv_tx] == INT_MAX) {
        choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
                             &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
                             &skip_uvs[uv_tx], &mode_uv[uv_tx]);
#if CONFIG_PALETTE
        if (try_palette) pmi_uv[uv_tx] = *pmi;
#endif  // CONFIG_PALETTE

#if CONFIG_EXT_INTRA
        uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
        filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
#endif  // CONFIG_FILTER_INTRA
      }

      rate_uv = rate_uv_tokenonly[uv_tx];
      distortion_uv = dist_uvs[uv_tx];
      skippable = skippable && skip_uvs[uv_tx];
      mbmi->uv_mode = mode_uv[uv_tx];

#if CONFIG_PALETTE
      if (try_palette) {
        pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
        memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
               pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
               2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
      }
#endif  // CONFIG_PALETTE

#if CONFIG_EXT_INTRA
      mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
      mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
          filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
      if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
        mbmi->filter_intra_mode_info.filter_intra_mode[1] =
            filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
      }
#endif  // CONFIG_FILTER_INTRA

#if CONFIG_CB4X4
      rate2 = rate_y + intra_mode_cost[mbmi->mode];
      if (!x->skip_chroma_rd)
        rate2 += rate_uv + cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
#else
      rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
              cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
#endif  // CONFIG_CB4X4

#if CONFIG_PALETTE
      if (try_palette && mbmi->mode == DC_PRED) {
        rate2 += av1_cost_bit(
            av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
      }
#endif  // CONFIG_PALETTE

      if (!xd->lossless[mbmi->segment_id] && bsize >= BLOCK_8X8) {
        // super_block_yrd above includes the cost of the tx_size in the
        // tokenonly rate, but for intra blocks, tx_size is always coded
        // (prediction granularity), so we account for it in the full rate,
        // not the tokenonly rate.
        rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
      }
#if CONFIG_EXT_INTRA
      if (is_directional_mode) {
#if CONFIG_INTRA_INTERP
        const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
        const int p_angle =
            mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
        if (av1_is_intra_filter_switchable(p_angle))
          rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
#endif  // CONFIG_INTRA_INTERP
        rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                    MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
      }
      if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
        rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                    MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
      }
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
      if (mbmi->mode == DC_PRED) {
        rate2 +=
            av1_cost_bit(cm->fc->filter_intra_probs[0],
                         mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
        if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
          rate2 += write_uniform_cost(
              FILTER_INTRA_MODES,
              mbmi->filter_intra_mode_info.filter_intra_mode[0]);
        }
      }
      if (mbmi->uv_mode == DC_PRED) {
        rate2 +=
            av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
                         mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
        if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1])
          rate2 += write_uniform_cost(
              FILTER_INTRA_MODES,
              mbmi->filter_intra_mode_info.filter_intra_mode[1]);
      }
#endif  // CONFIG_FILTER_INTRA
      if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
        rate2 += intra_cost_penalty;
      distortion2 = distortion_y + distortion_uv;
#if CONFIG_DAALA_DIST && CONFIG_CB4X4
      if (bsize < BLOCK_8X8) distortion2_y = distortion_y;
#endif
    } else {
      int_mv backup_ref_mv[2];

#if !SUB8X8_COMP_REF
      if (bsize == BLOCK_4X4 && mbmi->ref_frame[1] > INTRA_FRAME) continue;
#endif  // !SUB8X8_COMP_REF

      backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
      if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
      if (second_ref_frame == INTRA_FRAME) {
        if (best_single_inter_ref != ref_frame) continue;
        mbmi->interintra_mode = intra_to_interintra_mode[best_intra_mode];
        // TODO(debargha|geza.lore):
        // Should we use ext_intra modes for interintra?
#if CONFIG_EXT_INTRA
        mbmi->angle_delta[0] = 0;
        mbmi->angle_delta[1] = 0;
#if CONFIG_INTRA_INTERP
        mbmi->intra_filter = INTRA_FILTER_LINEAR;
#endif  // CONFIG_INTRA_INTERP
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
        mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
        mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA
      }
#endif  // CONFIG_EXT_INTER && CONFIG_INTERINTRA
      mbmi->ref_mv_idx = 0;
      ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);

#if CONFIG_EXT_INTER
      if (comp_pred) {
        if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
          int ref_mv_idx = 0;
          // Special case: NEAR_NEWMV and NEW_NEARMV modes use
          // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
          // mbmi->ref_mv_idx (like NEWMV)
          if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
            ref_mv_idx = 1;

          if (compound_ref0_mode(mbmi->mode) == NEWMV) {
            int_mv this_mv =
                mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
            clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
                         xd->n8_h << MI_SIZE_LOG2, xd);
            mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
          }
          if (compound_ref1_mode(mbmi->mode) == NEWMV) {
            int_mv this_mv =
                mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
            clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
                         xd->n8_h << MI_SIZE_LOG2, xd);
            mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
          }
        }
      } else {
#endif  // CONFIG_EXT_INTER
        if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
          int ref;
          for (ref = 0; ref < 1 + comp_pred; ++ref) {
            int_mv this_mv =
                (ref == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
                           : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
            clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
                         xd->n8_h << MI_SIZE_LOG2, xd);
            mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
          }
        }
#if CONFIG_EXT_INTER
      }
#endif  // CONFIG_EXT_INTER
      {
        RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
        av1_init_rd_stats(&rd_stats);
        rd_stats.rate = rate2;

        // Point to variables that are maintained between loop iterations
        args.single_newmv = single_newmv;
#if CONFIG_EXT_INTER
        args.single_newmv_rate = single_newmv_rate;
        args.modelled_rd = modelled_rd;
#endif  // CONFIG_EXT_INTER
        this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
                                    &rd_stats_uv, &disable_skip, frame_mv,
                                    mi_row, mi_col, &args, best_rd);

        rate2 = rd_stats.rate;
        skippable = rd_stats.skip;
        distortion2 = rd_stats.dist;
        total_sse = rd_stats.sse;
        rate_y = rd_stats_y.rate;
        rate_uv = rd_stats_uv.rate;
#if CONFIG_DAALA_DIST && CONFIG_CB4X4
        if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist;
#endif
      }
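
      // For NEWMV/NEARMV-style modes with several reference MV candidates,
      // re-run the inter mode evaluation with the alternative DRL indices and
      // keep whichever candidate gives the lower RD cost.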
      // TODO(jingning): This needs some refactoring to improve code quality
      // and reduce redundant steps.
#if CONFIG_EXT_INTER
      if ((have_nearmv_in_inter_mode(mbmi->mode) &&
           mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
          ((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
           mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
#else
      if ((mbmi->mode == NEARMV &&
           mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
          (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
#endif
        int_mv backup_mv = frame_mv[NEARMV][ref_frame];
        MB_MODE_INFO backup_mbmi = *mbmi;
        int backup_skip = x->skip;
        int64_t tmp_ref_rd = this_rd;
        int ref_idx;

        // TODO(jingning): This should be deprecated shortly.
#if CONFIG_EXT_INTER
        int idx_offset = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
#else
        int idx_offset = (mbmi->mode == NEARMV) ? 1 : 0;
#endif  // CONFIG_EXT_INTER
        int ref_set =
            AOMMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);

        uint8_t drl_ctx =
            av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx_offset);

        int_mv backup_fmv[2];
        backup_fmv[0] = frame_mv[NEWMV][ref_frame];
        if (comp_pred) backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];

        rate2 += (rate2 < INT_MAX ? cpi->drl_mode_cost0[drl_ctx][0] : 0);

        if (this_rd < INT64_MAX) {
          if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
              RDCOST(x->rdmult, x->rddiv, 0, total_sse))
            tmp_ref_rd =
                RDCOST(x->rdmult, x->rddiv,
                       rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
                       distortion2);
          else
            tmp_ref_rd =
                RDCOST(x->rdmult, x->rddiv,
                       rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
                           rate_y - rate_uv,
                       total_sse);
        }
#if CONFIG_VAR_TX
        for (i = 0; i < MAX_MB_PLANE; ++i)
          memcpy(x->blk_skip_drl[i], x->blk_skip[i],
                 sizeof(uint8_t) * ctx->num_4x4_blk);
#endif  // CONFIG_VAR_TX

        for (ref_idx = 0; ref_idx < ref_set; ++ref_idx) {
          int64_t tmp_alt_rd = INT64_MAX;
          int dummy_disable_skip = 0;
          int ref;
          int_mv cur_mv;
          RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;

          av1_invalid_rd_stats(&tmp_rd_stats);
          x->skip = 0;

          mbmi->ref_mv_idx = 1 + ref_idx;

#if CONFIG_EXT_INTER
          if (comp_pred) {
            int ref_mv_idx = mbmi->ref_mv_idx;
            // Special case: NEAR_NEWMV and NEW_NEARMV modes use
            // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
            // mbmi->ref_mv_idx (like NEWMV)
            if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
              ref_mv_idx = 1 + mbmi->ref_mv_idx;

            if (compound_ref0_mode(mbmi->mode) == NEWMV) {
              int_mv this_mv =
                  mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
              clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
                           xd->n8_h << MI_SIZE_LOG2, xd);
              mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
            } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV) {
              int_mv this_mv =
                  mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
              clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
                           xd->n8_h << MI_SIZE_LOG2, xd);
              mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
            }

            if (compound_ref1_mode(mbmi->mode) == NEWMV) {
              int_mv this_mv =
                  mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
              clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
                           xd->n8_h << MI_SIZE_LOG2, xd);
              mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
            } else if (compound_ref1_mode(mbmi->mode) == NEARESTMV) {
              int_mv this_mv =
                  mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
              clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
                           xd->n8_h << MI_SIZE_LOG2, xd);
              mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
            }
          } else {
#endif  // CONFIG_EXT_INTER
            for (ref = 0; ref < 1 + comp_pred; ++ref) {
              int_mv this_mv =
                  (ref == 0)
                      ? mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                            .this_mv
                      : mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                            .comp_mv;
              clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
                           xd->n8_h << MI_SIZE_LOG2, xd);
              mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
            }
#if CONFIG_EXT_INTER
          }
#endif  // CONFIG_EXT_INTER

          cur_mv =
              mbmi_ext->ref_mv_stack[ref_frame][mbmi->ref_mv_idx + idx_offset]
                  .this_mv;
          clamp_mv2(&cur_mv.as_mv, xd);

          if (!mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
            int_mv dummy_single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
#if CONFIG_EXT_INTER
            int dummy_single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
#endif  // CONFIG_EXT_INTER

            frame_mv[NEARMV][ref_frame] = cur_mv;
            av1_init_rd_stats(&tmp_rd_stats);

            // Point to variables that are not maintained between iterations
            args.single_newmv = dummy_single_newmv;
#if CONFIG_EXT_INTER
            args.single_newmv_rate = dummy_single_newmv_rate;
            args.modelled_rd = NULL;
#endif  // CONFIG_EXT_INTER
            tmp_alt_rd = handle_inter_mode(
                cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv,
                &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
            // Prevent pointers from escaping local scope
            args.single_newmv = NULL;
#if CONFIG_EXT_INTER
            args.single_newmv_rate = NULL;
#endif  // CONFIG_EXT_INTER
          }

          for (i = 0; i < mbmi->ref_mv_idx; ++i) {
            uint8_t drl1_ctx = 0;
            drl1_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
                                   i + idx_offset);
            tmp_rd_stats.rate +=
                (tmp_rd_stats.rate < INT_MAX ? cpi->drl_mode_cost0[drl1_ctx][1]
                                             : 0);
          }

          if (mbmi_ext->ref_mv_count[ref_frame_type] >
                  mbmi->ref_mv_idx + idx_offset + 1 &&
              ref_idx < ref_set - 1) {
            uint8_t drl1_ctx =
                av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
                            mbmi->ref_mv_idx + idx_offset);
            tmp_rd_stats.rate +=
                (tmp_rd_stats.rate < INT_MAX ? cpi->drl_mode_cost0[drl1_ctx][0]
                                             : 0);
          }

          if (tmp_alt_rd < INT64_MAX) {
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
            tmp_alt_rd = RDCOST(x->rdmult, x->rddiv, tmp_rd_stats.rate,
                                tmp_rd_stats.dist);
#else
            if (RDCOST(x->rdmult, x->rddiv,
                       tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate,
                       tmp_rd_stats.dist) <
                RDCOST(x->rdmult, x->rddiv, 0, tmp_rd_stats.sse))
              tmp_alt_rd =
                  RDCOST(x->rdmult, x->rddiv,
                         tmp_rd_stats.rate +
                             av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
                         tmp_rd_stats.dist);
            else
              tmp_alt_rd =
                  RDCOST(x->rdmult, x->rddiv,
                         tmp_rd_stats.rate +
                             av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
                             tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
                         tmp_rd_stats.sse);
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
          }

          if (tmp_ref_rd > tmp_alt_rd) {
            rate2 = tmp_rd_stats.rate;
            disable_skip = dummy_disable_skip;
            distortion2 = tmp_rd_stats.dist;
            skippable = tmp_rd_stats.skip;
            rate_y = tmp_rd_stats_y.rate;
            rate_uv = tmp_rd_stats_uv.rate;
            total_sse = tmp_rd_stats.sse;
            this_rd = tmp_alt_rd;
            tmp_ref_rd = tmp_alt_rd;
            backup_mbmi = *mbmi;
            backup_skip = x->skip;
#if CONFIG_DAALA_DIST && CONFIG_CB4X4
            if (bsize < BLOCK_8X8) {
              total_sse_y = tmp_rd_stats_y.sse;
              distortion2_y = tmp_rd_stats_y.dist;
            }
#endif
#if CONFIG_VAR_TX
            for (i = 0; i < MAX_MB_PLANE; ++i)
              memcpy(x->blk_skip_drl[i], x->blk_skip[i],
                     sizeof(uint8_t) * ctx->num_4x4_blk);
#endif  // CONFIG_VAR_TX
          } else {
            *mbmi = backup_mbmi;
            x->skip = backup_skip;
          }
        }

        frame_mv[NEARMV][ref_frame] = backup_mv;
        frame_mv[NEWMV][ref_frame] = backup_fmv[0];
        if (comp_pred) frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
#if CONFIG_VAR_TX
        for (i = 0; i < MAX_MB_PLANE; ++i)
          memcpy(x->blk_skip[i], x->blk_skip_drl[i],
                 sizeof(uint8_t) * ctx->num_4x4_blk);
#endif  // CONFIG_VAR_TX
      }
      mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
      if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];

      if (this_rd == INT64_MAX) continue;
#if SUB8X8_COMP_REF
      compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
#else
      if (mbmi->sb_type != BLOCK_4X4)
        compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
#endif  // SUB8X8_COMP_REF

      if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
    }

    // Estimate the reference frame signaling cost and add it
    // to the rolling cost variable.
    if (comp_pred) {
      rate2 += ref_costs_comp[ref_frame];
#if CONFIG_EXT_REFS
      rate2 += ref_costs_comp[second_ref_frame];
#endif  // CONFIG_EXT_REFS
    } else {
      rate2 += ref_costs_single[ref_frame];
    }

#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    if (ref_frame == INTRA_FRAME) {
#else
    if (!disable_skip) {
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
      if (skippable) {
        // Back out the coefficient coding costs
        rate2 -= (rate_y + rate_uv);
        rate_y = 0;
        rate_uv = 0;
        // Cost the skip mb case
        rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
      } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
        if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + rate_skip0,
                   distortion2) <
            RDCOST(x->rdmult, x->rddiv, rate_skip1, total_sse)) {
          // Add in the cost of the no skip flag.
          rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
        } else {
          // FIXME(rbultje) make this work for splitmv also
          rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
          distortion2 = total_sse;
          assert(total_sse >= 0);
          rate2 -= (rate_y + rate_uv);
          this_skip2 = 1;
          rate_y = 0;
          rate_uv = 0;
#if CONFIG_DAALA_DIST && CONFIG_CB4X4
          if (bsize < BLOCK_8X8) distortion2_y = total_sse_y;
#endif
        }
      } else {
        // Add in the cost of the no skip flag.
        rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
      }

      // Calculate the final RD estimate for this mode.
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    } else {
      this_skip2 = mbmi->skip;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
      if (this_skip2) {
        rate_y = 0;
        rate_uv = 0;
      }
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    }
    if (ref_frame == INTRA_FRAME) {
      // Keep record of best intra rd
      if (this_rd < best_intra_rd) {
        best_intra_rd = this_rd;
        best_intra_mode = mbmi->mode;
      }
#if CONFIG_EXT_INTER && CONFIG_INTERINTRA
    } else if (second_ref_frame == NONE_FRAME) {
      if (this_rd < best_single_inter_rd) {
        best_single_inter_rd = this_rd;
        best_single_inter_ref = mbmi->ref_frame[0];
      }
#endif  // CONFIG_EXT_INTER && CONFIG_INTERINTRA
    }

    if (!disable_skip && ref_frame == INTRA_FRAME) {
      for (i = 0; i < REFERENCE_MODES; ++i)
        best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
    }

    // Did this mode help.. i.e. is it the new best mode
    if (this_rd < best_rd || x->skip) {
      if (!mode_excluded) {
        // Note index of best mode so far
        best_mode_index = mode_index;

        if (ref_frame == INTRA_FRAME) {
          /* required for left and above block mv */
          mbmi->mv[0].as_int = 0;
        } else {
          best_pred_sse = x->pred_sse[ref_frame];
        }

        rd_cost->rate = rate2;
#if CONFIG_SUPERTX
        if (x->skip)
          *returnrate_nocoef = rate2;
        else
          *returnrate_nocoef = rate2 - rate_y - rate_uv;
        *returnrate_nocoef -= av1_cost_bit(
            av1_get_skip_prob(cm, xd), disable_skip || skippable || this_skip2);
        *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
                                           mbmi->ref_frame[0] != INTRA_FRAME);
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
        MODE_INFO *const mi = xd->mi[0];
        const MOTION_MODE motion_allowed = motion_mode_allowed(
#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
            0, xd->global_motion,
#endif  // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
            mi);
        if (motion_allowed == WARPED_CAUSAL)
          *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
        else if (motion_allowed == OBMC_CAUSAL)
          *returnrate_nocoef -=
              cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
#else
        *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
#endif  // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#endif  // CONFIG_SUPERTX
        rd_cost->dist = distortion2;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        best_mbmode = *mbmi;
        best_skip2 = this_skip2;
        best_mode_skippable = skippable;
        best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd),
                                            this_skip2 || skippable);
        best_rate_uv = rate_uv;
#if CONFIG_DAALA_DIST && CONFIG_CB4X4
        if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y;
#endif
#if CONFIG_VAR_TX
        for (i = 0; i < MAX_MB_PLANE; ++i)
          memcpy(ctx->blk_skip[i], x->blk_skip[i],
                 sizeof(uint8_t) * ctx->num_4x4_blk);
#endif  // CONFIG_VAR_TX
      }
    }
    /* keep record of best compound/single-only prediction */
    if (!disable_skip && ref_frame != INTRA_FRAME) {
      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;

      if (cm->reference_mode == REFERENCE_MODE_SELECT) {
        single_rate = rate2 - compmode_cost;
        hybrid_rate = rate2;
      } else {
        single_rate = rate2;
        hybrid_rate = rate2 + compmode_cost;
      }

      single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
      hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);

      if (!comp_pred) {
        if (single_rd < best_pred_rd[SINGLE_REFERENCE])
          best_pred_rd[SINGLE_REFERENCE] = single_rd;
      } else {
        if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
          best_pred_rd[COMPOUND_REFERENCE] = single_rd;
      }
      if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
        best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
    }

    if (x->skip && !comp_pred) break;
  }
  if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
      ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
        is_inter_mode(best_mbmode.mode)) ||
       (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
        !is_inter_mode(best_mbmode.mode)))) {
    int skip_blk = 0;
    RD_STATS rd_stats_y, rd_stats_uv;

    x->use_default_inter_tx_type = 0;
    x->use_default_intra_tx_type = 0;

    *mbmi = best_mbmode;

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (has_second_ref(mbmi))
        xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    if (is_inter_mode(mbmi->mode)) {
      av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
#if CONFIG_MOTION_VAR
      if (mbmi->motion_mode == OBMC_CAUSAL) {
        av1_build_obmc_inter_prediction(
            cm, xd, mi_row, mi_col, args.above_pred_buf, args.above_pred_stride,
            args.left_pred_buf, args.left_pred_stride);
      }
#endif  // CONFIG_MOTION_VAR
      av1_subtract_plane(x, bsize, 0);
#if CONFIG_VAR_TX
      if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
        select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
      } else {
        int idx, idy;
        super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
        for (idy = 0; idy < xd->n8_h; ++idy)
          for (idx = 0; idx < xd->n8_w; ++idx)
            mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
        memset(x->blk_skip[0], rd_stats_y.skip,
               sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
      }

      inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
#else
      super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
      super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
#endif  // CONFIG_VAR_TX
    } else {
      super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
      super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    }

    if (RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
               (rd_stats_y.dist + rd_stats_uv.dist)) >
        RDCOST(x->rdmult, x->rddiv, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
      skip_blk = 1;
      rd_stats_y.rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
      rd_stats_uv.rate = 0;
      rd_stats_y.dist = rd_stats_y.sse;
      rd_stats_uv.dist = rd_stats_uv.sse;
    } else {
      skip_blk = 0;
      rd_stats_y.rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
    }

    if (RDCOST(x->rdmult, x->rddiv, best_rate_y + best_rate_uv, rd_cost->dist) >
        RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
               (rd_stats_y.dist + rd_stats_uv.dist))) {
#if CONFIG_VAR_TX
      int idx, idy;
#endif  // CONFIG_VAR_TX
      best_mbmode.tx_type = mbmi->tx_type;
      best_mbmode.tx_size = mbmi->tx_size;
#if CONFIG_VAR_TX
      for (idy = 0; idy < xd->n8_h; ++idy)
        for (idx = 0; idx < xd->n8_w; ++idx)
          best_mbmode.inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];

      for (i = 0; i < MAX_MB_PLANE; ++i)
        memcpy(ctx->blk_skip[i], x->blk_skip[i],
               sizeof(uint8_t) * ctx->num_4x4_blk);

      best_mbmode.min_tx_size = mbmi->min_tx_size;
#endif  // CONFIG_VAR_TX
      rd_cost->rate +=
          (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
      rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
#if CONFIG_DAALA_DIST && CONFIG_CB4X4
      if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist;
#endif
      rd_cost->rdcost =
          RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
      best_skip2 = skip_blk;
    }
  }
#if CONFIG_PALETTE
  // Only try palette mode when the best mode so far is an intra mode.
  if (try_palette && !is_inter_mode(best_mbmode.mode)) {
    int rate2 = 0;
#if CONFIG_SUPERTX
    int best_rate_nocoef;
#endif  // CONFIG_SUPERTX
    int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd,
            best_model_rd_palette = INT64_MAX;
    int skippable = 0, rate_overhead_palette = 0;
    RD_STATS rd_stats_y;
    TX_SIZE uv_tx;
    uint8_t *const best_palette_color_map =
        x->palette_buffer->best_palette_color_map;
    uint8_t *const color_map = xd->plane[0].color_index_map;
    MB_MODE_INFO best_mbmi_palette = best_mbmode;

    mbmi->mode = DC_PRED;
    mbmi->uv_mode = DC_PRED;
    mbmi->ref_frame[0] = INTRA_FRAME;
    mbmi->ref_frame[1] = NONE_FRAME;
    rate_overhead_palette = rd_pick_palette_intra_sby(
        cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED],
        &best_mbmi_palette, best_palette_color_map, &best_rd_palette,
        &best_model_rd_palette, NULL, NULL, NULL, NULL);
    if (pmi->palette_size[0] == 0) goto PALETTE_EXIT;
    memcpy(color_map, best_palette_color_map,
           rows * cols * sizeof(best_palette_color_map[0]));
    super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
    if (rd_stats_y.rate == INT_MAX) goto PALETTE_EXIT;
    uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
                            [xd->plane[1].subsampling_y];
    if (rate_uv_intra[uv_tx] == INT_MAX) {
      choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
                           &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
                           &skip_uvs[uv_tx], &mode_uv[uv_tx]);
      pmi_uv[uv_tx] = *pmi;
#if CONFIG_EXT_INTRA
      uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
      filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
#endif  // CONFIG_FILTER_INTRA
    }
    mbmi->uv_mode = mode_uv[uv_tx];
    pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
    if (pmi->palette_size[1] > 0) {
      memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
             pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
             2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
    }
#if CONFIG_EXT_INTRA
    mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
    mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
        filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
    if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
      mbmi->filter_intra_mode_info.filter_intra_mode[1] =
          filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
    }
#endif  // CONFIG_FILTER_INTRA
    skippable = rd_stats_y.skip && skip_uvs[uv_tx];
    distortion2 = rd_stats_y.dist + dist_uvs[uv_tx];
    rate2 = rd_stats_y.rate + rate_overhead_palette + rate_uv_intra[uv_tx];
    rate2 += ref_costs_single[INTRA_FRAME];

    if (skippable) {
      rate2 -= (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
#if CONFIG_SUPERTX
      best_rate_nocoef = rate2;
#endif  // CONFIG_SUPERTX
      rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
    } else {
#if CONFIG_SUPERTX
      best_rate_nocoef = rate2 - (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
#endif  // CONFIG_SUPERTX
      rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
    }
    this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    if (this_rd < best_rd) {
      best_mode_index = 3;
      mbmi->mv[0].as_int = 0;
      rd_cost->rate = rate2;
#if CONFIG_SUPERTX
      *returnrate_nocoef = best_rate_nocoef;
#endif  // CONFIG_SUPERTX
      rd_cost->dist = distortion2;
      rd_cost->rdcost = this_rd;
      best_rd = this_rd;
      best_mbmode = *mbmi;
      best_skip2 = 0;
      best_mode_skippable = skippable;
    }
  }
PALETTE_EXIT:
#endif  // CONFIG_PALETTE
#if CONFIG_FILTER_INTRA
  // TODO(huisu): filter-intra is turned off in lossless mode for now to
  // avoid a unit test failure
  if (!xd->lossless[mbmi->segment_id] &&
#if CONFIG_PALETTE
      pmi->palette_size[0] == 0 &&
#endif  // CONFIG_PALETTE
      !dc_skipped && best_mode_index >= 0 &&
      best_intra_rd < (best_rd + (best_rd >> 3))) {
    pick_filter_intra_interframe(
        cpi, x, ctx, bsize, mi_row, mi_col, rate_uv_intra, rate_uv_tokenonly,
        dist_uvs, skip_uvs, mode_uv, filter_intra_mode_info_uv,
#if CONFIG_EXT_INTRA
        uv_angle_delta,
#endif  // CONFIG_EXT_INTRA
#if CONFIG_PALETTE
        pmi_uv, palette_ctx,
#endif  // CONFIG_PALETTE
        0, ref_costs_single, &best_rd, &best_intra_rd, &best_intra_mode,
        &best_mode_index, &best_skip2, &best_mode_skippable,
#if CONFIG_SUPERTX
        returnrate_nocoef,
#endif  // CONFIG_SUPERTX
        best_pred_rd, &best_mbmode, rd_cost);
  }
#endif  // CONFIG_FILTER_INTRA
  // The inter modes' rate costs are not calculated precisely in some cases.
  // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
  // ZEROMV. Here, checks are added for those cases, and the mode decisions
  // are corrected.
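  // For example, if the NEWMV motion search happens to return exactly the
  // NEARESTMV candidate vector, the block is relabelled as NEARESTMV below so
  // the cheaper mode signaling is used for the same prediction.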
  if (best_mbmode.mode == NEWMV
#if CONFIG_EXT_INTER
      || best_mbmode.mode == NEW_NEWMV
#endif  // CONFIG_EXT_INTER
      ) {
    const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
                                         best_mbmode.ref_frame[1] };
    int comp_pred_mode = refs[1] > INTRA_FRAME;
    int_mv zeromv[2];
    const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
#if CONFIG_GLOBAL_MOTION
    zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
                                            cm->allow_high_precision_mv, bsize,
                                            mi_col, mi_row, 0)
                           .as_int;
    zeromv[1].as_int = comp_pred_mode
                           ? gm_get_motion_vector(&cm->global_motion[refs[1]],
                                                  cm->allow_high_precision_mv,
                                                  bsize, mi_col, mi_row, 0)
                                 .as_int
                           : 0;
#else
    zeromv[0].as_int = 0;
    zeromv[1].as_int = 0;
#endif  // CONFIG_GLOBAL_MOTION
    if (!comp_pred_mode) {
      int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
                        ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
                        : INT_MAX;

      for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
        int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
        if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
          best_mbmode.mode = NEARMV;
          best_mbmode.ref_mv_idx = i;
        }
      }

      if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
        best_mbmode.mode = NEARESTMV;
      else if (best_mbmode.mv[0].as_int == zeromv[0].as_int)
        best_mbmode.mode = ZEROMV;
    } else {
      int_mv nearestmv[2];
      int_mv nearmv[2];

#if CONFIG_EXT_INTER
      if (mbmi_ext->ref_mv_count[rf_type] > 1) {
        nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
        nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
      } else {
        nearmv[0] = frame_mv[NEARMV][refs[0]];
        nearmv[1] = frame_mv[NEARMV][refs[1]];
      }
#else
      int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
                        ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
                        : INT_MAX;

      for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
        nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
        nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;

        if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
            nearmv[1].as_int == best_mbmode.mv[1].as_int) {
          best_mbmode.mode = NEARMV;
          best_mbmode.ref_mv_idx = i;
        }
      }
#endif  // CONFIG_EXT_INTER
      if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
        nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
        nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
      } else {
        nearestmv[0] = frame_mv[NEARESTMV][refs[0]];
        nearestmv[1] = frame_mv[NEARESTMV][refs[1]];
      }

      if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
          nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
#if CONFIG_EXT_INTER
        best_mbmode.mode = NEAREST_NEARESTMV;
      } else {
        int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
                          ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
                          : INT_MAX;

        for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
          nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
          nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;

          // Try switching to the NEAR_NEARMV mode
          if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
              nearmv[1].as_int == best_mbmode.mv[1].as_int) {
            best_mbmode.mode = NEAR_NEARMV;
            best_mbmode.ref_mv_idx = i;
          }
        }

        if (best_mbmode.mode == NEW_NEWMV &&
            best_mbmode.mv[0].as_int == zeromv[0].as_int &&
            best_mbmode.mv[1].as_int == zeromv[1].as_int)
          best_mbmode.mode = ZERO_ZEROMV;
      }
#else
        best_mbmode.mode = NEARESTMV;
      } else if (best_mbmode.mv[0].as_int == zeromv[0].as_int &&
                 best_mbmode.mv[1].as_int == zeromv[1].as_int) {
        best_mbmode.mode = ZEROMV;
      }
#endif  // CONFIG_EXT_INTER
    }
  }

  // Make sure that the ref_mv_idx is only nonzero when we're
  // using a mode which can support ref_mv_idx
  if (best_mbmode.ref_mv_idx != 0 &&
#if CONFIG_EXT_INTER
      !(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
        have_nearmv_in_inter_mode(best_mbmode.mode))) {
#else
      !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV)) {
#endif  // CONFIG_EXT_INTER
    best_mbmode.ref_mv_idx = 0;
  }

  {
    int8_t ref_frame_type = av1_ref_frame_type(best_mbmode.ref_frame);
    int16_t mode_ctx = mbmi_ext->mode_context[ref_frame_type];
    if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
      int_mv zeromv[2];
#if CONFIG_GLOBAL_MOTION
      const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
                                           best_mbmode.ref_frame[1] };
      zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
                                              cm->allow_high_precision_mv,
                                              bsize, mi_col, mi_row, 0)
                             .as_int;
      zeromv[1].as_int = (refs[1] != NONE_FRAME)
                             ? gm_get_motion_vector(&cm->global_motion[refs[1]],
                                                    cm->allow_high_precision_mv,
                                                    bsize, mi_col, mi_row, 0)
                                   .as_int
                             : 0;
      lower_mv_precision(&zeromv[0].as_mv, cm->allow_high_precision_mv);
      lower_mv_precision(&zeromv[1].as_mv, cm->allow_high_precision_mv);
#else
      zeromv[0].as_int = zeromv[1].as_int = 0;
#endif  // CONFIG_GLOBAL_MOTION
      if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
          best_mbmode.mv[0].as_int == zeromv[0].as_int &&
#if CONFIG_EXT_INTER
          (best_mbmode.ref_frame[1] <= INTRA_FRAME)
#else
          (best_mbmode.ref_frame[1] == NONE_FRAME ||
           best_mbmode.mv[1].as_int == zeromv[1].as_int)
#endif  // CONFIG_EXT_INTER
          ) {
        best_mbmode.mode = ZEROMV;
      }
    }
  }
  if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

#if CONFIG_DUAL_FILTER
  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter[0]) ||
         !is_inter_block(&best_mbmode));
  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter[1]) ||
         !is_inter_block(&best_mbmode));
  if (best_mbmode.ref_frame[1] > INTRA_FRAME) {
    assert((cm->interp_filter == SWITCHABLE) ||
           (cm->interp_filter == best_mbmode.interp_filter[2]) ||
           !is_inter_block(&best_mbmode));
    assert((cm->interp_filter == SWITCHABLE) ||
           (cm->interp_filter == best_mbmode.interp_filter[3]) ||
           !is_inter_block(&best_mbmode));
  }
#else
  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == best_mbmode.interp_filter) ||
         !is_inter_block(&best_mbmode));
#endif  // CONFIG_DUAL_FILTER

  if (!cpi->rc.is_src_frame_alt_ref)
    av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
                              sf->adaptive_rd_thresh, bsize, best_mode_index);

  // macroblock modes
  *mbmi = best_mbmode;
  x->skip |= best_skip2;

  // Note: this section is needed since the mode may have been forced to
  // ZEROMV by the all-zero mode handling of ref-mv.
#if CONFIG_GLOBAL_MOTION
  if (mbmi->mode == ZEROMV
#if CONFIG_EXT_INTER
      || mbmi->mode == ZERO_ZEROMV
#endif  // CONFIG_EXT_INTER
      ) {
#if CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
    // Correct the motion mode for ZEROMV
    const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
#if SEPARATE_GLOBAL_MOTION
        0, xd->global_motion,
#endif  // SEPARATE_GLOBAL_MOTION
        xd->mi[0]);
    if (mbmi->motion_mode > last_motion_mode_allowed)
      mbmi->motion_mode = last_motion_mode_allowed;
#endif  // CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR

    // Correct the interpolation filter for ZEROMV
    if (is_nontrans_global_motion(xd)) {
#if CONFIG_DUAL_FILTER
      mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE
                                   ? EIGHTTAP_REGULAR
                                   : cm->interp_filter;
      mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE
                                   ? EIGHTTAP_REGULAR
                                   : cm->interp_filter;
#else
      mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
                                                            : cm->interp_filter;
#endif  // CONFIG_DUAL_FILTER
    }
  }
#endif  // CONFIG_GLOBAL_MOTION

  for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
    if (mbmi->mode != NEWMV)
      mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
    else
      mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
  }

  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (best_pred_rd[i] == INT64_MAX)
      best_pred_diff[i] = INT_MIN;
    else
      best_pred_diff[i] = best_rd - best_pred_rd[i];
  }

  x->skip |= best_mode_skippable;

  assert(best_mode_index >= 0);

  store_coding_context(x, ctx, best_mode_index, best_pred_diff,
                       best_mode_skippable);

#if CONFIG_PALETTE
  if (cm->allow_screen_content_tools && pmi->palette_size[1] > 0) {
    restore_uv_color_map(cpi, x);
  }
#endif  // CONFIG_PALETTE
}
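// RD selection for a block in a segment with the SEG_LVL_SKIP feature active:
// no mode search is run; the block is forced to ZEROMV on LAST_FRAME, only the
// interpolation-filter and reference-frame signaling costs are counted, and
// the distortion is taken as zero since no residual is coded.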
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
  unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
  aom_prob comp_mode_p;
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  const int64_t distortion2 = 0;

  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                           &comp_mode_p);

  for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < TOTAL_REFS_PER_FRAME; ++i)
    x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

#if CONFIG_PALETTE
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
#endif  // CONFIG_PALETTE

#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA
  mbmi->mode = ZEROMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = DC_PRED;
  mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
#if CONFIG_GLOBAL_MOTION
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           cm->allow_high_precision_mv, bsize, mi_col, mi_row,
                           0)
          .as_int;
#else   // CONFIG_GLOBAL_MOTION
  mbmi->mv[0].as_int = 0;
#endif  // CONFIG_GLOBAL_MOTION
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->skip = 1;

  mbmi->ref_mv_idx = 0;
  mbmi->pred_mv[0].as_int = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
#if CONFIG_MOTION_VAR
  av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
#endif
#if CONFIG_WARPED_MOTION
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
  }
#endif

  set_default_interp_filters(mbmi, cm->interp_filter);

  if (cm->interp_filter != SWITCHABLE) {
    best_filter = cm->interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
#if CONFIG_DUAL_FILTER
        int k;
        for (k = 0; k < 4; ++k) mbmi->interp_filter[k] = i;
#else
        mbmi->interp_filter = i;
#endif  // CONFIG_DUAL_FILTER
        rs = av1_get_switchable_rate(cpi, xd);
        if (rs < best_rs) {
          best_rs = rs;
#if CONFIG_DUAL_FILTER
          best_filter = mbmi->interp_filter[0];
#else
          best_filter = mbmi->interp_filter;
#endif  // CONFIG_DUAL_FILTER
        }
      }
    }
  }
  // Set the appropriate filter
#if CONFIG_DUAL_FILTER
  for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = best_filter;
#else
  mbmi->interp_filter = best_filter;
#endif  // CONFIG_DUAL_FILTER
  rate2 += av1_get_switchable_rate(cpi, xd);

  if (cm->reference_mode == REFERENCE_MODE_SELECT)
    rate2 += av1_cost_bit(comp_mode_p, comp_pred);

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;
#if CONFIG_DAALA_DIST && CONFIG_CB4X4
  if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
#endif
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

#if CONFIG_DUAL_FILTER
  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == mbmi->interp_filter[0]));
#else
  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter == mbmi->interp_filter));
#endif  // CONFIG_DUAL_FILTER

  av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
                            cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);

  av1_zero(best_pred_diff);

  store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, 0);
}
#if CONFIG_MOTION_VAR
// This function has a structure similar to av1_build_obmc_inter_prediction
//
// The OBMC predictor is computed as:
//
//  PObmc(x,y) =
//    AOM_BLEND_A64(Mh(x),
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
//                  PLeft(x, y))
//
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
// rounding, this can be written as:
//
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
//    Mh(x) * Mv(y) * P(x,y) +
//    Mh(x) * Cv(y) * Pabove(x,y) +
//    AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
// Where :
//
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
//
// This function computes 'wsrc' and 'mask' as:
//
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//    Mh(x) * Cv(y) * Pabove(x,y) +
//    AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
//  mask(x, y) = Mh(x) * Mv(y)
//
// These can then be used to efficiently approximate the error for any
// predictor P in the context of the provided neighbouring predictors by
// computing:
//
//  error(x, y) =
//    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
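//
// For illustration only (assuming AOM_BLEND_A64_MAX_ALPHA is 64): a pixel with
// Mh(x) = 36 and Mv(y) = 48 gets mask(x, y) = 36 * 48 = 1728 out of
// 64 * 64 = 4096, i.e. roughly 42% of the OBMC prediction at that pixel comes
// from P(x, y); the remaining weight on the above/left predictors has already
// been folded into wsrc(x, y).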
static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
                                      const MACROBLOCKD *xd, int mi_row,
                                      int mi_col, const uint8_t *above,
                                      int above_stride, const uint8_t *left,
                                      int left_stride) {
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  int row, col, i;
  const int bw = xd->n8_w << MI_SIZE_LOG2;
  const int bh = xd->n8_h << MI_SIZE_LOG2;
  int32_t *mask_buf = x->mask_buf;
  int32_t *wsrc_buf = x->wsrc_buf;
  const int wsrc_stride = bw;
  const int mask_stride = bw;
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
#if CONFIG_HIGHBITDEPTH
  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#else
  const int is_hbd = 0;
#endif  // CONFIG_HIGHBITDEPTH

  // plane 0 should not be subsampled
  assert(xd->plane[0].subsampling_x == 0);
  assert(xd->plane[0].subsampling_y == 0);

  av1_zero_array(wsrc_buf, bw * bh);
  for (i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;

  // handle above row
  if (xd->up_available) {
    const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
    const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
    const int mi_row_offset = -1;
    const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
    const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
    int neighbor_count = 0;

    assert(miw > 0);

    i = 0;
    do {  // for each mi in the above row
      const int mi_col_offset = i;
      const MB_MODE_INFO *above_mbmi =
          &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
#if CONFIG_CHROMA_SUB8X8
      if (above_mbmi->sb_type < BLOCK_8X8)
        above_mbmi =
            &xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi;
#endif  // CONFIG_CHROMA_SUB8X8
      const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
      const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
      const int neighbor_bw = mi_step * MI_SIZE;

      if (is_neighbor_overlappable(above_mbmi)) {
        if (!CONFIG_CB4X4 && (a_bsize == BLOCK_4X4 || a_bsize == BLOCK_4X8))
          neighbor_count += 2;
        else
          neighbor_count++;
        if (neighbor_count > neighbor_limit) break;

        const int tmp_stride = above_stride;
        int32_t *wsrc = wsrc_buf + (i * MI_SIZE);
        int32_t *mask = mask_buf + (i * MI_SIZE);

        if (!is_hbd) {
          const uint8_t *tmp = above;

          for (row = 0; row < overlap; ++row) {
            const uint8_t m0 = mask1d[row];
            const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
            for (col = 0; col < neighbor_bw; ++col) {
              wsrc[col] = m1 * tmp[col];
              mask[col] = m0;
            }
            wsrc += wsrc_stride;
            mask += mask_stride;
            tmp += tmp_stride;
          }
#if CONFIG_HIGHBITDEPTH
        } else {
          const uint16_t *tmp = CONVERT_TO_SHORTPTR(above);

          for (row = 0; row < overlap; ++row) {
            const uint8_t m0 = mask1d[row];
            const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
            for (col = 0; col < neighbor_bw; ++col) {
              wsrc[col] = m1 * tmp[col];
              mask[col] = m0;
            }
            wsrc += wsrc_stride;
            mask += mask_stride;
            tmp += tmp_stride;
          }
#endif  // CONFIG_HIGHBITDEPTH
        }
      }

      above += neighbor_bw;
      i += mi_step;
    } while (i < miw);
  }

  for (i = 0; i < bw * bh; ++i) {
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
  }

  // handle left column
  if (xd->left_available) {
    const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
    const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
    const int mi_col_offset = -1;
    const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
    const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
    int neighbor_count = 0;

    assert(mih > 0);

    i = 0;
    do {  // for each mi in the left column
      const int mi_row_offset = i;
      MB_MODE_INFO *left_mbmi =
          &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;

#if CONFIG_CHROMA_SUB8X8
      if (left_mbmi->sb_type < BLOCK_8X8)
        left_mbmi =
            &xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi;
#endif  // CONFIG_CHROMA_SUB8X8
      const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
      const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
      const int neighbor_bh = mi_step * MI_SIZE;

      if (is_neighbor_overlappable(left_mbmi)) {
        if (!CONFIG_CB4X4 && (l_bsize == BLOCK_4X4 || l_bsize == BLOCK_8X4))
          neighbor_count += 2;
        else
          neighbor_count++;
        if (neighbor_count > neighbor_limit) break;

        const int tmp_stride = left_stride;
        int32_t *wsrc = wsrc_buf + (i * MI_SIZE * wsrc_stride);
        int32_t *mask = mask_buf + (i * MI_SIZE * mask_stride);

        if (!is_hbd) {
          const uint8_t *tmp = left;

          for (row = 0; row < neighbor_bh; ++row) {
            for (col = 0; col < overlap; ++col) {
              const uint8_t m0 = mask1d[col];
              const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
              wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
                          (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
              mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
            }
            wsrc += wsrc_stride;
            mask += mask_stride;
            tmp += tmp_stride;
          }
#if CONFIG_HIGHBITDEPTH
        } else {
          const uint16_t *tmp = CONVERT_TO_SHORTPTR(left);

          for (row = 0; row < neighbor_bh; ++row) {
            for (col = 0; col < overlap; ++col) {
              const uint8_t m0 = mask1d[col];
              const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
              wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
                          (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
              mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
            }
            wsrc += wsrc_stride;
            mask += mask_stride;
            tmp += tmp_stride;
          }
#endif  // CONFIG_HIGHBITDEPTH
        }
      }

      left += neighbor_bh * left_stride;
      i += mi_step;
    } while (i < mih);
  }

  if (!is_hbd) {
    const uint8_t *src = x->plane[0].src.buf;

    for (row = 0; row < bh; ++row) {
      for (col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += wsrc_stride;
      src += x->plane[0].src.stride;
    }
#if CONFIG_HIGHBITDEPTH
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);

    for (row = 0; row < bh; ++row) {
      for (col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += wsrc_stride;
      src += x->plane[0].src.stride;
    }
#endif  // CONFIG_HIGHBITDEPTH
  }
}
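// Illustrative sketch (not part of the encoder, hence compiled out): how the
// 'wsrc'/'mask' buffers built by calc_target_weighted_pred() can be used to
// approximate the OBMC-weighted prediction error for a candidate 8-bit
// predictor. The function name is hypothetical; the encoder itself relies on
// the optimized aom_obmc_sad/aom_obmc_variance kernels for this computation.
#if 0
static unsigned int obmc_weighted_sad_sketch(const int32_t *wsrc,
                                             const int32_t *mask,
                                             const uint8_t *pred,
                                             int pred_stride, int width,
                                             int height) {
  // wsrc and mask are laid out with a stride equal to the block width, as in
  // calc_target_weighted_pred() above.
  unsigned int sad = 0;
  int row, col;
  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      // error(x, y) ~ wsrc(x, y) - mask(x, y) * P(x, y), still scaled by
      // AOM_BLEND_A64_MAX_ALPHA ** 2; scale back down before accumulating.
      const int diff = wsrc[col] - mask[col] * (int)pred[col];
      sad += ROUND_POWER_OF_TWO(abs(diff), AOM_BLEND_A64_ROUND_BITS * 2);
    }
    wsrc += width;
    mask += width;
    pred += pred_stride;
  }
  return sad;
}
#endif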
#if CONFIG_NCOBMC
void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
                         int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  MB_MODE_INFO backup_mbmi;
  BLOCK_SIZE bsize = mbmi->sb_type;
  int ref, skip_blk, backup_skip = x->skip;
  int64_t rd_causal;
  RD_STATS rd_stats_y, rd_stats_uv;
  int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
  int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);

  // Recompute the best causal predictor and rd
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
  for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
    YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]);
    assert(cfg != NULL);
    av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                         &xd->block_refs[ref]->sf);
  }
  av1_setup_dst_planes(x->e_mbd.plane, bsize,
                       get_frame_new_buffer(&cpi->common), mi_row, mi_col);

  av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);

  av1_subtract_plane(x, bsize, 0);
  super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
  super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
  assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
  if (rd_stats_y.skip && rd_stats_uv.skip) {
    rd_stats_y.rate = rate_skip1;
    rd_stats_uv.rate = 0;
    rd_stats_y.dist = rd_stats_y.sse;
    rd_stats_uv.dist = rd_stats_uv.sse;
    skip_blk = 1;
  } else if (RDCOST(x->rdmult, x->rddiv,
                    (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
                    (rd_stats_y.dist + rd_stats_uv.dist)) >
             RDCOST(x->rdmult, x->rddiv, rate_skip1,
                    (rd_stats_y.sse + rd_stats_uv.sse))) {
    rd_stats_y.rate = rate_skip1;
    rd_stats_uv.rate = 0;
    rd_stats_y.dist = rd_stats_y.sse;
    rd_stats_uv.dist = rd_stats_uv.sse;
    skip_blk = 1;
  } else {
    rd_stats_y.rate += rate_skip0;
    skip_blk = 0;
  }
  backup_skip = skip_blk;
  backup_mbmi = *mbmi;
  rd_causal = RDCOST(x->rdmult, x->rddiv, (rd_stats_y.rate + rd_stats_uv.rate),
                     (rd_stats_y.dist + rd_stats_uv.dist));
  rd_causal += RDCOST(x->rdmult, x->rddiv,
                      av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0);

  // Check non-causal mode
  mbmi->motion_mode = OBMC_CAUSAL;
  av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col);

  av1_subtract_plane(x, bsize, 0);
  super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
  super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
  assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
  if (rd_stats_y.skip && rd_stats_uv.skip) {
    rd_stats_y.rate = rate_skip1;
    rd_stats_uv.rate = 0;
    rd_stats_y.dist = rd_stats_y.sse;
    rd_stats_uv.dist = rd_stats_uv.sse;
    skip_blk = 1;
  } else if (RDCOST(x->rdmult, x->rddiv,
                    (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
                    (rd_stats_y.dist + rd_stats_uv.dist)) >
             RDCOST(x->rdmult, x->rddiv, rate_skip1,
                    (rd_stats_y.sse + rd_stats_uv.sse))) {
    rd_stats_y.rate = rate_skip1;
    rd_stats_uv.rate = 0;
    rd_stats_y.dist = rd_stats_y.sse;
    rd_stats_uv.dist = rd_stats_uv.sse;
    skip_blk = 1;
  } else {
    rd_stats_y.rate += rate_skip0;
    skip_blk = 0;
  }

  if (rd_causal >
      RDCOST(x->rdmult, x->rddiv,
             rd_stats_y.rate + rd_stats_uv.rate +
                 av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
             (rd_stats_y.dist + rd_stats_uv.dist))) {
    x->skip = skip_blk;
  } else {
    *mbmi = backup_mbmi;
    x->skip = backup_skip;
  }
}
#endif  // CONFIG_NCOBMC
#endif  // CONFIG_MOTION_VAR