Add MV refining search in warped motion experiment
[aom.git] / av1 / encoder / rdopt.c
blob d94a8c61faa6c0a12ffa9f13722b0091eb33bc18
1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
12 #include <assert.h>
13 #include <math.h>
15 #include "./aom_dsp_rtcd.h"
16 #include "./av1_rtcd.h"
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_dsp/blend.h"
20 #include "aom_mem/aom_mem.h"
21 #include "aom_ports/mem.h"
22 #include "aom_ports/system_state.h"
24 #include "av1/common/common.h"
25 #include "av1/common/common_data.h"
26 #include "av1/common/entropy.h"
27 #include "av1/common/entropymode.h"
28 #include "av1/common/idct.h"
29 #include "av1/common/mvref_common.h"
30 #include "av1/common/pred_common.h"
31 #include "av1/common/quant_common.h"
32 #include "av1/common/reconinter.h"
33 #include "av1/common/reconintra.h"
34 #include "av1/common/scan.h"
35 #include "av1/common/seg_common.h"
36 #if CONFIG_LV_MAP
37 #include "av1/common/txb_common.h"
38 #endif
39 #if CONFIG_WARPED_MOTION
40 #include "av1/common/warped_motion.h"
41 #endif // CONFIG_WARPED_MOTION
43 #include "av1/encoder/aq_variance.h"
44 #include "av1/encoder/av1_quantize.h"
45 #include "av1/encoder/cost.h"
46 #include "av1/encoder/encodemb.h"
47 #include "av1/encoder/encodemv.h"
48 #include "av1/encoder/encoder.h"
49 #if CONFIG_LV_MAP
50 #include "av1/encoder/encodetxb.h"
51 #endif
52 #include "av1/encoder/hybrid_fwd_txfm.h"
53 #include "av1/encoder/mcomp.h"
54 #if CONFIG_PALETTE
55 #include "av1/encoder/palette.h"
56 #endif // CONFIG_PALETTE
57 #include "av1/encoder/ratectrl.h"
58 #include "av1/encoder/rd.h"
59 #include "av1/encoder/rdopt.h"
60 #include "av1/encoder/tokenize.h"
61 #if CONFIG_PVQ
62 #include "av1/encoder/pvq_encoder.h"
63 #endif // CONFIG_PVQ
64 #if CONFIG_PVQ || CONFIG_DAALA_DIST
65 #include "av1/common/pvq.h"
66 #endif // CONFIG_PVQ || CONFIG_DAALA_DIST
67 #if CONFIG_DUAL_FILTER
68 #define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
69 static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
70 { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
71 { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
72 { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
74 #endif // CONFIG_DUAL_FILTER
76 #if CONFIG_EXT_REFS
78 #define LAST_FRAME_MODE_MASK \
79 ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
80 (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
81 #define LAST2_FRAME_MODE_MASK \
82 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
83 (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
84 #define LAST3_FRAME_MODE_MASK \
85 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
86 (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
87 #define GOLDEN_FRAME_MODE_MASK \
88 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
89 (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
90 #define BWDREF_FRAME_MODE_MASK \
91 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
92 (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
93 #define ALTREF_FRAME_MODE_MASK \
94 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
95 (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))
97 #else
99 #define LAST_FRAME_MODE_MASK \
100 ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
101 #define GOLDEN_FRAME_MODE_MASK \
102 ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
103 #define ALTREF_FRAME_MODE_MASK \
104 ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
106 #endif // CONFIG_EXT_REFS
108 #if CONFIG_EXT_REFS
109 #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
110 #else
111 #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
112 #endif // CONFIG_EXT_REFS
114 #define MIN_EARLY_TERM_INDEX 3
115 #define NEW_MV_DISCOUNT_FACTOR 8
117 #if CONFIG_EXT_INTRA
118 #define ANGLE_SKIP_THRESH 10
119 #define FILTER_FAST_SEARCH 1
120 #endif // CONFIG_EXT_INTRA
122 const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671, // vert
123 -7.7051, -3.2234, -3.6193, 3.4533 }; // horz
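// Illustrative note (not in the upstream source): these eight values are the
// weights and biases of two small linear classifiers. adst_vs_flipadst()
// below forms
//   svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
//                vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3]
// (and the analogous horizontal projection from elements 4..7), then compares
// the result against FAST_EXT_TX_EDST_MID +/- FAST_EXT_TX_EDST_MARGIN to
// decide whether ADST or FLIPADST can be pruned in each direction.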
125 typedef struct {
126 PREDICTION_MODE mode;
127 MV_REFERENCE_FRAME ref_frame[2];
128 } MODE_DEFINITION;
130 typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
132 struct rdcost_block_args {
133 const AV1_COMP *cpi;
134 MACROBLOCK *x;
135 ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
136 ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
137 RD_STATS rd_stats;
138 int64_t this_rd;
139 int64_t best_rd;
140 int exit_early;
141 int use_fast_coef_costing;
144 #define LAST_NEW_MV_INDEX 6
145 static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
146 { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
147 #if CONFIG_EXT_REFS
148 { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
149 { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
150 { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
151 #endif // CONFIG_EXT_REFS
152 { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
153 { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
155 { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
157 { NEWMV, { LAST_FRAME, NONE_FRAME } },
158 #if CONFIG_EXT_REFS
159 { NEWMV, { LAST2_FRAME, NONE_FRAME } },
160 { NEWMV, { LAST3_FRAME, NONE_FRAME } },
161 { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
162 #endif // CONFIG_EXT_REFS
163 { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
164 { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
166 { NEARMV, { LAST_FRAME, NONE_FRAME } },
167 #if CONFIG_EXT_REFS
168 { NEARMV, { LAST2_FRAME, NONE_FRAME } },
169 { NEARMV, { LAST3_FRAME, NONE_FRAME } },
170 { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
171 #endif // CONFIG_EXT_REFS
172 { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
173 { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
175 { ZEROMV, { LAST_FRAME, NONE_FRAME } },
176 #if CONFIG_EXT_REFS
177 { ZEROMV, { LAST2_FRAME, NONE_FRAME } },
178 { ZEROMV, { LAST3_FRAME, NONE_FRAME } },
179 { ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
180 #endif // CONFIG_EXT_REFS
181 { ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
182 { ZEROMV, { ALTREF_FRAME, NONE_FRAME } },
184 // TODO(zoeliu): May need to reconsider the order of the modes to check
186 #if CONFIG_EXT_INTER
187 { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
188 #if CONFIG_EXT_REFS
189 { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
190 { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
191 #endif // CONFIG_EXT_REFS
192 { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
193 #if CONFIG_EXT_REFS
194 { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
195 { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
196 { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
197 { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
198 #endif // CONFIG_EXT_REFS
200 #else // CONFIG_EXT_INTER
202 { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
203 #if CONFIG_EXT_REFS
204 { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
205 { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
206 #endif // CONFIG_EXT_REFS
207 { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
208 #if CONFIG_EXT_REFS
209 { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
210 { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
211 { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
212 { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
213 #endif // CONFIG_EXT_REFS
214 #endif // CONFIG_EXT_INTER
216 { TM_PRED, { INTRA_FRAME, NONE_FRAME } },
218 #if CONFIG_ALT_INTRA
219 { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
220 #if CONFIG_SMOOTH_HV
221 { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
222 { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
223 #endif // CONFIG_SMOOTH_HV
224 #endif // CONFIG_ALT_INTRA
226 #if CONFIG_EXT_INTER
227 { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
228 { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
229 { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
230 { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
231 { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
232 { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
233 { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
235 #if CONFIG_EXT_REFS
236 { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
237 { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
238 { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
239 { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
240 { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
241 { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
242 { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
244 { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
245 { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
246 { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
247 { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
248 { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
249 { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
250 { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
251 #endif // CONFIG_EXT_REFS
253 { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
254 { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
255 { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
256 { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
257 { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
258 { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
259 { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
261 #if CONFIG_EXT_REFS
262 { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
263 { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
264 { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
265 { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
266 { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
267 { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
268 { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
270 { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
271 { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
272 { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
273 { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
274 { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
275 { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
276 { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
278 { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
279 { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
280 { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
281 { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
282 { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
283 { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
284 { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
286 { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
287 { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
288 { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
289 { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
290 { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
291 { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
292 { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
293 #endif // CONFIG_EXT_REFS
295 #else // CONFIG_EXT_INTER
297 { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
298 { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
299 #if CONFIG_EXT_REFS
300 { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
301 { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
302 { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
303 { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
304 #endif // CONFIG_EXT_REFS
305 { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
306 { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
308 #if CONFIG_EXT_REFS
309 { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
310 { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
311 { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
312 { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
313 { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
314 { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
315 { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
316 { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
317 #endif // CONFIG_EXT_REFS
319 { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
320 #if CONFIG_EXT_REFS
321 { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
322 { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
323 #endif // CONFIG_EXT_REFS
324 { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
326 #if CONFIG_EXT_REFS
327 { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
328 { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
329 { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
330 { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
331 #endif // CONFIG_EXT_REFS
333 #endif // CONFIG_EXT_INTER
335 { H_PRED, { INTRA_FRAME, NONE_FRAME } },
336 { V_PRED, { INTRA_FRAME, NONE_FRAME } },
337 { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
338 { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
339 { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
340 { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
341 { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
342 { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
344 #if CONFIG_EXT_INTER
345 { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
346 { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
347 { NEARMV, { LAST_FRAME, INTRA_FRAME } },
348 { NEWMV, { LAST_FRAME, INTRA_FRAME } },
350 #if CONFIG_EXT_REFS
351 { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
352 { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
353 { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
354 { NEWMV, { LAST2_FRAME, INTRA_FRAME } },
356 { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
357 { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
358 { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
359 { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
360 #endif // CONFIG_EXT_REFS
362 { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
363 { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
364 { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
365 { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
367 #if CONFIG_EXT_REFS
368 { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
369 { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
370 { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
371 { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
372 #endif // CONFIG_EXT_REFS
374 { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
375 { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
376 { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
377 { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
378 #endif // CONFIG_EXT_INTER
381 #if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
382 static INLINE int write_uniform_cost(int n, int v) {
383 const int l = get_unsigned_bits(n);
384 const int m = (1 << l) - n;
385 if (l == 0) return 0;
386 if (v < m)
387 return (l - 1) * av1_cost_bit(128, 0);
388 else
389 return l * av1_cost_bit(128, 0);
391 #endif // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
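// Illustrative worked example (not in the upstream source): write_uniform_cost()
// above prices a truncated binary code over n symbols, assuming
// av1_cost_bit(128, 0) is the cost of one bit coded with probability 1/2.
// For n = 5: l = get_unsigned_bits(5) = 3 and m = (1 << 3) - 5 = 3, so
// symbols v = 0..2 cost (l - 1) = 2 bits while symbols v = 3..4 cost l = 3
// bits, which is the usual truncated-binary split between short and long
// codewords.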
393 // constants for prune 1 and prune 2 decision boundaries
394 #define FAST_EXT_TX_CORR_MID 0.0
395 #define FAST_EXT_TX_EDST_MID 0.1
396 #define FAST_EXT_TX_CORR_MARGIN 0.5
397 #define FAST_EXT_TX_EDST_MARGIN 0.3
399 #if CONFIG_DAALA_DIST
400 static int od_compute_var_4x4(od_coeff *x, int stride) {
401 int sum;
402 int s2;
403 int i;
404 sum = 0;
405 s2 = 0;
406 for (i = 0; i < 4; i++) {
407 int j;
408 for (j = 0; j < 4; j++) {
409 int t;
411 t = x[i * stride + j];
412 sum += t;
413 s2 += t * t;
416 // TODO(yushin): Check whether any changes are required for high bit depth.
417 return (s2 - (sum * sum >> 4)) >> 4;
420 /* OD_DIST_LP_MID controls the frequency weighting filter used for computing
421 the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
422 is applied both horizontally and vertically. For X=5, the filter is
423 a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
424 #define OD_DIST_LP_MID (5)
425 #define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
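// Illustrative note (not in the upstream source): with OD_DIST_LP_MID = 5 the
// separable low-pass filter is [1 5 1] / 7 in each direction. The code below
// applies the unnormalized [1 5 1] filter horizontally and then vertically,
// so a flat (DC) input is scaled by 7 * 7 and its energy by (7 * 7)^2. That
// is why od_compute_dist_8x8() divides the accumulated error energy by
// OD_DIST_LP_NORM^4 = 7^4 to restore a unit DC response.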
427 static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
428 od_coeff *y, od_coeff *e_lp, int stride) {
429 double sum;
430 int min_var;
431 double mean_var;
432 double var_stat;
433 double activity;
434 double calibration;
435 int i;
436 int j;
437 double vardist;
439 vardist = 0;
440 OD_ASSERT(qm != OD_FLAT_QM);
441 (void)qm;
442 #if 1
443 min_var = INT_MAX;
444 mean_var = 0;
445 for (i = 0; i < 3; i++) {
446 for (j = 0; j < 3; j++) {
447 int varx;
448 int vary;
449 varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
450 vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
451 min_var = OD_MINI(min_var, varx);
452 mean_var += 1. / (1 + varx);
453 /* The cast to (double) is to avoid an overflow before the sqrt.*/
454 vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
457 /* We use a different variance statistic depending on whether activity
458 masking is used, since the harmonic mean appeared slightly worse with
459 masking off. The calibration constant just ensures that we preserve the
460 rate compared to activity=1. */
461 if (use_activity_masking) {
462 calibration = 1.95;
463 var_stat = 9. / mean_var;
464 } else {
465 calibration = 1.62;
466 var_stat = min_var;
468 /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
469 activity masking constant. */
470 activity = calibration * pow(.25 + var_stat, -1. / 6);
471 #else
472 activity = 1;
473 #endif // 1
474 sum = 0;
475 for (i = 0; i < 8; i++) {
476 for (j = 0; j < 8; j++)
477 sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
479 /* Normalize the filter to unit DC response. */
480 sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
481 OD_DIST_LP_NORM);
482 return activity * activity * (sum + vardist);
485 // Note: Inputs x and y are in the pixel domain.
486 static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
487 od_coeff *y, int bsize_w, int bsize_h,
488 int qindex) {
489 int i;
490 double sum;
491 sum = 0;
493 assert(bsize_w >= 8 && bsize_h >= 8);
495 if (qm == OD_FLAT_QM) {
496 for (i = 0; i < bsize_w * bsize_h; i++) {
497 double tmp;
498 tmp = x[i] - y[i];
499 sum += tmp * tmp;
501 } else {
502 int j;
503 DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
504 DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
505 DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
506 int mid = OD_DIST_LP_MID;
507 for (i = 0; i < bsize_h; i++) {
508 for (j = 0; j < bsize_w; j++) {
509 e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
512 for (i = 0; i < bsize_h; i++) {
513 tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
514 tmp[i * bsize_w + bsize_w - 1] =
515 mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
516 for (j = 1; j < bsize_w - 1; j++) {
517 tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
518 e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
521 for (j = 0; j < bsize_w; j++) {
522 e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
523 e_lp[(bsize_h - 1) * bsize_w + j] =
524 mid * tmp[(bsize_h - 1) * bsize_w + j] +
525 2 * tmp[(bsize_h - 2) * bsize_w + j];
527 for (i = 1; i < bsize_h - 1; i++) {
528 for (j = 0; j < bsize_w; j++) {
529 e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
530 tmp[(i - 1) * bsize_w + j] +
531 tmp[(i + 1) * bsize_w + j];
534 for (i = 0; i < bsize_h; i += 8) {
535 for (j = 0; j < bsize_w; j += 8) {
536 sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
537 &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
538 bsize_w);
541 /* Scale according to linear regression against SSE, for 8x8 blocks. */
542 if (activity_masking) {
543 sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
544 (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
545 } else {
546 sum *= qindex >= 128
547 ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
548 : qindex <= 43
549 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
550 : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
553 return sum;
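// Illustrative worked example (not in the upstream source): the
// piecewise-linear scaling above interpolates the regression fit across
// qindex. With activity masking off, qindex = 128 gives a multiplier of
// exactly 1.4, qindex = 209 gives 0.9 and qindex = 43 gives 1.5; with
// activity masking on, qindex = 99 gives 2.2 and the multiplier moves
// linearly toward 1.7 at qindex = 210.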
556 int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
557 int dst_stride, int bsw, int bsh, int qm,
558 int use_activity_masking, int qindex) {
559 int i, j;
560 int64_t d;
561 DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
562 DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);
564 assert(qm == OD_HVS_QM);
566 for (j = 0; j < bsh; j++)
567 for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
569 for (j = 0; j < bsh; j++)
570 for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
572 d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
573 qindex);
574 return d;
576 #endif // CONFIG_DAALA_DIST
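// Usage sketch (not in the upstream source): av1_daala_dist() is only valid
// for blocks of at least 8x8 pixels (od_compute_dist() asserts
// bsize_w >= 8 && bsize_h >= 8), and the callers in this file always pass
// qm = OD_HVS_QM. A typical call for an 8x8 luma block therefore looks like
//   av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, OD_HVS_QM,
//                  use_activity_masking, x->qindex);
// which mirrors how av1_dist_block() and block_8x8_rd_txfm_daala_dist() use
// it below.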
578 static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
579 const uint8_t *src, int src_stride,
580 const uint8_t *dst, int dst_stride,
581 double *hordist, double *verdist) {
582 const int bw = block_size_wide[bsize];
583 const int bh = block_size_high[bsize];
584 unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
586 const int f_index = bsize - BLOCK_16X16;
587 if (f_index < 0) {
588 const int w_shift = bw == 8 ? 1 : 2;
589 const int h_shift = bh == 8 ? 1 : 2;
590 #if CONFIG_HIGHBITDEPTH
591 if (cpi->common.use_highbitdepth) {
592 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
593 const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
594 for (int i = 0; i < bh; ++i)
595 for (int j = 0; j < bw; ++j) {
596 const int index = (j >> w_shift) + ((i >> h_shift) << 2);
597 esq[index] +=
598 (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
599 (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
601 } else {
602 #endif // CONFIG_HIGHBITDEPTH
604 for (int i = 0; i < bh; ++i)
605 for (int j = 0; j < bw; ++j) {
606 const int index = (j >> w_shift) + ((i >> h_shift) << 2);
607 esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
608 (src[j + i * src_stride] - dst[j + i * dst_stride]);
610 #if CONFIG_HIGHBITDEPTH
612 #endif // CONFIG_HIGHBITDEPTH
613 } else {
614 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
615 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
616 &esq[1]);
617 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
618 &esq[2]);
619 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
620 dst_stride, &esq[3]);
621 src += bh / 4 * src_stride;
622 dst += bh / 4 * dst_stride;
624 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
625 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
626 &esq[5]);
627 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
628 &esq[6]);
629 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
630 dst_stride, &esq[7]);
631 src += bh / 4 * src_stride;
632 dst += bh / 4 * dst_stride;
634 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
635 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
636 &esq[9]);
637 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
638 &esq[10]);
639 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
640 dst_stride, &esq[11]);
641 src += bh / 4 * src_stride;
642 dst += bh / 4 * dst_stride;
644 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
645 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
646 &esq[13]);
647 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
648 &esq[14]);
649 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
650 dst_stride, &esq[15]);
653 double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
654 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
655 esq[12] + esq[13] + esq[14] + esq[15];
656 if (total > 0) {
657 const double e_recip = 1.0 / total;
658 hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
659 hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
660 hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
661 verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
662 verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
663 verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
664 } else {
665 hordist[0] = verdist[0] = 0.25;
666 hordist[1] = verdist[1] = 0.25;
667 hordist[2] = verdist[2] = 0.25;
671 static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
672 const uint8_t *src, int src_stride,
673 const uint8_t *dst, int dst_stride) {
674 int prune_bitmask = 0;
675 double svm_proj_h = 0, svm_proj_v = 0;
676 double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
677 get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
678 hdist, vdist);
680 svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
681 vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
682 svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
683 hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
684 if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
685 prune_bitmask |= 1 << FLIPADST_1D;
686 else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
687 prune_bitmask |= 1 << ADST_1D;
689 if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
690 prune_bitmask |= 1 << (FLIPADST_1D + 8);
691 else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
692 prune_bitmask |= 1 << (ADST_1D + 8);
694 return prune_bitmask;
697 #if CONFIG_EXT_TX
698 static void get_horver_correlation(const int16_t *diff, int stride, int w,
699 int h, double *hcorr, double *vcorr) {
700 // Returns hor/ver correlation coefficient
701 const int num = (h - 1) * (w - 1);
702 double num_r;
703 int i, j;
704 int64_t xy_sum = 0, xz_sum = 0;
705 int64_t x_sum = 0, y_sum = 0, z_sum = 0;
706 int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
707 double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
708 *hcorr = *vcorr = 1;
710 assert(num > 0);
711 num_r = 1.0 / num;
712 for (i = 1; i < h; ++i) {
713 for (j = 1; j < w; ++j) {
714 const int16_t x = diff[i * stride + j];
715 const int16_t y = diff[i * stride + j - 1];
716 const int16_t z = diff[(i - 1) * stride + j];
717 xy_sum += x * y;
718 xz_sum += x * z;
719 x_sum += x;
720 y_sum += y;
721 z_sum += z;
722 x2_sum += x * x;
723 y2_sum += y * y;
724 z2_sum += z * z;
727 x_var_n = x2_sum - (x_sum * x_sum) * num_r;
728 y_var_n = y2_sum - (y_sum * y_sum) * num_r;
729 z_var_n = z2_sum - (z_sum * z_sum) * num_r;
730 xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
731 xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
732 if (x_var_n > 0 && y_var_n > 0) {
733 *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
734 *hcorr = *hcorr < 0 ? 0 : *hcorr;
736 if (x_var_n > 0 && z_var_n > 0) {
737 *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
738 *vcorr = *vcorr < 0 ? 0 : *vcorr;
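// Illustrative note (not in the upstream source): the accumulators above are
// the standard sample statistics scaled by num, so the result is the Pearson
// correlation
//   hcorr = Cov(x, y) / sqrt(Var(x) * Var(y))
// between each residual sample x and its left neighbour y; vcorr uses the
// neighbour above. Both are clamped to be non-negative before dct_vs_idtx()
// turns them into a prune decision.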
742 int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
743 double hcorr, vcorr;
744 int prune_bitmask = 0;
745 get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
747 if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
748 prune_bitmask |= 1 << IDTX_1D;
749 else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
750 prune_bitmask |= 1 << DCT_1D;
752 if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
753 prune_bitmask |= 1 << (IDTX_1D + 8);
754 else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
755 prune_bitmask |= 1 << (DCT_1D + 8);
756 return prune_bitmask;
759 // Performance drop: 0.5%, Speed improvement: 24%
760 static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
761 MACROBLOCK *x, const MACROBLOCKD *xd,
762 int adst_flipadst, int dct_idtx) {
763 int prune = 0;
765 if (adst_flipadst) {
766 const struct macroblock_plane *const p = &x->plane[0];
767 const struct macroblockd_plane *const pd = &xd->plane[0];
768 prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
769 pd->dst.buf, pd->dst.stride);
771 if (dct_idtx) {
772 av1_subtract_plane(x, bsize, 0);
773 const struct macroblock_plane *const p = &x->plane[0];
774 const int bw = 4 << (b_width_log2_lookup[bsize]);
775 const int bh = 4 << (b_height_log2_lookup[bsize]);
776 prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
779 return prune;
781 #endif // CONFIG_EXT_TX
783 // Performance drop: 0.3%, Speed improvement: 5%
784 static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
785 const MACROBLOCK *x, const MACROBLOCKD *xd) {
786 const struct macroblock_plane *const p = &x->plane[0];
787 const struct macroblockd_plane *const pd = &xd->plane[0];
788 return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
789 pd->dst.stride);
792 static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
793 const MACROBLOCKD *const xd, int tx_set) {
794 #if CONFIG_EXT_TX
795 const int *tx_set_1D = tx_set >= 0 ? ext_tx_used_inter_1D[tx_set] : NULL;
796 #else
797 const int tx_set_1D[TX_TYPES_1D] = { 0 };
798 #endif // CONFIG_EXT_TX
800 switch (cpi->sf.tx_type_search.prune_mode) {
801 case NO_PRUNE: return 0; break;
802 case PRUNE_ONE:
803 if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
804 return 0;
805 return prune_one_for_sby(cpi, bsize, x, xd);
806 break;
807 #if CONFIG_EXT_TX
808 case PRUNE_TWO:
809 if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
810 if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
811 return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
813 if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
814 return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
815 return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
816 break;
817 #endif // CONFIG_EXT_TX
819 assert(0);
820 return 0;
823 static int do_tx_type_search(TX_TYPE tx_type, int prune) {
824 // TODO(sarahparker): implement for non-ext-tx
825 #if CONFIG_EXT_TX
826 return !(((prune >> vtx_tab[tx_type]) & 1) |
827 ((prune >> (htx_tab[tx_type] + 8)) & 1));
828 #else
829 // temporary to avoid compiler warnings
830 (void)vtx_tab;
831 (void)htx_tab;
832 (void)tx_type;
833 (void)prune;
834 return 1;
835 #endif // CONFIG_EXT_TX
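// Illustrative note (not in the upstream source): the prune bitmask produced
// by adst_vs_flipadst() / dct_vs_idtx() keeps the vertical 1D transform flags
// in the low 8 bits and the horizontal flags in the next 8 bits. For example,
// a mask of
//   (1 << FLIPADST_1D) | (1 << (DCT_1D + 8))
// makes do_tx_type_search() return 0 for any 2D transform whose vertical
// component maps to FLIPADST_1D via vtx_tab[] or whose horizontal component
// maps to DCT_1D via htx_tab[].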
838 static void model_rd_from_sse(const AV1_COMP *const cpi,
839 const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
840 int plane, int64_t sse, int *rate,
841 int64_t *dist) {
842 const struct macroblockd_plane *const pd = &xd->plane[plane];
843 const int dequant_shift =
844 #if CONFIG_HIGHBITDEPTH
845 (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
846 #endif // CONFIG_HIGHBITDEPTH
847 3;
849 // Fast approximation of the modelling function.
850 if (cpi->sf.simple_model_rd_from_var) {
851 const int64_t square_error = sse;
852 int quantizer = (pd->dequant[1] >> dequant_shift);
854 if (quantizer < 120)
855 *rate = (int)((square_error * (280 - quantizer)) >>
856 (16 - AV1_PROB_COST_SHIFT));
857 else
858 *rate = 0;
859 *dist = (square_error * quantizer) >> 8;
860 } else {
861 av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
862 pd->dequant[1] >> dequant_shift, rate, dist);
865 *dist <<= 4;
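// Illustrative worked example (not in the upstream source): the dequant table
// is scaled by 8 relative to an orthonormal transform, so dequant[1] >> 3
// (or >> (bd - 5) for high bit depth) recovers the effective AC quantizer
// step. With, say, sse = 4096 and quantizer = 40, the fast path above gives
// dist = (4096 * 40) >> 8 = 640 and a rate of 4096 * (280 - 40) shifted right
// by (16 - AV1_PROB_COST_SHIFT); the final distortion is then shifted left by
// 4 to match the x16 distortion scale used elsewhere in this file.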
868 static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
869 MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
870 int plane_to, int *out_rate_sum,
871 int64_t *out_dist_sum, int *skip_txfm_sb,
872 int64_t *skip_sse_sb) {
873 // Note our transform coeffs are 8 times those of an orthogonal transform.
874 // Hence the quantizer step is also scaled by 8. To get the effective
875 // quantizer we need to divide by 8 before sending it to the modeling function.
876 int plane;
877 const int ref = xd->mi[0]->mbmi.ref_frame[0];
879 int64_t rate_sum = 0;
880 int64_t dist_sum = 0;
881 int64_t total_sse = 0;
883 x->pred_sse[ref] = 0;
885 for (plane = plane_from; plane <= plane_to; ++plane) {
886 struct macroblock_plane *const p = &x->plane[plane];
887 struct macroblockd_plane *const pd = &xd->plane[plane];
888 #if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
889 const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
890 #else
891 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
892 #endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
894 unsigned int sse;
895 int rate;
896 int64_t dist;
898 #if CONFIG_CB4X4
899 if (x->skip_chroma_rd && plane) continue;
900 #endif // CONFIG_CB4X4
902 // TODO(geza): Write direct sse functions that do not compute
903 // variance as well.
904 cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
905 &sse);
907 if (plane == 0) x->pred_sse[ref] = sse;
909 total_sse += sse;
911 model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
913 rate_sum += rate;
914 dist_sum += dist;
917 *skip_txfm_sb = total_sse == 0;
918 *skip_sse_sb = total_sse << 4;
919 *out_rate_sum = (int)rate_sum;
920 *out_dist_sum = dist_sum;
923 int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
924 intptr_t block_size, int64_t *ssz) {
925 int i;
926 int64_t error = 0, sqcoeff = 0;
928 for (i = 0; i < block_size; i++) {
929 const int diff = coeff[i] - dqcoeff[i];
930 error += diff * diff;
931 sqcoeff += coeff[i] * coeff[i];
934 *ssz = sqcoeff;
935 return error;
938 int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
939 int block_size) {
940 int i;
941 int64_t error = 0;
943 for (i = 0; i < block_size; i++) {
944 const int diff = coeff[i] - dqcoeff[i];
945 error += diff * diff;
948 return error;
951 #if CONFIG_HIGHBITDEPTH
952 int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
953 const tran_low_t *dqcoeff, intptr_t block_size,
954 int64_t *ssz, int bd) {
955 int i;
956 int64_t error = 0, sqcoeff = 0;
957 int shift = 2 * (bd - 8);
958 int rounding = shift > 0 ? 1 << (shift - 1) : 0;
960 for (i = 0; i < block_size; i++) {
961 const int64_t diff = coeff[i] - dqcoeff[i];
962 error += diff * diff;
963 sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
965 assert(error >= 0 && sqcoeff >= 0);
966 error = (error + rounding) >> shift;
967 sqcoeff = (sqcoeff + rounding) >> shift;
969 *ssz = sqcoeff;
970 return error;
972 #endif // CONFIG_HIGHBITDEPTH
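// Illustrative worked example (not in the upstream source): the shift above
// renormalizes the high-bit-depth error to the 8-bit scale. For bd = 10,
// shift = 2 * (10 - 8) = 4 and rounding = 8, so an accumulated error of 1000
// becomes (1000 + 8) >> 4 = 63, i.e. roughly error / 16 with round-to-nearest.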
974 #if CONFIG_PVQ
975 // Without PVQ, av1_block_error_c() returns two kinds of errors:
976 // 1) the reconstruction (i.e. decoded) error and
977 // 2) the squared sum of the transformed residue (i.e. 'coeff').
978 // However, if PVQ is enabled, coeff does not hold the transformed residue
979 // but the transformed original instead.
980 // Hence, a new parameter, the ref vector (i.e. the transformed predicted
981 // signal), is required to derive the residue signal,
982 // i.e. coeff - ref = residue (all in the transform domain).
984 #if CONFIG_HIGHBITDEPTH
985 static int64_t av1_highbd_block_error2_c(const tran_low_t *coeff,
986 const tran_low_t *dqcoeff,
987 const tran_low_t *ref,
988 intptr_t block_size, int64_t *ssz,
989 int bd) {
990 int64_t error;
991 int64_t sqcoeff;
992 int shift = 2 * (bd - 8);
993 int rounding = shift > 0 ? 1 << (shift - 1) : 0;
994 // Use the existing SSE code to calculate the distortion of the decoded signal:
995 // i.e. (orig - decoded)^2
996 // For high bit depth, throw away ssz until a 32-bit version of
997 // av1_block_error_fp is written.
998 int64_t ssz_trash;
999 error = av1_block_error(coeff, dqcoeff, block_size, &ssz_trash);
1000 // prediction residue^2 = (orig - ref)^2
1001 sqcoeff = av1_block_error(coeff, ref, block_size, &ssz_trash);
1002 error = (error + rounding) >> shift;
1003 sqcoeff = (sqcoeff + rounding) >> shift;
1004 *ssz = sqcoeff;
1005 return error;
1007 #else
1008 // TODO(yushin): Since the 4x4 case does not need ssz, it would be better to
1009 // refactor this into a separate function that skips the extra ssz computation.
1010 static int64_t av1_block_error2_c(const tran_low_t *coeff,
1011 const tran_low_t *dqcoeff,
1012 const tran_low_t *ref, intptr_t block_size,
1013 int64_t *ssz) {
1014 int64_t error;
1015 // Use the existing SSE code to calculate the distortion of the decoded signal:
1016 // i.e. (orig - decoded)^2
1017 error = av1_block_error_fp(coeff, dqcoeff, block_size);
1018 // prediction residue^2 = (orig - ref)^2
1019 *ssz = av1_block_error_fp(coeff, ref, block_size);
1020 return error;
1022 #endif // CONFIG_HIGHBITDEPTH
1023 #endif // CONFIG_PVQ
1025 #if !CONFIG_PVQ || CONFIG_VAR_TX
1026 /* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
1027 * decide whether to include cost of a trailing EOB node or not (i.e. we
1028 * can skip this if the last coefficient in this transform block, e.g. the
1029 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
1030 * is non-zero). */
1031 #if !CONFIG_LV_MAP
1032 static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
1033 int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
1034 const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
1035 int use_fast_coef_costing) {
1036 MACROBLOCKD *const xd = &x->e_mbd;
1037 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1038 const struct macroblock_plane *p = &x->plane[plane];
1039 const struct macroblockd_plane *pd = &xd->plane[plane];
1040 const PLANE_TYPE type = pd->plane_type;
1041 const uint16_t *band_count = &band_count_table[tx_size][1];
1042 const int eob = p->eobs[block];
1043 const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
1044 const int tx_size_ctx = txsize_sqr_map[tx_size];
1045 unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
1046 x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
1047 uint8_t token_cache[MAX_TX_SQUARE];
1048 int pt = combine_entropy_contexts(*a, *l);
1049 int c, cost;
1050 const int16_t *scan = scan_order->scan;
1051 const int16_t *nb = scan_order->neighbors;
1052 const int ref = is_inter_block(mbmi);
1053 aom_prob *blockz_probs =
1054 cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref];
1056 #if CONFIG_HIGHBITDEPTH
1057 const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
1058 #else
1059 const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
1060 #endif // CONFIG_HIGHBITDEPTH
1062 #if !CONFIG_VAR_TX && !CONFIG_SUPERTX
1063 // Check for consistency of tx_size with mode info
1064 assert(tx_size == get_tx_size(plane, xd));
1065 #endif // !CONFIG_VAR_TX && !CONFIG_SUPERTX
1066 (void)cm;
1068 if (eob == 0) {
1069 // single eob token
1070 cost = av1_cost_bit(blockz_probs[pt], 0);
1071 } else {
1072 if (use_fast_coef_costing) {
1073 int band_left = *band_count++;
1075 // dc token
1076 int v = qcoeff[0];
1077 int16_t prev_t;
1078 cost = av1_get_token_cost(v, &prev_t, cat6_bits);
1079 cost += (*token_costs)[!prev_t][pt][prev_t];
1081 token_cache[0] = av1_pt_energy_class[prev_t];
1082 ++token_costs;
1084 // ac tokens
1085 for (c = 1; c < eob; c++) {
1086 const int rc = scan[c];
1087 int16_t t;
1089 v = qcoeff[rc];
1090 cost += av1_get_token_cost(v, &t, cat6_bits);
1091 cost += (*token_costs)[!t][!prev_t][t];
1092 prev_t = t;
1093 if (!--band_left) {
1094 band_left = *band_count++;
1095 ++token_costs;
1099 // eob token
1100 cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
1102 } else { // !use_fast_coef_costing
1103 int band_left = *band_count++;
1105 // dc token
1106 int v = qcoeff[0];
1107 int16_t tok;
1108 cost = av1_get_token_cost(v, &tok, cat6_bits);
1109 cost += (*token_costs)[!tok][pt][tok];
1111 token_cache[0] = av1_pt_energy_class[tok];
1112 ++token_costs;
1114 // ac tokens
1115 for (c = 1; c < eob; c++) {
1116 const int rc = scan[c];
1118 v = qcoeff[rc];
1119 cost += av1_get_token_cost(v, &tok, cat6_bits);
1120 pt = get_coef_context(nb, token_cache, c);
1121 cost += (*token_costs)[!tok][pt][tok];
1122 token_cache[rc] = av1_pt_energy_class[tok];
1123 if (!--band_left) {
1124 band_left = *band_count++;
1125 ++token_costs;
1129 // eob token
1130 pt = get_coef_context(nb, token_cache, c);
1131 cost += (*token_costs)[0][pt][EOB_TOKEN];
1135 return cost;
1137 #endif // !CONFIG_LV_MAP
1139 int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
1140 int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
1141 const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
1142 int use_fast_coef_costing) {
1143 #if !CONFIG_LV_MAP
1144 const AV1_COMMON *const cm = &cpi->common;
1145 return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
1146 use_fast_coef_costing);
1147 #else // !CONFIG_LV_MAP
1148 (void)scan_order;
1149 (void)use_fast_coef_costing;
1150 const MACROBLOCKD *xd = &x->e_mbd;
1151 const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1152 const struct macroblockd_plane *pd = &xd->plane[plane];
1153 const BLOCK_SIZE bsize = mbmi->sb_type;
1154 #if CONFIG_CB4X4
1155 #if CONFIG_CHROMA_2X2
1156 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
1157 #else
1158 const BLOCK_SIZE plane_bsize =
1159 AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
1160 #endif // CONFIG_CHROMA_2X2
1161 #else // CONFIG_CB4X4
1162 const BLOCK_SIZE plane_bsize =
1163 get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
1164 #endif // CONFIG_CB4X4
1166 TXB_CTX txb_ctx;
1167 get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
1168 return av1_cost_coeffs_txb(cpi, x, plane, block, &txb_ctx);
1169 #endif // !CONFIG_LV_MAP
1171 #endif // !CONFIG_PVQ || CONFIG_VAR_TX
1173 // Get the visible dimensions of a transform block, cropped to the MI units.
1174 static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
1175 BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
1176 BLOCK_SIZE tx_bsize, int *width, int *height,
1177 int *visible_width, int *visible_height) {
1178 #if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
1179 assert(tx_bsize <= plane_bsize);
1180 #endif // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
1181 int txb_height = block_size_high[tx_bsize];
1182 int txb_width = block_size_wide[tx_bsize];
1183 const int block_height = block_size_high[plane_bsize];
1184 const int block_width = block_size_wide[plane_bsize];
1185 const struct macroblockd_plane *const pd = &xd->plane[plane];
1186 // TODO(aconverse@google.com): Investigate using crop_width/height here rather
1187 // than the MI size
1188 const int block_rows =
1189 (xd->mb_to_bottom_edge >= 0)
1190 ? block_height
1191 : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
1192 const int block_cols =
1193 (xd->mb_to_right_edge >= 0)
1194 ? block_width
1195 : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
1196 const int tx_unit_size = tx_size_wide_log2[0];
1197 if (width) *width = txb_width;
1198 if (height) *height = txb_height;
1199 *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
1200 *visible_height =
1201 clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
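// Illustrative worked example (not in the upstream source): suppose a 16x16
// luma block whose last 4 pixel columns fall outside the frame, so
// xd->mb_to_right_edge is -32 in its 1/8-pel units (-4 pixels after >> 3).
// Then block_cols = 16 - 4 = 12, and for the transform block at blk_col = 2
// (in 4-pixel units) with an 8-pixel-wide tx_bsize the visible width is
// clamp(12 - (2 << 2), 0, 8) = 4, i.e. only the left half of that transform
// block contributes to the visible-area SSE computed below.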
1204 // Compute the pixel-domain sum of squared error over all visible 4x4s in the
1205 // transform block.
1206 static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
1207 int plane, const uint8_t *src, const int src_stride,
1208 const uint8_t *dst, const int dst_stride, int blk_row,
1209 int blk_col, const BLOCK_SIZE plane_bsize,
1210 const BLOCK_SIZE tx_bsize) {
1211 int txb_rows, txb_cols, visible_rows, visible_cols;
1212 get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
1213 &txb_cols, &txb_rows, &visible_cols, &visible_rows);
1214 assert(visible_rows > 0);
1215 assert(visible_cols > 0);
1216 #if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
1217 if ((txb_rows == visible_rows && txb_cols == visible_cols) &&
1218 tx_bsize < BLOCK_SIZES) {
1219 #else
1220 if (txb_rows == visible_rows && txb_cols == visible_cols) {
1221 #endif
1222 unsigned sse;
1223 cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
1224 return sse;
1226 #if CONFIG_HIGHBITDEPTH
1227 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1228 uint64_t sse = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
1229 visible_cols, visible_rows);
1230 return (unsigned int)ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
1232 #endif // CONFIG_HIGHBITDEPTH
1233 unsigned sse = aom_sse_odd_size(src, src_stride, dst, dst_stride,
1234 visible_cols, visible_rows);
1235 return sse;
1238 // Compute the sum of squares over all visible 4x4s in the transform block.
1239 static int64_t sum_squares_visible(const MACROBLOCKD *xd, int plane,
1240 const int16_t *diff, const int diff_stride,
1241 int blk_row, int blk_col,
1242 const BLOCK_SIZE plane_bsize,
1243 const BLOCK_SIZE tx_bsize) {
1244 int visible_rows, visible_cols;
1245 get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
1246 NULL, &visible_cols, &visible_rows);
1247 return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
1250 void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
1251 BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
1252 TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
1253 OUTPUT_STATUS output_status) {
1254 MACROBLOCKD *const xd = &x->e_mbd;
1255 const struct macroblock_plane *const p = &x->plane[plane];
1256 #if CONFIG_DAALA_DIST
1257 int qm = OD_HVS_QM;
1258 int use_activity_masking = 0;
1259 #if CONFIG_PVQ
1260 use_activity_masking = x->daala_enc.use_activity_masking;
1261 #endif // CONFIG_PVQ
1262 struct macroblockd_plane *const pd = &xd->plane[plane];
1263 #else // CONFIG_DAALA_DIST
1264 const struct macroblockd_plane *const pd = &xd->plane[plane];
1265 #endif // CONFIG_DAALA_DIST
1267 if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) {
1268 // Transform domain distortion computation is more efficient as it does
1269 // not involve an inverse transform, but it is less accurate.
1270 const int buffer_length = tx_size_2d[tx_size];
1271 int64_t this_sse;
1272 int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
1273 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
1274 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1275 #if CONFIG_PVQ
1276 tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
1278 #if CONFIG_HIGHBITDEPTH
1279 const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
1280 *out_dist = av1_highbd_block_error2_c(coeff, dqcoeff, ref_coeff,
1281 buffer_length, &this_sse, bd) >>
1282 shift;
1283 #else
1284 *out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
1285 &this_sse) >>
1286 shift;
1287 #endif // CONFIG_HIGHBITDEPTH
1288 #elif CONFIG_HIGHBITDEPTH
1289 const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
1290 *out_dist =
1291 av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
1292 shift;
1293 #else
1294 *out_dist =
1295 av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
1296 #endif // CONFIG_PVQ
1297 *out_sse = this_sse >> shift;
1298 } else {
1299 const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
1300 #if !CONFIG_PVQ || CONFIG_DAALA_DIST
1301 const int bsw = block_size_wide[tx_bsize];
1302 const int bsh = block_size_high[tx_bsize];
1303 #endif
1304 const int src_stride = x->plane[plane].src.stride;
1305 const int dst_stride = xd->plane[plane].dst.stride;
1306 // Scale the transform block index to pixel units.
1307 const int src_idx = (blk_row * src_stride + blk_col)
1308 << tx_size_wide_log2[0];
1309 const int dst_idx = (blk_row * dst_stride + blk_col)
1310 << tx_size_wide_log2[0];
1311 const uint8_t *src = &x->plane[plane].src.buf[src_idx];
1312 const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
1313 const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1314 const uint16_t eob = p->eobs[block];
1316 assert(cpi != NULL);
1317 assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
1319 #if CONFIG_DAALA_DIST
1320 if (plane == 0 && bsw >= 8 && bsh >= 8) {
1321 if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
1322 const int pred_stride = block_size_wide[plane_bsize];
1323 const int pred_idx = (blk_row * pred_stride + blk_col)
1324 << tx_size_wide_log2[0];
1325 const int16_t *pred = &pd->pred[pred_idx];
1326 int i, j;
1327 DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);
1329 for (j = 0; j < bsh; j++)
1330 for (i = 0; i < bsw; i++)
1331 pred8[j * bsw + i] = pred[j * pred_stride + i];
1332 *out_sse = av1_daala_dist(src, src_stride, pred8, bsw, bsw, bsh, qm,
1333 use_activity_masking, x->qindex);
1334 } else {
1335 *out_sse = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
1336 qm, use_activity_masking, x->qindex);
1338 } else
1339 #endif // CONFIG_DAALA_DIST
1341 const int diff_stride = block_size_wide[plane_bsize];
1342 const int diff_idx = (blk_row * diff_stride + blk_col)
1343 << tx_size_wide_log2[0];
1344 const int16_t *diff = &p->src_diff[diff_idx];
1345 *out_sse = sum_squares_visible(xd, plane, diff, diff_stride, blk_row,
1346 blk_col, plane_bsize, tx_bsize);
1347 #if CONFIG_HIGHBITDEPTH
1348 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1349 *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2);
1350 #endif // CONFIG_HIGHBITDEPTH
1352 *out_sse *= 16;
1354 if (eob) {
1355 if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
1356 #if CONFIG_DAALA_DIST
1357 if (plane == 0 && bsw >= 8 && bsh >= 8)
1358 *out_dist = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
1359 qm, use_activity_masking, x->qindex);
1360 else
1361 #endif // CONFIG_DAALA_DIST
1362 *out_dist =
1363 pixel_sse(cpi, xd, plane, src, src_stride, dst, dst_stride,
1364 blk_row, blk_col, plane_bsize, tx_bsize);
1365 } else {
1366 #if CONFIG_HIGHBITDEPTH
1367 uint8_t *recon;
1368 DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
1370 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1371 recon = CONVERT_TO_BYTEPTR(recon16);
1372 else
1373 recon = (uint8_t *)recon16;
1374 #else
1375 DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
1376 #endif // CONFIG_HIGHBITDEPTH
1378 #if !CONFIG_PVQ
1379 #if CONFIG_HIGHBITDEPTH
1380 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1381 aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
1382 NULL, 0, bsw, bsh, xd->bd);
1383 } else {
1384 #endif // CONFIG_HIGHBITDEPTH
1385 aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL,
1386 0, bsw, bsh);
1387 #if CONFIG_HIGHBITDEPTH
1389 #endif // CONFIG_HIGHBITDEPTH
1390 #else
1391 (void)dst;
1392 #endif // !CONFIG_PVQ
1394 const PLANE_TYPE plane_type = get_plane_type(plane);
1395 TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
1397 av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, recon,
1398 MAX_TX_SIZE, eob);
1400 #if CONFIG_DAALA_DIST
1401 if (plane == 0 && bsw >= 8 && bsh >= 8) {
1402 *out_dist = av1_daala_dist(src, src_stride, recon, MAX_TX_SIZE, bsw,
1403 bsh, qm, use_activity_masking, x->qindex);
1404 } else {
1405 if (plane == 0) {
1406 // Save the decoded pixels of an inter block in pd->pred so that
1407 // block_8x8_rd_txfm_daala_dist() does not need to reproduce them
1408 // by calling av1_inverse_transform_block() again.
1409 const int pred_stride = block_size_wide[plane_bsize];
1410 const int pred_idx = (blk_row * pred_stride + blk_col)
1411 << tx_size_wide_log2[0];
1412 int16_t *pred = &pd->pred[pred_idx];
1413 int i, j;
1415 for (j = 0; j < bsh; j++)
1416 for (i = 0; i < bsw; i++)
1417 pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];
1419 #endif // CONFIG_DAALA_DIST
1420 *out_dist =
1421 pixel_sse(cpi, xd, plane, src, src_stride, recon, MAX_TX_SIZE,
1422 blk_row, blk_col, plane_bsize, tx_bsize);
1423 #if CONFIG_DAALA_DIST
1425 #endif // CONFIG_DAALA_DIST
1427 *out_dist *= 16;
1428 } else {
1429 *out_dist = *out_sse;
1434 static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
1435 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
1436 struct rdcost_block_args *args = arg;
1437 MACROBLOCK *const x = args->x;
1438 MACROBLOCKD *const xd = &x->e_mbd;
1439 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1440 const AV1_COMP *cpi = args->cpi;
1441 ENTROPY_CONTEXT *a = args->t_above + blk_col;
1442 ENTROPY_CONTEXT *l = args->t_left + blk_row;
1443 #if !CONFIG_TXK_SEL
1444 const AV1_COMMON *cm = &cpi->common;
1445 #endif
1446 int64_t rd1, rd2, rd;
1447 RD_STATS this_rd_stats;
1449 assert(tx_size == get_tx_size(plane, xd));
1451 av1_init_rd_stats(&this_rd_stats);
1453 if (args->exit_early) return;
1455 if (!is_inter_block(mbmi)) {
1456 #if CONFIG_CFL
1458 #if CONFIG_EC_ADAPT
1459 FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
1460 #else
1461 FRAME_CONTEXT *const ec_ctx = cm->fc;
1462 #endif // CONFIG_EC_ADAPT
1464 av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
1465 blk_row, tx_size, plane_bsize);
1466 #else
1467 av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
1468 #endif
1469 #if CONFIG_DPCM_INTRA
1470 const int block_raster_idx =
1471 av1_block_index_to_raster_order(tx_size, block);
1472 const PREDICTION_MODE mode =
1473 (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
1474 TX_TYPE tx_type = get_tx_type((plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
1475 xd, block, tx_size);
1476 if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
1477 int8_t skip;
1478 av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
1479 plane_bsize, tx_size, tx_type, a, l, &skip);
1480 av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
1481 tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
1482 OUTPUT_HAS_DECODED_PIXELS);
1483 goto CALCULATE_RD;
1485 #endif // CONFIG_DPCM_INTRA
1486 av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
1489 #if !CONFIG_TXK_SEL
1490 // full forward transform and quantization
1491 const int coeff_ctx = combine_entropy_contexts(*a, *l);
1492 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
1493 coeff_ctx, AV1_XFORM_QUANT_FP);
1494 av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx);
1496 if (!is_inter_block(mbmi)) {
1497 struct macroblock_plane *const p = &x->plane[plane];
1498 av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
1499 p->eobs[block]);
1500 av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
1501 tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
1502 OUTPUT_HAS_DECODED_PIXELS);
1503 } else {
1504 av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
1505 tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
1506 OUTPUT_HAS_PREDICTED_PIXELS);
1508 #if CONFIG_CFL
1509 if (plane == AOM_PLANE_Y && x->cfl_store_y) {
1510 struct macroblockd_plane *const pd = &xd->plane[plane];
1511 const int dst_stride = pd->dst.stride;
1512 uint8_t *dst =
1513 &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
1514 cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
1516 #endif
1517 #if CONFIG_DPCM_INTRA
1518 CALCULATE_RD : {}
1519 #endif // CONFIG_DPCM_INTRA
1520 rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
1521 if (args->this_rd + rd > args->best_rd) {
1522 args->exit_early = 1;
1523 return;
1525 #if !CONFIG_PVQ
1526 const PLANE_TYPE plane_type = get_plane_type(plane);
1527 const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
1528 const SCAN_ORDER *scan_order =
1529 get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
1530 this_rd_stats.rate =
1531 av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l,
1532 args->use_fast_coef_costing);
1533 #else // !CONFIG_PVQ
1534 this_rd_stats.rate = x->rate;
1535 #endif // !CONFIG_PVQ
1536 #else // !CONFIG_TXK_SEL
1537 av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
1538 tx_size, a, l, args->use_fast_coef_costing,
1539 &this_rd_stats);
1540 #endif // !CONFIG_TXK_SEL
1542 #if !CONFIG_PVQ
1543 #if CONFIG_RD_DEBUG
1544 av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
1545 this_rd_stats.rate);
1546 #endif // CONFIG_RD_DEBUG
1547 av1_set_txb_context(x, plane, block, tx_size, a, l);
1548 #endif // !CONFIG_PVQ
1550 rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
1551 rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
1553 // TODO(jingning): temporarily enabled only for luma component
1554 rd = AOMMIN(rd1, rd2);
1556 #if CONFIG_DAALA_DIST
1557 if (plane == 0 && plane_bsize >= BLOCK_8X8 &&
1558 (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) {
1559 this_rd_stats.dist = 0;
1560 this_rd_stats.sse = 0;
1561 rd = 0;
1562 x->rate_4x4[block] = this_rd_stats.rate;
1564 #endif // CONFIG_DAALA_DIST
1566 #if !CONFIG_PVQ
1567 this_rd_stats.skip &= !x->plane[plane].eobs[block];
1568 #else
1569 this_rd_stats.skip &= x->pvq_skip[plane];
1570 #endif // !CONFIG_PVQ
1571 av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);
1573 args->this_rd += rd;
1575 if (args->this_rd > args->best_rd) {
1576 args->exit_early = 1;
1577 return;
1581 #if CONFIG_DAALA_DIST
1582 static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
1583 int blk_col, BLOCK_SIZE plane_bsize,
1584 TX_SIZE tx_size, void *arg) {
1585 struct rdcost_block_args *args = arg;
1586 MACROBLOCK *const x = args->x;
1587 MACROBLOCKD *const xd = &x->e_mbd;
1588 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1589 int64_t rd, rd1, rd2;
1590 RD_STATS this_rd_stats;
1591 int qm = OD_HVS_QM;
1592 int use_activity_masking = 0;
1594 (void)tx_size;
1596 assert(plane == 0);
1597 assert(plane_bsize >= BLOCK_8X8);
1598 #if CONFIG_PVQ
1599 use_activity_masking = x->daala_enc.use_activity_masking;
1600 #endif // CONFIG_PVQ
1601 av1_init_rd_stats(&this_rd_stats);
1603 if (args->exit_early) return;
1606 const struct macroblock_plane *const p = &x->plane[plane];
1607 struct macroblockd_plane *const pd = &xd->plane[plane];
1609 const int src_stride = p->src.stride;
1610 const int dst_stride = pd->dst.stride;
1611 const int diff_stride = block_size_wide[plane_bsize];
1613 const uint8_t *src =
1614 &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
1615 const uint8_t *dst =
1616 &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
1618 unsigned int tmp1, tmp2;
1619 int qindex = x->qindex;
1620 const int pred_stride = block_size_wide[plane_bsize];
1621 const int pred_idx = (blk_row * pred_stride + blk_col)
1622 << tx_size_wide_log2[0];
1623 int16_t *pred = &pd->pred[pred_idx];
1624 int i, j;
1625 const int tx_blk_size = 8;
1627 DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
1629 for (j = 0; j < tx_blk_size; j++)
1630 for (i = 0; i < tx_blk_size; i++)
1631 pred8[j * tx_blk_size + i] = pred[j * diff_stride + i];
1633 tmp1 = av1_daala_dist(src, src_stride, pred8, tx_blk_size, 8, 8, qm,
1634 use_activity_masking, qindex);
1635 tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, qm,
1636 use_activity_masking, qindex);
1638 if (!is_inter_block(mbmi)) {
1639 this_rd_stats.sse = (int64_t)tmp1 * 16;
1640 this_rd_stats.dist = (int64_t)tmp2 * 16;
1641 } else {
1642 // For inter mode, the decoded pixels are provided in pd->pred,
1643 // while the predicted pixels are in dst.
1644 this_rd_stats.sse = (int64_t)tmp2 * 16;
1645 this_rd_stats.dist = (int64_t)tmp1 * 16;
1649 rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
1650 if (args->this_rd + rd > args->best_rd) {
1651 args->exit_early = 1;
1652 return;
1656 const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
1657 const uint8_t txw_unit = tx_size_wide_unit[tx_size];
1658 const uint8_t txh_unit = tx_size_high_unit[tx_size];
1659 const int step = txw_unit * txh_unit;
1660 int offset_h = tx_size_high_unit[TX_4X4];
1661 // The rate of the current 8x8 block is the sum of the rates of its four 4x4 blocks.
1662 this_rd_stats.rate =
1663 x->rate_4x4[block - max_blocks_wide * offset_h - step] +
1664 x->rate_4x4[block - max_blocks_wide * offset_h] +
1665 x->rate_4x4[block - step] + x->rate_4x4[block];
1667 rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
1668 rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
1669 rd = AOMMIN(rd1, rd2);
1671 args->rd_stats.dist += this_rd_stats.dist;
1672 args->rd_stats.sse += this_rd_stats.sse;
1674 args->this_rd += rd;
1676 if (args->this_rd > args->best_rd) {
1677 args->exit_early = 1;
1678 return;
1681 #endif // CONFIG_DAALA_DIST
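// Computes the rate-distortion statistics for all transform blocks of one
// plane at a fixed transform size. Each block is visited with block_rd_txfm
// (for 4x4/4x8/8x4 transforms on luma blocks of at least 8x8 under
// CONFIG_DAALA_DIST, the 8x8 helper above is used as well), and rd_stats is
// marked invalid if the early-exit threshold is hit.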
1683 static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
1684 RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
1685 BLOCK_SIZE bsize, TX_SIZE tx_size,
1686 int use_fast_coef_casting) {
1687 MACROBLOCKD *const xd = &x->e_mbd;
1688 const struct macroblockd_plane *const pd = &xd->plane[plane];
1689 struct rdcost_block_args args;
1690 av1_zero(args);
1691 args.x = x;
1692 args.cpi = cpi;
1693 args.best_rd = ref_best_rd;
1694 args.use_fast_coef_costing = use_fast_coef_casting;
1695 av1_init_rd_stats(&args.rd_stats);
1697 if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
1699 av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
1701 #if CONFIG_DAALA_DIST
1702 if (plane == 0 && bsize >= BLOCK_8X8 &&
1703 (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
1704 av1_foreach_8x8_transformed_block_in_yplane(
1705 xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
1706 else
1707 #endif // CONFIG_DAALA_DIST
1708 av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
1709 &args);
1711 if (args.exit_early) {
1712 av1_invalid_rd_stats(rd_stats);
1713 } else {
1714 *rd_stats = args.rd_stats;
1718 #if CONFIG_SUPERTX
1719 void av1_txfm_rd_in_plane_supertx(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,
1720 int64_t *distortion, int *skippable,
1721 int64_t *sse, int64_t ref_best_rd, int plane,
1722 BLOCK_SIZE bsize, TX_SIZE tx_size,
1723 int use_fast_coef_casting) {
1724 MACROBLOCKD *const xd = &x->e_mbd;
1725 const struct macroblockd_plane *const pd = &xd->plane[plane];
1726 struct rdcost_block_args args;
1727 av1_zero(args);
1728 args.cpi = cpi;
1729 args.x = x;
1730 args.best_rd = ref_best_rd;
1731 args.use_fast_coef_costing = use_fast_coef_casting;
1733 #if CONFIG_EXT_TX
1734 assert(tx_size < TX_SIZES);
1735 #endif // CONFIG_EXT_TX
1737 if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
1739 av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
1741 block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), tx_size,
1742 &args);
1744 if (args.exit_early) {
1745 *rate = INT_MAX;
1746 *distortion = INT64_MAX;
1747 *sse = INT64_MAX;
1748 *skippable = 0;
1749 } else {
1750 *distortion = args.rd_stats.dist;
1751 *rate = args.rd_stats.rate;
1752 *sse = args.rd_stats.sse;
1753 *skippable = !x->plane[plane].eobs[0];
1756 #endif // CONFIG_SUPERTX
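// Returns the number of bits needed to signal the transform size. The cost is
// nonzero only when the frame uses TX_MODE_SELECT and the block is at least
// BLOCK_8X8; otherwise the transform size is implied and costs nothing.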
1758 static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
1759 BLOCK_SIZE bsize, TX_SIZE tx_size) {
1760 const AV1_COMMON *const cm = &cpi->common;
1761 const MACROBLOCKD *const xd = &x->e_mbd;
1762 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1764 const int tx_select =
1765 cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
1767 if (tx_select) {
1768 const int is_inter = is_inter_block(mbmi);
1769 const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
1770 : intra_tx_size_cat_lookup[bsize];
1771 const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
1772 const int depth = tx_size_to_depth(coded_tx_size);
1773 const int tx_size_ctx = get_tx_size_context(xd);
1774 int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
1775 #if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
1776 if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
1777 r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob,
1778 tx_size == quarter_txsize_lookup[bsize]);
1779 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
1780 return r_tx_size;
1781 } else {
1782 return 0;
1786 // TODO(angiebird): use this function whenever possible.
1787 int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
1788 BLOCK_SIZE bsize, int plane, TX_SIZE tx_size,
1789 TX_TYPE tx_type) {
1790 if (plane > 0) return 0;
1792 const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1793 const int is_inter = is_inter_block(mbmi);
1794 #if CONFIG_EXT_TX
1795 const AV1_COMMON *cm = &cpi->common;
1796 if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
1797 !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
1798 const int ext_tx_set =
1799 get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
1800 if (is_inter) {
1801 if (ext_tx_set > 0)
1802 return cpi
1803 ->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]][tx_type];
1804 } else {
1805 if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
1806 return cpi->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
1807 [mbmi->mode][tx_type];
1810 #else
1811 (void)bsize;
1812 if (tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
1813 !FIXED_TX_TYPE) {
1814 if (is_inter) {
1815 return cpi->inter_tx_type_costs[tx_size][tx_type];
1816 } else {
1817 return cpi->intra_tx_type_costs[tx_size]
1818 [intra_mode_to_tx_type_context[mbmi->mode]]
1819 [tx_type];
1822 #endif // CONFIG_EXT_TX
1823 return 0;
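// Computes the luma RD cost of one (tx_type, tx_size) pair: runs the
// per-plane transform search, adds the transform-type and (when selectable)
// transform-size signaling bits, and folds in the skip flag. For inter,
// non-lossless blocks the cost of coding the block as skipped is also
// considered and the smaller of the two costs is returned.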
1825 static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
1826 RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
1827 TX_TYPE tx_type, int tx_size) {
1828 const AV1_COMMON *const cm = &cpi->common;
1829 MACROBLOCKD *const xd = &x->e_mbd;
1830 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1831 int64_t rd = INT64_MAX;
1832 aom_prob skip_prob = av1_get_skip_prob(cm, xd);
1833 int s0, s1;
1834 const int is_inter = is_inter_block(mbmi);
1835 const int tx_select =
1836 cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
1838 const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size);
1840 assert(skip_prob > 0);
1841 #if CONFIG_EXT_TX && CONFIG_RECT_TX
1842 assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
1843 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
1845 s0 = av1_cost_bit(skip_prob, 0);
1846 s1 = av1_cost_bit(skip_prob, 1);
1848 mbmi->tx_type = tx_type;
1849 mbmi->tx_size = tx_size;
1850 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, tx_size,
1851 cpi->sf.use_fast_coef_costing);
1852 if (rd_stats->rate == INT_MAX) return INT64_MAX;
1853 #if !CONFIG_TXK_SEL
1854 int plane = 0;
1855 rd_stats->rate += av1_tx_type_cost(cpi, xd, bs, plane, tx_size, tx_type);
1856 #endif
1858 if (rd_stats->skip) {
1859 if (is_inter) {
1860 rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
1861 } else {
1862 rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select,
1863 rd_stats->sse);
1865 } else {
1866 rd = RDCOST(x->rdmult, x->rddiv,
1867 rd_stats->rate + s0 + r_tx_size * tx_select, rd_stats->dist);
1870 if (tx_select) rd_stats->rate += r_tx_size;
1872 if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
1873 !(rd_stats->skip))
1874 rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
1876 return rd;
1879 static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
1880 TX_TYPE tx_type, TX_SIZE tx_size) {
1881 const MACROBLOCKD *const xd = &x->e_mbd;
1882 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1883 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
1884 const int is_inter = is_inter_block(mbmi);
1885 int prune = 0;
1886 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
1887 // passing -1 in for tx_type indicates that all 1D
1888 // transforms should be considered for pruning
1889 prune = prune_tx_types(cpi, bs, x, xd, -1);
1891 if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
1892 if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
1893 return 1;
1894 if (!is_inter && x->use_default_intra_tx_type &&
1895 tx_type != get_default_tx_type(0, xd, 0, tx_size))
1896 return 1;
1897 if (is_inter && x->use_default_inter_tx_type &&
1898 tx_type != get_default_tx_type(0, xd, 0, tx_size))
1899 return 1;
1900 if (max_tx_size >= TX_32X32 && tx_size == TX_4X4) return 1;
1901 #if CONFIG_EXT_TX
1902 const AV1_COMMON *const cm = &cpi->common;
1903 int ext_tx_set =
1904 get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
1905 if (is_inter) {
1906 if (!ext_tx_used_inter[ext_tx_set][tx_type]) return 1;
1907 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
1908 if (!do_tx_type_search(tx_type, prune)) return 1;
1910 } else {
1911 if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
1912 if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) return 1;
1914 if (!ext_tx_used_intra[ext_tx_set][tx_type]) return 1;
1916 #else // CONFIG_EXT_TX
1917 if (tx_size >= TX_32X32 && tx_type != DCT_DCT) return 1;
1918 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
1919 !do_tx_type_search(tx_type, prune))
1920 return 1;
1921 #endif // CONFIG_EXT_TX
1922 return 0;
1925 #if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
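// Fast luma RD estimate used by the wedge / compound-segment search:
// evaluates only DCT_DCT at the largest transform size for the block and
// returns the resulting rate, distortion, skip flag and SSE.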
1926 static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
1927 MACROBLOCK *x, int *r, int64_t *d, int *s,
1928 int64_t *sse, int64_t ref_best_rd) {
1929 RD_STATS rd_stats;
1930 int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs, DCT_DCT,
1931 max_txsize_lookup[bs]);
1932 *r = rd_stats.rate;
1933 *d = rd_stats.dist;
1934 *s = rd_stats.skip;
1935 *sse = rd_stats.sse;
1936 return rd;
1938 #endif // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
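// Fixes the transform size to the one implied by the frame's tx_mode and then
// searches only over transform types at that size; the type search is skipped
// for lossless blocks and for sizes where only DCT_DCT is permitted.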
1940 static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
1941 RD_STATS *rd_stats, int64_t ref_best_rd,
1942 BLOCK_SIZE bs) {
1943 const AV1_COMMON *const cm = &cpi->common;
1944 MACROBLOCKD *const xd = &x->e_mbd;
1945 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1946 TX_TYPE tx_type, best_tx_type = DCT_DCT;
1947 int64_t this_rd, best_rd = INT64_MAX;
1948 aom_prob skip_prob = av1_get_skip_prob(cm, xd);
1949 int s0 = av1_cost_bit(skip_prob, 0);
1950 int s1 = av1_cost_bit(skip_prob, 1);
1951 const int is_inter = is_inter_block(mbmi);
1952 int prune = 0;
1953 const int plane = 0;
1954 #if CONFIG_EXT_TX
1955 int ext_tx_set;
1956 #endif // CONFIG_EXT_TX
1957 av1_invalid_rd_stats(rd_stats);
1959 mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
1960 #if CONFIG_VAR_TX
1961 mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
1962 #endif // CONFIG_VAR_TX
1963 #if CONFIG_EXT_TX
1964 ext_tx_set =
1965 get_ext_tx_set(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used);
1966 #endif // CONFIG_EXT_TX
1968 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
1969 #if CONFIG_EXT_TX
1970 prune = prune_tx_types(cpi, bs, x, xd, ext_tx_set);
1971 #else
1972 prune = prune_tx_types(cpi, bs, x, xd, 0);
1973 #endif // CONFIG_EXT_TX
1974 #if CONFIG_EXT_TX
1975 if (get_ext_tx_types(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used) >
1976 1 &&
1977 !xd->lossless[mbmi->segment_id]) {
1978 #if CONFIG_PVQ
1979 od_rollback_buffer pre_buf, post_buf;
1981 od_encode_checkpoint(&x->daala_enc, &pre_buf);
1982 od_encode_checkpoint(&x->daala_enc, &post_buf);
1983 #endif // CONFIG_PVQ
1985 for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
1986 RD_STATS this_rd_stats;
1987 if (is_inter) {
1988 if (x->use_default_inter_tx_type &&
1989 tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
1990 continue;
1991 if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
1992 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
1993 if (!do_tx_type_search(tx_type, prune)) continue;
1995 } else {
1996 if (x->use_default_intra_tx_type &&
1997 tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
1998 continue;
1999 if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
2000 if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
2002 if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
2005 mbmi->tx_type = tx_type;
2007 txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
2008 mbmi->tx_size, cpi->sf.use_fast_coef_costing);
2009 #if CONFIG_PVQ
2010 od_encode_rollback(&x->daala_enc, &pre_buf);
2011 #endif // CONFIG_PVQ
2012 if (this_rd_stats.rate == INT_MAX) continue;
2013 av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type);
2015 if (this_rd_stats.skip)
2016 this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
2017 else
2018 this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
2019 this_rd_stats.dist);
2020 if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
2021 !this_rd_stats.skip)
2022 this_rd =
2023 AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
2025 if (this_rd < best_rd) {
2026 best_rd = this_rd;
2027 best_tx_type = mbmi->tx_type;
2028 *rd_stats = this_rd_stats;
2029 #if CONFIG_PVQ
2030 od_encode_checkpoint(&x->daala_enc, &post_buf);
2031 #endif // CONFIG_PVQ
2034 #if CONFIG_PVQ
2035 od_encode_rollback(&x->daala_enc, &post_buf);
2036 #endif // CONFIG_PVQ
2037 } else {
2038 mbmi->tx_type = DCT_DCT;
2039 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2040 cpi->sf.use_fast_coef_costing);
2042 #else // CONFIG_EXT_TX
2043 if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) {
2044 for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
2045 RD_STATS this_rd_stats;
2046 if (!is_inter && x->use_default_intra_tx_type &&
2047 tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
2048 continue;
2049 if (is_inter && x->use_default_inter_tx_type &&
2050 tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
2051 continue;
2052 mbmi->tx_type = tx_type;
2053 txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
2054 mbmi->tx_size, cpi->sf.use_fast_coef_costing);
2055 if (this_rd_stats.rate == INT_MAX) continue;
2057 av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type);
2058 if (is_inter) {
2059 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
2060 !do_tx_type_search(tx_type, prune))
2061 continue;
2063 if (this_rd_stats.skip)
2064 this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
2065 else
2066 this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
2067 this_rd_stats.dist);
2068 if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip)
2069 this_rd =
2070 AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
2072 if (this_rd < best_rd) {
2073 best_rd = this_rd;
2074 best_tx_type = mbmi->tx_type;
2075 *rd_stats = this_rd_stats;
2078 } else {
2079 mbmi->tx_type = DCT_DCT;
2080 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2081 cpi->sf.use_fast_coef_costing);
2083 #endif // CONFIG_EXT_TX
2084 mbmi->tx_type = best_tx_type;
2087 static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
2088 RD_STATS *rd_stats, int64_t ref_best_rd,
2089 BLOCK_SIZE bs) {
2090 MACROBLOCKD *const xd = &x->e_mbd;
2091 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2093 mbmi->tx_size = TX_4X4;
2094 mbmi->tx_type = DCT_DCT;
2095 #if CONFIG_VAR_TX
2096 mbmi->min_tx_size = get_min_tx_size(TX_4X4);
2097 #endif // CONFIG_VAR_TX
2099 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2100 cpi->sf.use_fast_coef_costing);
2103 #if CONFIG_TXK_SEL || CONFIG_VAR_TX
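// Number of minimum-sized (4x4) transform blocks contained in 'bsize'; for
// example, BLOCK_16X16 yields 1 << (8 - 4) = 16 blocks.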
2104 static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
2105 int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
2106 return num_blk;
2108 #endif // CONFIG_TXK_SEL || CONFIG_VAR_TX
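// Joint search over transform size and transform type for the luma plane.
// Rectangular transforms (and, with CONFIG_RECT_TX_EXT, quarter-size
// transforms) are tried first where allowed, then square sizes from the
// largest permitted size downwards, with skip_txfm_search() pruning
// candidates and an optional early breakout when the RD cost stops improving.
// The winning size/type pair is written back into the mode info.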
2110 static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
2111 MACROBLOCK *x, RD_STATS *rd_stats,
2112 int64_t ref_best_rd, BLOCK_SIZE bs) {
2113 const AV1_COMMON *const cm = &cpi->common;
2114 MACROBLOCKD *const xd = &x->e_mbd;
2115 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2116 int64_t rd = INT64_MAX;
2117 int n;
2118 int start_tx, end_tx;
2119 int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
2120 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
2121 TX_SIZE best_tx_size = max_tx_size;
2122 TX_TYPE best_tx_type = DCT_DCT;
2123 #if CONFIG_TXK_SEL
2124 TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
2125 const int num_blk = bsize_to_num_blk(bs);
2126 #endif // CONFIG_TXK_SEL
2127 const int tx_select = cm->tx_mode == TX_MODE_SELECT;
2128 const int is_inter = is_inter_block(mbmi);
2129 #if CONFIG_PVQ
2130 od_rollback_buffer buf;
2131 od_encode_checkpoint(&x->daala_enc, &buf);
2132 #endif // CONFIG_PVQ
2134 av1_invalid_rd_stats(rd_stats);
2136 #if CONFIG_EXT_TX && CONFIG_RECT_TX
2137 int evaluate_rect_tx = 0;
2138 if (tx_select) {
2139 evaluate_rect_tx = is_rect_tx_allowed(xd, mbmi);
2140 } else {
2141 const TX_SIZE chosen_tx_size =
2142 tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
2143 evaluate_rect_tx = is_rect_tx(chosen_tx_size);
2144 assert(IMPLIES(evaluate_rect_tx, is_rect_tx_allowed(xd, mbmi)));
2146 if (evaluate_rect_tx) {
2147 TX_TYPE tx_start = DCT_DCT;
2148 TX_TYPE tx_end = TX_TYPES;
2149 #if CONFIG_TXK_SEL
2150 // The tx_type is a dummy when lv_map is on; the actual tx_type search is
2151 // performed in av1_search_txk_type().
2152 tx_end = DCT_DCT + 1;
2153 #endif
2154 TX_TYPE tx_type;
2155 for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
2156 if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
2157 const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
2158 RD_STATS this_rd_stats;
2159 int ext_tx_set =
2160 get_ext_tx_set(rect_tx_size, bs, is_inter, cm->reduced_tx_set_used);
2161 if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
2162 (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
2163 rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type,
2164 rect_tx_size);
2165 if (rd < best_rd) {
2166 #if CONFIG_TXK_SEL
2167 memcpy(best_txk_type, mbmi->txk_type,
2168 sizeof(best_txk_type[0]) * num_blk);
2169 #endif
2170 best_tx_type = tx_type;
2171 best_tx_size = rect_tx_size;
2172 best_rd = rd;
2173 *rd_stats = this_rd_stats;
2176 #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2177 const int is_inter = is_inter_block(mbmi);
2178 if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
2179 #endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2183 #if CONFIG_RECT_TX_EXT
2184 // test 1:4/4:1 tx
2185 int evaluate_quarter_tx = 0;
2186 if (is_quarter_tx_allowed(xd, mbmi, is_inter)) {
2187 if (tx_select) {
2188 evaluate_quarter_tx = 1;
2189 } else {
2190 const TX_SIZE chosen_tx_size =
2191 tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
2192 evaluate_quarter_tx = chosen_tx_size == quarter_txsize_lookup[bs];
2195 if (evaluate_quarter_tx) {
2196 TX_TYPE tx_start = DCT_DCT;
2197 TX_TYPE tx_end = TX_TYPES;
2198 #if CONFIG_TXK_SEL
2199 // The tx_type is a dummy when lv_map is on; the actual tx_type search is
2200 // performed in av1_search_txk_type().
2201 tx_end = DCT_DCT + 1;
2202 #endif
2203 TX_TYPE tx_type;
2204 for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
2205 if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
2206 const TX_SIZE tx_size = quarter_txsize_lookup[bs];
2207 RD_STATS this_rd_stats;
2208 int ext_tx_set =
2209 get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
2210 if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
2211 (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
2212 rd =
2213 txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, tx_size);
2214 if (rd < best_rd) {
2215 #if CONFIG_TXK_SEL
2216 memcpy(best_txk_type, mbmi->txk_type,
2217 sizeof(best_txk_type[0]) * num_blk);
2218 #endif
2219 best_tx_type = tx_type;
2220 best_tx_size = tx_size;
2221 best_rd = rd;
2222 *rd_stats = this_rd_stats;
2225 #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2226 const int is_inter = is_inter_block(mbmi);
2227 if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
2228 #endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2231 #endif // CONFIG_RECT_TX_EXT
2232 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2234 if (tx_select) {
2235 start_tx = max_tx_size;
2236 end_tx = (max_tx_size >= TX_32X32) ? TX_8X8 : TX_4X4;
2237 } else {
2238 const TX_SIZE chosen_tx_size =
2239 tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
2240 start_tx = chosen_tx_size;
2241 end_tx = chosen_tx_size;
2244 last_rd = INT64_MAX;
2245 for (n = start_tx; n >= end_tx; --n) {
2246 #if CONFIG_EXT_TX && CONFIG_RECT_TX
2247 if (is_rect_tx(n)) break;
2248 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2249 TX_TYPE tx_start = DCT_DCT;
2250 TX_TYPE tx_end = TX_TYPES;
2251 #if CONFIG_TXK_SEL
2252 // The tx_type is a dummy when lv_map is on; the actual tx_type search is
2253 // performed in av1_search_txk_type().
2254 tx_end = DCT_DCT + 1;
2255 #endif
2256 TX_TYPE tx_type;
2257 for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
2258 RD_STATS this_rd_stats;
2259 if (skip_txfm_search(cpi, x, bs, tx_type, n)) continue;
2260 rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, n);
2261 #if CONFIG_PVQ
2262 od_encode_rollback(&x->daala_enc, &buf);
2263 #endif // CONFIG_PVQ
2264 // Early termination in transform size search.
2265 if (cpi->sf.tx_size_search_breakout &&
2266 (rd == INT64_MAX ||
2267 (this_rd_stats.skip == 1 && tx_type != DCT_DCT && n < start_tx) ||
2268 (n < (int)max_tx_size && rd > last_rd)))
2269 break;
2271 last_rd = rd;
2272 if (rd < best_rd) {
2273 #if CONFIG_TXK_SEL
2274 memcpy(best_txk_type, mbmi->txk_type,
2275 sizeof(best_txk_type[0]) * num_blk);
2276 #endif
2277 best_tx_type = tx_type;
2278 best_tx_size = n;
2279 best_rd = rd;
2280 *rd_stats = this_rd_stats;
2282 #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2283 const int is_inter = is_inter_block(mbmi);
2284 if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
2285 #endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2288 mbmi->tx_size = best_tx_size;
2289 mbmi->tx_type = best_tx_type;
2290 #if CONFIG_TXK_SEL
2291 memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * num_blk);
2292 #endif
2294 #if CONFIG_VAR_TX
2295 mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
2296 #endif // CONFIG_VAR_TX
2298 #if !CONFIG_EXT_TX
2299 if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT);
2300 #endif // !CONFIG_EXT_TX
2301 #if CONFIG_PVQ
2302 if (best_rd != INT64_MAX) {
2303 txfm_yrd(cpi, x, rd_stats, ref_best_rd, bs, best_tx_type, best_tx_size);
2305 #endif // CONFIG_PVQ
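// Top-level luma transform search for a block: lossless blocks always use
// 4x4, USE_LARGESTALL keeps the largest size and searches only the type, and
// otherwise the full size/type search above is run.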
2308 static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
2309 RD_STATS *rd_stats, BLOCK_SIZE bs,
2310 int64_t ref_best_rd) {
2311 MACROBLOCKD *xd = &x->e_mbd;
2312 av1_init_rd_stats(rd_stats);
2314 assert(bs == xd->mi[0]->mbmi.sb_type);
2316 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
2317 choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
2318 } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
2319 choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
2320 } else {
2321 choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
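// Returns 1 if 'mode' is an oblique direction that can be skipped because the
// best intra mode found so far is not one of its neighboring directional
// modes; only consulted when FLAG_SKIP_INTRA_DIRMISMATCH is set.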
2325 static int conditional_skipintra(PREDICTION_MODE mode,
2326 PREDICTION_MODE best_intra_mode) {
2327 if (mode == D117_PRED && best_intra_mode != V_PRED &&
2328 best_intra_mode != D135_PRED)
2329 return 1;
2330 if (mode == D63_PRED && best_intra_mode != V_PRED &&
2331 best_intra_mode != D45_PRED)
2332 return 1;
2333 if (mode == D207_PRED && best_intra_mode != H_PRED &&
2334 best_intra_mode != D45_PRED)
2335 return 1;
2336 if (mode == D153_PRED && best_intra_mode != H_PRED &&
2337 best_intra_mode != D135_PRED)
2338 return 1;
2339 return 0;
2342 // Model-based RD estimation for luma intra blocks.
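// Builds the intra prediction for the whole block at the transform size
// implied by the frame's tx_mode and estimates rate and distortion with
// model_rd_for_sb() instead of a full transform search; used to prune
// palette, filter-intra and angle-delta candidates before super_block_yrd().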
2343 static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
2344 BLOCK_SIZE bsize, int mode_cost) {
2345 MACROBLOCKD *const xd = &x->e_mbd;
2346 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2347 assert(!is_inter_block(mbmi));
2348 RD_STATS this_rd_stats;
2349 int row, col;
2350 int64_t temp_sse, this_rd;
2351 const TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cpi->common.tx_mode, 0);
2352 const int stepr = tx_size_high_unit[tx_size];
2353 const int stepc = tx_size_wide_unit[tx_size];
2354 const int max_blocks_wide = max_block_wide(xd, bsize, 0);
2355 const int max_blocks_high = max_block_high(xd, bsize, 0);
2356 mbmi->tx_size = tx_size;
2357 // Prediction.
2358 const int step = stepr * stepc;
2359 int block = 0;
2360 for (row = 0; row < max_blocks_high; row += stepr) {
2361 for (col = 0; col < max_blocks_wide; col += stepc) {
2362 #if CONFIG_CFL
2363 const struct macroblockd_plane *const pd = &xd->plane[0];
2364 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
2366 #if CONFIG_EC_ADAPT
2367 FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
2368 #else
2369 FRAME_CONTEXT *const ec_ctx = cpi->common.fc;
2370 #endif // CONFIG_EC_ADAPT
2372 av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row,
2373 tx_size, plane_bsize);
2374 #else
2375 av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
2376 #endif
2377 block += step;
2380 // RD estimation.
2381 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
2382 &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
2383 #if CONFIG_EXT_INTRA
2384 if (av1_is_directional_mode(mbmi->mode, bsize)) {
2385 mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
2386 MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
2388 #endif // CONFIG_EXT_INTRA
2389 #if CONFIG_FILTER_INTRA
2390 if (mbmi->mode == DC_PRED) {
2391 const aom_prob prob = cpi->common.fc->filter_intra_probs[0];
2392 if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
2393 const int mode = mbmi->filter_intra_mode_info.filter_intra_mode[0];
2394 mode_cost += (av1_cost_bit(prob, 1) +
2395 write_uniform_cost(FILTER_INTRA_MODES, mode));
2396 } else {
2397 mode_cost += av1_cost_bit(prob, 0);
2400 #endif // CONFIG_FILTER_INTRA
2401 this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + mode_cost,
2402 this_rd_stats.dist);
2403 return this_rd;
2406 #if CONFIG_PALETTE
2407 // Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
2408 // new_height'. Extra rows and columns are filled in by copying last valid
2409 // row/column.
2410 static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
2411 int orig_height, int new_width,
2412 int new_height) {
2413 int j;
2414 assert(new_width >= orig_width);
2415 assert(new_height >= orig_height);
2416 if (new_width == orig_width && new_height == orig_height) return;
2418 for (j = orig_height - 1; j >= 0; --j) {
2419 memmove(color_map + j * new_width, color_map + j * orig_width, orig_width);
2420 // Copy last column to extra columns.
2421 memset(color_map + j * new_width + orig_width,
2422 color_map[j * new_width + orig_width - 1], new_width - orig_width);
2424 // Copy last row to extra rows.
2425 for (j = orig_height; j < new_height; ++j) {
2426 memcpy(color_map + j * new_width, color_map + (orig_height - 1) * new_width,
2427 new_width);
2431 #if CONFIG_PALETTE_DELTA_ENCODING
2432 // Bias toward using colors in the cache.
2433 // TODO(huisu): Try other schemes to improve compression.
2434 static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
2435 int n_colors, int stride,
2436 float *centroids) {
2437 if (n_cache <= 0) return;
2438 for (int i = 0; i < n_colors * stride; i += stride) {
2439 float min_diff = fabsf(centroids[i] - color_cache[0]);
2440 int idx = 0;
2441 for (int j = 1; j < n_cache; ++j) {
2442 float this_diff = fabsf(centroids[i] - color_cache[j]);
2443 if (this_diff < min_diff) {
2444 min_diff = this_diff;
2445 idx = j;
2448 if (min_diff < 1.5) centroids[i] = color_cache[idx];
2451 #endif // CONFIG_PALETTE_DELTA_ENCODING
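// Searches palette mode for the luma plane: counts the distinct colors in the
// source block, runs k-means for palette sizes from the maximum down to 2,
// costs the palette signaling plus the color-index tokens, and keeps the best
// candidate in best_mbmi / best_palette_color_map. Returns the rate overhead
// of the winning palette, or 0 if no palette beats *best_rd.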
2453 static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
2454 BLOCK_SIZE bsize, int palette_ctx,
2455 int dc_mode_cost, MB_MODE_INFO *best_mbmi,
2456 uint8_t *best_palette_color_map,
2457 int64_t *best_rd, int64_t *best_model_rd,
2458 int *rate, int *rate_tokenonly,
2459 int64_t *distortion, int *skippable) {
2460 int rate_overhead = 0;
2461 MACROBLOCKD *const xd = &x->e_mbd;
2462 MODE_INFO *const mic = xd->mi[0];
2463 MB_MODE_INFO *const mbmi = &mic->mbmi;
2464 assert(!is_inter_block(mbmi));
2465 int this_rate, colors, n;
2466 const int src_stride = x->plane[0].src.stride;
2467 const uint8_t *const src = x->plane[0].src.buf;
2468 uint8_t *const color_map = xd->plane[0].color_index_map;
2469 int block_width, block_height, rows, cols;
2470 av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
2471 &cols);
2473 assert(cpi->common.allow_screen_content_tools);
2475 #if CONFIG_HIGHBITDEPTH
2476 if (cpi->common.use_highbitdepth)
2477 colors = av1_count_colors_highbd(src, src_stride, rows, cols,
2478 cpi->common.bit_depth);
2479 else
2480 #endif // CONFIG_HIGHBITDEPTH
2481 colors = av1_count_colors(src, src_stride, rows, cols);
2482 #if CONFIG_FILTER_INTRA
2483 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
2484 #endif // CONFIG_FILTER_INTRA
2486 if (colors > 1 && colors <= 64) {
2487 int r, c, i, j, k, palette_mode_cost;
2488 const int max_itr = 50;
2489 uint8_t color_order[PALETTE_MAX_SIZE];
2490 float *const data = x->palette_buffer->kmeans_data_buf;
2491 float centroids[PALETTE_MAX_SIZE];
2492 float lb, ub, val;
2493 RD_STATS tokenonly_rd_stats;
2494 int64_t this_rd, this_model_rd;
2495 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
2496 #if CONFIG_HIGHBITDEPTH
2497 uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
2498 if (cpi->common.use_highbitdepth)
2499 lb = ub = src16[0];
2500 else
2501 #endif // CONFIG_HIGHBITDEPTH
2502 lb = ub = src[0];
2504 #if CONFIG_HIGHBITDEPTH
2505 if (cpi->common.use_highbitdepth) {
2506 for (r = 0; r < rows; ++r) {
2507 for (c = 0; c < cols; ++c) {
2508 val = src16[r * src_stride + c];
2509 data[r * cols + c] = val;
2510 if (val < lb)
2511 lb = val;
2512 else if (val > ub)
2513 ub = val;
2516 } else {
2517 #endif // CONFIG_HIGHBITDEPTH
2518 for (r = 0; r < rows; ++r) {
2519 for (c = 0; c < cols; ++c) {
2520 val = src[r * src_stride + c];
2521 data[r * cols + c] = val;
2522 if (val < lb)
2523 lb = val;
2524 else if (val > ub)
2525 ub = val;
2528 #if CONFIG_HIGHBITDEPTH
2530 #endif // CONFIG_HIGHBITDEPTH
2532 mbmi->mode = DC_PRED;
2533 #if CONFIG_FILTER_INTRA
2534 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
2535 #endif // CONFIG_FILTER_INTRA
2537 if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return 0;
2539 #if CONFIG_PALETTE_DELTA_ENCODING
2540 const MODE_INFO *above_mi = xd->above_mi;
2541 const MODE_INFO *left_mi = xd->left_mi;
2542 uint16_t color_cache[2 * PALETTE_MAX_SIZE];
2543 const int n_cache =
2544 av1_get_palette_cache(above_mi, left_mi, 0, color_cache);
2545 #endif // CONFIG_PALETTE_DELTA_ENCODING
2547 for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
2548 --n) {
2549 if (colors == PALETTE_MIN_SIZE) {
2550 // Special case: These colors automatically become the centroids.
2551 assert(colors == n);
2552 assert(colors == 2);
2553 centroids[0] = lb;
2554 centroids[1] = ub;
2555 k = 2;
2556 } else {
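// Seed the k-means centroids at the midpoints of n equal sub-ranges of
// [lb, ub]; e.g. n = 2 places them at lb + (ub - lb) / 4 and
// lb + 3 * (ub - lb) / 4.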
2557 for (i = 0; i < n; ++i) {
2558 centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
2560 av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
2561 #if CONFIG_PALETTE_DELTA_ENCODING
2562 optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
2563 #endif // CONFIG_PALETTE_DELTA_ENCODING
2564 k = av1_remove_duplicates(centroids, n);
2565 if (k < PALETTE_MIN_SIZE) {
2566 // Too few unique colors to create a palette. And DC_PRED will work
2567 // well for that case anyway. So skip.
2568 continue;
2572 #if CONFIG_HIGHBITDEPTH
2573 if (cpi->common.use_highbitdepth)
2574 for (i = 0; i < k; ++i)
2575 pmi->palette_colors[i] =
2576 clip_pixel_highbd((int)centroids[i], cpi->common.bit_depth);
2577 else
2578 #endif // CONFIG_HIGHBITDEPTH
2579 for (i = 0; i < k; ++i)
2580 pmi->palette_colors[i] = clip_pixel((int)centroids[i]);
2581 pmi->palette_size[0] = k;
2583 av1_calc_indices(data, centroids, color_map, rows * cols, k, 1);
2584 extend_palette_color_map(color_map, cols, rows, block_width,
2585 block_height);
2586 palette_mode_cost =
2587 dc_mode_cost +
2588 cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - PALETTE_MIN_SIZE] +
2589 write_uniform_cost(k, color_map[0]) +
2590 av1_cost_bit(
2591 av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx],
2592 1);
2593 palette_mode_cost += av1_palette_color_cost_y(pmi,
2594 #if CONFIG_PALETTE_DELTA_ENCODING
2595 color_cache, n_cache,
2596 #endif // CONFIG_PALETTE_DELTA_ENCODING
2597 cpi->common.bit_depth);
2598 for (i = 0; i < rows; ++i) {
2599 for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
2600 int color_idx;
2601 const int color_ctx = av1_get_palette_color_index_context(
2602 color_map, block_width, i, j, k, color_order, &color_idx);
2603 assert(color_idx >= 0 && color_idx < k);
2604 palette_mode_cost += cpi->palette_y_color_cost[k - PALETTE_MIN_SIZE]
2605 [color_ctx][color_idx];
2608 this_model_rd = intra_model_yrd(cpi, x, bsize, palette_mode_cost);
2609 if (*best_model_rd != INT64_MAX &&
2610 this_model_rd > *best_model_rd + (*best_model_rd >> 1))
2611 continue;
2612 if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
2613 super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
2614 if (tokenonly_rd_stats.rate == INT_MAX) continue;
2615 this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
2616 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
2617 if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
2618 tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
2620 if (this_rd < *best_rd) {
2621 *best_rd = this_rd;
2622 memcpy(best_palette_color_map, color_map,
2623 block_width * block_height * sizeof(color_map[0]));
2624 *best_mbmi = *mbmi;
2625 rate_overhead = this_rate - tokenonly_rd_stats.rate;
2626 if (rate) *rate = this_rate;
2627 if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
2628 if (distortion) *distortion = tokenonly_rd_stats.dist;
2629 if (skippable) *skippable = tokenonly_rd_stats.skip;
2634 if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
2635 memcpy(color_map, best_palette_color_map,
2636 rows * cols * sizeof(best_palette_color_map[0]));
2638 *mbmi = *best_mbmi;
2639 return rate_overhead;
2641 #endif // CONFIG_PALETTE
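// Picks the best intra prediction mode for one sub-8x8 prediction block:
// every mode from DC_PRED to TM_PRED (subject to the speed-feature mode mask)
// is predicted, transformed and costed per transform block, with separate
// high-bitdepth and 8-bit paths, and the reconstruction of the winning mode
// is copied back into the destination buffer so later blocks predict from it.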
2643 static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
2644 const AV1_COMP *const cpi, MACROBLOCK *x, int row, int col,
2645 PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
2646 ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
2647 BLOCK_SIZE bsize, TX_SIZE tx_size, int *y_skip, int64_t rd_thresh) {
2648 const AV1_COMMON *const cm = &cpi->common;
2649 PREDICTION_MODE mode;
2650 MACROBLOCKD *const xd = &x->e_mbd;
2651 assert(!is_inter_block(&xd->mi[0]->mbmi));
2652 int64_t best_rd = rd_thresh;
2653 struct macroblock_plane *p = &x->plane[0];
2654 struct macroblockd_plane *pd = &xd->plane[0];
2655 const int src_stride = p->src.stride;
2656 const int dst_stride = pd->dst.stride;
2657 const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
2658 uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
2659 #if CONFIG_CHROMA_2X2
2660 // TODO(jingning): This is a temporary change. The whole function should be
2661 // removed when cb4x4 is enabled.
2662 ENTROPY_CONTEXT ta[4], tempa[4];
2663 ENTROPY_CONTEXT tl[4], templ[4];
2664 #else
2665 ENTROPY_CONTEXT ta[2], tempa[2];
2666 ENTROPY_CONTEXT tl[2], templ[2];
2667 #endif // CONFIG_CHROMA_2X2
2669 const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
2670 const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
2671 const int tx_width_unit = tx_size_wide_unit[tx_size];
2672 const int tx_height_unit = tx_size_high_unit[tx_size];
2673 const int pred_block_width = block_size_wide[bsize];
2674 const int pred_block_height = block_size_high[bsize];
2675 const int tx_width = tx_size_wide[tx_size];
2676 const int tx_height = tx_size_high[tx_size];
2677 const int pred_width_in_transform_blocks = pred_block_width / tx_width;
2678 const int pred_height_in_transform_blocks = pred_block_height / tx_height;
2679 int idx, idy;
2680 int best_can_skip = 0;
2681 uint8_t best_dst[8 * 8];
2682 #if CONFIG_HIGHBITDEPTH
2683 uint16_t best_dst16[8 * 8];
2684 #endif // CONFIG_HIGHBITDEPTH
2685 const int is_lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
2686 #if CONFIG_EXT_TX && CONFIG_RECT_TX
2687 const int sub_bsize = bsize;
2688 #else
2689 const int sub_bsize = BLOCK_4X4;
2690 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2692 #if CONFIG_PVQ
2693 od_rollback_buffer pre_buf, post_buf;
2694 od_encode_checkpoint(&x->daala_enc, &pre_buf);
2695 od_encode_checkpoint(&x->daala_enc, &post_buf);
2696 #endif // CONFIG_PVQ
2698 assert(bsize < BLOCK_8X8);
2699 assert(tx_width < 8 || tx_height < 8);
2700 #if CONFIG_EXT_TX && CONFIG_RECT_TX
2701 if (is_lossless)
2702 assert(tx_width == 4 && tx_height == 4);
2703 else
2704 assert(tx_width == pred_block_width && tx_height == pred_block_height);
2705 #else
2706 assert(tx_width == 4 && tx_height == 4);
2707 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2709 memcpy(ta, a, pred_width_in_transform_blocks * sizeof(a[0]));
2710 memcpy(tl, l, pred_height_in_transform_blocks * sizeof(l[0]));
2712 xd->mi[0]->mbmi.tx_size = tx_size;
2714 #if CONFIG_PALETTE
2715 xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
2716 #endif // CONFIG_PALETTE
2718 #if CONFIG_HIGHBITDEPTH
2719 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2720 #if CONFIG_PVQ
2721 od_encode_checkpoint(&x->daala_enc, &pre_buf);
2722 #endif
2723 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
2724 int64_t this_rd;
2725 int ratey = 0;
2726 int64_t distortion = 0;
2727 int rate = bmode_costs[mode];
2728 int can_skip = 1;
2730 if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
2731 (1 << mode)))
2732 continue;
2734 // Only do the oblique modes if the best so far is
2735 // one of the neighboring directional modes
2736 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
2737 if (conditional_skipintra(mode, *best_mode)) continue;
2740 memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
2741 memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
2743 for (idy = 0; idy < pred_height_in_transform_blocks; ++idy) {
2744 for (idx = 0; idx < pred_width_in_transform_blocks; ++idx) {
2745 const int block_raster_idx = (row + idy) * 2 + (col + idx);
2746 const int block =
2747 av1_raster_order_to_block_index(tx_size, block_raster_idx);
2748 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
2749 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
2750 #if !CONFIG_PVQ
2751 int16_t *const src_diff = av1_raster_block_offset_int16(
2752 BLOCK_8X8, block_raster_idx, p->src_diff);
2753 #endif
2754 int skip;
2755 assert(block < 4);
2756 assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
2757 idx == 0 && idy == 0));
2758 assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
2759 block == 0 || block == 2));
2760 xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
2761 av1_predict_intra_block(
2762 xd, pd->width, pd->height, txsize_to_bsize[tx_size], mode, dst,
2763 dst_stride, dst, dst_stride, col + idx, row + idy, 0);
2764 #if !CONFIG_PVQ
2765 aom_highbd_subtract_block(tx_height, tx_width, src_diff, 8, src,
2766 src_stride, dst, dst_stride, xd->bd);
2767 #endif
2768 if (is_lossless) {
2769 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
2770 const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
2771 const int coeff_ctx =
2772 combine_entropy_contexts(tempa[idx], templ[idy]);
2773 #if !CONFIG_PVQ
2774 av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
2775 tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
2776 ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
2777 tempa + idx, templ + idy,
2778 cpi->sf.use_fast_coef_costing);
2779 skip = (p->eobs[block] == 0);
2780 can_skip &= skip;
2781 tempa[idx] = !skip;
2782 templ[idy] = !skip;
2783 #if CONFIG_EXT_TX
2784 if (tx_size == TX_8X4) {
2785 tempa[idx + 1] = tempa[idx];
2786 } else if (tx_size == TX_4X8) {
2787 templ[idy + 1] = templ[idy];
2789 #endif // CONFIG_EXT_TX
2790 #else
2791 (void)scan_order;
2793 av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
2794 tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
2796 ratey += x->rate;
2797 skip = x->pvq_skip[0];
2798 tempa[idx] = !skip;
2799 templ[idy] = !skip;
2800 can_skip &= skip;
2801 #endif
2802 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
2803 goto next_highbd;
2804 #if CONFIG_PVQ
2805 if (!skip)
2806 #endif
2807 av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
2808 DCT_DCT, tx_size, dst, dst_stride,
2809 p->eobs[block]);
2810 } else {
2811 int64_t dist;
2812 unsigned int tmp;
2813 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
2814 const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
2815 const int coeff_ctx =
2816 combine_entropy_contexts(tempa[idx], templ[idy]);
2817 #if !CONFIG_PVQ
2818 av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
2819 tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
2820 av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
2821 ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
2822 tempa + idx, templ + idy,
2823 cpi->sf.use_fast_coef_costing);
2824 skip = (p->eobs[block] == 0);
2825 can_skip &= skip;
2826 tempa[idx] = !skip;
2827 templ[idy] = !skip;
2828 #if CONFIG_EXT_TX
2829 if (tx_size == TX_8X4) {
2830 tempa[idx + 1] = tempa[idx];
2831 } else if (tx_size == TX_4X8) {
2832 templ[idy + 1] = templ[idy];
2834 #endif // CONFIG_EXT_TX
2835 #else
2836 (void)scan_order;
2838 av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
2839 tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
2840 ratey += x->rate;
2841 skip = x->pvq_skip[0];
2842 tempa[idx] = !skip;
2843 templ[idy] = !skip;
2844 can_skip &= skip;
2845 #endif
2846 #if CONFIG_PVQ
2847 if (!skip)
2848 #endif
2849 av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
2850 tx_type, tx_size, dst, dst_stride,
2851 p->eobs[block]);
2852 cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
2853 dist = (int64_t)tmp << 4;
2854 distortion += dist;
2855 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
2856 goto next_highbd;
2861 rate += ratey;
2862 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
2864 if (this_rd < best_rd) {
2865 *bestrate = rate;
2866 *bestratey = ratey;
2867 *bestdistortion = distortion;
2868 best_rd = this_rd;
2869 best_can_skip = can_skip;
2870 *best_mode = mode;
2871 memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
2872 memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
2873 #if CONFIG_PVQ
2874 od_encode_checkpoint(&x->daala_enc, &post_buf);
2875 #endif
2876 for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
2877 memcpy(best_dst16 + idy * 8,
2878 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
2879 pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
2882 next_highbd : {}
2883 #if CONFIG_PVQ
2884 od_encode_rollback(&x->daala_enc, &pre_buf);
2885 #endif
2888 if (best_rd >= rd_thresh) return best_rd;
2890 #if CONFIG_PVQ
2891 od_encode_rollback(&x->daala_enc, &post_buf);
2892 #endif
2894 if (y_skip) *y_skip &= best_can_skip;
2896 for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
2897 memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
2898 best_dst16 + idy * 8,
2899 pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
2902 return best_rd;
2904 #endif // CONFIG_HIGHBITDEPTH
2906 #if CONFIG_PVQ
2907 od_encode_checkpoint(&x->daala_enc, &pre_buf);
2908 #endif // CONFIG_PVQ
2910 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
2911 int64_t this_rd;
2912 int ratey = 0;
2913 int64_t distortion = 0;
2914 int rate = bmode_costs[mode];
2915 int can_skip = 1;
2917 if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
2918 (1 << mode))) {
2919 continue;
2922 // Only do the oblique modes if the best so far is
2923 // one of the neighboring directional modes
2924 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
2925 if (conditional_skipintra(mode, *best_mode)) continue;
2928 memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
2929 memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
2931 for (idy = 0; idy < pred_height_in_4x4_blocks; idy += tx_height_unit) {
2932 for (idx = 0; idx < pred_width_in_4x4_blocks; idx += tx_width_unit) {
2933 const int block_raster_idx = (row + idy) * 2 + (col + idx);
2934 int block = av1_raster_order_to_block_index(tx_size, block_raster_idx);
2935 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
2936 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
2937 #if !CONFIG_PVQ
2938 int16_t *const src_diff = av1_raster_block_offset_int16(
2939 BLOCK_8X8, block_raster_idx, p->src_diff);
2940 #endif // !CONFIG_PVQ
2941 int skip;
2942 assert(block < 4);
2943 assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
2944 idx == 0 && idy == 0));
2945 assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
2946 block == 0 || block == 2));
2947 xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
2948 av1_predict_intra_block(xd, pd->width, pd->height,
2949 txsize_to_bsize[tx_size], mode, dst, dst_stride,
2950 dst, dst_stride,
2951 #if CONFIG_CB4X4
2952 2 * (col + idx), 2 * (row + idy),
2953 #else
2954 col + idx, row + idy,
2955 #endif // CONFIG_CB4X4
2956 0);
2957 #if !CONFIG_PVQ
2958 aom_subtract_block(tx_height, tx_width, src_diff, 8, src, src_stride,
2959 dst, dst_stride);
2960 #endif // !CONFIG_PVQ
2962 TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
2963 const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
2964 const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]);
2965 #if CONFIG_CB4X4
2966 block = 4 * block;
2967 #endif // CONFIG_CB4X4
2968 #if !CONFIG_PVQ
2969 const AV1_XFORM_QUANT xform_quant =
2970 is_lossless ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP;
2971 av1_xform_quant(cm, x, 0, block,
2972 #if CONFIG_CB4X4
2973 2 * (row + idy), 2 * (col + idx),
2974 #else
2975 row + idy, col + idx,
2976 #endif // CONFIG_CB4X4
2977 BLOCK_8X8, tx_size, coeff_ctx, xform_quant);
2979 av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx);
2981 ratey +=
2982 av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, tempa + idx,
2983 templ + idy, cpi->sf.use_fast_coef_costing);
2984 skip = (p->eobs[block] == 0);
2985 can_skip &= skip;
2986 tempa[idx] = !skip;
2987 templ[idy] = !skip;
2988 #if CONFIG_EXT_TX
2989 if (tx_size == TX_8X4) {
2990 tempa[idx + 1] = tempa[idx];
2991 } else if (tx_size == TX_4X8) {
2992 templ[idy + 1] = templ[idy];
2994 #endif // CONFIG_EXT_TX
2995 #else
2996 (void)scan_order;
2998 av1_xform_quant(cm, x, 0, block,
2999 #if CONFIG_CB4X4
3000 2 * (row + idy), 2 * (col + idx),
3001 #else
3002 row + idy, col + idx,
3003 #endif // CONFIG_CB4X4
3004 BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
3006 ratey += x->rate;
3007 skip = x->pvq_skip[0];
3008 tempa[idx] = !skip;
3009 templ[idy] = !skip;
3010 can_skip &= skip;
3011 #endif // !CONFIG_PVQ
3013 if (!is_lossless) { // To use the pixel-domain distortion, the inverse
3014 // transform must be applied *before* the RD cost is
3015 // calculated. Compared to computing the distortion
3016 // in the frequency domain, the extra encoding
3017 // effort is small.
3018 #if CONFIG_PVQ
3019 if (!skip)
3020 #endif // CONFIG_PVQ
3021 av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
3022 tx_type, tx_size, dst, dst_stride,
3023 p->eobs[block]);
3024 unsigned int tmp;
3025 cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
3026 const int64_t dist = (int64_t)tmp << 4;
3027 distortion += dist;
3030 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
3031 goto next;
3033 if (is_lossless) { // Calculate inverse txfm *after* RD cost.
3034 #if CONFIG_PVQ
3035 if (!skip)
3036 #endif // CONFIG_PVQ
3037 av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
3038 DCT_DCT, tx_size, dst, dst_stride,
3039 p->eobs[block]);
3044 rate += ratey;
3045 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
3047 if (this_rd < best_rd) {
3048 *bestrate = rate;
3049 *bestratey = ratey;
3050 *bestdistortion = distortion;
3051 best_rd = this_rd;
3052 best_can_skip = can_skip;
3053 *best_mode = mode;
3054 memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
3055 memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
3056 #if CONFIG_PVQ
3057 od_encode_checkpoint(&x->daala_enc, &post_buf);
3058 #endif // CONFIG_PVQ
3059 for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
3060 memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
3061 pred_width_in_transform_blocks * 4);
3063 next : {}
3064 #if CONFIG_PVQ
3065 od_encode_rollback(&x->daala_enc, &pre_buf);
3066 #endif // CONFIG_PVQ
3067 } // mode decision loop
3069 if (best_rd >= rd_thresh) return best_rd;
3071 #if CONFIG_PVQ
3072 od_encode_rollback(&x->daala_enc, &post_buf);
3073 #endif // CONFIG_PVQ
3075 if (y_skip) *y_skip &= best_can_skip;
3077 for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
3078 memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
3079 pred_width_in_transform_blocks * 4);
3081 return best_rd;
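// Intra mode search for blocks smaller than 8x8: chooses a mode for each
// prediction sub-block (4x4, 4x8 or 8x4) of the 8x8 area with the helper
// above, sums their rates and distortions (optionally replacing the summed
// distortion with the Daala 8x8 distortion), and finally adds the cost of
// signaling the DCT_DCT transform type.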
3084 static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
3085 MACROBLOCK *mb, int *rate,
3086 int *rate_y, int64_t *distortion,
3087 int *y_skip, int64_t best_rd) {
3088 const MACROBLOCKD *const xd = &mb->e_mbd;
3089 MODE_INFO *const mic = xd->mi[0];
3090 const MODE_INFO *above_mi = xd->above_mi;
3091 const MODE_INFO *left_mi = xd->left_mi;
3092 MB_MODE_INFO *const mbmi = &mic->mbmi;
3093 assert(!is_inter_block(mbmi));
3094 const BLOCK_SIZE bsize = mbmi->sb_type;
3095 const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
3096 const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
3097 int idx, idy;
3098 int cost = 0;
3099 int64_t total_distortion = 0;
3100 int tot_rate_y = 0;
3101 int64_t total_rd = 0;
3102 const int *bmode_costs = cpi->mbmode_cost[0];
3103 const int is_lossless = xd->lossless[mbmi->segment_id];
3104 #if CONFIG_EXT_TX && CONFIG_RECT_TX
3105 const TX_SIZE tx_size = is_lossless ? TX_4X4 : max_txsize_rect_lookup[bsize];
3106 #else
3107 const TX_SIZE tx_size = TX_4X4;
3108 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
3110 #if CONFIG_EXT_INTRA
3111 #if CONFIG_INTRA_INTERP
3112 mbmi->intra_filter = INTRA_FILTER_LINEAR;
3113 #endif // CONFIG_INTRA_INTERP
3114 #endif // CONFIG_EXT_INTRA
3115 #if CONFIG_FILTER_INTRA
3116 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
3117 #endif // CONFIG_FILTER_INTRA
3119 // TODO(any): Add a search over tx_type to improve RD performance at the
3120 // expense of speed.
3121 mbmi->tx_type = DCT_DCT;
3122 mbmi->tx_size = tx_size;
3124 if (y_skip) *y_skip = 1;
3126 // Pick modes for each prediction sub-block (of size 4x4, 4x8, or 8x4) in this
3127 // 8x8 coding block.
3128 for (idy = 0; idy < 2; idy += pred_height_in_4x4_blocks) {
3129 for (idx = 0; idx < 2; idx += pred_width_in_4x4_blocks) {
3130 PREDICTION_MODE best_mode = DC_PRED;
3131 int r = INT_MAX, ry = INT_MAX;
3132 int64_t d = INT64_MAX, this_rd = INT64_MAX;
3133 int j;
3134 const int pred_block_idx = idy * 2 + idx;
3135 if (cpi->common.frame_type == KEY_FRAME) {
3136 const PREDICTION_MODE A =
3137 av1_above_block_mode(mic, above_mi, pred_block_idx);
3138 const PREDICTION_MODE L =
3139 av1_left_block_mode(mic, left_mi, pred_block_idx);
3141 bmode_costs = cpi->y_mode_costs[A][L];
3143 this_rd = rd_pick_intra_sub_8x8_y_subblock_mode(
3144 cpi, mb, idy, idx, &best_mode, bmode_costs,
3145 xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
3146 &ry, &d, bsize, tx_size, y_skip, best_rd - total_rd);
3147 #if !CONFIG_DAALA_DIST
3148 if (this_rd >= best_rd - total_rd) return INT64_MAX;
3149 #endif // !CONFIG_DAALA_DIST
3150 total_rd += this_rd;
3151 cost += r;
3152 total_distortion += d;
3153 tot_rate_y += ry;
3155 mic->bmi[pred_block_idx].as_mode = best_mode;
3156 for (j = 1; j < pred_height_in_4x4_blocks; ++j)
3157 mic->bmi[pred_block_idx + j * 2].as_mode = best_mode;
3158 for (j = 1; j < pred_width_in_4x4_blocks; ++j)
3159 mic->bmi[pred_block_idx + j].as_mode = best_mode;
3161 if (total_rd >= best_rd) return INT64_MAX;
3164 mbmi->mode = mic->bmi[3].as_mode;
3166 #if CONFIG_DAALA_DIST
3168 const struct macroblock_plane *p = &mb->plane[0];
3169 const struct macroblockd_plane *pd = &xd->plane[0];
3170 const int src_stride = p->src.stride;
3171 const int dst_stride = pd->dst.stride;
3172 uint8_t *src = p->src.buf;
3173 uint8_t *dst = pd->dst.buf;
3174 int use_activity_masking = 0;
3175 int qm = OD_HVS_QM;
3177 #if CONFIG_PVQ
3178 use_activity_masking = mb->daala_enc.use_activity_masking;
3179 #endif // CONFIG_PVQ
3180 // Daala-defined distortion computed for the block of 8x8 pixels
3181 total_distortion = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8,
3182 qm, use_activity_masking, mb->qindex)
3183 << 4;
3185 #endif // CONFIG_DAALA_DIST
3186 // Add in the cost of the transform type
3187 if (!is_lossless) {
3188 int rate_tx_type = 0;
3189 #if CONFIG_EXT_TX
3190 if (get_ext_tx_types(tx_size, bsize, 0, cpi->common.reduced_tx_set_used) >
3191 1) {
3192 const int eset =
3193 get_ext_tx_set(tx_size, bsize, 0, cpi->common.reduced_tx_set_used);
3194 rate_tx_type = cpi->intra_tx_type_costs[eset][txsize_sqr_map[tx_size]]
3195 [mbmi->mode][mbmi->tx_type];
3197 #else
3198 rate_tx_type =
3199 cpi->intra_tx_type_costs[txsize_sqr_map[tx_size]]
3200 [intra_mode_to_tx_type_context[mbmi->mode]]
3201 [mbmi->tx_type];
3202 #endif // CONFIG_EXT_TX
3203 assert(mbmi->tx_size == tx_size);
3204 cost += rate_tx_type;
3205 tot_rate_y += rate_tx_type;
3208 *rate = cost;
3209 *rate_y = tot_rate_y;
3210 *distortion = total_distortion;
3212 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
3215 #if CONFIG_FILTER_INTRA
3216 // Return 1 if a filter intra mode is selected; return 0 otherwise.
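// Each candidate mode not excluded by skip_mask is first screened with the
// model RD estimate and then evaluated with super_block_yrd(); when a mode
// wins, the chosen mode, transform size and transform type are written back
// into the mode info.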
3217 static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
3218 int *rate, int *rate_tokenonly,
3219 int64_t *distortion, int *skippable,
3220 BLOCK_SIZE bsize, int mode_cost,
3221 int64_t *best_rd, int64_t *best_model_rd,
3222 uint16_t skip_mask) {
3223 MACROBLOCKD *const xd = &x->e_mbd;
3224 MODE_INFO *const mic = xd->mi[0];
3225 MB_MODE_INFO *mbmi = &mic->mbmi;
3226 int filter_intra_selected_flag = 0;
3227 FILTER_INTRA_MODE mode;
3228 TX_SIZE best_tx_size = TX_4X4;
3229 FILTER_INTRA_MODE_INFO filter_intra_mode_info;
3230 TX_TYPE best_tx_type;
3232 av1_zero(filter_intra_mode_info);
3233 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
3234 mbmi->mode = DC_PRED;
3235 #if CONFIG_PALETTE
3236 mbmi->palette_mode_info.palette_size[0] = 0;
3237 #endif // CONFIG_PALETTE
3239 for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
3240 int this_rate;
3241 int64_t this_rd, this_model_rd;
3242 RD_STATS tokenonly_rd_stats;
3243 if (skip_mask & (1 << mode)) continue;
3244 mbmi->filter_intra_mode_info.filter_intra_mode[0] = mode;
3245 this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
3246 if (*best_model_rd != INT64_MAX &&
3247 this_model_rd > *best_model_rd + (*best_model_rd >> 1))
3248 continue;
3249 if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
3250 super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
3251 if (tokenonly_rd_stats.rate == INT_MAX) continue;
3252 this_rate = tokenonly_rd_stats.rate +
3253 av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 1) +
3254 write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
3255 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
3257 if (this_rd < *best_rd) {
3258 *best_rd = this_rd;
3259 best_tx_size = mic->mbmi.tx_size;
3260 filter_intra_mode_info = mbmi->filter_intra_mode_info;
3261 best_tx_type = mic->mbmi.tx_type;
3262 *rate = this_rate;
3263 *rate_tokenonly = tokenonly_rd_stats.rate;
3264 *distortion = tokenonly_rd_stats.dist;
3265 *skippable = tokenonly_rd_stats.skip;
3266 filter_intra_selected_flag = 1;
3270 if (filter_intra_selected_flag) {
3271 mbmi->mode = DC_PRED;
3272 mbmi->tx_size = best_tx_size;
3273 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] =
3274 filter_intra_mode_info.use_filter_intra_mode[0];
3275 mbmi->filter_intra_mode_info.filter_intra_mode[0] =
3276 filter_intra_mode_info.filter_intra_mode[0];
3277 mbmi->tx_type = best_tx_type;
3278 return 1;
3279 } else {
3280 return 0;
3283 #endif // CONFIG_FILTER_INTRA
3285 #if CONFIG_EXT_INTRA
3286 // Run RD calculation with given luma intra prediction angle, and return
3287 // the RD cost. Update the best mode info if the RD cost is the best so far.
3288 static int64_t calc_rd_given_intra_angle(
3289 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mode_cost,
3290 int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
3291 RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
3292 TX_TYPE *best_tx_type,
3293 #if CONFIG_INTRA_INTERP
3294 INTRA_FILTER *best_filter,
3295 #endif // CONFIG_INTRA_INTERP
3296 int64_t *best_rd, int64_t *best_model_rd) {
3297 int this_rate;
3298 RD_STATS tokenonly_rd_stats;
3299 int64_t this_rd, this_model_rd;
3300 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
3301 assert(!is_inter_block(mbmi));
3303 mbmi->angle_delta[0] = angle_delta;
3304 this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
3305 if (*best_model_rd != INT64_MAX &&
3306 this_model_rd > *best_model_rd + (*best_model_rd >> 1))
3307 return INT64_MAX;
3308 if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
3309 super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);
3310 if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;
3312 this_rate = tokenonly_rd_stats.rate + mode_cost +
3313 write_uniform_cost(2 * max_angle_delta + 1,
3314 mbmi->angle_delta[0] + max_angle_delta);
3315 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
3317 if (this_rd < *best_rd) {
3318 *best_rd = this_rd;
3319 *best_angle_delta = mbmi->angle_delta[0];
3320 *best_tx_size = mbmi->tx_size;
3321 #if CONFIG_INTRA_INTERP
3322 *best_filter = mbmi->intra_filter;
3323 #endif // CONFIG_INTRA_INTERP
3324 *best_tx_type = mbmi->tx_type;
3325 *rate = this_rate;
3326 rd_stats->rate = tokenonly_rd_stats.rate;
3327 rd_stats->dist = tokenonly_rd_stats.dist;
3328 rd_stats->skip = tokenonly_rd_stats.skip;
3330 return this_rd;
3333 // With given luma directional intra prediction mode, pick the best angle delta.
3334 // Return the RD cost corresponding to the best angle delta.
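// Search order (as implemented below): even angle deltas (0, 2, 4, ...) are
// tried first for both signs, with the very first evaluation allowed a
// slightly larger RD threshold (best_rd + (best_rd >> 3) rather than >> 5);
// odd deltas are then refined only when at least one of their even neighbors
// produced an RD cost within best_rd + (best_rd >> 5).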
3335 static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
3336 int *rate, RD_STATS *rd_stats,
3337 BLOCK_SIZE bsize, int mode_cost,
3338 int64_t best_rd,
3339 int64_t *best_model_rd) {
3340 MACROBLOCKD *const xd = &x->e_mbd;
3341 MODE_INFO *const mic = xd->mi[0];
3342 MB_MODE_INFO *mbmi = &mic->mbmi;
3343 assert(!is_inter_block(mbmi));
3344 int i, angle_delta, best_angle_delta = 0;
3345 int first_try = 1;
3346 #if CONFIG_INTRA_INTERP
3347 int p_angle;
3348 const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
3349 INTRA_FILTER filter, best_filter = INTRA_FILTER_LINEAR;
3350 #endif // CONFIG_INTRA_INTERP
3351 int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
3352 TX_SIZE best_tx_size = mic->mbmi.tx_size;
3353 TX_TYPE best_tx_type = mbmi->tx_type;
3355 for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
3357 for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
3358 #if CONFIG_INTRA_INTERP
3359 for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
3360 if (FILTER_FAST_SEARCH && filter != INTRA_FILTER_LINEAR) continue;
3361 mic->mbmi.intra_filter = filter;
3362 #endif // CONFIG_INTRA_INTERP
3363 for (i = 0; i < 2; ++i) {
3364 best_rd_in = (best_rd == INT64_MAX)
3365 ? INT64_MAX
3366 : (best_rd + (best_rd >> (first_try ? 3 : 5)));
3367 this_rd = calc_rd_given_intra_angle(
3368 cpi, x, bsize,
3369 #if CONFIG_INTRA_INTERP
3370 mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
3371 #else
3372 mode_cost,
3373 #endif // CONFIG_INTRA_INTERP
3374 best_rd_in, (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate,
3375 rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
3376 #if CONFIG_INTRA_INTERP
3377 &best_filter,
3378 #endif // CONFIG_INTRA_INTERP
3379 &best_rd, best_model_rd);
3380 rd_cost[2 * angle_delta + i] = this_rd;
3381 if (first_try && this_rd == INT64_MAX) return best_rd;
3382 first_try = 0;
3383 if (angle_delta == 0) {
3384 rd_cost[1] = this_rd;
3385 break;
3388 #if CONFIG_INTRA_INTERP
3390 #endif // CONFIG_INTRA_INTERP
3393 assert(best_rd != INT64_MAX);
3394 for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
3395 int64_t rd_thresh;
3396 #if CONFIG_INTRA_INTERP
3397 for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
3398 if (FILTER_FAST_SEARCH && filter != INTRA_FILTER_LINEAR) continue;
3399 mic->mbmi.intra_filter = filter;
3400 #endif // CONFIG_INTRA_INTERP
3401 for (i = 0; i < 2; ++i) {
3402 int skip_search = 0;
3403 rd_thresh = best_rd + (best_rd >> 5);
3404 if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
3405 rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
3406 skip_search = 1;
3407 if (!skip_search) {
3408 calc_rd_given_intra_angle(
3409 cpi, x, bsize,
3410 #if CONFIG_INTRA_INTERP
3411 mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
3412 #else
3413 mode_cost,
3414 #endif // CONFIG_INTRA_INTERP
3415 best_rd, (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate,
3416 rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
3417 #if CONFIG_INTRA_INTERP
3418 &best_filter,
3419 #endif // CONFIG_INTRA_INTERP
3420 &best_rd, best_model_rd);
3423 #if CONFIG_INTRA_INTERP
3425 #endif // CONFIG_INTRA_INTERP
3428 #if CONFIG_INTRA_INTERP
3429 if (FILTER_FAST_SEARCH && rd_stats->rate < INT_MAX) {
3430 p_angle = mode_to_angle_map[mbmi->mode] + best_angle_delta * ANGLE_STEP;
3431 if (av1_is_intra_filter_switchable(p_angle)) {
3432 for (filter = INTRA_FILTER_LINEAR + 1; filter < INTRA_FILTERS; ++filter) {
3433 mic->mbmi.intra_filter = filter;
3434 this_rd = calc_rd_given_intra_angle(
3435 cpi, x, bsize,
3436 mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
3437 best_rd, best_angle_delta, MAX_ANGLE_DELTA, rate, rd_stats,
3438 &best_angle_delta, &best_tx_size, &best_tx_type, &best_filter,
3439 &best_rd, best_model_rd);
3443 #endif // CONFIG_INTRA_INTERP
3445 mbmi->tx_size = best_tx_size;
3446 mbmi->angle_delta[0] = best_angle_delta;
3447 #if CONFIG_INTRA_INTERP
3448 mic->mbmi.intra_filter = best_filter;
3449 #endif // CONFIG_INTRA_INTERP
3450 mbmi->tx_type = best_tx_type;
3451 return best_rd;
3454 // Indices are sign, integer, and fractional part of the gradient value
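// Concretely (see angle_estimation() below): the first index is
// (dx > 0) ^ (dy > 0), the second is the integer part of |dx| / |dy| capped
// at 6, and the third is the fractional part of |dx| / |dy| scaled by 16 and
// capped at 15. The table value is one of the 8 directional angle bins.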
3455 static const uint8_t gradient_to_angle_bin[2][7][16] = {
3457 { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
3458 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
3459 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
3460 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
3461 { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
3462 { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
3463 { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
3466 { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
3467 { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
3468 { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
3469 { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
3470 { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
3471 { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
3472 { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
3476 /* clang-format off */
3477 static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
3478 0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
3479 #if CONFIG_ALT_INTRA
3481 #endif // CONFIG_ALT_INTRA
3483 /* clang-format on */
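// angle_estimation() builds a histogram of gradient orientations over the
// source block: each pixel adds its squared gradient magnitude
// (dx * dx + dy * dy) to the bin given by gradient_to_angle_bin. A
// directional mode is then flagged for skipping when its own bin plus the
// immediately neighboring bins capture too small a share of the total
// gradient energy (relative to ANGLE_SKIP_THRESH).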
3485 static void angle_estimation(const uint8_t *src, int src_stride, int rows,
3486 int cols, BLOCK_SIZE bsize,
3487 uint8_t *directional_mode_skip_mask) {
3488 memset(directional_mode_skip_mask, 0,
3489 INTRA_MODES * sizeof(*directional_mode_skip_mask));
3490 // Sub-8x8 blocks do not use extra directions.
3491 if (bsize < BLOCK_8X8) return;
3492 uint64_t hist[DIRECTIONAL_MODES];
3493 memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
3494 src += src_stride;
3495 int r, c, dx, dy;
3496 for (r = 1; r < rows; ++r) {
3497 for (c = 1; c < cols; ++c) {
3498 dx = src[c] - src[c - 1];
3499 dy = src[c] - src[c - src_stride];
3500 int index;
3501 const int temp = dx * dx + dy * dy;
3502 if (dy == 0) {
3503 index = 2;
3504 } else {
3505 const int sn = (dx > 0) ^ (dy > 0);
3506 dx = abs(dx);
3507 dy = abs(dy);
3508 const int remd = (dx % dy) * 16 / dy;
3509 const int quot = dx / dy;
3510 index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
3512 hist[index] += temp;
3514 src += src_stride;
3517 int i;
3518 uint64_t hist_sum = 0;
3519 for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
3520 for (i = 0; i < INTRA_MODES; ++i) {
3521 if (av1_is_directional_mode(i, bsize)) {
3522 const uint8_t angle_bin = mode_to_angle_bin[i];
3523 uint64_t score = 2 * hist[angle_bin];
3524 int weight = 2;
3525 if (angle_bin > 0) {
3526 score += hist[angle_bin - 1];
3527 ++weight;
3529 if (angle_bin < DIRECTIONAL_MODES - 1) {
3530 score += hist[angle_bin + 1];
3531 ++weight;
3533 if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
3534 directional_mode_skip_mask[i] = 1;
3539 #if CONFIG_HIGHBITDEPTH
3540 static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
3541 int rows, int cols, BLOCK_SIZE bsize,
3542 uint8_t *directional_mode_skip_mask) {
3543 memset(directional_mode_skip_mask, 0,
3544 INTRA_MODES * sizeof(*directional_mode_skip_mask));
3545 // Sub-8x8 blocks do not use extra directions.
3546 if (bsize < BLOCK_8X8) return;
3547 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
3548 uint64_t hist[DIRECTIONAL_MODES];
3549 memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
3550 src += src_stride;
3551 int r, c, dx, dy;
3552 for (r = 1; r < rows; ++r) {
3553 for (c = 1; c < cols; ++c) {
3554 dx = src[c] - src[c - 1];
3555 dy = src[c] - src[c - src_stride];
3556 int index;
3557 const int temp = dx * dx + dy * dy;
3558 if (dy == 0) {
3559 index = 2;
3560 } else {
3561 const int sn = (dx > 0) ^ (dy > 0);
3562 dx = abs(dx);
3563 dy = abs(dy);
3564 const int remd = (dx % dy) * 16 / dy;
3565 const int quot = dx / dy;
3566 index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
3568 hist[index] += temp;
3570 src += src_stride;
3573 int i;
3574 uint64_t hist_sum = 0;
3575 for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
3576 for (i = 0; i < INTRA_MODES; ++i) {
3577 if (av1_is_directional_mode(i, bsize)) {
3578 const uint8_t angle_bin = mode_to_angle_bin[i];
3579 uint64_t score = 2 * hist[angle_bin];
3580 int weight = 2;
3581 if (angle_bin > 0) {
3582 score += hist[angle_bin - 1];
3583 ++weight;
3585 if (angle_bin < DIRECTIONAL_MODES - 1) {
3586 score += hist[angle_bin + 1];
3587 ++weight;
3589 if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
3590 directional_mode_skip_mask[i] = 1;
3594 #endif // CONFIG_HIGHBITDEPTH
3595 #endif // CONFIG_EXT_INTRA
3597 // This function is used only for intra_only frames
3598 static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
3599 int *rate, int *rate_tokenonly,
3600 int64_t *distortion, int *skippable,
3601 BLOCK_SIZE bsize, int64_t best_rd) {
3602 uint8_t mode_idx;
3603 MACROBLOCKD *const xd = &x->e_mbd;
3604 MODE_INFO *const mic = xd->mi[0];
3605 MB_MODE_INFO *const mbmi = &mic->mbmi;
3606 assert(!is_inter_block(mbmi));
3607 MB_MODE_INFO best_mbmi = *mbmi;
3608 int64_t best_model_rd = INT64_MAX;
3609 #if CONFIG_EXT_INTRA
3610 const int rows = block_size_high[bsize];
3611 const int cols = block_size_wide[bsize];
3612 #if CONFIG_INTRA_INTERP
3613 const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
3614 #endif // CONFIG_INTRA_INTERP
3615 int is_directional_mode;
3616 uint8_t directional_mode_skip_mask[INTRA_MODES];
3617 const int src_stride = x->plane[0].src.stride;
3618 const uint8_t *src = x->plane[0].src.buf;
3619 #endif // CONFIG_EXT_INTRA
3620 #if CONFIG_FILTER_INTRA
3621 int beat_best_rd = 0;
3622 uint16_t filter_intra_mode_skip_mask = (1 << FILTER_INTRA_MODES) - 1;
3623 #endif // CONFIG_FILTER_INTRA
3624 const int *bmode_costs;
3625 #if CONFIG_PALETTE
3626 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3627 uint8_t *best_palette_color_map =
3628 cpi->common.allow_screen_content_tools
3629 ? x->palette_buffer->best_palette_color_map
3630 : NULL;
3631 int palette_y_mode_ctx = 0;
3632 const int try_palette =
3633 cpi->common.allow_screen_content_tools && bsize >= BLOCK_8X8;
3634 #endif // CONFIG_PALETTE
3635 const MODE_INFO *above_mi = xd->above_mi;
3636 const MODE_INFO *left_mi = xd->left_mi;
3637 const PREDICTION_MODE A = av1_above_block_mode(mic, above_mi, 0);
3638 const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
3639 const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
3640 #if CONFIG_PVQ
3641 od_rollback_buffer pre_buf, post_buf;
3643 od_encode_checkpoint(&x->daala_enc, &pre_buf);
3644 od_encode_checkpoint(&x->daala_enc, &post_buf);
3645 #endif // CONFIG_PVQ
3646 bmode_costs = cpi->y_mode_costs[A][L];
3648 #if CONFIG_EXT_INTRA
3649 mbmi->angle_delta[0] = 0;
3650 #if CONFIG_HIGHBITDEPTH
3651 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3652 highbd_angle_estimation(src, src_stride, rows, cols, bsize,
3653 directional_mode_skip_mask);
3654 else
3655 #endif // CONFIG_HIGHBITDEPTH
3656 angle_estimation(src, src_stride, rows, cols, bsize,
3657 directional_mode_skip_mask);
3658 #endif // CONFIG_EXT_INTRA
3659 #if CONFIG_FILTER_INTRA
3660 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
3661 #endif // CONFIG_FILTER_INTRA
3662 #if CONFIG_PALETTE
3663 pmi->palette_size[0] = 0;
3664 if (above_mi)
3665 palette_y_mode_ctx +=
3666 (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
3667 if (left_mi)
3668 palette_y_mode_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
3669 #endif // CONFIG_PALETTE
3671 if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
3672 x->use_default_intra_tx_type = 1;
3673 else
3674 x->use_default_intra_tx_type = 0;
3676 /* Y Search for intra prediction mode */
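  // When the fast_intra_tx_type_search speed feature is on, every mode is
  // first evaluated with the default intra tx type only; the extra
  // FINAL_MODE_SEARCH iteration then re-runs the best mode found with the
  // full tx-type search.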
3677 for (mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
3678 RD_STATS this_rd_stats;
3679 int this_rate, this_rate_tokenonly, s;
3680 int64_t this_distortion, this_rd, this_model_rd;
3681 if (mode_idx == FINAL_MODE_SEARCH) {
3682 if (x->use_default_intra_tx_type == 0) break;
3683 mbmi->mode = best_mbmi.mode;
3684 x->use_default_intra_tx_type = 0;
3685 } else {
3686 mbmi->mode = mode_idx;
3688 #if CONFIG_PVQ
3689 od_encode_rollback(&x->daala_enc, &pre_buf);
3690 #endif // CONFIG_PVQ
3691 #if CONFIG_EXT_INTRA
3692 mbmi->angle_delta[0] = 0;
3693 #endif // CONFIG_EXT_INTRA
3694 this_model_rd = intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode]);
3695 if (best_model_rd != INT64_MAX &&
3696 this_model_rd > best_model_rd + (best_model_rd >> 1))
3697 continue;
3698 if (this_model_rd < best_model_rd) best_model_rd = this_model_rd;
3699 #if CONFIG_EXT_INTRA
3700 is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
3701 if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
3702 if (is_directional_mode) {
3703 this_rd_stats.rate = INT_MAX;
3704 rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
3705 bmode_costs[mbmi->mode], best_rd, &best_model_rd);
3706 } else {
3707 super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
3709 #else
3710 super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
3711 #endif // CONFIG_EXT_INTRA
3712 this_rate_tokenonly = this_rd_stats.rate;
3713 this_distortion = this_rd_stats.dist;
3714 s = this_rd_stats.skip;
3716 if (this_rate_tokenonly == INT_MAX) continue;
3718 this_rate = this_rate_tokenonly + bmode_costs[mbmi->mode];
3720 if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
3721 // super_block_yrd above includes the cost of the tx_size in the
3722 // tokenonly rate, but for intra blocks, tx_size is always coded
3723 // (prediction granularity), so we account for it in the full rate,
3724 // not the tokenonly rate.
3725 this_rate_tokenonly -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
3727 #if CONFIG_PALETTE
3728 if (try_palette && mbmi->mode == DC_PRED) {
3729 this_rate +=
3730 av1_cost_bit(av1_default_palette_y_mode_prob[bsize - BLOCK_8X8]
3731 [palette_y_mode_ctx],
3734 #endif // CONFIG_PALETTE
3735 #if CONFIG_FILTER_INTRA
3736 if (mbmi->mode == DC_PRED)
3737 this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 0);
3738 #endif // CONFIG_FILTER_INTRA
3739 #if CONFIG_EXT_INTRA
3740 if (is_directional_mode) {
3741 #if CONFIG_INTRA_INTERP
3742 const int p_angle =
3743 mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
3744 if (av1_is_intra_filter_switchable(p_angle))
3745 this_rate +=
3746 cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
3747 #endif // CONFIG_INTRA_INTERP
3748 this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
3749 MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
3751 #endif // CONFIG_EXT_INTRA
3752 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
3753 #if CONFIG_FILTER_INTRA
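    // A set bit in filter_intra_mode_skip_mask causes the filter-intra mode
    // at that index to be skipped later in rd_pick_filter_intra_sby(). Modes
    // whose RD cost lands close to the current best get their bit toggled
    // (in effect cleared) here so that the corresponding filter-intra mode is
    // tried.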
3754 if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) {
3755 filter_intra_mode_skip_mask ^= (1 << mbmi->mode);
3757 #endif // CONFIG_FILTER_INTRA
3759 if (this_rd < best_rd) {
3760 best_mbmi = *mbmi;
3761 best_rd = this_rd;
3762 #if CONFIG_FILTER_INTRA
3763 beat_best_rd = 1;
3764 #endif // CONFIG_FILTER_INTRA
3765 *rate = this_rate;
3766 *rate_tokenonly = this_rate_tokenonly;
3767 *distortion = this_distortion;
3768 *skippable = s;
3769 #if CONFIG_PVQ
3770 od_encode_checkpoint(&x->daala_enc, &post_buf);
3771 #endif // CONFIG_PVQ
3775 #if CONFIG_PVQ
3776 od_encode_rollback(&x->daala_enc, &post_buf);
3777 #endif // CONFIG_PVQ
3779 #if CONFIG_CFL
3780 // Perform one extra txfm_rd_in_plane() call, this time with the best value, so
3781 // we can store the reconstructed luma values needed by CfL.
3782 RD_STATS this_rd_stats;
3783 x->cfl_store_y = 1;
3784 txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize,
3785 mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing);
3786 x->cfl_store_y = 0;
3787 #endif
3789 #if CONFIG_PALETTE
3790 if (try_palette) {
3791 rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
3792 bmode_costs[DC_PRED], &best_mbmi,
3793 best_palette_color_map, &best_rd, &best_model_rd,
3794 rate, rate_tokenonly, distortion, skippable);
3796 #endif // CONFIG_PALETTE
3798 #if CONFIG_FILTER_INTRA
3799 if (beat_best_rd) {
3800 if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
3801 skippable, bsize, bmode_costs[DC_PRED],
3802 &best_rd, &best_model_rd,
3803 filter_intra_mode_skip_mask)) {
3804 best_mbmi = *mbmi;
3807 #endif // CONFIG_FILTER_INTRA
3809 *mbmi = best_mbmi;
3810 return best_rd;
3813 // Return value 0: early termination triggered, no valid rd cost available;
3814 // 1: rd cost values are valid.
3815 static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
3816 RD_STATS *rd_stats, BLOCK_SIZE bsize,
3817 int64_t ref_best_rd) {
3818 MACROBLOCKD *const xd = &x->e_mbd;
3819 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3820 const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
3821 int plane;
3822 int is_cost_valid = 1;
3823 av1_init_rd_stats(rd_stats);
3825 if (ref_best_rd < 0) is_cost_valid = 0;
3827 #if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
3828 if (x->skip_chroma_rd) return is_cost_valid;
3830 bsize = scale_chroma_bsize(bsize, xd->plane[1].subsampling_x,
3831 xd->plane[1].subsampling_y);
3832 #endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
3834 #if !CONFIG_PVQ
3835 if (is_inter_block(mbmi) && is_cost_valid) {
3836 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
3837 av1_subtract_plane(x, bsize, plane);
3839 #endif // !CONFIG_PVQ
3841 if (is_cost_valid) {
3842 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
3843 RD_STATS pn_rd_stats;
3844 txfm_rd_in_plane(x, cpi, &pn_rd_stats, ref_best_rd, plane, bsize,
3845 uv_tx_size, cpi->sf.use_fast_coef_costing);
3846 if (pn_rd_stats.rate == INT_MAX) {
3847 is_cost_valid = 0;
3848 break;
3850 av1_merge_rd_stats(rd_stats, &pn_rd_stats);
3851 if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >
3852 ref_best_rd &&
3853 RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse) > ref_best_rd) {
3854 is_cost_valid = 0;
3855 break;
3860 if (!is_cost_valid) {
3861 // reset cost value
3862 av1_invalid_rd_stats(rd_stats);
3865 return is_cost_valid;
3868 #if CONFIG_VAR_TX
3869 // FIXME crop these calls
3870 static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
3871 TX_SIZE tx_size) {
3872 return aom_sum_squares_2d_i16(diff, diff_stride, tx_size_wide[tx_size],
3873 tx_size_high[tx_size]);
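// av1_tx_block_rd_b(): rate/distortion for one transform block. The residual
// is transformed and quantized (AV1_XFORM_QUANT_FP), the coefficients are
// optimized, the prediction is copied into a scratch buffer and the inverse
// transform is added on top, and the distortion is the pixel-domain SSE
// against the source (scaled by 16), with rd_stats->sse taken from the
// prediction residual.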
3876 void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
3877 int blk_row, int blk_col, int plane, int block,
3878 int plane_bsize, const ENTROPY_CONTEXT *a,
3879 const ENTROPY_CONTEXT *l, RD_STATS *rd_stats) {
3880 const AV1_COMMON *const cm = &cpi->common;
3881 MACROBLOCKD *xd = &x->e_mbd;
3882 const struct macroblock_plane *const p = &x->plane[plane];
3883 struct macroblockd_plane *const pd = &xd->plane[plane];
3884 int64_t tmp;
3885 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
3886 PLANE_TYPE plane_type = get_plane_type(plane);
3887 TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
3888 const SCAN_ORDER *const scan_order =
3889 get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
3890 BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
3891 int bh = block_size_high[txm_bsize];
3892 int bw = block_size_wide[txm_bsize];
3893 int txb_h = tx_size_high_unit[tx_size];
3894 int txb_w = tx_size_wide_unit[tx_size];
3896 int src_stride = p->src.stride;
3897 uint8_t *src =
3898 &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
3899 uint8_t *dst =
3900 &pd->dst
3901 .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
3902 #if CONFIG_HIGHBITDEPTH
3903 DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
3904 uint8_t *rec_buffer;
3905 #else
3906 DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
3907 #endif // CONFIG_HIGHBITDEPTH
3908 int max_blocks_high = block_size_high[plane_bsize];
3909 int max_blocks_wide = block_size_wide[plane_bsize];
3910 const int diff_stride = max_blocks_wide;
3911 const int16_t *diff =
3912 &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
3913 int txb_coeff_cost;
3915 assert(tx_size < TX_SIZES_ALL);
3917 if (xd->mb_to_bottom_edge < 0)
3918 max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
3919 if (xd->mb_to_right_edge < 0)
3920 max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
3922 max_blocks_high >>= tx_size_wide_log2[0];
3923 max_blocks_wide >>= tx_size_wide_log2[0];
3925 int coeff_ctx = get_entropy_context(tx_size, a, l);
3927 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
3928 coeff_ctx, AV1_XFORM_QUANT_FP);
3930 av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx);
3932 // TODO(any): Use av1_dist_block to compute distortion
3933 #if CONFIG_HIGHBITDEPTH
3934 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3935 rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
3936 aom_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL,
3937 0, NULL, 0, bw, bh, xd->bd);
3938 } else {
3939 rec_buffer = (uint8_t *)rec_buffer16;
3940 aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0,
3941 NULL, 0, bw, bh);
3943 #else
3944 aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL,
3945 0, bw, bh);
3946 #endif // CONFIG_HIGHBITDEPTH
3948 if (blk_row + txb_h > max_blocks_high || blk_col + txb_w > max_blocks_wide) {
3949 int idx, idy;
3950 int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
3951 int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
3952 tmp = 0;
3953 for (idy = 0; idy < blocks_height; ++idy) {
3954 for (idx = 0; idx < blocks_width; ++idx) {
3955 const int16_t *d =
3956 diff + ((idy * diff_stride + idx) << tx_size_wide_log2[0]);
3957 tmp += sum_squares_2d(d, diff_stride, 0);
3960 } else {
3961 tmp = sum_squares_2d(diff, diff_stride, tx_size);
3964 #if CONFIG_HIGHBITDEPTH
3965 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3966 tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
3967 #endif // CONFIG_HIGHBITDEPTH
3968 rd_stats->sse += tmp * 16;
3969 const int eob = p->eobs[block];
3971 av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, rec_buffer,
3972 MAX_TX_SIZE, eob);
3973 if (eob > 0) {
3974 if (txb_w + blk_col > max_blocks_wide ||
3975 txb_h + blk_row > max_blocks_high) {
3976 int idx, idy;
3977 unsigned int this_dist;
3978 int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
3979 int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
3980 tmp = 0;
3981 for (idy = 0; idy < blocks_height; ++idy) {
3982 for (idx = 0; idx < blocks_width; ++idx) {
3983 uint8_t *const s =
3984 src + ((idy * src_stride + idx) << tx_size_wide_log2[0]);
3985 uint8_t *const r =
3986 rec_buffer + ((idy * MAX_TX_SIZE + idx) << tx_size_wide_log2[0]);
3987 cpi->fn_ptr[0].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
3988 tmp += this_dist;
3991 } else {
3992 uint32_t this_dist;
3993 cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE,
3994 &this_dist);
3995 tmp = this_dist;
3998 rd_stats->dist += tmp * 16;
3999 txb_coeff_cost =
4000 av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l, 0);
4001 rd_stats->rate += txb_coeff_cost;
4002 rd_stats->skip &= (eob == 0);
4004 #if CONFIG_RD_DEBUG
4005 av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
4006 txb_coeff_cost);
4007 #endif // CONFIG_RD_DEBUG
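// select_tx_block(): recursive RD search over the transform partition tree.
// The block is first costed at the current tx_size (replaced by the all-zero
// coding whenever that is cheaper); then, if splitting is allowed, the four
// sub-transforms are searched recursively. The cheaper of the two options is
// kept and recorded in mbmi->inter_tx_size, the entropy/TXFM contexts and
// x->blk_skip.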
4010 static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
4011 int blk_col, int plane, int block, int block32,
4012 TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize,
4013 ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
4014 TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
4015 RD_STATS *rd_stats, int64_t ref_best_rd,
4016 int *is_cost_valid, RD_STATS *rd_stats_stack) {
4017 MACROBLOCKD *const xd = &x->e_mbd;
4018 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4019 struct macroblock_plane *const p = &x->plane[plane];
4020 struct macroblockd_plane *const pd = &xd->plane[plane];
4021 const int tx_row = blk_row >> (1 - pd->subsampling_y);
4022 const int tx_col = blk_col >> (1 - pd->subsampling_x);
4023 TX_SIZE(*const inter_tx_size)
4024 [MAX_MIB_SIZE] =
4025 (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
4026 const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
4027 const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
4028 const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
4029 int64_t this_rd = INT64_MAX;
4030 ENTROPY_CONTEXT *pta = ta + blk_col;
4031 ENTROPY_CONTEXT *ptl = tl + blk_row;
4032 int coeff_ctx, i;
4033 int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
4034 mbmi->sb_type, tx_size);
4035 int64_t sum_rd = INT64_MAX;
4036 int tmp_eob = 0;
4037 int zero_blk_rate;
4038 RD_STATS sum_rd_stats;
4039 const int tx_size_ctx = txsize_sqr_map[tx_size];
4041 av1_init_rd_stats(&sum_rd_stats);
4043 assert(tx_size < TX_SIZES_ALL);
4045 if (ref_best_rd < 0) {
4046 *is_cost_valid = 0;
4047 return;
4050 coeff_ctx = get_entropy_context(tx_size, pta, ptl);
4052 av1_init_rd_stats(rd_stats);
4054 if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
4056 zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
4057 [coeff_ctx][EOB_TOKEN];
4059 if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
4060 inter_tx_size[0][0] = tx_size;
4062 if (tx_size == TX_32X32 && mbmi->tx_type != DCT_DCT &&
4063 rd_stats_stack[block32].rate != INT_MAX) {
4064 *rd_stats = rd_stats_stack[block32];
4065 p->eobs[block] = !rd_stats->skip;
4066 x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
4067 } else {
4068 av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
4069 plane_bsize, pta, ptl, rd_stats);
4070 if (tx_size == TX_32X32) {
4071 rd_stats_stack[block32] = *rd_stats;
4075 if ((RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >=
4076 RDCOST(x->rdmult, x->rddiv, zero_blk_rate, rd_stats->sse) ||
4077 rd_stats->skip == 1) &&
4078 !xd->lossless[mbmi->segment_id]) {
4079 #if CONFIG_RD_DEBUG
4080 av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
4081 zero_blk_rate - rd_stats->rate);
4082 #endif // CONFIG_RD_DEBUG
4083 rd_stats->rate = zero_blk_rate;
4084 rd_stats->dist = rd_stats->sse;
4085 rd_stats->skip = 1;
4086 x->blk_skip[plane][blk_row * bw + blk_col] = 1;
4087 p->eobs[block] = 0;
4088 } else {
4089 x->blk_skip[plane][blk_row * bw + blk_col] = 0;
4090 rd_stats->skip = 0;
4093 if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
4094 rd_stats->rate +=
4095 av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
4096 this_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
4097 tmp_eob = p->eobs[block];
4100 if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
4101 const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
4102 const int bsl = tx_size_wide_unit[sub_txs];
4103 int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
4104 RD_STATS this_rd_stats;
4105 int this_cost_valid = 1;
4106 int64_t tmp_rd = 0;
4108 sum_rd_stats.rate =
4109 av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
4111 assert(tx_size < TX_SIZES_ALL);
4113 for (i = 0; i < 4 && this_cost_valid; ++i) {
4114 int offsetr = blk_row + (i >> 1) * bsl;
4115 int offsetc = blk_col + (i & 0x01) * bsl;
4117 if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
4119 select_tx_block(cpi, x, offsetr, offsetc, plane, block, block32, sub_txs,
4120 depth + 1, plane_bsize, ta, tl, tx_above, tx_left,
4121 &this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid,
4122 rd_stats_stack);
4124 av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);
4126 tmp_rd =
4127 RDCOST(x->rdmult, x->rddiv, sum_rd_stats.rate, sum_rd_stats.dist);
4128 if (this_rd < tmp_rd) break;
4129 block += sub_step;
4131 if (this_cost_valid) sum_rd = tmp_rd;
4134 if (this_rd < sum_rd) {
4135 int idx, idy;
4136 for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0);
4137 for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0);
4138 txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
4139 tx_size);
4140 inter_tx_size[0][0] = tx_size;
4141 for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
4142 for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
4143 inter_tx_size[idy][idx] = tx_size;
4144 mbmi->tx_size = tx_size;
4145 if (this_rd == INT64_MAX) *is_cost_valid = 0;
4146 x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
4147 } else {
4148 *rd_stats = sum_rd_stats;
4149 if (sum_rd == INT64_MAX) *is_cost_valid = 0;
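// inter_block_yrd(): tiles the luma plane with the largest rectangular
// transform size and runs select_tx_block() on each tile, accumulating the
// per-tile RD stats; the result is invalidated if the total RD cost ends up
// above ref_best_rd.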
4153 static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
4154 RD_STATS *rd_stats, BLOCK_SIZE bsize,
4155 int64_t ref_best_rd, RD_STATS *rd_stats_stack) {
4156 MACROBLOCKD *const xd = &x->e_mbd;
4157 int is_cost_valid = 1;
4158 int64_t this_rd = 0;
4160 if (ref_best_rd < 0) is_cost_valid = 0;
4162 av1_init_rd_stats(rd_stats);
4164 if (is_cost_valid) {
4165 const struct macroblockd_plane *const pd = &xd->plane[0];
4166 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
4167 const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
4168 const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
4169 const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
4170 const int bh = tx_size_high_unit[max_tx_size];
4171 const int bw = tx_size_wide_unit[max_tx_size];
4172 int idx, idy;
4173 int block = 0;
4174 int block32 = 0;
4175 int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
4176 ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
4177 ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
4178 TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
4179 TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];
4181 RD_STATS pn_rd_stats;
4182 av1_init_rd_stats(&pn_rd_stats);
4184 av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
4185 memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
4186 memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);
4188 for (idy = 0; idy < mi_height; idy += bh) {
4189 for (idx = 0; idx < mi_width; idx += bw) {
4190 select_tx_block(cpi, x, idy, idx, 0, block, block32, max_tx_size,
4191 mi_height != mi_width, plane_bsize, ctxa, ctxl,
4192 tx_above, tx_left, &pn_rd_stats, ref_best_rd - this_rd,
4193 &is_cost_valid, rd_stats_stack);
4194 av1_merge_rd_stats(rd_stats, &pn_rd_stats);
4195 this_rd += AOMMIN(
4196 RDCOST(x->rdmult, x->rddiv, pn_rd_stats.rate, pn_rd_stats.dist),
4197 RDCOST(x->rdmult, x->rddiv, 0, pn_rd_stats.sse));
4198 block += step;
4199 ++block32;
4204 this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
4205 RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
4206 if (this_rd > ref_best_rd) is_cost_valid = 0;
4208 if (!is_cost_valid) {
4209 // reset cost value
4210 av1_invalid_rd_stats(rd_stats);
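// select_tx_size_fix_type(): with tx_type held fixed, run the recursive
// transform size search above, add the tx_type signaling cost where it
// applies, fold in the skip-flag cost, and return the resulting RD cost.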
4214 static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
4215 RD_STATS *rd_stats, BLOCK_SIZE bsize,
4216 int64_t ref_best_rd, TX_TYPE tx_type,
4217 RD_STATS *rd_stats_stack) {
4218 const AV1_COMMON *const cm = &cpi->common;
4219 MACROBLOCKD *const xd = &x->e_mbd;
4220 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4221 const int is_inter = is_inter_block(mbmi);
4222 aom_prob skip_prob = av1_get_skip_prob(cm, xd);
4223 int s0 = av1_cost_bit(skip_prob, 0);
4224 int s1 = av1_cost_bit(skip_prob, 1);
4225 int64_t rd;
4226 int row, col;
4227 const int max_blocks_high = max_block_high(xd, bsize, 0);
4228 const int max_blocks_wide = max_block_wide(xd, bsize, 0);
4230 mbmi->tx_type = tx_type;
4231 inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, rd_stats_stack);
4232 mbmi->min_tx_size = get_min_tx_size(mbmi->inter_tx_size[0][0]);
4234 if (rd_stats->rate == INT_MAX) return INT64_MAX;
4236 for (row = 0; row < max_blocks_high / 2; ++row)
4237 for (col = 0; col < max_blocks_wide / 2; ++col)
4238 mbmi->min_tx_size = AOMMIN(
4239 mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col]));
4241 #if CONFIG_EXT_TX
4242 if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter,
4243 cm->reduced_tx_set_used) > 1 &&
4244 !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
4245 const int ext_tx_set = get_ext_tx_set(mbmi->min_tx_size, bsize, is_inter,
4246 cm->reduced_tx_set_used);
4247 if (is_inter) {
4248 if (ext_tx_set > 0)
4249 rd_stats->rate +=
4250 cpi->inter_tx_type_costs[ext_tx_set]
4251 [txsize_sqr_map[mbmi->min_tx_size]]
4252 [mbmi->tx_type];
4253 } else {
4254 if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
4255 rd_stats->rate +=
4256 cpi->intra_tx_type_costs[ext_tx_set][mbmi->min_tx_size][mbmi->mode]
4257 [mbmi->tx_type];
4260 #else // CONFIG_EXT_TX
4261 if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id])
4262 rd_stats->rate +=
4263 cpi->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
4264 #endif // CONFIG_EXT_TX
4266 if (rd_stats->skip)
4267 rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
4268 else
4269 rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate + s0, rd_stats->dist);
4271 if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
4272 !(rd_stats->skip))
4273 rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
4275 return rd;
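// select_tx_type_yrd(): loops over all transform types, pruning candidates
// via prune_tx_types(), the default-tx-type speed feature and the lossless
// DCT_DCT restriction, and keeps the tx type together with the per-block
// transform sizes and skip flags that give the lowest RD cost. TX_32X32
// results are cached in rd_stats_stack and reused across tx types,
// presumably because 32x32 transforms are restricted to DCT_DCT.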
4278 static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
4279 RD_STATS *rd_stats, BLOCK_SIZE bsize,
4280 int64_t ref_best_rd) {
4281 const AV1_COMMON *cm = &cpi->common;
4282 const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
4283 MACROBLOCKD *const xd = &x->e_mbd;
4284 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4285 int64_t rd = INT64_MAX;
4286 int64_t best_rd = INT64_MAX;
4287 TX_TYPE tx_type, best_tx_type = DCT_DCT;
4288 const int is_inter = is_inter_block(mbmi);
4289 TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
4290 TX_SIZE best_tx = max_txsize_lookup[bsize];
4291 TX_SIZE best_min_tx_size = TX_SIZES_ALL;
4292 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
4293 const int n4 = bsize_to_num_blk(bsize);
4294 int idx, idy;
4295 int prune = 0;
4296 const int count32 =
4297 1 << (2 * (cm->mib_size_log2 - mi_width_log2_lookup[BLOCK_32X32]));
4298 #if CONFIG_EXT_PARTITION
4299 RD_STATS rd_stats_stack[16];
4300 #else
4301 RD_STATS rd_stats_stack[4];
4302 #endif // CONFIG_EXT_PARTITION
4303 #if CONFIG_EXT_TX
4304 const int ext_tx_set =
4305 get_ext_tx_set(max_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
4306 #endif // CONFIG_EXT_TX
4308 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
4309 #if CONFIG_EXT_TX
4310 prune = prune_tx_types(cpi, bsize, x, xd, ext_tx_set);
4311 #else
4312 prune = prune_tx_types(cpi, bsize, x, xd, 0);
4313 #endif // CONFIG_EXT_TX
4315 av1_invalid_rd_stats(rd_stats);
4317 for (idx = 0; idx < count32; ++idx)
4318 av1_invalid_rd_stats(&rd_stats_stack[idx]);
4320 for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
4321 RD_STATS this_rd_stats;
4322 av1_init_rd_stats(&this_rd_stats);
4323 #if CONFIG_EXT_TX
4324 if (is_inter) {
4325 if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
4326 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
4327 if (!do_tx_type_search(tx_type, prune)) continue;
4329 } else {
4330 if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
4331 if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
4333 if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
4335 #else // CONFIG_EXT_TX
4336 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
4337 !do_tx_type_search(tx_type, prune))
4338 continue;
4339 #endif // CONFIG_EXT_TX
4340 if (is_inter && x->use_default_inter_tx_type &&
4341 tx_type != get_default_tx_type(0, xd, 0, max_tx_size))
4342 continue;
4344 if (xd->lossless[mbmi->segment_id])
4345 if (tx_type != DCT_DCT) continue;
4347 rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd,
4348 tx_type, rd_stats_stack);
4350 if (rd < best_rd) {
4351 best_rd = rd;
4352 *rd_stats = this_rd_stats;
4353 best_tx_type = mbmi->tx_type;
4354 best_tx = mbmi->tx_size;
4355 best_min_tx_size = mbmi->min_tx_size;
4356 memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
4357 for (idy = 0; idy < xd->n8_h; ++idy)
4358 for (idx = 0; idx < xd->n8_w; ++idx)
4359 best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
4363 mbmi->tx_type = best_tx_type;
4364 for (idy = 0; idy < xd->n8_h; ++idy)
4365 for (idx = 0; idx < xd->n8_w; ++idx)
4366 mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
4367 mbmi->tx_size = best_tx;
4368 mbmi->min_tx_size = best_min_tx_size;
4369 memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
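// tx_block_rd(): walks the transform partition previously selected in
// mbmi->inter_tx_size and accumulates rate/distortion for each leaf
// transform block; for chroma planes the luma transform size is mapped
// through uv_txsize_lookup.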
4372 static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
4373 int blk_col, int plane, int block, TX_SIZE tx_size,
4374 BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
4375 ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats) {
4376 MACROBLOCKD *const xd = &x->e_mbd;
4377 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4378 struct macroblock_plane *const p = &x->plane[plane];
4379 struct macroblockd_plane *const pd = &xd->plane[plane];
4380 BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
4381 const int tx_row = blk_row >> (1 - pd->subsampling_y);
4382 const int tx_col = blk_col >> (1 - pd->subsampling_x);
4383 TX_SIZE plane_tx_size;
4384 const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
4385 const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
4387 assert(tx_size < TX_SIZES_ALL);
4389 if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
4391 plane_tx_size =
4392 plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
4393 : mbmi->inter_tx_size[tx_row][tx_col];
4395 if (tx_size == plane_tx_size) {
4396 int i;
4397 ENTROPY_CONTEXT *ta = above_ctx + blk_col;
4398 ENTROPY_CONTEXT *tl = left_ctx + blk_row;
4399 av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
4400 plane_bsize, ta, tl, rd_stats);
4402 for (i = 0; i < tx_size_wide_unit[tx_size]; ++i)
4403 ta[i] = !(p->eobs[block] == 0);
4404 for (i = 0; i < tx_size_high_unit[tx_size]; ++i)
4405 tl[i] = !(p->eobs[block] == 0);
4406 } else {
4407 const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
4408 const int bsl = tx_size_wide_unit[sub_txs];
4409 int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
4410 int i;
4412 assert(bsl > 0);
4414 for (i = 0; i < 4; ++i) {
4415 int offsetr = blk_row + (i >> 1) * bsl;
4416 int offsetc = blk_col + (i & 0x01) * bsl;
4418 if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
4420 tx_block_rd(cpi, x, offsetr, offsetc, plane, block, sub_txs, plane_bsize,
4421 above_ctx, left_ctx, rd_stats);
4422 block += step;
4427 // Return value 0: early termination triggered, no valid rd cost available;
4428 // 1: rd cost values are valid.
4429 static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
4430 RD_STATS *rd_stats, BLOCK_SIZE bsize,
4431 int64_t ref_best_rd) {
4432 MACROBLOCKD *const xd = &x->e_mbd;
4433 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4434 int plane;
4435 int is_cost_valid = 1;
4436 int64_t this_rd;
4438 if (ref_best_rd < 0) is_cost_valid = 0;
4440 av1_init_rd_stats(rd_stats);
4442 #if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
4443 if (x->skip_chroma_rd) return is_cost_valid;
4444 bsize = scale_chroma_bsize(mbmi->sb_type, xd->plane[1].subsampling_x,
4445 xd->plane[1].subsampling_y);
4446 #endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
4448 #if CONFIG_EXT_TX && CONFIG_RECT_TX
4449 if (is_rect_tx(mbmi->tx_size)) {
4450 return super_block_uvrd(cpi, x, rd_stats, bsize, ref_best_rd);
4452 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
4454 if (is_inter_block(mbmi) && is_cost_valid) {
4455 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
4456 av1_subtract_plane(x, bsize, plane);
4459 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
4460 const struct macroblockd_plane *const pd = &xd->plane[plane];
4461 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
4462 const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
4463 const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
4464 const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
4465 const int bh = tx_size_high_unit[max_tx_size];
4466 const int bw = tx_size_wide_unit[max_tx_size];
4467 int idx, idy;
4468 int block = 0;
4469 const int step = bh * bw;
4470 ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
4471 ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
4472 RD_STATS pn_rd_stats;
4473 av1_init_rd_stats(&pn_rd_stats);
4475 av1_get_entropy_contexts(bsize, 0, pd, ta, tl);
4477 for (idy = 0; idy < mi_height; idy += bh) {
4478 for (idx = 0; idx < mi_width; idx += bw) {
4479 tx_block_rd(cpi, x, idy, idx, plane, block, max_tx_size, plane_bsize,
4480 ta, tl, &pn_rd_stats);
4481 block += step;
4485 if (pn_rd_stats.rate == INT_MAX) {
4486 is_cost_valid = 0;
4487 break;
4490 av1_merge_rd_stats(rd_stats, &pn_rd_stats);
4492 this_rd =
4493 AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
4494 RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
4496 if (this_rd > ref_best_rd) {
4497 is_cost_valid = 0;
4498 break;
4502 if (!is_cost_valid) {
4503 // reset cost value
4504 av1_invalid_rd_stats(rd_stats);
4507 return is_cost_valid;
4509 #endif // CONFIG_VAR_TX
4511 #if CONFIG_PALETTE
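// Chroma palette search: count the distinct colors in the U and V source
// blocks, then for each candidate palette size n (from min(colors,
// PALETTE_MAX_SIZE) down to 2) run 2-D k-means over the (U, V) pairs, build
// the color index map, and evaluate the full RD cost including the palette
// colors and the per-pixel index signaling.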
4512 static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
4513 int dc_mode_cost,
4514 uint8_t *best_palette_color_map,
4515 MB_MODE_INFO *const best_mbmi,
4516 int64_t *best_rd, int *rate,
4517 int *rate_tokenonly, int64_t *distortion,
4518 int *skippable) {
4519 MACROBLOCKD *const xd = &x->e_mbd;
4520 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4521 assert(!is_inter_block(mbmi));
4522 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4523 const BLOCK_SIZE bsize = mbmi->sb_type;
4524 int this_rate;
4525 int64_t this_rd;
4526 int colors_u, colors_v, colors;
4527 const int src_stride = x->plane[1].src.stride;
4528 const uint8_t *const src_u = x->plane[1].src.buf;
4529 const uint8_t *const src_v = x->plane[2].src.buf;
4530 uint8_t *const color_map = xd->plane[1].color_index_map;
4531 RD_STATS tokenonly_rd_stats;
4532 int plane_block_width, plane_block_height, rows, cols;
4533 av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
4534 &plane_block_height, &rows, &cols);
4535 if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return;
4537 mbmi->uv_mode = DC_PRED;
4538 #if CONFIG_FILTER_INTRA
4539 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
4540 #endif // CONFIG_FILTER_INTRA
4542 #if CONFIG_HIGHBITDEPTH
4543 if (cpi->common.use_highbitdepth) {
4544 colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols,
4545 cpi->common.bit_depth);
4546 colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols,
4547 cpi->common.bit_depth);
4548 } else {
4549 #endif // CONFIG_HIGHBITDEPTH
4550 colors_u = av1_count_colors(src_u, src_stride, rows, cols);
4551 colors_v = av1_count_colors(src_v, src_stride, rows, cols);
4552 #if CONFIG_HIGHBITDEPTH
4554 #endif // CONFIG_HIGHBITDEPTH
4556 #if CONFIG_PALETTE_DELTA_ENCODING
4557 const MODE_INFO *above_mi = xd->above_mi;
4558 const MODE_INFO *left_mi = xd->left_mi;
4559 uint16_t color_cache[2 * PALETTE_MAX_SIZE];
4560 const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache);
4561 #endif // CONFIG_PALETTE_DELTA_ENCODING
4563 colors = colors_u > colors_v ? colors_u : colors_v;
4564 if (colors > 1 && colors <= 64) {
4565 int r, c, n, i, j;
4566 const int max_itr = 50;
4567 uint8_t color_order[PALETTE_MAX_SIZE];
4568 float lb_u, ub_u, val_u;
4569 float lb_v, ub_v, val_v;
4570 float *const data = x->palette_buffer->kmeans_data_buf;
4571 float centroids[2 * PALETTE_MAX_SIZE];
4573 #if CONFIG_HIGHBITDEPTH
4574 uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
4575 uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
4576 if (cpi->common.use_highbitdepth) {
4577 lb_u = src_u16[0];
4578 ub_u = src_u16[0];
4579 lb_v = src_v16[0];
4580 ub_v = src_v16[0];
4581 } else {
4582 #endif // CONFIG_HIGHBITDEPTH
4583 lb_u = src_u[0];
4584 ub_u = src_u[0];
4585 lb_v = src_v[0];
4586 ub_v = src_v[0];
4587 #if CONFIG_HIGHBITDEPTH
4589 #endif // CONFIG_HIGHBITDEPTH
4591 for (r = 0; r < rows; ++r) {
4592 for (c = 0; c < cols; ++c) {
4593 #if CONFIG_HIGHBITDEPTH
4594 if (cpi->common.use_highbitdepth) {
4595 val_u = src_u16[r * src_stride + c];
4596 val_v = src_v16[r * src_stride + c];
4597 data[(r * cols + c) * 2] = val_u;
4598 data[(r * cols + c) * 2 + 1] = val_v;
4599 } else {
4600 #endif // CONFIG_HIGHBITDEPTH
4601 val_u = src_u[r * src_stride + c];
4602 val_v = src_v[r * src_stride + c];
4603 data[(r * cols + c) * 2] = val_u;
4604 data[(r * cols + c) * 2 + 1] = val_v;
4605 #if CONFIG_HIGHBITDEPTH
4607 #endif // CONFIG_HIGHBITDEPTH
4608 if (val_u < lb_u)
4609 lb_u = val_u;
4610 else if (val_u > ub_u)
4611 ub_u = val_u;
4612 if (val_v < lb_v)
4613 lb_v = val_v;
4614 else if (val_v > ub_v)
4615 ub_v = val_v;
4619 for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
4620 --n) {
4621 for (i = 0; i < n; ++i) {
4622 centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
4623 centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
4625 av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
4626 #if CONFIG_PALETTE_DELTA_ENCODING
4627 optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
4628 // Sort the U channel colors in ascending order.
4629 for (i = 0; i < 2 * (n - 1); i += 2) {
4630 int min_idx = i;
4631 float min_val = centroids[i];
4632 for (j = i + 2; j < 2 * n; j += 2)
4633 if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
4634 if (min_idx != i) {
4635 float temp_u = centroids[i], temp_v = centroids[i + 1];
4636 centroids[i] = centroids[min_idx];
4637 centroids[i + 1] = centroids[min_idx + 1];
4638 centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
4641 av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);
4642 #endif // CONFIG_PALETTE_DELTA_ENCODING
4643 extend_palette_color_map(color_map, cols, rows, plane_block_width,
4644 plane_block_height);
4645 pmi->palette_size[1] = n;
4646 for (i = 1; i < 3; ++i) {
4647 for (j = 0; j < n; ++j) {
4648 #if CONFIG_HIGHBITDEPTH
4649 if (cpi->common.use_highbitdepth)
4650 pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
4651 (int)centroids[j * 2 + i - 1], cpi->common.bit_depth);
4652 else
4653 #endif // CONFIG_HIGHBITDEPTH
4654 pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
4655 clip_pixel((int)centroids[j * 2 + i - 1]);
4659 super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
4660 if (tokenonly_rd_stats.rate == INT_MAX) continue;
4661 this_rate =
4662 tokenonly_rd_stats.rate + dc_mode_cost +
4663 cpi->palette_uv_size_cost[bsize - BLOCK_8X8][n - PALETTE_MIN_SIZE] +
4664 write_uniform_cost(n, color_map[0]) +
4665 av1_cost_bit(
4666 av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 1);
4667 this_rate += av1_palette_color_cost_uv(pmi,
4668 #if CONFIG_PALETTE_DELTA_ENCODING
4669 color_cache, n_cache,
4670 #endif // CONFIG_PALETTE_DELTA_ENCODING
4671 cpi->common.bit_depth);
4672 for (i = 0; i < rows; ++i) {
4673 for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
4674 int color_idx;
4675 const int color_ctx = av1_get_palette_color_index_context(
4676 color_map, plane_block_width, i, j, n, color_order, &color_idx);
4677 assert(color_idx >= 0 && color_idx < n);
4678 this_rate += cpi->palette_uv_color_cost[n - PALETTE_MIN_SIZE]
4679 [color_ctx][color_idx];
4683 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
4684 if (this_rd < *best_rd) {
4685 *best_rd = this_rd;
4686 *best_mbmi = *mbmi;
4687 memcpy(best_palette_color_map, color_map,
4688 plane_block_width * plane_block_height *
4689 sizeof(best_palette_color_map[0]));
4690 *rate = this_rate;
4691 *distortion = tokenonly_rd_stats.dist;
4692 *rate_tokenonly = tokenonly_rd_stats.rate;
4693 *skippable = tokenonly_rd_stats.skip;
4697 if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
4698 memcpy(color_map, best_palette_color_map,
4699 rows * cols * sizeof(best_palette_color_map[0]));
4702 #endif // CONFIG_PALETTE
4704 #if CONFIG_FILTER_INTRA
4705 // Return 1 if a filter intra mode is selected; return 0 otherwise.
4706 static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
4707 int *rate, int *rate_tokenonly,
4708 int64_t *distortion, int *skippable,
4709 BLOCK_SIZE bsize, int64_t *best_rd) {
4710 MACROBLOCKD *const xd = &x->e_mbd;
4711 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
4712 int filter_intra_selected_flag = 0;
4713 int this_rate;
4714 int64_t this_rd;
4715 FILTER_INTRA_MODE mode;
4716 FILTER_INTRA_MODE_INFO filter_intra_mode_info;
4717 RD_STATS tokenonly_rd_stats;
4719 av1_zero(filter_intra_mode_info);
4720 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 1;
4721 mbmi->uv_mode = DC_PRED;
4722 #if CONFIG_PALETTE
4723 mbmi->palette_mode_info.palette_size[1] = 0;
4724 #endif // CONFIG_PALETTE
4726 for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
4727 mbmi->filter_intra_mode_info.filter_intra_mode[1] = mode;
4728 if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd))
4729 continue;
4731 this_rate = tokenonly_rd_stats.rate +
4732 av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 1) +
4733 cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
4734 write_uniform_cost(FILTER_INTRA_MODES, mode);
4735 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
4736 if (this_rd < *best_rd) {
4737 *best_rd = this_rd;
4738 *rate = this_rate;
4739 *rate_tokenonly = tokenonly_rd_stats.rate;
4740 *distortion = tokenonly_rd_stats.dist;
4741 *skippable = tokenonly_rd_stats.skip;
4742 filter_intra_mode_info = mbmi->filter_intra_mode_info;
4743 filter_intra_selected_flag = 1;
4747 if (filter_intra_selected_flag) {
4748 mbmi->uv_mode = DC_PRED;
4749 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
4750 filter_intra_mode_info.use_filter_intra_mode[1];
4751 mbmi->filter_intra_mode_info.filter_intra_mode[1] =
4752 filter_intra_mode_info.filter_intra_mode[1];
4753 return 1;
4754 } else {
4755 return 0;
4758 #endif // CONFIG_FILTER_INTRA
4760 #if CONFIG_EXT_INTRA
4761 // Run RD calculation with given chroma intra prediction angle, and return
4762 // the RD cost. Update the best mode info if the RD cost is the best so far.
4763 static int64_t pick_intra_angle_routine_sbuv(
4764 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
4765 int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
4766 int *best_angle_delta, int64_t *best_rd) {
4767 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
4768 assert(!is_inter_block(mbmi));
4769 int this_rate;
4770 int64_t this_rd;
4771 RD_STATS tokenonly_rd_stats;
4773 if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
4774 return INT64_MAX;
4775 this_rate = tokenonly_rd_stats.rate + rate_overhead;
4776 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
4777 if (this_rd < *best_rd) {
4778 *best_rd = this_rd;
4779 *best_angle_delta = mbmi->angle_delta[1];
4780 *rate = this_rate;
4781 rd_stats->rate = tokenonly_rd_stats.rate;
4782 rd_stats->dist = tokenonly_rd_stats.dist;
4783 rd_stats->skip = tokenonly_rd_stats.skip;
4785 return this_rd;
4788 // With given chroma directional intra prediction mode, pick the best angle
4789 // delta. Return true if an RD cost that is smaller than the input one is found.
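// The search mirrors the luma version above: even angle deltas are tried
// first (both signs), then odd deltas are refined only where an even
// neighbor was competitive.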
4790 static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
4791 BLOCK_SIZE bsize, int rate_overhead,
4792 int64_t best_rd, int *rate,
4793 RD_STATS *rd_stats) {
4794 MACROBLOCKD *const xd = &x->e_mbd;
4795 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
4796 assert(!is_inter_block(mbmi));
4797 int i, angle_delta, best_angle_delta = 0;
4798 int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
4800 rd_stats->rate = INT_MAX;
4801 rd_stats->skip = 0;
4802 rd_stats->dist = INT64_MAX;
4803 for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
4805 for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
4806 for (i = 0; i < 2; ++i) {
4807 best_rd_in = (best_rd == INT64_MAX)
4808 ? INT64_MAX
4809 : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
4810 mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
4811 this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
4812 best_rd_in, rate, rd_stats,
4813 &best_angle_delta, &best_rd);
4814 rd_cost[2 * angle_delta + i] = this_rd;
4815 if (angle_delta == 0) {
4816 if (this_rd == INT64_MAX) return 0;
4817 rd_cost[1] = this_rd;
4818 break;
4823 assert(best_rd != INT64_MAX);
4824 for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
4825 int64_t rd_thresh;
4826 for (i = 0; i < 2; ++i) {
4827 int skip_search = 0;
4828 rd_thresh = best_rd + (best_rd >> 5);
4829 if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
4830 rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
4831 skip_search = 1;
4832 if (!skip_search) {
4833 mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
4834 pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
4835 rate, rd_stats, &best_angle_delta,
4836 &best_rd);
4841 mbmi->angle_delta[1] = best_angle_delta;
4842 return rd_stats->rate != INT_MAX;
4844 #endif // CONFIG_EXT_INTRA
4846 static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
4847 mbmi->uv_mode = DC_PRED;
4848 #if CONFIG_PALETTE
4849 mbmi->palette_mode_info.palette_size[1] = 0;
4850 #endif // CONFIG_PALETTE
4851 #if CONFIG_FILTER_INTRA
4852 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
4853 #endif // CONFIG_FILTER_INTRA
4856 static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
4857 int *rate, int *rate_tokenonly,
4858 int64_t *distortion, int *skippable,
4859 BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
4860 MACROBLOCKD *xd = &x->e_mbd;
4861 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
4862 assert(!is_inter_block(mbmi));
4863 MB_MODE_INFO best_mbmi = *mbmi;
4864 PREDICTION_MODE mode;
4865 int64_t best_rd = INT64_MAX, this_rd;
4866 int this_rate;
4867 RD_STATS tokenonly_rd_stats;
4868 #if CONFIG_PVQ
4869 od_rollback_buffer buf;
4870 od_encode_checkpoint(&x->daala_enc, &buf);
4871 #endif // CONFIG_PVQ
4872 #if CONFIG_PALETTE
4873 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4874 uint8_t *best_palette_color_map = NULL;
4875 #endif // CONFIG_PALETTE
4877 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
4878 #if CONFIG_EXT_INTRA
4879 const int is_directional_mode =
4880 av1_is_directional_mode(mode, mbmi->sb_type);
4881 #endif // CONFIG_EXT_INTRA
4882 if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
4883 (1 << mode)))
4884 continue;
4886 mbmi->uv_mode = mode;
4887 #if CONFIG_EXT_INTRA
4888 mbmi->angle_delta[1] = 0;
4889 if (is_directional_mode) {
4890 const int rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
4891 write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
4892 if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
4893 &this_rate, &tokenonly_rd_stats))
4894 continue;
4895 } else {
4896 #endif // CONFIG_EXT_INTRA
4897 if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
4898 #if CONFIG_PVQ
4899 od_encode_rollback(&x->daala_enc, &buf);
4900 #endif // CONFIG_PVQ
4901 continue;
4903 #if CONFIG_EXT_INTRA
4905 #endif // CONFIG_EXT_INTRA
4906 this_rate =
4907 tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode];
4909 #if CONFIG_EXT_INTRA
4910 if (is_directional_mode) {
4911 this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
4912 MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
4914 #endif // CONFIG_EXT_INTRA
4915 #if CONFIG_FILTER_INTRA
4916 if (mbmi->sb_type >= BLOCK_8X8 && mode == DC_PRED)
4917 this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 0);
4918 #endif // CONFIG_FILTER_INTRA
4919 #if CONFIG_PALETTE
4920 if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8 &&
4921 mode == DC_PRED)
4922 this_rate += av1_cost_bit(
4923 av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
4924 #endif // CONFIG_PALETTE
4926 #if CONFIG_PVQ
4927 od_encode_rollback(&x->daala_enc, &buf);
4928 #endif // CONFIG_PVQ
4929 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
4931 if (this_rd < best_rd) {
4932 best_mbmi = *mbmi;
4933 best_rd = this_rd;
4934 *rate = this_rate;
4935 *rate_tokenonly = tokenonly_rd_stats.rate;
4936 *distortion = tokenonly_rd_stats.dist;
4937 *skippable = tokenonly_rd_stats.skip;
4941 #if CONFIG_PALETTE
4942 if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8) {
4943 best_palette_color_map = x->palette_buffer->best_palette_color_map;
4944 rd_pick_palette_intra_sbuv(cpi, x,
4945 cpi->intra_uv_mode_cost[mbmi->mode][DC_PRED],
4946 best_palette_color_map, &best_mbmi, &best_rd,
4947 rate, rate_tokenonly, distortion, skippable);
4949 #endif // CONFIG_PALETTE
4951 #if CONFIG_FILTER_INTRA
4952 if (mbmi->sb_type >= BLOCK_8X8) {
4953 if (rd_pick_filter_intra_sbuv(cpi, x, rate, rate_tokenonly, distortion,
4954 skippable, bsize, &best_rd))
4955 best_mbmi = *mbmi;
4957 #endif // CONFIG_FILTER_INTRA
4959 *mbmi = best_mbmi;
4960 // Make sure we actually chose a mode
4961 assert(best_rd < INT64_MAX);
4962 return best_rd;
4965 static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
4966 PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
4967 TX_SIZE max_tx_size, int *rate_uv,
4968 int *rate_uv_tokenonly, int64_t *dist_uv,
4969 int *skip_uv, PREDICTION_MODE *mode_uv) {
4970 // Use an estimated rd for uv_intra based on DC_PRED if the
4971 // appropriate speed flag is set.
4972 (void)ctx;
4973 init_sbuv_mode(&x->e_mbd.mi[0]->mbmi);
4974 #if CONFIG_CB4X4
4975 #if CONFIG_CHROMA_2X2
4976 rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
4977 bsize, max_tx_size);
4978 #else
4979 if (x->skip_chroma_rd) {
4980 *rate_uv = 0;
4981 *rate_uv_tokenonly = 0;
4982 *dist_uv = 0;
4983 *skip_uv = 1;
4984 *mode_uv = DC_PRED;
4985 return;
4987 BLOCK_SIZE bs = scale_chroma_bsize(bsize, x->e_mbd.plane[1].subsampling_x,
4988 x->e_mbd.plane[1].subsampling_y);
4989 rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
4990 bs, max_tx_size);
4991 #endif // CONFIG_CHROMA_2X2
4992 #else
4993 rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
4994 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
4995 #endif // CONFIG_CB4X4
4996 *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
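// cost_mv_ref() below mirrors the cascade of binary decisions used to signal
// an inter mode: a NEWMV/non-NEWMV bit first, then (unless every candidate MV
// is zero) a ZEROMV bit, and finally a NEARESTMV/NEARMV bit whose context can
// be overridden by the SKIP_NEAREST/NEAR flags packed into mode_context.
// For example, a NEARMV cost accumulates roughly
//   newmv_mode_cost[ctx][1] + zeromv_mode_cost[ctx'][1] + refmv_mode_cost[ctx''][1].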
4999 static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
5000 int16_t mode_context) {
5001 #if CONFIG_EXT_INTER
5002 if (is_inter_compound_mode(mode)) {
5003 return cpi
5004 ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
5006 #endif
5008 int mode_cost = 0;
5009 int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
5010 int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET);
5012 assert(is_inter_mode(mode));
5014 if (mode == NEWMV) {
5015 mode_cost = cpi->newmv_mode_cost[mode_ctx][0];
5016 return mode_cost;
5017 } else {
5018 mode_cost = cpi->newmv_mode_cost[mode_ctx][1];
5019 mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
5021 if (is_all_zero_mv) return mode_cost;
5023 if (mode == ZEROMV) {
5024 mode_cost += cpi->zeromv_mode_cost[mode_ctx][0];
5025 return mode_cost;
5026 } else {
5027 mode_cost += cpi->zeromv_mode_cost[mode_ctx][1];
5028 mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
5030 if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;
5031 if (mode_context & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;
5032 if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;
5034 mode_cost += cpi->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
5035 return mode_cost;
5040 #if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
5041 static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
5042 COMPOUND_TYPE comp_type) {
5043 (void)bsize;
5044 switch (comp_type) {
5045 case COMPOUND_AVERAGE: return 0;
5046 #if CONFIG_WEDGE
5047 case COMPOUND_WEDGE: return get_interinter_wedge_bits(bsize);
5048 #endif // CONFIG_WEDGE
5049 #if CONFIG_COMPOUND_SEGMENT
5050 case COMPOUND_SEG: return 1;
5051 #endif // CONFIG_COMPOUND_SEGMENT
5052 default: assert(0); return 0;
5055 #endif // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
5057 typedef struct {
5058 int eobs;
5059 int brate;
5060 int byrate;
5061 int64_t bdist;
5062 int64_t bsse;
5063 int64_t brdcost;
5064 int_mv mvs[2];
5065 int_mv pred_mv[2];
5066 #if CONFIG_EXT_INTER
5067 int_mv ref_mv[2];
5068 #endif // CONFIG_EXT_INTER
5070 #if CONFIG_CHROMA_2X2
5071 ENTROPY_CONTEXT ta[4];
5072 ENTROPY_CONTEXT tl[4];
5073 #else
5074 ENTROPY_CONTEXT ta[2];
5075 ENTROPY_CONTEXT tl[2];
5076 #endif // CONFIG_CHROMA_2X2
5077 } SEG_RDSTAT;
5079 typedef struct {
5080 int_mv *ref_mv[2];
5081 int_mv mvp;
5083 int64_t segment_rd;
5084 int r;
5085 int64_t d;
5086 int64_t sse;
5087 int segment_yrate;
5088 PREDICTION_MODE modes[4];
5089 #if CONFIG_EXT_INTER
5090 SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
5091 #else
5092 SEG_RDSTAT rdstat[4][INTER_MODES];
5093 #endif // CONFIG_EXT_INTER
5094 int mvthresh;
5095 } BEST_SEG_INFO;
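// Motion vectors are stored in 1/8-pel units, so mv_check_bounds() shifts by
// 3 to get the full-pel position before comparing against the full-pel search
// limits; a nonzero return means the vector is out of range.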
5097 static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
5098 return (mv->row >> 3) < mv_limits->row_min ||
5099 (mv->row >> 3) > mv_limits->row_max ||
5100 (mv->col >> 3) < mv_limits->col_min ||
5101 (mv->col >> 3) > mv_limits->col_max;
5104 // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
5105 // TODO(aconverse): Find out if this is still productive then clean up or remove
5106 static int check_best_zero_mv(
5107 const AV1_COMP *const cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
5108 #if CONFIG_EXT_INTER
5109 const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
5110 #endif // CONFIG_EXT_INTER
5111 int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
5112 const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
5113 int mi_row, int mi_col) {
5114 int_mv zeromv[2];
5115 int comp_pred_mode = ref_frames[1] > INTRA_FRAME;
5116 int cur_frm;
5117 (void)mi_row;
5118 (void)mi_col;
5119 for (cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
5120 #if CONFIG_GLOBAL_MOTION
5121 if (this_mode == ZEROMV
5122 #if CONFIG_EXT_INTER
5123 || this_mode == ZERO_ZEROMV
5124 #endif // CONFIG_EXT_INTER
5126 zeromv[cur_frm].as_int =
5127 gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
5128 cpi->common.allow_high_precision_mv, bsize,
5129 mi_col, mi_row, block)
5130 .as_int;
5131 else
5132 #endif // CONFIG_GLOBAL_MOTION
5133 zeromv[cur_frm].as_int = 0;
5135 #if !CONFIG_EXT_INTER
5136 assert(ref_frames[1] != INTRA_FRAME); // Just sanity check
5137 #endif // !CONFIG_EXT_INTER
5138 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
5139 frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
5140 (ref_frames[1] <= INTRA_FRAME ||
5141 frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) {
5142 int16_t rfc =
5143 av1_mode_context_analyzer(mode_context, ref_frames, bsize, block);
5144 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
5145 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
5146 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
5148 if (this_mode == NEARMV) {
5149 if (c1 > c3) return 0;
5150 } else if (this_mode == NEARESTMV) {
5151 if (c2 > c3) return 0;
5152 } else {
5153 assert(this_mode == ZEROMV);
5154 if (ref_frames[1] <= INTRA_FRAME) {
5155 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
5156 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
5157 return 0;
5158 } else {
5159 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
5160 frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
5161 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
5162 frame_mv[NEARMV][ref_frames[1]].as_int == 0))
5163 return 0;
5167 #if CONFIG_EXT_INTER
5168 else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5169 this_mode == ZERO_ZEROMV) &&
5170 frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
5171 frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
5172 int16_t rfc = compound_mode_context[ref_frames[0]];
5173 int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, rfc);
5174 int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, rfc);
5175 int c5 = cost_mv_ref(cpi, NEAR_NEARMV, rfc);
5177 if (this_mode == NEAREST_NEARESTMV) {
5178 if (c2 > c3) return 0;
5179 } else if (this_mode == NEAR_NEARMV) {
5180 if (c5 > c3) return 0;
5181 } else {
5182 assert(this_mode == ZERO_ZEROMV);
5183 if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
5184 frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
5185 (c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 &&
5186 frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0))
5187 return 0;
5190 #endif // CONFIG_EXT_INTER
5191 return 1;
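// joint_motion_search() alternately refines the two MVs of a compound mode,
// up to four passes. On each pass the other reference's prediction is rebuilt
// at its current MV and passed in as second_pred, so both the refining search
// and the fractional step minimise the error of the combined (averaged or
// masked) prediction rather than of either reference alone; the loop stops as
// soon as a pass fails to improve on last_besterr[] for that reference.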
5194 static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
5195 BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
5196 int mi_col,
5197 #if CONFIG_EXT_INTER
5198 int_mv *ref_mv_sub8x8[2], const uint8_t *mask,
5199 int mask_stride,
5200 #endif // CONFIG_EXT_INTER
5201 int *rate_mv, const int block) {
5202 const AV1_COMMON *const cm = &cpi->common;
5203 const int pw = block_size_wide[bsize];
5204 const int ph = block_size_high[bsize];
5205 MACROBLOCKD *xd = &x->e_mbd;
5206 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
5207 // This function should only ever be called for compound modes
5208 assert(has_second_ref(mbmi));
5209 const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
5210 int_mv ref_mv[2];
5211 int ite, ref;
5212 #if CONFIG_DUAL_FILTER
5213 InterpFilter interp_filter[4] = {
5214 mbmi->interp_filter[0], mbmi->interp_filter[1], mbmi->interp_filter[2],
5215 mbmi->interp_filter[3],
5217 #else
5218 const InterpFilter interp_filter = mbmi->interp_filter;
5219 #endif // CONFIG_DUAL_FILTER
5220 struct scale_factors sf;
5221 struct macroblockd_plane *const pd = &xd->plane[0];
5222 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5223 // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
5224 const int ic = block & 1;
5225 const int ir = (block - ic) >> 1;
5226 const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
5227 const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
5228 #if CONFIG_GLOBAL_MOTION
5229 int is_global[2];
5230 for (ref = 0; ref < 2; ++ref) {
5231 WarpedMotionParams *const wm =
5232 &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
5233 is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
5235 #endif // CONFIG_GLOBAL_MOTION
5236 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5238 // Do joint motion search in compound mode to get more accurate mv.
5239 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
5240 int last_besterr[2] = { INT_MAX, INT_MAX };
5241 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
5242 av1_get_scaled_ref_frame(cpi, refs[0]),
5243 av1_get_scaled_ref_frame(cpi, refs[1])
5246 // Prediction buffer from second frame.
5247 #if CONFIG_HIGHBITDEPTH
5248 DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
5249 uint8_t *second_pred;
5250 #else
5251 DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
5252 #endif // CONFIG_HIGHBITDEPTH
5254 #if CONFIG_EXT_INTER && CONFIG_CB4X4
5255 (void)ref_mv_sub8x8;
5256 #endif // CONFIG_EXT_INTER && CONFIG_CB4X4
5258 for (ref = 0; ref < 2; ++ref) {
5259 #if CONFIG_EXT_INTER && !CONFIG_CB4X4
5260 if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
5261 ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
5262 else
5263 #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
5264 ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
5266 if (scaled_ref_frame[ref]) {
5267 int i;
5268 // Swap out the reference frame for a version that's been scaled to
5269 // match the resolution of the current frame, allowing the existing
5270 // motion search code to be used without additional modifications.
5271 for (i = 0; i < MAX_MB_PLANE; i++)
5272 backup_yv12[ref][i] = xd->plane[i].pre[ref];
5273 av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
5274 NULL);
5278 // Since we have scaled the reference frames to match the size of the current
5279 // frame we must use a unit scaling factor during mode selection.
5280 #if CONFIG_HIGHBITDEPTH
5281 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
5282 cm->height, cm->use_highbitdepth);
5283 #else
5284 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
5285 cm->height);
5286 #endif // CONFIG_HIGHBITDEPTH
5288 // Allow the joint search to iterate several times over each reference frame,
5289 // and break out of the search loop if it cannot find a better mv.
5290 for (ite = 0; ite < 4; ite++) {
5291 struct buf_2d ref_yv12[2];
5292 int bestsme = INT_MAX;
5293 int sadpb = x->sadperbit16;
5294 MV *const best_mv = &x->best_mv.as_mv;
5295 int search_range = 3;
5297 MvLimits tmp_mv_limits = x->mv_limits;
5298 int id = ite % 2; // Even iterations search in the first reference frame,
5299 // odd iterations search in the second. The predictor
5300 // found for the 'other' reference frame is factored in.
5301 const int plane = 0;
5302 ConvolveParams conv_params = get_conv_params(0, plane);
5303 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5304 WarpTypesAllowed warp_types;
5305 #if CONFIG_GLOBAL_MOTION
5306 warp_types.global_warp_allowed = is_global[!id];
5307 #endif // CONFIG_GLOBAL_MOTION
5308 #if CONFIG_WARPED_MOTION
5309 warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
5310 #endif // CONFIG_WARPED_MOTION
5311 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5313 // Initialized here because of compiler problem in Visual Studio.
5314 ref_yv12[0] = xd->plane[plane].pre[0];
5315 ref_yv12[1] = xd->plane[plane].pre[1];
5317 #if CONFIG_DUAL_FILTER
5318 // reload the filter types
5319 interp_filter[0] =
5320 (id == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0];
5321 interp_filter[1] =
5322 (id == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1];
5323 #endif // CONFIG_DUAL_FILTER
5325 // Get the prediction block from the 'other' reference frame.
5326 #if CONFIG_HIGHBITDEPTH
5327 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5328 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
5329 av1_highbd_build_inter_predictor(
5330 ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
5331 &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, interp_filter,
5332 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5333 &warp_types, p_col, p_row,
5334 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5335 plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
5336 } else {
5337 second_pred = (uint8_t *)second_pred_alloc_16;
5338 #endif // CONFIG_HIGHBITDEPTH
5339 av1_build_inter_predictor(
5340 ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
5341 &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter,
5342 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5343 &warp_types, p_col, p_row, plane, !id,
5344 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5345 MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
5346 #if CONFIG_HIGHBITDEPTH
5348 #endif // CONFIG_HIGHBITDEPTH
5350 // Do compound motion search on the current reference frame.
5351 if (id) xd->plane[plane].pre[0] = ref_yv12[id];
5352 av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
5354 // Use the mv result from the single mode as mv predictor.
5355 *best_mv = frame_mv[refs[id]].as_mv;
5357 best_mv->col >>= 3;
5358 best_mv->row >>= 3;
5360 av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
5362 // Small-range full-pixel motion search.
5363 bestsme =
5364 av1_refining_search_8p_c(x, sadpb, search_range, &cpi->fn_ptr[bsize],
5365 #if CONFIG_EXT_INTER
5366 mask, mask_stride, id,
5367 #endif
5368 &ref_mv[id].as_mv, second_pred);
5369 if (bestsme < INT_MAX) {
5370 #if CONFIG_EXT_INTER
5371 if (mask)
5372 bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
5373 second_pred, mask, mask_stride, id,
5374 &cpi->fn_ptr[bsize], 1);
5375 else
5376 #endif
5377 bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
5378 second_pred, &cpi->fn_ptr[bsize], 1);
5381 x->mv_limits = tmp_mv_limits;
5383 if (bestsme < INT_MAX) {
5384 int dis; /* TODO: use dis in distortion calculation later. */
5385 unsigned int sse;
5386 if (cpi->sf.use_upsampled_references) {
5387 // Use up-sampled reference frames.
5388 struct buf_2d backup_pred = pd->pre[0];
5389 const YV12_BUFFER_CONFIG *upsampled_ref =
5390 get_upsampled_ref(cpi, refs[id]);
5392 // Set pred for Y plane
5393 setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
5394 upsampled_ref->y_crop_width,
5395 upsampled_ref->y_crop_height, upsampled_ref->y_stride,
5396 (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
5397 pd->subsampling_y);
5399 // If bsize < BLOCK_8X8, adjust pred pointer for this block
5400 #if !CONFIG_CB4X4
5401 if (bsize < BLOCK_8X8)
5402 pd->pre[0].buf =
5403 &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
5404 pd->pre[0].stride))
5405 << 3];
5406 #endif // !CONFIG_CB4X4
5408 bestsme = cpi->find_fractional_mv_step(
5409 x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
5410 x->errorperbit, &cpi->fn_ptr[bsize], 0,
5411 cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
5412 &dis, &sse, second_pred,
5413 #if CONFIG_EXT_INTER
5414 mask, mask_stride, id,
5415 #endif
5416 pw, ph, 1);
5418 // Restore the reference frames.
5419 pd->pre[0] = backup_pred;
5420 } else {
5421 (void)block;
5422 bestsme = cpi->find_fractional_mv_step(
5423 x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
5424 x->errorperbit, &cpi->fn_ptr[bsize], 0,
5425 cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
5426 &dis, &sse, second_pred,
5427 #if CONFIG_EXT_INTER
5428 mask, mask_stride, id,
5429 #endif
5430 pw, ph, 0);
5434 // Restore the pointer to the first (possibly scaled) prediction buffer.
5435 if (id) xd->plane[plane].pre[0] = ref_yv12[0];
5437 if (bestsme < last_besterr[id]) {
5438 frame_mv[refs[id]].as_mv = *best_mv;
5439 last_besterr[id] = bestsme;
5440 } else {
5441 break;
5445 *rate_mv = 0;
5447 for (ref = 0; ref < 2; ++ref) {
5448 if (scaled_ref_frame[ref]) {
5449 // Restore the prediction frame pointers to their unscaled versions.
5450 int i;
5451 for (i = 0; i < MAX_MB_PLANE; i++)
5452 xd->plane[i].pre[ref] = backup_yv12[ref][i];
5454 av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
5455 #if CONFIG_EXT_INTER && !CONFIG_CB4X4
5456 if (bsize >= BLOCK_8X8)
5457 #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
5458 *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
5459 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
5460 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
5461 #if CONFIG_EXT_INTER && !CONFIG_CB4X4
5462 else
5463 *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
5464 &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
5465 x->mvcost, MV_COST_WEIGHT);
5466 #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
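// estimate_ref_frame_costs() accumulates, per reference frame, the bit costs
// along the reference signalling trees: the intra/inter bit plus one
// av1_cost_bit() term per node on the path to that reference. When the
// segment pins the reference the costs are zeroed, and a tree the current
// reference mode cannot use gets a flat 512 placeholder (roughly the cost of
// a single even-odds bit) instead.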
5470 static void estimate_ref_frame_costs(const AV1_COMMON *cm,
5471 const MACROBLOCKD *xd, int segment_id,
5472 unsigned int *ref_costs_single,
5473 unsigned int *ref_costs_comp,
5474 aom_prob *comp_mode_p) {
5475 int seg_ref_active =
5476 segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
5477 if (seg_ref_active) {
5478 memset(ref_costs_single, 0,
5479 TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
5480 memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
5481 *comp_mode_p = 128;
5482 } else {
5483 aom_prob intra_inter_p = av1_get_intra_inter_prob(cm, xd);
5484 aom_prob comp_inter_p = 128;
5486 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
5487 comp_inter_p = av1_get_reference_mode_prob(cm, xd);
5488 *comp_mode_p = comp_inter_p;
5489 } else {
5490 *comp_mode_p = 128;
5493 ref_costs_single[INTRA_FRAME] = av1_cost_bit(intra_inter_p, 0);
5495 if (cm->reference_mode != COMPOUND_REFERENCE) {
5496 aom_prob ref_single_p1 = av1_get_pred_prob_single_ref_p1(cm, xd);
5497 aom_prob ref_single_p2 = av1_get_pred_prob_single_ref_p2(cm, xd);
5498 #if CONFIG_EXT_REFS
5499 aom_prob ref_single_p3 = av1_get_pred_prob_single_ref_p3(cm, xd);
5500 aom_prob ref_single_p4 = av1_get_pred_prob_single_ref_p4(cm, xd);
5501 aom_prob ref_single_p5 = av1_get_pred_prob_single_ref_p5(cm, xd);
5502 #endif // CONFIG_EXT_REFS
5504 unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);
5506 ref_costs_single[LAST_FRAME] =
5507 #if CONFIG_EXT_REFS
5508 ref_costs_single[LAST2_FRAME] = ref_costs_single[LAST3_FRAME] =
5509 ref_costs_single[BWDREF_FRAME] =
5510 #endif // CONFIG_EXT_REFS
5511 ref_costs_single[GOLDEN_FRAME] =
5512 ref_costs_single[ALTREF_FRAME] = base_cost;
5514 #if CONFIG_EXT_REFS
5515 ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
5516 ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p1, 0);
5517 ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p1, 0);
5518 ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 0);
5519 ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
5520 ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
5522 ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p3, 0);
5523 ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p3, 0);
5524 ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p3, 1);
5525 ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p3, 1);
5527 ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p2, 0);
5528 ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
5530 ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p4, 0);
5531 ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p4, 1);
5533 ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
5534 ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);
5535 #else
5536 ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
5537 ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1);
5538 ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
5540 ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p2, 0);
5541 ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
5542 #endif // CONFIG_EXT_REFS
5543 } else {
5544 ref_costs_single[LAST_FRAME] = 512;
5545 #if CONFIG_EXT_REFS
5546 ref_costs_single[LAST2_FRAME] = 512;
5547 ref_costs_single[LAST3_FRAME] = 512;
5548 ref_costs_single[BWDREF_FRAME] = 512;
5549 #endif // CONFIG_EXT_REFS
5550 ref_costs_single[GOLDEN_FRAME] = 512;
5551 ref_costs_single[ALTREF_FRAME] = 512;
5554 if (cm->reference_mode != SINGLE_REFERENCE) {
5555 aom_prob ref_comp_p = av1_get_pred_prob_comp_ref_p(cm, xd);
5556 #if CONFIG_EXT_REFS
5557 aom_prob ref_comp_p1 = av1_get_pred_prob_comp_ref_p1(cm, xd);
5558 aom_prob ref_comp_p2 = av1_get_pred_prob_comp_ref_p2(cm, xd);
5559 aom_prob bwdref_comp_p = av1_get_pred_prob_comp_bwdref_p(cm, xd);
5560 #endif // CONFIG_EXT_REFS
5562 unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);
5564 ref_costs_comp[LAST_FRAME] =
5565 #if CONFIG_EXT_REFS
5566 ref_costs_comp[LAST2_FRAME] = ref_costs_comp[LAST3_FRAME] =
5567 #endif // CONFIG_EXT_REFS
5568 ref_costs_comp[GOLDEN_FRAME] = base_cost;
5570 #if CONFIG_EXT_REFS
5571 ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF_FRAME] = 0;
5572 #endif // CONFIG_EXT_REFS
5574 #if CONFIG_EXT_REFS
5575 ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
5576 ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
5577 ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
5578 ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
5580 ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1);
5581 ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0);
5583 ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
5584 ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);
5586 // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
5587 // more bit.
5588 ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
5589 ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
5590 #else
5591 ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
5592 ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
5593 #endif // CONFIG_EXT_REFS
5594 } else {
5595 ref_costs_comp[LAST_FRAME] = 512;
5596 #if CONFIG_EXT_REFS
5597 ref_costs_comp[LAST2_FRAME] = 512;
5598 ref_costs_comp[LAST3_FRAME] = 512;
5599 ref_costs_comp[BWDREF_FRAME] = 512;
5600 ref_costs_comp[ALTREF_FRAME] = 512;
5601 #endif // CONFIG_EXT_REFS
5602 ref_costs_comp[GOLDEN_FRAME] = 512;
5607 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
5608 int mode_index,
5609 int64_t comp_pred_diff[REFERENCE_MODES],
5610 int skippable) {
5611 MACROBLOCKD *const xd = &x->e_mbd;
5613 // Take a snapshot of the coding context so it can be
5614 // restored if we decide to encode this way
5615 ctx->skip = x->skip;
5616 ctx->skippable = skippable;
5617 ctx->best_mode_index = mode_index;
5618 ctx->mic = *xd->mi[0];
5619 ctx->mbmi_ext = *x->mbmi_ext;
5620 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
5621 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
5622 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
5625 static void setup_buffer_inter(
5626 const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
5627 BLOCK_SIZE block_size, int mi_row, int mi_col,
5628 int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
5629 int_mv frame_near_mv[TOTAL_REFS_PER_FRAME],
5630 struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
5631 const AV1_COMMON *cm = &cpi->common;
5632 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
5633 MACROBLOCKD *const xd = &x->e_mbd;
5634 MODE_INFO *const mi = xd->mi[0];
5635 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
5636 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
5637 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
5639 assert(yv12 != NULL);
5641 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
5642 // use the UV scaling factors.
5643 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
5645 // Gets an initial list of candidate vectors from neighbours and orders them
5646 av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
5647 mbmi_ext->ref_mv_stack[ref_frame],
5648 #if CONFIG_EXT_INTER
5649 mbmi_ext->compound_mode_context,
5650 #endif // CONFIG_EXT_INTER
5651 candidates, mi_row, mi_col, NULL, NULL,
5652 mbmi_ext->mode_context);
5654 // Candidate refinement carried out at encoder and decoder
5655 av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
5656 &frame_nearest_mv[ref_frame],
5657 &frame_near_mv[ref_frame]);
5659 // Further refinement that is encode side only to test the top few candidates
5660 // in full and choose the best as the centre point for subsequent searches.
5661 // The current implementation doesn't support scaling.
5662 #if CONFIG_CB4X4
5663 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
5664 block_size);
5665 #else
5666 if (!av1_is_scaled(sf) && block_size >= BLOCK_8X8)
5667 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
5668 block_size);
5669 #endif // CONFIG_CB4X4
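// single_motion_search(): full-pel search followed by sub-pel refinement for
// a single reference. The initial step size is derived from the previous
// frame's MV magnitudes and the block size; with adaptive_motion_search the
// search may be skipped when this reference's predicted-MV SAD is far worse
// than another reference's. When up-sampled references are enabled the
// fractional step runs against the up-sampled buffer, and the second-best
// full-pel candidate is also given a chance.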
5672 static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
5673 BLOCK_SIZE bsize, int mi_row, int mi_col,
5674 #if CONFIG_EXT_INTER
5675 int ref_idx,
5676 #endif // CONFIG_EXT_INTER
5677 int *rate_mv) {
5678 MACROBLOCKD *xd = &x->e_mbd;
5679 const AV1_COMMON *cm = &cpi->common;
5680 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
5681 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
5682 int bestsme = INT_MAX;
5683 int step_param;
5684 int sadpb = x->sadperbit16;
5685 MV mvp_full;
5686 #if CONFIG_EXT_INTER
5687 int ref = mbmi->ref_frame[ref_idx];
5688 #else
5689 int ref = mbmi->ref_frame[0];
5690 int ref_idx = 0;
5691 #endif // CONFIG_EXT_INTER
5692 MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
5694 MvLimits tmp_mv_limits = x->mv_limits;
5695 int cost_list[5];
5697 const YV12_BUFFER_CONFIG *scaled_ref_frame =
5698 av1_get_scaled_ref_frame(cpi, ref);
5700 MV pred_mv[3];
5701 pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
5702 pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
5703 pred_mv[2] = x->pred_mv[ref];
5705 if (scaled_ref_frame) {
5706 int i;
5707 // Swap out the reference frame for a version that's been scaled to
5708 // match the resolution of the current frame, allowing the existing
5709 // motion search code to be used without additional modifications.
5710 for (i = 0; i < MAX_MB_PLANE; i++)
5711 backup_yv12[i] = xd->plane[i].pre[ref_idx];
5713 av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
5716 av1_set_mv_search_range(&x->mv_limits, &ref_mv);
5718 av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
5720 // Work out the size of the first step in the mv step search.
5721 // 0 here means the maximum-length first step; 1 means half that, etc.
5722 if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
5723 // Take a weighted average of the step_param based on the last frame's
5724 // max mv magnitude and the step_param based on the best ref mvs of the
5725 // current block for the given reference.
5726 step_param =
5727 (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
5729 } else {
5730 step_param = cpi->mv_step_param;
5733 if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
5734 int boffset =
5735 2 * (b_width_log2_lookup[cm->sb_size] -
5736 AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
5737 step_param = AOMMAX(step_param, boffset);
5740 if (cpi->sf.adaptive_motion_search) {
5741 int bwl = b_width_log2_lookup[bsize];
5742 int bhl = b_height_log2_lookup[bsize];
5743 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
5745 if (tlevel < 5) step_param += 2;
5747 // prev_mv_sad is not setup for dynamically scaled frames.
5748 if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
5749 int i;
5750 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
5751 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
5752 x->pred_mv[ref].row = 0;
5753 x->pred_mv[ref].col = 0;
5754 x->best_mv.as_int = INVALID_MV;
5756 if (scaled_ref_frame) {
5757 int j;
5758 for (j = 0; j < MAX_MB_PLANE; ++j)
5759 xd->plane[j].pre[ref_idx] = backup_yv12[j];
5761 return;
5767 av1_set_mv_search_range(&x->mv_limits, &ref_mv);
5769 #if CONFIG_MOTION_VAR
5770 if (mbmi->motion_mode != SIMPLE_TRANSLATION)
5771 mvp_full = mbmi->mv[0].as_mv;
5772 else
5773 #endif // CONFIG_MOTION_VAR
5774 mvp_full = pred_mv[x->mv_best_ref_index[ref]];
5776 mvp_full.col >>= 3;
5777 mvp_full.row >>= 3;
5779 x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
5781 #if CONFIG_MOTION_VAR
5782 switch (mbmi->motion_mode) {
5783 case SIMPLE_TRANSLATION:
5784 #endif // CONFIG_MOTION_VAR
5785 bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
5786 sadpb, cond_cost_list(cpi, cost_list),
5787 &ref_mv, INT_MAX, 1);
5788 #if CONFIG_MOTION_VAR
5789 break;
5790 case OBMC_CAUSAL:
5791 bestsme = av1_obmc_full_pixel_diamond(
5792 cpi, x, &mvp_full, step_param, sadpb,
5793 MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
5794 &(x->best_mv.as_mv), 0);
5795 break;
5796 default: assert(0 && "Invalid motion mode!\n");
5798 #endif // CONFIG_MOTION_VAR
5800 x->mv_limits = tmp_mv_limits;
5802 if (bestsme < INT_MAX) {
5803 int dis; /* TODO: use dis in distortion calculation later. */
5804 #if CONFIG_MOTION_VAR
5805 switch (mbmi->motion_mode) {
5806 case SIMPLE_TRANSLATION:
5807 #endif // CONFIG_MOTION_VAR
5808 if (cpi->sf.use_upsampled_references) {
5809 int best_mv_var;
5810 const int try_second = x->second_best_mv.as_int != INVALID_MV &&
5811 x->second_best_mv.as_int != x->best_mv.as_int;
5812 const int pw = block_size_wide[bsize];
5813 const int ph = block_size_high[bsize];
5814 // Use up-sampled reference frames.
5815 struct macroblockd_plane *const pd = &xd->plane[0];
5816 struct buf_2d backup_pred = pd->pre[ref_idx];
5817 const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
5819 // Set pred for Y plane
5820 setup_pred_plane(
5821 &pd->pre[ref_idx], bsize, upsampled_ref->y_buffer,
5822 upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
5823 upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL,
5824 pd->subsampling_x, pd->subsampling_y);
5826 best_mv_var = cpi->find_fractional_mv_step(
5827 x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
5828 &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
5829 cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
5830 x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
5831 #if CONFIG_EXT_INTER
5832 NULL, 0, 0,
5833 #endif
5834 pw, ph, 1);
5836 if (try_second) {
5837 const int minc =
5838 AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
5839 const int maxc =
5840 AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
5841 const int minr =
5842 AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
5843 const int maxr =
5844 AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
5845 int this_var;
5846 MV best_mv = x->best_mv.as_mv;
5848 x->best_mv = x->second_best_mv;
5849 if (x->best_mv.as_mv.row * 8 <= maxr &&
5850 x->best_mv.as_mv.row * 8 >= minr &&
5851 x->best_mv.as_mv.col * 8 <= maxc &&
5852 x->best_mv.as_mv.col * 8 >= minc) {
5853 this_var = cpi->find_fractional_mv_step(
5854 x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
5855 &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
5856 cpi->sf.mv.subpel_iters_per_step,
5857 cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
5858 &dis, &x->pred_sse[ref], NULL,
5859 #if CONFIG_EXT_INTER
5860 NULL, 0, 0,
5861 #endif
5862 pw, ph, 1);
5863 if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
5864 x->best_mv.as_mv = best_mv;
5868 // Restore the reference frames.
5869 pd->pre[ref_idx] = backup_pred;
5870 } else {
5871 cpi->find_fractional_mv_step(
5872 x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
5873 &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
5874 cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
5875 x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
5876 #if CONFIG_EXT_INTER
5877 NULL, 0, 0,
5878 #endif
5879 0, 0, 0);
5881 #if CONFIG_MOTION_VAR
5882 break;
5883 case OBMC_CAUSAL:
5884 av1_find_best_obmc_sub_pixel_tree_up(
5885 cpi, x, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv,
5886 cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
5887 cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
5888 x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0,
5889 cpi->sf.use_upsampled_references);
5890 break;
5891 default: assert(0 && "Invalid motion mode!\n");
5893 #endif // CONFIG_MOTION_VAR
5895 *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
5896 x->mvcost, MV_COST_WEIGHT);
5898 #if CONFIG_MOTION_VAR
5899 if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
5900 #else
5901 if (cpi->sf.adaptive_motion_search)
5902 #endif // CONFIG_MOTION_VAR
5903 x->pred_mv[ref] = x->best_mv.as_mv;
5905 if (scaled_ref_frame) {
5906 int i;
5907 for (i = 0; i < MAX_MB_PLANE; i++)
5908 xd->plane[i].pre[ref_idx] = backup_yv12[i];
5912 static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst) {
5913 int i;
5914 for (i = 0; i < MAX_MB_PLANE; i++) {
5915 xd->plane[i].dst.buf = dst.plane[i];
5916 xd->plane[i].dst.stride = dst.stride[i];
5920 #if CONFIG_EXT_INTER
5921 static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
5922 BLOCK_SIZE bsize, const MV *other_mv,
5923 int mi_row, int mi_col, const int block,
5924 int ref_idx, uint8_t *second_pred) {
5925 const AV1_COMMON *const cm = &cpi->common;
5926 const int pw = block_size_wide[bsize];
5927 const int ph = block_size_high[bsize];
5928 MACROBLOCKD *xd = &x->e_mbd;
5929 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
5930 const int other_ref = mbmi->ref_frame[!ref_idx];
5931 #if CONFIG_DUAL_FILTER
5932 InterpFilter interp_filter[2] = {
5933 (ref_idx == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0],
5934 (ref_idx == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1]
5936 #else
5937 const InterpFilter interp_filter = mbmi->interp_filter;
5938 #endif // CONFIG_DUAL_FILTER
5939 struct scale_factors sf;
5940 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5941 struct macroblockd_plane *const pd = &xd->plane[0];
5942 // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
5943 const int ic = block & 1;
5944 const int ir = (block - ic) >> 1;
5945 const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
5946 const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
5947 #if CONFIG_GLOBAL_MOTION
5948 WarpedMotionParams *const wm = &xd->global_motion[other_ref];
5949 int is_global = is_global_mv_block(xd->mi[0], block, wm->wmtype);
5950 #endif // CONFIG_GLOBAL_MOTION
5951 #else
5952 (void)block;
5953 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5955 // This function should only ever be called for compound modes
5956 assert(has_second_ref(mbmi));
5958 struct buf_2d backup_yv12[MAX_MB_PLANE];
5959 const YV12_BUFFER_CONFIG *const scaled_ref_frame =
5960 av1_get_scaled_ref_frame(cpi, other_ref);
5962 if (scaled_ref_frame) {
5963 int i;
5964 // Swap out the reference frame for a version that's been scaled to
5965 // match the resolution of the current frame, allowing the existing
5966 // motion search code to be used without additional modifications.
5967 for (i = 0; i < MAX_MB_PLANE; i++)
5968 backup_yv12[i] = xd->plane[i].pre[!ref_idx];
5969 av1_setup_pre_planes(xd, !ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
5972 // Since we have scaled the reference frames to match the size of the current
5973 // frame we must use a unit scaling factor during mode selection.
5974 #if CONFIG_HIGHBITDEPTH
5975 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
5976 cm->height, cm->use_highbitdepth);
5977 #else
5978 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
5979 cm->height);
5980 #endif // CONFIG_HIGHBITDEPTH
5982 struct buf_2d ref_yv12;
5984 const int plane = 0;
5985 ConvolveParams conv_params = get_conv_params(0, plane);
5986 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5987 WarpTypesAllowed warp_types;
5988 #if CONFIG_GLOBAL_MOTION
5989 warp_types.global_warp_allowed = is_global;
5990 #endif // CONFIG_GLOBAL_MOTION
5991 #if CONFIG_WARPED_MOTION
5992 warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
5993 #endif // CONFIG_WARPED_MOTION
5994 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5996 // Initialized here because of compiler problem in Visual Studio.
5997 ref_yv12 = xd->plane[plane].pre[!ref_idx];
5999 // Get the prediction block from the 'other' reference frame.
6000 #if CONFIG_HIGHBITDEPTH
6001 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6002 av1_highbd_build_inter_predictor(
6003 ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
6004 0, interp_filter,
6005 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6006 &warp_types, p_col, p_row,
6007 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6008 plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
6009 } else {
6010 #endif // CONFIG_HIGHBITDEPTH
6011 av1_build_inter_predictor(
6012 ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
6013 &conv_params, interp_filter,
6014 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6015 &warp_types, p_col, p_row, plane, !ref_idx,
6016 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6017 MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
6018 #if CONFIG_HIGHBITDEPTH
6020 #endif // CONFIG_HIGHBITDEPTH
6022 if (scaled_ref_frame) {
6023 // Restore the prediction frame pointers to their unscaled versions.
6024 int i;
6025 for (i = 0; i < MAX_MB_PLANE; i++)
6026 xd->plane[i].pre[!ref_idx] = backup_yv12[i];
6030 // Search for the best mv for one component of a compound,
6031 // given that the other component is fixed.
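// The fixed component arrives as second_pred (built by the caller); the
// search is a small-range (+/- 3 full-pel) refining search around the
// single-mode MV followed by a fractional-MV step, both scoring the compound
// (averaged or mask-blended) prediction against the source.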
6032 static void compound_single_motion_search(
6033 const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv,
6034 int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask,
6035 int mask_stride, int *rate_mv, const int block, int ref_idx) {
6036 const int pw = block_size_wide[bsize];
6037 const int ph = block_size_high[bsize];
6038 MACROBLOCKD *xd = &x->e_mbd;
6039 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6040 const int ref = mbmi->ref_frame[ref_idx];
6041 int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
6042 struct macroblockd_plane *const pd = &xd->plane[0];
6044 struct buf_2d backup_yv12[MAX_MB_PLANE];
6045 const YV12_BUFFER_CONFIG *const scaled_ref_frame =
6046 av1_get_scaled_ref_frame(cpi, ref);
6048 // Check that this is either an interinter or an interintra block
6049 assert(has_second_ref(mbmi) ||
6050 (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));
6052 if (scaled_ref_frame) {
6053 int i;
6054 // Swap out the reference frame for a version that's been scaled to
6055 // match the resolution of the current frame, allowing the existing
6056 // motion search code to be used without additional modifications.
6057 for (i = 0; i < MAX_MB_PLANE; i++)
6058 backup_yv12[i] = xd->plane[i].pre[ref_idx];
6059 av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
6062 struct buf_2d orig_yv12;
6063 int bestsme = INT_MAX;
6064 int sadpb = x->sadperbit16;
6065 MV *const best_mv = &x->best_mv.as_mv;
6066 int search_range = 3;
6068 MvLimits tmp_mv_limits = x->mv_limits;
6070 // Initialized here because of compiler problem in Visual Studio.
6071 if (ref_idx) {
6072 orig_yv12 = pd->pre[0];
6073 pd->pre[0] = pd->pre[ref_idx];
6076 // Do compound motion search on the current reference frame.
6077 av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);
6079 // Use the mv result from the single mode as mv predictor.
6080 *best_mv = *this_mv;
6082 best_mv->col >>= 3;
6083 best_mv->row >>= 3;
6085 av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
6087 // Small-range full-pixel motion search.
6088 bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
6089 &cpi->fn_ptr[bsize], mask, mask_stride,
6090 ref_idx, &ref_mv.as_mv, second_pred);
6091 if (bestsme < INT_MAX) {
6092 if (mask)
6093 bestsme =
6094 av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
6095 mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
6096 else
6097 bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
6098 &cpi->fn_ptr[bsize], 1);
6101 x->mv_limits = tmp_mv_limits;
6103 if (bestsme < INT_MAX) {
6104 int dis; /* TODO: use dis in distortion calculation later. */
6105 unsigned int sse;
6106 if (cpi->sf.use_upsampled_references) {
6107 // Use up-sampled reference frames.
6108 struct buf_2d backup_pred = pd->pre[0];
6109 const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
6111 // Set pred for Y plane
6112 setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
6113 upsampled_ref->y_crop_width,
6114 upsampled_ref->y_crop_height, upsampled_ref->y_stride,
6115 (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
6116 pd->subsampling_y);
6118 // If bsize < BLOCK_8X8, adjust pred pointer for this block
6119 #if !CONFIG_CB4X4
6120 if (bsize < BLOCK_8X8)
6121 pd->pre[0].buf =
6122 &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
6123 pd->pre[0].stride))
6124 << 3];
6125 #endif // !CONFIG_CB4X4
6127 bestsme = cpi->find_fractional_mv_step(
6128 x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
6129 &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
6130 x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
6131 mask_stride, ref_idx, pw, ph, 1);
6133 // Restore the reference frames.
6134 pd->pre[0] = backup_pred;
6135 } else {
6136 (void)block;
6137 bestsme = cpi->find_fractional_mv_step(
6138 x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
6139 &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
6140 x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
6141 mask_stride, ref_idx, pw, ph, 0);
6145 // Restore the pointer to the first (possibly scaled) prediction buffer.
6146 if (ref_idx) pd->pre[0] = orig_yv12;
6148 if (bestsme < INT_MAX) *this_mv = *best_mv;
6150 *rate_mv = 0;
6152 if (scaled_ref_frame) {
6153 // Restore the prediction frame pointers to their unscaled versions.
6154 int i;
6155 for (i = 0; i < MAX_MB_PLANE; i++)
6156 xd->plane[i].pre[ref_idx] = backup_yv12[i];
6159 av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
6160 *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
6161 x->mvcost, MV_COST_WEIGHT);
6164 // Wrapper for compound_single_motion_search, for the common case
6165 // where the second prediction is also an inter mode.
6166 static void compound_single_motion_search_interinter(
6167 const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
6168 int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
6169 const int block, int ref_idx) {
6170 MACROBLOCKD *xd = &x->e_mbd;
6171 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6173 // This function should only ever be called for compound modes
6174 assert(has_second_ref(mbmi));
6176 // Prediction buffer from second frame.
6177 #if CONFIG_HIGHBITDEPTH
6178 DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
6179 uint8_t *second_pred;
6180 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
6181 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
6182 else
6183 second_pred = (uint8_t *)second_pred_alloc_16;
6184 #else
6185 DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
6186 #endif // CONFIG_HIGHBITDEPTH
6188 MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
6189 const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;
6191 build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
6192 ref_idx, second_pred);
6194 compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
6195 second_pred, mask, mask_stride, rate_mv, block,
6196 ref_idx);
6199 #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
6200 static void do_masked_motion_search_indexed(
6201 const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
6202 const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
6203 int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
6204 // NOTE: 'which' selects the reference(s) to search: 0 - ref 0 only, 1 - ref 1 only, 2 - both (joint search)
6205 MACROBLOCKD *xd = &x->e_mbd;
6206 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6207 BLOCK_SIZE sb_type = mbmi->sb_type;
6208 const uint8_t *mask;
6209 const int mask_stride = block_size_wide[bsize];
6211 mask = av1_get_compound_type_mask(comp_data, sb_type);
6213 int_mv frame_mv[TOTAL_REFS_PER_FRAME];
6214 MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
6215 assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4);
6217 frame_mv[rf[0]].as_int = cur_mv[0].as_int;
6218 frame_mv[rf[1]].as_int = cur_mv[1].as_int;
6219 if (which == 0 || which == 1) {
6220 compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row,
6221 mi_col, mask, mask_stride, rate_mv,
6222 0, which);
6223 } else if (which == 2) {
6224 joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask,
6225 mask_stride, rate_mv, 0);
6227 tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
6228 tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
6230 #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
6231 #endif // CONFIG_EXT_INTER
6233 // In some situations we want to discount the apparent cost of a new motion
6234 // vector. Where there is a subtle motion field, and especially where there is
6235 // low spatial complexity, it can be hard to cover the cost of a new motion
6236 // vector in a single block, even if that motion vector reduces distortion.
6237 // However, once established that vector may be usable through the nearest and
6238 // near mv modes to reduce distortion in subsequent blocks and also improve
6239 // visual quality.
6240 static int discount_newmv_test(const AV1_COMP *const cpi, int this_mode,
6241 int_mv this_mv,
6242 int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
6243 int ref_frame) {
6244 return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
6245 (this_mv.as_int != 0) &&
6246 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
6247 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
6248 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
6249 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
6252 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
6253 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
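// The margins are expressed in 1/8-pel units (hence the << 3): clamping keeps
// a fractional MV far enough inside the padded reference border that the
// interpolation filter taps should never read outside the allocated frame.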
6255 // TODO(jingning): this mv clamping function should be block size dependent.
6256 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
6257 clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
6258 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
6259 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
6260 xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
6263 #if CONFIG_EXT_INTER
6264 #if CONFIG_WEDGE
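// estimate_wedge_sign() splits the block into four quadrants and measures,
// per quadrant, how well each of the two single predictions matches the
// source. The returned sign roughly picks the orientation in which each
// predictor is kept over the region it already fits better, avoiding an
// explicit RD search over both signs at this stage.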
6265 static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
6266 const BLOCK_SIZE bsize, const uint8_t *pred0,
6267 int stride0, const uint8_t *pred1, int stride1) {
6268 const struct macroblock_plane *const p = &x->plane[0];
6269 const uint8_t *src = p->src.buf;
6270 int src_stride = p->src.stride;
6271 const int f_index = bsize - BLOCK_8X8;
6272 const int bw = block_size_wide[bsize];
6273 const int bh = block_size_high[bsize];
6274 uint32_t esq[2][4];
6275 int64_t tl, br;
6277 #if CONFIG_HIGHBITDEPTH
6278 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6279 pred0 = CONVERT_TO_BYTEPTR(pred0);
6280 pred1 = CONVERT_TO_BYTEPTR(pred1);
6282 #endif // CONFIG_HIGHBITDEPTH
6284 cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
6285 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0,
6286 &esq[0][1]);
6287 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
6288 pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
6289 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
6290 pred0 + bh / 2 * stride0 + bw / 2, stride0,
6291 &esq[0][3]);
6292 cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
6293 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1,
6294 &esq[1][1]);
6295 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
6296 pred1 + bh / 2 * stride1, stride1, &esq[1][2]);
6297 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
6298 pred1 + bh / 2 * stride1 + bw / 2, stride1,
6299 &esq[1][3]);
6301 tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
6302 (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
6303 br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
6304 (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
6305 return (tl + br > 0);
6307 #endif // CONFIG_WEDGE
6308 #endif // CONFIG_EXT_INTER
6310 #if !CONFIG_DUAL_FILTER
6311 static InterpFilter predict_interp_filter(
6312 const AV1_COMP *cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize,
6313 const int mi_row, const int mi_col,
6314 InterpFilter (*single_filter)[TOTAL_REFS_PER_FRAME]) {
6315 InterpFilter best_filter = SWITCHABLE;
6316 const AV1_COMMON *cm = &cpi->common;
6317 const MACROBLOCKD *xd = &x->e_mbd;
6318 int bsl = mi_width_log2_lookup[bsize];
6319 int pred_filter_search =
6320 cpi->sf.cb_pred_filter_search
6321 ? (((mi_row + mi_col) >> bsl) +
6322 get_chessboard_index(cm->current_video_frame)) &
6324 : 0;
6325 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6326 const int is_comp_pred = has_second_ref(mbmi);
6327 const int this_mode = mbmi->mode;
6328 int refs[2] = { mbmi->ref_frame[0],
6329 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
6330 if (pred_filter_search) {
6331 InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
6332 if (xd->up_available) af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
6333 if (xd->left_available) lf = xd->mi[-1]->mbmi.interp_filter;
6335 #if CONFIG_EXT_INTER
6336 if ((this_mode != NEWMV && this_mode != NEW_NEWMV) || (af == lf))
6337 #else
6338 if ((this_mode != NEWMV) || (af == lf))
6339 #endif // CONFIG_EXT_INTER
6340 best_filter = af;
6342 if (is_comp_pred) {
6343 if (cpi->sf.adaptive_mode_search) {
6344 #if CONFIG_EXT_INTER
6345 switch (this_mode) {
6346 case NEAREST_NEARESTMV:
6347 if (single_filter[NEARESTMV][refs[0]] ==
6348 single_filter[NEARESTMV][refs[1]])
6349 best_filter = single_filter[NEARESTMV][refs[0]];
6350 break;
6351 case NEAR_NEARMV:
6352 if (single_filter[NEARMV][refs[0]] == single_filter[NEARMV][refs[1]])
6353 best_filter = single_filter[NEARMV][refs[0]];
6354 break;
6355 case ZERO_ZEROMV:
6356 if (single_filter[ZEROMV][refs[0]] == single_filter[ZEROMV][refs[1]])
6357 best_filter = single_filter[ZEROMV][refs[0]];
6358 break;
6359 case NEW_NEWMV:
6360 if (single_filter[NEWMV][refs[0]] == single_filter[NEWMV][refs[1]])
6361 best_filter = single_filter[NEWMV][refs[0]];
6362 break;
6363 case NEAREST_NEWMV:
6364 if (single_filter[NEARESTMV][refs[0]] ==
6365 single_filter[NEWMV][refs[1]])
6366 best_filter = single_filter[NEARESTMV][refs[0]];
6367 break;
6368 case NEAR_NEWMV:
6369 if (single_filter[NEARMV][refs[0]] == single_filter[NEWMV][refs[1]])
6370 best_filter = single_filter[NEARMV][refs[0]];
6371 break;
6372 case NEW_NEARESTMV:
6373 if (single_filter[NEWMV][refs[0]] ==
6374 single_filter[NEARESTMV][refs[1]])
6375 best_filter = single_filter[NEWMV][refs[0]];
6376 break;
6377 case NEW_NEARMV:
6378 if (single_filter[NEWMV][refs[0]] == single_filter[NEARMV][refs[1]])
6379 best_filter = single_filter[NEWMV][refs[0]];
6380 break;
6381 default:
6382 if (single_filter[this_mode][refs[0]] ==
6383 single_filter[this_mode][refs[1]])
6384 best_filter = single_filter[this_mode][refs[0]];
6385 break;
6387 #else
6388 if (single_filter[this_mode][refs[0]] ==
6389 single_filter[this_mode][refs[1]])
6390 best_filter = single_filter[this_mode][refs[0]];
6391 #endif // CONFIG_EXT_INTER
6394 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
6395 best_filter = EIGHTTAP_REGULAR;
6397 return best_filter;
6399 #endif // !CONFIG_DUAL_FILTER
6401 #if CONFIG_EXT_INTER
6402 // Choose the best wedge index and sign
6403 #if CONFIG_WEDGE
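// For each candidate wedge index, derive the wedge sign from the residual
// energy split (sign_limit), compute the masked SSE directly from the
// residuals r1 and d10 = p1 - p0, convert that to a modeled rate/distortion,
// and keep the index/sign pair with the lowest RD cost.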
6404 static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
6405 const BLOCK_SIZE bsize, const uint8_t *const p0,
6406 const uint8_t *const p1, int *const best_wedge_sign,
6407 int *const best_wedge_index) {
6408 const MACROBLOCKD *const xd = &x->e_mbd;
6409 const struct buf_2d *const src = &x->plane[0].src;
6410 const int bw = block_size_wide[bsize];
6411 const int bh = block_size_high[bsize];
6412 const int N = bw * bh;
6413 int rate;
6414 int64_t dist;
6415 int64_t rd, best_rd = INT64_MAX;
6416 int wedge_index;
6417 int wedge_sign;
6418 int wedge_types = (1 << get_wedge_bits_lookup(bsize));
6419 const uint8_t *mask;
6420 uint64_t sse;
6421 #if CONFIG_HIGHBITDEPTH
6422 const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
6423 const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
6424 #else
6425 const int bd_round = 0;
6426 #endif // CONFIG_HIGHBITDEPTH
6428 DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
6429 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
6430 DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
6431 DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
6433 int64_t sign_limit;
6435 #if CONFIG_HIGHBITDEPTH
6436 if (hbd) {
6437 aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
6438 CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6439 aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
6440 CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
6441 aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
6442 CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6443 } else // NOLINT
6444 #endif // CONFIG_HIGHBITDEPTH
6446 aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
6447 aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
6448 aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
6451 sign_limit = ((int64_t)aom_sum_squares_i16(r0, N) -
6452 (int64_t)aom_sum_squares_i16(r1, N)) *
6453 (1 << WEDGE_WEIGHT_BITS) / 2;
6455 if (N < 64)
6456 av1_wedge_compute_delta_squares_c(ds, r0, r1, N);
6457 else
6458 av1_wedge_compute_delta_squares(ds, r0, r1, N);
6460 for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
6461 mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);
6463 // TODO(jingning): Make sse2 functions support N = 16 case
6464 if (N < 64)
6465 wedge_sign = av1_wedge_sign_from_residuals_c(ds, mask, N, sign_limit);
6466 else
6467 wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
6469 mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
6470 if (N < 64)
6471 sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
6472 else
6473 sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
6474 sse = ROUND_POWER_OF_TWO(sse, bd_round);
6476 model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
6477 rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
6479 if (rd < best_rd) {
6480 *best_wedge_index = wedge_index;
6481 *best_wedge_sign = wedge_sign;
6482 best_rd = rd;
6486 return best_rd;
6489 // Choose the best wedge index for the specified sign
6490 static int64_t pick_wedge_fixed_sign(
6491 const AV1_COMP *const cpi, const MACROBLOCK *const x,
6492 const BLOCK_SIZE bsize, const uint8_t *const p0, const uint8_t *const p1,
6493 const int wedge_sign, int *const best_wedge_index) {
6494 const MACROBLOCKD *const xd = &x->e_mbd;
6495 const struct buf_2d *const src = &x->plane[0].src;
6496 const int bw = block_size_wide[bsize];
6497 const int bh = block_size_high[bsize];
6498 const int N = bw * bh;
6499 int rate;
6500 int64_t dist;
6501 int64_t rd, best_rd = INT64_MAX;
6502 int wedge_index;
6503 int wedge_types = (1 << get_wedge_bits_lookup(bsize));
6504 const uint8_t *mask;
6505 uint64_t sse;
6506 #if CONFIG_HIGHBITDEPTH
6507 const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
6508 const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
6509 #else
6510 const int bd_round = 0;
6511 #endif // CONFIG_HIGHBITDEPTH
6513 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
6514 DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
6516 #if CONFIG_HIGHBITDEPTH
6517 if (hbd) {
6518 aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
6519 CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
6520 aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
6521 CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6522 } else // NOLINT
6523 #endif // CONFIG_HIGHBITDEPTH
6525 aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
6526 aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
6529 for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
6530 mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
6531 if (N < 64)
6532 sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
6533 else
6534 sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
6535 sse = ROUND_POWER_OF_TWO(sse, bd_round);
6537 model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
6538 rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
6540 if (rd < best_rd) {
6541 *best_wedge_index = wedge_index;
6542 best_rd = rd;
6546 return best_rd;
6549 static int64_t pick_interinter_wedge(const AV1_COMP *const cpi,
6550 MACROBLOCK *const x,
6551 const BLOCK_SIZE bsize,
6552 const uint8_t *const p0,
6553 const uint8_t *const p1) {
6554 MACROBLOCKD *const xd = &x->e_mbd;
6555 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6556 const int bw = block_size_wide[bsize];
6558 int64_t rd;
6559 int wedge_index = -1;
6560 int wedge_sign = 0;
6562 assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
6563 assert(cpi->common.allow_masked_compound);
6565 if (cpi->sf.fast_wedge_sign_estimate) {
6566 wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
6567 rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, wedge_sign, &wedge_index);
6568 } else {
6569 rd = pick_wedge(cpi, x, bsize, p0, p1, &wedge_sign, &wedge_index);
6572 mbmi->wedge_sign = wedge_sign;
6573 mbmi->wedge_index = wedge_index;
6574 return rd;
6576 #endif // CONFIG_WEDGE
6578 #if CONFIG_COMPOUND_SEGMENT
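// Try every compound segmentation mask type: build the mask from the two
// single-reference predictors, estimate the masked SSE from the residuals,
// and keep the mask type with the lowest modeled RD cost. The winning mask
// is rebuilt into xd->seg_mask before returning.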
6579 static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
6580 MACROBLOCK *const x, const BLOCK_SIZE bsize,
6581 const uint8_t *const p0,
6582 const uint8_t *const p1) {
6583 MACROBLOCKD *const xd = &x->e_mbd;
6584 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6585 const struct buf_2d *const src = &x->plane[0].src;
6586 const int bw = block_size_wide[bsize];
6587 const int bh = block_size_high[bsize];
6588 const int N = bw * bh;
6589 int rate;
6590 uint64_t sse;
6591 int64_t dist;
6592 int64_t rd0;
6593 SEG_MASK_TYPE cur_mask_type;
6594 int64_t best_rd = INT64_MAX;
6595 SEG_MASK_TYPE best_mask_type = 0;
6596 #if CONFIG_HIGHBITDEPTH
6597 const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
6598 const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
6599 #else
6600 const int bd_round = 0;
6601 #endif // CONFIG_HIGHBITDEPTH
6602 DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
6603 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
6604 DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
6606 #if CONFIG_HIGHBITDEPTH
6607 if (hbd) {
6608 aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
6609 CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6610 aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
6611 CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
6612 aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
6613 CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6614 } else // NOLINT
6615 #endif // CONFIG_HIGHBITDEPTH
6617 aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
6618 aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
6619 aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
6622 // try each mask type and its inverse
6623 for (cur_mask_type = 0; cur_mask_type < SEG_MASK_TYPES; cur_mask_type++) {
6624 // build mask and inverse
6625 #if CONFIG_HIGHBITDEPTH
6626 if (hbd)
6627 build_compound_seg_mask_highbd(
6628 xd->seg_mask, cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw,
6629 CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
6630 else
6631 #endif // CONFIG_HIGHBITDEPTH
6632 build_compound_seg_mask(xd->seg_mask, cur_mask_type, p0, bw, p1, bw,
6633 bsize, bh, bw);
6635 // compute rd for mask
6636 sse = av1_wedge_sse_from_residuals(r1, d10, xd->seg_mask, N);
6637 sse = ROUND_POWER_OF_TWO(sse, bd_round);
6639 model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
6640 rd0 = RDCOST(x->rdmult, x->rddiv, rate, dist);
6642 if (rd0 < best_rd) {
6643 best_mask_type = cur_mask_type;
6644 best_rd = rd0;
6648 // make final mask
6649 mbmi->mask_type = best_mask_type;
6650 #if CONFIG_HIGHBITDEPTH
6651 if (hbd)
6652 build_compound_seg_mask_highbd(
6653 xd->seg_mask, mbmi->mask_type, CONVERT_TO_BYTEPTR(p0), bw,
6654 CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
6655 else
6656 #endif // CONFIG_HIGHBITDEPTH
6657 build_compound_seg_mask(xd->seg_mask, mbmi->mask_type, p0, bw, p1, bw,
6658 bsize, bh, bw);
6660 return best_rd;
6662 #endif // CONFIG_COMPOUND_SEGMENT
6664 #if CONFIG_WEDGE && CONFIG_INTERINTRA
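// The inter-intra wedge sign is always 0, so only the wedge index is
// searched here.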
6665 static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
6666 const MACROBLOCK *const x,
6667 const BLOCK_SIZE bsize,
6668 const uint8_t *const p0,
6669 const uint8_t *const p1) {
6670 const MACROBLOCKD *const xd = &x->e_mbd;
6671 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6673 int64_t rd;
6674 int wedge_index = -1;
6676 assert(is_interintra_wedge_used(bsize));
6677 assert(cpi->common.allow_interintra_compound);
6679 rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &wedge_index);
6681 mbmi->interintra_wedge_sign = 0;
6682 mbmi->interintra_wedge_index = wedge_index;
6683 return rd;
6685 #endif // CONFIG_WEDGE && CONFIG_INTERINTRA
6687 #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
6688 static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
6689 const BLOCK_SIZE bsize,
6690 const uint8_t *const p0,
6691 const uint8_t *const p1) {
6692 const COMPOUND_TYPE compound_type =
6693 x->e_mbd.mi[0]->mbmi.interinter_compound_type;
6694 switch (compound_type) {
6695 #if CONFIG_WEDGE
6696 case COMPOUND_WEDGE: return pick_interinter_wedge(cpi, x, bsize, p0, p1);
6697 #endif // CONFIG_WEDGE
6698 #if CONFIG_COMPOUND_SEGMENT
6699 case COMPOUND_SEG: return pick_interinter_seg(cpi, x, bsize, p0, p1);
6700 #endif // CONFIG_COMPOUND_SEGMENT
6701 default: assert(0); return 0;
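// Re-run the masked (wedge/segment) motion search for the references that
// carry a NEWMV component: both MVs for NEW_NEWMV, only ref 0 for
// NEW_NEARESTMV/NEW_NEARMV, only ref 1 for NEAREST_NEWMV/NEAR_NEWMV.
// Returns the updated MV signaling rate.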
6705 static int interinter_compound_motion_search(
6706 const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
6707 const BLOCK_SIZE bsize, const int this_mode, int mi_row, int mi_col) {
6708 MACROBLOCKD *const xd = &x->e_mbd;
6709 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6710 int_mv tmp_mv[2];
6711 int tmp_rate_mv = 0;
6712 const INTERINTER_COMPOUND_DATA compound_data = {
6713 #if CONFIG_WEDGE
6714 mbmi->wedge_index,
6715 mbmi->wedge_sign,
6716 #endif // CONFIG_WEDGE
6717 #if CONFIG_COMPOUND_SEGMENT
6718 mbmi->mask_type,
6719 xd->seg_mask,
6720 #endif // CONFIG_COMPOUND_SEGMENT
6721 mbmi->interinter_compound_type
6723 if (this_mode == NEW_NEWMV) {
6724 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
6725 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
6726 mbmi->mv[0].as_int = tmp_mv[0].as_int;
6727 mbmi->mv[1].as_int = tmp_mv[1].as_int;
6728 } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
6729 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
6730 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
6731 mbmi->mv[0].as_int = tmp_mv[0].as_int;
6732 } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
6733 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
6734 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
6735 mbmi->mv[1].as_int = tmp_mv[1].as_int;
6737 return tmp_rate_mv;
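// Pick the best mask for the current compound type; for NEWMV modes the
// MV(s) may be refined with a masked motion search, and the refinement is
// kept only if its modeled RD beats the unrefined estimate. The RD of the
// resulting prediction is then re-estimated with estimate_yrd_for_sb().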
6740 static int64_t build_and_cost_compound_type(
6741 const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
6742 const BLOCK_SIZE bsize, const int this_mode, int rs2, int rate_mv,
6743 BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, uint8_t **preds1,
6744 int *strides, int mi_row, int mi_col) {
6745 const AV1_COMMON *const cm = &cpi->common;
6746 MACROBLOCKD *xd = &x->e_mbd;
6747 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6748 int rate_sum;
6749 int64_t dist_sum;
6750 int64_t best_rd_cur = INT64_MAX;
6751 int64_t rd = INT64_MAX;
6752 int tmp_skip_txfm_sb;
6753 int64_t tmp_skip_sse_sb;
6754 const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type;
6756 best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1);
6757 best_rd_cur += RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv, 0);
6759 if (have_newmv_in_inter_mode(this_mode) &&
6760 use_masked_motion_search(compound_type)) {
6761 *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
6762 this_mode, mi_row, mi_col);
6763 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
6764 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
6765 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
6766 rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
6767 if (rd >= best_rd_cur) {
6768 mbmi->mv[0].as_int = cur_mv[0].as_int;
6769 mbmi->mv[1].as_int = cur_mv[1].as_int;
6770 *out_rate_mv = rate_mv;
6771 av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
6772 #if CONFIG_SUPERTX
6773 0, 0,
6774 #endif // CONFIG_SUPERTX
6775 preds0, strides, preds1,
6776 strides);
6778 av1_subtract_plane(x, bsize, 0);
6779 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
6780 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
6781 if (rd != INT64_MAX)
6782 rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
6783 best_rd_cur = rd;
6785 } else {
6786 av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
6787 #if CONFIG_SUPERTX
6788 0, 0,
6789 #endif // CONFIG_SUPERTX
6790 preds0, strides, preds1, strides);
6791 av1_subtract_plane(x, bsize, 0);
6792 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
6793 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
6794 if (rd != INT64_MAX)
6795 rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
6796 best_rd_cur = rd;
6798 return best_rd_cur;
6800 #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
6801 #endif // CONFIG_EXT_INTER
6803 typedef struct {
6804 #if CONFIG_MOTION_VAR
6805 // Inter prediction buffers and respective strides
6806 uint8_t *above_pred_buf[MAX_MB_PLANE];
6807 int above_pred_stride[MAX_MB_PLANE];
6808 uint8_t *left_pred_buf[MAX_MB_PLANE];
6809 int left_pred_stride[MAX_MB_PLANE];
6810 #endif // CONFIG_MOTION_VAR
6811 int_mv *single_newmv;
6812 #if CONFIG_EXT_INTER
6813 // Pointer to array of motion vectors to use for each ref and their rates
6814 // Should point to first of 2 arrays in 2D array
6815 int *single_newmv_rate;
6816 // Pointer to array of predicted rate-distortion
6817 // Should point to first of 2 arrays in 2D array
6818 int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME];
6819 #endif // CONFIG_EXT_INTER
6820 InterpFilter single_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
6821 } HandleInterModeArgs;
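// Perform the motion search for NEWMV-class modes: a joint or per-reference
// compound search for two-reference modes, or a single-reference search
// otherwise. *rate_mv receives the signaling cost of the resulting motion
// vector(s); returns INT64_MAX if no valid MV is found and 0 otherwise.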
6823 static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
6824 const BLOCK_SIZE bsize,
6825 int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
6826 const int mi_row, const int mi_col,
6827 int *const rate_mv, int_mv *const single_newmv,
6828 HandleInterModeArgs *const args) {
6829 const MACROBLOCKD *const xd = &x->e_mbd;
6830 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6831 const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
6832 const int is_comp_pred = has_second_ref(mbmi);
6833 const PREDICTION_MODE this_mode = mbmi->mode;
6834 #if CONFIG_EXT_INTER
6835 const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
6836 #endif // CONFIG_EXT_INTER
6837 int_mv *const frame_mv = mode_mv[this_mode];
6838 const int refs[2] = { mbmi->ref_frame[0],
6839 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
6840 int i;
6842 (void)args;
6844 if (is_comp_pred) {
6845 #if CONFIG_EXT_INTER
6846 for (i = 0; i < 2; ++i) {
6847 single_newmv[refs[i]].as_int = args->single_newmv[refs[i]].as_int;
6850 if (this_mode == NEW_NEWMV) {
6851 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
6852 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
6854 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
6855 joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL,
6856 0, rate_mv, 0);
6857 } else {
6858 *rate_mv = 0;
6859 for (i = 0; i < 2; ++i) {
6860 av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
6861 *rate_mv += av1_mv_bit_cost(
6862 &frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
6863 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6866 } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
6867 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
6868 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
6869 frame_mv[refs[0]].as_int =
6870 mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
6871 compound_single_motion_search_interinter(
6872 cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
6873 } else {
6874 av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
6875 *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
6876 &mbmi_ext->ref_mvs[refs[1]][0].as_mv,
6877 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6879 } else {
6880 assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
6881 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
6882 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
6883 frame_mv[refs[1]].as_int =
6884 mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
6885 compound_single_motion_search_interinter(
6886 cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
6887 } else {
6888 av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
6889 *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
6890 &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
6891 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6894 #else
6895 // Initialize mv using single prediction mode result.
6896 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
6897 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
6899 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
6900 joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, rate_mv, 0);
6901 } else {
6902 *rate_mv = 0;
6903 for (i = 0; i < 2; ++i) {
6904 av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
6905 *rate_mv += av1_mv_bit_cost(&frame_mv[refs[i]].as_mv,
6906 &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
6907 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6910 #endif // CONFIG_EXT_INTER
6911 } else {
6912 #if CONFIG_EXT_INTER
6913 if (is_comp_interintra_pred) {
6914 x->best_mv = args->single_newmv[refs[0]];
6915 *rate_mv = args->single_newmv_rate[refs[0]];
6916 } else {
6917 single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
6918 args->single_newmv[refs[0]] = x->best_mv;
6919 args->single_newmv_rate[refs[0]] = *rate_mv;
6921 #else
6922 single_motion_search(cpi, x, bsize, mi_row, mi_col, rate_mv);
6923 single_newmv[refs[0]] = x->best_mv;
6924 #endif // CONFIG_EXT_INTER
6926 if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
6928 frame_mv[refs[0]] = x->best_mv;
6929 xd->mi[0]->bmi[0].as_mv[0] = x->best_mv;
6931 // Estimate the rate implications of a new mv but discount this
6932 // under certain circumstances where we want to help initiate a weak
6933 // motion field, where the distortion gain for a single block may not
6934 // be enough to overcome the cost of a new mv.
6935 if (discount_newmv_test(cpi, this_mode, x->best_mv, mode_mv, refs[0])) {
6936 *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
6940 return 0;
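// Search the switchable interpolation filters (filter pairs when dual
// filters are enabled). The default EIGHTTAP_REGULAR combination is
// evaluated before the loop, so the search starts at index 1 and keeps the
// candidate with the lowest modeled RD, ping-ponging the prediction between
// orig_dst and tmp_dst.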
6943 int64_t interpolation_filter_search(
6944 MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
6945 int mi_row, int mi_col, const BUFFER_SET *const tmp_dst,
6946 BUFFER_SET *const orig_dst,
6947 InterpFilter (*const single_filter)[TOTAL_REFS_PER_FRAME],
6948 int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb,
6949 int64_t *const skip_sse_sb) {
6950 const AV1_COMMON *cm = &cpi->common;
6951 MACROBLOCKD *const xd = &x->e_mbd;
6952 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6953 int i;
6954 int tmp_rate;
6955 int64_t tmp_dist;
6957 (void)single_filter;
6959 InterpFilter assign_filter = SWITCHABLE;
6961 if (cm->interp_filter == SWITCHABLE) {
6962 #if !CONFIG_DUAL_FILTER
6963 assign_filter = av1_is_interp_needed(xd)
6964 ? predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
6965 single_filter)
6966 : cm->interp_filter;
6967 #endif // !CONFIG_DUAL_FILTER
6968 } else {
6969 assign_filter = cm->interp_filter;
6972 set_default_interp_filters(mbmi, assign_filter);
6974 *switchable_rate = av1_get_switchable_rate(cpi, xd);
6975 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
6976 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
6977 skip_txfm_sb, skip_sse_sb);
6978 *rd = RDCOST(x->rdmult, x->rddiv, *switchable_rate + tmp_rate, tmp_dist);
6980 if (assign_filter == SWITCHABLE) {
6981 // do interp_filter search
6982 if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd)) {
6983 #if CONFIG_DUAL_FILTER
6984 const int filter_set_size = DUAL_FILTER_SET_SIZE;
6985 #else
6986 const int filter_set_size = SWITCHABLE_FILTERS;
6987 #endif // CONFIG_DUAL_FILTER
6988 int best_in_temp = 0;
6989 #if CONFIG_DUAL_FILTER
6990 InterpFilter best_filter[4];
6991 av1_copy(best_filter, mbmi->interp_filter);
6992 #else
6993 InterpFilter best_filter = mbmi->interp_filter;
6994 #endif // CONFIG_DUAL_FILTER
6995 restore_dst_buf(xd, *tmp_dst);
6996 // EIGHTTAP_REGULAR mode is calculated beforehand
6997 for (i = 1; i < filter_set_size; ++i) {
6998 int tmp_skip_sb = 0;
6999 int64_t tmp_skip_sse = INT64_MAX;
7000 int tmp_rs;
7001 int64_t tmp_rd;
7002 #if CONFIG_DUAL_FILTER
7003 mbmi->interp_filter[0] = filter_sets[i][0];
7004 mbmi->interp_filter[1] = filter_sets[i][1];
7005 mbmi->interp_filter[2] = filter_sets[i][0];
7006 mbmi->interp_filter[3] = filter_sets[i][1];
7007 #else
7008 mbmi->interp_filter = (InterpFilter)i;
7009 #endif // CONFIG_DUAL_FILTER
7010 tmp_rs = av1_get_switchable_rate(cpi, xd);
7011 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
7012 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7013 &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
7014 tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist);
7016 if (tmp_rd < *rd) {
7017 *rd = tmp_rd;
7018 *switchable_rate = av1_get_switchable_rate(cpi, xd);
7019 #if CONFIG_DUAL_FILTER
7020 av1_copy(best_filter, mbmi->interp_filter);
7021 #else
7022 best_filter = mbmi->interp_filter;
7023 #endif // CONFIG_DUAL_FILTER
7024 *skip_txfm_sb = tmp_skip_sb;
7025 *skip_sse_sb = tmp_skip_sse;
7026 best_in_temp = !best_in_temp;
7027 if (best_in_temp) {
7028 restore_dst_buf(xd, *orig_dst);
7029 } else {
7030 restore_dst_buf(xd, *tmp_dst);
7034 if (best_in_temp) {
7035 restore_dst_buf(xd, *tmp_dst);
7036 } else {
7037 restore_dst_buf(xd, *orig_dst);
7039 #if CONFIG_DUAL_FILTER
7040 av1_copy(mbmi->interp_filter, best_filter);
7041 #else
7042 mbmi->interp_filter = best_filter;
7043 #endif // CONFIG_DUAL_FILTER
7044 } else {
7045 #if CONFIG_DUAL_FILTER
7046 for (i = 0; i < 4; ++i)
7047 assert(mbmi->interp_filter[i] == EIGHTTAP_REGULAR);
7048 #else
7049 assert(mbmi->interp_filter == EIGHTTAP_REGULAR);
7050 #endif // CONFIG_DUAL_FILTER
7054 return 0;
7057 // TODO(afergs): Refactor the MBMI references in here - there are four
7058 // TODO(afergs): Refactor optional args - add them to a struct or remove
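// Evaluate the allowed motion modes (SIMPLE_TRANSLATION, OBMC_CAUSAL,
// WARPED_CAUSAL) for the current inter mode, re-running the MV search where
// needed, and keep the one with the lowest RD cost. The original destination
// buffer is restored before returning.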
7059 static int64_t motion_mode_rd(
7060 const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
7061 RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
7062 int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
7063 int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
7064 const int *refs, int rate_mv,
7065 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7066 int_mv *const single_newmv,
7067 #if CONFIG_EXT_INTER
7068 int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi,
7069 #if CONFIG_MOTION_VAR
7070 int rate_mv_bmc,
7071 #endif // CONFIG_MOTION_VAR
7072 #endif // CONFIG_EXT_INTER
7073 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7074 int rs, int *skip_txfm_sb, int64_t *skip_sse_sb, BUFFER_SET *orig_dst) {
7075 const AV1_COMMON *const cm = &cpi->common;
7076 MACROBLOCKD *xd = &x->e_mbd;
7077 MODE_INFO *mi = xd->mi[0];
7078 MB_MODE_INFO *mbmi = &mi->mbmi;
7079 const int is_comp_pred = has_second_ref(mbmi);
7080 const PREDICTION_MODE this_mode = mbmi->mode;
7082 (void)mode_mv;
7083 (void)mi_row;
7084 (void)mi_col;
7085 (void)args;
7086 (void)refs;
7087 (void)rate_mv;
7088 (void)is_comp_pred;
7089 (void)this_mode;
7091 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7092 MOTION_MODE motion_mode, last_motion_mode_allowed;
7093 int rate2_nocoeff = 0, best_xskip, best_disable_skip = 0;
7094 RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
7095 MB_MODE_INFO base_mbmi, best_mbmi;
7096 #if CONFIG_VAR_TX
7097 uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
7098 #endif // CONFIG_VAR_TX
7099 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7101 #if CONFIG_WARPED_MOTION
7102 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
7103 #endif // CONFIG_WARPED_MOTION
7105 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7106 av1_invalid_rd_stats(&best_rd_stats);
7107 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7109 if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
7110 #if CONFIG_WARPED_MOTION
7111 aom_clear_system_state();
7112 mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
7113 #if CONFIG_EXT_INTER
7114 best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
7115 #endif // CONFIG_EXT_INTER
7116 #endif // CONFIG_WARPED_MOTION
7117 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7118 rate2_nocoeff = rd_stats->rate;
7119 last_motion_mode_allowed = motion_mode_allowed(
7120 #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
7121 0, xd->global_motion,
7122 #endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
7123 mi);
7124 base_mbmi = *mbmi;
7125 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7127 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7128 int64_t best_rd = INT64_MAX;
7129 for (motion_mode = SIMPLE_TRANSLATION;
7130 motion_mode <= last_motion_mode_allowed; motion_mode++) {
7131 int64_t tmp_rd = INT64_MAX;
7132 int tmp_rate;
7133 int64_t tmp_dist;
7134 #if CONFIG_EXT_INTER
7135 int tmp_rate2 =
7136 motion_mode != SIMPLE_TRANSLATION ? rate2_bmc_nocoeff : rate2_nocoeff;
7137 #else
7138 int tmp_rate2 = rate2_nocoeff;
7139 #endif // CONFIG_EXT_INTER
7141 *mbmi = base_mbmi;
7142 mbmi->motion_mode = motion_mode;
7143 #if CONFIG_MOTION_VAR
7144 if (mbmi->motion_mode == OBMC_CAUSAL) {
7145 #if CONFIG_EXT_INTER
7146 *mbmi = *best_bmc_mbmi;
7147 mbmi->motion_mode = OBMC_CAUSAL;
7148 #endif // CONFIG_EXT_INTER
7149 if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
7150 int tmp_rate_mv = 0;
7152 single_motion_search(cpi, x, bsize, mi_row, mi_col,
7153 #if CONFIG_EXT_INTER
7154 0,
7155 #endif // CONFIG_EXT_INTER
7156 &tmp_rate_mv);
7157 mbmi->mv[0].as_int = x->best_mv.as_int;
7158 if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
7159 refs[0])) {
7160 tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
7162 #if CONFIG_EXT_INTER
7163 tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
7164 #else
7165 tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
7166 #endif // CONFIG_EXT_INTER
7167 #if CONFIG_DUAL_FILTER
7168 if (!has_subpel_mv_component(xd->mi[0], xd, 0))
7169 mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
7170 if (!has_subpel_mv_component(xd->mi[0], xd, 1))
7171 mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
7172 #endif // CONFIG_DUAL_FILTER
7173 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
7174 #if CONFIG_EXT_INTER
7175 } else {
7176 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
7177 #endif // CONFIG_EXT_INTER
7179 av1_build_obmc_inter_prediction(
7180 cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride,
7181 args->left_pred_buf, args->left_pred_stride);
7182 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7183 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7185 #endif // CONFIG_MOTION_VAR
7187 #if CONFIG_WARPED_MOTION
7188 if (mbmi->motion_mode == WARPED_CAUSAL) {
7189 #if CONFIG_EXT_INTER
7190 *mbmi = *best_bmc_mbmi;
7191 mbmi->motion_mode = WARPED_CAUSAL;
7192 #endif // CONFIG_EXT_INTER
7193 mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
7194 #if CONFIG_DUAL_FILTER
7195 for (int dir = 0; dir < 4; ++dir)
7196 mbmi->interp_filter[dir] = cm->interp_filter == SWITCHABLE
7197 ? EIGHTTAP_REGULAR
7198 : cm->interp_filter;
7199 #else
7200 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
7201 : cm->interp_filter;
7202 #endif // CONFIG_DUAL_FILTER
7204 if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
7205 mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
7206 &mbmi->wm_params[0], mi_row, mi_col)) {
7207 // Refine MV for NEWMV mode
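// The MV is refined in a small range around the warped-model result
// (av1_refine_warped_mv). If the refinement changes the MV, its rate is
// recomputed and the new-MV discount re-applied; otherwise the original MV
// and warp parameters are restored.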
7208 if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
7209 int tmp_rate_mv = 0;
7210 const int_mv mv0 = mbmi->mv[0];
7211 WarpedMotionParams wm_params0 = mbmi->wm_params[0];
7213 // Refine MV in a small range.
7214 av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
7216 // Keep the refined MV and WM parameters.
7217 if (mv0.as_int != mbmi->mv[0].as_int) {
7218 const int ref = refs[0];
7219 const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
7221 tmp_rate_mv =
7222 av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv, x->nmvjointcost,
7223 x->mvcost, MV_COST_WEIGHT);
7225 if (cpi->sf.adaptive_motion_search)
7226 x->pred_mv[ref] = mbmi->mv[0].as_mv;
7228 single_newmv[ref] = mbmi->mv[0];
7230 if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
7231 refs[0])) {
7232 tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
7234 #if CONFIG_EXT_INTER
7235 tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
7236 #else
7237 tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
7238 #endif // CONFIG_EXT_INTER
7239 #if CONFIG_DUAL_FILTER
7240 if (!has_subpel_mv_component(xd->mi[0], xd, 0))
7241 mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
7242 if (!has_subpel_mv_component(xd->mi[0], xd, 1))
7243 mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
7244 #endif // CONFIG_DUAL_FILTER
7245 } else {
7246 // Restore the old MV and WM parameters.
7247 mbmi->mv[0] = mv0;
7248 mbmi->wm_params[0] = wm_params0;
7252 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
7253 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7254 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7255 } else {
7256 continue;
7259 #endif // CONFIG_WARPED_MOTION
7260 x->skip = 0;
7262 rd_stats->dist = 0;
7263 rd_stats->sse = 0;
7264 rd_stats->skip = 1;
7265 rd_stats->rate = tmp_rate2;
7266 if (last_motion_mode_allowed > SIMPLE_TRANSLATION) {
7267 #if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7268 if (last_motion_mode_allowed == WARPED_CAUSAL)
7269 #endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7270 rd_stats->rate += cpi->motion_mode_cost[bsize][mbmi->motion_mode];
7271 #if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7272 else
7273 rd_stats->rate += cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
7274 #endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7276 #if CONFIG_WARPED_MOTION
7277 if (mbmi->motion_mode == WARPED_CAUSAL) {
7278 rd_stats->rate -= rs;
7280 #endif // CONFIG_WARPED_MOTION
7281 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7282 if (!*skip_txfm_sb) {
7283 int64_t rdcosty = INT64_MAX;
7284 int is_cost_valid_uv = 0;
7286 // cost and distortion
7287 av1_subtract_plane(x, bsize, 0);
7288 #if CONFIG_VAR_TX
7289 if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
7290 select_tx_type_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
7291 } else {
7292 int idx, idy;
7293 super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
7294 for (idy = 0; idy < xd->n8_h; ++idy)
7295 for (idx = 0; idx < xd->n8_w; ++idx)
7296 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
7297 memset(x->blk_skip[0], rd_stats_y->skip,
7298 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
7300 #else
7301 /* clang-format off */
7302 super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
7303 /* clang-format on */
7304 #endif // CONFIG_VAR_TX
7306 if (rd_stats_y->rate == INT_MAX) {
7307 av1_invalid_rd_stats(rd_stats);
7308 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7309 if (mbmi->motion_mode != SIMPLE_TRANSLATION) {
7310 continue;
7311 } else {
7312 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7313 restore_dst_buf(xd, *orig_dst);
7314 return INT64_MAX;
7315 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7317 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7320 av1_merge_rd_stats(rd_stats, rd_stats_y);
7322 rdcosty = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
7323 rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
7324 /* clang-format off */
7325 #if CONFIG_VAR_TX
7326 is_cost_valid_uv =
7327 inter_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty);
7328 #else
7329 is_cost_valid_uv =
7330 super_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty);
7331 #endif // CONFIG_VAR_TX
7332 if (!is_cost_valid_uv) {
7333 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7334 continue;
7335 #else
7336 restore_dst_buf(xd, *orig_dst);
7337 return INT64_MAX;
7338 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7340 /* clang-format on */
7341 av1_merge_rd_stats(rd_stats, rd_stats_uv);
7342 #if CONFIG_RD_DEBUG
7343 // record transform block coefficient cost
7344 // TODO(angiebird): So far the rd_debug tool only detects discrepancies in
7345 // the coefficient cost. Therefore it is fine to copy rd_stats into mbmi
7346 // here, because we have already collected the coefficient cost. Move this
7347 // elsewhere when we need to compare non-coefficient costs.
7348 mbmi->rd_stats = *rd_stats;
7349 #endif // CONFIG_RD_DEBUG
7350 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7351 if (rd_stats->skip) {
7352 rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
7353 rd_stats_y->rate = 0;
7354 rd_stats_uv->rate = 0;
7355 rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
7356 mbmi->skip = 0;
7357 // Here mbmi->skip temporarily plays the role that this_skip2 plays elsewhere.
7358 } else if (!xd->lossless[mbmi->segment_id] &&
7359 (RDCOST(x->rdmult, x->rddiv,
7360 rd_stats_y->rate + rd_stats_uv->rate +
7361 av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
7362 rd_stats->dist) >=
7363 RDCOST(x->rdmult, x->rddiv,
7364 av1_cost_bit(av1_get_skip_prob(cm, xd), 1),
7365 rd_stats->sse))) {
7366 rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
7367 rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
7368 rd_stats->dist = rd_stats->sse;
7369 rd_stats_y->rate = 0;
7370 rd_stats_uv->rate = 0;
7371 mbmi->skip = 1;
7372 } else {
7373 rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
7374 mbmi->skip = 0;
7376 *disable_skip = 0;
7377 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7378 } else {
7379 x->skip = 1;
7380 *disable_skip = 1;
7381 mbmi->tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, 1);
7383 // The cost of the skip bit needs to be added.
7384 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7385 mbmi->skip = 0;
7386 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7387 rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
7389 rd_stats->dist = *skip_sse_sb;
7390 rd_stats->sse = *skip_sse_sb;
7391 rd_stats_y->rate = 0;
7392 rd_stats_uv->rate = 0;
7393 rd_stats->skip = 1;
7396 #if CONFIG_GLOBAL_MOTION
7397 if (this_mode == ZEROMV
7398 #if CONFIG_EXT_INTER
7399 || this_mode == ZERO_ZEROMV
7400 #endif // CONFIG_EXT_INTER
7401 ) {
7402 if (is_nontrans_global_motion(xd)) {
7403 rd_stats->rate -= rs;
7404 #if CONFIG_DUAL_FILTER
7405 mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE
7406 ? EIGHTTAP_REGULAR
7407 : cm->interp_filter;
7408 mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE
7409 ? EIGHTTAP_REGULAR
7410 : cm->interp_filter;
7411 #else
7412 mbmi->interp_filter = cm->interp_filter == SWITCHABLE
7413 ? EIGHTTAP_REGULAR
7414 : cm->interp_filter;
7415 #endif // CONFIG_DUAL_FILTER
7418 #endif // CONFIG_GLOBAL_MOTION
7420 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7421 tmp_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
7422 if (mbmi->motion_mode == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) {
7423 best_mbmi = *mbmi;
7424 best_rd = tmp_rd;
7425 best_rd_stats = *rd_stats;
7426 best_rd_stats_y = *rd_stats_y;
7427 best_rd_stats_uv = *rd_stats_uv;
7428 #if CONFIG_VAR_TX
7429 for (int i = 0; i < MAX_MB_PLANE; ++i)
7430 memcpy(best_blk_skip[i], x->blk_skip[i],
7431 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
7432 #endif // CONFIG_VAR_TX
7433 best_xskip = x->skip;
7434 best_disable_skip = *disable_skip;
7438 if (best_rd == INT64_MAX) {
7439 av1_invalid_rd_stats(rd_stats);
7440 restore_dst_buf(xd, *orig_dst);
7441 return INT64_MAX;
7443 *mbmi = best_mbmi;
7444 *rd_stats = best_rd_stats;
7445 *rd_stats_y = best_rd_stats_y;
7446 *rd_stats_uv = best_rd_stats_uv;
7447 #if CONFIG_VAR_TX
7448 for (int i = 0; i < MAX_MB_PLANE; ++i)
7449 memcpy(x->blk_skip[i], best_blk_skip[i],
7450 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
7451 #endif // CONFIG_VAR_TX
7452 x->skip = best_xskip;
7453 *disable_skip = best_disable_skip;
7454 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7456 restore_dst_buf(xd, *orig_dst);
7457 return 0;
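// Full rate-distortion evaluation of one inter mode for the current
// reference frame (or reference pair): new-MV handling, interpolation
// filter search, compound type search (average/wedge/segment) and
// inter-intra search.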
7460 static int64_t handle_inter_mode(
7461 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
7462 RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
7463 int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
7464 int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) {
7465 const AV1_COMMON *cm = &cpi->common;
7466 (void)cm;
7467 MACROBLOCKD *xd = &x->e_mbd;
7468 MODE_INFO *mi = xd->mi[0];
7469 MB_MODE_INFO *mbmi = &mi->mbmi;
7470 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
7471 const int is_comp_pred = has_second_ref(mbmi);
7472 const int this_mode = mbmi->mode;
7473 int_mv *frame_mv = mode_mv[this_mode];
7474 int i;
7475 int refs[2] = { mbmi->ref_frame[0],
7476 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
7477 int_mv cur_mv[2];
7478 int rate_mv = 0;
7479 #if CONFIG_EXT_INTER
7480 int pred_exists = 1;
7481 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7482 const int bw = block_size_wide[bsize];
7483 #endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7484 int_mv single_newmv[TOTAL_REFS_PER_FRAME];
7485 #if CONFIG_INTERINTRA
7486 const unsigned int *const interintra_mode_cost =
7487 cpi->interintra_mode_cost[size_group_lookup[bsize]];
7488 #endif // CONFIG_INTERINTRA
7489 const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
7490 uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
7491 #else
7492 int_mv *const single_newmv = args->single_newmv;
7493 #endif // CONFIG_EXT_INTER
7494 #if CONFIG_HIGHBITDEPTH
7495 DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
7496 #else
7497 DECLARE_ALIGNED(16, uint8_t, tmp_buf_[MAX_MB_PLANE * MAX_SB_SQUARE]);
7498 #endif // CONFIG_HIGHBITDEPTH
7499 uint8_t *tmp_buf;
7501 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7502 #if CONFIG_EXT_INTER
7503 int rate2_bmc_nocoeff;
7504 MB_MODE_INFO best_bmc_mbmi;
7505 #if CONFIG_MOTION_VAR
7506 int rate_mv_bmc;
7507 #endif // CONFIG_MOTION_VAR
7508 #endif // CONFIG_EXT_INTER
7509 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7510 int64_t rd = INT64_MAX;
7511 BUFFER_SET orig_dst, tmp_dst;
7512 int rs = 0;
7514 int skip_txfm_sb = 0;
7515 int64_t skip_sse_sb = INT64_MAX;
7516 int16_t mode_ctx;
7518 #if CONFIG_EXT_INTER
7519 #if CONFIG_INTERINTRA
7520 int compmode_interintra_cost = 0;
7521 mbmi->use_wedge_interintra = 0;
7522 #endif
7523 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7524 int compmode_interinter_cost = 0;
7525 mbmi->interinter_compound_type = COMPOUND_AVERAGE;
7526 #endif
7528 #if CONFIG_INTERINTRA
7529 if (!cm->allow_interintra_compound && is_comp_interintra_pred)
7530 return INT64_MAX;
7531 #endif // CONFIG_INTERINTRA
7533 // is_comp_interintra_pred implies !is_comp_pred
7534 assert(!is_comp_interintra_pred || (!is_comp_pred));
7535 // is_comp_interintra_pred implies is_interintra_allowed(mbmi->sb_type)
7536 assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi));
7537 #endif // CONFIG_EXT_INTER
7539 #if CONFIG_EXT_INTER
7540 if (is_comp_pred)
7541 mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
7542 else
7543 #endif // CONFIG_EXT_INTER
7544 mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
7545 mbmi->ref_frame, bsize, -1);
7547 #if CONFIG_HIGHBITDEPTH
7548 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
7549 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf_);
7550 else
7551 #endif // CONFIG_HIGHBITDEPTH
7552 tmp_buf = tmp_buf_;
7553 // Make sure that we didn't leave the plane destination buffers set
7554 // to tmp_buf at the end of the last iteration
7555 assert(xd->plane[0].dst.buf != tmp_buf);
7557 #if CONFIG_WARPED_MOTION
7558 mbmi->num_proj_ref[0] = 0;
7559 mbmi->num_proj_ref[1] = 0;
7560 #endif // CONFIG_WARPED_MOTION
7562 if (is_comp_pred) {
7563 if (frame_mv[refs[0]].as_int == INVALID_MV ||
7564 frame_mv[refs[1]].as_int == INVALID_MV)
7565 return INT64_MAX;
7568 mbmi->motion_mode = SIMPLE_TRANSLATION;
7569 if (have_newmv_in_inter_mode(this_mode)) {
7570 const int64_t ret_val = handle_newmv(cpi, x, bsize, mode_mv, mi_row, mi_col,
7571 &rate_mv, single_newmv, args);
7572 if (ret_val != 0)
7573 return ret_val;
7574 else
7575 rd_stats->rate += rate_mv;
7577 for (i = 0; i < is_comp_pred + 1; ++i) {
7578 cur_mv[i] = frame_mv[refs[i]];
7579 // Clip "next_nearest" so that it does not extend to far out of image
7580 if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);
7581 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
7582 mbmi->mv[i].as_int = cur_mv[i].as_int;
7585 #if CONFIG_EXT_INTER
7586 if (this_mode == NEAREST_NEARESTMV)
7587 #else
7588 if (this_mode == NEARESTMV && is_comp_pred)
7589 #endif // CONFIG_EXT_INTER
7591 #if !CONFIG_EXT_INTER
7592 uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
7593 #endif // !CONFIG_EXT_INTER
7594 if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
7595 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
7596 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
7598 for (i = 0; i < 2; ++i) {
7599 clamp_mv2(&cur_mv[i].as_mv, xd);
7600 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
7601 mbmi->mv[i].as_int = cur_mv[i].as_int;
7606 #if CONFIG_EXT_INTER
7607 if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
7608 if (this_mode == NEAREST_NEWMV) {
7609 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
7611 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
7612 clamp_mv2(&cur_mv[0].as_mv, xd);
7613 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
7614 mbmi->mv[0].as_int = cur_mv[0].as_int;
7617 if (this_mode == NEW_NEARESTMV) {
7618 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
7620 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
7621 clamp_mv2(&cur_mv[1].as_mv, xd);
7622 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
7623 mbmi->mv[1].as_int = cur_mv[1].as_int;
7627 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
7628 int ref_mv_idx = mbmi->ref_mv_idx + 1;
7629 if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) {
7630 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
7632 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
7633 clamp_mv2(&cur_mv[0].as_mv, xd);
7634 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
7635 mbmi->mv[0].as_int = cur_mv[0].as_int;
7638 if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) {
7639 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
7641 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
7642 clamp_mv2(&cur_mv[1].as_mv, xd);
7643 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
7644 mbmi->mv[1].as_int = cur_mv[1].as_int;
7647 #else
7648 if (this_mode == NEARMV && is_comp_pred) {
7649 uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
7650 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
7651 int ref_mv_idx = mbmi->ref_mv_idx + 1;
7652 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
7653 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
7655 for (i = 0; i < 2; ++i) {
7656 clamp_mv2(&cur_mv[i].as_mv, xd);
7657 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
7658 mbmi->mv[i].as_int = cur_mv[i].as_int;
7662 #endif // CONFIG_EXT_INTER
7664 // Do the first prediction into the destination buffer, and the next
7665 // prediction into a temporary buffer. Then keep track of which one
7666 // of these currently holds the best predictor, and use the other
7667 // one for future predictions. In the end, copy from tmp_buf to
7668 // dst if necessary.
7669 for (i = 0; i < MAX_MB_PLANE; i++) {
7670 tmp_dst.plane[i] = tmp_buf + i * MAX_SB_SQUARE;
7671 tmp_dst.stride[i] = MAX_SB_SIZE;
7673 for (i = 0; i < MAX_MB_PLANE; i++) {
7674 orig_dst.plane[i] = xd->plane[i].dst.buf;
7675 orig_dst.stride[i] = xd->plane[i].dst.stride;
7678 // We don't include the cost of the second reference here, because there
7679 // are only three options: Last/Golden, ARF/Last or Golden/ARF. In other
7680 // words, if the references are presented in that order, the second one is
7681 // always known once the first is known.
7683 // Under some circumstances we discount the cost of new mv mode to encourage
7684 // initiation of a motion field.
7685 if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
7686 refs[0])) {
7687 #if CONFIG_EXT_INTER
7688 rd_stats->rate +=
7689 AOMMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
7690 cost_mv_ref(cpi, is_comp_pred ? NEAREST_NEARESTMV : NEARESTMV,
7691 mode_ctx));
7692 #else
7693 rd_stats->rate += AOMMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
7694 cost_mv_ref(cpi, NEARESTMV, mode_ctx));
7695 #endif // CONFIG_EXT_INTER
7696 } else {
7697 rd_stats->rate += cost_mv_ref(cpi, this_mode, mode_ctx);
7700 if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, 0) > ref_best_rd &&
7701 #if CONFIG_EXT_INTER
7702 mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV
7703 #else
7704 mbmi->mode != NEARESTMV
7705 #endif // CONFIG_EXT_INTER
7707 return INT64_MAX;
7709 int64_t ret_val = interpolation_filter_search(
7710 x, cpi, bsize, mi_row, mi_col, &tmp_dst, &orig_dst, args->single_filter,
7711 &rd, &rs, &skip_txfm_sb, &skip_sse_sb);
7712 if (ret_val != 0) return ret_val;
7714 #if CONFIG_EXT_INTER
7715 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7716 best_bmc_mbmi = *mbmi;
7717 rate2_bmc_nocoeff = rd_stats->rate;
7718 if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs;
7719 #if CONFIG_MOTION_VAR
7720 rate_mv_bmc = rate_mv;
7721 #endif // CONFIG_MOTION_VAR
7722 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7724 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7725 if (is_comp_pred) {
7726 int rate_sum, rs2;
7727 int64_t dist_sum;
7728 int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX;
7729 INTERINTER_COMPOUND_DATA best_compound_data;
7730 int_mv best_mv[2];
7731 int best_tmp_rate_mv = rate_mv;
7732 int tmp_skip_txfm_sb;
7733 int64_t tmp_skip_sse_sb;
7734 int compound_type_cost[COMPOUND_TYPES];
7735 uint8_t pred0[2 * MAX_SB_SQUARE];
7736 uint8_t pred1[2 * MAX_SB_SQUARE];
7737 uint8_t *preds0[1] = { pred0 };
7738 uint8_t *preds1[1] = { pred1 };
7739 int strides[1] = { bw };
7740 int tmp_rate_mv;
7741 int masked_compound_used = is_any_masked_compound_used(bsize);
7742 #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
7743 masked_compound_used = masked_compound_used && cm->allow_masked_compound;
7744 #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
7745 COMPOUND_TYPE cur_type;
7747 best_mv[0].as_int = cur_mv[0].as_int;
7748 best_mv[1].as_int = cur_mv[1].as_int;
7749 memset(&best_compound_data, 0, sizeof(best_compound_data));
7750 #if CONFIG_COMPOUND_SEGMENT
7751 uint8_t tmp_mask_buf[2 * MAX_SB_SQUARE];
7752 best_compound_data.seg_mask = tmp_mask_buf;
7753 #endif // CONFIG_COMPOUND_SEGMENT
7755 if (masked_compound_used) {
7756 av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize],
7757 av1_compound_type_tree);
7758 // get inter predictors to use for masked compound modes
7759 av1_build_inter_predictors_for_planes_single_buf(
7760 xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
7761 av1_build_inter_predictors_for_planes_single_buf(
7762 xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
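// Try each compound type in turn (COMPOUND_AVERAGE first, then the masked
// types when allowed), keeping the type, mask and refined MVs that give the
// lowest estimated RD cost.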
7765 for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
7766 if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
7767 if (!is_interinter_compound_used(cur_type, bsize)) break;
7768 tmp_rate_mv = rate_mv;
7769 best_rd_cur = INT64_MAX;
7770 mbmi->interinter_compound_type = cur_type;
7771 rs2 = av1_cost_literal(get_interinter_compound_type_bits(
7772 bsize, mbmi->interinter_compound_type)) +
7773 (masked_compound_used
7774 ? compound_type_cost[mbmi->interinter_compound_type]
7775 : 0);
7777 switch (cur_type) {
7778 case COMPOUND_AVERAGE:
7779 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
7780 bsize);
7781 av1_subtract_plane(x, bsize, 0);
7782 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
7783 &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
7784 INT64_MAX);
7785 if (rd != INT64_MAX)
7786 best_rd_cur =
7787 RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
7788 best_rd_compound = best_rd_cur;
7789 break;
7790 #if CONFIG_WEDGE
7791 case COMPOUND_WEDGE:
7792 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
7793 best_rd_compound / 3 < ref_best_rd) {
7794 best_rd_cur = build_and_cost_compound_type(
7795 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
7796 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
7798 break;
7799 #endif // CONFIG_WEDGE
7800 #if CONFIG_COMPOUND_SEGMENT
7801 case COMPOUND_SEG:
7802 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
7803 best_rd_compound / 3 < ref_best_rd) {
7804 best_rd_cur = build_and_cost_compound_type(
7805 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
7806 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
7808 break;
7809 #endif // CONFIG_COMPOUND_SEGMENT
7810 default: assert(0); return 0;
7813 if (best_rd_cur < best_rd_compound) {
7814 best_rd_compound = best_rd_cur;
7815 #if CONFIG_WEDGE
7816 best_compound_data.wedge_index = mbmi->wedge_index;
7817 best_compound_data.wedge_sign = mbmi->wedge_sign;
7818 #endif // CONFIG_WEDGE
7819 #if CONFIG_COMPOUND_SEGMENT
7820 best_compound_data.mask_type = mbmi->mask_type;
7821 memcpy(best_compound_data.seg_mask, xd->seg_mask,
7822 2 * MAX_SB_SQUARE * sizeof(uint8_t));
7823 #endif // CONFIG_COMPOUND_SEGMENT
7824 best_compound_data.interinter_compound_type =
7825 mbmi->interinter_compound_type;
7826 if (have_newmv_in_inter_mode(this_mode)) {
7827 if (use_masked_motion_search(cur_type)) {
7828 best_tmp_rate_mv = tmp_rate_mv;
7829 best_mv[0].as_int = mbmi->mv[0].as_int;
7830 best_mv[1].as_int = mbmi->mv[1].as_int;
7831 } else {
7832 best_mv[0].as_int = cur_mv[0].as_int;
7833 best_mv[1].as_int = cur_mv[1].as_int;
7837 // reset to original mvs for next iteration
7838 mbmi->mv[0].as_int = cur_mv[0].as_int;
7839 mbmi->mv[1].as_int = cur_mv[1].as_int;
7841 #if CONFIG_WEDGE
7842 mbmi->wedge_index = best_compound_data.wedge_index;
7843 mbmi->wedge_sign = best_compound_data.wedge_sign;
7844 #endif // CONFIG_WEDGE
7845 #if CONFIG_COMPOUND_SEGMENT
7846 mbmi->mask_type = best_compound_data.mask_type;
7847 memcpy(xd->seg_mask, best_compound_data.seg_mask,
7848 2 * MAX_SB_SQUARE * sizeof(uint8_t));
7849 #endif // CONFIG_COMPOUND_SEGMENT
7850 mbmi->interinter_compound_type =
7851 best_compound_data.interinter_compound_type;
7852 if (have_newmv_in_inter_mode(this_mode)) {
7853 mbmi->mv[0].as_int = best_mv[0].as_int;
7854 mbmi->mv[1].as_int = best_mv[1].as_int;
7855 xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
7856 xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
7857 if (use_masked_motion_search(mbmi->interinter_compound_type)) {
7858 rd_stats->rate += best_tmp_rate_mv - rate_mv;
7859 rate_mv = best_tmp_rate_mv;
7863 if (ref_best_rd < INT64_MAX && best_rd_compound / 3 > ref_best_rd) {
7864 restore_dst_buf(xd, orig_dst);
7865 return INT64_MAX;
7868 pred_exists = 0;
7870 compmode_interinter_cost =
7871 av1_cost_literal(get_interinter_compound_type_bits(
7872 bsize, mbmi->interinter_compound_type)) +
7873 (masked_compound_used
7874 ? compound_type_cost[mbmi->interinter_compound_type]
7875 : 0);
7877 #endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7879 #if CONFIG_INTERINTRA
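// Inter-intra search: build the inter predictor once, test every
// INTERINTRA_MODE with a modeled RD cost, re-estimate the best mode with a
// real transform search, and optionally add a wedge (with MV refinement for
// NEWMV modes).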
7880 if (is_comp_interintra_pred) {
7881 INTERINTRA_MODE best_interintra_mode = II_DC_PRED;
7882 int64_t best_interintra_rd = INT64_MAX;
7883 int rmode, rate_sum;
7884 int64_t dist_sum;
7885 int j;
7886 int tmp_rate_mv = 0;
7887 int tmp_skip_txfm_sb;
7888 int64_t tmp_skip_sse_sb;
7889 DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_SB_SQUARE]);
7890 uint8_t *intrapred;
7892 #if CONFIG_HIGHBITDEPTH
7893 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
7894 intrapred = CONVERT_TO_BYTEPTR(intrapred_);
7895 else
7896 #endif // CONFIG_HIGHBITDEPTH
7897 intrapred = intrapred_;
7899 mbmi->ref_frame[1] = NONE_FRAME;
7900 for (j = 0; j < MAX_MB_PLANE; j++) {
7901 xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
7902 xd->plane[j].dst.stride = bw;
7904 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, bsize);
7905 restore_dst_buf(xd, orig_dst);
7906 mbmi->ref_frame[1] = INTRA_FRAME;
7907 mbmi->use_wedge_interintra = 0;
7909 for (j = 0; j < INTERINTRA_MODES; ++j) {
7910 mbmi->interintra_mode = (INTERINTRA_MODE)j;
7911 rmode = interintra_mode_cost[mbmi->interintra_mode];
7912 av1_build_intra_predictors_for_interintra(xd, bsize, 0, &orig_dst,
7913 intrapred, bw);
7914 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
7915 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
7916 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
7917 rd =
7918 RDCOST(x->rdmult, x->rddiv, tmp_rate_mv + rate_sum + rmode, dist_sum);
7919 if (rd < best_interintra_rd) {
7920 best_interintra_rd = rd;
7921 best_interintra_mode = mbmi->interintra_mode;
7924 mbmi->interintra_mode = best_interintra_mode;
7925 rmode = interintra_mode_cost[mbmi->interintra_mode];
7926 av1_build_intra_predictors_for_interintra(xd, bsize, 0, &orig_dst,
7927 intrapred, bw);
7928 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
7929 av1_subtract_plane(x, bsize, 0);
7930 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
7931 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
7932 if (rd != INT64_MAX)
7933 rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum);
7934 best_interintra_rd = rd;
7936 if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) {
7937 // Don't need to call restore_dst_buf here
7938 return INT64_MAX;
7940 #if CONFIG_WEDGE
7941 if (is_interintra_wedge_used(bsize)) {
7942 int64_t best_interintra_rd_nowedge = INT64_MAX;
7943 int64_t best_interintra_rd_wedge = INT64_MAX;
7944 int_mv tmp_mv;
7945 int rwedge = av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0);
7946 if (rd != INT64_MAX)
7947 rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge + rate_sum,
7948 dist_sum);
7949 best_interintra_rd_nowedge = best_interintra_rd;
7951 // Disable wedge search if source variance is small
7952 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
7953 mbmi->use_wedge_interintra = 1;
7955 rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) +
7956 av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1);
7958 best_interintra_rd_wedge =
7959 pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
7961 best_interintra_rd_wedge +=
7962 RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge, 0);
7963 // Refine motion vector.
7964 if (have_newmv_in_inter_mode(this_mode)) {
7965 // get negative of mask
7966 const uint8_t *mask = av1_get_contiguous_soft_mask(
7967 mbmi->interintra_wedge_index, 1, bsize);
7968 tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int;
7969 compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
7970 mi_col, intrapred, mask, bw,
7971 &tmp_rate_mv, 0, 0);
7972 mbmi->mv[0].as_int = tmp_mv.as_int;
7973 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
7974 bsize);
7975 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
7976 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
7977 rd = RDCOST(x->rdmult, x->rddiv,
7978 rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
7979 if (rd >= best_interintra_rd_wedge) {
7980 tmp_mv.as_int = cur_mv[0].as_int;
7981 tmp_rate_mv = rate_mv;
7983 } else {
7984 tmp_mv.as_int = cur_mv[0].as_int;
7985 tmp_rate_mv = rate_mv;
7986 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
7988 // Evaluate closer to the true RD cost.
7989 av1_subtract_plane(x, bsize, 0);
7990 rd =
7991 estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
7992 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
7993 if (rd != INT64_MAX)
7994 rd = RDCOST(x->rdmult, x->rddiv,
7995 rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
7996 best_interintra_rd_wedge = rd;
7997 if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
7998 mbmi->use_wedge_interintra = 1;
7999 mbmi->mv[0].as_int = tmp_mv.as_int;
8000 rd_stats->rate += tmp_rate_mv - rate_mv;
8001 rate_mv = tmp_rate_mv;
8002 } else {
8003 mbmi->use_wedge_interintra = 0;
8004 mbmi->mv[0].as_int = cur_mv[0].as_int;
8006 } else {
8007 mbmi->use_wedge_interintra = 0;
8010 #endif // CONFIG_WEDGE
8012 pred_exists = 0;
8013 compmode_interintra_cost =
8014 av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 1) +
8015 interintra_mode_cost[mbmi->interintra_mode];
8016 if (is_interintra_wedge_used(bsize)) {
8017 compmode_interintra_cost += av1_cost_bit(
8018 cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra);
8019 if (mbmi->use_wedge_interintra) {
8020 compmode_interintra_cost +=
8021 av1_cost_literal(get_interintra_wedge_bits(bsize));
8024 } else if (is_interintra_allowed(mbmi)) {
8025 compmode_interintra_cost =
8026 av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 0);
8028 #endif // CONFIG_INTERINTRA
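// If the destination buffers no longer hold the final prediction
// (pred_exists == 0), rebuild the inter predictor for all planes and refresh
// the model-based rate/distortion estimate.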
8030 if (pred_exists == 0) {
8031 int tmp_rate;
8032 int64_t tmp_dist;
8033 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize);
8034 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
8035 &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
8036 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
8038 #endif // CONFIG_EXT_INTER
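// Record the interpolation filter selected for this single-reference mode
// for possible reuse in later searches.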
8040 if (!is_comp_pred)
8041 #if CONFIG_DUAL_FILTER
8042 args->single_filter[this_mode][refs[0]] = mbmi->interp_filter[0];
8043 #else
8044 args->single_filter[this_mode][refs[0]] = mbmi->interp_filter;
8045 #endif // CONFIG_DUAL_FILTER
8047 #if CONFIG_EXT_INTER
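// Prune a compound mode early when its modeled RD exceeds 4/3 of the better
// of the two corresponding single-reference modeled RDs
// (rd / 4 * 3 > mrd is equivalent to rd > 4 * mrd / 3).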
8048 if (args->modelled_rd != NULL) {
8049 if (is_comp_pred) {
8050 const int mode0 = compound_ref0_mode(this_mode);
8051 const int mode1 = compound_ref1_mode(this_mode);
8052 const int64_t mrd = AOMMIN(args->modelled_rd[mode0][refs[0]],
8053 args->modelled_rd[mode1][refs[1]]);
8054 if (rd / 4 * 3 > mrd && ref_best_rd < INT64_MAX) {
8055 restore_dst_buf(xd, orig_dst);
8056 return INT64_MAX;
8058 } else if (!is_comp_interintra_pred) {
8059 args->modelled_rd[this_mode][refs[0]] = rd;
8062 #endif // CONFIG_EXT_INTER
8064 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
8065 // If the current modeled RD (based on prediction error) is substantially
8066 // worse than the best so far, do not bother doing a full RD evaluation.
8067 if (rd / 2 > ref_best_rd) {
8068 restore_dst_buf(xd, orig_dst);
8069 return INT64_MAX;
8073 #if CONFIG_EXT_INTER
8074 #if CONFIG_INTERINTRA
8075 rd_stats->rate += compmode_interintra_cost;
8076 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
8077 rate2_bmc_nocoeff += compmode_interintra_cost;
8078 #endif
8079 #endif
8080 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
8081 rd_stats->rate += compmode_interinter_cost;
8082 #endif
8083 #endif
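// Finally, hand off to motion_mode_rd() to evaluate the available motion
// modes (e.g. OBMC or warped motion when enabled) and compute the full
// rate-distortion cost of this inter mode.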
8085 ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
8086 disable_skip, mode_mv, mi_row, mi_col, args,
8087 ref_best_rd, refs, rate_mv,
8088 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
8089 single_newmv,
8090 #if CONFIG_EXT_INTER
8091 rate2_bmc_nocoeff, &best_bmc_mbmi,
8092 #if CONFIG_MOTION_VAR
8093 rate_mv_bmc,
8094 #endif // CONFIG_MOTION_VAR
8095 #endif // CONFIG_EXT_INTER
8096 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
8097 rs, &skip_txfm_sb, &skip_sse_sb, &orig_dst);
8098 if (ret_val != 0) return ret_val;
8100 return 0; // The rate-distortion cost will be re-calculated by the caller.
8103 #if CONFIG_INTRABC
8104 static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
8105 RD_STATS *rd_cost, BLOCK_SIZE bsize,
8106 int64_t best_rd) {
8107 const AV1_COMMON *const cm = &cpi->common;
8108 if (bsize < BLOCK_8X8 || !cm->allow_screen_content_tools) return INT64_MAX;
8110 MACROBLOCKD *const xd = &x->e_mbd;
8111 const TileInfo *tile = &xd->tile;
8112 MODE_INFO *const mi = xd->mi[0];
8113 const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
8114 const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
8115 const int w = block_size_wide[bsize];
8116 const int h = block_size_high[bsize];
8117 const int sb_row = mi_row / MAX_MIB_SIZE;
8118 const int sb_col = mi_col / MAX_MIB_SIZE;
8120 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
8121 MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
8122 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
8123 av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
8124 mbmi_ext->ref_mv_stack[ref_frame],
8125 #if CONFIG_EXT_INTER
8126 mbmi_ext->compound_mode_context,
8127 #endif // CONFIG_EXT_INTER
8128 candidates, mi_row, mi_col, NULL, NULL,
8129 mbmi_ext->mode_context);
8131 int_mv nearestmv, nearmv;
8132 av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv);
8134 int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
8135 if (dv_ref.as_int == 0) av1_find_ref_dv(&dv_ref, mi_row, mi_col);
8136 mbmi_ext->ref_mvs[INTRA_FRAME][0] = dv_ref;
8138 struct buf_2d yv12_mb[MAX_MB_PLANE];
8139 av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL);
8140 for (int i = 0; i < MAX_MB_PLANE; ++i) {
8141 xd->plane[i].pre[0] = yv12_mb[i];
8144 enum IntrabcMotionDirection {
8145 IBC_MOTION_ABOVE,
8146 IBC_MOTION_LEFT,
8147 IBC_MOTION_DIRECTIONS
8150 MB_MODE_INFO *mbmi = &mi->mbmi;
8151 MB_MODE_INFO best_mbmi = *mbmi;
8152 RD_STATS best_rdcost = *rd_cost;
8153 int best_skip = x->skip;
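// Run the displacement-vector search twice: once restricted to the region
// above the current superblock and once restricted to the region to its
// left, keeping whichever candidate gives the lower RD cost.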
8155 for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
8156 dir < IBC_MOTION_DIRECTIONS; ++dir) {
8157 const MvLimits tmp_mv_limits = x->mv_limits;
8158 switch (dir) {
8159 case IBC_MOTION_ABOVE:
8160 x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
8161 x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
8162 x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
8163 x->mv_limits.row_max = (sb_row * MAX_MIB_SIZE - mi_row) * MI_SIZE - h;
8164 break;
8165 case IBC_MOTION_LEFT:
8166 x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
8167 x->mv_limits.col_max = (sb_col * MAX_MIB_SIZE - mi_col) * MI_SIZE - w;
8168 // TODO(aconverse@google.com): Minimize the overlap between above and
8169 // left areas.
8170 x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
8171 int bottom_coded_mi_edge =
8172 AOMMIN((sb_row + 1) * MAX_MIB_SIZE, tile->mi_row_end);
8173 x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
8174 break;
8175 default: assert(0);
8177 assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
8178 assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
8179 assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
8180 assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
8181 av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);
8183 if (x->mv_limits.col_max < x->mv_limits.col_min ||
8184 x->mv_limits.row_max < x->mv_limits.row_min) {
8185 x->mv_limits = tmp_mv_limits;
8186 continue;
8189 int step_param = cpi->mv_step_param;
8190 MV mvp_full = dv_ref.as_mv;
8191 mvp_full.col >>= 3;
8192 mvp_full.row >>= 3;
8193 int sadpb = x->sadperbit16;
8194 int cost_list[5];
8195 int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
8196 sadpb, cond_cost_list(cpi, cost_list),
8197 &dv_ref.as_mv, INT_MAX, 1);
8199 x->mv_limits = tmp_mv_limits;
8200 if (bestsme == INT_MAX) continue;
8201 mvp_full = x->best_mv.as_mv;
8202 MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 };
8203 if (mv_check_bounds(&x->mv_limits, &dv)) continue;
8204 if (!is_dv_valid(dv, tile, mi_row, mi_col, bsize)) continue;
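// A valid DV was found: code the block as DC_PRED with intrabc, build the
// prediction, and compare the RD cost with and without coefficient skip.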
8206 #if CONFIG_PALETTE
8207 memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
8208 #endif
8209 mbmi->use_intrabc = 1;
8210 mbmi->mode = DC_PRED;
8211 mbmi->uv_mode = DC_PRED;
8212 mbmi->mv[0].as_mv = dv;
8213 #if CONFIG_DUAL_FILTER
8214 for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR;
8215 #else
8216 mbmi->interp_filter = BILINEAR;
8217 #endif
8218 mbmi->skip = 0;
8219 x->skip = 0;
8220 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
8222 int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost,
8223 x->mvcost, MV_COST_WEIGHT);
8224 const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0);
8225 const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0);
8226 const int rate_mode =
8227 cpi->y_mode_costs[A][L][DC_PRED] + av1_cost_bit(INTRABC_PROB, 1);
8229 RD_STATS rd_stats, rd_stats_uv;
8230 av1_subtract_plane(x, bsize, 0);
8231 super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
8232 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
8233 av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
8234 #if CONFIG_RD_DEBUG
8235 mbmi->rd_stats = rd_stats;
8236 #endif
8238 #if CONFIG_VAR_TX
8239 // TODO(aconverse@google.com): Evaluate allowing VAR TX on intrabc blocks
8240 const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
8241 const int height = block_size_high[bsize] >> tx_size_high_log2[0];
8242 int idx, idy;
8243 for (idy = 0; idy < height; ++idy)
8244 for (idx = 0; idx < width; ++idx)
8245 mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
8246 mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
8247 #endif // CONFIG_VAR_TX
8249 const aom_prob skip_prob = av1_get_skip_prob(cm, xd);
8251 RD_STATS rdc_noskip;
8252 av1_init_rd_stats(&rdc_noskip);
8253 rdc_noskip.rate =
8254 rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0);
8255 rdc_noskip.dist = rd_stats.dist;
8256 rdc_noskip.rdcost =
8257 RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist);
8258 if (rdc_noskip.rdcost < best_rd) {
8259 best_rd = rdc_noskip.rdcost;
8260 best_mbmi = *mbmi;
8261 best_skip = x->skip;
8262 best_rdcost = rdc_noskip;
8265 x->skip = 1;
8266 mbmi->skip = 1;
8267 RD_STATS rdc_skip;
8268 av1_init_rd_stats(&rdc_skip);
8269 rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1);
8270 rdc_skip.dist = rd_stats.sse;
8271 rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist);
8272 if (rdc_skip.rdcost < best_rd) {
8273 best_rd = rdc_skip.rdcost;
8274 best_mbmi = *mbmi;
8275 best_skip = x->skip;
8276 best_rdcost = rdc_skip;
8279 *mbmi = best_mbmi;
8280 *rd_cost = best_rdcost;
8281 x->skip = best_skip;
8282 return best_rd;
8284 #endif // CONFIG_INTRABC
8286 void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
8287 RD_STATS *rd_cost, BLOCK_SIZE bsize,
8288 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
8289 const AV1_COMMON *const cm = &cpi->common;
8290 MACROBLOCKD *const xd = &x->e_mbd;
8291 struct macroblockd_plane *const pd = xd->plane;
8292 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
8293 int y_skip = 0, uv_skip = 0;
8294 int64_t dist_y = 0, dist_uv = 0;
8295 TX_SIZE max_uv_tx_size;
8296 const int unify_bsize = CONFIG_CB4X4;
8298 ctx->skip = 0;
8299 xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
8300 xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
8301 #if CONFIG_INTRABC
8302 xd->mi[0]->mbmi.use_intrabc = 0;
8303 xd->mi[0]->mbmi.mv[0].as_int = 0;
8304 #endif // CONFIG_INTRABC
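// Pick the best luma intra mode first; the chroma search below only runs if
// the luma-only RD cost already beats best_rd.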
8306 const int64_t intra_yrd =
8307 (bsize >= BLOCK_8X8 || unify_bsize)
8308 ? rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
8309 &y_skip, bsize, best_rd)
8310 : rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
8311 &dist_y, &y_skip, best_rd);
8313 if (intra_yrd < best_rd) {
8314 max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size]
8315 [pd[1].subsampling_x][pd[1].subsampling_y];
8316 init_sbuv_mode(&xd->mi[0]->mbmi);
8317 #if CONFIG_CB4X4
8318 if (!x->skip_chroma_rd)
8319 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
8320 &uv_skip, bsize, max_uv_tx_size);
8321 #else
8322 rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
8323 &uv_skip, AOMMAX(BLOCK_8X8, bsize), max_uv_tx_size);
8324 #endif // CONFIG_CB4X4
8326 if (y_skip && uv_skip) {
8327 rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
8328 av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
8329 rd_cost->dist = dist_y + dist_uv;
8330 } else {
8331 rd_cost->rate =
8332 rate_y + rate_uv + av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
8333 rd_cost->dist = dist_y + dist_uv;
8335 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
8336 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
8337 rd_cost->dist_y = dist_y;
8338 #endif
8339 } else {
8340 rd_cost->rate = INT_MAX;
8343 #if CONFIG_INTRABC
8344 if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
8345 best_rd = rd_cost->rdcost;
8346 if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
8347 ctx->skip = x->skip; // FIXME where is the proper place to set this?!
8348 assert(rd_cost->rate != INT_MAX);
8349 rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
8351 #endif
8352 if (rd_cost->rate == INT_MAX) return;
8354 ctx->mic = *xd->mi[0];
8355 ctx->mbmi_ext = *x->mbmi_ext;
8358 // Do we have an internal image edge (e.g. formatting bars)?
8359 int av1_internal_image_edge(const AV1_COMP *cpi) {
8360 return (cpi->oxcf.pass == 2) &&
8361 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
8362 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
8365 // Checks to see if a super block is on a horizontal image edge.
8366 // In most cases this is the "real" edge unless there are formatting
8367 // bars embedded in the stream.
8368 int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
8369 int top_edge = 0;
8370 int bottom_edge = cpi->common.mi_rows;
8371 int is_active_h_edge = 0;
8373 // For two-pass encoding, account for any formatting bars detected.
8374 if (cpi->oxcf.pass == 2) {
8375 const TWO_PASS *const twopass = &cpi->twopass;
8377 // The inactive region is specified in MBs not mi units.
8378 // The image edge is in the following MB row.
8379 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
8381 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
8382 bottom_edge = AOMMAX(top_edge, bottom_edge);
8385 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
8386 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
8387 is_active_h_edge = 1;
8389 return is_active_h_edge;
8392 // Checks to see if a super block is on a vertical image edge.
8393 // In most cases this is the "real" edge unless there are formatting
8394 // bars embedded in the stream.
8395 int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
8396 int left_edge = 0;
8397 int right_edge = cpi->common.mi_cols;
8398 int is_active_v_edge = 0;
8400 // For two-pass encoding, account for any formatting bars detected.
8401 if (cpi->oxcf.pass == 2) {
8402 const TWO_PASS *const twopass = &cpi->twopass;
8404 // The inactive region is specified in MBs not mi units.
8405 // The image edge is in the following MB column.
8406 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
8408 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
8409 right_edge = AOMMAX(left_edge, right_edge);
8412 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
8413 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
8414 is_active_v_edge = 1;
8416 return is_active_v_edge;
8419 // Checks to see if a super block is at the edge of the active image.
8420 // In most cases this is the "real" edge unless there are formatting
8421 // bars embedded in the stream.
8422 int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
8423 return av1_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
8424 av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
8427 #if CONFIG_PALETTE
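// Rebuilds the chroma color_index_map for the already-selected U/V palette by
// re-running the nearest-centroid assignment on the source chroma samples.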
8428 static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
8429 MACROBLOCKD *const xd = &x->e_mbd;
8430 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8431 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
8432 const BLOCK_SIZE bsize = mbmi->sb_type;
8433 int src_stride = x->plane[1].src.stride;
8434 const uint8_t *const src_u = x->plane[1].src.buf;
8435 const uint8_t *const src_v = x->plane[2].src.buf;
8436 float *const data = x->palette_buffer->kmeans_data_buf;
8437 float centroids[2 * PALETTE_MAX_SIZE];
8438 uint8_t *const color_map = xd->plane[1].color_index_map;
8439 int r, c;
8440 #if CONFIG_HIGHBITDEPTH
8441 const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
8442 const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
8443 #endif // CONFIG_HIGHBITDEPTH
8444 int plane_block_width, plane_block_height, rows, cols;
8445 av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
8446 &plane_block_height, &rows, &cols);
8447 (void)cpi;
8449 for (r = 0; r < rows; ++r) {
8450 for (c = 0; c < cols; ++c) {
8451 #if CONFIG_HIGHBITDEPTH
8452 if (cpi->common.use_highbitdepth) {
8453 data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
8454 data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
8455 } else {
8456 #endif // CONFIG_HIGHBITDEPTH
8457 data[(r * cols + c) * 2] = src_u[r * src_stride + c];
8458 data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c];
8459 #if CONFIG_HIGHBITDEPTH
8461 #endif // CONFIG_HIGHBITDEPTH
8465 for (r = 1; r < 3; ++r) {
8466 for (c = 0; c < pmi->palette_size[1]; ++c) {
8467 centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c];
8471 av1_calc_indices(data, centroids, color_map, rows * cols,
8472 pmi->palette_size[1], 2);
8473 extend_palette_color_map(color_map, cols, rows, plane_block_width,
8474 plane_block_height);
8476 #endif // CONFIG_PALETTE
8478 #if CONFIG_FILTER_INTRA
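// Evaluates the DC_PRED + filter-intra hypothesis during the inter-frame mode
// search and updates the best-mode bookkeeping if its RD cost wins.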
8479 static void pick_filter_intra_interframe(
8480 const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
8481 BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_uv_intra,
8482 int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
8483 PREDICTION_MODE *mode_uv, FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
8484 #if CONFIG_EXT_INTRA
8485 int8_t *uv_angle_delta,
8486 #endif // CONFIG_EXT_INTRA
8487 #if CONFIG_PALETTE
8488 PALETTE_MODE_INFO *pmi_uv, int palette_ctx,
8489 #endif // CONFIG_PALETTE
8490 int skip_mask, unsigned int *ref_costs_single, int64_t *best_rd,
8491 int64_t *best_intra_rd, PREDICTION_MODE *best_intra_mode,
8492 int *best_mode_index, int *best_skip2, int *best_mode_skippable,
8493 #if CONFIG_SUPERTX
8494 int *returnrate_nocoef,
8495 #endif // CONFIG_SUPERTX
8496 int64_t *best_pred_rd, MB_MODE_INFO *best_mbmode, RD_STATS *rd_cost) {
8497 const AV1_COMMON *const cm = &cpi->common;
8498 MACROBLOCKD *const xd = &x->e_mbd;
8499 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8500 #if CONFIG_PALETTE
8501 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
8502 #endif // CONFIG_PALETTE
8503 int rate2 = 0, rate_y = INT_MAX, skippable = 0, rate_uv, rate_dummy, i;
8504 int dc_mode_index;
8505 const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
8506 int64_t distortion2 = 0, distortion_y = 0, this_rd = *best_rd;
8507 int64_t distortion_uv, model_rd = INT64_MAX;
8508 TX_SIZE uv_tx;
8510 for (i = 0; i < MAX_MODES; ++i)
8511 if (av1_mode_order[i].mode == DC_PRED &&
8512 av1_mode_order[i].ref_frame[0] == INTRA_FRAME)
8513 break;
8514 dc_mode_index = i;
8515 assert(i < MAX_MODES);
8517 // TODO(huisu): use skip_mask for further speedup.
8518 (void)skip_mask;
8519 mbmi->mode = DC_PRED;
8520 mbmi->uv_mode = DC_PRED;
8521 mbmi->ref_frame[0] = INTRA_FRAME;
8522 mbmi->ref_frame[1] = NONE_FRAME;
8523 if (!rd_pick_filter_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
8524 &skippable, bsize, intra_mode_cost[mbmi->mode],
8525 &this_rd, &model_rd, 0)) {
8526 return;
8528 if (rate_y == INT_MAX) return;
8530 uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
8531 [xd->plane[1].subsampling_y];
8532 if (rate_uv_intra[uv_tx] == INT_MAX) {
8533 choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
8534 &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
8535 &skip_uv[uv_tx], &mode_uv[uv_tx]);
8536 #if CONFIG_PALETTE
8537 if (cm->allow_screen_content_tools) pmi_uv[uv_tx] = *pmi;
8538 #endif // CONFIG_PALETTE
8539 filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
8540 #if CONFIG_EXT_INTRA
8541 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
8542 #endif // CONFIG_EXT_INTRA
8545 rate_uv = rate_uv_tokenonly[uv_tx];
8546 distortion_uv = dist_uv[uv_tx];
8547 skippable = skippable && skip_uv[uv_tx];
8548 mbmi->uv_mode = mode_uv[uv_tx];
8549 #if CONFIG_PALETTE
8550 if (cm->allow_screen_content_tools) {
8551 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
8552 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
8553 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
8554 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
8556 #endif // CONFIG_PALETTE
8557 #if CONFIG_EXT_INTRA
8558 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
8559 #endif // CONFIG_EXT_INTRA
8560 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
8561 filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
8562 if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
8563 mbmi->filter_intra_mode_info.filter_intra_mode[1] =
8564 filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
8567 rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
8568 cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
8569 #if CONFIG_PALETTE
8570 if (cpi->common.allow_screen_content_tools && mbmi->mode == DC_PRED &&
8571 bsize >= BLOCK_8X8)
8572 rate2 += av1_cost_bit(
8573 av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
8574 #endif // CONFIG_PALETTE
8576 if (!xd->lossless[mbmi->segment_id]) {
8577 // super_block_yrd above includes the cost of the tx_size in the
8578 // tokenonly rate, but for intra blocks, tx_size is always coded
8579 // (prediction granularity), so we account for it in the full rate,
8580 // not the tokenonly rate.
8581 rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
8584 rate2 += av1_cost_bit(cm->fc->filter_intra_probs[0],
8585 mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
8586 rate2 += write_uniform_cost(
8587 FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]);
8588 #if CONFIG_EXT_INTRA
8589 if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
8590 rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
8591 MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
8593 #endif // CONFIG_EXT_INTRA
8594 if (mbmi->mode == DC_PRED) {
8595 rate2 +=
8596 av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
8597 mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
8598 if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1])
8599 rate2 +=
8600 write_uniform_cost(FILTER_INTRA_MODES,
8601 mbmi->filter_intra_mode_info.filter_intra_mode[1]);
8603 distortion2 = distortion_y + distortion_uv;
8604 av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, 0, 0, mi_row,
8605 mi_col);
8607 rate2 += ref_costs_single[INTRA_FRAME];
8609 if (skippable) {
8610 rate2 -= (rate_y + rate_uv);
8611 rate_y = 0;
8612 rate_uv = 0;
8613 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
8614 } else {
8615 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
8617 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
8619 if (this_rd < *best_intra_rd) {
8620 *best_intra_rd = this_rd;
8621 *best_intra_mode = mbmi->mode;
8623 for (i = 0; i < REFERENCE_MODES; ++i)
8624 best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
8626 if (this_rd < *best_rd) {
8627 *best_mode_index = dc_mode_index;
8628 mbmi->mv[0].as_int = 0;
8629 rd_cost->rate = rate2;
8630 #if CONFIG_SUPERTX
8631 if (x->skip)
8632 *returnrate_nocoef = rate2;
8633 else
8634 *returnrate_nocoef = rate2 - rate_y - rate_uv;
8635 *returnrate_nocoef -= av1_cost_bit(av1_get_skip_prob(cm, xd), skippable);
8636 *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
8637 mbmi->ref_frame[0] != INTRA_FRAME);
8638 #endif // CONFIG_SUPERTX
8639 rd_cost->dist = distortion2;
8640 rd_cost->rdcost = this_rd;
8641 *best_rd = this_rd;
8642 *best_mbmode = *mbmi;
8643 *best_skip2 = 0;
8644 *best_mode_skippable = skippable;
8647 #endif // CONFIG_FILTER_INTRA
8649 #if CONFIG_MOTION_VAR
8650 static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
8651 const MACROBLOCKD *xd, int mi_row,
8652 int mi_col, const uint8_t *above,
8653 int above_stride, const uint8_t *left,
8654 int left_stride);
8655 #endif // CONFIG_MOTION_VAR
8657 void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
8658 MACROBLOCK *x, int mi_row, int mi_col,
8659 RD_STATS *rd_cost,
8660 #if CONFIG_SUPERTX
8661 int *returnrate_nocoef,
8662 #endif // CONFIG_SUPERTX
8663 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
8664 int64_t best_rd_so_far) {
8665 const AV1_COMMON *const cm = &cpi->common;
8666 const RD_OPT *const rd_opt = &cpi->rd;
8667 const SPEED_FEATURES *const sf = &cpi->sf;
8668 MACROBLOCKD *const xd = &x->e_mbd;
8669 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8670 #if CONFIG_PALETTE
8671 const int try_palette =
8672 cpi->common.allow_screen_content_tools && bsize >= BLOCK_8X8;
8673 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
8674 #endif // CONFIG_PALETTE
8675 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
8676 const struct segmentation *const seg = &cm->seg;
8677 PREDICTION_MODE this_mode;
8678 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
8679 unsigned char segment_id = mbmi->segment_id;
8680 int comp_pred, i, k;
8681 int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
8682 struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
8683 int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
8684 #if CONFIG_EXT_INTER
8685 int single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
8686 int64_t modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
8687 #endif // CONFIG_EXT_INTER
8688 static const int flag_list[TOTAL_REFS_PER_FRAME] = {
8690 AOM_LAST_FLAG,
8691 #if CONFIG_EXT_REFS
8692 AOM_LAST2_FLAG,
8693 AOM_LAST3_FLAG,
8694 #endif // CONFIG_EXT_REFS
8695 AOM_GOLD_FLAG,
8696 #if CONFIG_EXT_REFS
8697 AOM_BWD_FLAG,
8698 #endif // CONFIG_EXT_REFS
8699 AOM_ALT_FLAG
8701 int64_t best_rd = best_rd_so_far;
8702 int best_rate_y = INT_MAX, best_rate_uv = INT_MAX;
8703 int64_t best_pred_diff[REFERENCE_MODES];
8704 int64_t best_pred_rd[REFERENCE_MODES];
8705 MB_MODE_INFO best_mbmode;
8706 int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
8707 int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
8708 int best_mode_skippable = 0;
8709 int midx, best_mode_index = -1;
8710 unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
8711 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
8712 aom_prob comp_mode_p;
8713 int64_t best_intra_rd = INT64_MAX;
8714 unsigned int best_pred_sse = UINT_MAX;
8715 PREDICTION_MODE best_intra_mode = DC_PRED;
8716 int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
8717 int64_t dist_uvs[TX_SIZES_ALL];
8718 int skip_uvs[TX_SIZES_ALL];
8719 PREDICTION_MODE mode_uv[TX_SIZES_ALL];
8720 #if CONFIG_PALETTE
8721 PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
8722 #endif // CONFIG_PALETTE
8723 #if CONFIG_EXT_INTRA
8724 int8_t uv_angle_delta[TX_SIZES_ALL];
8725 int is_directional_mode, angle_stats_ready = 0;
8726 uint8_t directional_mode_skip_mask[INTRA_MODES];
8727 #endif // CONFIG_EXT_INTRA
8728 #if CONFIG_FILTER_INTRA
8729 int8_t dc_skipped = 1;
8730 FILTER_INTRA_MODE_INFO filter_intra_mode_info_uv[TX_SIZES_ALL];
8731 #endif // CONFIG_FILTER_INTRA
8732 const int intra_cost_penalty = av1_get_intra_cost_penalty(
8733 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
8734 const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
8735 int best_skip2 = 0;
8736 uint8_t ref_frame_skip_mask[2] = { 0 };
8737 uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
8738 #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
8739 MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
8740 int64_t best_single_inter_rd = INT64_MAX;
8741 #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
8742 int mode_skip_start = sf->mode_skip_start + 1;
8743 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
8744 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
8745 int64_t mode_threshold[MAX_MODES];
8746 int *mode_map = tile_data->mode_map[bsize];
8747 const int mode_search_skip_flags = sf->mode_search_skip_flags;
8748 #if CONFIG_PVQ
8749 od_rollback_buffer pre_buf;
8750 #endif // CONFIG_PVQ
8752 HandleInterModeArgs args = {
8753 #if CONFIG_MOTION_VAR
8754 { NULL },
8755 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
8756 { NULL },
8757 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
8758 #endif // CONFIG_MOTION_VAR
8759 #if CONFIG_EXT_INTER
8760 NULL,
8761 NULL,
8762 NULL,
8763 #else // CONFIG_EXT_INTER
8764 NULL,
8765 #endif // CONFIG_EXT_INTER
8766 { { 0 } },
8769 #if CONFIG_PALETTE || CONFIG_EXT_INTRA
8770 const int rows = block_size_high[bsize];
8771 const int cols = block_size_wide[bsize];
8772 #endif // CONFIG_PALETTE || CONFIG_EXT_INTRA
8773 #if CONFIG_PALETTE
8774 int palette_ctx = 0;
8775 const MODE_INFO *above_mi = xd->above_mi;
8776 const MODE_INFO *left_mi = xd->left_mi;
8777 #endif // CONFIG_PALETTE
8778 #if CONFIG_MOTION_VAR
8779 int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
8780 int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
8781 int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
8782 int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
8784 #if CONFIG_HIGHBITDEPTH
8785 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
8786 int len = sizeof(uint16_t);
8787 args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
8788 args.above_pred_buf[1] =
8789 CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
8790 args.above_pred_buf[2] =
8791 CONVERT_TO_BYTEPTR(x->above_pred_buf + 2 * MAX_SB_SQUARE * len);
8792 args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
8793 args.left_pred_buf[1] =
8794 CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
8795 args.left_pred_buf[2] =
8796 CONVERT_TO_BYTEPTR(x->left_pred_buf + 2 * MAX_SB_SQUARE * len);
8797 } else {
8798 #endif // CONFIG_HIGHBITDEPTH
8799 args.above_pred_buf[0] = x->above_pred_buf;
8800 args.above_pred_buf[1] = x->above_pred_buf + MAX_SB_SQUARE;
8801 args.above_pred_buf[2] = x->above_pred_buf + 2 * MAX_SB_SQUARE;
8802 args.left_pred_buf[0] = x->left_pred_buf;
8803 args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE;
8804 args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE;
8805 #if CONFIG_HIGHBITDEPTH
8807 #endif // CONFIG_HIGHBITDEPTH
8808 #endif // CONFIG_MOTION_VAR
8810 av1_zero(best_mbmode);
8812 #if CONFIG_PALETTE
8813 av1_zero(pmi_uv);
8814 if (try_palette) {
8815 if (above_mi)
8816 palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
8817 if (left_mi)
8818 palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
8820 #endif // CONFIG_PALETTE
8822 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
8823 &comp_mode_p);
8825 for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
8826 for (i = 0; i < TX_SIZES_ALL; i++) rate_uv_intra[i] = INT_MAX;
8827 for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
8828 for (i = 0; i < MB_MODE_COUNT; ++i) {
8829 for (k = 0; k < TOTAL_REFS_PER_FRAME; ++k) {
8830 args.single_filter[i][k] = SWITCHABLE;
8834 rd_cost->rate = INT_MAX;
8835 #if CONFIG_SUPERTX
8836 *returnrate_nocoef = INT_MAX;
8837 #endif // CONFIG_SUPERTX
8839 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
8840 x->pred_mv_sad[ref_frame] = INT_MAX;
8841 x->mbmi_ext->mode_context[ref_frame] = 0;
8842 #if CONFIG_EXT_INTER
8843 x->mbmi_ext->compound_mode_context[ref_frame] = 0;
8844 #endif // CONFIG_EXT_INTER
8845 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
8846 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
8847 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
8848 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
8850 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
8851 #if CONFIG_GLOBAL_MOTION
8852 frame_mv[ZEROMV][ref_frame].as_int =
8853 gm_get_motion_vector(&cm->global_motion[ref_frame],
8854 cm->allow_high_precision_mv, bsize, mi_col, mi_row,
8856 .as_int;
8857 #else // CONFIG_GLOBAL_MOTION
8858 frame_mv[ZEROMV][ref_frame].as_int = 0;
8859 #endif // CONFIG_GLOBAL_MOTION
8860 #if CONFIG_EXT_INTER
8861 frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
8862 #if CONFIG_GLOBAL_MOTION
8863 frame_mv[ZERO_ZEROMV][ref_frame].as_int =
8864 gm_get_motion_vector(&cm->global_motion[ref_frame],
8865 cm->allow_high_precision_mv, bsize, mi_col, mi_row,
8867 .as_int;
8868 #else // CONFIG_GLOBAL_MOTION
8869 frame_mv[ZERO_ZEROMV][ref_frame].as_int = 0;
8870 #endif // CONFIG_GLOBAL_MOTION
8871 #endif // CONFIG_EXT_INTER
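// Continue over the compound reference-frame types: gather their MV
// candidates and, when fewer than two stack candidates exist, clear the
// all-zero-MV mode context flag if any reference MV differs from the
// (global-motion) zero MV.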
8874 for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
8875 MODE_INFO *const mi = xd->mi[0];
8876 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
8877 x->mbmi_ext->mode_context[ref_frame] = 0;
8878 av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
8879 mbmi_ext->ref_mv_stack[ref_frame],
8880 #if CONFIG_EXT_INTER
8881 mbmi_ext->compound_mode_context,
8882 #endif // CONFIG_EXT_INTER
8883 candidates, mi_row, mi_col, NULL, NULL,
8884 mbmi_ext->mode_context);
8885 if (mbmi_ext->ref_mv_count[ref_frame] < 2) {
8886 MV_REFERENCE_FRAME rf[2];
8887 av1_set_ref_frame(rf, ref_frame);
8888 if (mbmi_ext->ref_mvs[rf[0]][0].as_int !=
8889 frame_mv[ZEROMV][rf[0]].as_int ||
8890 mbmi_ext->ref_mvs[rf[0]][1].as_int !=
8891 frame_mv[ZEROMV][rf[0]].as_int ||
8892 mbmi_ext->ref_mvs[rf[1]][0].as_int !=
8893 frame_mv[ZEROMV][rf[1]].as_int ||
8894 mbmi_ext->ref_mvs[rf[1]][1].as_int != frame_mv[ZEROMV][rf[1]].as_int)
8895 mbmi_ext->mode_context[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET);
8899 #if CONFIG_MOTION_VAR
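// Pre-compute the above/left neighbour predictions and the target weighted
// prediction used by OBMC so the mode evaluations below can reuse them.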
8900 av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
8902 if (check_num_overlappable_neighbors(mbmi) &&
8903 is_motion_variation_allowed_bsize(bsize)) {
8904 av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
8905 args.above_pred_buf, dst_width1,
8906 dst_height1, args.above_pred_stride);
8907 av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
8908 args.left_pred_buf, dst_width2,
8909 dst_height2, args.left_pred_stride);
8910 av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
8911 mi_col);
8912 calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0],
8913 args.above_pred_stride[0], args.left_pred_buf[0],
8914 args.left_pred_stride[0]);
8916 #endif // CONFIG_MOTION_VAR
8918 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
8919 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
8920 // Skip checking missing references in both single and compound reference
8921 // modes. Note that a mode will be skipped iff both reference frames
8922 // are masked out.
8923 #if CONFIG_EXT_REFS
8924 if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) {
8925 ref_frame_skip_mask[0] |= (1 << ref_frame);
8926 ref_frame_skip_mask[1] |= ((1 << ref_frame) | 0x01);
8927 } else {
8928 #endif // CONFIG_EXT_REFS
8929 ref_frame_skip_mask[0] |= (1 << ref_frame);
8930 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
8931 #if CONFIG_EXT_REFS
8933 #endif // CONFIG_EXT_REFS
8934 } else {
8935 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
8936 // Skip fixed mv modes for poor references
8937 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
8938 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
8939 break;
8943 // If the segment reference frame feature is enabled, then do nothing if
8944 // the current ref frame is not allowed.
8945 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
8946 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
8947 ref_frame_skip_mask[0] |= (1 << ref_frame);
8948 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
8952 // Disable this drop out case if the ref frame
8953 // segment level feature is enabled for this segment. This is to
8954 // prevent the possibility that we end up unable to pick any mode.
8955 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
8956 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
8957 // unless ARNR filtering is enabled in which case we want
8958 // an unfiltered alternative. We allow near/nearest as well
8959 // because they may result in zero-zero MVs but be cheaper.
8960 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
8961 int_mv zeromv;
8962 ref_frame_skip_mask[0] = (1 << LAST_FRAME) |
8963 #if CONFIG_EXT_REFS
8964 (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
8965 (1 << BWDREF_FRAME) |
8966 #endif // CONFIG_EXT_REFS
8967 (1 << GOLDEN_FRAME);
8968 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
8969 // TODO(zoeliu): Further explore whether the following needs to be done for
8970 // BWDREF_FRAME as well.
8971 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
8972 #if CONFIG_GLOBAL_MOTION
8973 zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME],
8974 cm->allow_high_precision_mv, bsize,
8975 mi_col, mi_row, 0)
8976 .as_int;
8977 #else
8978 zeromv.as_int = 0;
8979 #endif // CONFIG_GLOBAL_MOTION
8980 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
8981 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
8982 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
8983 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
8984 #if CONFIG_EXT_INTER
8985 if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
8986 mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
8987 if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
8988 mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
8989 #endif // CONFIG_EXT_INTER
8993 if (cpi->rc.is_src_frame_alt_ref) {
8994 if (sf->alt_ref_search_fp) {
8995 assert(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
8996 mode_skip_mask[ALTREF_FRAME] = 0;
8997 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
8998 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
9002 if (sf->alt_ref_search_fp)
9003 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
9004 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
9005 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
9007 if (sf->adaptive_mode_search) {
9008 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
9009 cpi->rc.frames_since_golden >= 3)
9010 if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
9011 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
9014 if (bsize > sf->max_intra_bsize) {
9015 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
9016 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
9019 mode_skip_mask[INTRA_FRAME] |=
9020 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
9022 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
9023 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
9024 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
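// When schedule_mode_search is enabled, bubble-sort the leading entries of
// mode_map into ascending threshold order so the modes least likely to be
// pruned are tried first.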
9026 midx = sf->schedule_mode_search ? mode_skip_start : 0;
9027 while (midx > 4) {
9028 uint8_t end_pos = 0;
9029 for (i = 5; i < midx; ++i) {
9030 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
9031 uint8_t tmp = mode_map[i];
9032 mode_map[i] = mode_map[i - 1];
9033 mode_map[i - 1] = tmp;
9034 end_pos = i;
9037 midx = end_pos;
9040 if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
9041 x->use_default_intra_tx_type = 1;
9042 else
9043 x->use_default_intra_tx_type = 0;
9045 if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
9046 x->use_default_inter_tx_type = 1;
9047 else
9048 x->use_default_inter_tx_type = 0;
9049 #if CONFIG_PVQ
9050 od_encode_checkpoint(&x->daala_enc, &pre_buf);
9051 #endif // CONFIG_PVQ
9052 #if CONFIG_EXT_INTER
9053 for (i = 0; i < MB_MODE_COUNT; ++i)
9054 for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
9055 modelled_rd[i][ref_frame] = INT64_MAX;
9056 #endif // CONFIG_EXT_INTER
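// Main mode-decision loop: iterate over av1_mode_order via the (possibly
// re-ordered) mode_map, applying the reference/mode skip masks and RD
// thresholds set up above, and track the best mode found so far.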
9058 for (midx = 0; midx < MAX_MODES; ++midx) {
9059 int mode_index;
9060 int mode_excluded = 0;
9061 int64_t this_rd = INT64_MAX;
9062 int disable_skip = 0;
9063 int compmode_cost = 0;
9064 int rate2 = 0, rate_y = 0, rate_uv = 0;
9065 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
9066 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9067 int64_t distortion2_y = 0;
9068 int64_t total_sse_y = INT64_MAX;
9069 #endif
9070 int skippable = 0;
9071 int this_skip2 = 0;
9072 int64_t total_sse = INT64_MAX;
9073 uint8_t ref_frame_type;
9074 #if CONFIG_PVQ
9075 od_encode_rollback(&x->daala_enc, &pre_buf);
9076 #endif // CONFIG_PVQ
9077 mode_index = mode_map[midx];
9078 this_mode = av1_mode_order[mode_index].mode;
9079 ref_frame = av1_mode_order[mode_index].ref_frame[0];
9080 second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
9081 mbmi->ref_mv_idx = 0;
9083 #if CONFIG_EXT_INTER
9084 if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
9085 // Mode must be compatible with interintra prediction.
9086 if (!is_interintra_allowed_mode(this_mode)) continue;
9087 if (!is_interintra_allowed_bsize(bsize)) continue;
9090 if (is_inter_compound_mode(this_mode)) {
9091 frame_mv[this_mode][ref_frame].as_int =
9092 frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
9093 frame_mv[this_mode][second_ref_frame].as_int =
9094 frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
9096 #endif // CONFIG_EXT_INTER
9098 // Look at the reference frame of the best mode so far and set the
9099 // skip mask to look at a subset of the remaining modes.
9100 if (midx == mode_skip_start && best_mode_index >= 0) {
9101 switch (best_mbmode.ref_frame[0]) {
9102 case INTRA_FRAME: break;
9103 case LAST_FRAME:
9104 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
9105 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9106 break;
9107 #if CONFIG_EXT_REFS
9108 case LAST2_FRAME:
9109 ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK;
9110 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9111 break;
9112 case LAST3_FRAME:
9113 ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK;
9114 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9115 break;
9116 #endif // CONFIG_EXT_REFS
9117 case GOLDEN_FRAME:
9118 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
9119 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9120 break;
9121 #if CONFIG_EXT_REFS
9122 case BWDREF_FRAME:
9123 ref_frame_skip_mask[0] |= BWDREF_FRAME_MODE_MASK;
9124 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9125 break;
9126 #endif // CONFIG_EXT_REFS
9127 case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALTREF_FRAME_MODE_MASK;
9128 #if CONFIG_EXT_REFS
9129 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9130 #endif // CONFIG_EXT_REFS
9131 break;
9132 case NONE_FRAME:
9133 case TOTAL_REFS_PER_FRAME:
9134 assert(0 && "Invalid Reference frame");
9135 break;
9139 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
9140 (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
9141 continue;
9143 if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
9145 // Test best rd so far against threshold for trying this mode.
9146 if (best_mode_skippable && sf->schedule_mode_search)
9147 mode_threshold[mode_index] <<= 1;
9149 if (best_rd < mode_threshold[mode_index]) continue;
9151 // This is only used in the motion vector unit test.
9152 if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
9154 #if CONFIG_ONE_SIDED_COMPOUND // Changes LL bitstream
9155 #if CONFIG_EXT_REFS
9156 if (cpi->oxcf.pass == 0) {
9157 // Complexity-compression trade-offs
9158 // if (ref_frame == ALTREF_FRAME) continue;
9159 // if (ref_frame == BWDREF_FRAME) continue;
9160 if (second_ref_frame == ALTREF_FRAME) continue;
9161 // if (second_ref_frame == BWDREF_FRAME) continue;
9163 #endif
9164 #endif
9165 comp_pred = second_ref_frame > INTRA_FRAME;
9166 if (comp_pred) {
9167 if (!cpi->allow_comp_inter_inter) continue;
9169 // Skip compound inter modes if ARF is not available.
9170 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
9172 // Do not allow compound prediction if the segment level reference frame
9173 // feature is in use as in this case there can only be one reference.
9174 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
9176 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
9177 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
9178 continue;
9180 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
9181 } else {
9182 if (ref_frame != INTRA_FRAME)
9183 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
9186 if (ref_frame == INTRA_FRAME) {
9187 if (sf->adaptive_mode_search)
9188 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
9189 continue;
9191 if (this_mode != DC_PRED) {
9192 // Disable intra modes other than DC_PRED for blocks with low variance.
9193 // The threshold for skipping intra is based on the source variance.
9194 // TODO(debargha): Specialize the threshold for super block sizes
9195 const unsigned int skip_intra_var_thresh = 64;
9196 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
9197 x->source_variance < skip_intra_var_thresh)
9198 continue;
9199 // Only search the oblique modes if the best so far is
9200 // one of the neighboring directional modes
9201 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
9202 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
9203 if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
9204 continue;
9206 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
9207 if (conditional_skipintra(this_mode, best_intra_mode)) continue;
9210 #if CONFIG_GLOBAL_MOTION
9211 } else if (cm->global_motion[ref_frame].wmtype == IDENTITY &&
9212 (!comp_pred ||
9213 cm->global_motion[second_ref_frame].wmtype == IDENTITY)) {
9214 #else // CONFIG_GLOBAL_MOTION
9215 } else {
9216 #endif // CONFIG_GLOBAL_MOTION
9217 const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
9218 if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
9219 #if CONFIG_EXT_INTER
9220 mbmi_ext->compound_mode_context,
9221 #endif // CONFIG_EXT_INTER
9222 frame_mv, this_mode, ref_frames, bsize, -1,
9223 mi_row, mi_col))
9224 continue;
9227 mbmi->mode = this_mode;
9228 mbmi->uv_mode = DC_PRED;
9229 mbmi->ref_frame[0] = ref_frame;
9230 mbmi->ref_frame[1] = second_ref_frame;
9231 #if CONFIG_PALETTE
9232 pmi->palette_size[0] = 0;
9233 pmi->palette_size[1] = 0;
9234 #endif // CONFIG_PALETTE
9235 #if CONFIG_FILTER_INTRA
9236 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
9237 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
9238 #endif // CONFIG_FILTER_INTRA
9239 // Evaluate all sub-pel filters irrespective of whether we can use
9240 // them for this frame.
9242 set_default_interp_filters(mbmi, cm->interp_filter);
9244 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
9245 mbmi->motion_mode = SIMPLE_TRANSLATION;
9247 x->skip = 0;
9248 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
9250 // Select prediction reference frames.
9251 for (i = 0; i < MAX_MB_PLANE; i++) {
9252 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
9253 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
9256 #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
9257 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
9258 #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
9260 if (ref_frame == INTRA_FRAME) {
9261 RD_STATS rd_stats_y;
9262 TX_SIZE uv_tx;
9263 struct macroblockd_plane *const pd = &xd->plane[1];
9264 #if CONFIG_EXT_INTRA
9265 is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
9266 if (is_directional_mode) {
9267 int rate_dummy;
9268 int64_t model_rd = INT64_MAX;
9269 if (!angle_stats_ready) {
9270 const int src_stride = x->plane[0].src.stride;
9271 const uint8_t *src = x->plane[0].src.buf;
9272 #if CONFIG_HIGHBITDEPTH
9273 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
9274 highbd_angle_estimation(src, src_stride, rows, cols, bsize,
9275 directional_mode_skip_mask);
9276 else
9277 #endif // CONFIG_HIGHBITDEPTH
9278 angle_estimation(src, src_stride, rows, cols, bsize,
9279 directional_mode_skip_mask);
9280 angle_stats_ready = 1;
9282 if (directional_mode_skip_mask[mbmi->mode]) continue;
9283 rd_stats_y.rate = INT_MAX;
9284 rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rd_stats_y, bsize,
9285 intra_mode_cost[mbmi->mode], best_rd,
9286 &model_rd);
9287 } else {
9288 mbmi->angle_delta[0] = 0;
9289 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
9291 #else
9292 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
9293 #endif // CONFIG_EXT_INTRA
9294 rate_y = rd_stats_y.rate;
9295 distortion_y = rd_stats_y.dist;
9296 skippable = rd_stats_y.skip;
9298 if (rate_y == INT_MAX) continue;
9300 #if CONFIG_FILTER_INTRA
9301 if (mbmi->mode == DC_PRED) dc_skipped = 0;
9302 #endif // CONFIG_FILTER_INTRA
9304 uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][pd->subsampling_x]
9305 [pd->subsampling_y];
9306 if (rate_uv_intra[uv_tx] == INT_MAX) {
9307 choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
9308 &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
9309 &skip_uvs[uv_tx], &mode_uv[uv_tx]);
9310 #if CONFIG_PALETTE
9311 if (try_palette) pmi_uv[uv_tx] = *pmi;
9312 #endif // CONFIG_PALETTE
9314 #if CONFIG_EXT_INTRA
9315 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
9316 #endif // CONFIG_EXT_INTRA
9317 #if CONFIG_FILTER_INTRA
9318 filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
9319 #endif // CONFIG_FILTER_INTRA
9322 rate_uv = rate_uv_tokenonly[uv_tx];
9323 distortion_uv = dist_uvs[uv_tx];
9324 skippable = skippable && skip_uvs[uv_tx];
9325 mbmi->uv_mode = mode_uv[uv_tx];
9326 #if CONFIG_PALETTE
9327 if (try_palette) {
9328 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
9329 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
9330 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
9331 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
9333 #endif // CONFIG_PALETTE
9335 #if CONFIG_EXT_INTRA
9336 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
9337 #endif // CONFIG_EXT_INTRA
9338 #if CONFIG_FILTER_INTRA
9339 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
9340 filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
9341 if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
9342 mbmi->filter_intra_mode_info.filter_intra_mode[1] =
9343 filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
9345 #endif // CONFIG_FILTER_INTRA
9347 #if CONFIG_CB4X4
9348 rate2 = rate_y + intra_mode_cost[mbmi->mode];
9349 if (!x->skip_chroma_rd)
9350 rate2 += rate_uv + cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
9351 #else
9352 rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
9353 cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
9354 #endif // CONFIG_CB4X4
9356 #if CONFIG_PALETTE
9357 if (try_palette && mbmi->mode == DC_PRED) {
9358 rate2 += av1_cost_bit(
9359 av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
9361 #endif // CONFIG_PALETTE
9363 if (!xd->lossless[mbmi->segment_id] && bsize >= BLOCK_8X8) {
9364 // super_block_yrd above includes the cost of the tx_size in the
9365 // tokenonly rate, but for intra blocks, tx_size is always coded
9366 // (prediction granularity), so we account for it in the full rate,
9367 // not the tokenonly rate.
9368 rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
9370 #if CONFIG_EXT_INTRA
9371 if (is_directional_mode) {
9372 #if CONFIG_INTRA_INTERP
9373 const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
9374 const int p_angle =
9375 mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
9376 if (av1_is_intra_filter_switchable(p_angle))
9377 rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
9378 #endif // CONFIG_INTRA_INTERP
9379 rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
9380 MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
9382 if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
9383 rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
9384 MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
9386 #endif // CONFIG_EXT_INTRA
9387 #if CONFIG_FILTER_INTRA
9388 if (mbmi->mode == DC_PRED) {
9389 rate2 +=
9390 av1_cost_bit(cm->fc->filter_intra_probs[0],
9391 mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
9392 if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
9393 rate2 += write_uniform_cost(
9394 FILTER_INTRA_MODES,
9395 mbmi->filter_intra_mode_info.filter_intra_mode[0]);
9398 if (mbmi->uv_mode == DC_PRED) {
9399 rate2 +=
9400 av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
9401 mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
9402 if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1])
9403 rate2 += write_uniform_cost(
9404 FILTER_INTRA_MODES,
9405 mbmi->filter_intra_mode_info.filter_intra_mode[1]);
9407 #endif // CONFIG_FILTER_INTRA
9408 if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
9409 rate2 += intra_cost_penalty;
9410 distortion2 = distortion_y + distortion_uv;
9411 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9412 if (bsize < BLOCK_8X8) distortion2_y = distortion_y;
9413 #endif
9414 } else {
9415 int_mv backup_ref_mv[2];
9417 #if !SUB8X8_COMP_REF
9418 if (bsize == BLOCK_4X4 && mbmi->ref_frame[1] > INTRA_FRAME) continue;
9419 #endif // !SUB8X8_COMP_REF
9421 backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
9422 if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
9423 #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
9424 if (second_ref_frame == INTRA_FRAME) {
9425 if (best_single_inter_ref != ref_frame) continue;
9426 mbmi->interintra_mode = intra_to_interintra_mode[best_intra_mode];
9427 // TODO(debargha|geza.lore):
9428 // Should we use ext_intra modes for interintra?
9429 #if CONFIG_EXT_INTRA
9430 mbmi->angle_delta[0] = 0;
9431 mbmi->angle_delta[1] = 0;
9432 #if CONFIG_INTRA_INTERP
9433 mbmi->intra_filter = INTRA_FILTER_LINEAR;
9434 #endif // CONFIG_INTRA_INTERP
9435 #endif // CONFIG_EXT_INTRA
9436 #if CONFIG_FILTER_INTRA
9437 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
9438 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
9439 #endif // CONFIG_FILTER_INTRA
9441 #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
9442 mbmi->ref_mv_idx = 0;
9443 ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
9445 #if CONFIG_EXT_INTER
9446 if (comp_pred) {
9447 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
9448 int ref_mv_idx = 0;
9449 // Special case: NEAR_NEWMV and NEW_NEARMV modes use
9450 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
9451 // mbmi->ref_mv_idx (like NEWMV)
9452 if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
9453 ref_mv_idx = 1;
9455 if (compound_ref0_mode(mbmi->mode) == NEWMV) {
9456 int_mv this_mv =
9457 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
9458 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9459 xd->n8_h << MI_SIZE_LOG2, xd);
9460 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
9462 if (compound_ref1_mode(mbmi->mode) == NEWMV) {
9463 int_mv this_mv =
9464 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
9465 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9466 xd->n8_h << MI_SIZE_LOG2, xd);
9467 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
9470 } else {
9471 #endif // CONFIG_EXT_INTER
9472 if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
9473 int ref;
9474 for (ref = 0; ref < 1 + comp_pred; ++ref) {
9475 int_mv this_mv =
9476 (ref == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
9477 : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
9478 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9479 xd->n8_h << MI_SIZE_LOG2, xd);
9480 mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
9483 #if CONFIG_EXT_INTER
9485 #endif // CONFIG_EXT_INTER
9487 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
9488 av1_init_rd_stats(&rd_stats);
9489 rd_stats.rate = rate2;
9491 // Point to variables that are maintained between loop iterations
9492 args.single_newmv = single_newmv;
9493 #if CONFIG_EXT_INTER
9494 args.single_newmv_rate = single_newmv_rate;
9495 args.modelled_rd = modelled_rd;
9496 #endif // CONFIG_EXT_INTER
9497 this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
9498 &rd_stats_uv, &disable_skip, frame_mv,
9499 mi_row, mi_col, &args, best_rd);
9501 rate2 = rd_stats.rate;
9502 skippable = rd_stats.skip;
9503 distortion2 = rd_stats.dist;
9504 total_sse = rd_stats.sse;
9505 rate_y = rd_stats_y.rate;
9506 rate_uv = rd_stats_uv.rate;
9507 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9508 if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist;
9509 #endif
9512 // TODO(jingning): This needs some refactoring to improve code quality
9513 // and reduce redundant steps.
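// If additional reference-MV candidates are available in the ref_mv_stack,
// the current mode is re-evaluated below with ref_mv_idx > 0 and the
// candidate with the lowest rate-distortion cost is kept.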
9514 #if CONFIG_EXT_INTER
9515 if ((have_nearmv_in_inter_mode(mbmi->mode) &&
9516 mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
9517 ((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
9518 mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
9519 #else
9520 if ((mbmi->mode == NEARMV &&
9521 mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
9522 (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
9523 #endif
9524 int_mv backup_mv = frame_mv[NEARMV][ref_frame];
9525 MB_MODE_INFO backup_mbmi = *mbmi;
9526 int backup_skip = x->skip;
9527 int64_t tmp_ref_rd = this_rd;
9528 int ref_idx;
9530 // TODO(jingning): This should be deprecated shortly.
9531 #if CONFIG_EXT_INTER
9532 int idx_offset = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
9533 #else
9534 int idx_offset = (mbmi->mode == NEARMV) ? 1 : 0;
9535 #endif // CONFIG_EXT_INTER
9536 int ref_set =
9537 AOMMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
9539 uint8_t drl_ctx =
9540 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx_offset);
9541 // Back up the NEWMV motion vectors; they are restored after the ref_mv_idx search below.
9542 int_mv backup_fmv[2];
9543 backup_fmv[0] = frame_mv[NEWMV][ref_frame];
9544 if (comp_pred) backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
9546 rate2 += (rate2 < INT_MAX ? cpi->drl_mode_cost0[drl_ctx][0] : 0);
9548 if (this_rd < INT64_MAX) {
9549 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
9550 RDCOST(x->rdmult, x->rddiv, 0, total_sse))
9551 tmp_ref_rd =
9552 RDCOST(x->rdmult, x->rddiv,
9553 rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
9554 distortion2);
9555 else
9556 tmp_ref_rd =
9557 RDCOST(x->rdmult, x->rddiv,
9558 rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
9559 rate_y - rate_uv,
9560 total_sse);
9562 #if CONFIG_VAR_TX
9563 for (i = 0; i < MAX_MB_PLANE; ++i)
9564 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
9565 sizeof(uint8_t) * ctx->num_4x4_blk);
9566 #endif // CONFIG_VAR_TX
9568 for (ref_idx = 0; ref_idx < ref_set; ++ref_idx) {
9569 int64_t tmp_alt_rd = INT64_MAX;
9570 int dummy_disable_skip = 0;
9571 int ref;
9572 int_mv cur_mv;
9573 RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
9575 av1_invalid_rd_stats(&tmp_rd_stats);
9576 x->skip = 0;
9578 mbmi->ref_mv_idx = 1 + ref_idx;
9580 #if CONFIG_EXT_INTER
9581 if (comp_pred) {
9582 int ref_mv_idx = mbmi->ref_mv_idx;
9583 // Special case: NEAR_NEWMV and NEW_NEARMV modes use
9584 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
9585 // mbmi->ref_mv_idx (like NEWMV)
9586 if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
9587 ref_mv_idx = 1 + mbmi->ref_mv_idx;
9589 if (compound_ref0_mode(mbmi->mode) == NEWMV) {
9590 int_mv this_mv =
9591 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
9592 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9593 xd->n8_h << MI_SIZE_LOG2, xd);
9594 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
9595 } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV) {
9596 int_mv this_mv =
9597 mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
9598 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9599 xd->n8_h << MI_SIZE_LOG2, xd);
9600 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
9603 if (compound_ref1_mode(mbmi->mode) == NEWMV) {
9604 int_mv this_mv =
9605 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
9606 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9607 xd->n8_h << MI_SIZE_LOG2, xd);
9608 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
9609 } else if (compound_ref1_mode(mbmi->mode) == NEARESTMV) {
9610 int_mv this_mv =
9611 mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
9612 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9613 xd->n8_h << MI_SIZE_LOG2, xd);
9614 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
9616 } else {
9617 #endif // CONFIG_EXT_INTER
9618 for (ref = 0; ref < 1 + comp_pred; ++ref) {
9619 int_mv this_mv =
9620 (ref == 0)
9621 ? mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
9622 .this_mv
9623 : mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
9624 .comp_mv;
9625 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9626 xd->n8_h << MI_SIZE_LOG2, xd);
9627 mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
9629 #if CONFIG_EXT_INTER
9631 #endif
9633 cur_mv =
9634 mbmi_ext->ref_mv_stack[ref_frame][mbmi->ref_mv_idx + idx_offset]
9635 .this_mv;
9636 clamp_mv2(&cur_mv.as_mv, xd);
9638 if (!mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
9639 int_mv dummy_single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
9640 #if CONFIG_EXT_INTER
9641 int dummy_single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
9642 #endif // CONFIG_EXT_INTER
9644 frame_mv[NEARMV][ref_frame] = cur_mv;
9645 av1_init_rd_stats(&tmp_rd_stats);
9647 // Point to variables that are not maintained between iterations
9648 args.single_newmv = dummy_single_newmv;
9649 #if CONFIG_EXT_INTER
9650 args.single_newmv_rate = dummy_single_newmv_rate;
9651 args.modelled_rd = NULL;
9652 #endif // CONFIG_EXT_INTER
9653 tmp_alt_rd = handle_inter_mode(
9654 cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv,
9655 &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
9656 // Prevent pointers from escaping local scope
9657 args.single_newmv = NULL;
9658 #if CONFIG_EXT_INTER
9659 args.single_newmv_rate = NULL;
9660 #endif // CONFIG_EXT_INTER
9663 for (i = 0; i < mbmi->ref_mv_idx; ++i) {
9664 uint8_t drl1_ctx = 0;
9665 drl1_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
9666 i + idx_offset);
9667 tmp_rd_stats.rate +=
9668 (tmp_rd_stats.rate < INT_MAX ? cpi->drl_mode_cost0[drl1_ctx][1]
9669 : 0);
9672 if (mbmi_ext->ref_mv_count[ref_frame_type] >
9673 mbmi->ref_mv_idx + idx_offset + 1 &&
9674 ref_idx < ref_set - 1) {
9675 uint8_t drl1_ctx =
9676 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
9677 mbmi->ref_mv_idx + idx_offset);
9678 tmp_rd_stats.rate +=
9679 (tmp_rd_stats.rate < INT_MAX ? cpi->drl_mode_cost0[drl1_ctx][0]
9680 : 0);
9683 if (tmp_alt_rd < INT64_MAX) {
9684 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9685 tmp_alt_rd = RDCOST(x->rdmult, x->rddiv, tmp_rd_stats.rate,
9686 tmp_rd_stats.dist);
9687 #else
9688 if (RDCOST(x->rdmult, x->rddiv,
9689 tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate,
9690 tmp_rd_stats.dist) <
9691 RDCOST(x->rdmult, x->rddiv, 0, tmp_rd_stats.sse))
9692 tmp_alt_rd =
9693 RDCOST(x->rdmult, x->rddiv,
9694 tmp_rd_stats.rate +
9695 av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
9696 tmp_rd_stats.dist);
9697 else
9698 tmp_alt_rd =
9699 RDCOST(x->rdmult, x->rddiv,
9700 tmp_rd_stats.rate +
9701 av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
9702 tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
9703 tmp_rd_stats.sse);
9704 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9707 if (tmp_ref_rd > tmp_alt_rd) {
9708 rate2 = tmp_rd_stats.rate;
9709 disable_skip = dummy_disable_skip;
9710 distortion2 = tmp_rd_stats.dist;
9711 skippable = tmp_rd_stats.skip;
9712 rate_y = tmp_rd_stats_y.rate;
9713 rate_uv = tmp_rd_stats_uv.rate;
9714 total_sse = tmp_rd_stats.sse;
9715 this_rd = tmp_alt_rd;
9716 tmp_ref_rd = tmp_alt_rd;
9717 backup_mbmi = *mbmi;
9718 backup_skip = x->skip;
9719 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9720 if (bsize < BLOCK_8X8) {
9721 total_sse_y = tmp_rd_stats_y.sse;
9722 distortion2_y = tmp_rd_stats_y.dist;
9724 #endif
9725 #if CONFIG_VAR_TX
9726 for (i = 0; i < MAX_MB_PLANE; ++i)
9727 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
9728 sizeof(uint8_t) * ctx->num_4x4_blk);
9729 #endif // CONFIG_VAR_TX
9730 } else {
9731 *mbmi = backup_mbmi;
9732 x->skip = backup_skip;
9736 frame_mv[NEARMV][ref_frame] = backup_mv;
9737 frame_mv[NEWMV][ref_frame] = backup_fmv[0];
9738 if (comp_pred) frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
9739 #if CONFIG_VAR_TX
9740 for (i = 0; i < MAX_MB_PLANE; ++i)
9741 memcpy(x->blk_skip[i], x->blk_skip_drl[i],
9742 sizeof(uint8_t) * ctx->num_4x4_blk);
9743 #endif // CONFIG_VAR_TX
9745 mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
9746 if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
9748 if (this_rd == INT64_MAX) continue;
9750 #if SUB8X8_COMP_REF
9751 compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
9752 #else
9753 if (mbmi->sb_type != BLOCK_4X4)
9754 compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
9755 #endif // SUB8X8_COMP_REF
9757 if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
9760 // Estimate the reference frame signaling cost and add it
9761 // to the rolling cost variable.
9762 if (comp_pred) {
9763 rate2 += ref_costs_comp[ref_frame];
9764 #if CONFIG_EXT_REFS
9765 rate2 += ref_costs_comp[second_ref_frame];
9766 #endif // CONFIG_EXT_REFS
9767 } else {
9768 rate2 += ref_costs_single[ref_frame];
9771 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9772 if (ref_frame == INTRA_FRAME) {
9773 #else
9774 if (!disable_skip) {
9775 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9776 if (skippable) {
9777 // Back out the coefficient coding costs
9778 rate2 -= (rate_y + rate_uv);
9779 rate_y = 0;
9780 rate_uv = 0;
9781 // Cost the skip mb case
9782 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
9783 } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
9784 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + rate_skip0,
9785 distortion2) <
9786 RDCOST(x->rdmult, x->rddiv, rate_skip1, total_sse)) {
9787 // Add in the cost of the no skip flag.
9788 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
9789 } else {
9790 // FIXME(rbultje) make this work for splitmv also
9791 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
9792 distortion2 = total_sse;
9793 assert(total_sse >= 0);
9794 rate2 -= (rate_y + rate_uv);
9795 this_skip2 = 1;
9796 rate_y = 0;
9797 rate_uv = 0;
9798 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9799 if (bsize < BLOCK_8X8) distortion2_y = total_sse_y;
9800 #endif
9802 } else {
9803 // Add in the cost of the no skip flag.
9804 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
9807 // Calculate the final RD estimate for this mode.
9808 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
9809 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9810 } else {
9811 this_skip2 = mbmi->skip;
9812 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
9813 if (this_skip2) {
9814 rate_y = 0;
9815 rate_uv = 0;
9817 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9820 if (ref_frame == INTRA_FRAME) {
9821 // Keep record of best intra rd
9822 if (this_rd < best_intra_rd) {
9823 best_intra_rd = this_rd;
9824 best_intra_mode = mbmi->mode;
9826 #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
9827 } else if (second_ref_frame == NONE_FRAME) {
9828 if (this_rd < best_single_inter_rd) {
9829 best_single_inter_rd = this_rd;
9830 best_single_inter_ref = mbmi->ref_frame[0];
9832 #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
9835 if (!disable_skip && ref_frame == INTRA_FRAME) {
9836 for (i = 0; i < REFERENCE_MODES; ++i)
9837 best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
9840 // Did this mode help, i.e. is it the new best mode so far?
9841 if (this_rd < best_rd || x->skip) {
9842 if (!mode_excluded) {
9843 // Note index of best mode so far
9844 best_mode_index = mode_index;
9846 if (ref_frame == INTRA_FRAME) {
9847 /* required for left and above block mv */
9848 mbmi->mv[0].as_int = 0;
9849 } else {
9850 best_pred_sse = x->pred_sse[ref_frame];
9853 rd_cost->rate = rate2;
9854 #if CONFIG_SUPERTX
9855 if (x->skip)
9856 *returnrate_nocoef = rate2;
9857 else
9858 *returnrate_nocoef = rate2 - rate_y - rate_uv;
9859 *returnrate_nocoef -= av1_cost_bit(
9860 av1_get_skip_prob(cm, xd), disable_skip || skippable || this_skip2);
9861 *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
9862 mbmi->ref_frame[0] != INTRA_FRAME);
9863 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9864 #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
9865 MODE_INFO *const mi = xd->mi[0];
9866 const MOTION_MODE motion_allowed = motion_mode_allowed(
9867 #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
9868 0, xd->global_motion,
9869 #endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
9870 mi);
9871 if (motion_allowed == WARPED_CAUSAL)
9872 *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
9873 else if (motion_allowed == OBMC_CAUSAL)
9874 *returnrate_nocoef -=
9875 cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
9876 #else
9877 *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
9878 #endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
9879 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9880 #endif // CONFIG_SUPERTX
9881 rd_cost->dist = distortion2;
9882 rd_cost->rdcost = this_rd;
9883 best_rd = this_rd;
9884 best_mbmode = *mbmi;
9885 best_skip2 = this_skip2;
9886 best_mode_skippable = skippable;
9887 best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd),
9888 this_skip2 || skippable);
9889 best_rate_uv = rate_uv;
9890 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9891 if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y;
9892 #endif
9893 #if CONFIG_VAR_TX
9894 for (i = 0; i < MAX_MB_PLANE; ++i)
9895 memcpy(ctx->blk_skip[i], x->blk_skip[i],
9896 sizeof(uint8_t) * ctx->num_4x4_blk);
9897 #endif // CONFIG_VAR_TX
9901 /* keep record of best compound/single-only prediction */
9902 if (!disable_skip && ref_frame != INTRA_FRAME) {
9903 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
9905 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
9906 single_rate = rate2 - compmode_cost;
9907 hybrid_rate = rate2;
9908 } else {
9909 single_rate = rate2;
9910 hybrid_rate = rate2 + compmode_cost;
9913 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
9914 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
9916 if (!comp_pred) {
9917 if (single_rd < best_pred_rd[SINGLE_REFERENCE])
9918 best_pred_rd[SINGLE_REFERENCE] = single_rd;
9919 } else {
9920 if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
9921 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
9923 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
9924 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
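// Early termination: once a single-reference mode yields a skippable block,
// stop searching further modes.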
9927 if (x->skip && !comp_pred) break;
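// If a fast transform-type search was used during the mode loop, redo the
// full transform-type selection for the winning mode and adopt the result
// if it lowers the RD cost.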
9930 if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
9931 ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
9932 is_inter_mode(best_mbmode.mode)) ||
9933 (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
9934 !is_inter_mode(best_mbmode.mode)))) {
9935 int skip_blk = 0;
9936 RD_STATS rd_stats_y, rd_stats_uv;
9938 x->use_default_inter_tx_type = 0;
9939 x->use_default_intra_tx_type = 0;
9941 *mbmi = best_mbmode;
9943 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
9945 // Select prediction reference frames.
9946 for (i = 0; i < MAX_MB_PLANE; i++) {
9947 xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
9948 if (has_second_ref(mbmi))
9949 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
9952 if (is_inter_mode(mbmi->mode)) {
9953 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
9954 #if CONFIG_MOTION_VAR
9955 if (mbmi->motion_mode == OBMC_CAUSAL) {
9956 av1_build_obmc_inter_prediction(
9957 cm, xd, mi_row, mi_col, args.above_pred_buf, args.above_pred_stride,
9958 args.left_pred_buf, args.left_pred_stride);
9960 #endif // CONFIG_MOTION_VAR
9961 av1_subtract_plane(x, bsize, 0);
9962 #if CONFIG_VAR_TX
9963 if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
9964 select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
9965 } else {
9966 int idx, idy;
9967 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
9968 for (idy = 0; idy < xd->n8_h; ++idy)
9969 for (idx = 0; idx < xd->n8_w; ++idx)
9970 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
9971 memset(x->blk_skip[0], rd_stats_y.skip,
9972 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
9975 inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
9976 #else
9977 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
9978 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
9979 #endif // CONFIG_VAR_TX
9980 } else {
9981 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
9982 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
9985 if (RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
9986 (rd_stats_y.dist + rd_stats_uv.dist)) >
9987 RDCOST(x->rdmult, x->rddiv, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
9988 skip_blk = 1;
9989 rd_stats_y.rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
9990 rd_stats_uv.rate = 0;
9991 rd_stats_y.dist = rd_stats_y.sse;
9992 rd_stats_uv.dist = rd_stats_uv.sse;
9993 } else {
9994 skip_blk = 0;
9995 rd_stats_y.rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
9998 if (RDCOST(x->rdmult, x->rddiv, best_rate_y + best_rate_uv, rd_cost->dist) >
9999 RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
10000 (rd_stats_y.dist + rd_stats_uv.dist))) {
10001 #if CONFIG_VAR_TX
10002 int idx, idy;
10003 #endif // CONFIG_VAR_TX
10004 best_mbmode.tx_type = mbmi->tx_type;
10005 best_mbmode.tx_size = mbmi->tx_size;
10006 #if CONFIG_VAR_TX
10007 for (idy = 0; idy < xd->n8_h; ++idy)
10008 for (idx = 0; idx < xd->n8_w; ++idx)
10009 best_mbmode.inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
10011 for (i = 0; i < MAX_MB_PLANE; ++i)
10012 memcpy(ctx->blk_skip[i], x->blk_skip[i],
10013 sizeof(uint8_t) * ctx->num_4x4_blk);
10015 best_mbmode.min_tx_size = mbmi->min_tx_size;
10016 #endif // CONFIG_VAR_TX
10017 rd_cost->rate +=
10018 (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
10019 rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
10020 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
10021 if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist;
10022 #endif
10023 rd_cost->rdcost =
10024 RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
10025 best_skip2 = skip_blk;
10029 #if CONFIG_PALETTE
10030 // Only try palette mode when the best mode so far is an intra mode.
10031 if (try_palette && !is_inter_mode(best_mbmode.mode)) {
10032 int rate2 = 0;
10033 #if CONFIG_SUPERTX
10034 int best_rate_nocoef;
10035 #endif // CONFIG_SUPERTX
10036 int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd,
10037 best_model_rd_palette = INT64_MAX;
10038 int skippable = 0, rate_overhead_palette = 0;
10039 RD_STATS rd_stats_y;
10040 TX_SIZE uv_tx;
10041 uint8_t *const best_palette_color_map =
10042 x->palette_buffer->best_palette_color_map;
10043 uint8_t *const color_map = xd->plane[0].color_index_map;
10044 MB_MODE_INFO best_mbmi_palette = best_mbmode;
10046 mbmi->mode = DC_PRED;
10047 mbmi->uv_mode = DC_PRED;
10048 mbmi->ref_frame[0] = INTRA_FRAME;
10049 mbmi->ref_frame[1] = NONE_FRAME;
10050 rate_overhead_palette = rd_pick_palette_intra_sby(
10051 cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED],
10052 &best_mbmi_palette, best_palette_color_map, &best_rd_palette,
10053 &best_model_rd_palette, NULL, NULL, NULL, NULL);
10054 if (pmi->palette_size[0] == 0) goto PALETTE_EXIT;
10055 memcpy(color_map, best_palette_color_map,
10056 rows * cols * sizeof(best_palette_color_map[0]));
10057 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
10058 if (rd_stats_y.rate == INT_MAX) goto PALETTE_EXIT;
10059 uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
10060 [xd->plane[1].subsampling_y];
10061 if (rate_uv_intra[uv_tx] == INT_MAX) {
10062 choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
10063 &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
10064 &skip_uvs[uv_tx], &mode_uv[uv_tx]);
10065 pmi_uv[uv_tx] = *pmi;
10066 #if CONFIG_EXT_INTRA
10067 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
10068 #endif // CONFIG_EXT_INTRA
10069 #if CONFIG_FILTER_INTRA
10070 filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
10071 #endif // CONFIG_FILTER_INTRA
10073 mbmi->uv_mode = mode_uv[uv_tx];
10074 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
10075 if (pmi->palette_size[1] > 0) {
10076 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
10077 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
10078 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
10080 #if CONFIG_EXT_INTRA
10081 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
10082 #endif // CONFIG_EXT_INTRA
10083 #if CONFIG_FILTER_INTRA
10084 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
10085 filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
10086 if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
10087 mbmi->filter_intra_mode_info.filter_intra_mode[1] =
10088 filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
10090 #endif // CONFIG_FILTER_INTRA
10091 skippable = rd_stats_y.skip && skip_uvs[uv_tx];
10092 distortion2 = rd_stats_y.dist + dist_uvs[uv_tx];
10093 rate2 = rd_stats_y.rate + rate_overhead_palette + rate_uv_intra[uv_tx];
10094 rate2 += ref_costs_single[INTRA_FRAME];
10096 if (skippable) {
10097 rate2 -= (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
10098 #if CONFIG_SUPERTX
10099 best_rate_nocoef = rate2;
10100 #endif // CONFIG_SUPERTX
10101 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
10102 } else {
10103 #if CONFIG_SUPERTX
10104 best_rate_nocoef = rate2 - (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
10105 #endif // CONFIG_SUPERTX
10106 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
10108 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
10109 if (this_rd < best_rd) {
10110 best_mode_index = 3;
10111 mbmi->mv[0].as_int = 0;
10112 rd_cost->rate = rate2;
10113 #if CONFIG_SUPERTX
10114 *returnrate_nocoef = best_rate_nocoef;
10115 #endif // CONFIG_SUPERTX
10116 rd_cost->dist = distortion2;
10117 rd_cost->rdcost = this_rd;
10118 best_rd = this_rd;
10119 best_mbmode = *mbmi;
10120 best_skip2 = 0;
10121 best_mode_skippable = skippable;
10124 PALETTE_EXIT:
10125 #endif // CONFIG_PALETTE
10127 #if CONFIG_FILTER_INTRA
10128 // TODO(huisu): filter-intra is turned off in lossless mode for now to
10129 // avoid a unit test failure
10130 if (!xd->lossless[mbmi->segment_id] &&
10131 #if CONFIG_PALETTE
10132 pmi->palette_size[0] == 0 &&
10133 #endif // CONFIG_PALETTE
10134 !dc_skipped && best_mode_index >= 0 &&
10135 best_intra_rd < (best_rd + (best_rd >> 3))) {
10136 pick_filter_intra_interframe(
10137 cpi, x, ctx, bsize, mi_row, mi_col, rate_uv_intra, rate_uv_tokenonly,
10138 dist_uvs, skip_uvs, mode_uv, filter_intra_mode_info_uv,
10139 #if CONFIG_EXT_INTRA
10140 uv_angle_delta,
10141 #endif // CONFIG_EXT_INTRA
10142 #if CONFIG_PALETTE
10143 pmi_uv, palette_ctx,
10144 #endif // CONFIG_PALETTE
10145 0, ref_costs_single, &best_rd, &best_intra_rd, &best_intra_mode,
10146 &best_mode_index, &best_skip2, &best_mode_skippable,
10147 #if CONFIG_SUPERTX
10148 returnrate_nocoef,
10149 #endif // CONFIG_SUPERTX
10150 best_pred_rd, &best_mbmode, rd_cost);
10152 #endif // CONFIG_FILTER_INTRA
10154 // The inter modes' rate costs are not calculated precisely in some cases.
10155 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
10156 // ZEROMV. Here, checks are added for those cases, and the mode decisions
10157 // are corrected.
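// For example, a NEWMV whose best motion vector coincides with the NEARESTMV
// (or a DRL NEARMV) candidate is relabeled below so that the cheaper mode
// signaling is used.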
10158 if (best_mbmode.mode == NEWMV
10159 #if CONFIG_EXT_INTER
10160 || best_mbmode.mode == NEW_NEWMV
10161 #endif // CONFIG_EXT_INTER
10163 const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
10164 best_mbmode.ref_frame[1] };
10165 int comp_pred_mode = refs[1] > INTRA_FRAME;
10166 int_mv zeromv[2];
10167 const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
10168 #if CONFIG_GLOBAL_MOTION
10169 zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
10170 cm->allow_high_precision_mv, bsize,
10171 mi_col, mi_row, 0)
10172 .as_int;
10173 zeromv[1].as_int = comp_pred_mode
10174 ? gm_get_motion_vector(&cm->global_motion[refs[1]],
10175 cm->allow_high_precision_mv,
10176 bsize, mi_col, mi_row, 0)
10177 .as_int
10178 : 0;
10179 #else
10180 zeromv[0].as_int = 0;
10181 zeromv[1].as_int = 0;
10182 #endif // CONFIG_GLOBAL_MOTION
10183 if (!comp_pred_mode) {
10184 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
10185 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
10186 : INT_MAX;
10188 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10189 int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10190 if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
10191 best_mbmode.mode = NEARMV;
10192 best_mbmode.ref_mv_idx = i;
10196 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
10197 best_mbmode.mode = NEARESTMV;
10198 else if (best_mbmode.mv[0].as_int == zeromv[0].as_int)
10199 best_mbmode.mode = ZEROMV;
10200 } else {
10201 int_mv nearestmv[2];
10202 int_mv nearmv[2];
10204 #if CONFIG_EXT_INTER
10205 if (mbmi_ext->ref_mv_count[rf_type] > 1) {
10206 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
10207 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
10208 } else {
10209 nearmv[0] = frame_mv[NEARMV][refs[0]];
10210 nearmv[1] = frame_mv[NEARMV][refs[1]];
10212 #else
10213 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
10214 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
10215 : INT_MAX;
10217 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10218 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10219 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
10221 if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
10222 nearmv[1].as_int == best_mbmode.mv[1].as_int) {
10223 best_mbmode.mode = NEARMV;
10224 best_mbmode.ref_mv_idx = i;
10227 #endif // CONFIG_EXT_INTER
10228 if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
10229 nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
10230 nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
10231 } else {
10232 nearestmv[0] = frame_mv[NEARESTMV][refs[0]];
10233 nearestmv[1] = frame_mv[NEARESTMV][refs[1]];
10236 if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
10237 nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
10238 #if CONFIG_EXT_INTER
10239 best_mbmode.mode = NEAREST_NEARESTMV;
10240 } else {
10241 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
10242 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
10243 : INT_MAX;
10245 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10246 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10247 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
10249 // Try switching to the NEAR_NEARMV mode
10250 if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
10251 nearmv[1].as_int == best_mbmode.mv[1].as_int) {
10252 best_mbmode.mode = NEAR_NEARMV;
10253 best_mbmode.ref_mv_idx = i;
10257 if (best_mbmode.mode == NEW_NEWMV &&
10258 best_mbmode.mv[0].as_int == zeromv[0].as_int &&
10259 best_mbmode.mv[1].as_int == zeromv[1].as_int)
10260 best_mbmode.mode = ZERO_ZEROMV;
10262 #else
10263 best_mbmode.mode = NEARESTMV;
10264 } else if (best_mbmode.mv[0].as_int == zeromv[0].as_int &&
10265 best_mbmode.mv[1].as_int == zeromv[1].as_int) {
10266 best_mbmode.mode = ZEROMV;
10268 #endif // CONFIG_EXT_INTER
10272 // Make sure that the ref_mv_idx is only nonzero when we're
10273 // using a mode which can support ref_mv_idx
10274 if (best_mbmode.ref_mv_idx != 0 &&
10275 #if CONFIG_EXT_INTER
10276 !(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
10277 have_nearmv_in_inter_mode(best_mbmode.mode))) {
10278 #else
10279 !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV)) {
10280 #endif
10281 best_mbmode.ref_mv_idx = 0;
10285 int8_t ref_frame_type = av1_ref_frame_type(best_mbmode.ref_frame);
10286 int16_t mode_ctx = mbmi_ext->mode_context[ref_frame_type];
10287 if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
10288 int_mv zeromv[2];
10289 #if CONFIG_GLOBAL_MOTION
10290 const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
10291 best_mbmode.ref_frame[1] };
10292 zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
10293 cm->allow_high_precision_mv,
10294 bsize, mi_col, mi_row, 0)
10295 .as_int;
10296 zeromv[1].as_int = (refs[1] != NONE_FRAME)
10297 ? gm_get_motion_vector(&cm->global_motion[refs[1]],
10298 cm->allow_high_precision_mv,
10299 bsize, mi_col, mi_row, 0)
10300 .as_int
10301 : 0;
10302 lower_mv_precision(&zeromv[0].as_mv, cm->allow_high_precision_mv);
10303 lower_mv_precision(&zeromv[1].as_mv, cm->allow_high_precision_mv);
10304 #else
10305 zeromv[0].as_int = zeromv[1].as_int = 0;
10306 #endif // CONFIG_GLOBAL_MOTION
10307 if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
10308 best_mbmode.mv[0].as_int == zeromv[0].as_int &&
10309 #if CONFIG_EXT_INTER
10310 (best_mbmode.ref_frame[1] <= INTRA_FRAME)
10311 #else
10312 (best_mbmode.ref_frame[1] == NONE_FRAME ||
10313 best_mbmode.mv[1].as_int == zeromv[1].as_int)
10314 #endif // CONFIG_EXT_INTER
10316 best_mbmode.mode = ZEROMV;
10321 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
10322 rd_cost->rate = INT_MAX;
10323 rd_cost->rdcost = INT64_MAX;
10324 return;
10327 #if CONFIG_DUAL_FILTER
10328 assert((cm->interp_filter == SWITCHABLE) ||
10329 (cm->interp_filter == best_mbmode.interp_filter[0]) ||
10330 !is_inter_block(&best_mbmode));
10331 assert((cm->interp_filter == SWITCHABLE) ||
10332 (cm->interp_filter == best_mbmode.interp_filter[1]) ||
10333 !is_inter_block(&best_mbmode));
10334 if (best_mbmode.ref_frame[1] > INTRA_FRAME) {
10335 assert((cm->interp_filter == SWITCHABLE) ||
10336 (cm->interp_filter == best_mbmode.interp_filter[2]) ||
10337 !is_inter_block(&best_mbmode));
10338 assert((cm->interp_filter == SWITCHABLE) ||
10339 (cm->interp_filter == best_mbmode.interp_filter[3]) ||
10340 !is_inter_block(&best_mbmode));
10342 #else
10343 assert((cm->interp_filter == SWITCHABLE) ||
10344 (cm->interp_filter == best_mbmode.interp_filter) ||
10345 !is_inter_block(&best_mbmode));
10346 #endif // CONFIG_DUAL_FILTER
10348 if (!cpi->rc.is_src_frame_alt_ref)
10349 av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
10350 sf->adaptive_rd_thresh, bsize, best_mode_index);
10352 // macroblock modes
10353 *mbmi = best_mbmode;
10354 x->skip |= best_skip2;
10356 // Note: this section is needed since the mode may have been forced to
10357 // ZEROMV by the all-zero mode handling of ref-mv.
10358 #if CONFIG_GLOBAL_MOTION
10359 if (mbmi->mode == ZEROMV
10360 #if CONFIG_EXT_INTER
10361 || mbmi->mode == ZERO_ZEROMV
10362 #endif // CONFIG_EXT_INTER
10364 #if CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
10365 // Correct the motion mode for ZEROMV
10366 const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
10367 #if SEPARATE_GLOBAL_MOTION
10368 0, xd->global_motion,
10369 #endif // SEPARATE_GLOBAL_MOTION
10370 xd->mi[0]);
10371 if (mbmi->motion_mode > last_motion_mode_allowed)
10372 mbmi->motion_mode = last_motion_mode_allowed;
10373 #endif // CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
10375 // Correct the interpolation filter for ZEROMV
10376 if (is_nontrans_global_motion(xd)) {
10377 #if CONFIG_DUAL_FILTER
10378 mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE
10379 ? EIGHTTAP_REGULAR
10380 : cm->interp_filter;
10381 mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE
10382 ? EIGHTTAP_REGULAR
10383 : cm->interp_filter;
10384 #else
10385 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
10386 : cm->interp_filter;
10387 #endif // CONFIG_DUAL_FILTER
10390 #endif // CONFIG_GLOBAL_MOTION
10392 for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
10393 if (mbmi->mode != NEWMV)
10394 mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
10395 else
10396 mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
10399 for (i = 0; i < REFERENCE_MODES; ++i) {
10400 if (best_pred_rd[i] == INT64_MAX)
10401 best_pred_diff[i] = INT_MIN;
10402 else
10403 best_pred_diff[i] = best_rd - best_pred_rd[i];
10406 x->skip |= best_mode_skippable;
10408 assert(best_mode_index >= 0);
10410 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
10411 best_mode_skippable);
10413 #if CONFIG_PALETTE
10414 if (cm->allow_screen_content_tools && pmi->palette_size[1] > 0) {
10415 restore_uv_color_map(cpi, x);
10417 #endif // CONFIG_PALETTE
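// Simplified RD pick for blocks whose segment has SEG_LVL_SKIP active: the
// block is forced to ZEROMV on LAST_FRAME with the skip flag set, so the
// search reduces to choosing an interpolation filter and adding the
// reference-frame signaling cost.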
10420 void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
10421 TileDataEnc *tile_data, MACROBLOCK *x,
10422 int mi_row, int mi_col,
10423 RD_STATS *rd_cost, BLOCK_SIZE bsize,
10424 PICK_MODE_CONTEXT *ctx,
10425 int64_t best_rd_so_far) {
10426 const AV1_COMMON *const cm = &cpi->common;
10427 MACROBLOCKD *const xd = &x->e_mbd;
10428 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
10429 unsigned char segment_id = mbmi->segment_id;
10430 const int comp_pred = 0;
10431 int i;
10432 int64_t best_pred_diff[REFERENCE_MODES];
10433 unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
10434 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
10435 aom_prob comp_mode_p;
10436 InterpFilter best_filter = SWITCHABLE;
10437 int64_t this_rd = INT64_MAX;
10438 int rate2 = 0;
10439 const int64_t distortion2 = 0;
10440 (void)mi_row;
10441 (void)mi_col;
10443 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
10444 &comp_mode_p);
10446 for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
10447 for (i = LAST_FRAME; i < TOTAL_REFS_PER_FRAME; ++i)
10448 x->pred_mv_sad[i] = INT_MAX;
10450 rd_cost->rate = INT_MAX;
10452 assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
10454 #if CONFIG_PALETTE
10455 mbmi->palette_mode_info.palette_size[0] = 0;
10456 mbmi->palette_mode_info.palette_size[1] = 0;
10457 #endif // CONFIG_PALETTE
10459 #if CONFIG_FILTER_INTRA
10460 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
10461 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
10462 #endif // CONFIG_FILTER_INTRA
10463 mbmi->mode = ZEROMV;
10464 mbmi->motion_mode = SIMPLE_TRANSLATION;
10465 mbmi->uv_mode = DC_PRED;
10466 mbmi->ref_frame[0] = LAST_FRAME;
10467 mbmi->ref_frame[1] = NONE_FRAME;
10468 #if CONFIG_GLOBAL_MOTION
10469 mbmi->mv[0].as_int =
10470 gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
10471 cm->allow_high_precision_mv, bsize, mi_col, mi_row,
10473 .as_int;
10474 #else // CONFIG_GLOBAL_MOTION
10475 mbmi->mv[0].as_int = 0;
10476 #endif // CONFIG_GLOBAL_MOTION
10477 mbmi->tx_size = max_txsize_lookup[bsize];
10478 x->skip = 1;
10480 mbmi->ref_mv_idx = 0;
10481 mbmi->pred_mv[0].as_int = 0;
10483 mbmi->motion_mode = SIMPLE_TRANSLATION;
10484 #if CONFIG_MOTION_VAR
10485 av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
10486 #endif
10487 #if CONFIG_WARPED_MOTION
10488 if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
10489 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
10490 mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
10492 #endif
10494 set_default_interp_filters(mbmi, cm->interp_filter);
10496 if (cm->interp_filter != SWITCHABLE) {
10497 best_filter = cm->interp_filter;
10498 } else {
10499 best_filter = EIGHTTAP_REGULAR;
10500 if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
10501 x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
10502 int rs;
10503 int best_rs = INT_MAX;
10504 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
10505 #if CONFIG_DUAL_FILTER
10506 int k;
10507 for (k = 0; k < 4; ++k) mbmi->interp_filter[k] = i;
10508 #else
10509 mbmi->interp_filter = i;
10510 #endif // CONFIG_DUAL_FILTER
10511 rs = av1_get_switchable_rate(cpi, xd);
10512 if (rs < best_rs) {
10513 best_rs = rs;
10514 #if CONFIG_DUAL_FILTER
10515 best_filter = mbmi->interp_filter[0];
10516 #else
10517 best_filter = mbmi->interp_filter;
10518 #endif // CONFIG_DUAL_FILTER
10523 // Set the appropriate filter
10524 #if CONFIG_DUAL_FILTER
10525 for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = best_filter;
10526 #else
10527 mbmi->interp_filter = best_filter;
10528 #endif // CONFIG_DUAL_FILTER
10529 rate2 += av1_get_switchable_rate(cpi, xd);
10531 if (cm->reference_mode == REFERENCE_MODE_SELECT)
10532 rate2 += av1_cost_bit(comp_mode_p, comp_pred);
10534 // Estimate the reference frame signaling cost and add it
10535 // to the rolling cost variable.
10536 rate2 += ref_costs_single[LAST_FRAME];
10537 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
10539 rd_cost->rate = rate2;
10540 rd_cost->dist = distortion2;
10541 rd_cost->rdcost = this_rd;
10542 #if CONFIG_DAALA_DIST && CONFIG_CB4X4
10543 if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
10544 #endif
10545 if (this_rd >= best_rd_so_far) {
10546 rd_cost->rate = INT_MAX;
10547 rd_cost->rdcost = INT64_MAX;
10548 return;
10551 #if CONFIG_DUAL_FILTER
10552 assert((cm->interp_filter == SWITCHABLE) ||
10553 (cm->interp_filter == mbmi->interp_filter[0]));
10554 #else
10555 assert((cm->interp_filter == SWITCHABLE) ||
10556 (cm->interp_filter == mbmi->interp_filter));
10557 #endif // CONFIG_DUAL_FILTER
10559 av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
10560 cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
10562 av1_zero(best_pred_diff);
10564 store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, 0);
10567 #if CONFIG_MOTION_VAR
10568 // This function has a structure similar to av1_build_obmc_inter_prediction
10570 // The OBMC predictor is computed as:
10572 // PObmc(x,y) =
10573 // AOM_BLEND_A64(Mh(x),
10574 // AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
10575 // PLeft(x, y))
10577 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
10578 // rounding, this can be written as:
10580 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
10581 // Mh(x) * Mv(y) * P(x,y) +
10582 // Mh(x) * Cv(y) * Pabove(x,y) +
10583 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
10585 // Where :
10587 // Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
10588 //  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
10590 // This function computes 'wsrc' and 'mask' as:
10592 // wsrc(x, y) =
10593 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
10594 //      Mh(x) * Cv(y) * Pabove(x,y) -
10595 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
10597 // mask(x, y) = Mh(x) * Mv(y)
10599 // These can then be used to efficiently approximate the error for any
10600 // predictor P in the context of the provided neighbouring predictors by
10601 // computing:
10603 // error(x, y) =
10604 // wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
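//
// A minimal illustrative sketch (not part of the encoder; 'pred',
// 'pred_stride' and 'acc' are hypothetical names) of how 'wsrc' and 'mask'
// would be combined with a candidate predictor to approximate that error:
//
//   int64_t acc = 0;
//   for (int r = 0; r < bh; ++r) {
//     for (int c = 0; c < bw; ++c) {
//       // diff is scaled by AOM_BLEND_A64_MAX_ALPHA ** 2
//       const int64_t diff =
//           (int64_t)wsrc[r * bw + c] -
//           (int64_t)mask[r * bw + c] * pred[r * pred_stride + c];
//       acc += diff * diff;
//     }
//   }
//
// Up to rounding and rescaling, this mirrors what the OBMC variance kernels
// compute during the OBMC motion search (the OBMC SAD kernels accumulate the
// absolute value of diff instead of its square).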
10606 static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
10607 const MACROBLOCKD *xd, int mi_row,
10608 int mi_col, const uint8_t *above,
10609 int above_stride, const uint8_t *left,
10610 int left_stride) {
10611 const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
10612 int row, col, i;
10613 const int bw = xd->n8_w << MI_SIZE_LOG2;
10614 const int bh = xd->n8_h << MI_SIZE_LOG2;
10615 int32_t *mask_buf = x->mask_buf;
10616 int32_t *wsrc_buf = x->wsrc_buf;
10617 const int wsrc_stride = bw;
10618 const int mask_stride = bw;
10619 const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
10620 #if CONFIG_HIGHBITDEPTH
10621 const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
10622 #else
10623 const int is_hbd = 0;
10624 #endif // CONFIG_HIGHBITDEPTH
10626 // plane 0 should not be subsampled
10627 assert(xd->plane[0].subsampling_x == 0);
10628 assert(xd->plane[0].subsampling_y == 0);
10630 av1_zero_array(wsrc_buf, bw * bh);
10631 for (i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
10633 // handle above row
10634 if (xd->up_available) {
10635 const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
10636 const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
10637 const int mi_row_offset = -1;
10638 const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
10639 const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
10640 int neighbor_count = 0;
10642 assert(miw > 0);
10644 i = 0;
10645 do { // for each mi in the above row
10646 const int mi_col_offset = i;
10647 const MB_MODE_INFO *above_mbmi =
10648 &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
10649 #if CONFIG_CHROMA_SUB8X8
10650 if (above_mbmi->sb_type < BLOCK_8X8)
10651 above_mbmi =
10652 &xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi;
10653 #endif
10654 const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
10655 const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
10656 const int neighbor_bw = mi_step * MI_SIZE;
10658 if (is_neighbor_overlappable(above_mbmi)) {
10659 if (!CONFIG_CB4X4 && (a_bsize == BLOCK_4X4 || a_bsize == BLOCK_4X8))
10660 neighbor_count += 2;
10661 else
10662 neighbor_count++;
10663 if (neighbor_count > neighbor_limit) break;
10665 const int tmp_stride = above_stride;
10666 int32_t *wsrc = wsrc_buf + (i * MI_SIZE);
10667 int32_t *mask = mask_buf + (i * MI_SIZE);
10669 if (!is_hbd) {
10670 const uint8_t *tmp = above;
10672 for (row = 0; row < overlap; ++row) {
10673 const uint8_t m0 = mask1d[row];
10674 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10675 for (col = 0; col < neighbor_bw; ++col) {
10676 wsrc[col] = m1 * tmp[col];
10677 mask[col] = m0;
10679 wsrc += wsrc_stride;
10680 mask += mask_stride;
10681 tmp += tmp_stride;
10683 #if CONFIG_HIGHBITDEPTH
10684 } else {
10685 const uint16_t *tmp = CONVERT_TO_SHORTPTR(above);
10687 for (row = 0; row < overlap; ++row) {
10688 const uint8_t m0 = mask1d[row];
10689 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10690 for (col = 0; col < neighbor_bw; ++col) {
10691 wsrc[col] = m1 * tmp[col];
10692 mask[col] = m0;
10694 wsrc += wsrc_stride;
10695 mask += mask_stride;
10696 tmp += tmp_stride;
10698 #endif // CONFIG_HIGHBITDEPTH
10702 above += neighbor_bw;
10703 i += mi_step;
10704 } while (i < miw);
10707 for (i = 0; i < bw * bh; ++i) {
10708 wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
10709 mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
10712 // handle left column
10713 if (xd->left_available) {
10714 const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
10715 const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
10716 const int mi_col_offset = -1;
10717 const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
10718 const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
10719 int neighbor_count = 0;
10721 assert(mih > 0);
10723 i = 0;
10724 do { // for each mi in the left column
10725 const int mi_row_offset = i;
10726 MB_MODE_INFO *left_mbmi =
10727 &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
10729 #if CONFIG_CHROMA_SUB8X8
10730 if (left_mbmi->sb_type < BLOCK_8X8)
10731 left_mbmi =
10732 &xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi;
10733 #endif
10734 const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
10735 const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
10736 const int neighbor_bh = mi_step * MI_SIZE;
10738 if (is_neighbor_overlappable(left_mbmi)) {
10739 if (!CONFIG_CB4X4 && (l_bsize == BLOCK_4X4 || l_bsize == BLOCK_8X4))
10740 neighbor_count += 2;
10741 else
10742 neighbor_count++;
10743 if (neighbor_count > neighbor_limit) break;
10745 const int tmp_stride = left_stride;
10746 int32_t *wsrc = wsrc_buf + (i * MI_SIZE * wsrc_stride);
10747 int32_t *mask = mask_buf + (i * MI_SIZE * mask_stride);
10749 if (!is_hbd) {
10750 const uint8_t *tmp = left;
10752 for (row = 0; row < neighbor_bh; ++row) {
10753 for (col = 0; col < overlap; ++col) {
10754 const uint8_t m0 = mask1d[col];
10755 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10756 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
10757 (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
10758 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
10760 wsrc += wsrc_stride;
10761 mask += mask_stride;
10762 tmp += tmp_stride;
10764 #if CONFIG_HIGHBITDEPTH
10765 } else {
10766 const uint16_t *tmp = CONVERT_TO_SHORTPTR(left);
10768 for (row = 0; row < neighbor_bh; ++row) {
10769 for (col = 0; col < overlap; ++col) {
10770 const uint8_t m0 = mask1d[col];
10771 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10772 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
10773 (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
10774 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
10776 wsrc += wsrc_stride;
10777 mask += mask_stride;
10778 tmp += tmp_stride;
10780 #endif // CONFIG_HIGHBITDEPTH
10784 left += neighbor_bh * left_stride;
10785 i += mi_step;
10786 } while (i < mih);
10789 if (!is_hbd) {
10790 const uint8_t *src = x->plane[0].src.buf;
10792 for (row = 0; row < bh; ++row) {
10793 for (col = 0; col < bw; ++col) {
10794 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
10796 wsrc_buf += wsrc_stride;
10797 src += x->plane[0].src.stride;
10799 #if CONFIG_HIGHBITDEPTH
10800 } else {
10801 const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
10803 for (row = 0; row < bh; ++row) {
10804 for (col = 0; col < bw; ++col) {
10805 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
10807 wsrc_buf += wsrc_stride;
10808 src += x->plane[0].src.stride;
10810 #endif // CONFIG_HIGHBITDEPTH
10814 #if CONFIG_NCOBMC
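// Compare the RD cost of the selected causal prediction (SIMPLE_TRANSLATION)
// against the non-causal OBMC prediction for the already-coded block, and
// keep whichever is cheaper, updating *mbmi and x->skip accordingly.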
10815 void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
10816 int mi_row, int mi_col) {
10817 const AV1_COMMON *const cm = &cpi->common;
10818 MACROBLOCKD *const xd = &x->e_mbd;
10819 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
10820 MB_MODE_INFO backup_mbmi;
10821 BLOCK_SIZE bsize = mbmi->sb_type;
10822 int ref, skip_blk, backup_skip = x->skip;
10823 int64_t rd_causal;
10824 RD_STATS rd_stats_y, rd_stats_uv;
10825 int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
10826 int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
10828 // Recompute the best causal predictor and rd
10829 mbmi->motion_mode = SIMPLE_TRANSLATION;
10830 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
10831 for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
10832 YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]);
10833 assert(cfg != NULL);
10834 av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
10835 &xd->block_refs[ref]->sf);
10837 av1_setup_dst_planes(x->e_mbd.plane, bsize,
10838 get_frame_new_buffer(&cpi->common), mi_row, mi_col);
10840 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
10842 av1_subtract_plane(x, bsize, 0);
10843 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
10844 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
10845 assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
10846 if (rd_stats_y.skip && rd_stats_uv.skip) {
10847 rd_stats_y.rate = rate_skip1;
10848 rd_stats_uv.rate = 0;
10849 rd_stats_y.dist = rd_stats_y.sse;
10850 rd_stats_uv.dist = rd_stats_uv.sse;
10851 skip_blk = 0;
10852 } else if (RDCOST(x->rdmult, x->rddiv,
10853 (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
10854 (rd_stats_y.dist + rd_stats_uv.dist)) >
10855 RDCOST(x->rdmult, x->rddiv, rate_skip1,
10856 (rd_stats_y.sse + rd_stats_uv.sse))) {
10857 rd_stats_y.rate = rate_skip1;
10858 rd_stats_uv.rate = 0;
10859 rd_stats_y.dist = rd_stats_y.sse;
10860 rd_stats_uv.dist = rd_stats_uv.sse;
10861 skip_blk = 1;
10862 } else {
10863 rd_stats_y.rate += rate_skip0;
10864 skip_blk = 0;
10866 backup_skip = skip_blk;
10867 backup_mbmi = *mbmi;
10868 rd_causal = RDCOST(x->rdmult, x->rddiv, (rd_stats_y.rate + rd_stats_uv.rate),
10869 (rd_stats_y.dist + rd_stats_uv.dist));
10870 rd_causal += RDCOST(x->rdmult, x->rddiv,
10871 av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0);
10873 // Check non-causal mode
10874 mbmi->motion_mode = OBMC_CAUSAL;
10875 av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
10877 av1_subtract_plane(x, bsize, 0);
10878 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
10879 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
10880 assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
10881 if (rd_stats_y.skip && rd_stats_uv.skip) {
10882 rd_stats_y.rate = rate_skip1;
10883 rd_stats_uv.rate = 0;
10884 rd_stats_y.dist = rd_stats_y.sse;
10885 rd_stats_uv.dist = rd_stats_uv.sse;
10886 skip_blk = 0;
10887 } else if (RDCOST(x->rdmult, x->rddiv,
10888 (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
10889 (rd_stats_y.dist + rd_stats_uv.dist)) >
10890 RDCOST(x->rdmult, x->rddiv, rate_skip1,
10891 (rd_stats_y.sse + rd_stats_uv.sse))) {
10892 rd_stats_y.rate = rate_skip1;
10893 rd_stats_uv.rate = 0;
10894 rd_stats_y.dist = rd_stats_y.sse;
10895 rd_stats_uv.dist = rd_stats_uv.sse;
10896 skip_blk = 1;
10897 } else {
10898 rd_stats_y.rate += rate_skip0;
10899 skip_blk = 0;
10902 if (rd_causal >
10903 RDCOST(x->rdmult, x->rddiv,
10904 rd_stats_y.rate + rd_stats_uv.rate +
10905 av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
10906 (rd_stats_y.dist + rd_stats_uv.dist))) {
10907 x->skip = skip_blk;
10908 } else {
10909 *mbmi = backup_mbmi;
10910 x->skip = backup_skip;
10913 #endif // CONFIG_NCOBMC
10914 #endif // CONFIG_MOTION_VAR