vp9/encoder/vp9_rdopt.c
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pragmas.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_variance.h"

#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
#define RD_THRESH_POW 1.25
#define RD_MULT_EPB_RATIO 64
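
// RD_THRESH_POW shapes how the per-mode RD breakout thresholds scale with
// the quantizer (see compute_rd_thresh_factor() below); RD_THRESH_MAX_FACT
// and RD_THRESH_INC bound and step the adaptive per-mode threshold factors
// maintained elsewhere in the encoder. RD_MULT_EPB_RATIO converts the
// frame-level RD multiplier into the per-bit motion-estimation error cost
// (see vp9_initialize_rd_consts()).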

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

#define LAST_FRAME_MODE_MASK 0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
#define ALT_REF_MODE_MASK 0xFFC648D0

#define MIN_EARLY_TERM_INDEX 3

typedef struct {
  MB_PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
  int use_fast_coef_costing;
  const scan_order *so;
};

const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},
  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};

const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
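
// raster_block indexes the 4x4 blocks of a plane block in raster order;
// the helpers below convert that index into a pixel (or int16 sample)
// offset within a buffer of the given stride.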

static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int bw = b_width_log2(plane_bsize);
  const int y = 4 * (raster_block >> bw);
  const int x = 4 * (raster_block & ((1 << bw) - 1));
  return y * stride + x;
}

static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + raster_block_offset(plane_bsize, raster_block, stride);
}

static void fill_mode_costs(VP9_COMP *cpi) {
  MACROBLOCK *const x = &cpi->mb;
  const FRAME_CONTEXT *const fc = &cpi->common.fc;
  int i, j;

  for (i = 0; i < INTRA_MODES; i++)
    for (j = 0; j < INTRA_MODES; j++)
      vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME],
                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME],
                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    vp9_cost_tokens((int *)x->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
}
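
// Token costs are indexed as
//   c[tx_size][plane_type][ref_type][coef_band][skip_eob][context][token];
// the [0]/[1] pair in the skip_eob dimension holds costs computed with and
// without the trailing EOB node (vp9_cost_tokens() vs vp9_cost_tokens_skip()),
// which is why the EOB entries of the two tables must agree.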

static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}

static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    const double q = vp9_convert_qindex_to_q(i);
    sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
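
// The RD multiplier (the Lagrangian lambda in the rate-distortion cost
// J = D + lambda * R) grows roughly with the square of the quantizer step,
// so rate is weighted more heavily as quality drops.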

int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}

static int compute_rd_thresh_factor(int qindex) {
  // TODO(debargha): Adjust the function below
  const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  return MAX(q, 8);
}

void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

static void set_block_thresholds(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
                                            cm->base_qindex) + cm->y_dc_delta_q,
                             0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      for (i = 0; i < MAX_MODES; ++i)
        cpi->rd_threshes[segment_id][bsize][i] =
            cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4
                                                : INT_MAX;

      for (i = 0; i < MAX_REFS; ++i) {
        cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
            cpi->rd_thresh_mult_sub8x8[i] < thresh_max
                ? cpi->rd_thresh_mult_sub8x8[i] * t / 4
                : INT_MAX;
      }
    }
  }
}

void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  int i;

  vp9_clear_system_state();

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  x->errorperbit += (x->errorperbit == 0);

  x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                         cm->frame_type != KEY_FRAME) ? 0 : 1;

  set_block_thresholds(cpi);

  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
    fill_token_costs(x->token_costs, cm->fc.coef_probs);

    for (i = 0; i < PARTITION_CONTEXTS; i++)
      vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
                      vp9_partition_tree);
  }

  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
      cm->frame_type == KEY_FRAME) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(x->nmvjointcost,
                               cm->allow_high_precision_mv ? x->nmvcost_hp
                                                           : x->nmvcost,
                               &cm->fc.nmvc, cm->allow_high_precision_mv);

      for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
        vp9_cost_tokens((int *)x->inter_mode_cost[i],
                        cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
    }
  }
}

static const int MAX_XSQ_Q10 = 245727;
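
// The _q10 suffix denotes Q10 fixed point: 10 fractional bits, so
// 1 << 10 == 1024 represents 1.0. model_rd_norm() looks up normalized rate
// and distortion for x^2 (in Q10) and linearly interpolates between table
// entries.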

static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256

  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
     4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
     3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
     3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
     2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
     2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
     1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
     1159,  1086,  1021,   963,   911,   864,   821,   781,
      745,   680,   623,   574,   530,   490,   455,   424,
      395,   345,   304,   269,   239,   213,   190,   171,
      154,   126,   104,    87,    73,    61,    52,    44,
       38,    28,    21,    16,    12,    10,     8,     6,
        5,     3,     2,     1,     1,     1,     0,     0,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
       0,     0,     1,     1,     1,     2,     2,     2,
       3,     3,     4,     5,     5,     6,     7,     7,
       8,     9,    11,    12,    13,    15,    16,    17,
      18,    21,    24,    26,    29,    31,    34,    36,
      39,    44,    49,    54,    59,    64,    69,    73,
      78,    88,    97,   106,   115,   124,   133,   142,
     151,   167,   184,   200,   215,   231,   245,   260,
     274,   301,   327,   351,   375,   397,   418,   439,
     458,   495,   528,   559,   587,   613,   637,   659,
     680,   717,   749,   777,   801,   823,   842,   859,
     874,   899,   919,   936,   949,   960,   969,   977,
     983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
    1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
  };
  static const int xsq_iq_q10[] = {
         0,      4,      8,     12,     16,     20,     24,     28,
        32,     40,     48,     56,     64,     72,     80,     88,
        96,    112,    128,    144,    160,    176,    192,    208,
       224,    256,    288,    320,    352,    384,    416,    448,
       480,    544,    608,    672,    736,    800,    864,    928,
       992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
      2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
      4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
      8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
     32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
     65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };

  static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
  assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
  assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
  assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
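
  // Locate the bracketing table entries for xsq_q10: the tables are sampled
  // where the top four bits of (x^2 + 8) vary, so the index is built from
  // the MSB position k and the next three bits, and the result is linearly
  // interpolated between entries xq and xq + 1.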
  int tmp = (xsq_q10 >> 2) + 8;
  int k = get_msb(tmp) - 3;
  int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}

void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    const uint64_t xsq_q10_64 =
        ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
    const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
                        MAX_XSQ_Q10 : (int)xsq_q10_64;
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    *rate = (n * r_q10 + 2) >> 2;
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}

static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi_8x8[0]->mbmi.ref_frame[0];
  unsigned int sse;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);

    if (i == 0)
      x->pred_sse[ref] = sse;

    // Fast approximation of the modeling function.
    if (cpi->speed > 4) {
      int64_t rate;
      int64_t dist;
      int64_t square_error = sse;
      int quantizer = (pd->dequant[1] >> 3);

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    } else {
      int rate;
      int64_t dist;
      vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                                   pd->dequant[1] >> 3, &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}

static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int j, k;
  BLOCK_SIZE bs;
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[bsize];
  int rate_sum = 0;
  int64_t dist_sum = 0;
  const int t = 4 << tx_size;

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }

  *out_skip = 1;
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
      // sse works better than var, since there is no dc prediction used
      vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
                                   &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
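
// cost_coeffs() walks the quantized coefficients of one transform block in
// scan order and sums their entropy-coded cost from x->token_costs, updating
// the above/left entropy contexts (*A, *L) as a side effect. With
// use_fast_coef_costing, the per-position context derivation is replaced by
// a cheaper approximation keyed off the previous token.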

static INLINE int cost_coeffs(MACROBLOCK *x,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
        token_cache[rc] = vp9_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // The contexts are zero iff the eob was the first coefficient.
  *A = *L = (c > 0);

  return cost;
}
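
// dist_block() measures distortion in the transform domain: SSE between the
// unquantized and the dequantized coefficients. The shift compensates for
// the extra scaling of the smaller transforms.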

static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                 (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (p >> 4);
    args->sse += p;
  }
}

static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);

  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, tx_size,
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
}

static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi))
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
  else
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse += args->sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
}
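
// Collapse the per-4x4 above/left entropy contexts of a plane into one
// context flag per transform block: for TX_8X8 and larger, a context is set
// if any of the 2/4/8 covered 4x4 contexts is nonzero, which the code below
// tests with a single 16/32/64-bit load.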

void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
    default:
      assert(0 && "Invalid transform size.");
  }
}

static void txfm_rd_in_plane(MACROBLOCK *x,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args = { 0 };
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;

  if (plane == 0)
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;

  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  args.so = get_scan(xd, tx_size, pd->plane_type, 0);

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         block_rd_txfm, &args);
  if (args.skip) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = vp9_is_skippable_in_plane(x, bsize, plane);
  }
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
                                     int64_t ref_best_rd,
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
  cpi->tx_stepdown_count[0]++;
}
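
// In the search below, r[n][0] is the coefficient rate for transform size n
// and r[n][1] additionally includes the cost of signaling size n when the
// frame header allows per-block selection (TX_MODE_SELECT); rd[n][0/1] are
// the corresponding RD costs, and s0/s1 price the skip flag.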

static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t tx_cache[TX_MODES],
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);

  *distortion = d[mbmi->tx_size];
  *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mbmi->tx_size];

  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];

  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}

static int64_t scaled_rd_cost(int rdmult, int rddiv,
                              int rate, int64_t dist, double scale) {
  return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
}

static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
                                          int64_t ref_best_rd,
                                          BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    double scale = scale_rd[n];
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
    if (s[n]) {
      rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n],
                                           scale);
    } else {
      rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n],
                                scale);
      rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n],
                                scale);
    }
    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
    }
  }

  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);

  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
  txfm_rd_in_plane(x, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);

  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}

static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                  int64_t *distortion, int *skip,
                                  int64_t *psse, BLOCK_SIZE bs,
                                  int64_t txfm_cache[TX_MODES],
                                  int64_t ref_best_rd) {
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  TX_SIZE tx_size;

  assert(bs == mbmi->sb_type);

  vp9_subtract_plane(x, bs, 0);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
    if (psse)
      *psse = sse[mbmi->tx_size];
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
      model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
                           &r[tx_size][0], &d[tx_size], &s[tx_size]);
    choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
                                  skip, sse, ref_best_rd, bs);
  } else {
    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
      txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                       &s[tx_size], &sse[tx_size],
                       ref_best_rd, 0, bs, tx_size,
                       cpi->sf.use_fast_coef_costing);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}

static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                  int64_t *distortion, int *skip,
                                  int64_t *psse, BLOCK_SIZE bs,
                                  int64_t txfm_cache[TX_MODES],
                                  int64_t ref_best_rd) {
  int64_t sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;

  assert(bs == mbmi->sb_type);
  if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
  } else {
    int r[TX_SIZES][2], s[TX_SIZES];
    int64_t d[TX_SIZES];
    TX_SIZE tx_size;
    for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
      txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                       &s[tx_size], &sse[tx_size],
                       ref_best_rd, 0, bs, tx_size,
                       cpi->sf.use_fast_coef_costing);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}

static int conditional_skipintra(MB_PREDICTION_MODE mode,
                                 MB_PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}
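
// RD search over the intra prediction modes of one 4x4 sub-block of an 8x8
// partition (covering 4x8/8x4 via the idx/idy loops). Each candidate mode is
// predicted, transformed and quantized per 4x4, accumulating rate and
// transform-domain distortion; the reconstruction of the best mode is kept
// in best_dst so it can be restored after the mode loop.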

static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     MB_PREDICTION_MODE *best_mode,
                                     const int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;

  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];

  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
        continue;
    }

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = ib + idy * 2 + idx;
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
        int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi_8x8[0]->bmi[block].as_mode = mode;
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, idx, idy, 0);
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}

static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  int i, j;
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
  const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
  const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  const int *bmode_costs = mb->mbmode_cost;

  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      MB_PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
        const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);

        bmode_costs = mb->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                      t_above + idx, t_left + idy, &r, &ry, &d,
                                      bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}

static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t tx_cache[TX_MODES],
                                      int64_t best_rd) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int i;
  int *bmode_costs = x->mbmode_cost;

  if (cpi->sf.tx_size_search_method == USE_FULL_RD)
    for (i = 0; i < TX_MODES; i++)
      tx_cache[i] = INT64_MAX;

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int64_t local_tx_cache[TX_MODES];
    MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
    MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;

    if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
      continue;

    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
      const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);

      bmode_costs = x->y_mode_costs[A][L];
    }
    mic->mbmi.mode = mode;

    intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                          &s, NULL, bsize, local_tx_cache, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->mbmi.tx_size;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }

    if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
      for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
        const int64_t adj_rd = this_rd + local_tx_cache[i] -
                               local_tx_cache[cpi->common.tx_mode];
        if (adj_rd < tx_cache[i]) {
          tx_cache[i] = adj_rd;
        }
      }
    }
  }

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;

  return best_rd;
}

static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
                             int *rate, int64_t *distortion, int *skippable,
                             int64_t *sse, BLOCK_SIZE bsize,
                             int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;

  if (ref_best_rd < 0)
    goto term;

  if (is_inter_block(mbmi)) {
    int plane;
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp9_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_txfm_size,
                     cpi->sf.use_fast_coef_costing);
    if (pnrate == INT_MAX)
      goto term;
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }
  return;

 term:
  *rate = INT_MAX;
  *distortion = INT64_MAX;
  *sse = INT64_MAX;
  *skippable = 0;
  return;
}

static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
      continue;

    xd->mi_8x8[0]->mbmi.uv_mode = mode;

    super_block_uvrd(cpi, x, &this_rate_tokenonly,
                     &this_distortion, &s, &this_sse, bsize, best_rd);
    if (this_rate_tokenonly == INT_MAX)
      continue;
    this_rate = this_rate_tokenonly +
                x->intra_uv_mode_cost[cpi->common.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      if (!x->select_txfm_size) {
        int i;
        struct macroblock_plane *const p = x->plane;
        struct macroblockd_plane *const pd = xd->plane;
        for (i = 1; i < MAX_MB_PLANE; ++i) {
          p[i].coeff = ctx->coeff_pbuf[i][2];
          p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
          pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
          p[i].eobs = ctx->eobs_pbuf[i][2];

          ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
          ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
          ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
          ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];

          ctx->coeff_pbuf[i][0] = p[i].coeff;
          ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
          ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
          ctx->eobs_pbuf[i][0] = p[i].eobs;
        }
      }
    }
  }

  xd->mi_8x8[0]->mbmi.uv_mode = mode_selected;
  return best_rd;
}

static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
                              int *rate, int *rate_tokenonly,
                              int64_t *distortion, int *skippable,
                              BLOCK_SIZE bsize) {
  const VP9_COMMON *cm = &cpi->common;
  int64_t unused;

  x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
  super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                   skippable, &unused, bsize, INT64_MAX);
  *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED];
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}

static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 MB_PREDICTION_MODE *mode_uv) {
  MACROBLOCK *const x = &cpi->mb;

  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
                   skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  // Else do a proper rd search for each possible transform size that may
  // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x, ctx,
                            rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
  }
  *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
}

static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode,
                       int mode_context) {
  const MACROBLOCK *const x = &cpi->mb;
  const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id;

  // Don't account for mode here if segment skip is enabled.
  if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
    assert(is_inter_mode(mode));
    return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
  } else {
    return 0;
  }
}

static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv);
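
// labels2mode() commits the motion vector(s) implied by `mode` to sub-block
// i of an 8x8 partition (replicating them over the 4x4s the sub-block
// covers) and returns the rate cost of coding the mode plus, for NEWMV, the
// motion vector residual(s).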

static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
                       MB_PREDICTION_MODE mode,
                       int_mv this_mv[2],
                       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                       int_mv seg_mvs[MAX_REF_FRAMES],
                       int_mv *best_ref_mv[2],
                       const int *mvjcost, int *mvcost[2]) {
  MODE_INFO *const mic = xd->mi_8x8[0];
  const MB_MODE_INFO *const mbmi = &mic->mbmi;
  int thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  const int is_compound = has_second_ref(mbmi);

  // the only time we should do costing for new motion vector or mode
  // is when we are on a new label  (jbb May 08, 2007)
  switch (mode) {
    case NEWMV:
      this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                    mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (is_compound) {
        this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                      mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARESTMV:
      this_mv[0].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
      break;
    case NEARMV:
      this_mv[0].as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv[0].as_int = 0;
      if (is_compound)
        this_mv[1].as_int = 0;
      break;
    default:
      break;
  }

  mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
  if (is_compound)
    mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;

  mic->bmi[i].as_mode = mode;

  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
                 &mic->bmi[i], sizeof(mic->bmi[i]));

  return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
         thismvcost;
}
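
// Builds the inter prediction for sub-block i, then forward-transforms,
// quantizes and costs the 4x4 blocks it covers, returning the RD cost of
// the residual (distortion is kept in the transform domain, hence the >> 2
// scaling). Bails out with INT64_MAX as soon as the running cost exceeds
// best_yrd.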

static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi_8x8[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;

  const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
                                                             p->src.stride)];
  uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
                                                        pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0, ref;
  const scan_order *so = &vp9_default_scan_orders[TX_4X4];
  const int is_compound = has_second_ref(&mi->mbmi);
  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
                                                               pd->pre[ref].stride)];
    vp9_build_inter_predictor(pre, pd->pre[ref].stride,
                              dst, pd->dst.stride,
                              &mi->bmi[i].as_mv[ref].as_mv,
                              &xd->block_refs[ref]->sf, width, height, ref,
                              xd->interp_kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE + 4 * (i % 2),
                              mi_row * MI_SIZE + 4 * (i / 2));
  }

  vp9_subtract_block(height, width,
                     raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
                     src, p->src.stride,
                     dst, pd->dst.stride);

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      int16_t* coeff;

      k += (idy * 2 + idx);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                    coeff, 8);
      vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
      thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                        16, &ssz);
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = MIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv[2];
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  MB_PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;
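
// Motion vectors are stored in 1/8-pel units; >> 3 converts to full-pel for
// comparison against the search range limits.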

static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
  return (mv->row >> 3) < x->mv_row_min ||
         (mv->row >> 3) > x->mv_row_max ||
         (mv->col >> 3) < x->mv_col_min ||
         (mv->col >> 3) > x->mv_col_max;
}

static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
                                                       pd->pre[0].stride)];
  if (has_second_ref(mbmi))
    pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
                                                         pd->pre[1].stride)];
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mbmi))
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static INLINE int mv_has_subpel(const MV *mv) {
  return (mv->row & 0x0F) || (mv->col & 0x0F);
}

// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
    int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
    int disable_inter_mode_mask, int this_mode, int ref_frame,
    int second_ref_frame) {
  if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
      (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[this_mode][ref_frame].as_int == 0 &&
      (second_ref_frame == NONE ||
       frame_mv[this_mode][second_ref_frame].as_int == 0)) {
    int rfc = mode_context[ref_frame];
    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == ZEROMV);
      if (second_ref_frame == NONE) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
             frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 &&
             frame_mv[NEARMV][second_ref_frame].as_int == 0))
          return 0;
      }
    }
  }
  return 1;
}
1710 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1711 const TileInfo *const tile,
1712 BEST_SEG_INFO *bsi_buf, int filter_idx,
1713 int_mv seg_mvs[4][MAX_REF_FRAMES],
1714 int mi_row, int mi_col) {
1715 int k, br = 0, idx, idy;
1716 int64_t bd = 0, block_sse = 0;
1717 MB_PREDICTION_MODE this_mode;
1718 MACROBLOCKD *xd = &x->e_mbd;
1719 VP9_COMMON *cm = &cpi->common;
1720 MODE_INFO *mi = xd->mi_8x8[0];
1721 MB_MODE_INFO *const mbmi = &mi->mbmi;
1722 struct macroblock_plane *const p = &x->plane[0];
1723 struct macroblockd_plane *const pd = &xd->plane[0];
1724 const int label_count = 4;
1725 int64_t this_segment_rd = 0;
1726 int label_mv_thresh;
1727 int segmentyrate = 0;
1728 const BLOCK_SIZE bsize = mbmi->sb_type;
1729 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1730 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1731 vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
1732 ENTROPY_CONTEXT t_above[2], t_left[2];
1733 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1734 int mode_idx;
1735 int subpelmv = 1, have_ref = 0;
1736 const int has_second_rf = has_second_ref(mbmi);
1737 const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
1739 vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1740 vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
1742 // 64 makes this threshold effectively so large that we very
1743 // rarely check MVs on segments. Setting this to 1 would make
1744 // the MV threshold roughly equal to what it is for
1745 // macroblocks.
1746 label_mv_thresh = 1 * bsi->mvthresh / label_count;
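// Illustrative arithmetic (mvthresh value assumed): with
// bsi->mvthresh == 2000 and label_count == 4, label_mv_thresh is
// 500; a label whose best_rd is already below that breaks out of
// the NEWMV search below.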
1748 // Segmentation method overheads
1749 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1750 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1751 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1752 // loop for 4x4/4x8/8x4 block coding, to be replaced with a new RD loop.
1753 int_mv mode_mv[MB_MODE_COUNT][2];
1754 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1755 MB_PREDICTION_MODE mode_selected = ZEROMV;
1756 int64_t best_rd = INT64_MAX;
1757 const int i = idy * 2 + idx;
1758 int ref;
1760 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1761 const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
1762 frame_mv[ZEROMV][frame].as_int = 0;
1763 vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
1764 &frame_mv[NEARESTMV][frame],
1765 &frame_mv[NEARMV][frame]);
1768 // search for the best motion vector on this segment
1769 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1770 const struct buf_2d orig_src = x->plane[0].src;
1771 struct buf_2d orig_pre[2];
1773 mode_idx = INTER_OFFSET(this_mode);
1774 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1775 if (disable_inter_mode_mask & (1 << mode_idx))
1776 continue;
1778 if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
1779 disable_inter_mode_mask,
1780 this_mode, mbmi->ref_frame[0],
1781 mbmi->ref_frame[1]))
1782 continue;
1784 vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1785 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1786 sizeof(bsi->rdstat[i][mode_idx].ta));
1787 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1788 sizeof(bsi->rdstat[i][mode_idx].tl));
1790 // motion search for newmv (single predictor case only)
1791 if (!has_second_rf && this_mode == NEWMV &&
1792 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1793 int_mv *const new_mv = &mode_mv[NEWMV][0];
1794 int step_param = 0;
1795 int further_steps;
1796 int thissme, bestsme = INT_MAX;
1797 int sadpb = x->sadperbit4;
1798 MV mvp_full;
1799 int max_mv;
1801 /* Is the best so far sufficiently good that we can't justify doing
1802 * a new motion search? */
1803 if (best_rd < label_mv_thresh)
1804 break;
1806 if (cpi->oxcf.mode != MODE_SECONDPASS_BEST &&
1807 cpi->oxcf.mode != MODE_BESTQUALITY) {
1808 // use previous block's result as next block's MV predictor.
1809 if (i > 0) {
1810 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1811 if (i == 2)
1812 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
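// The four sub-8x8 labels are visited in raster order (0 and 1 on
// the top row, 2 and 3 below), so label 2 starts a new row and
// takes its predictor from label 0 directly above it rather than
// from label 1.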
1815 if (i == 0)
1816 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1817 else
1818 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1820 if (cpi->sf.auto_mv_step_size && cm->show_frame) {
1821 // Take a weighted average of the step_params based on the last
1822 // frame's max mv magnitude and the best ref mvs of the current
1823 // block for the given reference.
1824 step_param = (vp9_init_search_range(cpi, max_mv) +
1825 cpi->mv_step_param) >> 1;
1826 } else {
1827 step_param = cpi->mv_step_param;
1830 mvp_full.row = bsi->mvp.as_mv.row >> 3;
1831 mvp_full.col = bsi->mvp.as_mv.col >> 3;
1833 if (cpi->sf.adaptive_motion_search && cm->show_frame) {
1834 mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
1835 mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
1836 step_param = MAX(step_param, 8);
1839 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1840 // adjust src pointer for this block
1841 mi_buf_shift(x, i);
1843 vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
1845 if (cpi->sf.search_method == HEX) {
1846 bestsme = vp9_hex_search(x, &mvp_full,
1847 step_param,
1848 sadpb, 1, v_fn_ptr, 1,
1849 &bsi->ref_mv[0]->as_mv,
1850 &new_mv->as_mv);
1851 if (bestsme < INT_MAX)
1852 bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
1853 &bsi->ref_mv[0]->as_mv,
1854 v_fn_ptr, 1);
1855 } else if (cpi->sf.search_method == SQUARE) {
1856 bestsme = vp9_square_search(x, &mvp_full,
1857 step_param,
1858 sadpb, 1, v_fn_ptr, 1,
1859 &bsi->ref_mv[0]->as_mv,
1860 &new_mv->as_mv);
1861 if (bestsme < INT_MAX)
1862 bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
1863 &bsi->ref_mv[0]->as_mv,
1864 v_fn_ptr, 1);
1865 } else if (cpi->sf.search_method == BIGDIA) {
1866 bestsme = vp9_bigdia_search(x, &mvp_full,
1867 step_param,
1868 sadpb, 1, v_fn_ptr, 1,
1869 &bsi->ref_mv[0]->as_mv,
1870 &new_mv->as_mv);
1871 if (bestsme < INT_MAX)
1872 bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
1873 &bsi->ref_mv[0]->as_mv,
1874 v_fn_ptr, 1);
1875 } else {
1876 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1877 sadpb, further_steps, 0, v_fn_ptr,
1878 &bsi->ref_mv[0]->as_mv,
1879 &new_mv->as_mv);
1882 // Should we do a full search (best quality only)
1883 if (cpi->oxcf.mode == MODE_BESTQUALITY ||
1884 cpi->oxcf.mode == MODE_SECONDPASS_BEST) {
1885 int_mv *const best_mv = &mi->bmi[i].as_mv[0];
1886 /* Check if mvp_full is within the range. */
1887 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1888 x->mv_row_min, x->mv_row_max);
1889 thissme = cpi->full_search_sad(x, &mvp_full,
1890 sadpb, 16, v_fn_ptr,
1891 x->nmvjointcost, x->mvcost,
1892 &bsi->ref_mv[0]->as_mv,
1893 &best_mv->as_mv);
1894 if (thissme < bestsme) {
1895 bestsme = thissme;
1896 new_mv->as_int = best_mv->as_int;
1897 } else {
1898 // The full search result is actually worse, so reinstate the
1899 // previous best vector.
1900 best_mv->as_int = new_mv->as_int;
1904 if (bestsme < INT_MAX) {
1905 int distortion;
1906 cpi->find_fractional_mv_step(x,
1907 &new_mv->as_mv,
1908 &bsi->ref_mv[0]->as_mv,
1909 cm->allow_high_precision_mv,
1910 x->errorperbit, v_fn_ptr,
1911 cpi->sf.subpel_force_stop,
1912 cpi->sf.subpel_iters_per_step,
1913 x->nmvjointcost, x->mvcost,
1914 &distortion,
1915 &x->pred_sse[mbmi->ref_frame[0]]);
1917 // save motion search result for use in compound prediction
1918 seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int;
1921 if (cpi->sf.adaptive_motion_search)
1922 x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int;
1924 // restore src pointers
1925 mi_buf_restore(x, orig_src, orig_pre);
1928 if (has_second_rf) {
1929 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1930 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1931 continue;
1934 if (has_second_rf && this_mode == NEWMV &&
1935 mbmi->interp_filter == EIGHTTAP) {
1936 // adjust src pointers
1937 mi_buf_shift(x, i);
1938 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1939 int rate_mv;
1940 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1941 mi_row, mi_col, seg_mvs[i],
1942 &rate_mv);
1943 seg_mvs[i][mbmi->ref_frame[0]].as_int =
1944 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1945 seg_mvs[i][mbmi->ref_frame[1]].as_int =
1946 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1948 // restore src pointers
1949 mi_buf_restore(x, orig_src, orig_pre);
1952 bsi->rdstat[i][mode_idx].brate =
1953 labels2mode(cpi, xd, i, this_mode, mode_mv[this_mode], frame_mv,
1954 seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost);
1956 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1957 bsi->rdstat[i][mode_idx].mvs[ref].as_int =
1958 mode_mv[this_mode][ref].as_int;
1959 if (num_4x4_blocks_wide > 1)
1960 bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
1961 mode_mv[this_mode][ref].as_int;
1962 if (num_4x4_blocks_high > 1)
1963 bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
1964 mode_mv[this_mode][ref].as_int;
1967 // Trap vectors that reach beyond the UMV borders
1968 if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
1969 (has_second_rf &&
1970 mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
1971 continue;
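// For the second and later filters, when the selected MV has no
// subpel component the interpolation filter cannot change the
// prediction, so the SEG_RDSTAT cached for an earlier filter is
// copied below instead of re-encoding the segment.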
1973 if (filter_idx > 0) {
1974 BEST_SEG_INFO *ref_bsi = bsi_buf;
1975 subpelmv = 0;
1976 have_ref = 1;
1978 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1979 subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
1980 have_ref &= mode_mv[this_mode][ref].as_int ==
1981 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1984 if (filter_idx > 1 && !subpelmv && !have_ref) {
1985 ref_bsi = bsi_buf + 1;
1986 have_ref = 1;
1987 for (ref = 0; ref < 1 + has_second_rf; ++ref)
1988 have_ref &= mode_mv[this_mode][ref].as_int ==
1989 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1992 if (!subpelmv && have_ref &&
1993 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1994 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1995 sizeof(SEG_RDSTAT));
1996 if (num_4x4_blocks_wide > 1)
1997 bsi->rdstat[i + 1][mode_idx].eobs =
1998 ref_bsi->rdstat[i + 1][mode_idx].eobs;
1999 if (num_4x4_blocks_high > 1)
2000 bsi->rdstat[i + 2][mode_idx].eobs =
2001 ref_bsi->rdstat[i + 2][mode_idx].eobs;
2003 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2004 mode_selected = this_mode;
2005 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2007 continue;
2011 bsi->rdstat[i][mode_idx].brdcost =
2012 encode_inter_mb_segment(cpi, x,
2013 bsi->segment_rd - this_segment_rd, i,
2014 &bsi->rdstat[i][mode_idx].byrate,
2015 &bsi->rdstat[i][mode_idx].bdist,
2016 &bsi->rdstat[i][mode_idx].bsse,
2017 bsi->rdstat[i][mode_idx].ta,
2018 bsi->rdstat[i][mode_idx].tl,
2019 mi_row, mi_col);
2020 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2021 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
2022 bsi->rdstat[i][mode_idx].brate, 0);
2023 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2024 bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
2025 if (num_4x4_blocks_wide > 1)
2026 bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
2027 if (num_4x4_blocks_high > 1)
2028 bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
2031 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2032 mode_selected = this_mode;
2033 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2035 } /*for each 4x4 mode*/
2037 if (best_rd == INT64_MAX) {
2038 int iy, midx;
2039 for (iy = i + 1; iy < 4; ++iy)
2040 for (midx = 0; midx < INTER_MODES; ++midx)
2041 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2042 bsi->segment_rd = INT64_MAX;
2043 return;
2046 mode_idx = INTER_OFFSET(mode_selected);
2047 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2048 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2050 labels2mode(cpi, xd, i, mode_selected, mode_mv[mode_selected],
2051 frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
2052 x->mvcost);
2054 br += bsi->rdstat[i][mode_idx].brate;
2055 bd += bsi->rdstat[i][mode_idx].bdist;
2056 block_sse += bsi->rdstat[i][mode_idx].bsse;
2057 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2058 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2060 if (this_segment_rd > bsi->segment_rd) {
2061 int iy, midx;
2062 for (iy = i + 1; iy < 4; ++iy)
2063 for (midx = 0; midx < INTER_MODES; ++midx)
2064 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2065 bsi->segment_rd = INT64_MAX;
2066 return;
2069 } /* for each label */
2071 bsi->r = br;
2072 bsi->d = bd;
2073 bsi->segment_yrate = segmentyrate;
2074 bsi->segment_rd = this_segment_rd;
2075 bsi->sse = block_sse;
2077 // update the coding decisions
2078 for (k = 0; k < 4; ++k)
2079 bsi->modes[k] = mi->bmi[k].as_mode;
2082 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
2083 const TileInfo *const tile,
2084 int_mv *best_ref_mv,
2085 int_mv *second_best_ref_mv,
2086 int64_t best_rd,
2087 int *returntotrate,
2088 int *returnyrate,
2089 int64_t *returndistortion,
2090 int *skippable, int64_t *psse,
2091 int mvthresh,
2092 int_mv seg_mvs[4][MAX_REF_FRAMES],
2093 BEST_SEG_INFO *bsi_buf,
2094 int filter_idx,
2095 int mi_row, int mi_col) {
2096 int i;
2097 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2098 MACROBLOCKD *xd = &x->e_mbd;
2099 MODE_INFO *mi = xd->mi_8x8[0];
2100 MB_MODE_INFO *mbmi = &mi->mbmi;
2101 int mode_idx;
2103 vp9_zero(*bsi);
2105 bsi->segment_rd = best_rd;
2106 bsi->ref_mv[0] = best_ref_mv;
2107 bsi->ref_mv[1] = second_best_ref_mv;
2108 bsi->mvp.as_int = best_ref_mv->as_int;
2109 bsi->mvthresh = mvthresh;
2111 for (i = 0; i < 4; i++)
2112 bsi->modes[i] = ZEROMV;
2114 rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs,
2115 mi_row, mi_col);
2117 if (bsi->segment_rd > best_rd)
2118 return INT64_MAX;
2119 /* set it to the best */
2120 for (i = 0; i < 4; i++) {
2121 mode_idx = INTER_OFFSET(bsi->modes[i]);
2122 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2123 if (has_second_ref(mbmi))
2124 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2125 x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2126 mi->bmi[i].as_mode = bsi->modes[i];
2130 /* used to set mbmi->mv.as_int */
2132 *returntotrate = bsi->r;
2133 *returndistortion = bsi->d;
2134 *returnyrate = bsi->segment_yrate;
2135 *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
2136 *psse = bsi->sse;
2137 mbmi->mode = bsi->modes[3];
2139 return bsi->segment_rd;
2142 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2143 uint8_t *ref_y_buffer, int ref_y_stride,
2144 int ref_frame, BLOCK_SIZE block_size) {
2145 MACROBLOCKD *xd = &x->e_mbd;
2146 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2147 int_mv this_mv;
2148 int i;
2149 int zero_seen = 0;
2150 int best_index = 0;
2151 int best_sad = INT_MAX;
2152 int this_sad = INT_MAX;
2153 int max_mv = 0;
2155 uint8_t *src_y_ptr = x->plane[0].src.buf;
2156 uint8_t *ref_y_ptr;
2157 int row_offset, col_offset;
2158 int num_mv_refs = MAX_MV_REF_CANDIDATES +
2159 (cpi->sf.adaptive_motion_search &&
2160 cpi->common.show_frame &&
2161 block_size < cpi->sf.max_partition_size);
2163 int_mv pred_mv[3];
2164 pred_mv[0] = mbmi->ref_mvs[ref_frame][0];
2165 pred_mv[1] = mbmi->ref_mvs[ref_frame][1];
2166 pred_mv[2] = x->pred_mv[ref_frame];
2168 // Get the sad for each candidate reference mv
2169 for (i = 0; i < num_mv_refs; i++) {
2170 this_mv.as_int = pred_mv[i].as_int;
2172 max_mv = MAX(max_mv,
2173 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2174 // only need to check zero mv once
2175 if (!this_mv.as_int && zero_seen)
2176 continue;
2178 zero_seen = zero_seen || !this_mv.as_int;
2180 row_offset = this_mv.as_mv.row >> 3;
2181 col_offset = this_mv.as_mv.col >> 3;
2182 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2184 // Find sad for current vector.
2185 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2186 ref_y_ptr, ref_y_stride,
2187 0x7fffffff);
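// Passing 0x7fffffff as the final argument effectively disables the
// SAD function's early-termination threshold, so the full block SAD
// is computed for every candidate.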
2189 // Note if it is the best so far.
2190 if (this_sad < best_sad) {
2191 best_sad = this_sad;
2192 best_index = i;
2196 // Note the index of the mv that worked best in the reference list.
2197 x->mv_best_ref_index[ref_frame] = best_index;
2198 x->max_mv_context[ref_frame] = max_mv;
2199 x->pred_mv_sad[ref_frame] = best_sad;
2202 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
2203 unsigned int *ref_costs_single,
2204 unsigned int *ref_costs_comp,
2205 vp9_prob *comp_mode_p) {
2206 VP9_COMMON *const cm = &cpi->common;
2207 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2208 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
2209 SEG_LVL_REF_FRAME);
2210 if (seg_ref_active) {
2211 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2212 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2213 *comp_mode_p = 128;
2214 } else {
2215 vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
2216 vp9_prob comp_inter_p = 128;
2218 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
2219 comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
2220 *comp_mode_p = comp_inter_p;
2221 } else {
2222 *comp_mode_p = 128;
2225 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2227 if (cm->reference_mode != COMPOUND_REFERENCE) {
2228 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2229 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2230 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2232 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2233 base_cost += vp9_cost_bit(comp_inter_p, 0);
2235 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2236 ref_costs_single[ALTREF_FRAME] = base_cost;
2237 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2238 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2239 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2240 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2241 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2242 } else {
2243 ref_costs_single[LAST_FRAME] = 512;
2244 ref_costs_single[GOLDEN_FRAME] = 512;
2245 ref_costs_single[ALTREF_FRAME] = 512;
2247 if (cm->reference_mode != SINGLE_REFERENCE) {
2248 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2249 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2251 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2252 base_cost += vp9_cost_bit(comp_inter_p, 1);
2254 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2255 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2256 } else {
2257 ref_costs_comp[LAST_FRAME] = 512;
2258 ref_costs_comp[GOLDEN_FRAME] = 512;
2263 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2264 int mode_index,
2265 int_mv *ref_mv,
2266 int_mv *second_ref_mv,
2267 int64_t comp_pred_diff[REFERENCE_MODES],
2268 int64_t tx_size_diff[TX_MODES],
2269 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
2270 MACROBLOCKD *const xd = &x->e_mbd;
2272 // Take a snapshot of the coding context so it can be
2273 // restored if we decide to encode this way
2274 ctx->skip = x->skip;
2275 ctx->best_mode_index = mode_index;
2276 ctx->mic = *xd->mi_8x8[0];
2278 ctx->best_ref_mv[0].as_int = ref_mv->as_int;
2279 ctx->best_ref_mv[1].as_int = second_ref_mv->as_int;
2281 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
2282 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
2283 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
2285 vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2286 vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
2287 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
2290 static void setup_pred_block(const MACROBLOCKD *xd,
2291 struct buf_2d dst[MAX_MB_PLANE],
2292 const YV12_BUFFER_CONFIG *src,
2293 int mi_row, int mi_col,
2294 const struct scale_factors *scale,
2295 const struct scale_factors *scale_uv) {
2296 int i;
2298 dst[0].buf = src->y_buffer;
2299 dst[0].stride = src->y_stride;
2300 dst[1].buf = src->u_buffer;
2301 dst[2].buf = src->v_buffer;
2302 dst[1].stride = dst[2].stride = src->uv_stride;
2303 #if CONFIG_ALPHA
2304 dst[3].buf = src->alpha_buffer;
2305 dst[3].stride = src->alpha_stride;
2306 #endif
2308 // TODO(jkoleszar): Make scale factors per-plane data
2309 for (i = 0; i < MAX_MB_PLANE; i++) {
2310 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2311 i ? scale_uv : scale,
2312 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
2316 void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2317 const TileInfo *const tile,
2318 MV_REFERENCE_FRAME ref_frame,
2319 BLOCK_SIZE block_size,
2320 int mi_row, int mi_col,
2321 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2322 int_mv frame_near_mv[MAX_REF_FRAMES],
2323 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
2324 const VP9_COMMON *cm = &cpi->common;
2325 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
2326 MACROBLOCKD *const xd = &x->e_mbd;
2327 MODE_INFO *const mi = xd->mi_8x8[0];
2328 int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
2329 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
2331 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2332 // use the UV scaling factors.
2333 setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
2335 // Gets an initial list of candidate vectors from neighbours and orders them
2336 vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);
2338 // Candidate refinement carried out at encoder and decoder
2339 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
2340 &frame_nearest_mv[ref_frame],
2341 &frame_near_mv[ref_frame]);
2343 // Further refinement that is encode side only to test the top few candidates
2344 // in full and choose the best as the centre point for subsequent searches.
2345 // The current implementation doesn't support scaling.
2346 if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
2347 mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
2348 ref_frame, block_size);
2351 const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
2352 int ref_frame) {
2353 const VP9_COMMON *const cm = &cpi->common;
2354 const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
2355 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
2356 return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
2359 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
2360 const MACROBLOCKD *const xd = &x->e_mbd;
2361 const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
2362 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2363 return SWITCHABLE_INTERP_RATE_FACTOR *
2364 x->switchable_interp_costs[ctx][mbmi->interp_filter];
2367 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2368 const TileInfo *const tile,
2369 BLOCK_SIZE bsize,
2370 int mi_row, int mi_col,
2371 int_mv *tmp_mv, int *rate_mv) {
2372 MACROBLOCKD *xd = &x->e_mbd;
2373 VP9_COMMON *cm = &cpi->common;
2374 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2375 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2376 int bestsme = INT_MAX;
2377 int further_steps, step_param;
2378 int sadpb = x->sadperbit16;
2379 MV mvp_full;
2380 int ref = mbmi->ref_frame[0];
2381 MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
2383 int tmp_col_min = x->mv_col_min;
2384 int tmp_col_max = x->mv_col_max;
2385 int tmp_row_min = x->mv_row_min;
2386 int tmp_row_max = x->mv_row_max;
2388 const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
2389 ref);
2391 MV pred_mv[3];
2392 pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
2393 pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
2394 pred_mv[2] = x->pred_mv[ref].as_mv;
2396 if (scaled_ref_frame) {
2397 int i;
2398 // Swap out the reference frame for a version that's been scaled to
2399 // match the resolution of the current frame, allowing the existing
2400 // motion search code to be used without additional modifications.
2401 for (i = 0; i < MAX_MB_PLANE; i++)
2402 backup_yv12[i] = xd->plane[i].pre[0];
2404 vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2407 vp9_set_mv_search_range(x, &ref_mv);
2409 // Work out the size of the first step in the mv step search.
2410 // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2411 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
2412 // Take a weighted average of the step_params based on the last
2413 // frame's max mv magnitude and that based on the best ref mvs of
2414 // the current block for the given reference.
2415 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2416 cpi->mv_step_param) >> 1;
2417 } else {
2418 step_param = cpi->mv_step_param;
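// For illustration: step_param == n gives a first step of
// (max length) >> n, so the weighted average above starts the
// diamond search with a smaller first step whenever recent frames
// and this block's ref MVs both suggest small motion.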
2421 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
2422 cpi->common.show_frame) {
2423 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
2424 b_width_log2(bsize)));
2425 step_param = MAX(step_param, boffset);
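// Illustrative arithmetic: b_width_log2(BLOCK_64X64) is 4, so a
// BLOCK_8X8 (b_width_log2 == 1) gets boffset == 2 * (4 - 1) == 6,
// i.e. smaller partitions start the search with a smaller first
// step.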
2428 if (cpi->sf.adaptive_motion_search) {
2429 int bwl = b_width_log2_lookup[bsize];
2430 int bhl = b_height_log2_lookup[bsize];
2431 int i;
2432 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
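// bwl + bhl + 4 is log2 of the block's pixel count, so tlevel
// approximates the average SAD per pixel; a small value means the
// predictor is already close and a coarser first step suffices.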
2434 if (tlevel < 5)
2435 step_param += 2;
2437 for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) {
2438 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
2439 x->pred_mv[ref].as_int = 0;
2440 tmp_mv->as_int = INVALID_MV;
2442 if (scaled_ref_frame) {
2443 int i;
2444 for (i = 0; i < MAX_MB_PLANE; i++)
2445 xd->plane[i].pre[0] = backup_yv12[i];
2447 return;
2452 mvp_full = pred_mv[x->mv_best_ref_index[ref]];
2454 mvp_full.col >>= 3;
2455 mvp_full.row >>= 3;
2457 // Further step/diamond searches as necessary
2458 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2460 if (cpi->sf.search_method == FAST_DIAMOND) {
2461 bestsme = vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0,
2462 &cpi->fn_ptr[bsize], 1,
2463 &ref_mv, &tmp_mv->as_mv);
2464 if (bestsme < INT_MAX)
2465 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2466 &cpi->fn_ptr[bsize], 1);
2467 } else if (cpi->sf.search_method == FAST_HEX) {
2468 bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
2469 &cpi->fn_ptr[bsize], 1,
2470 &ref_mv, &tmp_mv->as_mv);
2471 if (bestsme < INT_MAX)
2472 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2473 &cpi->fn_ptr[bsize], 1);
2474 } else if (cpi->sf.search_method == HEX) {
2475 bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
2476 &cpi->fn_ptr[bsize], 1,
2477 &ref_mv, &tmp_mv->as_mv);
2478 if (bestsme < INT_MAX)
2479 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2480 &cpi->fn_ptr[bsize], 1);
2481 } else if (cpi->sf.search_method == SQUARE) {
2482 bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
2483 &cpi->fn_ptr[bsize], 1,
2484 &ref_mv, &tmp_mv->as_mv);
2485 if (bestsme < INT_MAX)
2486 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2487 &cpi->fn_ptr[bsize], 1);
2488 } else if (cpi->sf.search_method == BIGDIA) {
2489 bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
2490 &cpi->fn_ptr[bsize], 1,
2491 &ref_mv, &tmp_mv->as_mv);
2492 if (bestsme < INT_MAX)
2493 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2494 &cpi->fn_ptr[bsize], 1);
2495 } else {
2496 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2497 sadpb, further_steps, 1,
2498 &cpi->fn_ptr[bsize],
2499 &ref_mv, &tmp_mv->as_mv);
2502 x->mv_col_min = tmp_col_min;
2503 x->mv_col_max = tmp_col_max;
2504 x->mv_row_min = tmp_row_min;
2505 x->mv_row_max = tmp_row_max;
2507 if (bestsme < INT_MAX) {
2508 int dis; /* TODO: use dis in distortion calculation later. */
2509 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
2510 cm->allow_high_precision_mv,
2511 x->errorperbit,
2512 &cpi->fn_ptr[bsize],
2513 cpi->sf.subpel_force_stop,
2514 cpi->sf.subpel_iters_per_step,
2515 x->nmvjointcost, x->mvcost,
2516 &dis, &x->pred_sse[ref]);
2518 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
2519 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2521 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
2522 x->pred_mv[ref].as_int = tmp_mv->as_int;
2524 if (scaled_ref_frame) {
2525 int i;
2526 for (i = 0; i < MAX_MB_PLANE; i++)
2527 xd->plane[i].pre[0] = backup_yv12[i];
2531 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2532 BLOCK_SIZE bsize,
2533 int_mv *frame_mv,
2534 int mi_row, int mi_col,
2535 int_mv single_newmv[MAX_REF_FRAMES],
2536 int *rate_mv) {
2537 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
2538 const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
2539 MACROBLOCKD *xd = &x->e_mbd;
2540 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2541 const int refs[2] = { mbmi->ref_frame[0],
2542 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
2543 int_mv ref_mv[2];
2544 int ite, ref;
2545 // Prediction buffer from second frame.
2546 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2548 // Do joint motion search in compound mode to get more accurate mv.
2549 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
2550 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
2551 int last_besterr[2] = {INT_MAX, INT_MAX};
2552 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
2553 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
2554 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
2557 for (ref = 0; ref < 2; ++ref) {
2558 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
2560 if (scaled_ref_frame[ref]) {
2561 int i;
2562 // Swap out the reference frame for a version that's been scaled to
2563 // match the resolution of the current frame, allowing the existing
2564 // motion search code to be used without additional modifications.
2565 for (i = 0; i < MAX_MB_PLANE; i++)
2566 backup_yv12[ref][i] = xd->plane[i].pre[ref];
2567 vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
2568 NULL);
2571 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
2574 // Allow the joint search to run multiple iterations for each ref
2575 // frame, and break out of the loop if it cannot find a better MV.
2576 for (ite = 0; ite < 4; ite++) {
2577 struct buf_2d ref_yv12[2];
2578 int bestsme = INT_MAX;
2579 int sadpb = x->sadperbit16;
2580 int_mv tmp_mv;
2581 int search_range = 3;
2583 int tmp_col_min = x->mv_col_min;
2584 int tmp_col_max = x->mv_col_max;
2585 int tmp_row_min = x->mv_row_min;
2586 int tmp_row_max = x->mv_row_max;
2587 int id = ite % 2;
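// id alternates 0, 1, 0, 1 over the iterations, so each pass
// refines one reference's MV while the other reference's prediction
// is held fixed in second_pred.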
2589 // Initialized here because of compiler problem in Visual Studio.
2590 ref_yv12[0] = xd->plane[0].pre[0];
2591 ref_yv12[1] = xd->plane[0].pre[1];
2593 // Get pred block from second frame.
2594 vp9_build_inter_predictor(ref_yv12[!id].buf,
2595 ref_yv12[!id].stride,
2596 second_pred, pw,
2597 &frame_mv[refs[!id]].as_mv,
2598 &xd->block_refs[!id]->sf,
2599 pw, ph, 0,
2600 xd->interp_kernel, MV_PRECISION_Q3,
2601 mi_col * MI_SIZE, mi_row * MI_SIZE);
2603 // Compound motion search on first ref frame.
2604 if (id)
2605 xd->plane[0].pre[0] = ref_yv12[id];
2606 vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
2608 // Use mv result from single mode as mvp.
2609 tmp_mv.as_int = frame_mv[refs[id]].as_int;
2611 tmp_mv.as_mv.col >>= 3;
2612 tmp_mv.as_mv.row >>= 3;
2614 // Small-range full-pixel motion search
2615 bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb,
2616 search_range,
2617 &cpi->fn_ptr[bsize],
2618 x->nmvjointcost, x->mvcost,
2619 &ref_mv[id].as_mv, second_pred,
2620 pw, ph);
2621 if (bestsme < INT_MAX)
2622 bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv,
2623 second_pred, &cpi->fn_ptr[bsize], 1);
2625 x->mv_col_min = tmp_col_min;
2626 x->mv_col_max = tmp_col_max;
2627 x->mv_row_min = tmp_row_min;
2628 x->mv_row_max = tmp_row_max;
2630 if (bestsme < INT_MAX) {
2631 int dis; /* TODO: use dis in distortion calculation later. */
2632 unsigned int sse;
2633 bestsme = cpi->find_fractional_mv_step_comp(
2634 x, &tmp_mv.as_mv,
2635 &ref_mv[id].as_mv,
2636 cpi->common.allow_high_precision_mv,
2637 x->errorperbit,
2638 &cpi->fn_ptr[bsize],
2639 0, cpi->sf.subpel_iters_per_step,
2640 x->nmvjointcost, x->mvcost,
2641 &dis, &sse, second_pred,
2642 pw, ph);
2645 if (id)
2646 xd->plane[0].pre[0] = scaled_first_yv12;
2648 if (bestsme < last_besterr[id]) {
2649 frame_mv[refs[id]].as_int = tmp_mv.as_int;
2650 last_besterr[id] = bestsme;
2651 } else {
2652 break;
2656 *rate_mv = 0;
2658 for (ref = 0; ref < 2; ++ref) {
2659 if (scaled_ref_frame[ref]) {
2660 // restore the predictor
2661 int i;
2662 for (i = 0; i < MAX_MB_PLANE; i++)
2663 xd->plane[i].pre[ref] = backup_yv12[ref][i];
2666 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2667 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2668 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2671 vpx_free(second_pred);
2674 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2675 uint8_t *orig_dst[MAX_MB_PLANE],
2676 int orig_dst_stride[MAX_MB_PLANE]) {
2677 int i;
2678 for (i = 0; i < MAX_MB_PLANE; i++) {
2679 xd->plane[i].dst.buf = orig_dst[i];
2680 xd->plane[i].dst.stride = orig_dst_stride[i];
2684 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2685 const TileInfo *const tile,
2686 BLOCK_SIZE bsize,
2687 int64_t txfm_cache[],
2688 int *rate2, int64_t *distortion,
2689 int *skippable,
2690 int *rate_y, int64_t *distortion_y,
2691 int *rate_uv, int64_t *distortion_uv,
2692 int *mode_excluded, int *disable_skip,
2693 INTERP_FILTER *best_filter,
2694 int_mv (*mode_mv)[MAX_REF_FRAMES],
2695 int mi_row, int mi_col,
2696 int_mv single_newmv[MAX_REF_FRAMES],
2697 int64_t *psse,
2698 const int64_t ref_best_rd) {
2699 VP9_COMMON *cm = &cpi->common;
2700 MACROBLOCKD *xd = &x->e_mbd;
2701 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2702 const int is_comp_pred = has_second_ref(mbmi);
2703 const int num_refs = is_comp_pred ? 2 : 1;
2704 const int this_mode = mbmi->mode;
2705 int_mv *frame_mv = mode_mv[this_mode];
2706 int i;
2707 int refs[2] = { mbmi->ref_frame[0],
2708 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2709 int_mv cur_mv[2];
2710 int64_t this_rd = 0;
2711 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2712 int pred_exists = 0;
2713 int intpel_mv;
2714 int64_t rd, best_rd = INT64_MAX;
2715 int best_needs_copy = 0;
2716 uint8_t *orig_dst[MAX_MB_PLANE];
2717 int orig_dst_stride[MAX_MB_PLANE];
2718 int rs = 0;
2720 if (is_comp_pred) {
2721 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2722 frame_mv[refs[1]].as_int == INVALID_MV)
2723 return INT64_MAX;
2726 if (this_mode == NEWMV) {
2727 int rate_mv;
2728 if (is_comp_pred) {
2729 // Initialize mv using single prediction mode result.
2730 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2731 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2733 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2734 joint_motion_search(cpi, x, bsize, frame_mv,
2735 mi_row, mi_col, single_newmv, &rate_mv);
2736 } else {
2737 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2738 &mbmi->ref_mvs[refs[0]][0].as_mv,
2739 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2740 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2741 &mbmi->ref_mvs[refs[1]][0].as_mv,
2742 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2744 *rate2 += rate_mv;
2745 } else {
2746 int_mv tmp_mv;
2747 single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
2748 &tmp_mv, &rate_mv);
2749 if (tmp_mv.as_int == INVALID_MV)
2750 return INT64_MAX;
2751 *rate2 += rate_mv;
2752 frame_mv[refs[0]].as_int =
2753 xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2754 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2758 for (i = 0; i < num_refs; ++i) {
2759 cur_mv[i] = frame_mv[refs[i]];
2760 // Clip "next_nearest" so that it does not extend too far out of the image.
2761 if (this_mode != NEWMV)
2762 clamp_mv2(&cur_mv[i].as_mv, xd);
2764 if (mv_check_bounds(x, &cur_mv[i].as_mv))
2765 return INT64_MAX;
2766 mbmi->mv[i].as_int = cur_mv[i].as_int;
2769 // do first prediction into the destination buffer. Do the next
2770 // prediction into a temporary buffer. Then keep track of which one
2771 // of these currently holds the best predictor, and use the other
2772 // one for future predictions. In the end, copy from tmp_buf to
2773 // dst if necessary.
2774 for (i = 0; i < MAX_MB_PLANE; i++) {
2775 orig_dst[i] = xd->plane[i].dst.buf;
2776 orig_dst_stride[i] = xd->plane[i].dst.stride;
2779 /* We don't include the cost of the second reference here, because there
2780 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2781 * words if you present them in that order, the second one is always known
2782 * if the first is known */
2783 *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);
2785 if (!(*mode_excluded))
2786 *mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE
2787 : cm->reference_mode == COMPOUND_REFERENCE;
2789 pred_exists = 0;
2790 // Are all MVs integer pel for Y and UV
2791 intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
2792 if (is_comp_pred)
2793 intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
2795 // Search for best switchable filter by checking the variance of
2796 // pred error irrespective of whether the filter will be used
2797 cpi->mask_filter_rd = 0;
2798 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2799 cpi->rd_filter_cache[i] = INT64_MAX;
2801 if (cm->interp_filter != BILINEAR) {
2802 *best_filter = EIGHTTAP;
2803 if (x->source_variance <
2804 cpi->sf.disable_filter_search_var_thresh) {
2805 *best_filter = EIGHTTAP;
2806 } else {
2807 int newbest;
2808 int tmp_rate_sum = 0;
2809 int64_t tmp_dist_sum = 0;
2811 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2812 int j;
2813 int64_t rs_rd;
2814 mbmi->interp_filter = i;
2815 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
2816 rs = get_switchable_rate(x);
2817 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
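// With integer-pel MVs every interpolation filter yields the same
// prediction, so for i > 0 the rate and distortion modeled for the
// first filter are reused rather than rebuilding the predictor.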
2819 if (i > 0 && intpel_mv) {
2820 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2821 cpi->rd_filter_cache[i] = rd;
2822 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2823 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2824 if (cm->interp_filter == SWITCHABLE)
2825 rd += rs_rd;
2826 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
2827 } else {
2828 int rate_sum = 0;
2829 int64_t dist_sum = 0;
2830 if ((cm->interp_filter == SWITCHABLE &&
2831 (!i || best_needs_copy)) ||
2832 (cm->interp_filter != SWITCHABLE &&
2833 (cm->interp_filter == mbmi->interp_filter ||
2834 (i == 0 && intpel_mv)))) {
2835 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2836 } else {
2837 for (j = 0; j < MAX_MB_PLANE; j++) {
2838 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2839 xd->plane[j].dst.stride = 64;
2842 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2843 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2845 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2846 cpi->rd_filter_cache[i] = rd;
2847 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2848 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2849 if (cm->interp_filter == SWITCHABLE)
2850 rd += rs_rd;
2851 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
2853 if (i == 0 && intpel_mv) {
2854 tmp_rate_sum = rate_sum;
2855 tmp_dist_sum = dist_sum;
2859 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2860 if (rd / 2 > ref_best_rd) {
2861 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2862 return INT64_MAX;
2865 newbest = i == 0 || rd < best_rd;
2867 if (newbest) {
2868 best_rd = rd;
2869 *best_filter = mbmi->interp_filter;
2870 if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2871 best_needs_copy = !best_needs_copy;
2874 if ((cm->interp_filter == SWITCHABLE && newbest) ||
2875 (cm->interp_filter != SWITCHABLE &&
2876 cm->interp_filter == mbmi->interp_filter)) {
2877 pred_exists = 1;
2880 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2883 // Set the appropriate filter
2884 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2885 cm->interp_filter : *best_filter;
2886 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
2887 rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0;
2889 if (pred_exists) {
2890 if (best_needs_copy) {
2891 // again temporarily set the buffers to local memory to prevent a memcpy
2892 for (i = 0; i < MAX_MB_PLANE; i++) {
2893 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2894 xd->plane[i].dst.stride = 64;
2897 } else {
2898 // Handles the special case when a filter that is not in the
2899 // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level.
2900 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2903 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2904 int tmp_rate;
2905 int64_t tmp_dist;
2906 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2907 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2908 // if current pred_error modeled rd is substantially more than the best
2909 // so far, do not bother doing full rd
2910 if (rd / 2 > ref_best_rd) {
2911 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2912 return INT64_MAX;
2916 if (cm->interp_filter == SWITCHABLE)
2917 *rate2 += get_switchable_rate(x);
2919 if (!is_comp_pred) {
2920 if (!x->in_active_map) {
2921 if (psse)
2922 *psse = 0;
2923 *distortion = 0;
2924 x->skip = 1;
2925 } else if (cpi->allow_encode_breakout && x->encode_breakout) {
2926 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2927 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2928 unsigned int var, sse;
2929 // Skipping threshold for ac.
2930 unsigned int thresh_ac;
2931 // Set a maximum for the threshold to avoid a big PSNR loss in low-bitrate cases.
2932 // Use an extremely low threshold for static frames to limit skipping.
2933 const unsigned int max_thresh = (cpi->allow_encode_breakout ==
2934 ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
2935 // The encode_breakout input
2936 const unsigned int min_thresh =
2937 MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
2939 // Calculate threshold according to dequant value.
2940 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2941 thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
2943 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2944 xd->plane[0].dst.buf,
2945 xd->plane[0].dst.stride, &sse);
2947 // Adjust threshold according to partition size.
2948 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2949 b_height_log2_lookup[bsize]);
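// Illustrative arithmetic (dequant value assumed, not from the
// encoder): xd->plane[0].dequant[1] == 48 gives
// thresh_ac == 48 * 48 / 9 == 256 before clamping. The shift above
// then scales the threshold by block area relative to BLOCK_64X64,
// since sse is accumulated over the whole block.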
2951 // Y skipping condition checking
2952 if (sse < thresh_ac || sse == 0) {
2953 // Skipping threshold for dc
2954 unsigned int thresh_dc;
2956 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2958 // dc skipping checking
2959 if ((sse - var) < thresh_dc || sse == var) {
2960 unsigned int sse_u, sse_v;
2961 unsigned int var_u, var_v;
2963 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2964 x->plane[1].src.stride,
2965 xd->plane[1].dst.buf,
2966 xd->plane[1].dst.stride, &sse_u);
2968 // U skipping condition checking
2969 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2970 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2971 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2972 x->plane[2].src.stride,
2973 xd->plane[2].dst.buf,
2974 xd->plane[2].dst.stride, &sse_v);
2976 // V skipping condition checking
2977 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2978 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2979 x->skip = 1;
2981 // The cost of the skip bit needs to be added.
2982 *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2984 // Scaling factor for SSE from spatial domain to frequency domain
2985 // is 16. Adjust distortion accordingly.
2986 *distortion_uv = (sse_u + sse_v) << 4;
2987 *distortion = (sse << 4) + *distortion_uv;
2989 *disable_skip = 1;
2990 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2998 if (!x->skip) {
2999 int skippable_y, skippable_uv;
3000 int64_t sseuv = INT64_MAX;
3001 int64_t rdcosty = INT64_MAX;
3003 // Y cost and distortion
3004 inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
3005 bsize, txfm_cache, ref_best_rd);
3007 if (*rate_y == INT_MAX) {
3008 *rate2 = INT_MAX;
3009 *distortion = INT64_MAX;
3010 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3011 return INT64_MAX;
3014 *rate2 += *rate_y;
3015 *distortion += *distortion_y;
3017 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
3018 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
3020 super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
3021 bsize, ref_best_rd - rdcosty);
3022 if (*rate_uv == INT_MAX) {
3023 *rate2 = INT_MAX;
3024 *distortion = INT64_MAX;
3025 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3026 return INT64_MAX;
3029 *psse += sseuv;
3030 *rate2 += *rate_uv;
3031 *distortion += *distortion_uv;
3032 *skippable = skippable_y && skippable_uv;
3035 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3036 return this_rd; // if 0, this will be re-calculated by caller
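// swap_block_ptr() below ping-pongs the coefficient, qcoeff,
// dqcoeff and eob buffers between the macroblock and slots 0/1 of
// the pick-mode context, so the best mode's coefficient data
// survives without a memcpy.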
3039 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
3040 int max_plane) {
3041 struct macroblock_plane *const p = x->plane;
3042 struct macroblockd_plane *const pd = x->e_mbd.plane;
3043 int i;
3045 for (i = 0; i < max_plane; ++i) {
3046 p[i].coeff = ctx->coeff_pbuf[i][1];
3047 p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
3048 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
3049 p[i].eobs = ctx->eobs_pbuf[i][1];
3051 ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
3052 ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
3053 ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
3054 ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];
3056 ctx->coeff_pbuf[i][0] = p[i].coeff;
3057 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
3058 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
3059 ctx->eobs_pbuf[i][0] = p[i].eobs;
3063 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3064 int *returnrate, int64_t *returndist,
3065 BLOCK_SIZE bsize,
3066 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3067 VP9_COMMON *const cm = &cpi->common;
3068 MACROBLOCKD *const xd = &x->e_mbd;
3069 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3070 int y_skip = 0, uv_skip = 0;
3071 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
3072 TX_SIZE max_uv_tx_size;
3073 x->skip_encode = 0;
3074 ctx->skip = 0;
3075 xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
3077 if (bsize >= BLOCK_8X8) {
3078 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3079 &dist_y, &y_skip, bsize, tx_cache,
3080 best_rd) >= best_rd) {
3081 *returnrate = INT_MAX;
3082 return;
3084 max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
3085 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3086 &dist_uv, &uv_skip, bsize, max_uv_tx_size);
3087 } else {
3088 y_skip = 0;
3089 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3090 &dist_y, best_rd) >= best_rd) {
3091 *returnrate = INT_MAX;
3092 return;
3094 max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
3095 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3096 &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
3099 if (y_skip && uv_skip) {
3100 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3101 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3102 *returndist = dist_y + dist_uv;
3103 vp9_zero(ctx->tx_rd_diff);
3104 } else {
3105 int i;
3106 *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3107 *returndist = dist_y + dist_uv;
3108 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3109 for (i = 0; i < TX_MODES; i++) {
3110 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
3111 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3112 else
3113 ctx->tx_rd_diff[i] = 0;
3117 ctx->mic = *xd->mi_8x8[0];
3120 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3121 const TileInfo *const tile,
3122 int mi_row, int mi_col,
3123 int *returnrate,
3124 int64_t *returndistortion,
3125 BLOCK_SIZE bsize,
3126 PICK_MODE_CONTEXT *ctx,
3127 int64_t best_rd_so_far) {
3128 VP9_COMMON *const cm = &cpi->common;
3129 MACROBLOCKD *const xd = &x->e_mbd;
3130 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
3131 const struct segmentation *const seg = &cm->seg;
3132 MB_PREDICTION_MODE this_mode;
3133 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3134 unsigned char segment_id = mbmi->segment_id;
3135 int comp_pred, i;
3136 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3137 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3138 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3139 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3140 VP9_ALT_FLAG };
3141 int64_t best_rd = best_rd_so_far;
3142 int64_t best_tx_rd[TX_MODES];
3143 int64_t best_tx_diff[TX_MODES];
3144 int64_t best_pred_diff[REFERENCE_MODES];
3145 int64_t best_pred_rd[REFERENCE_MODES];
3146 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3147 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3148 MB_MODE_INFO best_mbmode = { 0 };
3149 int mode_index, best_mode_index = 0;
3150 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3151 vp9_prob comp_mode_p;
3152 int64_t best_intra_rd = INT64_MAX;
3153 int64_t best_inter_rd = INT64_MAX;
3154 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3155 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3156 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3157 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3158 int64_t dist_uv[TX_SIZES];
3159 int skip_uv[TX_SIZES];
3160 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3161 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3162 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3163 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3164 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3165 int best_skip2 = 0;
3166 int mode_skip_mask = 0;
3167 int mode_skip_start = cpi->sf.mode_skip_start + 1;
3168 const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
3169 const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
3170 const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
3171 const int intra_y_mode_mask =
3172 cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3173 int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
3175 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3177 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3178 &comp_mode_p);
3180 for (i = 0; i < REFERENCE_MODES; ++i)
3181 best_pred_rd[i] = INT64_MAX;
3182 for (i = 0; i < TX_MODES; i++)
3183 best_tx_rd[i] = INT64_MAX;
3184 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3185 best_filter_rd[i] = INT64_MAX;
3186 for (i = 0; i < TX_SIZES; i++)
3187 rate_uv_intra[i] = INT_MAX;
3188 for (i = 0; i < MAX_REF_FRAMES; ++i)
3189 x->pred_sse[i] = INT_MAX;
3191 *returnrate = INT_MAX;
3193 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3194 x->pred_mv_sad[ref_frame] = INT_MAX;
3195 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3196 vp9_setup_buffer_inter(cpi, x, tile,
3197 ref_frame, bsize, mi_row, mi_col,
3198 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3200 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3201 frame_mv[ZEROMV][ref_frame].as_int = 0;
3204 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3205 // All modes from vp9_mode_order that use this frame as any ref
3206 static const int ref_frame_mask_all[] = {
3207 0x0, 0x123291, 0x25c444, 0x39b722
3209 // Fixed mv modes (NEARESTMV, NEARMV, ZEROMV) from vp9_mode_order that use
3210 // this frame as their primary ref
3211 static const int ref_frame_mask_fixedmv[] = {
3212 0x0, 0x121281, 0x24c404, 0x080102
3214 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
3215 // Skip modes for missing references
3216 mode_skip_mask |= ref_frame_mask_all[ref_frame];
3217 } else if (cpi->sf.reference_masking) {
3218 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3219 // Skip fixed mv modes for poor references
3220 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3221 mode_skip_mask |= ref_frame_mask_fixedmv[ref_frame];
3222 break;
3226 // If the segment reference frame feature is enabled,
3227 // then do nothing if the current ref frame is not allowed.
3228 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3229 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3230 mode_skip_mask |= ref_frame_mask_all[ref_frame];
3234 // If the segment skip feature is enabled,
3235 // then do nothing if the current mode is not allowed.
3236 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
3237 const int inter_non_zero_mode_mask = 0x1F7F7;
3238 mode_skip_mask |= inter_non_zero_mode_mask;
3241 // Disable this drop out case if the ref frame
3242 // segment level feature is enabled for this segment. This is to
3243 // prevent the possibility that we end up unable to pick any mode.
3244 if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3245 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3246 // unless ARNR filtering is enabled in which case we want
3247 // an unfiltered alternative. We allow near/nearest as well
3248 // because they may result in zero-zero MVs but be cheaper.
3249 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3250 const int altref_zero_mask =
3251 ~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA));
3252 mode_skip_mask |= altref_zero_mask;
3253 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
3254 mode_skip_mask |= (1 << THR_NEARA);
3255 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
3256 mode_skip_mask |= (1 << THR_NEARESTA);
3260 // TODO(JBB): This is to make up for the fact that we don't have SAD
3261 // functions that work when the block size reads outside the UMV. We
3262 // should fix this by first making the motion search work on a
3263 // representative block at the boundary, and then implementing a
3264 // function that does SADs when inside the border.
3265 if ((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) {
3266 const int new_modes_mask =
3267 (1 << THR_NEWMV) | (1 << THR_NEWG) | (1 << THR_NEWA) |
3268 (1 << THR_COMP_NEWLA) | (1 << THR_COMP_NEWGA);
3269 mode_skip_mask |= new_modes_mask;
3272 if (bsize > cpi->sf.max_intra_bsize) {
3273 mode_skip_mask |= 0xFF30808;
3276 if (!x->in_active_map) {
3277 int mode_index;
3278 assert(cpi->ref_frame_flags & VP9_LAST_FLAG);
3279 if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0)
3280 mode_index = THR_NEARESTMV;
3281 else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0)
3282 mode_index = THR_NEARMV;
3283 else
3284 mode_index = THR_ZEROMV;
3285 mode_skip_mask = ~(1 << mode_index);
3286 mode_skip_start = MAX_MODES;
3287 disable_inter_mode_mask = 0;
3290 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3291 int mode_excluded = 0;
3292 int64_t this_rd = INT64_MAX;
3293 int disable_skip = 0;
3294 int compmode_cost = 0;
3295 int rate2 = 0, rate_y = 0, rate_uv = 0;
3296 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3297 int skippable = 0;
3298 int64_t tx_cache[TX_MODES];
3299 int i;
3300 int this_skip2 = 0;
3301 int64_t total_sse = INT64_MAX;
3302 int early_term = 0;
3304 // Look at the reference frame of the best mode so far and set the
3305 // skip mask to look at a subset of the remaining modes.
3306 if (mode_index == mode_skip_start) {
3307 switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
3308 case INTRA_FRAME:
3309 break;
3310 case LAST_FRAME:
3311 mode_skip_mask |= LAST_FRAME_MODE_MASK;
3312 break;
3313 case GOLDEN_FRAME:
3314 mode_skip_mask |= GOLDEN_FRAME_MODE_MASK;
3315 break;
3316 case ALTREF_FRAME:
3317 mode_skip_mask |= ALT_REF_MODE_MASK;
3318 break;
3319 case NONE:
3320 case MAX_REF_FRAMES:
3321 assert(0 && "Invalid Reference frame");
3324 if (mode_skip_mask & (1 << mode_index))
3325 continue;
3327 // Test best rd so far against threshold for trying this mode.
3328 if (best_rd < ((int64_t)rd_threshes[mode_index] *
3329 rd_thresh_freq_fact[mode_index] >> 5) ||
3330 rd_threshes[mode_index] == INT_MAX)
3331 continue;
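// rd_thresh_freq_fact is in units of 1/32 (hence the >> 5): a
// factor of 32 applies the base threshold unchanged, while larger
// factors make this mode progressively easier to skip.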
3333 this_mode = vp9_mode_order[mode_index].mode;
3334 ref_frame = vp9_mode_order[mode_index].ref_frame[0];
3335 if (ref_frame != INTRA_FRAME &&
3336 disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
3337 continue;
3338 second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
3340 comp_pred = second_ref_frame > INTRA_FRAME;
3341 if (comp_pred) {
3342 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3343 vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3344 continue;
3345 if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
3346 ref_frame != best_inter_ref_frame &&
3347 second_ref_frame != best_inter_ref_frame)
3348 continue;
3349 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3350 } else {
3351 if (ref_frame != INTRA_FRAME)
3352 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3355 if (ref_frame == INTRA_FRAME) {
3356 if (!(intra_y_mode_mask & (1 << this_mode)))
3357 continue;
3358 if (this_mode != DC_PRED) {
3359 // Disable intra modes other than DC_PRED for blocks with low variance
3360 // Threshold for intra skipping based on source variance
3361 // TODO(debargha): Specialize the threshold for super block sizes
3362 const unsigned int skip_intra_var_thresh = 64;
3363 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3364 x->source_variance < skip_intra_var_thresh)
3365 continue;
3366 // Only search the oblique modes if the best so far is
3367 // one of the neighboring directional modes
3368 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3369 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3370 if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
3371 continue;
3373 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3374 if (conditional_skipintra(this_mode, best_intra_mode))
3375 continue;
3378 } else {
3379 if (x->in_active_map &&
3380 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
3381 if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
3382 disable_inter_mode_mask, this_mode, ref_frame,
3383 second_ref_frame))
3384 continue;
3387 mbmi->mode = this_mode;
3388 mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode;
3389 mbmi->ref_frame[0] = ref_frame;
3390 mbmi->ref_frame[1] = second_ref_frame;
3391 // Evaluate all sub-pel filters irrespective of whether we can use
3392 // them for this frame.
3393 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3394 : cm->interp_filter;
3395 x->skip = 0;
3396 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3397 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
3399 // Select prediction reference frames.
3400 for (i = 0; i < MAX_MB_PLANE; i++) {
3401 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3402 if (comp_pred)
3403 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3406 for (i = 0; i < TX_MODES; ++i)
3407 tx_cache[i] = INT64_MAX;
3409 #ifdef MODE_TEST_HIT_STATS
3410 // TEST/DEBUG CODE
3411 // Keep a record of the number of test hits at each size
3412 cpi->mode_test_hits[bsize]++;
3413 #endif
3415 if (ref_frame == INTRA_FRAME) {
3416 TX_SIZE uv_tx;
3417 intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3418 bsize, tx_cache, best_rd);
3420 if (rate_y == INT_MAX)
3421 continue;
3423 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
3424 if (rate_uv_intra[uv_tx] == INT_MAX) {
3425 choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
3426 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
3427 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
3430 rate_uv = rate_uv_tokenonly[uv_tx];
3431 distortion_uv = dist_uv[uv_tx];
3432 skippable = skippable && skip_uv[uv_tx];
3433 mbmi->uv_mode = mode_uv[uv_tx];
3435 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3436 if (this_mode != DC_PRED && this_mode != TM_PRED)
3437 rate2 += intra_cost_penalty;
3438 distortion2 = distortion_y + distortion_uv;
3439 } else {
3440 this_rd = handle_inter_mode(cpi, x, tile, bsize,
3441 tx_cache,
3442 &rate2, &distortion2, &skippable,
3443 &rate_y, &distortion_y,
3444 &rate_uv, &distortion_uv,
3445 &mode_excluded, &disable_skip,
3446 &tmp_best_filter, frame_mv,
3447 mi_row, mi_col,
3448 single_newmv, &total_sse, best_rd);
3449 if (this_rd == INT64_MAX)
3450 continue;
3452 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3454 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3455 rate2 += compmode_cost;
3458 // Estimate the reference frame signaling cost and add it
3459 // to the rolling cost variable.
3460 if (comp_pred) {
3461 rate2 += ref_costs_comp[ref_frame];
3462 } else {
3463 rate2 += ref_costs_single[ref_frame];
3466 if (!disable_skip) {
3467 // Test for the condition where skip block will be activated
3468 // because there are no non-zero coefficients and make any
3469 // necessary adjustment for rate. Ignore if skip is coded at
3470 // the segment level, as the cost won't have been added in.
3471 // Is MB-level skip allowed (i.e. not coded at segment level)?
3472 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3473 SEG_LVL_SKIP);
3475 if (skippable) {
3476 // Back out the coefficient coding costs
3477 rate2 -= (rate_y + rate_uv);
3478 // for best yrd calculation
3479 rate_uv = 0;
3481 if (mb_skip_allowed) {
3482 int prob_skip_cost;
3484 // Cost the skip mb case
3485 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
3486 if (skip_prob) {
3487 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3488 rate2 += prob_skip_cost;
3491 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3492 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3493 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3494 // Add in the cost of the no skip flag.
3495 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3496 } else {
3497 // FIXME(rbultje) make this work for splitmv also
3498 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3499 distortion2 = total_sse;
3500 assert(total_sse >= 0);
3501 rate2 -= (rate_y + rate_uv);
3502 rate_y = 0;
3503 rate_uv = 0;
3504 this_skip2 = 1;
3506 } else if (mb_skip_allowed) {
3507 // Add in the cost of the no skip flag.
3508 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3511 // Calculate the final RD estimate for this mode.
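// RDCOST() (see vp9_rdopt.h) forms the usual fixed-point Lagrangian
// cost, roughly rate * lambda + distortion, with x->rdmult and
// x->rddiv supplying lambda's fixed-point scaling.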
3512 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3515 if (ref_frame == INTRA_FRAME) {
3516 // Keep record of best intra rd
3517 if (this_rd < best_intra_rd) {
3518 best_intra_rd = this_rd;
3519 best_intra_mode = mbmi->mode;
3521 } else {
3522 // Keep record of best inter rd with single reference
3523 if (!comp_pred && !mode_excluded && this_rd < best_inter_rd) {
3524 best_inter_rd = this_rd;
3525 best_inter_ref_frame = ref_frame;
3529 if (!disable_skip && ref_frame == INTRA_FRAME) {
3530 for (i = 0; i < REFERENCE_MODES; ++i)
3531 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3532 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3533 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3536 // Store the respective mode distortions for later use.
3537 if (mode_distortions[this_mode] == -1
3538 || distortion2 < mode_distortions[this_mode]) {
3539 mode_distortions[this_mode] = distortion2;
3542 // Did this mode help, i.e. is it the new best mode?
3543 if (this_rd < best_rd || x->skip) {
3544 int max_plane = MAX_MB_PLANE;
3545 if (!mode_excluded) {
3546 // Note index of best mode so far
3547 best_mode_index = mode_index;
3549 if (ref_frame == INTRA_FRAME) {
3550 /* required for left and above block mv */
3551 mbmi->mv[0].as_int = 0;
3552 max_plane = 1;
3555 *returnrate = rate2;
3556 *returndistortion = distortion2;
3557 best_rd = this_rd;
3558 best_mbmode = *mbmi;
3559 best_skip2 = this_skip2;
3560 if (!x->select_txfm_size)
3561 swap_block_ptr(x, ctx, max_plane);
3562 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3563 sizeof(uint8_t) * ctx->num_4x4_blk);
3565 // TODO(debargha): enhance this test with a better distortion prediction
3566 // based on qp, activity mask and history
3567 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3568 (mode_index > MIN_EARLY_TERM_INDEX)) {
3569 const int qstep = xd->plane[0].dequant[1];
3570 // TODO(debargha): Enhance this by specializing for each mode_index
3571 int scale = 4;
3572 if (x->source_variance < UINT_MAX) {
3573 const int var_adjust = (x->source_variance < 16);
3574 scale -= var_adjust;
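// e.g. with qstep == 32 and scale == 4 the test below terminates
// the mode loop once an inter mode reaches distortion2 < 256, i.e.
// a distortion already well under one quantizer step squared.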
3576 if (ref_frame > INTRA_FRAME &&
3577 distortion2 * scale < qstep * qstep) {
3578 early_term = 1;
3584 /* keep record of best compound/single-only prediction */
3585 if (!disable_skip && ref_frame != INTRA_FRAME) {
3586 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3588 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3589 single_rate = rate2 - compmode_cost;
3590 hybrid_rate = rate2;
3591 } else {
3592 single_rate = rate2;
3593 hybrid_rate = rate2 + compmode_cost;
3596 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3597 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3599 if (!comp_pred) {
3600 if (single_rd < best_pred_rd[SINGLE_REFERENCE]) {
3601 best_pred_rd[SINGLE_REFERENCE] = single_rd;
3603 } else {
3604 if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
3605 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3608 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3609 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3611 /* keep record of best filter type */
3612 if (!mode_excluded && cm->interp_filter != BILINEAR) {
3613 int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
3614 SWITCHABLE_FILTERS : cm->interp_filter];
3616 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3617 int64_t adj_rd;
3618 if (ref == INT64_MAX)
3619 adj_rd = 0;
3620 else if (cpi->rd_filter_cache[i] == INT64_MAX)
3621 // When early termination is triggered, the encoder does not have
3622 // access to the rate-distortion cost. It only knows that the cost
3623 // should be above the maximum valid value. Hence it takes the known
3624 // maximum plus an arbitrary constant as the rate-distortion cost.
3625 adj_rd = cpi->mask_filter_rd - ref + 10;
3626 else
3627 adj_rd = cpi->rd_filter_cache[i] - ref;
3629 adj_rd += this_rd;
3630 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3635 /* keep record of best txfm size */
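// Block sizes below the largest transform never measured the bigger
// transform sizes, so propagate the nearest measured cost upward
// before the best_tx_rd comparison below.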
3636 if (bsize < BLOCK_32X32) {
3637 if (bsize < BLOCK_16X16)
3638 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3640 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
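// tx_cache[] holds this mode's rd under each global tx_mode;
// subtracting the cost under the current cm->tx_mode and adding
// this_rd re-centers those costs so best_tx_rd[] stays comparable
// across modes.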
3642 if (!mode_excluded && this_rd != INT64_MAX) {
3643 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3644 int64_t adj_rd = INT64_MAX;
3645 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3647 if (adj_rd < best_tx_rd[i])
3648 best_tx_rd[i] = adj_rd;
3652 if (early_term)
3653 break;
3655 if (x->skip && !comp_pred)
3656 break;
3659 if (best_rd >= best_rd_so_far)
3660 return INT64_MAX;
3662 // If we used an estimate for the uv intra rd in the loop above...
3663 if (cpi->sf.use_uv_intra_rd_estimate) {
3664 // Do Intra UV best rd mode selection if best mode choice above was intra.
3665 if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
3666 TX_SIZE uv_tx_size;
3667 *mbmi = best_mbmode;
3668 uv_tx_size = get_uv_tx_size(mbmi);
3669 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3670 &rate_uv_tokenonly[uv_tx_size],
3671 &dist_uv[uv_tx_size],
3672 &skip_uv[uv_tx_size],
3673 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3674 uv_tx_size);
3678 assert((cm->interp_filter == SWITCHABLE) ||
3679 (cm->interp_filter == best_mbmode.interp_filter) ||
3680 !is_inter_block(&best_mbmode));
3682 // Updating rd_thresh_freq_fact[] here means that the different
3683 // partition/block sizes are handled independently based on the best
3684 // choice for the current partition. It may well be better to keep a scaled
3685 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3686 // combination that wins out.
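// For the winning mode *fact decays by 1/8 of itself (roughly
// *fact *= 7/8), making it cheaper to try on later blocks, while
// every losing mode creeps up by RD_THRESH_INC until it reaches the
// adaptive_rd_thresh * RD_THRESH_MAX_FACT ceiling.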
3687 if (cpi->sf.adaptive_rd_thresh) {
3688 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3689 int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index];
3691 if (mode_index == best_mode_index) {
3692 *fact -= (*fact >> 3);
3693 } else {
3694 *fact = MIN(*fact + RD_THRESH_INC,
3695 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
3700 // macroblock modes
3701 *mbmi = best_mbmode;
3702 x->skip |= best_skip2;
3704 for (i = 0; i < REFERENCE_MODES; ++i) {
3705 if (best_pred_rd[i] == INT64_MAX)
3706 best_pred_diff[i] = INT_MIN;
3707 else
3708 best_pred_diff[i] = best_rd - best_pred_rd[i];
3711 if (!x->skip) {
3712 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3713 if (best_filter_rd[i] == INT64_MAX)
3714 best_filter_diff[i] = 0;
3715 else
3716 best_filter_diff[i] = best_rd - best_filter_rd[i];
3718 if (cm->interp_filter == SWITCHABLE)
3719 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3720 for (i = 0; i < TX_MODES; i++) {
3721 if (best_tx_rd[i] == INT64_MAX)
3722 best_tx_diff[i] = 0;
3723 else
3724 best_tx_diff[i] = best_rd - best_tx_rd[i];
3726 } else {
3727 vp9_zero(best_filter_diff);
3728 vp9_zero(best_tx_diff);
3731 if (!x->in_active_map) {
3732 assert(mbmi->ref_frame[0] == LAST_FRAME);
3733 assert(mbmi->ref_frame[1] == NONE);
3734 assert(mbmi->mode == NEARESTMV ||
3735 mbmi->mode == NEARMV ||
3736 mbmi->mode == ZEROMV);
3737 assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0);
3738 assert(mbmi->mode == mbmi->uv_mode);
3741 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3742 store_coding_context(x, ctx, best_mode_index,
3743 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3744 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3745 mbmi->ref_frame[1]][0],
3746 best_pred_diff, best_tx_diff, best_filter_diff);
3748 return best_rd;
3752 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3753 const TileInfo *const tile,
3754 int mi_row, int mi_col,
3755 int *returnrate,
3756 int64_t *returndistortion,
3757 BLOCK_SIZE bsize,
3758 PICK_MODE_CONTEXT *ctx,
3759 int64_t best_rd_so_far) {
3760 VP9_COMMON *cm = &cpi->common;
3761 MACROBLOCKD *xd = &x->e_mbd;
3762 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
3763 const struct segmentation *seg = &cm->seg;
3764 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3765 unsigned char segment_id = mbmi->segment_id;
3766 int comp_pred, i;
3767 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3768 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3769 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3770 VP9_ALT_FLAG };
3771 int64_t best_rd = best_rd_so_far;
3772 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3773 int64_t best_tx_rd[TX_MODES];
3774 int64_t best_tx_diff[TX_MODES];
3775 int64_t best_pred_diff[REFERENCE_MODES];
3776 int64_t best_pred_rd[REFERENCE_MODES];
3777 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3778 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3779 MB_MODE_INFO best_mbmode = { 0 };
3780 int mode_index, best_mode_index = 0;
3781 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3782 vp9_prob comp_mode_p;
3783 int64_t best_inter_rd = INT64_MAX;
3784 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3785 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3786 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3787 int64_t dist_uv[TX_SIZES];
3788 int skip_uv[TX_SIZES];
3789 MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
3790 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3791 int_mv seg_mvs[4][MAX_REF_FRAMES];
3792 b_mode_info best_bmodes[4];
3793 int best_skip2 = 0;
3794 int ref_frame_mask = 0;
3795 int mode_skip_mask = 0;
3797 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3798 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3800 for (i = 0; i < 4; i++) {
3801 int j;
3802 for (j = 0; j < MAX_REF_FRAMES; j++)
3803 seg_mvs[i][j].as_int = INVALID_MV;
3806 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3807 &comp_mode_p);
3809 for (i = 0; i < REFERENCE_MODES; ++i)
3810 best_pred_rd[i] = INT64_MAX;
3811 for (i = 0; i < TX_MODES; i++)
3812 best_tx_rd[i] = INT64_MAX;
3813 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3814 best_filter_rd[i] = INT64_MAX;
3815 for (i = 0; i < TX_SIZES; i++)
3816 rate_uv_intra[i] = INT_MAX;
3818 *returnrate = INT_MAX;
3820 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3821 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3822 vp9_setup_buffer_inter(cpi, x, tile,
3823 ref_frame, bsize, mi_row, mi_col,
3824 frame_mv[NEARESTMV], frame_mv[NEARMV],
3825 yv12_mb);
3827 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3828 frame_mv[ZEROMV][ref_frame].as_int = 0;
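// With reference masking enabled, mask out any reference whose
// full-pel prediction SAD is more than twice that of some other
// reference (the >> 1 below), on the grounds that it is unlikely
// to win the search.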
3831 for (ref_frame = LAST_FRAME;
3832 ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
3833 int i;
3834 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3835 if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
3836 ref_frame_mask |= (1 << ref_frame);
3837 break;
3842 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
3843 int mode_excluded = 0;
3844 int64_t this_rd = INT64_MAX;
3845 int disable_skip = 0;
3846 int compmode_cost = 0;
3847 int rate2 = 0, rate_y = 0, rate_uv = 0;
3848 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3849 int skippable = 0;
3850 int64_t tx_cache[TX_MODES];
3851 int i;
3852 int this_skip2 = 0;
3853 int64_t total_sse = INT64_MAX;
3854 int early_term = 0;
3856 for (i = 0; i < TX_MODES; ++i)
3857 tx_cache[i] = INT64_MAX;
3859 x->skip = 0;
3860 ref_frame = vp9_ref_order[mode_index].ref_frame[0];
3861 second_ref_frame = vp9_ref_order[mode_index].ref_frame[1];
3863 // Look at the reference frame of the best mode so far and set the
3864 // skip mask to look at a subset of the remaining modes.
3865 if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3866 if (mode_index == 3) {
3867 switch (vp9_ref_order[best_mode_index].ref_frame[0]) {
3868 case INTRA_FRAME:
3869 mode_skip_mask = 0;
3870 break;
3871 case LAST_FRAME:
3872 mode_skip_mask = 0x0010;
3873 break;
3874 case GOLDEN_FRAME:
3875 mode_skip_mask = 0x0008;
3876 break;
3877 case ALTREF_FRAME:
3878 mode_skip_mask = 0x0000;
3879 break;
3880 case NONE:
3881 case MAX_REF_FRAMES:
3882 assert(0 && "Invalid Reference frame");
3885 if (mode_skip_mask & (1 << mode_index))
3886 continue;
3889 // Test best rd so far against threshold for trying this mode.
3890 if ((best_rd <
3891 ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
3892 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
3893 cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
3894 continue;
3896 // Do not allow compound prediction if the segment level reference
3897 // frame feature is in use as in this case there can only be one reference.
3898 if ((second_ref_frame > INTRA_FRAME) &&
3899 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3900 continue;
3902 mbmi->ref_frame[0] = ref_frame;
3903 mbmi->ref_frame[1] = second_ref_frame;
3905 if (!(ref_frame == INTRA_FRAME
3906 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3907 continue;
3909 if (!(second_ref_frame == NONE
3910 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3911 continue;
3914 comp_pred = second_ref_frame > INTRA_FRAME;
3915 if (comp_pred) {
3916 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3917 if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3918 continue;
3919 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3920 if (ref_frame != best_inter_ref_frame &&
3921 second_ref_frame != best_inter_ref_frame)
3922 continue;
3925 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3926 // sub8x8 blocks.
3927 if (ref_frame > 0 && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3928 continue;
3930 if (second_ref_frame > 0 &&
3931 vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
3932 continue;
3934 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3935 mbmi->uv_mode = DC_PRED;
3937 // Evaluate all sub-pel filters irrespective of whether we can use
3938 // them for this frame.
3939 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3940 : cm->interp_filter;
3941 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
3943 if (comp_pred) {
3944 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3945 continue;
3947 mode_excluded = mode_excluded ? mode_excluded
3948 : cm->reference_mode == SINGLE_REFERENCE;
3949 } else {
3950 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3951 mode_excluded = mode_excluded ?
3952 mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
3956 // Select prediction reference frames.
3957 for (i = 0; i < MAX_MB_PLANE; i++) {
3958 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3959 if (comp_pred)
3960 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3963 // If the segment reference frame feature is enabled,
3964 // then do nothing if the current ref frame is not allowed.
3965 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3966 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3967 (int)ref_frame) {
3968 continue;
3969 // If the segment skip feature is enabled,
3970 // then do nothing if the current mode is not allowed.
3971 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3972 ref_frame != INTRA_FRAME) {
3973 continue;
3974 // Disable this drop out case if the ref frame
3975 // segment level feature is enabled for this segment. This is to
3976 // prevent the possibility that we end up unable to pick any mode.
3977 } else if (!vp9_segfeature_active(seg, segment_id,
3978 SEG_LVL_REF_FRAME)) {
3979 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3980 // unless ARNR filtering is enabled in which case we want
3981 // an unfiltered alternative. We allow near/nearest as well
3982 // because they may result in zero-zero MVs but be cheaper.
3983 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3984 continue;
3987 #ifdef MODE_TEST_HIT_STATS
3988 // TEST/DEBUG CODE
3990 // Keep a record of the number of test hits at each size
3990 cpi->mode_test_hits[bsize]++;
3991 #endif
3993 if (ref_frame == INTRA_FRAME) {
3994 int rate;
3995 mbmi->tx_size = TX_4X4;
3996 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3997 &distortion_y, best_rd) >= best_rd)
3998 continue;
3999 rate2 += rate;
4000 rate2 += intra_cost_penalty;
4001 distortion2 += distortion_y;
4003 if (rate_uv_intra[TX_4X4] == INT_MAX) {
4004 choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
4005 &rate_uv_intra[TX_4X4],
4006 &rate_uv_tokenonly[TX_4X4],
4007 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
4008 &mode_uv[TX_4X4]);
4010 rate2 += rate_uv_intra[TX_4X4];
4011 rate_uv = rate_uv_tokenonly[TX_4X4];
4012 distortion2 += dist_uv[TX_4X4];
4013 distortion_uv = dist_uv[TX_4X4];
4014 mbmi->uv_mode = mode_uv[TX_4X4];
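// Sub-8x8 blocks only ever use the 4x4 transform, so every tx_mode
// shares the ONLY_4X4 cost below.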
4015 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4016 for (i = 0; i < TX_MODES; ++i)
4017 tx_cache[i] = tx_cache[ONLY_4X4];
4018 } else {
4019 int rate;
4020 int64_t distortion;
4021 int64_t this_rd_thresh;
4022 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
4023 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
4024 int64_t tmp_best_distortion = INT64_MAX, tmp_best_sse, uv_sse;
4025 int tmp_best_skippable = 0;
4026 int switchable_filter_index;
4027 int_mv *second_ref = comp_pred ?
4028 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
4029 b_mode_info tmp_best_bmodes[16];
4030 MB_MODE_INFO tmp_best_mbmode;
4031 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
4032 int pred_exists = 0;
4033 int uv_skippable;
4035 this_rd_thresh = (ref_frame == LAST_FRAME) ?
4036 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
4037 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
4038 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4039 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
4040 xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
4042 cpi->mask_filter_rd = 0;
4043 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4044 cpi->rd_filter_cache[i] = INT64_MAX;
4046 if (cm->interp_filter != BILINEAR) {
4047 tmp_best_filter = EIGHTTAP;
4048 if (x->source_variance <
4049 cpi->sf.disable_filter_search_var_thresh) {
4050 tmp_best_filter = EIGHTTAP;
4051 } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
4052 ctx->pred_interp_filter < SWITCHABLE) {
4053 tmp_best_filter = ctx->pred_interp_filter;
4054 } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
4055 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
4056 ctx->pred_interp_filter : 0;
4057 } else {
4058 for (switchable_filter_index = 0;
4059 switchable_filter_index < SWITCHABLE_FILTERS;
4060 ++switchable_filter_index) {
4061 int newbest, rs;
4062 int64_t rs_rd;
4063 mbmi->interp_filter = switchable_filter_index;
4064 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
4065 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4066 &mbmi->ref_mvs[ref_frame][0],
4067 second_ref,
4068 best_yrd,
4069 &rate, &rate_y, &distortion,
4070 &skippable, &total_sse,
4071 (int)this_rd_thresh, seg_mvs,
4072 bsi, switchable_filter_index,
4073 mi_row, mi_col);
4075 if (tmp_rd == INT64_MAX)
4076 continue;
4077 rs = get_switchable_rate(x);
4078 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4079 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
4080 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
4081 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
4082 tmp_rd + rs_rd);
4083 if (cm->interp_filter == SWITCHABLE)
4084 tmp_rd += rs_rd;
4086 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd);
4088 newbest = (tmp_rd < tmp_best_rd);
4089 if (newbest) {
4090 tmp_best_filter = mbmi->interp_filter;
4091 tmp_best_rd = tmp_rd;
4093 if ((newbest && cm->interp_filter == SWITCHABLE) ||
4094 (mbmi->interp_filter == cm->interp_filter &&
4095 cm->interp_filter != SWITCHABLE)) {
4096 tmp_best_rdu = tmp_rd;
4097 tmp_best_rate = rate;
4098 tmp_best_ratey = rate_y;
4099 tmp_best_distortion = distortion;
4100 tmp_best_sse = total_sse;
4101 tmp_best_skippable = skippable;
4102 tmp_best_mbmode = *mbmi;
4103 for (i = 0; i < 4; i++) {
4104 tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4105 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4107 pred_exists = 1;
4108 if (switchable_filter_index == 0 &&
4109 cpi->sf.use_rd_breakout &&
4110 best_rd < INT64_MAX) {
4111 if (tmp_best_rdu / 2 > best_rd) {
4112 // skip searching the other filters if the first is
4113 // already substantially larger than the best so far
4114 tmp_best_filter = mbmi->interp_filter;
4115 tmp_best_rdu = INT64_MAX;
4116 break;
4120 } // switchable_filter_index loop
4124 if (tmp_best_rdu == INT64_MAX && pred_exists)
4125 continue;
4127 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
4128 tmp_best_filter : cm->interp_filter);
4129 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
4130 if (!pred_exists) {
4131 // Handles the special case when a filter that is not in the
4132 // switchable list (bilinear, 6-tap) is indicated at the frame level
4133 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4134 &mbmi->ref_mvs[ref_frame][0],
4135 second_ref,
4136 best_yrd,
4137 &rate, &rate_y, &distortion,
4138 &skippable, &total_sse,
4139 (int)this_rd_thresh, seg_mvs,
4140 bsi, 0,
4141 mi_row, mi_col);
4142 if (tmp_rd == INT64_MAX)
4143 continue;
4144 } else {
4145 total_sse = tmp_best_sse;
4146 rate = tmp_best_rate;
4147 rate_y = tmp_best_ratey;
4148 distortion = tmp_best_distortion;
4149 skippable = tmp_best_skippable;
4150 *mbmi = tmp_best_mbmode;
4151 for (i = 0; i < 4; i++)
4152 xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i];
4155 rate2 += rate;
4156 distortion2 += distortion;
4158 if (cm->interp_filter == SWITCHABLE)
4159 rate2 += get_switchable_rate(x);
4161 if (!mode_excluded)
4162 mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
4163 : cm->reference_mode == COMPOUND_REFERENCE;
4165 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4167 tmp_best_rdu = best_rd -
4168 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4169 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
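// tmp_best_rdu is the rd budget left for the UV planes: best_rd
// minus the cheaper of coding the Y residual or skipping it
// outright (rate 0, distortion total_sse).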
4171 if (tmp_best_rdu > 0) {
4172 // If even the 'Y' rd value of split is higher than best so far
4173 // then don't bother looking at UV.
4174 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4175 BLOCK_8X8);
4176 super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4177 &uv_sse, BLOCK_8X8, tmp_best_rdu);
4178 if (rate_uv == INT_MAX)
4179 continue;
4180 rate2 += rate_uv;
4181 distortion2 += distortion_uv;
4182 skippable = skippable && uv_skippable;
4183 total_sse += uv_sse;
4185 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4186 for (i = 0; i < TX_MODES; ++i)
4187 tx_cache[i] = tx_cache[ONLY_4X4];
4191 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4192 rate2 += compmode_cost;
4194 // Estimate the reference frame signaling cost and add it
4195 // to the rolling cost variable.
4196 if (second_ref_frame > INTRA_FRAME) {
4197 rate2 += ref_costs_comp[ref_frame];
4198 } else {
4199 rate2 += ref_costs_single[ref_frame];
4202 if (!disable_skip) {
4203 // Test for the condition where skip block will be activated
4204 // because there are no non-zero coefficients and make any
4205 // necessary adjustment for rate. Ignore if skip is coded at
4206 // the segment level, as the cost won't have been added in.
4207 // Is MB-level skip allowed (i.e. not coded at segment level)?
4208 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
4209 SEG_LVL_SKIP);
4211 if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
4212 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4213 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4214 // Add in the cost of the no skip flag.
4215 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4216 } else {
4217 // FIXME(rbultje) make this work for splitmv also
4218 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
4219 distortion2 = total_sse;
4220 assert(total_sse >= 0);
4221 rate2 -= (rate_y + rate_uv);
4222 rate_y = 0;
4223 rate_uv = 0;
4224 this_skip2 = 1;
4226 } else if (mb_skip_allowed) {
4227 // Add in the cost of the no skip flag.
4228 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4231 // Calculate the final RD estimate for this mode.
4232 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4235 // Keep record of best inter rd with single reference
4236 if (is_inter_block(&xd->mi_8x8[0]->mbmi) &&
4237 !has_second_ref(&xd->mi_8x8[0]->mbmi) &&
4238 !mode_excluded &&
4239 this_rd < best_inter_rd) {
4240 best_inter_rd = this_rd;
4241 best_inter_ref_frame = ref_frame;
4244 if (!disable_skip && ref_frame == INTRA_FRAME) {
4245 for (i = 0; i < REFERENCE_MODES; ++i)
4246 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
4247 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4248 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
4251 // Did this mode help, i.e. is it the new best mode?
4252 if (this_rd < best_rd || x->skip) {
4253 if (!mode_excluded) {
4254 int max_plane = MAX_MB_PLANE;
4255 // Note index of best mode so far
4256 best_mode_index = mode_index;
4258 if (ref_frame == INTRA_FRAME) {
4259 /* required for left and above block mv */
4260 mbmi->mv[0].as_int = 0;
4261 max_plane = 1;
4264 *returnrate = rate2;
4265 *returndistortion = distortion2;
4266 best_rd = this_rd;
4267 best_yrd = best_rd -
4268 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
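// best_yrd backs the UV cost out of the winning rd so that later
// calls to rd_pick_best_mbsegmentation can compare their luma-only
// cost against it.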
4269 best_mbmode = *mbmi;
4270 best_skip2 = this_skip2;
4271 if (!x->select_txfm_size)
4272 swap_block_ptr(x, ctx, max_plane);
4273 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
4274 sizeof(uint8_t) * ctx->num_4x4_blk);
4276 for (i = 0; i < 4; i++)
4277 best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4279 // TODO(debargha): enhance this test with a better distortion prediction
4280 // based on qp, activity mask and history
4281 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4282 (mode_index > MIN_EARLY_TERM_INDEX)) {
4283 const int qstep = xd->plane[0].dequant[1];
4284 // TODO(debargha): Enhance this by specializing for each mode_index
4285 int scale = 4;
4286 if (x->source_variance < UINT_MAX) {
4287 const int var_adjust = (x->source_variance < 16);
4288 scale -= var_adjust;
4290 if (ref_frame > INTRA_FRAME &&
4291 distortion2 * scale < qstep * qstep) {
4292 early_term = 1;
4298 /* keep record of best compound/single-only prediction */
4299 if (!disable_skip && ref_frame != INTRA_FRAME) {
4300 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4302 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4303 single_rate = rate2 - compmode_cost;
4304 hybrid_rate = rate2;
4305 } else {
4306 single_rate = rate2;
4307 hybrid_rate = rate2 + compmode_cost;
4310 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4311 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4313 if (second_ref_frame <= INTRA_FRAME &&
4314 single_rd < best_pred_rd[SINGLE_REFERENCE]) {
4315 best_pred_rd[SINGLE_REFERENCE] = single_rd;
4316 } else if (second_ref_frame > INTRA_FRAME &&
4317 single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
4318 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4320 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4321 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4324 /* keep record of best filter type */
4325 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4326 cm->interp_filter != BILINEAR) {
4327 int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
4328 SWITCHABLE_FILTERS : cm->interp_filter];
4329 int64_t adj_rd;
4330 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4331 if (ref == INT64_MAX)
4332 adj_rd = 0;
4333 else if (cpi->rd_filter_cache[i] == INT64_MAX)
4334 // When early termination is triggered, the encoder does not have
4335 // access to the rate-distortion cost. It only knows that the cost
4336 // should be above the maximum valid value. Hence it takes the known
4337 // maximum plus an arbitrary constant as the rate-distortion cost.
4338 adj_rd = cpi->mask_filter_rd - ref + 10;
4339 else
4340 adj_rd = cpi->rd_filter_cache[i] - ref;
4342 adj_rd += this_rd;
4343 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
4347 /* keep record of best txfm size */
4348 if (bsize < BLOCK_32X32) {
4349 if (bsize < BLOCK_16X16) {
4350 tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
4351 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
4353 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
4355 if (!mode_excluded && this_rd != INT64_MAX) {
4356 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
4357 int64_t adj_rd = INT64_MAX;
4358 if (ref_frame > INTRA_FRAME)
4359 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
4360 else
4361 adj_rd = this_rd;
4363 if (adj_rd < best_tx_rd[i])
4364 best_tx_rd[i] = adj_rd;
4368 if (early_term)
4369 break;
4371 if (x->skip && !comp_pred)
4372 break;
4375 if (best_rd >= best_rd_so_far)
4376 return INT64_MAX;
4378 // If we used an estimate for the uv intra rd in the loop above...
4379 if (cpi->sf.use_uv_intra_rd_estimate) {
4380 // Do Intra UV best rd mode selection if best mode choice above was intra.
4381 if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
4382 TX_SIZE uv_tx_size;
4383 *mbmi = best_mbmode;
4384 uv_tx_size = get_uv_tx_size(mbmi);
4385 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
4386 &rate_uv_tokenonly[uv_tx_size],
4387 &dist_uv[uv_tx_size],
4388 &skip_uv[uv_tx_size],
4389 BLOCK_8X8, uv_tx_size);
4393 if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
4394 *returnrate = INT_MAX;
4395 *returndistortion = INT64_MAX;
4396 return best_rd;
4399 assert((cm->interp_filter == SWITCHABLE) ||
4400 (cm->interp_filter == best_mbmode.interp_filter) ||
4401 !is_inter_block(&best_mbmode));
4403 // Updating rd_thresh_freq_fact[] here means that the different
4404 // partition/block sizes are handled independently based on the best
4405 // choice for the current partition. It may well be better to keep a scaled
4406 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
4407 // combination that wins out.
4408 if (cpi->sf.adaptive_rd_thresh) {
4409 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
4410 int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index];
4412 if (mode_index == best_mode_index) {
4413 *fact -= (*fact >> 3);
4414 } else {
4415 *fact = MIN(*fact + RD_THRESH_INC,
4416 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
4421 // macroblock modes
4422 *mbmi = best_mbmode;
4423 x->skip |= best_skip2;
4424 if (!is_inter_block(&best_mbmode)) {
4425 for (i = 0; i < 4; i++)
4426 xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4427 } else {
4428 for (i = 0; i < 4; ++i)
4429 vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
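// For inter blocks the MVs of the bottom-right 4x4 sub-block
// (bmi[3]) double as the block-level MVs recorded in mbmi, e.g.
// for neighboring MV prediction.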
4431 mbmi->mv[0].as_int = xd->mi_8x8[0]->bmi[3].as_mv[0].as_int;
4432 mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int;
4435 for (i = 0; i < REFERENCE_MODES; ++i) {
4436 if (best_pred_rd[i] == INT64_MAX)
4437 best_pred_diff[i] = INT_MIN;
4438 else
4439 best_pred_diff[i] = best_rd - best_pred_rd[i];
4442 if (!x->skip) {
4443 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4444 if (best_filter_rd[i] == INT64_MAX)
4445 best_filter_diff[i] = 0;
4446 else
4447 best_filter_diff[i] = best_rd - best_filter_rd[i];
4449 if (cm->interp_filter == SWITCHABLE)
4450 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4451 } else {
4452 vp9_zero(best_filter_diff);
4455 if (!x->skip) {
4456 for (i = 0; i < TX_MODES; i++) {
4457 if (best_tx_rd[i] == INT64_MAX)
4458 best_tx_diff[i] = 0;
4459 else
4460 best_tx_diff[i] = best_rd - best_tx_rd[i];
4462 } else {
4463 vp9_zero(best_tx_diff);
4466 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4467 store_coding_context(x, ctx, best_mode_index,
4468 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4469 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4470 mbmi->ref_frame[1]][0],
4471 best_pred_diff, best_tx_diff, best_filter_diff);
4473 return best_rd;