vp9/encoder/vp9_rdopt.c
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pragmas.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_variance.h"

#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
#define RD_THRESH_POW 1.25
#define RD_MULT_EPB_RATIO 64
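
// RD_THRESH_POW shapes how the per-mode RD breakout thresholds scale with
// the quantizer (see compute_rd_thresh_factor() below); RD_THRESH_MAX_FACT
// and RD_THRESH_INC bound and step the adaptive per-mode threshold factors
// maintained elsewhere in the encoder. RD_MULT_EPB_RATIO converts the
// frame-level RD multiplier into the per-bit motion-estimation error cost
// (see vp9_initialize_rd_consts()).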

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

#define LAST_FRAME_MODE_MASK 0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
#define ALT_REF_MODE_MASK 0xFFC648D0

#define MIN_EARLY_TERM_INDEX 3

typedef struct {
  MB_PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
  int use_fast_coef_costing;
  const scan_order *so;
};

const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},
  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};

const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
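
// raster_block indexes the 4x4 blocks of a plane block in raster order;
// the helpers below convert that index into a pixel (or int16 sample)
// offset within a buffer of the given stride.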

static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int bw = b_width_log2(plane_bsize);
  const int y = 4 * (raster_block >> bw);
  const int x = 4 * (raster_block & ((1 << bw) - 1));
  return y * stride + x;
}

static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + raster_block_offset(plane_bsize, raster_block, stride);
}

static void fill_mode_costs(VP9_COMP *cpi) {
  MACROBLOCK *const x = &cpi->mb;
  const FRAME_CONTEXT *const fc = &cpi->common.fc;
  int i, j;

  for (i = 0; i < INTRA_MODES; i++)
    for (j = 0; j < INTRA_MODES; j++)
      vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
                      vp9_intra_mode_tree);

  // TODO(rbultje) separate tables for superblock costing?
  vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
  vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME],
                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME],
                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
    vp9_cost_tokens((int *)x->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
}
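
// Token costs are indexed as
//   c[tx_size][plane_type][ref_type][coef_band][skip_eob][context][token];
// the [0]/[1] pair in the skip_eob dimension holds costs computed with and
// without the trailing EOB node (vp9_cost_tokens() vs vp9_cost_tokens_skip()),
// which is why the EOB entries of the two tables must agree.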

static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 vp9_coef_tree);
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}

static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};

// 3* dc_qlookup[Q]*dc_qlookup[Q];

/* values are now correlated to quantizer */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

void vp9_init_me_luts() {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    const double q = vp9_convert_qindex_to_q(i);
    sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
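
// The RD multiplier (the Lagrangian lambda in the rate-distortion cost
// J = D + lambda * R) grows roughly with the square of the quantizer step,
// so rate is weighted more heavily as quality drops.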

int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      rdmult += (rdmult * rd_iifactor[31]) >> 4;
    else
      rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  return rdmult;
}

static int compute_rd_thresh_factor(int qindex) {
  // TODO(debargha): Adjust the function below
  const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
  return MAX(q, 8);
}

void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

static void set_block_thresholds(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  int i, bsize, segment_id;

  for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
    const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
                                            cm->base_qindex) + cm->y_dc_delta_q,
                             0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      for (i = 0; i < MAX_MODES; ++i)
        cpi->rd_threshes[segment_id][bsize][i] =
            cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4
                                                : INT_MAX;

      for (i = 0; i < MAX_REFS; ++i) {
        cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
            cpi->rd_thresh_mult_sub8x8[i] < thresh_max
                ? cpi->rd_thresh_mult_sub8x8[i] * t / 4
                : INT_MAX;
      }
    }
  }
}

void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  int i;

  vp9_clear_system_state();

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  x->errorperbit += (x->errorperbit == 0);

  x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                         cm->frame_type != KEY_FRAME) ? 0 : 1;

  set_block_thresholds(cpi);

  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
    fill_token_costs(x->token_costs, cm->fc.coef_probs);

    for (i = 0; i < PARTITION_CONTEXTS; i++)
      vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
                      vp9_partition_tree);
  }

  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
      cm->frame_type == KEY_FRAME) {
    fill_mode_costs(cpi);

    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(x->nmvjointcost,
                               cm->allow_high_precision_mv ? x->nmvcost_hp
                                                           : x->nmvcost,
                               &cm->fc.nmvc, cm->allow_high_precision_mv);

      for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
        vp9_cost_tokens((int *)x->inter_mode_cost[i],
                        cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
    }
  }
}

static const int MAX_XSQ_Q10 = 245727;
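
// The _q10 suffix denotes Q10 fixed point: 10 fractional bits, so
// 1 << 10 == 1024 represents 1.0. model_rd_norm() looks up normalized rate
// and distortion for x^2 (in Q10) and linearly interpolates between table
// entries.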

static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256

  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
     4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
     3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
     3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
     2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
     2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
     1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
     1159,  1086,  1021,   963,   911,   864,   821,   781,
      745,   680,   623,   574,   530,   490,   455,   424,
      395,   345,   304,   269,   239,   213,   190,   171,
      154,   126,   104,    87,    73,    61,    52,    44,
       38,    28,    21,    16,    12,    10,     8,     6,
        5,     3,     2,     1,     1,     1,     0,     0,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
       0,     0,     1,     1,     1,     2,     2,     2,
       3,     3,     4,     5,     5,     6,     7,     7,
       8,     9,    11,    12,    13,    15,    16,    17,
      18,    21,    24,    26,    29,    31,    34,    36,
      39,    44,    49,    54,    59,    64,    69,    73,
      78,    88,    97,   106,   115,   124,   133,   142,
     151,   167,   184,   200,   215,   231,   245,   260,
     274,   301,   327,   351,   375,   397,   418,   439,
     458,   495,   528,   559,   587,   613,   637,   659,
     680,   717,   749,   777,   801,   823,   842,   859,
     874,   899,   919,   936,   949,   960,   969,   977,
     983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
    1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
  };
  static const int xsq_iq_q10[] = {
         0,      4,      8,     12,     16,     20,     24,     28,
        32,     40,     48,     56,     64,     72,     80,     88,
        96,    112,    128,    144,    160,    176,    192,    208,
       224,    256,    288,    320,    352,    384,    416,    448,
       480,    544,    608,    672,    736,    800,    864,    928,
       992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
      2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
      4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
      8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
     32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
     65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };

  static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
  assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
  assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
  assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
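
  // Locate the bracketing table entries for xsq_q10: the tables are sampled
  // where the top four bits of (x^2 + 8) vary, so the index is built from
  // the MSB position k and the next three bits, and the result is linearly
  // interpolated between entries xq and xq + 1.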
  int tmp = (xsq_q10 >> 2) + 8;
  int k = get_msb(tmp) - 3;
  int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}

void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    const uint64_t xsq_q10_64 =
        ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
    const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
                        MAX_XSQ_Q10 : (int)xsq_q10_64;
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    *rate = (n * r_q10 + 2) >> 2;
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}

static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi_8x8[0]->mbmi.ref_frame[0];
  unsigned int sse;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);

    if (i == 0)
      x->pred_sse[ref] = sse;

    // Fast approximation of the modeling function.
    if (cpi->speed > 4) {
      int64_t rate;
      int64_t dist;
      int64_t square_error = sse;
      int quantizer = (pd->dequant[1] >> 3);

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    } else {
      int rate;
      int64_t dist;
      vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                                   pd->dequant[1] >> 3, &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}

static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int j, k;
  BLOCK_SIZE bs;
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
  const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[bsize];
  int rate_sum = 0;
  int64_t dist_sum = 0;
  const int t = 4 << tx_size;

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }

  *out_skip = 1;
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
      // sse works better than var, since there is no dc prediction used
      vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
                                   &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * were non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
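
// cost_coeffs() walks the quantized coefficients of one transform block in
// scan order and sums their entropy-coded cost from x->token_costs, updating
// the above/left entropy contexts (*A, *L) as a side effect. With
// use_fast_coef_costing, the per-position context derivation is replaced by
// a cheaper approximation keyed off the previous token.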

static INLINE int cost_coeffs(MACROBLOCK *x,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
        token_cache[rc] = vp9_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // The contexts are zero iff the eob was the first coefficient.
  *A = *L = (c > 0);

  return cost;
}
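
// dist_block() measures distortion in the transform domain: SSE between the
// unquantized and the dequantized coefficients. The shift compensates for
// the extra scaling of the smaller transforms.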

static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
                 (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (p >> 4);
    args->sse += p;
  }
}

static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
  int x_idx, y_idx;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);

  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, tx_size,
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
}

static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi))
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
  else
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse += args->sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
}
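
// Collapse the per-4x4 above/left entropy contexts of a plane into one
// context flag per transform block: for TX_8X8 and larger, a context is set
// if any of the 2/4/8 covered 4x4 contexts is nonzero, which the code below
// tests with a single 16/32/64-bit load.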

void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
    default:
      assert(0 && "Invalid transform size.");
  }
}

static void txfm_rd_in_plane(MACROBLOCK *x,
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args = { 0 };
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;

  if (plane == 0)
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;

  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  args.so = get_scan(xd, tx_size, pd->plane_type, 0);

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         block_rd_txfm, &args);
  if (args.skip) {
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = vp9_is_skippable_in_plane(x, bsize, plane);
  }
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
                                     int64_t ref_best_rd,
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
  cpi->tx_stepdown_count[0]++;
}
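
// In the search below, r[n][0] is the coefficient rate for transform size n
// and r[n][1] additionally includes the cost of signaling size n when the
// frame header allows per-block selection (TX_MODE_SELECT); rd[n][0/1] are
// the corresponding RD costs, and s0/s1 price the skip flag.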

static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t tx_cache[TX_MODES],
                                     BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);

  *distortion = d[mbmi->tx_size];
  *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mbmi->tx_size];

  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];

  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}

static int64_t scaled_rd_cost(int rdmult, int rddiv,
                              int rate, int64_t dist, double scale) {
  return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
}

static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
                                          int64_t ref_best_rd,
                                          BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    double scale = scale_rd[n];
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_tx_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
    if (s[n]) {
      rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n],
                                           scale);
    } else {
      rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n],
                                scale);
      rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n],
                                scale);
    }
    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
    }
  }

  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);

  // Actually encode using the chosen mode if a model was used, but do not
  // update the r, d costs
  txfm_rd_in_plane(x, rate, distortion, skip,
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);

  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    cpi->tx_stepdown_count[0]++;
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
  } else {
    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
  }
}

static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                  int64_t *distortion, int *skip,
                                  int64_t *psse, BLOCK_SIZE bs,
                                  int64_t txfm_cache[TX_MODES],
                                  int64_t ref_best_rd) {
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  TX_SIZE tx_size;

  assert(bs == mbmi->sb_type);

  vp9_subtract_plane(x, bs, 0);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
    if (psse)
      *psse = sse[mbmi->tx_size];
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
      model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
                           &r[tx_size][0], &d[tx_size], &s[tx_size]);
    choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
                                  skip, sse, ref_best_rd, bs);
  } else {
    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
      txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                       &s[tx_size], &sse[tx_size],
                       ref_best_rd, 0, bs, tx_size,
                       cpi->sf.use_fast_coef_costing);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}

static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                  int64_t *distortion, int *skip,
                                  int64_t *psse, BLOCK_SIZE bs,
                                  int64_t txfm_cache[TX_MODES],
                                  int64_t ref_best_rd) {
  int64_t sse[TX_SIZES];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;

  assert(bs == mbmi->sb_type);
  if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
                             ref_best_rd, bs);
  } else {
    int r[TX_SIZES][2], s[TX_SIZES];
    int64_t d[TX_SIZES];
    TX_SIZE tx_size;
    for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
      txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                       &s[tx_size], &sse[tx_size],
                       ref_best_rd, 0, bs, tx_size,
                       cpi->sf.use_fast_coef_costing);
    choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                             skip, txfm_cache, bs);
  }
  if (psse)
    *psse = sse[mbmi->tx_size];
}

static int conditional_skipintra(MB_PREDICTION_MODE mode,
                                 MB_PREDICTION_MODE best_intra_mode) {
  if (mode == D117_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  if (mode == D63_PRED &&
      best_intra_mode != V_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D207_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D45_PRED)
    return 1;
  if (mode == D153_PRED &&
      best_intra_mode != H_PRED &&
      best_intra_mode != D135_PRED)
    return 1;
  return 0;
}
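
// RD search over the intra prediction modes of one 4x4 sub-block of an 8x8
// partition (covering 4x8/8x4 via the idx/idy loops). Each candidate mode is
// predicted, transformed and quantized per 4x4, accumulating rate and
// transform-domain distortion; the reconstruction of the best mode is kept
// in best_dst so it can be restored after the mode loop.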

static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     MB_PREDICTION_MODE *best_mode,
                                     const int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;

  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];

  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
        continue;
    }

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = ib + idy * 2 + idx;
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
        int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi_8x8[0]->bmi[block].as_mode = mode;
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, idx, idy, 0);
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}

static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  int i, j;
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
  const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
  const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  const int *bmode_costs = mb->mbmode_cost;

  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      MB_PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      i = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
        const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);

        bmode_costs = mb->y_mode_costs[A][L];
      }

      this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                      t_above + idx, t_left + idy, &r, &ry, &d,
                                      bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}

static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize,
                                      int64_t tx_cache[TX_MODES],
                                      int64_t best_rd) {
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi_8x8[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
  int i;
  int *bmode_costs = x->mbmode_cost;

  if (cpi->sf.tx_size_search_method == USE_FULL_RD)
    for (i = 0; i < TX_MODES; i++)
      tx_cache[i] = INT64_MAX;

  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    int64_t local_tx_cache[TX_MODES];
    MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
    MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;

    if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
      continue;

    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
      const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);

      bmode_costs = x->y_mode_costs[A][L];
    }
    mic->mbmi.mode = mode;

    intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
                          &s, NULL, bsize, local_tx_cache, best_rd);

    if (this_rate_tokenonly == INT_MAX)
      continue;

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->mbmi.tx_size;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }

    if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
      for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
        const int64_t adj_rd = this_rd + local_tx_cache[i] -
                               local_tx_cache[cpi->common.tx_mode];
        if (adj_rd < tx_cache[i]) {
          tx_cache[i] = adj_rd;
        }
      }
    }
  }

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;

  return best_rd;
}

static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
                             int *rate, int64_t *distortion, int *skippable,
                             int64_t *sse, BLOCK_SIZE bsize,
                             int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
  TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;

  if (ref_best_rd < 0)
    goto term;

  if (is_inter_block(mbmi)) {
    int plane;
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp9_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
                     ref_best_rd, plane, bsize, uv_txfm_size,
                     cpi->sf.use_fast_coef_costing);
    if (pnrate == INT_MAX)
      goto term;
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }
  return;

 term:
  *rate = INT_MAX;
  *distortion = INT64_MAX;
  *sse = INT64_MAX;
  *skippable = 0;
  return;
}

static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
      continue;

    xd->mi_8x8[0]->mbmi.uv_mode = mode;

    super_block_uvrd(cpi, x, &this_rate_tokenonly,
                     &this_distortion, &s, &this_sse, bsize, best_rd);
    if (this_rate_tokenonly == INT_MAX)
      continue;
    this_rate = this_rate_tokenonly +
                x->intra_uv_mode_cost[cpi->common.frame_type][mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected = mode;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      if (!x->select_txfm_size) {
        int i;
        struct macroblock_plane *const p = x->plane;
        struct macroblockd_plane *const pd = xd->plane;
        for (i = 1; i < MAX_MB_PLANE; ++i) {
          p[i].coeff = ctx->coeff_pbuf[i][2];
          p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
          pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
          p[i].eobs = ctx->eobs_pbuf[i][2];

          ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
          ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
          ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
          ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];

          ctx->coeff_pbuf[i][0] = p[i].coeff;
          ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
          ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
          ctx->eobs_pbuf[i][0] = p[i].eobs;
        }
      }
    }
  }

  xd->mi_8x8[0]->mbmi.uv_mode = mode_selected;
  return best_rd;
}

static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
                              int *rate, int *rate_tokenonly,
                              int64_t *distortion, int *skippable,
                              BLOCK_SIZE bsize) {
  const VP9_COMMON *cm = &cpi->common;
  int64_t unused;

  x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
  super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                   skippable, &unused, bsize, INT64_MAX);
  *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED];
  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}

static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 MB_PREDICTION_MODE *mode_uv) {
  MACROBLOCK *const x = &cpi->mb;

  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  if (cpi->sf.use_uv_intra_rd_estimate) {
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
                   skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
  // Else do a proper rd search for each possible transform size that may
  // be considered in the main rd loop.
  } else {
    rd_pick_intra_sbuv_mode(cpi, x, ctx,
                            rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                            bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
  }
  *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
}

static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode,
                       int mode_context) {
  const MACROBLOCK *const x = &cpi->mb;
  const int segment_id = x->e_mbd.mi_8x8[0]->mbmi.segment_id;

  // Don't account for mode here if segment skip is enabled.
  if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
    assert(is_inter_mode(mode));
    return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
  } else {
    return 0;
  }
}

static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize,
                                int_mv *frame_mv,
                                int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv);
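
// labels2mode() commits the motion vector(s) implied by `mode` to sub-block
// i of an 8x8 partition (replicating them over the 4x4s the sub-block
// covers) and returns the rate cost of coding the mode plus, for NEWMV, the
// motion vector residual(s).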

static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
                       MB_PREDICTION_MODE mode,
                       int_mv this_mv[2],
                       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                       int_mv seg_mvs[MAX_REF_FRAMES],
                       int_mv *best_ref_mv[2],
                       const int *mvjcost, int *mvcost[2]) {
  MODE_INFO *const mic = xd->mi_8x8[0];
  const MB_MODE_INFO *const mbmi = &mic->mbmi;
  int thismvcost = 0;
  int idx, idy;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
  const int is_compound = has_second_ref(mbmi);

  // the only time we should do costing for new motion vector or mode
  // is when we are on a new label  (jbb May 08, 2007)
  switch (mode) {
    case NEWMV:
      this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
      thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                    mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      if (is_compound) {
        this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
        thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                      mvjcost, mvcost, MV_COST_WEIGHT_SUB);
      }
      break;
    case NEARESTMV:
      this_mv[0].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
      break;
    case NEARMV:
      this_mv[0].as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
      if (is_compound)
        this_mv[1].as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
      break;
    case ZEROMV:
      this_mv[0].as_int = 0;
      if (is_compound)
        this_mv[1].as_int = 0;
      break;
    default:
      break;
  }

  mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
  if (is_compound)
    mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;

  mic->bmi[i].as_mode = mode;

  for (idy = 0; idy < num_4x4_blocks_high; ++idy)
    for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
      vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
                 &mic->bmi[i], sizeof(mic->bmi[i]));

  return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
         thismvcost;
}
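
// Builds the inter prediction for sub-block i, then forward-transforms,
// quantizes and costs the 4x4 blocks it covers, returning the RD cost of
// the residual (distortion is kept in the transform domain, hence the >> 2
// scaling). Bails out with INT64_MAX as soon as the running cost exceeds
// best_yrd.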

static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
                                       MACROBLOCK *x,
                                       int64_t best_yrd,
                                       int i,
                                       int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta,
                                       ENTROPY_CONTEXT *tl,
                                       int mi_row, int mi_col) {
  int k;
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi_8x8[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;

  const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
                                                             p->src.stride)];
  uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
                                                        pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0, ref;
  const scan_order *so = &vp9_default_scan_orders[TX_4X4];
  const int is_compound = has_second_ref(&mi->mbmi);
  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
                                                               pd->pre[ref].stride)];
    vp9_build_inter_predictor(pre, pd->pre[ref].stride,
                              dst, pd->dst.stride,
                              &mi->bmi[i].as_mv[ref].as_mv,
                              &xd->block_refs[ref]->sf, width, height, ref,
                              xd->interp_kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE + 4 * (i % 2),
                              mi_row * MI_SIZE + 4 * (i / 2));
  }

  vp9_subtract_block(height, width,
                     raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
                     src, p->src.stride,
                     dst, pd->dst.stride);

  k = i;
  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
      int64_t ssz, rd, rd1, rd2;
      int16_t* coeff;

      k += (idy * 2 + idx);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                    coeff, 8);
      vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
      thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                        16, &ssz);
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
                              so->scan, so->neighbors,
                              cpi->sf.use_fast_coef_costing);
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = MIN(rd1, rd2);
      if (rd >= best_yrd)
        return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}

typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv[2];
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  MB_PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;
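
// Motion vectors are stored in 1/8-pel units; >> 3 converts to full-pel for
// comparison against the search range limits.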

static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
  return (mv->row >> 3) < x->mv_row_min ||
         (mv->row >> 3) > x->mv_row_max ||
         (mv->col >> 3) < x->mv_col_min ||
         (mv->col >> 3) > x->mv_col_max;
}

static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
                                                       pd->pre[0].stride)];
  if (has_second_ref(mbmi))
    pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
                                                         pd->pre[1].stride)];
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mbmi))
    x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static INLINE int mv_has_subpel(const MV *mv) {
  return (mv->row & 0x0F) || (mv->col & 0x0F);
}

// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
    int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
    int disable_inter_mode_mask, int this_mode, int ref_frame,
    int second_ref_frame) {
  if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
      (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[this_mode][ref_frame].as_int == 0 &&
      (second_ref_frame == NONE ||
       frame_mv[this_mode][second_ref_frame].as_int == 0)) {
    int rfc = mode_context[ref_frame];
    int c1 = cost_mv_ref(cpi, NEARMV, rfc);
    int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
    int c3 = cost_mv_ref(cpi, ZEROMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == ZEROMV);
      if (second_ref_frame == NONE) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
             frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 &&
             frame_mv[NEARMV][second_ref_frame].as_int == 0))
          return 0;
      }
    }
  }
  return 1;
}
1710 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1711 const TileInfo *const tile,
1712 BEST_SEG_INFO *bsi_buf, int filter_idx,
1713 int_mv seg_mvs[4][MAX_REF_FRAMES],
1714 int mi_row, int mi_col) {
1715 int k, br = 0, idx, idy;
1716 int64_t bd = 0, block_sse = 0;
1717 MB_PREDICTION_MODE this_mode;
1718 MACROBLOCKD *xd = &x->e_mbd;
1719 VP9_COMMON *cm = &cpi->common;
1720 MODE_INFO *mi = xd->mi_8x8[0];
1721 MB_MODE_INFO *const mbmi = &mi->mbmi;
1722 struct macroblock_plane *const p = &x->plane[0];
1723 struct macroblockd_plane *const pd = &xd->plane[0];
1724 const int label_count = 4;
1725 int64_t this_segment_rd = 0;
1726 int label_mv_thresh;
1727 int segmentyrate = 0;
1728 const BLOCK_SIZE bsize = mbmi->sb_type;
1729 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1730 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1731 vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
1732 ENTROPY_CONTEXT t_above[2], t_left[2];
1733 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1734 int mode_idx;
1735 int subpelmv = 1, have_ref = 0;
1736 const int has_second_rf = has_second_ref(mbmi);
1737 const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
1739 vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1740 vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
1742 // 64 makes this threshold effectively so large that we very
1743 // rarely check MVs on segments. Setting this to 1 would make
1744 // the MV threshold roughly equal to what it is for
1745 // macroblocks.
1746 label_mv_thresh = 1 * bsi->mvthresh / label_count;
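// Illustrative arithmetic (mvthresh value assumed): with
// bsi->mvthresh == 2000 and label_count == 4, label_mv_thresh is
// 500; a label whose best_rd is already below that breaks out of
// the NEWMV search below.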
1748 // Segmentation method overheads
1749 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1750 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1751 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1752 // loop for 4x4/4x8/8x4 block coding, to be replaced with a new RD loop.
1753 int_mv mode_mv[MB_MODE_COUNT][2];
1754 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1755 MB_PREDICTION_MODE mode_selected = ZEROMV;
1756 int64_t best_rd = INT64_MAX;
1757 const int i = idy * 2 + idx;
1758 int ref;
1760 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1761 const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
1762 frame_mv[ZEROMV][frame].as_int = 0;
1763 vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
1764 &frame_mv[NEARESTMV][frame],
1765 &frame_mv[NEARMV][frame]);
1768 // search for the best motion vector on this segment
1769 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1770 const struct buf_2d orig_src = x->plane[0].src;
1771 struct buf_2d orig_pre[2];
1773 mode_idx = INTER_OFFSET(this_mode);
1774 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1775 if (disable_inter_mode_mask & (1 << mode_idx))
1776 continue;
1778 if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
1779 disable_inter_mode_mask,
1780 this_mode, mbmi->ref_frame[0],
1781 mbmi->ref_frame[1]))
1782 continue;
1784 vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1785 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1786 sizeof(bsi->rdstat[i][mode_idx].ta));
1787 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1788 sizeof(bsi->rdstat[i][mode_idx].tl));
1790 // motion search for newmv (single predictor case only)
1791 if (!has_second_rf && this_mode == NEWMV &&
1792 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1793 int_mv *const new_mv = &mode_mv[NEWMV][0];
1794 int step_param = 0;
1795 int further_steps;
1796 int thissme, bestsme = INT_MAX;
1797 int sadpb = x->sadperbit4;
1798 MV mvp_full;
1799 int max_mv;
1801 /* Is the best so far sufficiently good that we can't justify doing
1802 * a new motion search? */
1803 if (best_rd < label_mv_thresh)
1804 break;
1806 if (cpi->oxcf.mode != MODE_SECONDPASS_BEST &&
1807 cpi->oxcf.mode != MODE_BESTQUALITY) {
1808 // use previous block's result as next block's MV predictor.
1809 if (i > 0) {
1810 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1811 if (i == 2)
1812 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
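// The four sub-8x8 labels are visited in raster order (0 and 1 on
// the top row, 2 and 3 below), so label 2 starts a new row and
// takes its predictor from label 0 directly above it rather than
// from label 1.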
1815 if (i == 0)
1816 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1817 else
1818 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1820 if (cpi->sf.auto_mv_step_size && cm->show_frame) {
1821 // Take a weighted average of the step_params based on the last
1822 // frame's max mv magnitude and the best ref mvs of the current
1823 // block for the given reference.
1824 step_param = (vp9_init_search_range(cpi, max_mv) +
1825 cpi->mv_step_param) >> 1;
1826 } else {
1827 step_param = cpi->mv_step_param;
1830 mvp_full.row = bsi->mvp.as_mv.row >> 3;
1831 mvp_full.col = bsi->mvp.as_mv.col >> 3;
1833 if (cpi->sf.adaptive_motion_search && cm->show_frame) {
1834 mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
1835 mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
1836 step_param = MAX(step_param, 8);
1839 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1840 // adjust src pointer for this block
1841 mi_buf_shift(x, i);
1843 vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
1845 if (cpi->sf.search_method == HEX) {
1846 bestsme = vp9_hex_search(x, &mvp_full,
1847 step_param,
1848 sadpb, 1, v_fn_ptr, 1,
1849 &bsi->ref_mv[0]->as_mv,
1850 &new_mv->as_mv);
1851 if (bestsme < INT_MAX)
1852 bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
1853 &bsi->ref_mv[0]->as_mv,
1854 v_fn_ptr, 1);
1855 } else if (cpi->sf.search_method == SQUARE) {
1856 bestsme = vp9_square_search(x, &mvp_full,
1857 step_param,
1858 sadpb, 1, v_fn_ptr, 1,
1859 &bsi->ref_mv[0]->as_mv,
1860 &new_mv->as_mv);
1861 if (bestsme < INT_MAX)
1862 bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
1863 &bsi->ref_mv[0]->as_mv,
1864 v_fn_ptr, 1);
1865 } else if (cpi->sf.search_method == BIGDIA) {
1866 bestsme = vp9_bigdia_search(x, &mvp_full,
1867 step_param,
1868 sadpb, 1, v_fn_ptr, 1,
1869 &bsi->ref_mv[0]->as_mv,
1870 &new_mv->as_mv);
1871 if (bestsme < INT_MAX)
1872 bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
1873 &bsi->ref_mv[0]->as_mv,
1874 v_fn_ptr, 1);
1875 } else {
1876 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1877 sadpb, further_steps, 0, v_fn_ptr,
1878 &bsi->ref_mv[0]->as_mv,
1879 &new_mv->as_mv);
1882 // Should we do a full search (best quality only)
1883 if (cpi->oxcf.mode == MODE_BESTQUALITY ||
1884 cpi->oxcf.mode == MODE_SECONDPASS_BEST) {
1885 int_mv *const best_mv = &mi->bmi[i].as_mv[0];
1886 /* Check if mvp_full is within the range. */
1887 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1888 x->mv_row_min, x->mv_row_max);
1889 thissme = cpi->full_search_sad(x, &mvp_full,
1890 sadpb, 16, v_fn_ptr,
1891 x->nmvjointcost, x->mvcost,
1892 &bsi->ref_mv[0]->as_mv,
1893 &best_mv->as_mv);
1894 if (thissme < bestsme) {
1895 bestsme = thissme;
1896 new_mv->as_int = best_mv->as_int;
1897 } else {
1898 // The full search result is actually worse, so reinstate the
1899 // previous best vector.
1900 best_mv->as_int = new_mv->as_int;
1904 if (bestsme < INT_MAX) {
1905 int distortion;
1906 cpi->find_fractional_mv_step(x,
1907 &new_mv->as_mv,
1908 &bsi->ref_mv[0]->as_mv,
1909 cm->allow_high_precision_mv,
1910 x->errorperbit, v_fn_ptr,
1911 cpi->sf.subpel_force_stop,
1912 cpi->sf.subpel_iters_per_step,
1913 x->nmvjointcost, x->mvcost,
1914 &distortion,
1915 &x->pred_sse[mbmi->ref_frame[0]]);
1917 // save motion search result for use in compound prediction
1918 seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int;
1921 if (cpi->sf.adaptive_motion_search)
1922 x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int;
1924 // restore src pointers
1925 mi_buf_restore(x, orig_src, orig_pre);
1928 if (has_second_rf) {
1929 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1930 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1931 continue;
1934 if (has_second_rf && this_mode == NEWMV &&
1935 mbmi->interp_filter == EIGHTTAP) {
1936 // adjust src pointers
1937 mi_buf_shift(x, i);
1938 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1939 int rate_mv;
1940 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1941 mi_row, mi_col, seg_mvs[i],
1942 &rate_mv);
1943 seg_mvs[i][mbmi->ref_frame[0]].as_int =
1944 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1945 seg_mvs[i][mbmi->ref_frame[1]].as_int =
1946 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1948 // restore src pointers
1949 mi_buf_restore(x, orig_src, orig_pre);
1952 bsi->rdstat[i][mode_idx].brate =
1953 labels2mode(cpi, xd, i, this_mode, mode_mv[this_mode], frame_mv,
1954 seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost);
1956 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1957 bsi->rdstat[i][mode_idx].mvs[ref].as_int =
1958 mode_mv[this_mode][ref].as_int;
1959 if (num_4x4_blocks_wide > 1)
1960 bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
1961 mode_mv[this_mode][ref].as_int;
1962 if (num_4x4_blocks_high > 1)
1963 bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
1964 mode_mv[this_mode][ref].as_int;
1967 // Trap vectors that reach beyond the UMV borders
1968 if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
1969 (has_second_rf &&
1970 mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
1971 continue;
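// For the second and later filters, when the selected MV has no
// subpel component the interpolation filter cannot change the
// prediction, so the SEG_RDSTAT cached for an earlier filter is
// copied below instead of re-encoding the segment.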
1973 if (filter_idx > 0) {
1974 BEST_SEG_INFO *ref_bsi = bsi_buf;
1975 subpelmv = 0;
1976 have_ref = 1;
1978 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1979 subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
1980 have_ref &= mode_mv[this_mode][ref].as_int ==
1981 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1984 if (filter_idx > 1 && !subpelmv && !have_ref) {
1985 ref_bsi = bsi_buf + 1;
1986 have_ref = 1;
1987 for (ref = 0; ref < 1 + has_second_rf; ++ref)
1988 have_ref &= mode_mv[this_mode][ref].as_int ==
1989 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1992 if (!subpelmv && have_ref &&
1993 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1994 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1995 sizeof(SEG_RDSTAT));
1996 if (num_4x4_blocks_wide > 1)
1997 bsi->rdstat[i + 1][mode_idx].eobs =
1998 ref_bsi->rdstat[i + 1][mode_idx].eobs;
1999 if (num_4x4_blocks_high > 1)
2000 bsi->rdstat[i + 2][mode_idx].eobs =
2001 ref_bsi->rdstat[i + 2][mode_idx].eobs;
2003 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2004 mode_selected = this_mode;
2005 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2007 continue;
2011 bsi->rdstat[i][mode_idx].brdcost =
2012 encode_inter_mb_segment(cpi, x,
2013 bsi->segment_rd - this_segment_rd, i,
2014 &bsi->rdstat[i][mode_idx].byrate,
2015 &bsi->rdstat[i][mode_idx].bdist,
2016 &bsi->rdstat[i][mode_idx].bsse,
2017 bsi->rdstat[i][mode_idx].ta,
2018 bsi->rdstat[i][mode_idx].tl,
2019 mi_row, mi_col);
2020 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2021 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
2022 bsi->rdstat[i][mode_idx].brate, 0);
2023 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2024 bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
2025 if (num_4x4_blocks_wide > 1)
2026 bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
2027 if (num_4x4_blocks_high > 1)
2028 bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
2031 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2032 mode_selected = this_mode;
2033 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2035 } /*for each 4x4 mode*/
2037 if (best_rd == INT64_MAX) {
2038 int iy, midx;
2039 for (iy = i + 1; iy < 4; ++iy)
2040 for (midx = 0; midx < INTER_MODES; ++midx)
2041 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2042 bsi->segment_rd = INT64_MAX;
2043 return;
2046 mode_idx = INTER_OFFSET(mode_selected);
2047 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2048 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2050 labels2mode(cpi, xd, i, mode_selected, mode_mv[mode_selected],
2051 frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
2052 x->mvcost);
2054 br += bsi->rdstat[i][mode_idx].brate;
2055 bd += bsi->rdstat[i][mode_idx].bdist;
2056 block_sse += bsi->rdstat[i][mode_idx].bsse;
2057 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2058 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2060 if (this_segment_rd > bsi->segment_rd) {
2061 int iy, midx;
2062 for (iy = i + 1; iy < 4; ++iy)
2063 for (midx = 0; midx < INTER_MODES; ++midx)
2064 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2065 bsi->segment_rd = INT64_MAX;
2066 return;
2069 } /* for each label */
2071 bsi->r = br;
2072 bsi->d = bd;
2073 bsi->segment_yrate = segmentyrate;
2074 bsi->segment_rd = this_segment_rd;
2075 bsi->sse = block_sse;
2077 // update the coding decisions
2078 for (k = 0; k < 4; ++k)
2079 bsi->modes[k] = mi->bmi[k].as_mode;
2082 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
2083 const TileInfo *const tile,
2084 int_mv *best_ref_mv,
2085 int_mv *second_best_ref_mv,
2086 int64_t best_rd,
2087 int *returntotrate,
2088 int *returnyrate,
2089 int64_t *returndistortion,
2090 int *skippable, int64_t *psse,
2091 int mvthresh,
2092 int_mv seg_mvs[4][MAX_REF_FRAMES],
2093 BEST_SEG_INFO *bsi_buf,
2094 int filter_idx,
2095 int mi_row, int mi_col) {
2096 int i;
2097 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2098 MACROBLOCKD *xd = &x->e_mbd;
2099 MODE_INFO *mi = xd->mi_8x8[0];
2100 MB_MODE_INFO *mbmi = &mi->mbmi;
2101 int mode_idx;
2103 vp9_zero(*bsi);
2105 bsi->segment_rd = best_rd;
2106 bsi->ref_mv[0] = best_ref_mv;
2107 bsi->ref_mv[1] = second_best_ref_mv;
2108 bsi->mvp.as_int = best_ref_mv->as_int;
2109 bsi->mvthresh = mvthresh;
2111 for (i = 0; i < 4; i++)
2112 bsi->modes[i] = ZEROMV;
2114 rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs,
2115 mi_row, mi_col);
2117 if (bsi->segment_rd > best_rd)
2118 return INT64_MAX;
2119 /* set it to the best */
2120 for (i = 0; i < 4; i++) {
2121 mode_idx = INTER_OFFSET(bsi->modes[i]);
2122 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2123 if (has_second_ref(mbmi))
2124 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2125 x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2126 mi->bmi[i].as_mode = bsi->modes[i];
2130 /* used to set mbmi->mv.as_int */
2132 *returntotrate = bsi->r;
2133 *returndistortion = bsi->d;
2134 *returnyrate = bsi->segment_yrate;
2135 *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
2136 *psse = bsi->sse;
2137 mbmi->mode = bsi->modes[3];
2139 return bsi->segment_rd;
2142 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2143 uint8_t *ref_y_buffer, int ref_y_stride,
2144 int ref_frame, BLOCK_SIZE block_size) {
2145 MACROBLOCKD *xd = &x->e_mbd;
2146 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2147 int_mv this_mv;
2148 int i;
2149 int zero_seen = 0;
2150 int best_index = 0;
2151 int best_sad = INT_MAX;
2152 int this_sad = INT_MAX;
2153 int max_mv = 0;
2155 uint8_t *src_y_ptr = x->plane[0].src.buf;
2156 uint8_t *ref_y_ptr;
2157 int row_offset, col_offset;
2158 int num_mv_refs = MAX_MV_REF_CANDIDATES +
2159 (cpi->sf.adaptive_motion_search &&
2160 cpi->common.show_frame &&
2161 block_size < cpi->sf.max_partition_size);
2163 int_mv pred_mv[3];
2164 pred_mv[0] = mbmi->ref_mvs[ref_frame][0];
2165 pred_mv[1] = mbmi->ref_mvs[ref_frame][1];
2166 pred_mv[2] = x->pred_mv[ref_frame];
2168 // Get the sad for each candidate reference mv
2169 for (i = 0; i < num_mv_refs; i++) {
2170 this_mv.as_int = pred_mv[i].as_int;
2172 max_mv = MAX(max_mv,
2173 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2174 // only need to check zero mv once
2175 if (!this_mv.as_int && zero_seen)
2176 continue;
2178 zero_seen = zero_seen || !this_mv.as_int;
2180 row_offset = this_mv.as_mv.row >> 3;
2181 col_offset = this_mv.as_mv.col >> 3;
2182 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2184 // Find sad for current vector.
2185 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2186 ref_y_ptr, ref_y_stride,
2187 0x7fffffff);
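// Passing 0x7fffffff as the final argument effectively disables the
// SAD function's early-termination threshold, so the full block SAD
// is computed for every candidate.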
2189 // Note if it is the best so far.
2190 if (this_sad < best_sad) {
2191 best_sad = this_sad;
2192 best_index = i;
2196 // Note the index of the mv that worked best in the reference list.
2197 x->mv_best_ref_index[ref_frame] = best_index;
2198 x->max_mv_context[ref_frame] = max_mv;
2199 x->pred_mv_sad[ref_frame] = best_sad;
2202 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
2203 unsigned int *ref_costs_single,
2204 unsigned int *ref_costs_comp,
2205 vp9_prob *comp_mode_p) {
2206 VP9_COMMON *const cm = &cpi->common;
2207 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2208 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
2209 SEG_LVL_REF_FRAME);
2210 if (seg_ref_active) {
2211 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2212 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2213 *comp_mode_p = 128;
2214 } else {
2215 vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
2216 vp9_prob comp_inter_p = 128;
2218 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
2219 comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
2220 *comp_mode_p = comp_inter_p;
2221 } else {
2222 *comp_mode_p = 128;
2225 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2227 if (cm->reference_mode != COMPOUND_REFERENCE) {
2228 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2229 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2230 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2232 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2233 base_cost += vp9_cost_bit(comp_inter_p, 0);
2235 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2236 ref_costs_single[ALTREF_FRAME] = base_cost;
2237 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2238 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2239 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2240 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2241 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2242 } else {
2243 ref_costs_single[LAST_FRAME] = 512;
2244 ref_costs_single[GOLDEN_FRAME] = 512;
2245 ref_costs_single[ALTREF_FRAME] = 512;
2247 if (cm->reference_mode != SINGLE_REFERENCE) {
2248 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2249 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2251 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2252 base_cost += vp9_cost_bit(comp_inter_p, 1);
2254 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2255 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2256 } else {
2257 ref_costs_comp[LAST_FRAME] = 512;
2258 ref_costs_comp[GOLDEN_FRAME] = 512;
2263 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2264 int mode_index,
2265 int_mv *ref_mv,
2266 int_mv *second_ref_mv,
2267 int64_t comp_pred_diff[REFERENCE_MODES],
2268 int64_t tx_size_diff[TX_MODES],
2269 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
2270 MACROBLOCKD *const xd = &x->e_mbd;
2272 // Take a snapshot of the coding context so it can be
2273 // restored if we decide to encode this way
2274 ctx->skip = x->skip;
2275 ctx->best_mode_index = mode_index;
2276 ctx->mic = *xd->mi_8x8[0];
2278 ctx->best_ref_mv[0].as_int = ref_mv->as_int;
2279 ctx->best_ref_mv[1].as_int = second_ref_mv->as_int;
2281 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
2282 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
2283 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
2285 vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2286 vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
2287 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
2290 static void setup_pred_block(const MACROBLOCKD *xd,
2291 struct buf_2d dst[MAX_MB_PLANE],
2292 const YV12_BUFFER_CONFIG *src,
2293 int mi_row, int mi_col,
2294 const struct scale_factors *scale,
2295 const struct scale_factors *scale_uv) {
2296 int i;
2298 dst[0].buf = src->y_buffer;
2299 dst[0].stride = src->y_stride;
2300 dst[1].buf = src->u_buffer;
2301 dst[2].buf = src->v_buffer;
2302 dst[1].stride = dst[2].stride = src->uv_stride;
2303 #if CONFIG_ALPHA
2304 dst[3].buf = src->alpha_buffer;
2305 dst[3].stride = src->alpha_stride;
2306 #endif
2308 // TODO(jkoleszar): Make scale factors per-plane data
2309 for (i = 0; i < MAX_MB_PLANE; i++) {
2310 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2311 i ? scale_uv : scale,
2312 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
2316 void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2317 const TileInfo *const tile,
2318 MV_REFERENCE_FRAME ref_frame,
2319 BLOCK_SIZE block_size,
2320 int mi_row, int mi_col,
2321 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2322 int_mv frame_near_mv[MAX_REF_FRAMES],
2323 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
2324 const VP9_COMMON *cm = &cpi->common;
2325 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
2326 MACROBLOCKD *const xd = &x->e_mbd;
2327 MODE_INFO *const mi = xd->mi_8x8[0];
2328 int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
2329 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
2331 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2332 // use the UV scaling factors.
2333 setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
2335 // Gets an initial list of candidate vectors from neighbours and orders them
2336 vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);
2338 // Candidate refinement carried out at encoder and decoder
2339 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
2340 &frame_nearest_mv[ref_frame],
2341 &frame_near_mv[ref_frame]);
2343 // Further refinement that is encode side only to test the top few candidates
2344 // in full and choose the best as the centre point for subsequent searches.
2345 // The current implementation doesn't support scaling.
2346 if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
2347 mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
2348 ref_frame, block_size);
2351 const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
2352 int ref_frame) {
2353 const VP9_COMMON *const cm = &cpi->common;
2354 const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
2355 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
2356 return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
2359 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
2360 const MACROBLOCKD *const xd = &x->e_mbd;
2361 const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
2362 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2363 return SWITCHABLE_INTERP_RATE_FACTOR *
2364 x->switchable_interp_costs[ctx][mbmi->interp_filter];
2367 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2368 const TileInfo *const tile,
2369 BLOCK_SIZE bsize,
2370 int mi_row, int mi_col,
2371 int_mv *tmp_mv, int *rate_mv) {
2372 MACROBLOCKD *xd = &x->e_mbd;
2373 VP9_COMMON *cm = &cpi->common;
2374 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2375 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2376 int bestsme = INT_MAX;
2377 int further_steps, step_param;
2378 int sadpb = x->sadperbit16;
2379 MV mvp_full;
2380 int ref = mbmi->ref_frame[0];
2381 MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
2383 int tmp_col_min = x->mv_col_min;
2384 int tmp_col_max = x->mv_col_max;
2385 int tmp_row_min = x->mv_row_min;
2386 int tmp_row_max = x->mv_row_max;
2388 const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
2389 ref);
2391 MV pred_mv[3];
2392 pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
2393 pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
2394 pred_mv[2] = x->pred_mv[ref].as_mv;
2396 if (scaled_ref_frame) {
2397 int i;
2398 // Swap out the reference frame for a version that's been scaled to
2399 // match the resolution of the current frame, allowing the existing
2400 // motion search code to be used without additional modifications.
2401 for (i = 0; i < MAX_MB_PLANE; i++)
2402 backup_yv12[i] = xd->plane[i].pre[0];
2404 vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2407 vp9_set_mv_search_range(x, &ref_mv);
2409 // Work out the size of the first step in the mv step search.
2410 // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2411 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
2412 // Take a weighted average of the step_params based on the last
2413 // frame's max mv magnitude and that based on the best ref mvs of
2414 // the current block for the given reference.
2415 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2416 cpi->mv_step_param) >> 1;
2417 } else {
2418 step_param = cpi->mv_step_param;
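// For illustration: step_param == n gives a first step of
// (max length) >> n, so the weighted average above starts the
// diamond search with a smaller first step whenever recent frames
// and this block's ref MVs both suggest small motion.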
2421 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
2422 cpi->common.show_frame) {
2423 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
2424 b_width_log2(bsize)));
2425 step_param = MAX(step_param, boffset);
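// Illustrative arithmetic: b_width_log2(BLOCK_64X64) is 4, so a
// BLOCK_8X8 (b_width_log2 == 1) gets boffset == 2 * (4 - 1) == 6,
// i.e. smaller partitions start the search with a smaller first
// step.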
2428 if (cpi->sf.adaptive_motion_search) {
2429 int bwl = b_width_log2_lookup[bsize];
2430 int bhl = b_height_log2_lookup[bsize];
2431 int i;
2432 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
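// bwl + bhl + 4 is log2 of the block's pixel count, so tlevel
// approximates the average SAD per pixel; a small value means the
// predictor is already close and a coarser first step suffices.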
2434 if (tlevel < 5)
2435 step_param += 2;
2437 for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) {
2438 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
2439 x->pred_mv[ref].as_int = 0;
2440 tmp_mv->as_int = INVALID_MV;
2442 if (scaled_ref_frame) {
2443 int i;
2444 for (i = 0; i < MAX_MB_PLANE; i++)
2445 xd->plane[i].pre[0] = backup_yv12[i];
2447 return;
2452 mvp_full = pred_mv[x->mv_best_ref_index[ref]];
2454 mvp_full.col >>= 3;
2455 mvp_full.row >>= 3;
2457 // Further step/diamond searches as necessary
2458 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
2460 if (cpi->sf.search_method == FAST_DIAMOND) {
2461 bestsme = vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0,
2462 &cpi->fn_ptr[bsize], 1,
2463 &ref_mv, &tmp_mv->as_mv);
2464 if (bestsme < INT_MAX)
2465 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2466 &cpi->fn_ptr[bsize], 1);
2467 } else if (cpi->sf.search_method == FAST_HEX) {
2468 bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
2469 &cpi->fn_ptr[bsize], 1,
2470 &ref_mv, &tmp_mv->as_mv);
2471 if (bestsme < INT_MAX)
2472 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2473 &cpi->fn_ptr[bsize], 1);
2474 } else if (cpi->sf.search_method == HEX) {
2475 bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
2476 &cpi->fn_ptr[bsize], 1,
2477 &ref_mv, &tmp_mv->as_mv);
2478 if (bestsme < INT_MAX)
2479 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2480 &cpi->fn_ptr[bsize], 1);
2481 } else if (cpi->sf.search_method == SQUARE) {
2482 bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
2483 &cpi->fn_ptr[bsize], 1,
2484 &ref_mv, &tmp_mv->as_mv);
2485 if (bestsme < INT_MAX)
2486 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2487 &cpi->fn_ptr[bsize], 1);
2488 } else if (cpi->sf.search_method == BIGDIA) {
2489 bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
2490 &cpi->fn_ptr[bsize], 1,
2491 &ref_mv, &tmp_mv->as_mv);
2492 if (bestsme < INT_MAX)
2493 bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
2494 &cpi->fn_ptr[bsize], 1);
2495 } else {
2496 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2497 sadpb, further_steps, 1,
2498 &cpi->fn_ptr[bsize],
2499 &ref_mv, &tmp_mv->as_mv);
2502 x->mv_col_min = tmp_col_min;
2503 x->mv_col_max = tmp_col_max;
2504 x->mv_row_min = tmp_row_min;
2505 x->mv_row_max = tmp_row_max;
2507 if (bestsme < INT_MAX) {
2508 int dis; /* TODO: use dis in distortion calculation later. */
2509 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
2510 cm->allow_high_precision_mv,
2511 x->errorperbit,
2512 &cpi->fn_ptr[bsize],
2513 cpi->sf.subpel_force_stop,
2514 cpi->sf.subpel_iters_per_step,
2515 x->nmvjointcost, x->mvcost,
2516 &dis, &x->pred_sse[ref]);
2518 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
2519 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2521 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
2522 x->pred_mv[ref].as_int = tmp_mv->as_int;
2524 if (scaled_ref_frame) {
2525 int i;
2526 for (i = 0; i < MAX_MB_PLANE; i++)
2527 xd->plane[i].pre[0] = backup_yv12[i];
2531 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2532 BLOCK_SIZE bsize,
2533 int_mv *frame_mv,
2534 int mi_row, int mi_col,
2535 int_mv single_newmv[MAX_REF_FRAMES],
2536 int *rate_mv) {
2537 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
2538 const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
2539 MACROBLOCKD *xd = &x->e_mbd;
2540 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2541 const int refs[2] = { mbmi->ref_frame[0],
2542 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
2543 int_mv ref_mv[2];
2544 int ite, ref;
2545 // Prediction buffer from second frame.
2546 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2548 // Do joint motion search in compound mode to get more accurate mv.
2549 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
2550 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
2551 int last_besterr[2] = {INT_MAX, INT_MAX};
2552 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
2553 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
2554 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
2557 for (ref = 0; ref < 2; ++ref) {
2558 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
2560 if (scaled_ref_frame[ref]) {
2561 int i;
2562 // Swap out the reference frame for a version that's been scaled to
2563 // match the resolution of the current frame, allowing the existing
2564 // motion search code to be used without additional modifications.
2565 for (i = 0; i < MAX_MB_PLANE; i++)
2566 backup_yv12[ref][i] = xd->plane[i].pre[ref];
2567 vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
2568 NULL);
2571 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
2574 // Allow the joint search to run multiple iterations for each ref
2575 // frame, and break out of the loop if it cannot find a better MV.
2576 for (ite = 0; ite < 4; ite++) {
2577 struct buf_2d ref_yv12[2];
2578 int bestsme = INT_MAX;
2579 int sadpb = x->sadperbit16;
2580 int_mv tmp_mv;
2581 int search_range = 3;
2583 int tmp_col_min = x->mv_col_min;
2584 int tmp_col_max = x->mv_col_max;
2585 int tmp_row_min = x->mv_row_min;
2586 int tmp_row_max = x->mv_row_max;
2587 int id = ite % 2;
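// id alternates 0, 1, 0, 1 over the iterations, so each pass
// refines one reference's MV while the other reference's prediction
// is held fixed in second_pred.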
2589 // Initialized here because of compiler problem in Visual Studio.
2590 ref_yv12[0] = xd->plane[0].pre[0];
2591 ref_yv12[1] = xd->plane[0].pre[1];
2593 // Get pred block from second frame.
2594 vp9_build_inter_predictor(ref_yv12[!id].buf,
2595 ref_yv12[!id].stride,
2596 second_pred, pw,
2597 &frame_mv[refs[!id]].as_mv,
2598 &xd->block_refs[!id]->sf,
2599 pw, ph, 0,
2600 xd->interp_kernel, MV_PRECISION_Q3,
2601 mi_col * MI_SIZE, mi_row * MI_SIZE);
2603 // Compound motion search on first ref frame.
2604 if (id)
2605 xd->plane[0].pre[0] = ref_yv12[id];
2606 vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
2608 // Use mv result from single mode as mvp.
2609 tmp_mv.as_int = frame_mv[refs[id]].as_int;
2611 tmp_mv.as_mv.col >>= 3;
2612 tmp_mv.as_mv.row >>= 3;
2614 // Small-range full-pixel motion search
2615 bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb,
2616 search_range,
2617 &cpi->fn_ptr[bsize],
2618 x->nmvjointcost, x->mvcost,
2619 &ref_mv[id].as_mv, second_pred,
2620 pw, ph);
2621 if (bestsme < INT_MAX)
2622 bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv,
2623 second_pred, &cpi->fn_ptr[bsize], 1);
2625 x->mv_col_min = tmp_col_min;
2626 x->mv_col_max = tmp_col_max;
2627 x->mv_row_min = tmp_row_min;
2628 x->mv_row_max = tmp_row_max;
2630 if (bestsme < INT_MAX) {
2631 int dis; /* TODO: use dis in distortion calculation later. */
2632 unsigned int sse;
2633 bestsme = cpi->find_fractional_mv_step_comp(
2634 x, &tmp_mv.as_mv,
2635 &ref_mv[id].as_mv,
2636 cpi->common.allow_high_precision_mv,
2637 x->errorperbit,
2638 &cpi->fn_ptr[bsize],
2639 0, cpi->sf.subpel_iters_per_step,
2640 x->nmvjointcost, x->mvcost,
2641 &dis, &sse, second_pred,
2642 pw, ph);
2645 if (id)
2646 xd->plane[0].pre[0] = scaled_first_yv12;
2648 if (bestsme < last_besterr[id]) {
2649 frame_mv[refs[id]].as_int = tmp_mv.as_int;
2650 last_besterr[id] = bestsme;
2651 } else {
2652 break;
2656 *rate_mv = 0;
2658 for (ref = 0; ref < 2; ++ref) {
2659 if (scaled_ref_frame[ref]) {
2660 // restore the predictor
2661 int i;
2662 for (i = 0; i < MAX_MB_PLANE; i++)
2663 xd->plane[i].pre[ref] = backup_yv12[ref][i];
2666 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2667 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2668 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2671 vpx_free(second_pred);
2674 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2675 uint8_t *orig_dst[MAX_MB_PLANE],
2676 int orig_dst_stride[MAX_MB_PLANE]) {
2677 int i;
2678 for (i = 0; i < MAX_MB_PLANE; i++) {
2679 xd->plane[i].dst.buf = orig_dst[i];
2680 xd->plane[i].dst.stride = orig_dst_stride[i];
2684 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2685 const TileInfo *const tile,
2686 BLOCK_SIZE bsize,
2687 int64_t txfm_cache[],
2688 int *rate2, int64_t *distortion,
2689 int *skippable,
2690 int *rate_y, int64_t *distortion_y,
2691 int *rate_uv, int64_t *distortion_uv,
2692 int *mode_excluded, int *disable_skip,
2693 INTERP_FILTER *best_filter,
2694 int_mv (*mode_mv)[MAX_REF_FRAMES],
2695 int mi_row, int mi_col,
2696 int_mv single_newmv[MAX_REF_FRAMES],
2697 int64_t *psse,
2698 const int64_t ref_best_rd) {
2699 VP9_COMMON *cm = &cpi->common;
2700 MACROBLOCKD *xd = &x->e_mbd;
2701 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2702 const int is_comp_pred = has_second_ref(mbmi);
2703 const int num_refs = is_comp_pred ? 2 : 1;
2704 const int this_mode = mbmi->mode;
2705 int_mv *frame_mv = mode_mv[this_mode];
2706 int i;
2707 int refs[2] = { mbmi->ref_frame[0],
2708 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2709 int_mv cur_mv[2];
2710 int64_t this_rd = 0;
2711 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2712 int pred_exists = 0;
2713 int intpel_mv;
2714 int64_t rd, best_rd = INT64_MAX;
2715 int best_needs_copy = 0;
2716 uint8_t *orig_dst[MAX_MB_PLANE];
2717 int orig_dst_stride[MAX_MB_PLANE];
2718 int rs = 0;
2720 if (is_comp_pred) {
2721 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2722 frame_mv[refs[1]].as_int == INVALID_MV)
2723 return INT64_MAX;
2726 if (this_mode == NEWMV) {
2727 int rate_mv;
2728 if (is_comp_pred) {
2729 // Initialize mv using single prediction mode result.
2730 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2731 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2733 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2734 joint_motion_search(cpi, x, bsize, frame_mv,
2735 mi_row, mi_col, single_newmv, &rate_mv);
2736 } else {
2737 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2738 &mbmi->ref_mvs[refs[0]][0].as_mv,
2739 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2740 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2741 &mbmi->ref_mvs[refs[1]][0].as_mv,
2742 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2744 *rate2 += rate_mv;
2745 } else {
2746 int_mv tmp_mv;
2747 single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
2748 &tmp_mv, &rate_mv);
2749 if (tmp_mv.as_int == INVALID_MV)
2750 return INT64_MAX;
2751 *rate2 += rate_mv;
2752 frame_mv[refs[0]].as_int =
2753 xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2754 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2758 for (i = 0; i < num_refs; ++i) {
2759 cur_mv[i] = frame_mv[refs[i]];
2760 // Clip "next_nearest" so that it does not extend too far out of the image.
2761 if (this_mode != NEWMV)
2762 clamp_mv2(&cur_mv[i].as_mv, xd);
2764 if (mv_check_bounds(x, &cur_mv[i].as_mv))
2765 return INT64_MAX;
2766 mbmi->mv[i].as_int = cur_mv[i].as_int;
2769 // do first prediction into the destination buffer. Do the next
2770 // prediction into a temporary buffer. Then keep track of which one
2771 // of these currently holds the best predictor, and use the other
2772 // one for future predictions. In the end, copy from tmp_buf to
2773 // dst if necessary.
2774 for (i = 0; i < MAX_MB_PLANE; i++) {
2775 orig_dst[i] = xd->plane[i].dst.buf;
2776 orig_dst_stride[i] = xd->plane[i].dst.stride;
2779 /* We don't include the cost of the second reference here, because there
2780 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2781 * words if you present them in that order, the second one is always known
2782 * if the first is known */
2783 *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);
2785 if (!(*mode_excluded))
2786 *mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE
2787 : cm->reference_mode == COMPOUND_REFERENCE;
2789 pred_exists = 0;
2790 // Are all MVs integer pel for Y and UV
2791 intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
2792 if (is_comp_pred)
2793 intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
2795 // Search for best switchable filter by checking the variance of
2796 // pred error irrespective of whether the filter will be used
2797 cpi->mask_filter_rd = 0;
2798 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2799 cpi->rd_filter_cache[i] = INT64_MAX;
2801 if (cm->interp_filter != BILINEAR) {
2802 *best_filter = EIGHTTAP;
2803 if (x->source_variance <
2804 cpi->sf.disable_filter_search_var_thresh) {
2805 *best_filter = EIGHTTAP;
2806 } else {
2807 int newbest;
2808 int tmp_rate_sum = 0;
2809 int64_t tmp_dist_sum = 0;
2811 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2812 int j;
2813 int64_t rs_rd;
2814 mbmi->interp_filter = i;
2815 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
2816 rs = get_switchable_rate(x);
2817 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
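// With integer-pel MVs every interpolation filter yields the same
// prediction, so for i > 0 the rate and distortion modeled for the
// first filter are reused rather than rebuilding the predictor.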
2819 if (i > 0 && intpel_mv) {
2820 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2821 cpi->rd_filter_cache[i] = rd;
2822 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2823 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2824 if (cm->interp_filter == SWITCHABLE)
2825 rd += rs_rd;
2826 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
2827 } else {
2828 int rate_sum = 0;
2829 int64_t dist_sum = 0;
2830 if ((cm->interp_filter == SWITCHABLE &&
2831 (!i || best_needs_copy)) ||
2832 (cm->interp_filter != SWITCHABLE &&
2833 (cm->interp_filter == mbmi->interp_filter ||
2834 (i == 0 && intpel_mv)))) {
2835 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2836 } else {
2837 for (j = 0; j < MAX_MB_PLANE; j++) {
2838 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2839 xd->plane[j].dst.stride = 64;
2842 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2843 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2845 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2846 cpi->rd_filter_cache[i] = rd;
2847 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2848 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2849 if (cm->interp_filter == SWITCHABLE)
2850 rd += rs_rd;
2851 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
2853 if (i == 0 && intpel_mv) {
2854 tmp_rate_sum = rate_sum;
2855 tmp_dist_sum = dist_sum;
2859 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2860 if (rd / 2 > ref_best_rd) {
2861 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2862 return INT64_MAX;
2865 newbest = i == 0 || rd < best_rd;
2867 if (newbest) {
2868 best_rd = rd;
2869 *best_filter = mbmi->interp_filter;
2870 if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2871 best_needs_copy = !best_needs_copy;
2874 if ((cm->interp_filter == SWITCHABLE && newbest) ||
2875 (cm->interp_filter != SWITCHABLE &&
2876 cm->interp_filter == mbmi->interp_filter)) {
2877 pred_exists = 1;
2880 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2883 // Set the appropriate filter
2884 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2885 cm->interp_filter : *best_filter;
2886 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
2887 rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0;
2889 if (pred_exists) {
2890 if (best_needs_copy) {
2891 // again temporarily set the buffers to local memory to prevent a memcpy
2892 for (i = 0; i < MAX_MB_PLANE; i++) {
2893 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2894 xd->plane[i].dst.stride = 64;
2897 } else {
2898 // Handles the special case when a filter that is not in the
2899 // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level.
2900 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2903 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2904 int tmp_rate;
2905 int64_t tmp_dist;
2906 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2907 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2908 // if current pred_error modeled rd is substantially more than the best
2909 // so far, do not bother doing full rd
2910 if (rd / 2 > ref_best_rd) {
2911 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2912 return INT64_MAX;
2916 if (cm->interp_filter == SWITCHABLE)
2917 *rate2 += get_switchable_rate(x);
2919 if (!is_comp_pred) {
2920 if (!x->in_active_map) {
2921 if (psse)
2922 *psse = 0;
2923 *distortion = 0;
2924 x->skip = 1;
2925 } else if (cpi->allow_encode_breakout && x->encode_breakout) {
2926 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2927 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2928 unsigned int var, sse;
2929 // Skipping threshold for ac.
2930 unsigned int thresh_ac;
2931 // Set a maximum for the threshold to avoid a big PSNR loss in low-bitrate cases.
2932 // Use an extremely low threshold for static frames to limit skipping.
2933 const unsigned int max_thresh = (cpi->allow_encode_breakout ==
2934 ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
2935 // The encode_breakout input
2936 const unsigned int min_thresh =
2937 MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
2939 // Calculate threshold according to dequant value.
2940 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2941 thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
2943 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2944 xd->plane[0].dst.buf,
2945 xd->plane[0].dst.stride, &sse);
2947 // Adjust threshold according to partition size.
2948 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2949 b_height_log2_lookup[bsize]);
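// Illustrative arithmetic (dequant value assumed, not from the
// encoder): xd->plane[0].dequant[1] == 48 gives
// thresh_ac == 48 * 48 / 9 == 256 before clamping. The shift above
// then scales the threshold by block area relative to BLOCK_64X64,
// since sse is accumulated over the whole block.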
2951 // Y skipping condition checking
2952 if (sse < thresh_ac || sse == 0) {
2953 // Skipping threshold for dc
2954 unsigned int thresh_dc;
2956 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2958 // dc skipping checking
2959 if ((sse - var) < thresh_dc || sse == var) {
2960 unsigned int sse_u, sse_v;
2961 unsigned int var_u, var_v;
2963 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2964 x->plane[1].src.stride,
2965 xd->plane[1].dst.buf,
2966 xd->plane[1].dst.stride, &sse_u);
2968 // U skipping condition checking
2969 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2970 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2971 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2972 x->plane[2].src.stride,
2973 xd->plane[2].dst.buf,
2974 xd->plane[2].dst.stride, &sse_v);
2976 // V skipping condition checking
2977 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2978 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2979 x->skip = 1;
2981 // The cost of the skip bit needs to be added.
2982 *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2984 // Scaling factor for SSE from spatial domain to frequency domain
2985 // is 16. Adjust distortion accordingly.
2986 *distortion_uv = (sse_u + sse_v) << 4;
2987 *distortion = (sse << 4) + *distortion_uv;
2989 *disable_skip = 1;
2990 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2998 if (!x->skip) {
2999 int skippable_y, skippable_uv;
3000 int64_t sseuv = INT64_MAX;
3001 int64_t rdcosty = INT64_MAX;
3003 // Y cost and distortion
3004 inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
3005 bsize, txfm_cache, ref_best_rd);
3007 if (*rate_y == INT_MAX) {
3008 *rate2 = INT_MAX;
3009 *distortion = INT64_MAX;
3010 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3011 return INT64_MAX;
3014 *rate2 += *rate_y;
3015 *distortion += *distortion_y;
3017 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
3018 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
3020 super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
3021 bsize, ref_best_rd - rdcosty);
3022 if (*rate_uv == INT_MAX) {
3023 *rate2 = INT_MAX;
3024 *distortion = INT64_MAX;
3025 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3026 return INT64_MAX;
3029 *psse += sseuv;
3030 *rate2 += *rate_uv;
3031 *distortion += *distortion_uv;
3032 *skippable = skippable_y && skippable_uv;
3035 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3036 return this_rd; // if 0, this will be re-calculated by caller
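// swap_block_ptr() below ping-pongs the coefficient, qcoeff,
// dqcoeff and eob buffers between the macroblock and slots 0/1 of
// the pick-mode context, so the best mode's coefficient data
// survives without a memcpy.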
3039 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
3040 int max_plane) {
3041 struct macroblock_plane *const p = x->plane;
3042 struct macroblockd_plane *const pd = x->e_mbd.plane;
3043 int i;
3045 for (i = 0; i < max_plane; ++i) {
3046 p[i].coeff = ctx->coeff_pbuf[i][1];
3047 p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
3048 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
3049 p[i].eobs = ctx->eobs_pbuf[i][1];
3051 ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
3052 ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
3053 ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
3054 ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];
3056 ctx->coeff_pbuf[i][0] = p[i].coeff;
3057 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
3058 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
3059 ctx->eobs_pbuf[i][0] = p[i].eobs;
3063 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3064 int *returnrate, int64_t *returndist,
3065 BLOCK_SIZE bsize,
3066 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3067 VP9_COMMON *const cm = &cpi->common;
3068 MACROBLOCKD *const xd = &x->e_mbd;
3069 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3070 int y_skip = 0, uv_skip = 0;
3071 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
3072 TX_SIZE max_uv_tx_size;
3073 x->skip_encode = 0;
3074 ctx->skip = 0;
3075 xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
3077 if (bsize >= BLOCK_8X8) {
3078 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3079 &dist_y, &y_skip, bsize, tx_cache,
3080 best_rd) >= best_rd) {
3081 *returnrate = INT_MAX;
3082 return;
3084 max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
3085 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3086 &dist_uv, &uv_skip, bsize, max_uv_tx_size);
3087 } else {
3088 y_skip = 0;
3089 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3090 &dist_y, best_rd) >= best_rd) {
3091 *returnrate = INT_MAX;
3092 return;
3094 max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
3095 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3096 &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
3099 if (y_skip && uv_skip) {
3100 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3101 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3102 *returndist = dist_y + dist_uv;
3103 vp9_zero(ctx->tx_rd_diff);
3104 } else {
3105 int i;
3106 *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3107 *returndist = dist_y + dist_uv;
3108 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3109 for (i = 0; i < TX_MODES; i++) {
3110 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
3111 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3112 else
3113 ctx->tx_rd_diff[i] = 0;
3117 ctx->mic = *xd->mi_8x8[0];
3120 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3121 const TileInfo *const tile,
3122 int mi_row, int mi_col,
3123 int *returnrate,
3124 int64_t *returndistortion,
3125 BLOCK_SIZE bsize,
3126 PICK_MODE_CONTEXT *ctx,
3127 int64_t best_rd_so_far) {
3128 VP9_COMMON *const cm = &cpi->common;
3129 MACROBLOCKD *const xd = &x->e_mbd;
3130 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
3131 const struct segmentation *const seg = &cm->seg;
3132 MB_PREDICTION_MODE this_mode;
3133 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3134 unsigned char segment_id = mbmi->segment_id;
3135 int comp_pred, i;
3136 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3137 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3138 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3139 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3140 VP9_ALT_FLAG };
3141 int64_t best_rd = best_rd_so_far;
3142 int64_t best_tx_rd[TX_MODES];
3143 int64_t best_tx_diff[TX_MODES];
3144 int64_t best_pred_diff[REFERENCE_MODES];
3145 int64_t best_pred_rd[REFERENCE_MODES];
3146 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3147 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3148 MB_MODE_INFO best_mbmode = { 0 };
3149 int mode_index, best_mode_index = 0;
3150 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3151 vp9_prob comp_mode_p;
3152 int64_t best_intra_rd = INT64_MAX;
3153 int64_t best_inter_rd = INT64_MAX;
3154 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3155 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3156 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3157 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3158 int64_t dist_uv[TX_SIZES];
3159 int skip_uv[TX_SIZES];
3160 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3161 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3162 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3163 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3164 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3165 int best_skip2 = 0;
3166 int mode_skip_mask = 0;
3167 int mode_skip_start = cpi->sf.mode_skip_start + 1;
3168 const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
3169 const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
3170 const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
3171 const int intra_y_mode_mask =
3172 cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3173 int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
3175 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3177 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3178 &comp_mode_p);
3180 for (i = 0; i < REFERENCE_MODES; ++i)
3181 best_pred_rd[i] = INT64_MAX;
3182 for (i = 0; i < TX_MODES; i++)
3183 best_tx_rd[i] = INT64_MAX;
3184 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3185 best_filter_rd[i] = INT64_MAX;
3186 for (i = 0; i < TX_SIZES; i++)
3187 rate_uv_intra[i] = INT_MAX;
3188 for (i = 0; i < MAX_REF_FRAMES; ++i)
3189 x->pred_sse[i] = INT_MAX;
3191 *returnrate = INT_MAX;
3193 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3194 x->pred_mv_sad[ref_frame] = INT_MAX;
3195 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3196 vp9_setup_buffer_inter(cpi, x, tile,
3197 ref_frame, bsize, mi_row, mi_col,
3198 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3200 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3201 frame_mv[ZEROMV][ref_frame].as_int = 0;
3204 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3205 // All modes from vp9_mode_order that use this frame as any ref
3206 static const int ref_frame_mask_all[] = {
3207 0x0, 0x123291, 0x25c444, 0x39b722
3209 // Fixed mv modes (NEARESTMV, NEARMV, ZEROMV) from vp9_mode_order that use
3210 // this frame as their primary ref
3211 static const int ref_frame_mask_fixedmv[] = {
3212 0x0, 0x121281, 0x24c404, 0x080102
3214 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
3215 // Skip modes for missing references
3216 mode_skip_mask |= ref_frame_mask_all[ref_frame];
3217 } else if (cpi->sf.reference_masking) {
3218 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3219 // Skip fixed mv modes for poor references
3220 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3221 mode_skip_mask |= ref_frame_mask_fixedmv[ref_frame];
3222 break;
3226 // If the segment reference frame feature is enabled,
3227 // then do nothing if the current ref frame is not allowed.
3228 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3229 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3230 mode_skip_mask |= ref_frame_mask_all[ref_frame];
3234 // If the segment skip feature is enabled,
3235 // then do nothing if the current mode is not allowed.
3236 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
3237 const int inter_non_zero_mode_mask = 0x1F7F7;
3238 mode_skip_mask |= inter_non_zero_mode_mask;
3241 // Disable this drop out case if the ref frame
3242 // segment level feature is enabled for this segment. This is to
3243 // prevent the possibility that we end up unable to pick any mode.
3244 if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3245 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3246 // unless ARNR filtering is enabled in which case we want
3247 // an unfiltered alternative. We allow near/nearest as well
3248 // because they may result in zero-zero MVs but be cheaper.
3249 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3250 const int altref_zero_mask =
3251 ~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA));
3252 mode_skip_mask |= altref_zero_mask;
3253 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
3254 mode_skip_mask |= (1 << THR_NEARA);
3255 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
3256 mode_skip_mask |= (1 << THR_NEARESTA);
3260 // TODO(JBB): This is to make up for the fact that we don't have SAD
3261 // functions that work when the block size reads outside the UMV. We
3262 // should fix this by first making the motion search work on a
3263 // representative block at the boundary, and then implementing a
3264 // function that does SADs when inside the border.
3265 if ((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) {
3266 const int new_modes_mask =
3267 (1 << THR_NEWMV) | (1 << THR_NEWG) | (1 << THR_NEWA) |
3268 (1 << THR_COMP_NEWLA) | (1 << THR_COMP_NEWGA);
3269 mode_skip_mask |= new_modes_mask;
3272 if (bsize > cpi->sf.max_intra_bsize) {
3273 mode_skip_mask |= 0xFF30808;
3276 if (!x->in_active_map) {
3277 int mode_index;
3278 assert(cpi->ref_frame_flags & VP9_LAST_FLAG);
3279 if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0)
3280 mode_index = THR_NEARESTMV;
3281 else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0)
3282 mode_index = THR_NEARMV;
3283 else
3284 mode_index = THR_ZEROMV;
3285 mode_skip_mask = ~(1 << mode_index);
3286 mode_skip_start = MAX_MODES;
3287 disable_inter_mode_mask = 0;
3290 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3291 int mode_excluded = 0;
3292 int64_t this_rd = INT64_MAX;
3293 int disable_skip = 0;
3294 int compmode_cost = 0;
3295 int rate2 = 0, rate_y = 0, rate_uv = 0;
3296 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3297 int skippable = 0;
3298 int64_t tx_cache[TX_MODES];
3299 int i;
3300 int this_skip2 = 0;
3301 int64_t total_sse = INT64_MAX;
3302 int early_term = 0;
3304 // Look at the reference frame of the best mode so far and set the
3305 // skip mask to look at a subset of the remaining modes.
3306 if (mode_index == mode_skip_start) {
3307 switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
3308 case INTRA_FRAME:
3309 break;
3310 case LAST_FRAME:
3311 mode_skip_mask |= LAST_FRAME_MODE_MASK;
3312 break;
3313 case GOLDEN_FRAME:
3314 mode_skip_mask |= GOLDEN_FRAME_MODE_MASK;
3315 break;
3316 case ALTREF_FRAME:
3317 mode_skip_mask |= ALT_REF_MODE_MASK;
3318 break;
3319 case NONE:
3320 case MAX_REF_FRAMES:
3321 assert(0 && "Invalid Reference frame");
3324 if (mode_skip_mask & (1 << mode_index))
3325 continue;
3327 // Test best rd so far against threshold for trying this mode.
3328 if (best_rd < ((int64_t)rd_threshes[mode_index] *
3329 rd_thresh_freq_fact[mode_index] >> 5) ||
3330 rd_threshes[mode_index] == INT_MAX)
3331 continue;
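// rd_thresh_freq_fact is in units of 1/32 (hence the >> 5): a
// factor of 32 applies the base threshold unchanged, while larger
// factors make this mode progressively easier to skip.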
3333 this_mode = vp9_mode_order[mode_index].mode;
3334 ref_frame = vp9_mode_order[mode_index].ref_frame[0];
3335 if (ref_frame != INTRA_FRAME &&
3336 disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
3337 continue;
3338 second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
3340 comp_pred = second_ref_frame > INTRA_FRAME;
3341 if (comp_pred) {
3342 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3343 vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3344 continue;
3345 if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
3346 ref_frame != best_inter_ref_frame &&
3347 second_ref_frame != best_inter_ref_frame)
3348 continue;
3349 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3350 } else {
3351 if (ref_frame != INTRA_FRAME)
3352 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3355 if (ref_frame == INTRA_FRAME) {
3356 if (!(intra_y_mode_mask & (1 << this_mode)))
3357 continue;
3358 if (this_mode != DC_PRED) {
3359 // Disable intra modes other than DC_PRED for blocks with low variance
3360 // Threshold for intra skipping based on source variance
3361 // TODO(debargha): Specialize the threshold for super block sizes
3362 const unsigned int skip_intra_var_thresh = 64;
3363 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3364 x->source_variance < skip_intra_var_thresh)
3365 continue;
3366 // Only search the oblique modes if the best so far is
3367 // one of the neighboring directional modes
3368 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3369 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3370 if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
3371 continue;
3373 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3374 if (conditional_skipintra(this_mode, best_intra_mode))
3375 continue;
3378 } else {
3379 if (x->in_active_map &&
3380 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
3381 if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
3382 disable_inter_mode_mask, this_mode, ref_frame,
3383 second_ref_frame))
3384 continue;
3387 mbmi->mode = this_mode;
3388 mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode;
3389 mbmi->ref_frame[0] = ref_frame;
3390 mbmi->ref_frame[1] = second_ref_frame;
3391 // Evaluate all sub-pel filters irrespective of whether we can use
3392 // them for this frame.
3393 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3394 : cm->interp_filter;
3395 x->skip = 0;
3396 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3397 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
3399 // Select prediction reference frames.
3400 for (i = 0; i < MAX_MB_PLANE; i++) {
3401 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3402 if (comp_pred)
3403 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3406 for (i = 0; i < TX_MODES; ++i)
3407 tx_cache[i] = INT64_MAX;
3409 #ifdef MODE_TEST_HIT_STATS
3410 // TEST/DEBUG CODE
3411 // Keep a record of the number of test hits at each size
3412 cpi->mode_test_hits[bsize]++;
3413 #endif
3415 if (ref_frame == INTRA_FRAME) {
3416 TX_SIZE uv_tx;
3417 intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3418 bsize, tx_cache, best_rd);
3420 if (rate_y == INT_MAX)
3421 continue;
3423 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
3424 if (rate_uv_intra[uv_tx] == INT_MAX) {
3425 choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
3426 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
3427 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
3430 rate_uv = rate_uv_tokenonly[uv_tx];
3431 distortion_uv = dist_uv[uv_tx];
3432 skippable = skippable && skip_uv[uv_tx];
3433 mbmi->uv_mode = mode_uv[uv_tx];
3435 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3436 if (this_mode != DC_PRED && this_mode != TM_PRED)
3437 rate2 += intra_cost_penalty;
3438 distortion2 = distortion_y + distortion_uv;
3439 } else {
3440 this_rd = handle_inter_mode(cpi, x, tile, bsize,
3441 tx_cache,
3442 &rate2, &distortion2, &skippable,
3443 &rate_y, &distortion_y,
3444 &rate_uv, &distortion_uv,
3445 &mode_excluded, &disable_skip,
3446 &tmp_best_filter, frame_mv,
3447 mi_row, mi_col,
3448 single_newmv, &total_sse, best_rd);
3449 if (this_rd == INT64_MAX)
3450 continue;
3452 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3454 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3455 rate2 += compmode_cost;
3458 // Estimate the reference frame signaling cost and add it
3459 // to the rolling cost variable.
3460 if (comp_pred) {
3461 rate2 += ref_costs_comp[ref_frame];
3462 } else {
3463 rate2 += ref_costs_single[ref_frame];
3466 if (!disable_skip) {
3467 // Test for the condition where skip block will be activated
3468 // because there are no non-zero coefficients and make any
3469 // necessary adjustment for rate. Ignore if skip is coded at
3470 // the segment level, as the cost won't have been added in.
3471 // Is MB-level skip allowed (i.e. not coded at segment level)?
3472 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3473 SEG_LVL_SKIP);
3475 if (skippable) {
3476 // Back out the coefficient coding costs
3477 rate2 -= (rate_y + rate_uv);
3478 // for best yrd calculation
3479 rate_uv = 0;
3481 if (mb_skip_allowed) {
3482 int prob_skip_cost;
3484 // Cost the skip mb case
3485 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
3486 if (skip_prob) {
3487 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3488 rate2 += prob_skip_cost;
3491 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3492 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3493 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3494 // Add in the cost of the no skip flag.
3495 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3496 } else {
3497 // FIXME(rbultje) make this work for splitmv also
3498 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3499 distortion2 = total_sse;
3500 assert(total_sse >= 0);
3501 rate2 -= (rate_y + rate_uv);
3502 rate_y = 0;
3503 rate_uv = 0;
3504 this_skip2 = 1;
3506 } else if (mb_skip_allowed) {
3507 // Add in the cost of the no skip flag.
3508 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3511 // Calculate the final RD estimate for this mode.
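// RDCOST() (see vp9_rdopt.h) forms the usual fixed-point Lagrangian
// cost, roughly rate * lambda + distortion, with x->rdmult and
// x->rddiv supplying lambda's fixed-point scaling.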
3512 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3515 if (ref_frame == INTRA_FRAME) {
3516 // Keep record of best intra rd
3517 if (this_rd < best_intra_rd) {
3518 best_intra_rd = this_rd;
3519 best_intra_mode = mbmi->mode;
3521 } else {
3522 // Keep record of best inter rd with single reference
3523 if (!comp_pred && !mode_excluded && this_rd < best_inter_rd) {
3524 best_inter_rd = this_rd;
3525 best_inter_ref_frame = ref_frame;
3529 if (!disable_skip && ref_frame == INTRA_FRAME) {
3530 for (i = 0; i < REFERENCE_MODES; ++i)
3531 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3532 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3533 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3536 // Store the respective mode distortions for later use.
3537 if (mode_distortions[this_mode] == -1
3538 || distortion2 < mode_distortions[this_mode]) {
3539 mode_distortions[this_mode] = distortion2;
3542 // Did this mode help, i.e. is it the new best mode?
3543 if (this_rd < best_rd || x->skip) {
3544 int max_plane = MAX_MB_PLANE;
3545 if (!mode_excluded) {
3546 // Note index of best mode so far
3547 best_mode_index = mode_index;
3549 if (ref_frame == INTRA_FRAME) {
3550 /* required for left and above block mv */
3551 mbmi->mv[0].as_int = 0;
3552 max_plane = 1;
3555 *returnrate = rate2;
3556 *returndistortion = distortion2;
3557 best_rd = this_rd;
3558 best_mbmode = *mbmi;
3559 best_skip2 = this_skip2;
3560 if (!x->select_txfm_size)
3561 swap_block_ptr(x, ctx, max_plane);
3562 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3563 sizeof(uint8_t) * ctx->num_4x4_blk);
3565 // TODO(debargha): enhance this test with a better distortion prediction
3566 // based on qp, activity mask and history
3567 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3568 (mode_index > MIN_EARLY_TERM_INDEX)) {
3569 const int qstep = xd->plane[0].dequant[1];
3570 // TODO(debargha): Enhance this by specializing for each mode_index
3571 int scale = 4;
3572 if (x->source_variance < UINT_MAX) {
3573 const int var_adjust = (x->source_variance < 16);
3574 scale -= var_adjust;
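// e.g. with qstep == 32 and scale == 4 the test below terminates
// the mode loop once an inter mode reaches distortion2 < 256, i.e.
// a distortion already well under one quantizer step squared.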
3576 if (ref_frame > INTRA_FRAME &&
3577 distortion2 * scale < qstep * qstep) {
3578 early_term = 1;
3584 /* keep record of best compound/single-only prediction */
3585 if (!disable_skip && ref_frame != INTRA_FRAME) {
3586 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3588 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3589 single_rate = rate2 - compmode_cost;
3590 hybrid_rate = rate2;
3591 } else {
3592 single_rate = rate2;
3593 hybrid_rate = rate2 + compmode_cost;
3596 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3597 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3599 if (!comp_pred) {
3600 if (single_rd < best_pred_rd[SINGLE_REFERENCE]) {
3601 best_pred_rd[SINGLE_REFERENCE] = single_rd;
3603 } else {
3604 if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
3605 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3608 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3609 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3611 /* keep record of best filter type */
3612 if (!mode_excluded && cm->interp_filter != BILINEAR) {
3613 int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
3614 SWITCHABLE_FILTERS : cm->interp_filter];
3616 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3617 int64_t adj_rd;
3618 if (ref == INT64_MAX)
3619 adj_rd = 0;
3620 else if (cpi->rd_filter_cache[i] == INT64_MAX)
3621 // When early termination is triggered, the encoder does not have
3622 // access to the rate-distortion cost. It only knows that the cost
3623 // should be above the maximum valid value. Hence it takes the known
3624 // maximum plus an arbitrary constant as the rate-distortion cost.
3625 adj_rd = cpi->mask_filter_rd - ref + 10;
3626 else
3627 adj_rd = cpi->rd_filter_cache[i] - ref;
3629 adj_rd += this_rd;
3630 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3635 /* keep record of best txfm size */
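// Block sizes below the largest transform never measured the bigger
// transform sizes, so propagate the nearest measured cost upward
// before the best_tx_rd comparison below.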
3636 if (bsize < BLOCK_32X32) {
3637 if (bsize < BLOCK_16X16)
3638 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3640 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
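// tx_cache[] holds this mode's rd under each global tx_mode;
// subtracting the cost under the current cm->tx_mode and adding
// this_rd re-centers those costs so best_tx_rd[] stays comparable
// across modes.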
3642 if (!mode_excluded && this_rd != INT64_MAX) {
3643 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3644 int64_t adj_rd = INT64_MAX;
3645 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3647 if (adj_rd < best_tx_rd[i])
3648 best_tx_rd[i] = adj_rd;
3652 if (early_term)
3653 break;
3655 if (x->skip && !comp_pred)
3656 break;
3659 if (best_rd >= best_rd_so_far)
3660 return INT64_MAX;
3662 // If we used an estimate for the uv intra rd in the loop above...
3663 if (cpi->sf.use_uv_intra_rd_estimate) {
3664 // Do Intra UV best rd mode selection if best mode choice above was intra.
3665 if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
3666 TX_SIZE uv_tx_size;
3667 *mbmi = best_mbmode;
3668 uv_tx_size = get_uv_tx_size(mbmi);
3669 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3670 &rate_uv_tokenonly[uv_tx_size],
3671 &dist_uv[uv_tx_size],
3672 &skip_uv[uv_tx_size],
3673 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3674 uv_tx_size);
3678 assert((cm->interp_filter == SWITCHABLE) ||
3679 (cm->interp_filter == best_mbmode.interp_filter) ||
3680 !is_inter_block(&best_mbmode));
3682 // Updating rd_thresh_freq_fact[] here means that the different
3683 // partition/block sizes are handled independently based on the best
3684 // choice for the current partition. It may well be better to keep a scaled
3685 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3686 // combination that wins out.
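// For the winning mode *fact decays by 1/8 of itself (roughly
// *fact *= 7/8), making it cheaper to try on later blocks, while
// every losing mode creeps up by RD_THRESH_INC until it reaches the
// adaptive_rd_thresh * RD_THRESH_MAX_FACT ceiling.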
3687 if (cpi->sf.adaptive_rd_thresh) {
3688 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3689 int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index];
3691 if (mode_index == best_mode_index) {
3692 *fact -= (*fact >> 3);
3693 } else {
3694 *fact = MIN(*fact + RD_THRESH_INC,
3695 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
3700 // macroblock modes
3701 *mbmi = best_mbmode;
3702 x->skip |= best_skip2;
3704 for (i = 0; i < REFERENCE_MODES; ++i) {
3705 if (best_pred_rd[i] == INT64_MAX)
3706 best_pred_diff[i] = INT_MIN;
3707 else
3708 best_pred_diff[i] = best_rd - best_pred_rd[i];
3711 if (!x->skip) {
3712 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3713 if (best_filter_rd[i] == INT64_MAX)
3714 best_filter_diff[i] = 0;
3715 else
3716 best_filter_diff[i] = best_rd - best_filter_rd[i];
3718 if (cm->interp_filter == SWITCHABLE)
3719 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3720 for (i = 0; i < TX_MODES; i++) {
3721 if (best_tx_rd[i] == INT64_MAX)
3722 best_tx_diff[i] = 0;
3723 else
3724 best_tx_diff[i] = best_rd - best_tx_rd[i];
3726 } else {
3727 vp9_zero(best_filter_diff);
3728 vp9_zero(best_tx_diff);
3731 if (!x->in_active_map) {
3732 assert(mbmi->ref_frame[0] == LAST_FRAME);
3733 assert(mbmi->ref_frame[1] == NONE);
3734 assert(mbmi->mode == NEARESTMV ||
3735 mbmi->mode == NEARMV ||
3736 mbmi->mode == ZEROMV);
3737 assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0);
3738 assert(mbmi->mode == mbmi->uv_mode);
3741 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3742 store_coding_context(x, ctx, best_mode_index,
3743 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3744 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3745 mbmi->ref_frame[1]][0],
3746 best_pred_diff, best_tx_diff, best_filter_diff);
3748 return best_rd;
3752 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3753 const TileInfo *const tile,
3754 int mi_row, int mi_col,
3755 int *returnrate,
3756 int64_t *returndistortion,
3757 BLOCK_SIZE bsize,
3758 PICK_MODE_CONTEXT *ctx,
3759 int64_t best_rd_so_far) {
3760 VP9_COMMON *cm = &cpi->common;
3761 MACROBLOCKD *xd = &x->e_mbd;
3762 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
3763 const struct segmentation *seg = &cm->seg;
3764 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3765 unsigned char segment_id = mbmi->segment_id;
3766 int comp_pred, i;
3767 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3768 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3769 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3770 VP9_ALT_FLAG };
3771 int64_t best_rd = best_rd_so_far;
3772 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3773 int64_t best_tx_rd[TX_MODES];
3774 int64_t best_tx_diff[TX_MODES];
3775 int64_t best_pred_diff[REFERENCE_MODES];
3776 int64_t best_pred_rd[REFERENCE_MODES];
3777 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3778 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3779 MB_MODE_INFO best_mbmode = { 0 };
3780 int mode_index, best_mode_index = 0;
3781 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3782 vp9_prob comp_mode_p;
3783 int64_t best_inter_rd = INT64_MAX;
3784 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3785 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3786 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3787 int64_t dist_uv[TX_SIZES];
3788 int skip_uv[TX_SIZES];
3789 MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
3790 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3791 int_mv seg_mvs[4][MAX_REF_FRAMES];
3792 b_mode_info best_bmodes[4];
3793 int best_skip2 = 0;
3794 int ref_frame_mask = 0;
3795 int mode_skip_mask = 0;
3797 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3798 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3800 for (i = 0; i < 4; i++) {
3801 int j;
3802 for (j = 0; j < MAX_REF_FRAMES; j++)
3803 seg_mvs[i][j].as_int = INVALID_MV;
3806 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3807 &comp_mode_p);
3809 for (i = 0; i < REFERENCE_MODES; ++i)
3810 best_pred_rd[i] = INT64_MAX;
3811 for (i = 0; i < TX_MODES; i++)
3812 best_tx_rd[i] = INT64_MAX;
3813 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3814 best_filter_rd[i] = INT64_MAX;
3815 for (i = 0; i < TX_SIZES; i++)
3816 rate_uv_intra[i] = INT_MAX;
3818 *returnrate = INT_MAX;
3820 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3821 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3822 vp9_setup_buffer_inter(cpi, x, tile,
3823 ref_frame, bsize, mi_row, mi_col,
3824 frame_mv[NEARESTMV], frame_mv[NEARMV],
3825 yv12_mb);
3827 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3828 frame_mv[ZEROMV][ref_frame].as_int = 0;
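// With reference masking enabled, mask out any reference whose
// full-pel prediction SAD is more than twice that of some other
// reference (the >> 1 below), on the grounds that it is unlikely
// to win the search.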
3831 for (ref_frame = LAST_FRAME;
3832 ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
3833 int i;
3834 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3835 if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
3836 ref_frame_mask |= (1 << ref_frame);
3837 break;
3842 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
3843 int mode_excluded = 0;
3844 int64_t this_rd = INT64_MAX;
3845 int disable_skip = 0;
3846 int compmode_cost = 0;
3847 int rate2 = 0, rate_y = 0, rate_uv = 0;
3848 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3849 int skippable = 0;
3850 int64_t tx_cache[TX_MODES];
3851 int i;
3852 int this_skip2 = 0;
3853 int64_t total_sse = INT64_MAX;
3854 int early_term = 0;
3856 for (i = 0; i < TX_MODES; ++i)
3857 tx_cache[i] = INT64_MAX;
3859 x->skip = 0;
3860 ref_frame = vp9_ref_order[mode_index].ref_frame[0];
3861 second_ref_frame = vp9_ref_order[mode_index].ref_frame[1];
3863 // Look at the reference frame of the best mode so far and set the
3864 // skip mask to look at a subset of the remaining modes.
3865 if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3866 if (mode_index == 3) {
3867 switch (vp9_ref_order[best_mode_index].ref_frame[0]) {
3868 case INTRA_FRAME:
3869 mode_skip_mask = 0;
3870 break;
3871 case LAST_FRAME:
3872 mode_skip_mask = 0x0010;
3873 break;
3874 case GOLDEN_FRAME:
3875 mode_skip_mask = 0x0008;
3876 break;
3877 case ALTREF_FRAME:
3878 mode_skip_mask = 0x0000;
3879 break;
3880 case NONE:
3881 case MAX_REF_FRAMES:
3882 assert(0 && "Invalid Reference frame");
3885 if (mode_skip_mask & (1 << mode_index))
3886 continue;
3889 // Test best rd so far against threshold for trying this mode.
3890 if ((best_rd <
3891 ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
3892 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
3893 cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
3894 continue;
3896 // Do not allow compound prediction if the segment level reference
3897 // frame feature is in use as in this case there can only be one reference.
3898 if ((second_ref_frame > INTRA_FRAME) &&
3899 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3900 continue;
3902 mbmi->ref_frame[0] = ref_frame;
3903 mbmi->ref_frame[1] = second_ref_frame;
3905 if (!(ref_frame == INTRA_FRAME
3906 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3907 continue;
3909 if (!(second_ref_frame == NONE
3910 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3911 continue;
3914 comp_pred = second_ref_frame > INTRA_FRAME;
3915 if (comp_pred) {
3916 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3917 if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3918 continue;
3919 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3920 if (ref_frame != best_inter_ref_frame &&
3921 second_ref_frame != best_inter_ref_frame)
3922 continue;
3925 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3926 // sub8x8 blocks.
3927 if (ref_frame > 0 && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3928 continue;
3930 if (second_ref_frame > 0 &&
3931 vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
3932 continue;
3934 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3935 mbmi->uv_mode = DC_PRED;
3937 // Evaluate all sub-pel filters irrespective of whether we can use
3938 // them for this frame.
3939 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3940 : cm->interp_filter;
3941 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
3943 if (comp_pred) {
3944 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3945 continue;
3947 mode_excluded = mode_excluded ? mode_excluded
3948 : cm->reference_mode == SINGLE_REFERENCE;
3949 } else {
3950 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3951 mode_excluded = mode_excluded ?
3952 mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
3956 // Select prediction reference frames.
3957 for (i = 0; i < MAX_MB_PLANE; i++) {
3958 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3959 if (comp_pred)
3960 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3963 // If the segment reference frame feature is enabled,
3964 // then do nothing if the current ref frame is not allowed.
3965 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3966 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3967 (int)ref_frame) {
3968 continue;
3969 // If the segment skip feature is enabled,
3970 // then do nothing if the current mode is not allowed.
3971 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3972 ref_frame != INTRA_FRAME) {
3973 continue;
3974 // Disable this drop out case if the ref frame
3975 // segment level feature is enabled for this segment. This is to
3976 // prevent the possibility that we end up unable to pick any mode.
3977 } else if (!vp9_segfeature_active(seg, segment_id,
3978 SEG_LVL_REF_FRAME)) {
3979 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3980 // unless ARNR filtering is enabled in which case we want
3981 // an unfiltered alternative. We allow near/nearest as well
3982 // because they may result in zero-zero MVs but be cheaper.
3983 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3984 continue;
3987 #ifdef MODE_TEST_HIT_STATS
3988 // TEST/DEBUG CODE
3990 // Keep a record of the number of test hits at each size
3990 cpi->mode_test_hits[bsize]++;
3991 #endif
3993 if (ref_frame == INTRA_FRAME) {
3994 int rate;
3995 mbmi->tx_size = TX_4X4;
3996 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3997 &distortion_y, best_rd) >= best_rd)
3998 continue;
3999 rate2 += rate;
4000 rate2 += intra_cost_penalty;
4001 distortion2 += distortion_y;
4003 if (rate_uv_intra[TX_4X4] == INT_MAX) {
4004 choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
4005 &rate_uv_intra[TX_4X4],
4006 &rate_uv_tokenonly[TX_4X4],
4007 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
4008 &mode_uv[TX_4X4]);
4010 rate2 += rate_uv_intra[TX_4X4];
4011 rate_uv = rate_uv_tokenonly[TX_4X4];
4012 distortion2 += dist_uv[TX_4X4];
4013 distortion_uv = dist_uv[TX_4X4];
4014 mbmi->uv_mode = mode_uv[TX_4X4];
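// Sub-8x8 blocks only ever use the 4x4 transform, so every tx_mode
// shares the ONLY_4X4 cost below.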
4015 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4016 for (i = 0; i < TX_MODES; ++i)
4017 tx_cache[i] = tx_cache[ONLY_4X4];
4018 } else {
4019 int rate;
4020 int64_t distortion;
4021 int64_t this_rd_thresh;
4022 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
4023 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
4024 int64_t tmp_best_distortion = INT64_MAX, tmp_best_sse, uv_sse;
4025 int tmp_best_skippable = 0;
4026 int switchable_filter_index;
4027 int_mv *second_ref = comp_pred ?
4028 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
4029 b_mode_info tmp_best_bmodes[16];
4030 MB_MODE_INFO tmp_best_mbmode;
4031 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
4032 int pred_exists = 0;
4033 int uv_skippable;
4035 this_rd_thresh = (ref_frame == LAST_FRAME) ?
4036 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
4037 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
4038 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4039 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
4040 xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
4042 cpi->mask_filter_rd = 0;
4043 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4044 cpi->rd_filter_cache[i] = INT64_MAX;
4046 if (cm->interp_filter != BILINEAR) {
4047 tmp_best_filter = EIGHTTAP;
4048 if (x->source_variance <
4049 cpi->sf.disable_filter_search_var_thresh) {
4050 tmp_best_filter = EIGHTTAP;
4051 } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
4052 ctx->pred_interp_filter < SWITCHABLE) {
4053 tmp_best_filter = ctx->pred_interp_filter;
4054 } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
4055 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
4056 ctx->pred_interp_filter : 0;
4057 } else {
4058 for (switchable_filter_index = 0;
4059 switchable_filter_index < SWITCHABLE_FILTERS;
4060 ++switchable_filter_index) {
4061 int newbest, rs;
4062 int64_t rs_rd;
4063 mbmi->interp_filter = switchable_filter_index;
4064 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
4065 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4066 &mbmi->ref_mvs[ref_frame][0],
4067 second_ref,
4068 best_yrd,
4069 &rate, &rate_y, &distortion,
4070 &skippable, &total_sse,
4071 (int)this_rd_thresh, seg_mvs,
4072 bsi, switchable_filter_index,
4073 mi_row, mi_col);
4075 if (tmp_rd == INT64_MAX)
4076 continue;
4077 rs = get_switchable_rate(x);
4078 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4079 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
4080 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
4081 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
4082 tmp_rd + rs_rd);
4083 if (cm->interp_filter == SWITCHABLE)
4084 tmp_rd += rs_rd;
4086 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd);
4088 newbest = (tmp_rd < tmp_best_rd);
4089 if (newbest) {
4090 tmp_best_filter = mbmi->interp_filter;
4091 tmp_best_rd = tmp_rd;
4093 if ((newbest && cm->interp_filter == SWITCHABLE) ||
4094 (mbmi->interp_filter == cm->interp_filter &&
4095 cm->interp_filter != SWITCHABLE)) {
4096 tmp_best_rdu = tmp_rd;
4097 tmp_best_rate = rate;
4098 tmp_best_ratey = rate_y;
4099 tmp_best_distortion = distortion;
4100 tmp_best_sse = total_sse;
4101 tmp_best_skippable = skippable;
4102 tmp_best_mbmode = *mbmi;
4103 for (i = 0; i < 4; i++) {
4104 tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4105 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4107 pred_exists = 1;
4108 if (switchable_filter_index == 0 &&
4109 cpi->sf.use_rd_breakout &&
4110 best_rd < INT64_MAX) {
4111 if (tmp_best_rdu / 2 > best_rd) {
4112 // skip searching the other filters if the first is
4113 // already substantially larger than the best so far
4114 tmp_best_filter = mbmi->interp_filter;
4115 tmp_best_rdu = INT64_MAX;
4116 break;
4120 } // switchable_filter_index loop
4124 if (tmp_best_rdu == INT64_MAX && pred_exists)
4125 continue;
4127 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
4128 tmp_best_filter : cm->interp_filter);
4129 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
4130 if (!pred_exists) {
4131 // Handles the special case when a filter that is not in the
4132 // switchable list (bilinear, 6-tap) is indicated at the frame level
4133 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4134 &mbmi->ref_mvs[ref_frame][0],
4135 second_ref,
4136 best_yrd,
4137 &rate, &rate_y, &distortion,
4138 &skippable, &total_sse,
4139 (int)this_rd_thresh, seg_mvs,
4140 bsi, 0,
4141 mi_row, mi_col);
4142 if (tmp_rd == INT64_MAX)
4143 continue;
4144 } else {
4145 total_sse = tmp_best_sse;
4146 rate = tmp_best_rate;
4147 rate_y = tmp_best_ratey;
4148 distortion = tmp_best_distortion;
4149 skippable = tmp_best_skippable;
4150 *mbmi = tmp_best_mbmode;
4151 for (i = 0; i < 4; i++)
4152 xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i];
4155 rate2 += rate;
4156 distortion2 += distortion;
4158 if (cm->interp_filter == SWITCHABLE)
4159 rate2 += get_switchable_rate(x);
4161 if (!mode_excluded)
4162 mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
4163 : cm->reference_mode == COMPOUND_REFERENCE;
4165 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4167 tmp_best_rdu = best_rd -
4168 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4169 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
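// tmp_best_rdu is the rd budget left for the UV planes: best_rd
// minus the cheaper of coding the Y residual or skipping it
// outright (rate 0, distortion total_sse).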
4171 if (tmp_best_rdu > 0) {
4172 // If even the 'Y' rd value of split is higher than best so far
4173 // then don't bother looking at UV.
4174 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4175 BLOCK_8X8);
4176 super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4177 &uv_sse, BLOCK_8X8, tmp_best_rdu);
4178 if (rate_uv == INT_MAX)
4179 continue;
4180 rate2 += rate_uv;
4181 distortion2 += distortion_uv;
4182 skippable = skippable && uv_skippable;
4183 total_sse += uv_sse;
4185 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4186 for (i = 0; i < TX_MODES; ++i)
4187 tx_cache[i] = tx_cache[ONLY_4X4];
4191 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4192 rate2 += compmode_cost;
4194 // Estimate the reference frame signaling cost and add it
4195 // to the rolling cost variable.
4196 if (second_ref_frame > INTRA_FRAME) {
4197 rate2 += ref_costs_comp[ref_frame];
4198 } else {
4199 rate2 += ref_costs_single[ref_frame];
4202 if (!disable_skip) {
4203 // Test for the condition where skip block will be activated
4204 // because there are no non-zero coefficients and make any
4205 // necessary adjustment for rate. Ignore if skip is coded at
4206 // the segment level, as the cost won't have been added in.
4207 // Is MB-level skip allowed (i.e. not coded at segment level)?
4208 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
4209 SEG_LVL_SKIP);
4211 if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
4212 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4213 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4214 // Add in the cost of the no skip flag.
4215 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4216 } else {
4217 // FIXME(rbultje) make this work for splitmv also
4218 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
4219 distortion2 = total_sse;
4220 assert(total_sse >= 0);
4221 rate2 -= (rate_y + rate_uv);
4222 rate_y = 0;
4223 rate_uv = 0;
4224 this_skip2 = 1;
4226 } else if (mb_skip_allowed) {
4227 // Add in the cost of the no skip flag.
4228 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4231 // Calculate the final RD estimate for this mode.
4232 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4235 // Keep record of best inter rd with single reference
4236 if (is_inter_block(&xd->mi_8x8[0]->mbmi) &&
4237 !has_second_ref(&xd->mi_8x8[0]->mbmi) &&
4238 !mode_excluded &&
4239 this_rd < best_inter_rd) {
4240 best_inter_rd = this_rd;
4241 best_inter_ref_frame = ref_frame;
4244 if (!disable_skip && ref_frame == INTRA_FRAME) {
4245 for (i = 0; i < REFERENCE_MODES; ++i)
4246 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
4247 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4248 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
4251 // Did this mode help, i.e. is it the new best mode?
4252 if (this_rd < best_rd || x->skip) {
4253 if (!mode_excluded) {
4254 int max_plane = MAX_MB_PLANE;
4255 // Note index of best mode so far
4256 best_mode_index = mode_index;
4258 if (ref_frame == INTRA_FRAME) {
4259 /* required for left and above block mv */
4260 mbmi->mv[0].as_int = 0;
4261 max_plane = 1;
4264 *returnrate = rate2;
4265 *returndistortion = distortion2;
4266 best_rd = this_rd;
4267 best_yrd = best_rd -
4268 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
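// best_yrd backs the UV cost out of the winning rd so that later
// calls to rd_pick_best_mbsegmentation can compare their luma-only
// cost against it.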
4269 best_mbmode = *mbmi;
4270 best_skip2 = this_skip2;
4271 if (!x->select_txfm_size)
4272 swap_block_ptr(x, ctx, max_plane);
4273 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
4274 sizeof(uint8_t) * ctx->num_4x4_blk);
4276 for (i = 0; i < 4; i++)
4277 best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4279 // TODO(debargha): enhance this test with a better distortion prediction
4280 // based on qp, activity mask and history
4281 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4282 (mode_index > MIN_EARLY_TERM_INDEX)) {
4283 const int qstep = xd->plane[0].dequant[1];
4284 // TODO(debargha): Enhance this by specializing for each mode_index
4285 int scale = 4;
4286 if (x->source_variance < UINT_MAX) {
4287 const int var_adjust = (x->source_variance < 16);
4288 scale -= var_adjust;
4290 if (ref_frame > INTRA_FRAME &&
4291 distortion2 * scale < qstep * qstep) {
4292 early_term = 1;
4298 /* keep record of best compound/single-only prediction */
4299 if (!disable_skip && ref_frame != INTRA_FRAME) {
4300 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4302 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4303 single_rate = rate2 - compmode_cost;
4304 hybrid_rate = rate2;
4305 } else {
4306 single_rate = rate2;
4307 hybrid_rate = rate2 + compmode_cost;
4310 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4311 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4313 if (second_ref_frame <= INTRA_FRAME &&
4314 single_rd < best_pred_rd[SINGLE_REFERENCE]) {
4315 best_pred_rd[SINGLE_REFERENCE] = single_rd;
4316 } else if (second_ref_frame > INTRA_FRAME &&
4317 single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
4318 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4320 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4321 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4324 /* keep record of best filter type */
4325 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4326 cm->interp_filter != BILINEAR) {
4327 int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
4328 SWITCHABLE_FILTERS : cm->interp_filter];
4329 int64_t adj_rd;
4330 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4331 if (ref == INT64_MAX)
4332 adj_rd = 0;
4333 else if (cpi->rd_filter_cache[i] == INT64_MAX)
4334 // When early termination is triggered, the encoder does not have
4335 // access to the rate-distortion cost. It only knows that the cost
4336 // should be above the maximum valid value. Hence it takes the known
4337 // maximum plus an arbitrary constant as the rate-distortion cost.
4338 adj_rd = cpi->mask_filter_rd - ref + 10;
4339 else
4340 adj_rd = cpi->rd_filter_cache[i] - ref;
4342 adj_rd += this_rd;
4343 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
4347 /* keep record of best txfm size */
4348 if (bsize < BLOCK_32X32) {
4349 if (bsize < BLOCK_16X16) {
4350 tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
4351 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
4353 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
4355 if (!mode_excluded && this_rd != INT64_MAX) {
4356 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
4357 int64_t adj_rd = INT64_MAX;
4358 if (ref_frame > INTRA_FRAME)
4359 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
4360 else
4361 adj_rd = this_rd;
4363 if (adj_rd < best_tx_rd[i])
4364 best_tx_rd[i] = adj_rd;
4368 if (early_term)
4369 break;
4371 if (x->skip && !comp_pred)
4372 break;
4375 if (best_rd >= best_rd_so_far)
4376 return INT64_MAX;
4378 // If we used an estimate for the uv intra rd in the loop above...
4379 if (cpi->sf.use_uv_intra_rd_estimate) {
4380 // Do Intra UV best rd mode selection if best mode choice above was intra.
4381 if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
4382 TX_SIZE uv_tx_size;
4383 *mbmi = best_mbmode;
4384 uv_tx_size = get_uv_tx_size(mbmi);
4385 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
4386 &rate_uv_tokenonly[uv_tx_size],
4387 &dist_uv[uv_tx_size],
4388 &skip_uv[uv_tx_size],
4389 BLOCK_8X8, uv_tx_size);
4393 if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
4394 *returnrate = INT_MAX;
4395 *returndistortion = INT64_MAX;
4396 return best_rd;
4399 assert((cm->interp_filter == SWITCHABLE) ||
4400 (cm->interp_filter == best_mbmode.interp_filter) ||
4401 !is_inter_block(&best_mbmode));
4403 // Updating rd_thresh_freq_fact[] here means that the different
4404 // partition/block sizes are handled independently based on the best
4405 // choice for the current partition. It may well be better to keep a scaled
4406 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
4407 // combination that wins out.
4408 if (cpi->sf.adaptive_rd_thresh) {
4409 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
4410 int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index];
4412 if (mode_index == best_mode_index) {
4413 *fact -= (*fact >> 3);
4414 } else {
4415 *fact = MIN(*fact + RD_THRESH_INC,
4416 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
4421 // macroblock modes
4422 *mbmi = best_mbmode;
4423 x->skip |= best_skip2;
4424 if (!is_inter_block(&best_mbmode)) {
4425 for (i = 0; i < 4; i++)
4426 xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4427 } else {
4428 for (i = 0; i < 4; ++i)
4429 vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
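// For inter blocks the MVs of the bottom-right 4x4 sub-block
// (bmi[3]) double as the block-level MVs recorded in mbmi, e.g.
// for neighboring MV prediction.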
4431 mbmi->mv[0].as_int = xd->mi_8x8[0]->bmi[3].as_mv[0].as_int;
4432 mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int;
4435 for (i = 0; i < REFERENCE_MODES; ++i) {
4436 if (best_pred_rd[i] == INT64_MAX)
4437 best_pred_diff[i] = INT_MIN;
4438 else
4439 best_pred_diff[i] = best_rd - best_pred_rd[i];
4442 if (!x->skip) {
4443 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4444 if (best_filter_rd[i] == INT64_MAX)
4445 best_filter_diff[i] = 0;
4446 else
4447 best_filter_diff[i] = best_rd - best_filter_rd[i];
4449 if (cm->interp_filter == SWITCHABLE)
4450 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4451 } else {
4452 vp9_zero(best_filter_diff);
4455 if (!x->skip) {
4456 for (i = 0; i < TX_MODES; i++) {
4457 if (best_tx_rd[i] == INT64_MAX)
4458 best_tx_diff[i] = 0;
4459 else
4460 best_tx_diff[i] = best_rd - best_tx_rd[i];
4462 } else {
4463 vp9_zero(best_tx_diff);
4466 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4467 store_coding_context(x, ctx, best_mode_index,
4468 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4469 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4470 mbmi->ref_frame[1]][0],
4471 best_pred_diff, best_tx_diff, best_filter_diff);
4473 return best_rd;