Removed intra_modes from vp8cx_encode_intra_macro_block
vp8/encoder/encodeframe.c
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "vpx_ports/config.h"
#include "encodemb.h"
#include "encodemv.h"
#include "common.h"
#include "onyx_int.h"
#include "extend.h"
#include "entropymode.h"
#include "quant_common.h"
#include "segmentation.h"
#include "setupintrarecon.h"
#include "encodeintra.h"
#include "reconinter.h"
#include "rdopt.h"
#include "pickinter.h"
#include "findnearmv.h"
#include "reconintra.h"
#include <stdio.h>
#include <limits.h>
#include "subpixel.h"
#include "vpx_ports/vpx_timer.h"
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD(x)     &cpi->common.rtcd.x
#define IF_RTCD(x)  (x)
#else
#define RTCD(x)     NULL
#define IF_RTCD(x)  NULL
#endif
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t);
extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
extern void vp8_auto_select_speed(VP8_COMP *cpi);
extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
                                      MACROBLOCK *x,
                                      MB_ROW_COMP *mbr_ei,
                                      int mb_row,
                                      int count);
void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
#ifdef MODE_STATS
unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
unsigned int inter_b_modes[15] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int y_modes[5] = {0, 0, 0, 0, 0};
unsigned int uv_modes[4] = {0, 0, 0, 0};
unsigned int b_modes[14] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#endif
static const int qrounding_factors[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48,
};
static const int qzbin_factors[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80,
};
static const int qrounding_factors_y2[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48,
};
static const int qzbin_factors_y2[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80,
};
#define EXACT_QUANT

#ifdef EXACT_QUANT
static void vp8cx_invert_quant(int improved_quant, short *quant,
                               short *shift, short d)
{
    if (improved_quant)
    {
        unsigned t;
        int l;
        t = d;

        for (l = 0; t > 1; l++)
            t >>= 1;

        t = 1 + (1 << (16 + l)) / d;
        *quant = (short)(t - (1 << 16));
        *shift = l;
    }
    else
    {
        *quant = (1 << 16) / d;
        *shift = 0;
    }
}
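
/* Illustrative sketch (not part of the encoder): a quantizer can use the
 * (quant, shift) pair computed above to approximate y / d without a divide,
 * since quant holds t - (1 << 16) with t ~= 2^(16 + shift) / d. The helper
 * name below is hypothetical.
 *
 *     int approx_divide(int y, short quant, short shift)
 *     {
 *         int q = ((y * quant) >> 16) + y;  // == (y * t) >> 16 for y >= 0
 *         return q >> shift;                // ~= y / d
 *     }
 */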
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
                           cpi->Y1quant_shift[Q] + 0, quant_val);
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
                           cpi->Y2quant_shift[Q] + 0, quant_val);
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
                           cpi->UVquant_shift[Q] + 0, quant_val);
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
                               cpi->Y1quant_shift[Q] + rc, quant_val);
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
                               cpi->Y2quant_shift[Q] + rc, quant_val);
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
                               cpi->UVquant_shift[Q] + rc, quant_val);
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
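
/* Worked example (illustrative numbers, not tied to a specific Q): with
 * quant_val == 8, qzbin_factors[Q] == 84 and qrounding_factors[Q] == 48,
 * the loop above produces
 *     zbin  = ((84 * 8) + 64) >> 7 == 5
 *     round = (48 * 8) >> 7        == 3
 * so both the zero bin and the rounding offset scale with the step size. */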
#else
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant[Q][0] = (1 << 16) / quant_val;
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#endif
void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
{
    int i;
    int QIndex;
    MACROBLOCKD *xd = &x->e_mbd;
    int zbin_extra;

    // Select the baseline MB Q index.
    if (xd->segmentation_enabled)
    {
        // Abs Value
        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
        // Delta Value
        else
        {
            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
        }
    }
    else
        QIndex = cpi->common.base_qindex;

    // Y
    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 0; i < 16; i++)
    {
        x->block[i].quant = cpi->Y1quant[QIndex];
        x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
        x->block[i].zbin = cpi->Y1zbin[QIndex];
        x->block[i].round = cpi->Y1round[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // UV
    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 16; i < 24; i++)
    {
        x->block[i].quant = cpi->UVquant[QIndex];
        x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
        x->block[i].zbin = cpi->UVzbin[QIndex];
        x->block[i].round = cpi->UVround[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
    x->block[24].quant = cpi->Y2quant[QIndex];
    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
    x->block[24].zbin = cpi->Y2zbin[QIndex];
    x->block[24].round = cpi->Y2round[QIndex];
    x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
    x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
    x->block[24].zbin_extra = (short)zbin_extra;

    /* save this macroblock QIndex for vp8_update_zbin_extra() */
    x->q_index = QIndex;
}
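
/* Worked example (illustrative numbers): with Y1dequant[QIndex][1] == 10,
 * zbin_over_quant == 24 and zbin_mode_boost == 0, the Y zbin_extra above is
 * (10 * 24) >> 7 == 1, i.e. the quantizer's zero bin widens by one unit. */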
void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
{
    int i;
    int QIndex = x->q_index;
    int zbin_extra;

    // Y
    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 0; i < 16; i++)
    {
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // UV
    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 16; i < 24; i++)
    {
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].zbin_extra = (short)zbin_extra;
}
void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
{
    // Clear Zbin mode boost for default case
    cpi->zbin_mode_boost = 0;

    // vp8cx_init_quantizer() is first called in vp8_create_compressor(). A
    // check is added here so that vp8cx_init_quantizer() is only called
    // when these values are not all zero.
    if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
    {
        vp8cx_init_quantizer(cpi);
    }

    // MB level quantizer setup
    vp8cx_mb_init_quantizer(cpi, &cpi->mb);
}
/* activity_avg must be positive, or flat regions could get a zero weight
 * (infinite lambda), which confounds analysis.
 * This also avoids the need for divide by zero checks in
 * vp8_activity_masking().
 */
#define VP8_ACTIVITY_AVG_MIN (64)

/* This is used as a reference when computing the source variance for the
 * purposes of activity masking.
 * Eventually this should be replaced by custom no-reference routines,
 * which will be faster.
 */
static const unsigned char VP8_VAR_OFFS[16] =
{
    128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};
unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
    unsigned int act;
    unsigned int sse;
    int sum;
    unsigned int a;
    unsigned int b;
    /* TODO: This could also be done over smaller areas (8x8), but that would
     * require extensive changes elsewhere, as lambda is assumed to be fixed
     * over an entire MB in most of the code.
     * Another option is to compute four 8x8 variances, and pick a single
     * lambda using a non-linear combination (e.g., the smallest, or second
     * smallest, etc.).
     */
    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
                    x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
    /* This requires a full 32 bits of precision. */
    act = (sse << 8) - sum * sum;
    /* Drop 4 to give us some headroom to work with. */
    act = (act + 8) >> 4;
    /* If the region is flat, lower the activity some more. */
    if (act < 8 << 12)
        act = act < 5 << 12 ? act : 5 << 12;
    /* TODO: For non-flat regions, edge regions should receive less masking
     * than textured regions, but identifying edge regions quickly and
     * reliably enough is still a subject of experimentation.
     * This will be most noticeable near edges with a complex shape (e.g.,
     * text), but the 4x4 transform size should make this less of a problem
     * than it would be for an 8x8 transform.
     */
    /* Apply the masking to the RD multiplier. */
    a = act + 4 * cpi->activity_avg;
    b = 4 * act + cpi->activity_avg;
    x->rdmult = (unsigned int)(((INT64)x->rdmult * b + (a >> 1)) / a);

    return act;
}
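
/* Note (derived from the expressions above): the scale factor
 * b / a == (4 * act + activity_avg) / (act + 4 * activity_avg) lies between
 * 1/4 (act == 0) and 4 (act much larger than activity_avg), so an MB's
 * rdmult moves at most a factor of four either way from the frame-level
 * value, and act == activity_avg leaves it unchanged. */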
static
void encode_mb_row(VP8_COMP *cpi,
                   VP8_COMMON *cm,
                   int mb_row,
                   MACROBLOCK  *x,
                   MACROBLOCKD *xd,
                   TOKENEXTRA **tp,
                   int *segment_counts,
                   int *totalrate)
{
    INT64 activity_sum = 0;
    int i;
    int recon_yoffset, recon_uvoffset;
    int mb_col;
    int ref_fb_idx = cm->lst_fb_idx;
    int dst_fb_idx = cm->new_fb_idx;
    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
    int seg_map_index = (mb_row * cpi->common.mb_cols);

    // reset above block coeffs
    xd->above_context = cm->above_context;

    xd->up_available = (mb_row != 0);
    recon_yoffset = (mb_row * recon_y_stride * 16);
    recon_uvoffset = (mb_row * recon_uv_stride * 8);

    cpi->tplist[mb_row].start = *tp;
    //printf("Main mb_row = %d\n", mb_row);

    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
    // units as they are always compared to values that are in 1/8th pel units
    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;

    // Set up limit values for vertical motion vector components
    // to prevent them extending beyond the UMV borders
    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                    + (VP8BORDERINPIXELS - 16);

    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    {
        // Distance of Mb to the left & right edges, specified in
        // 1/8th pel units as they are always compared to values
        // that are in 1/8th pel units
        xd->mb_to_left_edge = -((mb_col * 16) << 3);
        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;

        // Set up limit values for horizontal motion vector components
        // to prevent them extending beyond the UMV borders
        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                        + (VP8BORDERINPIXELS - 16);

        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
        xd->left_available = (mb_col != 0);

        x->rddiv = cpi->RDDIV;
        x->rdmult = cpi->RDMULT;

        if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
            activity_sum += vp8_activity_masking(cpi, x);

        // Is segmentation enabled
        // MB level adjustment to quantizer
        if (xd->segmentation_enabled)
        {
            // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
            if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
            else
                xd->mode_info_context->mbmi.segment_id = 0;

            vp8cx_mb_init_quantizer(cpi, x);
        }
        else
            xd->mode_info_context->mbmi.segment_id = 0;    // Set to Segment 0 by default

        x->active_ptr = cpi->active_map + seg_map_index + mb_col;

        if (cm->frame_type == KEY_FRAME)
        {
            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
#ifdef MODE_STATS
            y_modes[xd->mbmi.mode] ++;
#endif
        }
        else
        {
            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);

#ifdef MODE_STATS
            inter_y_modes[xd->mbmi.mode] ++;

            if (xd->mbmi.mode == SPLITMV)
            {
                int b;

                for (b = 0; b < xd->mbmi.partition_count; b++)
                {
                    inter_b_modes[x->partition->bmi[b].mode] ++;
                }
            }

#endif

            // Count of last ref frame 0,0 usage
            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                cpi->inter_zz_count ++;

            // Special case code for cyclic refresh
            // If cyclic update enabled then copy xd->mbmi.segment_id (which may have been updated
            // based on mode during vp8cx_encode_inter_macroblock()) back into the global
            // segmentation map
            if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
            {
                cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;

                // If the block has been refreshed mark it as clean (the magnitude of the -ve
                // influences how long it will be before we consider another refresh):
                // Else if it was coded (last frame 0,0) and has not already been refreshed then
                // mark it as a candidate for cleanup next time (marked 0), else mark it as
                // dirty (1).
                if (xd->mode_info_context->mbmi.segment_id)
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                {
                    if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
                        cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
                }
                else
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;
            }
        }

        cpi->tplist[mb_row].stop = *tp;

        x->gf_active_ptr++;    // Increment pointer into gf usage flags structure for next mb

        for (i = 0; i < 16; i++)
            vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));

        // adjust to the next column of macroblocks
        x->src.y_buffer += 16;
        x->src.u_buffer += 8;
        x->src.v_buffer += 8;

        recon_yoffset += 16;
        recon_uvoffset += 8;

        // Keep track of segment usage
        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;

        // skip to next mb
        xd->mode_info_context++;
        x->partition_info++;

        xd->above_context++;
        cpi->current_mb_col_main = mb_col;
    }

    //extend the recon for intra prediction
    vp8_extend_mb_row(
        &cm->yv12_fb[dst_fb_idx],
        xd->dst.y_buffer + 16,
        xd->dst.u_buffer + 8,
        xd->dst.v_buffer + 8);

    // this is to account for the border
    xd->mode_info_context++;
    x->partition_info++;
    x->activity_sum += activity_sum;
}
void vp8_encode_frame(VP8_COMP *cpi)
{
    int mb_row;
    MACROBLOCK *const x = &cpi->mb;
    VP8_COMMON *const cm = &cpi->common;
    MACROBLOCKD *const xd = &x->e_mbd;

    TOKENEXTRA *tp = cpi->tok;
    int segment_counts[MAX_MB_SEGMENTS];
    int totalrate;

    // Functions setup for all frame types so we can use MC in AltRef
    if (cm->mcomp_filter_type == SIXTAP)
    {
        xd->subpixel_predict      = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap4x4);
        xd->subpixel_predict8x4   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap8x4);
        xd->subpixel_predict8x8   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap8x8);
        xd->subpixel_predict16x16 = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap16x16);
    }
    else
    {
        xd->subpixel_predict      = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear4x4);
        xd->subpixel_predict8x4   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear8x4);
        xd->subpixel_predict8x8   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear8x8);
        xd->subpixel_predict16x16 = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear16x16);
    }

    x->gf_active_ptr = (signed char *)cpi->gf_active_flags;    // Point to base of GF active flags data structure

    x->vector_range = 32;

    // Count of MBs using the alternate Q if any
    cpi->alt_qcount = 0;

    // Reset frame count of inter 0,0 motion vector usage.
    cpi->inter_zz_count = 0;

    vpx_memset(segment_counts, 0, sizeof(segment_counts));

    cpi->prediction_error = 0;
    cpi->intra_error = 0;
    cpi->skip_true_count = 0;
    cpi->skip_false_count = 0;

#if 0
    // Experimental code
    cpi->frame_distortion = 0;
    cpi->last_mb_distortion = 0;
#endif

    totalrate = 0;

    x->partition_info = x->pi;

    xd->mode_info_context = cm->mi;
    xd->mode_info_stride = cm->mode_info_stride;

    xd->frame_type = cm->frame_type;

    xd->frames_since_golden = cm->frames_since_golden;
    xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
    vp8_zero(cpi->MVcount);
    // vp8_zero( Contexts)
    vp8_zero(cpi->coef_counts);

    // reset intra mode contexts
    if (cm->frame_type == KEY_FRAME)
        vp8_init_mbmode_probs(cm);

    vp8cx_frame_init_quantizer(cpi);

    if (cpi->compressor_speed == 2)
    {
        if (cpi->oxcf.cpu_used < 0)
            cpi->Speed = -(cpi->oxcf.cpu_used);
        else
            vp8_auto_select_speed(cpi);
    }

    vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
    vp8cx_initialize_me_consts(cpi, cm->base_qindex);

    // Copy data over into macro block data structures.
    x->src = *cpi->Source;
    xd->pre = cm->yv12_fb[cm->lst_fb_idx];
    xd->dst = cm->yv12_fb[cm->new_fb_idx];

    // set up the new frame for intra coded blocks
    vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);

    vp8_build_block_offsets(x);

    vp8_setup_block_dptrs(&x->e_mbd);

    vp8_setup_block_ptrs(x);

    x->activity_sum = 0;

    xd->mode_info_context->mbmi.mode = DC_PRED;
    xd->mode_info_context->mbmi.uv_mode = DC_PRED;

    xd->left_context = &cm->left_context;

    vp8_zero(cpi->count_mb_ref_frame_usage)
    vp8_zero(cpi->ymode_count)
    vp8_zero(cpi->uv_mode_count)

    x->mvc = cm->fc.mvc;

    vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);

    {
        struct vpx_usec_timer emr_timer;
        vpx_usec_timer_start(&emr_timer);

        if (!cpi->b_multi_threaded)
        {
            // for each macroblock row in image
            for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
            {
                vp8_zero(cm->left_context)

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
            }

            cpi->tok_count = tp - cpi->tok;
        }
        else
        {
#if CONFIG_MULTITHREAD
            int i;

            vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
            {
                cpi->current_mb_col_main = -1;

                for (i = 0; i < cpi->encoding_thread_count; i++)
                {
                    if ((mb_row + i + 1) >= cm->mb_rows)
                        break;

                    cpi->mb_row_ei[i].mb_row = mb_row + i + 1;
                    cpi->mb_row_ei[i].tp = cpi->tok + (mb_row + i + 1) * (cm->mb_cols * 16 * 24);
                    cpi->mb_row_ei[i].current_mb_col = -1;
                    //SetEvent(cpi->h_event_mbrencoding[i]);
                    sem_post(&cpi->h_event_mbrencoding[i]);
                }

                vp8_zero(cm->left_context)

                tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;

                xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
                x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;

                if (mb_row < cm->mb_rows - 1)
                    //WaitForSingleObject(cpi->h_event_main, INFINITE);
                    sem_wait(&cpi->h_event_main);
            }

            for (; mb_row < cm->mb_rows; mb_row ++)
            {
                vp8_zero(cm->left_context)

                tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
            }

            cpi->tok_count = 0;

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
            {
                cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
            }

            if (xd->segmentation_enabled)
            {
                int i, j;

                if (xd->segmentation_enabled)
                {
                    for (i = 0; i < cpi->encoding_thread_count; i++)
                    {
                        for (j = 0; j < 4; j++)
                            segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
                    }
                }
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                totalrate += cpi->mb_row_ei[i].totalrate;
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
            }

#endif
        }

        vpx_usec_timer_mark(&emr_timer);
        cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
    }
    // Work out the segment probabilities if segmentation is enabled
    if (xd->segmentation_enabled)
    {
        int tot_count;
        int i;

        // Set to defaults
        vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));

        tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];

        if (tot_count)
        {
            xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;

            tot_count = segment_counts[0] + segment_counts[1];

            if (tot_count > 0)
            {
                xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
            }

            tot_count = segment_counts[2] + segment_counts[3];

            if (tot_count > 0)
                xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;

            // Zero probabilities not allowed
            for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
            {
                if (xd->mb_segment_tree_probs[i] == 0)
                    xd->mb_segment_tree_probs[i] = 1;
            }
        }
    }

    // 256 rate units to the bit
    cpi->projected_frame_size = totalrate >> 8;    // projected_frame_size in units of bits

    // Make a note of the percentage MBs coded Intra.
    if (cm->frame_type == KEY_FRAME)
    {
        cpi->this_frame_percent_intra = 100;
    }
    else
    {
        int tot_modes;

        tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
                    + cpi->count_mb_ref_frame_usage[LAST_FRAME]
                    + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
                    + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];

        if (tot_modes)
            cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
    }

#if 0
    {
        int cnt = 0;
        int flag[2] = {0, 0};

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
            {
                flag[0] = 1;
                vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
                break;
            }
        }

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
            {
                flag[1] = 1;
                vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
                break;
            }
        }

        if (flag[0] || flag[1])
            vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
    }
#endif

    // Adjust the projected reference frame usage probability numbers to reflect
    // what we have just seen. This may be useful when we make multiple iterations
    // of the recode loop rather than continuing to use values from the previous frame.
    if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
    {
        const int *const rfct = cpi->count_mb_ref_frame_usage;
        const int rf_intra = rfct[INTRA_FRAME];
        const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

        if ((rf_intra + rf_inter) > 0)
        {
            cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);

            if (cpi->prob_intra_coded < 1)
                cpi->prob_intra_coded = 1;

            if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
            {
                cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;

                if (cpi->prob_last_coded < 1)
                    cpi->prob_last_coded = 1;

                cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                                     ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;

                if (cpi->prob_gf_coded < 1)
                    cpi->prob_gf_coded = 1;
            }
        }
    }

#if 0
    // Keep record of the total distortion this time around for future use
    cpi->last_frame_distortion = cpi->frame_distortion;
#endif

    /* Update the average activity for the next frame.
     * This is feed-forward for now; it could also be saved in two-pass, or
     * done during lookahead when that is eventually added.
     */
    cpi->activity_avg = (unsigned int)(x->activity_sum / cpi->common.MBs);

    if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
        cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
}
void vp8_setup_block_ptrs(MACROBLOCK *x)
{
    int r, c;
    int i;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
        }
    }

    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
        }
    }

    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
        }
    }

    x->block[24].src_diff = x->src_diff + 384;

    for (i = 0; i < 25; i++)
    {
        x->block[i].coeff = x->coeff + i * 16;
    }
}
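
/* For reference (derived from the offsets above): src_diff lays out the 16
 * Y sub-blocks at 0..255, the 4 U sub-blocks at 256..319, the 4 V
 * sub-blocks at 320..383 and the Y2 (second order) block at 384..399, so
 * the coeff buffer is likewise partitioned into 25 groups of 16. */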
void vp8_build_block_offsets(MACROBLOCK *x)
{
    int block = 0;
    int br, bc;

    vp8_build_block_doffsets(&x->e_mbd);

    // y blocks
    for (br = 0; br < 4; br++)
    {
        for (bc = 0; bc < 4; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.y_buffer;
            this_block->src_stride = x->src.y_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // u blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.u_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // v blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.v_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }
}
static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
{
    const MACROBLOCKD *xd = &x->e_mbd;
    const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
    const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;

#ifdef MODE_STATS
    const int is_key = cpi->common.frame_type == KEY_FRAME;

    ++ (is_key ? uv_modes : inter_uv_modes)[uvm];

    if (m == B_PRED)
    {
        unsigned int *const bct = is_key ? b_modes : inter_b_modes;

        int b = 0;

        do
        {
            ++ bct[xd->block[b].bmi.mode];
        }
        while (++b < 16);
    }

#endif

    ++cpi->ymode_count[m];
    ++cpi->uv_mode_count[uvm];
}
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
{
    int Error4x4, Error16x16, error_uv;
    int rate4x4, rate16x16, rateuv;
    int dist4x4, dist16x16, distuv;
    int rate = 0;
    int rate4x4_tokenonly = 0;
    int rate16x16_tokenonly = 0;
    int rateuv_tokenonly = 0;

    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

#if !(CONFIG_REALTIME_ONLY)
    if (cpi->sf.RD && cpi->compressor_speed != 2)
    {
        error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
        rate += rateuv;

        Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);

        Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4);

        rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
    }
    else
#endif
    {
        int rate2, best_distortion;
        MB_PREDICTION_MODE mode, best_mode = DC_PRED;
        int this_rd;
        Error16x16 = INT_MAX;

        vp8_pick_intra_mbuv_mode(x);

        for (mode = DC_PRED; mode <= TM_PRED; mode ++)
        {
            int distortion2;

            x->e_mbd.mode_info_context->mbmi.mode = mode;
            vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
            distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
            rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
            this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);

            if (Error16x16 > this_rd)
            {
                Error16x16 = this_rd;
                best_mode = mode;
                best_distortion = distortion2;
            }
        }
        x->e_mbd.mode_info_context->mbmi.mode = best_mode;

        vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &best_distortion);

        if (best_distortion == INT_MAX)
            Error4x4 = INT_MAX;
        else
            Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, best_distortion);
    }

    if (Error4x4 < Error16x16)
    {
        x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
        vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
    }
    else
    {
        vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
    }

    vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
    sum_intra_stats(cpi, x);
    vp8_tokenize_mb(cpi, &x->e_mbd, t);

    return rate;
}
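
/* Note (from the control flow above): only the RD path accumulates `rate`
 * (the UV mode rate plus the cheaper of the 4x4 and 16x16 luma
 * alternatives); the fast path never adds to it, so it returns 0 there. */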
#ifdef SPEEDSTATS
extern int cnt_pm;
#endif

extern void vp8_fix_contexts(MACROBLOCKD *x);

int vp8cx_encode_inter_macroblock
(
    VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
    int recon_yoffset, int recon_uvoffset
)
{
    MACROBLOCKD *const xd = &x->e_mbd;
    int inter_error;
    int intra_error = 0;
    int rate;
    int distortion;

    x->skip = 0;

    if (xd->segmentation_enabled)
        x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
    else
        x->encode_breakout = cpi->oxcf.encode_breakout;

#if !(CONFIG_REALTIME_ONLY)

    if (cpi->sf.RD)
    {
        int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;

        /* Are we using the fast quantizer for the mode selection? */
        if (cpi->sf.use_fastquant_for_pick)
        {
            cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);

            /* the fast quantizer does not use zbin_extra, so
             * do not recalculate */
            cpi->zbin_mode_boost_enabled = 0;
        }

        inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);

        /* switch back to the regular quantizer for the encode */
        if (cpi->sf.improved_quant)
        {
            cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
        }

        /* restore cpi->zbin_mode_boost_enabled */
        cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
    }
    else
#endif
        inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);

    cpi->prediction_error += inter_error;
    cpi->intra_error += intra_error;

#if 0
    // Experimental RD code
    cpi->frame_distortion += distortion;
    cpi->last_mb_distortion = distortion;
#endif

    // MB level adjustment to quantizer setup
    if (xd->segmentation_enabled)
    {
        // If cyclic update enabled
        if (cpi->cyclic_refresh_mode_enabled)
        {
            // Clear segment_id back to 0 if not coded (last frame 0,0)
            if ((xd->mode_info_context->mbmi.segment_id == 1) &&
                ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
            {
                xd->mode_info_context->mbmi.segment_id = 0;

                /* segment_id changed, so update */
                vp8cx_mb_init_quantizer(cpi, x);
            }
        }
    }

    // Experimental code. Special case for gf and arf zeromv modes.
    // Increase zbin size to suppress noise.
    if (cpi->zbin_mode_boost_enabled)
    {
        if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
            cpi->zbin_mode_boost = 0;
        else
        {
            if (xd->mode_info_context->mbmi.mode == ZEROMV)
            {
                if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
                    cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
                else
                    cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
            }
            else if (xd->mode_info_context->mbmi.mode == SPLITMV)
                cpi->zbin_mode_boost = 0;
            else
                cpi->zbin_mode_boost = MV_ZBIN_BOOST;
        }
    }
    else
        cpi->zbin_mode_boost = 0;

    vp8_update_zbin_extra(cpi, x);

    cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;

    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);

        if (xd->mode_info_context->mbmi.mode == B_PRED)
        {
            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
        }
        else
        {
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
        }

        sum_intra_stats(cpi, x);
    }
    else
    {
        MV best_ref_mv;
        MV nearest, nearby;
        int mdcounts[4];
        int ref_fb_idx;

        vp8_find_near_mvs(xd, xd->mode_info_context,
                          &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);

        vp8_build_uvmvs(xd, cpi->common.full_pixel);

        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
            ref_fb_idx = cpi->common.lst_fb_idx;
        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
            ref_fb_idx = cpi->common.gld_fb_idx;
        else
            ref_fb_idx = cpi->common.alt_fb_idx;

        xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
        xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
        xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

        if (xd->mode_info_context->mbmi.mode == SPLITMV)
        {
            int i;

            for (i = 0; i < 16; i++)
            {
                if (xd->block[i].bmi.mode == NEW4X4)
                {
                    cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
                    cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
                }
            }
        }
        else if (xd->mode_info_context->mbmi.mode == NEWMV)
        {
            cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
            cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
        }

        if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
        {
            vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);

            // Clear mb_skip_coeff if mb_no_coeff_skip is not set
            if (!cpi->common.mb_no_coeff_skip)
                xd->mode_info_context->mbmi.mb_skip_coeff = 0;
        }
        else
            vp8_stuff_inter16x16(x);
    }

    if (!x->skip)
        vp8_tokenize_mb(cpi, xd, t);
    else
    {
        if (cpi->common.mb_no_coeff_skip)
        {
            if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
                xd->mode_info_context->mbmi.dc_diff = 0;
            else
                xd->mode_info_context->mbmi.dc_diff = 1;

            xd->mode_info_context->mbmi.mb_skip_coeff = 1;
            cpi->skip_true_count ++;
            vp8_fix_contexts(xd);
        }
        else
        {
            vp8_stuff_mb(cpi, xd, t);
            xd->mode_info_context->mbmi.mb_skip_coeff = 0;
            cpi->skip_false_count ++;
        }
    }

    return rate;
}