/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
16 #include "./vp9_rtcd.h"
17 #include "./vpx_dsp_rtcd.h"
19 #include "vpx_mem/vpx_mem.h"
20 #include "vpx_ports/mem.h"
22 #include "vp9/common/vp9_blockd.h"
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_mvref_common.h"
25 #include "vp9/common/vp9_pred_common.h"
26 #include "vp9/common/vp9_reconinter.h"
27 #include "vp9/common/vp9_reconintra.h"
28 #include "vp9/common/vp9_scan.h"
30 #include "vp9/encoder/vp9_cost.h"
31 #include "vp9/encoder/vp9_encoder.h"
32 #include "vp9/encoder/vp9_pickmode.h"
33 #include "vp9/encoder/vp9_ratectrl.h"
34 #include "vp9/encoder/vp9_rd.h"
42 static int mv_refs_rt(const VP9_COMMON
*cm
, const MACROBLOCK
*x
,
43 const MACROBLOCKD
*xd
,
44 const TileInfo
*const tile
,
45 MODE_INFO
*mi
, MV_REFERENCE_FRAME ref_frame
,
47 int mi_row
, int mi_col
) {
48 const int *ref_sign_bias
= cm
->ref_frame_sign_bias
;
49 int i
, refmv_count
= 0;
51 const POSITION
*const mv_ref_search
= mv_ref_blocks
[mi
->mbmi
.sb_type
];
53 int different_ref_found
= 0;
54 int context_counter
= 0;
57 // Blank the reference vector list
58 memset(mv_ref_list
, 0, sizeof(*mv_ref_list
) * MAX_MV_REF_CANDIDATES
);
60 // The nearest 2 blocks are treated differently
61 // if the size < 8x8 we get the mv from the bmi substructure,
62 // and we also need to keep a mode count.
63 for (i
= 0; i
< 2; ++i
) {
64 const POSITION
*const mv_ref
= &mv_ref_search
[i
];
65 if (is_inside(tile
, mi_col
, mi_row
, cm
->mi_rows
, mv_ref
)) {
66 const MODE_INFO
*const candidate_mi
= xd
->mi
[mv_ref
->col
+ mv_ref
->row
*
68 const MB_MODE_INFO
*const candidate
= &candidate_mi
->mbmi
;
69 // Keep counts for entropy encoding.
70 context_counter
+= mode_2_counter
[candidate
->mode
];
71 different_ref_found
= 1;
73 if (candidate
->ref_frame
[0] == ref_frame
)
74 ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi
, 0, mv_ref
->col
, -1),
75 refmv_count
, mv_ref_list
, Done
);
81 // Check the rest of the neighbors in much the same way
82 // as before except we don't need to keep track of sub blocks or
84 for (; i
< MVREF_NEIGHBOURS
&& !refmv_count
; ++i
) {
85 const POSITION
*const mv_ref
= &mv_ref_search
[i
];
86 if (is_inside(tile
, mi_col
, mi_row
, cm
->mi_rows
, mv_ref
)) {
87 const MB_MODE_INFO
*const candidate
= &xd
->mi
[mv_ref
->col
+ mv_ref
->row
*
89 different_ref_found
= 1;
91 if (candidate
->ref_frame
[0] == ref_frame
)
92 ADD_MV_REF_LIST(candidate
->mv
[0], refmv_count
, mv_ref_list
, Done
);
96 // Since we couldn't find 2 mvs from the same reference frame
97 // go back through the neighbors and find motion vectors from
98 // different reference frames.
99 if (different_ref_found
&& !refmv_count
) {
100 for (i
= 0; i
< MVREF_NEIGHBOURS
; ++i
) {
101 const POSITION
*mv_ref
= &mv_ref_search
[i
];
102 if (is_inside(tile
, mi_col
, mi_row
, cm
->mi_rows
, mv_ref
)) {
103 const MB_MODE_INFO
*const candidate
= &xd
->mi
[mv_ref
->col
+ mv_ref
->row
104 * xd
->mi_stride
]->mbmi
;
106 // If the candidate is INTRA we don't want to consider its mv.
107 IF_DIFF_REF_FRAME_ADD_MV(candidate
, ref_frame
, ref_sign_bias
,
108 refmv_count
, mv_ref_list
, Done
);
115 x
->mbmi_ext
->mode_context
[ref_frame
] = counter_to_context
[context_counter
];
118 for (i
= 0; i
< MAX_MV_REF_CANDIDATES
; ++i
)
119 clamp_mv_ref(&mv_ref_list
[i
].as_mv
, xd
);
124 static int combined_motion_search(VP9_COMP
*cpi
, MACROBLOCK
*x
,
125 BLOCK_SIZE bsize
, int mi_row
, int mi_col
,
126 int_mv
*tmp_mv
, int *rate_mv
,
127 int64_t best_rd_sofar
) {
128 MACROBLOCKD
*xd
= &x
->e_mbd
;
129 MB_MODE_INFO
*mbmi
= &xd
->mi
[0]->mbmi
;
130 struct buf_2d backup_yv12
[MAX_MB_PLANE
] = {{0, 0}};
131 const int step_param
= cpi
->sf
.mv
.fullpel_search_step_param
;
132 const int sadpb
= x
->sadperbit16
;
134 const int ref
= mbmi
->ref_frame
[0];
135 const MV ref_mv
= x
->mbmi_ext
->ref_mvs
[ref
][0].as_mv
;
138 const int tmp_col_min
= x
->mv_col_min
;
139 const int tmp_col_max
= x
->mv_col_max
;
140 const int tmp_row_min
= x
->mv_row_min
;
141 const int tmp_row_max
= x
->mv_row_max
;
144 const YV12_BUFFER_CONFIG
*scaled_ref_frame
= vp9_get_scaled_ref_frame(cpi
,
146 if (scaled_ref_frame
) {
148 // Swap out the reference frame for a version that's been scaled to
149 // match the resolution of the current frame, allowing the existing
150 // motion search code to be used without additional modifications.
151 for (i
= 0; i
< MAX_MB_PLANE
; i
++)
152 backup_yv12
[i
] = xd
->plane
[i
].pre
[0];
153 vp9_setup_pre_planes(xd
, 0, scaled_ref_frame
, mi_row
, mi_col
, NULL
);
155 vp9_set_mv_search_range(x
, &ref_mv
);
157 assert(x
->mv_best_ref_index
[ref
] <= 2);
158 if (x
->mv_best_ref_index
[ref
] < 2)
159 mvp_full
= x
->mbmi_ext
->ref_mvs
[ref
][x
->mv_best_ref_index
[ref
]].as_mv
;
161 mvp_full
= x
->pred_mv
[ref
];
166 vp9_full_pixel_search(cpi
, x
, bsize
, &mvp_full
, step_param
, sadpb
,
167 cond_cost_list(cpi
, cost_list
),
168 &ref_mv
, &tmp_mv
->as_mv
, INT_MAX
, 0);
170 x
->mv_col_min
= tmp_col_min
;
171 x
->mv_col_max
= tmp_col_max
;
172 x
->mv_row_min
= tmp_row_min
;
173 x
->mv_row_max
= tmp_row_max
;
175 // calculate the bit cost on motion vector
176 mvp_full
.row
= tmp_mv
->as_mv
.row
* 8;
177 mvp_full
.col
= tmp_mv
->as_mv
.col
* 8;
179 *rate_mv
= vp9_mv_bit_cost(&mvp_full
, &ref_mv
,
180 x
->nmvjointcost
, x
->mvcost
, MV_COST_WEIGHT
);
182 rate_mode
= cpi
->inter_mode_cost
[x
->mbmi_ext
->mode_context
[ref
]]
183 [INTER_OFFSET(NEWMV
)];
184 rv
= !(RDCOST(x
->rdmult
, x
->rddiv
, (*rate_mv
+ rate_mode
), 0) >
188 cpi
->find_fractional_mv_step(x
, &tmp_mv
->as_mv
, &ref_mv
,
189 cpi
->common
.allow_high_precision_mv
,
192 cpi
->sf
.mv
.subpel_force_stop
,
193 cpi
->sf
.mv
.subpel_iters_per_step
,
194 cond_cost_list(cpi
, cost_list
),
195 x
->nmvjointcost
, x
->mvcost
,
196 &dis
, &x
->pred_sse
[ref
], NULL
, 0, 0);
197 *rate_mv
= vp9_mv_bit_cost(&tmp_mv
->as_mv
, &ref_mv
,
198 x
->nmvjointcost
, x
->mvcost
, MV_COST_WEIGHT
);
201 if (scaled_ref_frame
) {
203 for (i
= 0; i
< MAX_MB_PLANE
; i
++)
204 xd
->plane
[i
].pre
[0] = backup_yv12
[i
];
209 static void block_variance(const uint8_t *src
, int src_stride
,
210 const uint8_t *ref
, int ref_stride
,
211 int w
, int h
, unsigned int *sse
, int *sum
,
212 int block_size
, unsigned int *sse8x8
,
213 int *sum8x8
, unsigned int *var8x8
) {
219 for (i
= 0; i
< h
; i
+= block_size
) {
220 for (j
= 0; j
< w
; j
+= block_size
) {
221 vpx_get8x8var(src
+ src_stride
* i
+ j
, src_stride
,
222 ref
+ ref_stride
* i
+ j
, ref_stride
,
223 &sse8x8
[k
], &sum8x8
[k
]);
226 var8x8
[k
] = sse8x8
[k
] - (((unsigned int)sum8x8
[k
] * sum8x8
[k
]) >> 6);
232 static void calculate_variance(int bw
, int bh
, TX_SIZE tx_size
,
233 unsigned int *sse_i
, int *sum_i
,
234 unsigned int *var_o
, unsigned int *sse_o
,
236 const BLOCK_SIZE unit_size
= txsize_to_bsize
[tx_size
];
237 const int nw
= 1 << (bw
- b_width_log2_lookup
[unit_size
]);
238 const int nh
= 1 << (bh
- b_height_log2_lookup
[unit_size
]);
241 for (i
= 0; i
< nh
; i
+= 2) {
242 for (j
= 0; j
< nw
; j
+= 2) {
243 sse_o
[k
] = sse_i
[i
* nw
+ j
] + sse_i
[i
* nw
+ j
+ 1] +
244 sse_i
[(i
+ 1) * nw
+ j
] + sse_i
[(i
+ 1) * nw
+ j
+ 1];
245 sum_o
[k
] = sum_i
[i
* nw
+ j
] + sum_i
[i
* nw
+ j
+ 1] +
246 sum_i
[(i
+ 1) * nw
+ j
] + sum_i
[(i
+ 1) * nw
+ j
+ 1];
247 var_o
[k
] = sse_o
[k
] - (((unsigned int)sum_o
[k
] * sum_o
[k
]) >>
248 (b_width_log2_lookup
[unit_size
] +
249 b_height_log2_lookup
[unit_size
] + 6));
255 static void model_rd_for_sb_y_large(VP9_COMP
*cpi
, BLOCK_SIZE bsize
,
256 MACROBLOCK
*x
, MACROBLOCKD
*xd
,
257 int *out_rate_sum
, int64_t *out_dist_sum
,
258 unsigned int *var_y
, unsigned int *sse_y
,
259 int mi_row
, int mi_col
, int *early_term
) {
260 // Note our transform coeffs are 8 times an orthogonal transform.
261 // Hence quantizer step is also 8 times. To get effective quantizer
262 // we need to divide by 8 before sending to modeling function.
266 struct macroblock_plane
*const p
= &x
->plane
[0];
267 struct macroblockd_plane
*const pd
= &xd
->plane
[0];
268 const uint32_t dc_quant
= pd
->dequant
[0];
269 const uint32_t ac_quant
= pd
->dequant
[1];
270 const int64_t dc_thr
= dc_quant
* dc_quant
>> 6;
271 const int64_t ac_thr
= ac_quant
* ac_quant
>> 6;
276 const int bw
= b_width_log2_lookup
[bsize
];
277 const int bh
= b_height_log2_lookup
[bsize
];
278 const int num8x8
= 1 << (bw
+ bh
- 2);
279 unsigned int sse8x8
[64] = {0};
280 int sum8x8
[64] = {0};
281 unsigned int var8x8
[64] = {0};
285 // Calculate variance for whole partition, and also save 8x8 blocks' variance
286 // to be used in following transform skipping test.
287 block_variance(p
->src
.buf
, p
->src
.stride
, pd
->dst
.buf
, pd
->dst
.stride
,
288 4 << bw
, 4 << bh
, &sse
, &sum
, 8, sse8x8
, sum8x8
, var8x8
);
289 var
= sse
- (((int64_t)sum
* sum
) >> (bw
+ bh
+ 4));
294 if (cpi
->common
.tx_mode
== TX_MODE_SELECT
) {
295 if (sse
> (var
<< 2))
296 tx_size
= MIN(max_txsize_lookup
[bsize
],
297 tx_mode_to_biggest_tx_size
[cpi
->common
.tx_mode
]);
301 if (cpi
->oxcf
.aq_mode
== CYCLIC_REFRESH_AQ
&&
302 cyclic_refresh_segment_id_boosted(xd
->mi
[0]->mbmi
.segment_id
))
304 else if (tx_size
> TX_16X16
)
307 tx_size
= MIN(max_txsize_lookup
[bsize
],
308 tx_mode_to_biggest_tx_size
[cpi
->common
.tx_mode
]);
311 assert(tx_size
>= TX_8X8
);
312 xd
->mi
[0]->mbmi
.tx_size
= tx_size
;
314 // Evaluate if the partition block is a skippable block in Y plane.
316 unsigned int sse16x16
[16] = {0};
317 int sum16x16
[16] = {0};
318 unsigned int var16x16
[16] = {0};
319 const int num16x16
= num8x8
>> 2;
321 unsigned int sse32x32
[4] = {0};
322 int sum32x32
[4] = {0};
323 unsigned int var32x32
[4] = {0};
324 const int num32x32
= num8x8
>> 4;
328 const int num
= (tx_size
== TX_8X8
) ? num8x8
:
329 ((tx_size
== TX_16X16
) ? num16x16
: num32x32
);
330 const unsigned int *sse_tx
= (tx_size
== TX_8X8
) ? sse8x8
:
331 ((tx_size
== TX_16X16
) ? sse16x16
: sse32x32
);
332 const unsigned int *var_tx
= (tx_size
== TX_8X8
) ? var8x8
:
333 ((tx_size
== TX_16X16
) ? var16x16
: var32x32
);
335 // Calculate variance if tx_size > TX_8X8
336 if (tx_size
>= TX_16X16
)
337 calculate_variance(bw
, bh
, TX_8X8
, sse8x8
, sum8x8
, var16x16
, sse16x16
,
339 if (tx_size
== TX_32X32
)
340 calculate_variance(bw
, bh
, TX_16X16
, sse16x16
, sum16x16
, var32x32
,
345 for (k
= 0; k
< num
; k
++)
346 // Check if all ac coefficients can be quantized to zero.
347 if (!(var_tx
[k
] < ac_thr
|| var
== 0)) {
352 for (k
= 0; k
< num
; k
++)
353 // Check if dc coefficient can be quantized to zero.
354 if (!(sse_tx
[k
] - var_tx
[k
] < dc_thr
|| sse
== var
)) {
364 } else if (dc_test
) {
369 if (x
->skip_txfm
[0] == 1) {
370 int skip_uv
[2] = {0};
371 unsigned int var_uv
[2];
372 unsigned int sse_uv
[2];
375 *out_dist_sum
= sse
<< 4;
377 // Transform skipping test in UV planes.
378 for (i
= 1; i
<= 2; i
++) {
379 struct macroblock_plane
*const p
= &x
->plane
[i
];
380 struct macroblockd_plane
*const pd
= &xd
->plane
[i
];
381 const TX_SIZE uv_tx_size
= get_uv_tx_size(&xd
->mi
[0]->mbmi
, pd
);
382 const BLOCK_SIZE unit_size
= txsize_to_bsize
[uv_tx_size
];
383 const BLOCK_SIZE uv_bsize
= get_plane_block_size(bsize
, pd
);
384 const int uv_bw
= b_width_log2_lookup
[uv_bsize
];
385 const int uv_bh
= b_height_log2_lookup
[uv_bsize
];
386 const int sf
= (uv_bw
- b_width_log2_lookup
[unit_size
]) +
387 (uv_bh
- b_height_log2_lookup
[unit_size
]);
388 const uint32_t uv_dc_thr
= pd
->dequant
[0] * pd
->dequant
[0] >> (6 - sf
);
389 const uint32_t uv_ac_thr
= pd
->dequant
[1] * pd
->dequant
[1] >> (6 - sf
);
392 vp9_build_inter_predictors_sbp(xd
, mi_row
, mi_col
, bsize
, i
);
393 var_uv
[j
] = cpi
->fn_ptr
[uv_bsize
].vf(p
->src
.buf
, p
->src
.stride
,
394 pd
->dst
.buf
, pd
->dst
.stride
, &sse_uv
[j
]);
396 if ((var_uv
[j
] < uv_ac_thr
|| var_uv
[j
] == 0) &&
397 (sse_uv
[j
] - var_uv
[j
] < uv_dc_thr
|| sse_uv
[j
] == var_uv
[j
]))
403 // If the transform in YUV planes are skippable, the mode search checks
404 // fewer inter modes and doesn't check intra modes.
405 if (skip_uv
[0] & skip_uv
[1]) {
413 #if CONFIG_VP9_HIGHBITDEPTH
414 if (xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
) {
415 vp9_model_rd_from_var_lapndz(sse
- var
, num_pels_log2_lookup
[bsize
],
416 dc_quant
>> (xd
->bd
- 5), &rate
, &dist
);
418 vp9_model_rd_from_var_lapndz(sse
- var
, num_pels_log2_lookup
[bsize
],
419 dc_quant
>> 3, &rate
, &dist
);
422 vp9_model_rd_from_var_lapndz(sse
- var
, num_pels_log2_lookup
[bsize
],
423 dc_quant
>> 3, &rate
, &dist
);
424 #endif // CONFIG_VP9_HIGHBITDEPTH
428 *out_rate_sum
= rate
>> 1;
429 *out_dist_sum
= dist
<< 3;
432 *out_dist_sum
= (sse
- var
) << 4;
435 #if CONFIG_VP9_HIGHBITDEPTH
436 if (xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
) {
437 vp9_model_rd_from_var_lapndz(var
, num_pels_log2_lookup
[bsize
],
438 ac_quant
>> (xd
->bd
- 5), &rate
, &dist
);
440 vp9_model_rd_from_var_lapndz(var
, num_pels_log2_lookup
[bsize
],
441 ac_quant
>> 3, &rate
, &dist
);
444 vp9_model_rd_from_var_lapndz(var
, num_pels_log2_lookup
[bsize
],
445 ac_quant
>> 3, &rate
, &dist
);
446 #endif // CONFIG_VP9_HIGHBITDEPTH
448 *out_rate_sum
+= rate
;
449 *out_dist_sum
+= dist
<< 4;
452 static void model_rd_for_sb_y(VP9_COMP
*cpi
, BLOCK_SIZE bsize
,
453 MACROBLOCK
*x
, MACROBLOCKD
*xd
,
454 int *out_rate_sum
, int64_t *out_dist_sum
,
455 unsigned int *var_y
, unsigned int *sse_y
) {
456 // Note our transform coeffs are 8 times an orthogonal transform.
457 // Hence quantizer step is also 8 times. To get effective quantizer
458 // we need to divide by 8 before sending to modeling function.
462 struct macroblock_plane
*const p
= &x
->plane
[0];
463 struct macroblockd_plane
*const pd
= &xd
->plane
[0];
464 const int64_t dc_thr
= p
->quant_thred
[0] >> 6;
465 const int64_t ac_thr
= p
->quant_thred
[1] >> 6;
466 const uint32_t dc_quant
= pd
->dequant
[0];
467 const uint32_t ac_quant
= pd
->dequant
[1];
468 unsigned int var
= cpi
->fn_ptr
[bsize
].vf(p
->src
.buf
, p
->src
.stride
,
469 pd
->dst
.buf
, pd
->dst
.stride
, &sse
);
475 if (cpi
->common
.tx_mode
== TX_MODE_SELECT
) {
476 if (sse
> (var
<< 2))
477 xd
->mi
[0]->mbmi
.tx_size
=
478 MIN(max_txsize_lookup
[bsize
],
479 tx_mode_to_biggest_tx_size
[cpi
->common
.tx_mode
]);
481 xd
->mi
[0]->mbmi
.tx_size
= TX_8X8
;
483 if (cpi
->oxcf
.aq_mode
== CYCLIC_REFRESH_AQ
&&
484 cyclic_refresh_segment_id_boosted(xd
->mi
[0]->mbmi
.segment_id
))
485 xd
->mi
[0]->mbmi
.tx_size
= TX_8X8
;
486 else if (xd
->mi
[0]->mbmi
.tx_size
> TX_16X16
)
487 xd
->mi
[0]->mbmi
.tx_size
= TX_16X16
;
489 xd
->mi
[0]->mbmi
.tx_size
=
490 MIN(max_txsize_lookup
[bsize
],
491 tx_mode_to_biggest_tx_size
[cpi
->common
.tx_mode
]);
494 // Evaluate if the partition block is a skippable block in Y plane.
496 const BLOCK_SIZE unit_size
=
497 txsize_to_bsize
[xd
->mi
[0]->mbmi
.tx_size
];
498 const unsigned int num_blk_log2
=
499 (b_width_log2_lookup
[bsize
] - b_width_log2_lookup
[unit_size
]) +
500 (b_height_log2_lookup
[bsize
] - b_height_log2_lookup
[unit_size
]);
501 const unsigned int sse_tx
= sse
>> num_blk_log2
;
502 const unsigned int var_tx
= var
>> num_blk_log2
;
505 // Check if all ac coefficients can be quantized to zero.
506 if (var_tx
< ac_thr
|| var
== 0) {
508 // Check if dc coefficient can be quantized to zero.
509 if (sse_tx
- var_tx
< dc_thr
|| sse
== var
)
512 if (sse_tx
- var_tx
< dc_thr
|| sse
== var
)
517 if (x
->skip_txfm
[0] == 1) {
519 *out_dist_sum
= sse
<< 4;
524 #if CONFIG_VP9_HIGHBITDEPTH
525 if (xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
) {
526 vp9_model_rd_from_var_lapndz(sse
- var
, num_pels_log2_lookup
[bsize
],
527 dc_quant
>> (xd
->bd
- 5), &rate
, &dist
);
529 vp9_model_rd_from_var_lapndz(sse
- var
, num_pels_log2_lookup
[bsize
],
530 dc_quant
>> 3, &rate
, &dist
);
533 vp9_model_rd_from_var_lapndz(sse
- var
, num_pels_log2_lookup
[bsize
],
534 dc_quant
>> 3, &rate
, &dist
);
535 #endif // CONFIG_VP9_HIGHBITDEPTH
539 *out_rate_sum
= rate
>> 1;
540 *out_dist_sum
= dist
<< 3;
543 *out_dist_sum
= (sse
- var
) << 4;
546 #if CONFIG_VP9_HIGHBITDEPTH
547 if (xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
) {
548 vp9_model_rd_from_var_lapndz(var
, num_pels_log2_lookup
[bsize
],
549 ac_quant
>> (xd
->bd
- 5), &rate
, &dist
);
551 vp9_model_rd_from_var_lapndz(var
, num_pels_log2_lookup
[bsize
],
552 ac_quant
>> 3, &rate
, &dist
);
555 vp9_model_rd_from_var_lapndz(var
, num_pels_log2_lookup
[bsize
],
556 ac_quant
>> 3, &rate
, &dist
);
557 #endif // CONFIG_VP9_HIGHBITDEPTH
559 *out_rate_sum
+= rate
;
560 *out_dist_sum
+= dist
<< 4;
563 #if CONFIG_VP9_HIGHBITDEPTH
564 static void block_yrd(VP9_COMP
*cpi
, MACROBLOCK
*x
, int *rate
, int64_t *dist
,
565 int *skippable
, int64_t *sse
, int plane
,
566 BLOCK_SIZE bsize
, TX_SIZE tx_size
) {
567 MACROBLOCKD
*xd
= &x
->e_mbd
;
568 unsigned int var_y
, sse_y
;
571 model_rd_for_sb_y(cpi
, bsize
, x
, xd
, rate
, dist
, &var_y
, &sse_y
);
577 static void block_yrd(VP9_COMP
*cpi
, MACROBLOCK
*x
, int *rate
, int64_t *dist
,
578 int *skippable
, int64_t *sse
, int plane
,
579 BLOCK_SIZE bsize
, TX_SIZE tx_size
) {
580 MACROBLOCKD
*xd
= &x
->e_mbd
;
581 const struct macroblockd_plane
*pd
= &xd
->plane
[plane
];
582 const struct macroblock_plane
*const p
= &x
->plane
[plane
];
583 const int num_4x4_w
= num_4x4_blocks_wide_lookup
[bsize
];
584 const int num_4x4_h
= num_4x4_blocks_high_lookup
[bsize
];
585 const int step
= 1 << (tx_size
<< 1);
586 const int block_step
= (1 << tx_size
);
588 int shift
= tx_size
== TX_32X32
? 0 : 2;
589 const int max_blocks_wide
= num_4x4_w
+ (xd
->mb_to_right_edge
>= 0 ? 0 :
590 xd
->mb_to_right_edge
>> (5 + pd
->subsampling_x
));
591 const int max_blocks_high
= num_4x4_h
+ (xd
->mb_to_bottom_edge
>= 0 ? 0 :
592 xd
->mb_to_bottom_edge
>> (5 + pd
->subsampling_y
));
596 vp9_subtract_plane(x
, bsize
, plane
);
598 // Keep track of the row and column of the blocks we use so that we know
599 // if we are in the unrestricted motion border.
600 for (r
= 0; r
< max_blocks_high
; r
+= block_step
) {
601 for (c
= 0; c
< num_4x4_w
; c
+= block_step
) {
602 if (c
< max_blocks_wide
) {
603 const scan_order
*const scan_order
= &vp9_default_scan_orders
[tx_size
];
604 tran_low_t
*const coeff
= BLOCK_OFFSET(p
->coeff
, block
);
605 tran_low_t
*const qcoeff
= BLOCK_OFFSET(p
->qcoeff
, block
);
606 tran_low_t
*const dqcoeff
= BLOCK_OFFSET(pd
->dqcoeff
, block
);
607 uint16_t *const eob
= &p
->eobs
[block
];
608 const int diff_stride
= 4 * num_4x4_blocks_wide_lookup
[bsize
];
609 const int16_t *src_diff
;
610 src_diff
= &p
->src_diff
[(r
* diff_stride
+ c
) << 2];
614 vp9_fdct32x32_rd(src_diff
, coeff
, diff_stride
);
615 vp9_quantize_fp_32x32(coeff
, 1024, x
->skip_block
, p
->zbin
,
616 p
->round_fp
, p
->quant_fp
, p
->quant_shift
,
617 qcoeff
, dqcoeff
, pd
->dequant
, eob
,
618 scan_order
->scan
, scan_order
->iscan
);
621 vp9_hadamard_16x16(src_diff
, diff_stride
, (int16_t *)coeff
);
622 vp9_quantize_fp(coeff
, 256, x
->skip_block
, p
->zbin
, p
->round_fp
,
623 p
->quant_fp
, p
->quant_shift
, qcoeff
, dqcoeff
,
625 scan_order
->scan
, scan_order
->iscan
);
628 vp9_hadamard_8x8(src_diff
, diff_stride
, (int16_t *)coeff
);
629 vp9_quantize_fp(coeff
, 64, x
->skip_block
, p
->zbin
, p
->round_fp
,
630 p
->quant_fp
, p
->quant_shift
, qcoeff
, dqcoeff
,
632 scan_order
->scan
, scan_order
->iscan
);
635 x
->fwd_txm4x4(src_diff
, coeff
, diff_stride
);
636 vp9_quantize_fp(coeff
, 16, x
->skip_block
, p
->zbin
, p
->round_fp
,
637 p
->quant_fp
, p
->quant_shift
, qcoeff
, dqcoeff
,
639 scan_order
->scan
, scan_order
->iscan
);
645 *skippable
&= (*eob
== 0);
652 if (*skippable
&& *sse
< INT64_MAX
) {
654 *dist
= (*sse
<< 6) >> shift
;
662 if (*sse
< INT64_MAX
)
663 *sse
= (*sse
<< 6) >> shift
;
664 for (r
= 0; r
< max_blocks_high
; r
+= block_step
) {
665 for (c
= 0; c
< num_4x4_w
; c
+= block_step
) {
666 if (c
< max_blocks_wide
) {
667 tran_low_t
*const coeff
= BLOCK_OFFSET(p
->coeff
, block
);
668 tran_low_t
*const qcoeff
= BLOCK_OFFSET(p
->qcoeff
, block
);
669 tran_low_t
*const dqcoeff
= BLOCK_OFFSET(pd
->dqcoeff
, block
);
670 uint16_t *const eob
= &p
->eobs
[block
];
673 *rate
+= (int)abs(qcoeff
[0]);
675 *rate
+= (int)vp9_satd((const int16_t *)qcoeff
, step
<< 4);
677 *dist
+= vp9_block_error_fp(coeff
, dqcoeff
, step
<< 4) >> shift
;
683 if (*skippable
== 0) {
685 *rate
+= (eob_cost
<< 8);
690 static void model_rd_for_sb_uv(VP9_COMP
*cpi
, BLOCK_SIZE bsize
,
691 MACROBLOCK
*x
, MACROBLOCKD
*xd
,
692 int *out_rate_sum
, int64_t *out_dist_sum
,
693 unsigned int *var_y
, unsigned int *sse_y
) {
694 // Note our transform coeffs are 8 times an orthogonal transform.
695 // Hence quantizer step is also 8 times. To get effective quantizer
696 // we need to divide by 8 before sending to modeling function.
705 for (i
= 1; i
<= 2; ++i
) {
706 struct macroblock_plane
*const p
= &x
->plane
[i
];
707 struct macroblockd_plane
*const pd
= &xd
->plane
[i
];
708 const uint32_t dc_quant
= pd
->dequant
[0];
709 const uint32_t ac_quant
= pd
->dequant
[1];
710 const BLOCK_SIZE bs
= get_plane_block_size(bsize
, pd
);
713 if (!x
->color_sensitivity
[i
- 1])
716 var
= cpi
->fn_ptr
[bs
].vf(p
->src
.buf
, p
->src
.stride
,
717 pd
->dst
.buf
, pd
->dst
.stride
, &sse
);
721 #if CONFIG_VP9_HIGHBITDEPTH
722 if (xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
) {
723 vp9_model_rd_from_var_lapndz(sse
- var
, num_pels_log2_lookup
[bs
],
724 dc_quant
>> (xd
->bd
- 5), &rate
, &dist
);
726 vp9_model_rd_from_var_lapndz(sse
- var
, num_pels_log2_lookup
[bs
],
727 dc_quant
>> 3, &rate
, &dist
);
730 vp9_model_rd_from_var_lapndz(sse
- var
, num_pels_log2_lookup
[bs
],
731 dc_quant
>> 3, &rate
, &dist
);
732 #endif // CONFIG_VP9_HIGHBITDEPTH
734 *out_rate_sum
+= rate
>> 1;
735 *out_dist_sum
+= dist
<< 3;
737 #if CONFIG_VP9_HIGHBITDEPTH
738 if (xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
) {
739 vp9_model_rd_from_var_lapndz(var
, num_pels_log2_lookup
[bs
],
740 ac_quant
>> (xd
->bd
- 5), &rate
, &dist
);
742 vp9_model_rd_from_var_lapndz(var
, num_pels_log2_lookup
[bs
],
743 ac_quant
>> 3, &rate
, &dist
);
746 vp9_model_rd_from_var_lapndz(var
, num_pels_log2_lookup
[bs
],
747 ac_quant
>> 3, &rate
, &dist
);
748 #endif // CONFIG_VP9_HIGHBITDEPTH
750 *out_rate_sum
+= rate
;
751 *out_dist_sum
+= dist
<< 4;
755 static int get_pred_buffer(PRED_BUFFER
*p
, int len
) {
758 for (i
= 0; i
< len
; i
++) {
767 static void free_pred_buffer(PRED_BUFFER
*p
) {
772 static void encode_breakout_test(VP9_COMP
*cpi
, MACROBLOCK
*x
,
773 BLOCK_SIZE bsize
, int mi_row
, int mi_col
,
774 MV_REFERENCE_FRAME ref_frame
,
775 PREDICTION_MODE this_mode
,
776 unsigned int var_y
, unsigned int sse_y
,
777 struct buf_2d yv12_mb
[][MAX_MB_PLANE
],
778 int *rate
, int64_t *dist
) {
779 MACROBLOCKD
*xd
= &x
->e_mbd
;
781 const BLOCK_SIZE uv_size
= get_plane_block_size(bsize
, &xd
->plane
[1]);
782 unsigned int var
= var_y
, sse
= sse_y
;
783 // Skipping threshold for ac.
784 unsigned int thresh_ac
;
785 // Skipping threshold for dc.
786 unsigned int thresh_dc
;
787 if (x
->encode_breakout
> 0) {
788 // Set a maximum for threshold to avoid big PSNR loss in low bit rate
789 // case. Use extreme low threshold for static frames to limit
791 const unsigned int max_thresh
= 36000;
792 // The encode_breakout input
793 const unsigned int min_thresh
=
794 MIN(((unsigned int)x
->encode_breakout
<< 4), max_thresh
);
795 #if CONFIG_VP9_HIGHBITDEPTH
796 const int shift
= (xd
->bd
<< 1) - 16;
799 // Calculate threshold according to dequant value.
800 thresh_ac
= (xd
->plane
[0].dequant
[1] * xd
->plane
[0].dequant
[1]) >> 3;
801 #if CONFIG_VP9_HIGHBITDEPTH
802 if ((xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
) && shift
> 0) {
803 thresh_ac
= ROUND_POWER_OF_TWO(thresh_ac
, shift
);
805 #endif // CONFIG_VP9_HIGHBITDEPTH
806 thresh_ac
= clamp(thresh_ac
, min_thresh
, max_thresh
);
808 // Adjust ac threshold according to partition size.
810 8 - (b_width_log2_lookup
[bsize
] + b_height_log2_lookup
[bsize
]);
812 thresh_dc
= (xd
->plane
[0].dequant
[0] * xd
->plane
[0].dequant
[0] >> 6);
813 #if CONFIG_VP9_HIGHBITDEPTH
814 if ((xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
) && shift
> 0) {
815 thresh_dc
= ROUND_POWER_OF_TWO(thresh_dc
, shift
);
817 #endif // CONFIG_VP9_HIGHBITDEPTH
823 // Y skipping condition checking for ac and dc.
824 if (var
<= thresh_ac
&& (sse
- var
) <= thresh_dc
) {
825 unsigned int sse_u
, sse_v
;
826 unsigned int var_u
, var_v
;
828 // Skip UV prediction unless breakout is zero (lossless) to save
829 // computation with low impact on the result
830 if (x
->encode_breakout
== 0) {
831 xd
->plane
[1].pre
[0] = yv12_mb
[ref_frame
][1];
832 xd
->plane
[2].pre
[0] = yv12_mb
[ref_frame
][2];
833 vp9_build_inter_predictors_sbuv(xd
, mi_row
, mi_col
, bsize
);
836 var_u
= cpi
->fn_ptr
[uv_size
].vf(x
->plane
[1].src
.buf
,
837 x
->plane
[1].src
.stride
,
838 xd
->plane
[1].dst
.buf
,
839 xd
->plane
[1].dst
.stride
, &sse_u
);
841 // U skipping condition checking
842 if (((var_u
<< 2) <= thresh_ac
) && (sse_u
- var_u
<= thresh_dc
)) {
843 var_v
= cpi
->fn_ptr
[uv_size
].vf(x
->plane
[2].src
.buf
,
844 x
->plane
[2].src
.stride
,
845 xd
->plane
[2].dst
.buf
,
846 xd
->plane
[2].dst
.stride
, &sse_v
);
848 // V skipping condition checking
849 if (((var_v
<< 2) <= thresh_ac
) && (sse_v
- var_v
<= thresh_dc
)) {
852 // The cost of skip bit needs to be added.
853 *rate
= cpi
->inter_mode_cost
[x
->mbmi_ext
->mode_context
[ref_frame
]]
854 [INTER_OFFSET(this_mode
)];
856 // More on this part of rate
857 // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
859 // Scaling factor for SSE from spatial domain to frequency
860 // domain is 16. Adjust distortion accordingly.
861 // TODO(yunqingwang): In this function, only y-plane dist is
863 *dist
= (sse
<< 4); // + ((sse_u + sse_v) << 4);
865 // *disable_skip = 1;
871 struct estimate_block_intra_args
{
874 PREDICTION_MODE mode
;
879 static void estimate_block_intra(int plane
, int block
, BLOCK_SIZE plane_bsize
,
880 TX_SIZE tx_size
, void *arg
) {
881 struct estimate_block_intra_args
* const args
= arg
;
882 VP9_COMP
*const cpi
= args
->cpi
;
883 MACROBLOCK
*const x
= args
->x
;
884 MACROBLOCKD
*const xd
= &x
->e_mbd
;
885 struct macroblock_plane
*const p
= &x
->plane
[0];
886 struct macroblockd_plane
*const pd
= &xd
->plane
[0];
887 const BLOCK_SIZE bsize_tx
= txsize_to_bsize
[tx_size
];
888 uint8_t *const src_buf_base
= p
->src
.buf
;
889 uint8_t *const dst_buf_base
= pd
->dst
.buf
;
890 const int src_stride
= p
->src
.stride
;
891 const int dst_stride
= pd
->dst
.stride
;
895 int64_t this_sse
= INT64_MAX
;
898 txfrm_block_to_raster_xy(plane_bsize
, tx_size
, block
, &i
, &j
);
902 p
->src
.buf
= &src_buf_base
[4 * (j
* src_stride
+ i
)];
903 pd
->dst
.buf
= &dst_buf_base
[4 * (j
* dst_stride
+ i
)];
904 // Use source buffer as an approximation for the fully reconstructed buffer.
905 vp9_predict_intra_block(xd
, b_width_log2_lookup
[plane_bsize
],
907 x
->skip_encode
? p
->src
.buf
: pd
->dst
.buf
,
908 x
->skip_encode
? src_stride
: dst_stride
,
909 pd
->dst
.buf
, dst_stride
,
912 // TODO(jingning): This needs further refactoring.
913 block_yrd(cpi
, x
, &rate
, &dist
, &is_skippable
, &this_sse
, 0,
914 bsize_tx
, MIN(tx_size
, TX_16X16
));
915 x
->skip_txfm
[0] = is_skippable
;
916 rate
+= vp9_cost_bit(vp9_get_skip_prob(&cpi
->common
, xd
), is_skippable
);
918 p
->src
.buf
= src_buf_base
;
919 pd
->dst
.buf
= dst_buf_base
;
924 static const THR_MODES mode_idx
[MAX_REF_FRAMES
- 1][4] = {
925 {THR_DC
, THR_V_PRED
, THR_H_PRED
, THR_TM
},
926 {THR_NEARESTMV
, THR_NEARMV
, THR_ZEROMV
, THR_NEWMV
},
927 {THR_NEARESTG
, THR_NEARG
, THR_ZEROG
, THR_NEWG
},
930 static const PREDICTION_MODE intra_mode_list
[] = {
931 DC_PRED
, V_PRED
, H_PRED
, TM_PRED
934 static int mode_offset(const PREDICTION_MODE mode
) {
935 if (mode
>= NEARESTMV
) {
936 return INTER_OFFSET(mode
);
953 static INLINE
void update_thresh_freq_fact(VP9_COMP
*cpi
,
954 TileDataEnc
*tile_data
,
956 MV_REFERENCE_FRAME ref_frame
,
957 THR_MODES best_mode_idx
,
958 PREDICTION_MODE mode
) {
959 THR_MODES thr_mode_idx
= mode_idx
[ref_frame
][mode_offset(mode
)];
960 int *freq_fact
= &tile_data
->thresh_freq_fact
[bsize
][thr_mode_idx
];
961 if (thr_mode_idx
== best_mode_idx
)
962 *freq_fact
-= (*freq_fact
>> 4);
964 *freq_fact
= MIN(*freq_fact
+ RD_THRESH_INC
,
965 cpi
->sf
.adaptive_rd_thresh
* RD_THRESH_MAX_FACT
);
968 void vp9_pick_intra_mode(VP9_COMP
*cpi
, MACROBLOCK
*x
, RD_COST
*rd_cost
,
969 BLOCK_SIZE bsize
, PICK_MODE_CONTEXT
*ctx
) {
970 MACROBLOCKD
*const xd
= &x
->e_mbd
;
971 MB_MODE_INFO
*const mbmi
= &xd
->mi
[0]->mbmi
;
972 RD_COST this_rdc
, best_rdc
;
973 PREDICTION_MODE this_mode
;
974 struct estimate_block_intra_args args
= { cpi
, x
, DC_PRED
, 0, 0 };
975 const TX_SIZE intra_tx_size
=
976 MIN(max_txsize_lookup
[bsize
],
977 tx_mode_to_biggest_tx_size
[cpi
->common
.tx_mode
]);
978 MODE_INFO
*const mic
= xd
->mi
[0];
980 const MODE_INFO
*above_mi
= xd
->mi
[-xd
->mi_stride
];
981 const MODE_INFO
*left_mi
= xd
->left_available
? xd
->mi
[-1] : NULL
;
982 const PREDICTION_MODE A
= vp9_above_block_mode(mic
, above_mi
, 0);
983 const PREDICTION_MODE L
= vp9_left_block_mode(mic
, left_mi
, 0);
984 bmode_costs
= cpi
->y_mode_costs
[A
][L
];
987 vp9_rd_cost_reset(&best_rdc
);
988 vp9_rd_cost_reset(&this_rdc
);
990 mbmi
->ref_frame
[0] = INTRA_FRAME
;
991 mbmi
->mv
[0].as_int
= INVALID_MV
;
992 mbmi
->uv_mode
= DC_PRED
;
993 memset(x
->skip_txfm
, 0, sizeof(x
->skip_txfm
));
995 // Change the limit of this loop to add other intra prediction
997 for (this_mode
= DC_PRED
; this_mode
<= H_PRED
; ++this_mode
) {
998 args
.mode
= this_mode
;
1001 mbmi
->tx_size
= intra_tx_size
;
1002 vp9_foreach_transformed_block_in_plane(xd
, bsize
, 0,
1003 estimate_block_intra
, &args
);
1004 this_rdc
.rate
= args
.rate
;
1005 this_rdc
.dist
= args
.dist
;
1006 this_rdc
.rate
+= bmode_costs
[this_mode
];
1007 this_rdc
.rdcost
= RDCOST(x
->rdmult
, x
->rddiv
,
1008 this_rdc
.rate
, this_rdc
.dist
);
1010 if (this_rdc
.rdcost
< best_rdc
.rdcost
) {
1011 best_rdc
= this_rdc
;
1012 mbmi
->mode
= this_mode
;
1016 *rd_cost
= best_rdc
;
1019 static void init_ref_frame_cost(VP9_COMMON
*const cm
,
1020 MACROBLOCKD
*const xd
,
1021 int ref_frame_cost
[MAX_REF_FRAMES
]) {
1022 vpx_prob intra_inter_p
= vp9_get_intra_inter_prob(cm
, xd
);
1023 vpx_prob ref_single_p1
= vp9_get_pred_prob_single_ref_p1(cm
, xd
);
1024 vpx_prob ref_single_p2
= vp9_get_pred_prob_single_ref_p2(cm
, xd
);
1026 ref_frame_cost
[INTRA_FRAME
] = vp9_cost_bit(intra_inter_p
, 0);
1027 ref_frame_cost
[LAST_FRAME
] = ref_frame_cost
[GOLDEN_FRAME
] =
1028 ref_frame_cost
[ALTREF_FRAME
] = vp9_cost_bit(intra_inter_p
, 1);
1030 ref_frame_cost
[LAST_FRAME
] += vp9_cost_bit(ref_single_p1
, 0);
1031 ref_frame_cost
[GOLDEN_FRAME
] += vp9_cost_bit(ref_single_p1
, 1);
1032 ref_frame_cost
[ALTREF_FRAME
] += vp9_cost_bit(ref_single_p1
, 1);
1033 ref_frame_cost
[GOLDEN_FRAME
] += vp9_cost_bit(ref_single_p2
, 0);
1034 ref_frame_cost
[ALTREF_FRAME
] += vp9_cost_bit(ref_single_p2
, 1);
1038 MV_REFERENCE_FRAME ref_frame
;
1039 PREDICTION_MODE pred_mode
;
1042 #define RT_INTER_MODES 8
1043 static const REF_MODE ref_mode_set
[RT_INTER_MODES
] = {
1044 {LAST_FRAME
, ZEROMV
},
1045 {LAST_FRAME
, NEARESTMV
},
1046 {GOLDEN_FRAME
, ZEROMV
},
1047 {LAST_FRAME
, NEARMV
},
1048 {LAST_FRAME
, NEWMV
},
1049 {GOLDEN_FRAME
, NEARESTMV
},
1050 {GOLDEN_FRAME
, NEARMV
},
1051 {GOLDEN_FRAME
, NEWMV
}
1054 // TODO(jingning) placeholder for inter-frame non-RD mode decision.
1055 // this needs various further optimizations. to be continued..
1056 void vp9_pick_inter_mode(VP9_COMP
*cpi
, MACROBLOCK
*x
,
1057 TileDataEnc
*tile_data
,
1058 int mi_row
, int mi_col
, RD_COST
*rd_cost
,
1059 BLOCK_SIZE bsize
, PICK_MODE_CONTEXT
*ctx
) {
1060 VP9_COMMON
*const cm
= &cpi
->common
;
1061 SPEED_FEATURES
*const sf
= &cpi
->sf
;
1062 TileInfo
*const tile_info
= &tile_data
->tile_info
;
1063 MACROBLOCKD
*const xd
= &x
->e_mbd
;
1064 MB_MODE_INFO
*const mbmi
= &xd
->mi
[0]->mbmi
;
1065 struct macroblockd_plane
*const pd
= &xd
->plane
[0];
1066 PREDICTION_MODE best_mode
= ZEROMV
;
1067 MV_REFERENCE_FRAME ref_frame
, best_ref_frame
= LAST_FRAME
;
1068 MV_REFERENCE_FRAME usable_ref_frame
;
1069 TX_SIZE best_tx_size
= TX_SIZES
;
1070 INTERP_FILTER best_pred_filter
= EIGHTTAP
;
1071 int_mv frame_mv
[MB_MODE_COUNT
][MAX_REF_FRAMES
];
1072 struct buf_2d yv12_mb
[4][MAX_MB_PLANE
];
1073 static const int flag_list
[4] = { 0, VP9_LAST_FLAG
, VP9_GOLD_FLAG
,
1075 RD_COST this_rdc
, best_rdc
;
1076 uint8_t skip_txfm
= 0, best_mode_skip_txfm
= 0;
1077 // var_y and sse_y are saved to be used in skipping checking
1078 unsigned int var_y
= UINT_MAX
;
1079 unsigned int sse_y
= UINT_MAX
;
1080 // Reduce the intra cost penalty for small blocks (<=16x16).
1081 const int reduction_fac
= (bsize
<= BLOCK_16X16
) ?
1082 ((bsize
<= BLOCK_8X8
) ? 4 : 2) : 0;
1083 const int intra_cost_penalty
= vp9_get_intra_cost_penalty(
1084 cm
->base_qindex
, cm
->y_dc_delta_q
, cm
->bit_depth
) >> reduction_fac
;
1085 const int64_t inter_mode_thresh
= RDCOST(x
->rdmult
, x
->rddiv
,
1086 intra_cost_penalty
, 0);
1087 const int *const rd_threshes
= cpi
->rd
.threshes
[mbmi
->segment_id
][bsize
];
1088 const int *const rd_thresh_freq_fact
= tile_data
->thresh_freq_fact
[bsize
];
1089 INTERP_FILTER filter_ref
;
1090 const int bsl
= mi_width_log2_lookup
[bsize
];
1091 const int pred_filter_search
= cm
->interp_filter
== SWITCHABLE
?
1092 (((mi_row
+ mi_col
) >> bsl
) +
1093 get_chessboard_index(cm
->current_video_frame
)) & 0x1 : 0;
1094 int const_motion
[MAX_REF_FRAMES
] = { 0 };
1095 const int bh
= num_4x4_blocks_high_lookup
[bsize
] << 2;
1096 const int bw
= num_4x4_blocks_wide_lookup
[bsize
] << 2;
1097 // For speed 6, the result of interp filter is reused later in actual encoding
1099 // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
1101 DECLARE_ALIGNED(16, uint8_t, pred_buf
[3 * 64 * 64]);
1102 #if CONFIG_VP9_HIGHBITDEPTH
1103 DECLARE_ALIGNED(16, uint16_t, pred_buf_16
[3 * 64 * 64]);
1105 struct buf_2d orig_dst
= pd
->dst
;
1106 PRED_BUFFER
*best_pred
= NULL
;
1107 PRED_BUFFER
*this_mode_pred
= NULL
;
1108 const int pixels_in_block
= bh
* bw
;
1109 int reuse_inter_pred
= cpi
->sf
.reuse_inter_pred_sby
&& ctx
->pred_pixel_ready
;
1110 int ref_frame_skip_mask
= 0;
1112 int best_pred_sad
= INT_MAX
;
1113 int best_early_term
= 0;
1114 int ref_frame_cost
[MAX_REF_FRAMES
];
1116 init_ref_frame_cost(cm
, xd
, ref_frame_cost
);
1118 if (reuse_inter_pred
) {
1120 for (i
= 0; i
< 3; i
++) {
1121 #if CONFIG_VP9_HIGHBITDEPTH
1122 if (cm
->use_highbitdepth
)
1123 tmp
[i
].data
= CONVERT_TO_BYTEPTR(&pred_buf_16
[pixels_in_block
* i
]);
1125 tmp
[i
].data
= &pred_buf
[pixels_in_block
* i
];
1127 tmp
[i
].data
= &pred_buf
[pixels_in_block
* i
];
1128 #endif // CONFIG_VP9_HIGHBITDEPTH
1132 tmp
[3].data
= pd
->dst
.buf
;
1133 tmp
[3].stride
= pd
->dst
.stride
;
1137 x
->skip_encode
= cpi
->sf
.skip_encode_frame
&& x
->q_index
< QIDX_SKIP_THRESH
;
1140 if (xd
->up_available
)
1141 filter_ref
= xd
->mi
[-xd
->mi_stride
]->mbmi
.interp_filter
;
1142 else if (xd
->left_available
)
1143 filter_ref
= xd
->mi
[-1]->mbmi
.interp_filter
;
1145 filter_ref
= cm
->interp_filter
;
1147 // initialize mode decisions
1148 vp9_rd_cost_reset(&best_rdc
);
1149 vp9_rd_cost_reset(rd_cost
);
1150 mbmi
->sb_type
= bsize
;
1151 mbmi
->ref_frame
[0] = NONE
;
1152 mbmi
->ref_frame
[1] = NONE
;
1153 mbmi
->tx_size
= MIN(max_txsize_lookup
[bsize
],
1154 tx_mode_to_biggest_tx_size
[cm
->tx_mode
]);
1156 #if CONFIG_VP9_TEMPORAL_DENOISING
1157 vp9_denoiser_reset_frame_stats(ctx
);
1160 if (cpi
->rc
.frames_since_golden
== 0) {
1161 usable_ref_frame
= LAST_FRAME
;
1163 usable_ref_frame
= GOLDEN_FRAME
;
1166 for (ref_frame
= LAST_FRAME
; ref_frame
<= usable_ref_frame
; ++ref_frame
) {
1167 const YV12_BUFFER_CONFIG
*yv12
= get_ref_frame_buffer(cpi
, ref_frame
);
1169 x
->pred_mv_sad
[ref_frame
] = INT_MAX
;
1170 frame_mv
[NEWMV
][ref_frame
].as_int
= INVALID_MV
;
1171 frame_mv
[ZEROMV
][ref_frame
].as_int
= 0;
1173 if ((cpi
->ref_frame_flags
& flag_list
[ref_frame
]) && (yv12
!= NULL
)) {
1174 int_mv
*const candidates
= x
->mbmi_ext
->ref_mvs
[ref_frame
];
1175 const struct scale_factors
*const sf
= &cm
->frame_refs
[ref_frame
- 1].sf
;
1177 vp9_setup_pred_block(xd
, yv12_mb
[ref_frame
], yv12
, mi_row
, mi_col
,
1180 if (cm
->use_prev_frame_mvs
)
1181 vp9_find_mv_refs(cm
, xd
, xd
->mi
[0], ref_frame
,
1182 candidates
, mi_row
, mi_col
, NULL
, NULL
,
1183 x
->mbmi_ext
->mode_context
);
1185 const_motion
[ref_frame
] = mv_refs_rt(cm
, x
, xd
, tile_info
,
1187 ref_frame
, candidates
,
1190 vp9_find_best_ref_mvs(xd
, cm
->allow_high_precision_mv
, candidates
,
1191 &frame_mv
[NEARESTMV
][ref_frame
],
1192 &frame_mv
[NEARMV
][ref_frame
]);
1194 if (!vp9_is_scaled(sf
) && bsize
>= BLOCK_8X8
)
1195 vp9_mv_pred(cpi
, x
, yv12_mb
[ref_frame
][0].buf
, yv12
->y_stride
,
1198 ref_frame_skip_mask
|= (1 << ref_frame
);
1202 for (idx
= 0; idx
< RT_INTER_MODES
; ++idx
) {
1207 PREDICTION_MODE this_mode
= ref_mode_set
[idx
].pred_mode
;
1210 int this_early_term
= 0;
1212 if (!(cpi
->sf
.inter_mode_mask
[bsize
] & (1 << this_mode
)))
1215 ref_frame
= ref_mode_set
[idx
].ref_frame
;
1216 if (!(cpi
->ref_frame_flags
& flag_list
[ref_frame
]))
1218 if (const_motion
[ref_frame
] && this_mode
== NEARMV
)
1221 i
= (ref_frame
== LAST_FRAME
) ? GOLDEN_FRAME
: LAST_FRAME
;
1222 if ((cpi
->ref_frame_flags
& flag_list
[i
]) && sf
->reference_masking
)
1223 if (x
->pred_mv_sad
[ref_frame
] > (x
->pred_mv_sad
[i
] << 1))
1224 ref_frame_skip_mask
|= (1 << ref_frame
);
1225 if (ref_frame_skip_mask
& (1 << ref_frame
))
1228 // Select prediction reference frames.
1229 for (i
= 0; i
< MAX_MB_PLANE
; i
++)
1230 xd
->plane
[i
].pre
[0] = yv12_mb
[ref_frame
][i
];
1232 mbmi
->ref_frame
[0] = ref_frame
;
1233 set_ref_ptrs(cm
, xd
, ref_frame
, NONE
);
1235 mode_index
= mode_idx
[ref_frame
][INTER_OFFSET(this_mode
)];
1236 mode_rd_thresh
= best_mode_skip_txfm
?
1237 rd_threshes
[mode_index
] << 1 : rd_threshes
[mode_index
];
1238 if (rd_less_than_thresh(best_rdc
.rdcost
, mode_rd_thresh
,
1239 rd_thresh_freq_fact
[mode_index
]))
1242 if (this_mode
== NEWMV
) {
1243 if (ref_frame
> LAST_FRAME
) {
1245 int dis
, cost_list
[5];
1247 if (bsize
< BLOCK_16X16
)
1250 tmp_sad
= vp9_int_pro_motion_estimation(cpi
, x
, bsize
, mi_row
, mi_col
);
1252 if (tmp_sad
> x
->pred_mv_sad
[LAST_FRAME
])
1254 if (tmp_sad
+ (num_pels_log2_lookup
[bsize
] << 4) > best_pred_sad
)
1257 frame_mv
[NEWMV
][ref_frame
].as_int
= mbmi
->mv
[0].as_int
;
1258 rate_mv
= vp9_mv_bit_cost(&frame_mv
[NEWMV
][ref_frame
].as_mv
,
1259 &x
->mbmi_ext
->ref_mvs
[ref_frame
][0].as_mv
,
1260 x
->nmvjointcost
, x
->mvcost
, MV_COST_WEIGHT
);
1261 frame_mv
[NEWMV
][ref_frame
].as_mv
.row
>>= 3;
1262 frame_mv
[NEWMV
][ref_frame
].as_mv
.col
>>= 3;
1264 cpi
->find_fractional_mv_step(x
, &frame_mv
[NEWMV
][ref_frame
].as_mv
,
1265 &x
->mbmi_ext
->ref_mvs
[ref_frame
][0].as_mv
,
1266 cpi
->common
.allow_high_precision_mv
,
1268 &cpi
->fn_ptr
[bsize
],
1269 cpi
->sf
.mv
.subpel_force_stop
,
1270 cpi
->sf
.mv
.subpel_iters_per_step
,
1271 cond_cost_list(cpi
, cost_list
),
1272 x
->nmvjointcost
, x
->mvcost
, &dis
,
1273 &x
->pred_sse
[ref_frame
], NULL
, 0, 0);
1274 } else if (!combined_motion_search(cpi
, x
, bsize
, mi_row
, mi_col
,
1275 &frame_mv
[NEWMV
][ref_frame
], &rate_mv
, best_rdc
.rdcost
)) {
1280 if (this_mode
== NEWMV
&& ref_frame
== LAST_FRAME
&&
1281 frame_mv
[NEWMV
][LAST_FRAME
].as_int
!= INVALID_MV
) {
1282 const int pre_stride
= xd
->plane
[0].pre
[0].stride
;
1283 const uint8_t * const pre_buf
= xd
->plane
[0].pre
[0].buf
+
1284 (frame_mv
[NEWMV
][LAST_FRAME
].as_mv
.row
>> 3) * pre_stride
+
1285 (frame_mv
[NEWMV
][LAST_FRAME
].as_mv
.col
>> 3);
1286 best_pred_sad
= cpi
->fn_ptr
[bsize
].sdf(x
->plane
[0].src
.buf
,
1287 x
->plane
[0].src
.stride
,
1288 pre_buf
, pre_stride
);
1289 x
->pred_mv_sad
[LAST_FRAME
] = best_pred_sad
;
1292 if (this_mode
!= NEARESTMV
&&
1293 frame_mv
[this_mode
][ref_frame
].as_int
==
1294 frame_mv
[NEARESTMV
][ref_frame
].as_int
)
1297 mbmi
->mode
= this_mode
;
1298 mbmi
->mv
[0].as_int
= frame_mv
[this_mode
][ref_frame
].as_int
;
1300 // Search for the best prediction filter type, when the resulting
1301 // motion vector is at sub-pixel accuracy level for luma component, i.e.,
1302 // the last three bits are all zeros.
1303 if (reuse_inter_pred
) {
1304 if (!this_mode_pred
) {
1305 this_mode_pred
= &tmp
[3];
1307 this_mode_pred
= &tmp
[get_pred_buffer(tmp
, 3)];
1308 pd
->dst
.buf
= this_mode_pred
->data
;
1309 pd
->dst
.stride
= bw
;
1313 if ((this_mode
== NEWMV
|| filter_ref
== SWITCHABLE
) && pred_filter_search
1314 && (ref_frame
== LAST_FRAME
)
1315 && (((mbmi
->mv
[0].as_mv
.row
| mbmi
->mv
[0].as_mv
.col
) & 0x07) != 0)) {
1318 unsigned int pf_var
[3];
1319 unsigned int pf_sse
[3];
1320 TX_SIZE pf_tx_size
[3];
1321 int64_t best_cost
= INT64_MAX
;
1322 INTERP_FILTER best_filter
= SWITCHABLE
, filter
;
1323 PRED_BUFFER
*current_pred
= this_mode_pred
;
1325 for (filter
= EIGHTTAP
; filter
<= EIGHTTAP_SMOOTH
; ++filter
) {
1327 mbmi
->interp_filter
= filter
;
1328 vp9_build_inter_predictors_sby(xd
, mi_row
, mi_col
, bsize
);
1329 model_rd_for_sb_y(cpi
, bsize
, x
, xd
, &pf_rate
[filter
], &pf_dist
[filter
],
1330 &pf_var
[filter
], &pf_sse
[filter
]);
1331 pf_rate
[filter
] += vp9_get_switchable_rate(cpi
, xd
);
1332 cost
= RDCOST(x
->rdmult
, x
->rddiv
, pf_rate
[filter
], pf_dist
[filter
]);
1333 pf_tx_size
[filter
] = mbmi
->tx_size
;
1334 if (cost
< best_cost
) {
1335 best_filter
= filter
;
1337 skip_txfm
= x
->skip_txfm
[0];
1339 if (reuse_inter_pred
) {
1340 if (this_mode_pred
!= current_pred
) {
1341 free_pred_buffer(this_mode_pred
);
1342 this_mode_pred
= current_pred
;
1345 if (filter
< EIGHTTAP_SHARP
) {
1346 current_pred
= &tmp
[get_pred_buffer(tmp
, 3)];
1347 pd
->dst
.buf
= current_pred
->data
;
1348 pd
->dst
.stride
= bw
;
1354 if (reuse_inter_pred
&& this_mode_pred
!= current_pred
)
1355 free_pred_buffer(current_pred
);
1357 mbmi
->interp_filter
= best_filter
;
1358 mbmi
->tx_size
= pf_tx_size
[best_filter
];
1359 this_rdc
.rate
= pf_rate
[best_filter
];
1360 this_rdc
.dist
= pf_dist
[best_filter
];
1361 var_y
= pf_var
[best_filter
];
1362 sse_y
= pf_sse
[best_filter
];
1363 x
->skip_txfm
[0] = skip_txfm
;
1364 if (reuse_inter_pred
) {
1365 pd
->dst
.buf
= this_mode_pred
->data
;
1366 pd
->dst
.stride
= this_mode_pred
->stride
;
1369 mbmi
->interp_filter
= (filter_ref
== SWITCHABLE
) ? EIGHTTAP
: filter_ref
;
1370 vp9_build_inter_predictors_sby(xd
, mi_row
, mi_col
, bsize
);
1372 // For large partition blocks, extra testing is done.
1373 if (bsize
> BLOCK_32X32
&&
1374 !cyclic_refresh_segment_id_boosted(xd
->mi
[0]->mbmi
.segment_id
) &&
1376 model_rd_for_sb_y_large(cpi
, bsize
, x
, xd
, &this_rdc
.rate
,
1377 &this_rdc
.dist
, &var_y
, &sse_y
, mi_row
, mi_col
,
1380 model_rd_for_sb_y(cpi
, bsize
, x
, xd
, &this_rdc
.rate
, &this_rdc
.dist
,
1385 if (!this_early_term
) {
1386 this_sse
= (int64_t)sse_y
;
1387 block_yrd(cpi
, x
, &this_rdc
.rate
, &this_rdc
.dist
, &is_skippable
,
1388 &this_sse
, 0, bsize
, MIN(mbmi
->tx_size
, TX_16X16
));
1389 x
->skip_txfm
[0] = is_skippable
;
1391 this_rdc
.rate
= vp9_cost_bit(vp9_get_skip_prob(cm
, xd
), 1);
1393 if (RDCOST(x
->rdmult
, x
->rddiv
, this_rdc
.rate
, this_rdc
.dist
) <
1394 RDCOST(x
->rdmult
, x
->rddiv
, 0, this_sse
)) {
1395 this_rdc
.rate
+= vp9_cost_bit(vp9_get_skip_prob(cm
, xd
), 0);
1397 this_rdc
.rate
= vp9_cost_bit(vp9_get_skip_prob(cm
, xd
), 1);
1398 this_rdc
.dist
= this_sse
;
1399 x
->skip_txfm
[0] = 1;
1403 if (cm
->interp_filter
== SWITCHABLE
) {
1404 if ((mbmi
->mv
[0].as_mv
.row
| mbmi
->mv
[0].as_mv
.col
) & 0x07)
1405 this_rdc
.rate
+= vp9_get_switchable_rate(cpi
, xd
);
1408 this_rdc
.rate
+= cm
->interp_filter
== SWITCHABLE
?
1409 vp9_get_switchable_rate(cpi
, xd
) : 0;
1410 this_rdc
.rate
+= vp9_cost_bit(vp9_get_skip_prob(cm
, xd
), 1);
1413 if (x
->color_sensitivity
[0] || x
->color_sensitivity
[1]) {
1415 int64_t uv_dist
= 0;
1416 if (x
->color_sensitivity
[0])
1417 vp9_build_inter_predictors_sbp(xd
, mi_row
, mi_col
, bsize
, 1);
1418 if (x
->color_sensitivity
[1])
1419 vp9_build_inter_predictors_sbp(xd
, mi_row
, mi_col
, bsize
, 2);
1420 model_rd_for_sb_uv(cpi
, bsize
, x
, xd
, &uv_rate
, &uv_dist
,
1422 this_rdc
.rate
+= uv_rate
;
1423 this_rdc
.dist
+= uv_dist
;
1426 this_rdc
.rate
+= rate_mv
;
1428 cpi
->inter_mode_cost
[x
->mbmi_ext
->mode_context
[ref_frame
]][INTER_OFFSET(
1430 this_rdc
.rate
+= ref_frame_cost
[ref_frame
];
1431 this_rdc
.rdcost
= RDCOST(x
->rdmult
, x
->rddiv
, this_rdc
.rate
, this_rdc
.dist
);
1433 // Skipping checking: test to see if this block can be reconstructed by
1435 if (cpi
->allow_encode_breakout
) {
1436 encode_breakout_test(cpi
, x
, bsize
, mi_row
, mi_col
, ref_frame
, this_mode
,
1437 var_y
, sse_y
, yv12_mb
, &this_rdc
.rate
,
1440 this_rdc
.rate
+= rate_mv
;
1441 this_rdc
.rdcost
= RDCOST(x
->rdmult
, x
->rddiv
, this_rdc
.rate
,
1446 #if CONFIG_VP9_TEMPORAL_DENOISING
1447 if (cpi
->oxcf
.noise_sensitivity
> 0)
1448 vp9_denoiser_update_frame_stats(mbmi
, sse_y
, this_mode
, ctx
);
1453 if (this_rdc
.rdcost
< best_rdc
.rdcost
|| x
->skip
) {
1454 best_rdc
= this_rdc
;
1455 best_mode
= this_mode
;
1456 best_pred_filter
= mbmi
->interp_filter
;
1457 best_tx_size
= mbmi
->tx_size
;
1458 best_ref_frame
= ref_frame
;
1459 best_mode_skip_txfm
= x
->skip_txfm
[0];
1460 best_early_term
= this_early_term
;
1462 if (reuse_inter_pred
) {
1463 free_pred_buffer(best_pred
);
1464 best_pred
= this_mode_pred
;
1467 if (reuse_inter_pred
)
1468 free_pred_buffer(this_mode_pred
);
1474 // If early termination flag is 1 and at least 2 modes are checked,
1475 // the mode search is terminated.
1476 if (best_early_term
&& idx
> 0) {
1482 mbmi
->mode
= best_mode
;
1483 mbmi
->interp_filter
= best_pred_filter
;
1484 mbmi
->tx_size
= best_tx_size
;
1485 mbmi
->ref_frame
[0] = best_ref_frame
;
1486 mbmi
->mv
[0].as_int
= frame_mv
[best_mode
][best_ref_frame
].as_int
;
1487 xd
->mi
[0]->bmi
[0].as_mv
[0].as_int
= mbmi
->mv
[0].as_int
;
1488 x
->skip_txfm
[0] = best_mode_skip_txfm
;
1490 // Perform intra prediction search, if the best SAD is above a certain
1492 if (best_rdc
.rdcost
== INT64_MAX
||
1493 (!x
->skip
&& best_rdc
.rdcost
> inter_mode_thresh
&&
1494 bsize
<= cpi
->sf
.max_intra_bsize
)) {
1495 struct estimate_block_intra_args args
= { cpi
, x
, DC_PRED
, 0, 0 };
1496 const TX_SIZE intra_tx_size
=
1497 MIN(max_txsize_lookup
[bsize
],
1498 tx_mode_to_biggest_tx_size
[cpi
->common
.tx_mode
]);
1500 TX_SIZE best_intra_tx_size
= TX_SIZES
;
1502 if (reuse_inter_pred
&& best_pred
!= NULL
) {
1503 if (best_pred
->data
== orig_dst
.buf
) {
1504 this_mode_pred
= &tmp
[get_pred_buffer(tmp
, 3)];
1505 #if CONFIG_VP9_HIGHBITDEPTH
1506 if (cm
->use_highbitdepth
)
1507 vp9_highbd_convolve_copy(best_pred
->data
, best_pred
->stride
,
1508 this_mode_pred
->data
, this_mode_pred
->stride
,
1509 NULL
, 0, NULL
, 0, bw
, bh
, xd
->bd
);
1511 vp9_convolve_copy(best_pred
->data
, best_pred
->stride
,
1512 this_mode_pred
->data
, this_mode_pred
->stride
,
1513 NULL
, 0, NULL
, 0, bw
, bh
);
1515 vp9_convolve_copy(best_pred
->data
, best_pred
->stride
,
1516 this_mode_pred
->data
, this_mode_pred
->stride
,
1517 NULL
, 0, NULL
, 0, bw
, bh
);
1518 #endif // CONFIG_VP9_HIGHBITDEPTH
1519 best_pred
= this_mode_pred
;
1524 for (i
= 0; i
< 4; ++i
) {
1525 const PREDICTION_MODE this_mode
= intra_mode_list
[i
];
1526 THR_MODES mode_index
= mode_idx
[INTRA_FRAME
][mode_offset(this_mode
)];
1527 int mode_rd_thresh
= rd_threshes
[mode_index
];
1529 if (!((1 << this_mode
) & cpi
->sf
.intra_y_mode_bsize_mask
[bsize
]))
1532 if (rd_less_than_thresh(best_rdc
.rdcost
, mode_rd_thresh
,
1533 rd_thresh_freq_fact
[mode_index
]))
1536 mbmi
->mode
= this_mode
;
1537 mbmi
->ref_frame
[0] = INTRA_FRAME
;
1538 args
.mode
= this_mode
;
1541 mbmi
->tx_size
= intra_tx_size
;
1542 vp9_foreach_transformed_block_in_plane(xd
, bsize
, 0,
1543 estimate_block_intra
, &args
);
1544 this_rdc
.rate
= args
.rate
;
1545 this_rdc
.dist
= args
.dist
;
1546 this_rdc
.rate
+= cpi
->mbmode_cost
[this_mode
];
1547 this_rdc
.rate
+= ref_frame_cost
[INTRA_FRAME
];
1548 this_rdc
.rate
+= intra_cost_penalty
;
1549 this_rdc
.rdcost
= RDCOST(x
->rdmult
, x
->rddiv
,
1550 this_rdc
.rate
, this_rdc
.dist
);
1552 if (this_rdc
.rdcost
< best_rdc
.rdcost
) {
1553 best_rdc
= this_rdc
;
1554 best_mode
= this_mode
;
1555 best_intra_tx_size
= mbmi
->tx_size
;
1556 best_ref_frame
= INTRA_FRAME
;
1557 mbmi
->uv_mode
= this_mode
;
1558 mbmi
->mv
[0].as_int
= INVALID_MV
;
1559 best_mode_skip_txfm
= x
->skip_txfm
[0];
1563 // Reset mb_mode_info to the best inter mode.
1564 if (best_ref_frame
!= INTRA_FRAME
) {
1565 mbmi
->tx_size
= best_tx_size
;
1567 mbmi
->tx_size
= best_intra_tx_size
;
1572 mbmi
->mode
= best_mode
;
1573 mbmi
->ref_frame
[0] = best_ref_frame
;
1574 x
->skip_txfm
[0] = best_mode_skip_txfm
;
1576 if (reuse_inter_pred
&& best_pred
!= NULL
) {
1577 if (best_pred
->data
!= orig_dst
.buf
&& is_inter_mode(mbmi
->mode
)) {
1578 #if CONFIG_VP9_HIGHBITDEPTH
1579 if (cm
->use_highbitdepth
)
1580 vp9_highbd_convolve_copy(best_pred
->data
, best_pred
->stride
,
1581 pd
->dst
.buf
, pd
->dst
.stride
, NULL
, 0,
1582 NULL
, 0, bw
, bh
, xd
->bd
);
1584 vp9_convolve_copy(best_pred
->data
, best_pred
->stride
,
1585 pd
->dst
.buf
, pd
->dst
.stride
, NULL
, 0,
1588 vp9_convolve_copy(best_pred
->data
, best_pred
->stride
,
1589 pd
->dst
.buf
, pd
->dst
.stride
, NULL
, 0,
1591 #endif // CONFIG_VP9_HIGHBITDEPTH
1595 if (cpi
->sf
.adaptive_rd_thresh
) {
1596 THR_MODES best_mode_idx
= mode_idx
[best_ref_frame
][mode_offset(mbmi
->mode
)];
1598 if (best_ref_frame
== INTRA_FRAME
) {
1599 // Only consider the modes that are included in the intra_mode_list.
1600 int intra_modes
= sizeof(intra_mode_list
)/sizeof(PREDICTION_MODE
);
1603 // TODO(yunqingwang): Check intra mode mask and only update freq_fact
1604 // for those valid modes.
1605 for (i
= 0; i
< intra_modes
; i
++) {
1606 update_thresh_freq_fact(cpi
, tile_data
, bsize
, INTRA_FRAME
,
1607 best_mode_idx
, intra_mode_list
[i
]);
1610 for (ref_frame
= LAST_FRAME
; ref_frame
<= GOLDEN_FRAME
; ++ref_frame
) {
1611 PREDICTION_MODE this_mode
;
1612 if (best_ref_frame
!= ref_frame
) continue;
1613 for (this_mode
= NEARESTMV
; this_mode
<= NEWMV
; ++this_mode
) {
1614 update_thresh_freq_fact(cpi
, tile_data
, bsize
, ref_frame
,
1615 best_mode_idx
, this_mode
);
1621 *rd_cost
= best_rdc
;
1624 void vp9_pick_inter_mode_sub8x8(VP9_COMP
*cpi
, MACROBLOCK
*x
,
1625 int mi_row
, int mi_col
, RD_COST
*rd_cost
,
1626 BLOCK_SIZE bsize
, PICK_MODE_CONTEXT
*ctx
) {
1627 VP9_COMMON
*const cm
= &cpi
->common
;
1628 SPEED_FEATURES
*const sf
= &cpi
->sf
;
1629 MACROBLOCKD
*const xd
= &x
->e_mbd
;
1630 MB_MODE_INFO
*const mbmi
= &xd
->mi
[0]->mbmi
;
1631 MB_MODE_INFO_EXT
*const mbmi_ext
= x
->mbmi_ext
;
1632 const struct segmentation
*const seg
= &cm
->seg
;
1633 MV_REFERENCE_FRAME ref_frame
, second_ref_frame
= NONE
;
1634 MV_REFERENCE_FRAME best_ref_frame
= NONE
;
1635 unsigned char segment_id
= mbmi
->segment_id
;
1636 struct buf_2d yv12_mb
[4][MAX_MB_PLANE
];
1637 static const int flag_list
[4] = { 0, VP9_LAST_FLAG
, VP9_GOLD_FLAG
,
1639 int64_t best_rd
= INT64_MAX
;
1640 b_mode_info bsi
[MAX_REF_FRAMES
][4];
1641 int ref_frame_skip_mask
= 0;
1642 const int num_4x4_blocks_wide
= num_4x4_blocks_wide_lookup
[bsize
];
1643 const int num_4x4_blocks_high
= num_4x4_blocks_high_lookup
[bsize
];
1646 x
->skip_encode
= sf
->skip_encode_frame
&& x
->q_index
< QIDX_SKIP_THRESH
;
1647 ctx
->pred_pixel_ready
= 0;
1649 for (ref_frame
= LAST_FRAME
; ref_frame
<= GOLDEN_FRAME
; ++ref_frame
) {
1650 const YV12_BUFFER_CONFIG
*yv12
= get_ref_frame_buffer(cpi
, ref_frame
);
1652 x
->pred_mv_sad
[ref_frame
] = INT_MAX
;
1654 if ((cpi
->ref_frame_flags
& flag_list
[ref_frame
]) && (yv12
!= NULL
)) {
1655 int_mv
*const candidates
= mbmi_ext
->ref_mvs
[ref_frame
];
1656 const struct scale_factors
*const sf
=
1657 &cm
->frame_refs
[ref_frame
- 1].sf
;
1658 vp9_setup_pred_block(xd
, yv12_mb
[ref_frame
], yv12
, mi_row
, mi_col
,
1660 vp9_find_mv_refs(cm
, xd
, xd
->mi
[0], ref_frame
,
1661 candidates
, mi_row
, mi_col
, NULL
, NULL
,
1662 mbmi_ext
->mode_context
);
1664 vp9_find_best_ref_mvs(xd
, cm
->allow_high_precision_mv
, candidates
,
1665 &dummy_mv
[0], &dummy_mv
[1]);
1667 ref_frame_skip_mask
|= (1 << ref_frame
);
1671 mbmi
->sb_type
= bsize
;
1672 mbmi
->tx_size
= TX_4X4
;
1673 mbmi
->uv_mode
= DC_PRED
;
1674 mbmi
->ref_frame
[0] = LAST_FRAME
;
1675 mbmi
->ref_frame
[1] = NONE
;
1676 mbmi
->interp_filter
= cm
->interp_filter
== SWITCHABLE
? EIGHTTAP
1677 : cm
->interp_filter
;
1679 for (ref_frame
= LAST_FRAME
; ref_frame
<= GOLDEN_FRAME
; ++ref_frame
) {
1680 int64_t this_rd
= 0;
1683 if (ref_frame_skip_mask
& (1 << ref_frame
))
1686 // TODO(jingning, agrange): Scaling reference frame not supported for
1687 // sub8x8 blocks. Is this supported now?
1688 if (ref_frame
> INTRA_FRAME
&&
1689 vp9_is_scaled(&cm
->frame_refs
[ref_frame
- 1].sf
))
1692 // If the segment reference frame feature is enabled....
1693 // then do nothing if the current ref frame is not allowed..
1694 if (segfeature_active(seg
, segment_id
, SEG_LVL_REF_FRAME
) &&
1695 get_segdata(seg
, segment_id
, SEG_LVL_REF_FRAME
) != (int)ref_frame
)
1698 mbmi
->ref_frame
[0] = ref_frame
;
1700 set_ref_ptrs(cm
, xd
, ref_frame
, second_ref_frame
);
1702 // Select prediction reference frames.
1703 for (plane
= 0; plane
< MAX_MB_PLANE
; plane
++)
1704 xd
->plane
[plane
].pre
[0] = yv12_mb
[ref_frame
][plane
];
1706 for (idy
= 0; idy
< 2; idy
+= num_4x4_blocks_high
) {
1707 for (idx
= 0; idx
< 2; idx
+= num_4x4_blocks_wide
) {
1708 int_mv b_mv
[MB_MODE_COUNT
];
1709 int64_t b_best_rd
= INT64_MAX
;
1710 const int i
= idy
* 2 + idx
;
1711 PREDICTION_MODE this_mode
;
1713 unsigned int var_y
, sse_y
;
1715 struct macroblock_plane
*p
= &x
->plane
[0];
1716 struct macroblockd_plane
*pd
= &xd
->plane
[0];
1718 const struct buf_2d orig_src
= p
->src
;
1719 const struct buf_2d orig_dst
= pd
->dst
;
1720 struct buf_2d orig_pre
[2];
1721 memcpy(orig_pre
, xd
->plane
[0].pre
, sizeof(orig_pre
));
1723 // set buffer pointers for sub8x8 motion search.
1725 &p
->src
.buf
[vp9_raster_block_offset(BLOCK_8X8
, i
, p
->src
.stride
)];
1727 &pd
->dst
.buf
[vp9_raster_block_offset(BLOCK_8X8
, i
, pd
->dst
.stride
)];
1729 &pd
->pre
[0].buf
[vp9_raster_block_offset(BLOCK_8X8
,
1730 i
, pd
->pre
[0].stride
)];
1732 b_mv
[ZEROMV
].as_int
= 0;
1733 b_mv
[NEWMV
].as_int
= INVALID_MV
;
1734 vp9_append_sub8x8_mvs_for_idx(cm
, xd
, i
, 0, mi_row
, mi_col
,
1737 mbmi_ext
->mode_context
);
1739 for (this_mode
= NEARESTMV
; this_mode
<= NEWMV
; ++this_mode
) {
1741 xd
->mi
[0]->bmi
[i
].as_mv
[0].as_int
= b_mv
[this_mode
].as_int
;
1743 if (this_mode
== NEWMV
) {
1744 const int step_param
= cpi
->sf
.mv
.fullpel_search_step_param
;
1748 const int tmp_col_min
= x
->mv_col_min
;
1749 const int tmp_col_max
= x
->mv_col_max
;
1750 const int tmp_row_min
= x
->mv_row_min
;
1751 const int tmp_row_max
= x
->mv_row_max
;
1755 mvp_full
.row
= b_mv
[NEARESTMV
].as_mv
.row
>> 3;
1756 mvp_full
.col
= b_mv
[NEARESTMV
].as_mv
.col
>> 3;
1758 mvp_full
.row
= xd
->mi
[0]->bmi
[0].as_mv
[0].as_mv
.row
>> 3;
1759 mvp_full
.col
= xd
->mi
[0]->bmi
[0].as_mv
[0].as_mv
.col
>> 3;
1762 vp9_set_mv_search_range(x
, &mbmi_ext
->ref_mvs
[0]->as_mv
);
1764 vp9_full_pixel_search(
1765 cpi
, x
, bsize
, &mvp_full
, step_param
, x
->sadperbit4
,
1766 cond_cost_list(cpi
, cost_list
),
1767 &mbmi_ext
->ref_mvs
[ref_frame
][0].as_mv
, &tmp_mv
,
1770 x
->mv_col_min
= tmp_col_min
;
1771 x
->mv_col_max
= tmp_col_max
;
1772 x
->mv_row_min
= tmp_row_min
;
1773 x
->mv_row_max
= tmp_row_max
;
1775 // calculate the bit cost on motion vector
1776 mvp_full
.row
= tmp_mv
.row
* 8;
1777 mvp_full
.col
= tmp_mv
.col
* 8;
1779 b_rate
+= vp9_mv_bit_cost(&mvp_full
,
1780 &mbmi_ext
->ref_mvs
[ref_frame
][0].as_mv
,
1781 x
->nmvjointcost
, x
->mvcost
,
1784 b_rate
+= cpi
->inter_mode_cost
[x
->mbmi_ext
->mode_context
[ref_frame
]]
1785 [INTER_OFFSET(NEWMV
)];
1786 if (RDCOST(x
->rdmult
, x
->rddiv
, b_rate
, 0) > b_best_rd
)
1789 cpi
->find_fractional_mv_step(x
, &tmp_mv
,
1790 &mbmi_ext
->ref_mvs
[ref_frame
][0].as_mv
,
1791 cpi
->common
.allow_high_precision_mv
,
1793 &cpi
->fn_ptr
[bsize
],
1794 cpi
->sf
.mv
.subpel_force_stop
,
1795 cpi
->sf
.mv
.subpel_iters_per_step
,
1796 cond_cost_list(cpi
, cost_list
),
1797 x
->nmvjointcost
, x
->mvcost
,
1799 &x
->pred_sse
[ref_frame
], NULL
, 0, 0);
1801 xd
->mi
[0]->bmi
[i
].as_mv
[0].as_mv
= tmp_mv
;
1803 b_rate
+= cpi
->inter_mode_cost
[x
->mbmi_ext
->mode_context
[ref_frame
]]
1804 [INTER_OFFSET(this_mode
)];
1807 #if CONFIG_VP9_HIGHBITDEPTH
1808 if (xd
->cur_buf
->flags
& YV12_FLAG_HIGHBITDEPTH
) {
1809 vp9_highbd_build_inter_predictor(pd
->pre
[0].buf
, pd
->pre
[0].stride
,
1810 pd
->dst
.buf
, pd
->dst
.stride
,
1811 &xd
->mi
[0]->bmi
[i
].as_mv
[0].as_mv
,
1812 &xd
->block_refs
[0]->sf
,
1813 4 * num_4x4_blocks_wide
,
1814 4 * num_4x4_blocks_high
, 0,
1815 vp9_filter_kernels
[mbmi
->interp_filter
],
1817 mi_col
* MI_SIZE
+ 4 * (i
& 0x01),
1818 mi_row
* MI_SIZE
+ 4 * (i
>> 1), xd
->bd
);
1821 vp9_build_inter_predictor(pd
->pre
[0].buf
, pd
->pre
[0].stride
,
1822 pd
->dst
.buf
, pd
->dst
.stride
,
1823 &xd
->mi
[0]->bmi
[i
].as_mv
[0].as_mv
,
1824 &xd
->block_refs
[0]->sf
,
1825 4 * num_4x4_blocks_wide
,
1826 4 * num_4x4_blocks_high
, 0,
1827 vp9_filter_kernels
[mbmi
->interp_filter
],
1829 mi_col
* MI_SIZE
+ 4 * (i
& 0x01),
1830 mi_row
* MI_SIZE
+ 4 * (i
>> 1));
1832 #if CONFIG_VP9_HIGHBITDEPTH
1836 model_rd_for_sb_y(cpi
, bsize
, x
, xd
, &this_rdc
.rate
, &this_rdc
.dist
,
1839 this_rdc
.rate
+= b_rate
;
1840 this_rdc
.rdcost
= RDCOST(x
->rdmult
, x
->rddiv
,
1841 this_rdc
.rate
, this_rdc
.dist
);
1842 if (this_rdc
.rdcost
< b_best_rd
) {
1843 b_best_rd
= this_rdc
.rdcost
;
1844 bsi
[ref_frame
][i
].as_mode
= this_mode
;
1845 bsi
[ref_frame
][i
].as_mv
[0].as_mv
= xd
->mi
[0]->bmi
[i
].as_mv
[0].as_mv
;
1849 // restore source and prediction buffer pointers.
1851 pd
->pre
[0] = orig_pre
[0];
1853 this_rd
+= b_best_rd
;
1855 xd
->mi
[0]->bmi
[i
] = bsi
[ref_frame
][i
];
1856 if (num_4x4_blocks_wide
> 1)
1857 xd
->mi
[0]->bmi
[i
+ 1] = xd
->mi
[0]->bmi
[i
];
1858 if (num_4x4_blocks_high
> 1)
1859 xd
->mi
[0]->bmi
[i
+ 2] = xd
->mi
[0]->bmi
[i
];
1861 } // loop through sub8x8 blocks
1863 if (this_rd
< best_rd
) {
1865 best_ref_frame
= ref_frame
;
1867 } // reference frames
1869 mbmi
->tx_size
= TX_4X4
;
1870 mbmi
->ref_frame
[0] = best_ref_frame
;
1871 for (idy
= 0; idy
< 2; idy
+= num_4x4_blocks_high
) {
1872 for (idx
= 0; idx
< 2; idx
+= num_4x4_blocks_wide
) {
1873 const int block
= idy
* 2 + idx
;
1874 xd
->mi
[0]->bmi
[block
] = bsi
[best_ref_frame
][block
];
1875 if (num_4x4_blocks_wide
> 1)
1876 xd
->mi
[0]->bmi
[block
+ 1] = bsi
[best_ref_frame
][block
];
1877 if (num_4x4_blocks_high
> 1)
1878 xd
->mi
[0]->bmi
[block
+ 2] = bsi
[best_ref_frame
][block
];
1881 mbmi
->mode
= xd
->mi
[0]->bmi
[3].as_mode
;
1882 ctx
->mic
= *(xd
->mi
[0]);
1883 ctx
->mbmi_ext
= *x
->mbmi_ext
;
1884 ctx
->skip_txfm
[0] = 0;
1886 // Dummy assignment for speed -5. No effect in speed -6.
1887 rd_cost
->rdcost
= best_rd
;