/*****************************************************************************
 * slicetype.c: lookahead analysis
 *****************************************************************************
 * Copyright (C) 2005-2019 x264 project
 *
 * Authors: Fiona Glaser <fiona@x264.com>
 *          Loren Merritt <lorenm@u.washington.edu>
 *          Dylan Yudaken <dyudaken@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at licensing@x264.com.
 *****************************************************************************/
28 #include "common/common.h"
29 #include "macroblock.h"
// Indexed by pic_struct values
// (10 entries, one per pic_struct value 0..9; the divisor semantics are
//  defined by the code that consumes this table, which is not in this section.)
static const uint8_t delta_tfi_divisor[10] = { 0, 2, 1, 1, 2, 2, 3, 3, 4, 6 };
35 static int slicetype_frame_cost( x264_t
*h
, x264_mb_analysis_t
*a
,
36 x264_frame_t
**frames
, int p0
, int p1
, int b
);
38 #define x264_weights_analyse x264_template(weights_analyse)
39 void x264_weights_analyse( x264_t
*h
, x264_frame_t
*fenc
, x264_frame_t
*ref
, int b_lookahead
);
42 #include "slicetype-cl.h"
45 static void lowres_context_init( x264_t
*h
, x264_mb_analysis_t
*a
)
47 a
->i_qp
= X264_LOOKAHEAD_QP
;
48 a
->i_lambda
= x264_lambda_tab
[ a
->i_qp
];
49 mb_analyse_load_costs( h
, a
);
50 if( h
->param
.analyse
.i_subpel_refine
> 1 )
52 h
->mb
.i_me_method
= X264_MIN( X264_ME_HEX
, h
->param
.analyse
.i_me_method
);
53 h
->mb
.i_subpel_refine
= 4;
57 h
->mb
.i_me_method
= X264_ME_DIA
;
58 h
->mb
.i_subpel_refine
= 2;
60 h
->mb
.b_chroma_me
= 0;
63 /* makes a non-h264 weight (i.e. fix7), into an h264 weight */
64 static void weight_get_h264( int weight_nonh264
, int offset
, x264_weight_t
*w
)
68 w
->i_scale
= weight_nonh264
;
69 while( w
->i_denom
> 0 && (w
->i_scale
> 127) )
74 w
->i_scale
= X264_MIN( w
->i_scale
, 127 );
77 static NOINLINE pixel
*weight_cost_init_luma( x264_t
*h
, x264_frame_t
*fenc
, x264_frame_t
*ref
, pixel
*dest
)
79 int ref0_distance
= fenc
->i_frame
- ref
->i_frame
- 1;
80 /* Note: this will never run during lookahead as weights_analyse is only called if no
81 * motion search has been done. */
82 if( fenc
->lowres_mvs
[0][ref0_distance
][0][0] != 0x7FFF )
84 int i_stride
= fenc
->i_stride_lowres
;
85 int i_lines
= fenc
->i_lines_lowres
;
86 int i_width
= fenc
->i_width_lowres
;
90 for( int y
= 0; y
< i_lines
; y
+= 8, p
+= i_stride
*8 )
91 for( int x
= 0; x
< i_width
; x
+= 8, i_mb_xy
++ )
93 int mvx
= fenc
->lowres_mvs
[0][ref0_distance
][i_mb_xy
][0];
94 int mvy
= fenc
->lowres_mvs
[0][ref0_distance
][i_mb_xy
][1];
95 h
->mc
.mc_luma( p
+x
, i_stride
, ref
->lowres
, i_stride
,
96 mvx
+(x
<<2), mvy
+(y
<<2), 8, 8, x264_weight_none
);
102 return ref
->lowres
[0];
105 /* How data is organized for 4:2:0/4:2:2 chroma weightp:
108 * fenc = ref + offset
109 * v = u + stride * chroma height */
111 static NOINLINE
void weight_cost_init_chroma( x264_t
*h
, x264_frame_t
*fenc
, x264_frame_t
*ref
, pixel
*dstu
, pixel
*dstv
)
113 int ref0_distance
= fenc
->i_frame
- ref
->i_frame
- 1;
114 int i_stride
= fenc
->i_stride
[1];
115 int i_offset
= i_stride
/ 2;
116 int i_lines
= fenc
->i_lines
[1];
117 int i_width
= fenc
->i_width
[1];
118 int v_shift
= CHROMA_V_SHIFT
;
119 int cw
= 8*h
->mb
.i_mb_width
;
120 int ch
= 16*h
->mb
.i_mb_height
>> v_shift
;
121 int height
= 16 >> v_shift
;
123 if( fenc
->lowres_mvs
[0][ref0_distance
][0][0] != 0x7FFF )
125 x264_frame_expand_border_chroma( h
, ref
, 1 );
126 for( int y
= 0, mb_xy
= 0, pel_offset_y
= 0; y
< i_lines
; y
+= height
, pel_offset_y
= y
*i_stride
)
127 for( int x
= 0, pel_offset_x
= 0; x
< i_width
; x
+= 8, mb_xy
++, pel_offset_x
+= 8 )
129 pixel
*pixu
= dstu
+ pel_offset_y
+ pel_offset_x
;
130 pixel
*pixv
= dstv
+ pel_offset_y
+ pel_offset_x
;
131 pixel
*src1
= ref
->plane
[1] + pel_offset_y
+ pel_offset_x
*2; /* NV12/NV16 */
132 int mvx
= fenc
->lowres_mvs
[0][ref0_distance
][mb_xy
][0];
133 int mvy
= fenc
->lowres_mvs
[0][ref0_distance
][mb_xy
][1];
134 h
->mc
.mc_chroma( pixu
, pixv
, i_stride
, src1
, i_stride
, mvx
, 2*mvy
>>v_shift
, 8, height
);
138 h
->mc
.plane_copy_deinterleave( dstu
, i_stride
, dstv
, i_stride
, ref
->plane
[1], i_stride
, cw
, ch
);
139 h
->mc
.plane_copy_deinterleave( dstu
+i_offset
, i_stride
, dstv
+i_offset
, i_stride
, fenc
->plane
[1], i_stride
, cw
, ch
);
143 static NOINLINE pixel
*weight_cost_init_chroma444( x264_t
*h
, x264_frame_t
*fenc
, x264_frame_t
*ref
, pixel
*dst
, int p
)
145 int ref0_distance
= fenc
->i_frame
- ref
->i_frame
- 1;
146 int i_stride
= fenc
->i_stride
[p
];
147 int i_lines
= fenc
->i_lines
[p
];
148 int i_width
= fenc
->i_width
[p
];
150 if( fenc
->lowres_mvs
[0][ref0_distance
][0][0] != 0x7FFF )
152 x264_frame_expand_border_chroma( h
, ref
, p
);
153 for( int y
= 0, mb_xy
= 0, pel_offset_y
= 0; y
< i_lines
; y
+= 16, pel_offset_y
= y
*i_stride
)
154 for( int x
= 0, pel_offset_x
= 0; x
< i_width
; x
+= 16, mb_xy
++, pel_offset_x
+= 16 )
156 pixel
*pix
= dst
+ pel_offset_y
+ pel_offset_x
;
157 pixel
*src
= ref
->plane
[p
] + pel_offset_y
+ pel_offset_x
;
158 int mvx
= fenc
->lowres_mvs
[0][ref0_distance
][mb_xy
][0] / 2;
159 int mvy
= fenc
->lowres_mvs
[0][ref0_distance
][mb_xy
][1] / 2;
160 /* We don't want to calculate hpels for fenc frames, so we round the motion
161 * vectors to fullpel here. It's not too bad, I guess? */
162 h
->mc
.copy_16x16_unaligned( pix
, i_stride
, src
+mvx
+mvy
*i_stride
, i_stride
, 16 );
168 return ref
->plane
[p
];
171 static int weight_slice_header_cost( x264_t
*h
, x264_weight_t
*w
, int b_chroma
)
173 /* Add cost of weights in the slice header. */
174 int lambda
= x264_lambda_tab
[X264_LOOKAHEAD_QP
];
175 /* 4 times higher, because chroma is analyzed at full resolution. */
179 if( h
->param
.i_slice_count
)
180 numslices
= h
->param
.i_slice_count
;
181 else if( h
->param
.i_slice_max_mbs
)
182 numslices
= (h
->mb
.i_mb_width
* h
->mb
.i_mb_height
+ h
->param
.i_slice_max_mbs
-1) / h
->param
.i_slice_max_mbs
;
185 /* FIXME: find a way to account for --slice-max-size?
186 * Multiply by 2 as there will be a duplicate. 10 bits added as if there is a weighted frame, then an additional duplicate is used.
187 * Cut denom cost in half if chroma, since it's shared between the two chroma planes. */
188 int denom_cost
= bs_size_ue( w
[0].i_denom
) * (2 - b_chroma
);
189 return lambda
* numslices
* ( 10 + denom_cost
+ 2 * (bs_size_se( w
[0].i_scale
) + bs_size_se( w
[0].i_offset
)) );
192 static NOINLINE
unsigned int weight_cost_luma( x264_t
*h
, x264_frame_t
*fenc
, pixel
*src
, x264_weight_t
*w
)
194 unsigned int cost
= 0;
195 int i_stride
= fenc
->i_stride_lowres
;
196 int i_lines
= fenc
->i_lines_lowres
;
197 int i_width
= fenc
->i_width_lowres
;
198 pixel
*fenc_plane
= fenc
->lowres
[0];
199 ALIGNED_ARRAY_16( pixel
, buf
,[8*8] );
205 for( int y
= 0; y
< i_lines
; y
+= 8, pixoff
= y
*i_stride
)
206 for( int x
= 0; x
< i_width
; x
+= 8, i_mb
++, pixoff
+= 8)
208 w
->weightfn
[8>>2]( buf
, 8, &src
[pixoff
], i_stride
, w
, 8 );
209 int cmp
= h
->pixf
.mbcmp
[PIXEL_8x8
]( buf
, 8, &fenc_plane
[pixoff
], i_stride
);
210 cost
+= X264_MIN( cmp
, fenc
->i_intra_cost
[i_mb
] );
212 cost
+= weight_slice_header_cost( h
, w
, 0 );
215 for( int y
= 0; y
< i_lines
; y
+= 8, pixoff
= y
*i_stride
)
216 for( int x
= 0; x
< i_width
; x
+= 8, i_mb
++, pixoff
+= 8 )
218 int cmp
= h
->pixf
.mbcmp
[PIXEL_8x8
]( &src
[pixoff
], i_stride
, &fenc_plane
[pixoff
], i_stride
);
219 cost
+= X264_MIN( cmp
, fenc
->i_intra_cost
[i_mb
] );
225 static NOINLINE
unsigned int weight_cost_chroma( x264_t
*h
, x264_frame_t
*fenc
, pixel
*ref
, x264_weight_t
*w
)
227 unsigned int cost
= 0;
228 int i_stride
= fenc
->i_stride
[1];
229 int i_lines
= fenc
->i_lines
[1];
230 int i_width
= fenc
->i_width
[1];
231 pixel
*src
= ref
+ (i_stride
>> 1);
232 ALIGNED_ARRAY_16( pixel
, buf
, [8*16] );
234 int height
= 16 >> CHROMA_V_SHIFT
;
237 for( int y
= 0; y
< i_lines
; y
+= height
, pixoff
= y
*i_stride
)
238 for( int x
= 0; x
< i_width
; x
+= 8, pixoff
+= 8 )
240 w
->weightfn
[8>>2]( buf
, 8, &ref
[pixoff
], i_stride
, w
, height
);
241 /* The naive and seemingly sensible algorithm is to use mbcmp as in luma.
242 * But testing shows that for chroma the DC coefficient is by far the most
243 * important part of the coding cost. Thus a more useful chroma weight is
244 * obtained by comparing each block's DC coefficient instead of the actual
246 cost
+= h
->pixf
.asd8( buf
, 8, &src
[pixoff
], i_stride
, height
);
248 cost
+= weight_slice_header_cost( h
, w
, 1 );
251 for( int y
= 0; y
< i_lines
; y
+= height
, pixoff
= y
*i_stride
)
252 for( int x
= 0; x
< i_width
; x
+= 8, pixoff
+= 8 )
253 cost
+= h
->pixf
.asd8( &ref
[pixoff
], i_stride
, &src
[pixoff
], i_stride
, height
);
258 static NOINLINE
unsigned int weight_cost_chroma444( x264_t
*h
, x264_frame_t
*fenc
, pixel
*ref
, x264_weight_t
*w
, int p
)
260 unsigned int cost
= 0;
261 int i_stride
= fenc
->i_stride
[p
];
262 int i_lines
= fenc
->i_lines
[p
];
263 int i_width
= fenc
->i_width
[p
];
264 pixel
*src
= fenc
->plane
[p
];
265 ALIGNED_ARRAY_64( pixel
, buf
, [16*16] );
269 for( int y
= 0; y
< i_lines
; y
+= 16, pixoff
= y
*i_stride
)
270 for( int x
= 0; x
< i_width
; x
+= 16, pixoff
+= 16 )
272 w
->weightfn
[16>>2]( buf
, 16, &ref
[pixoff
], i_stride
, w
, 16 );
273 cost
+= h
->pixf
.mbcmp
[PIXEL_16x16
]( buf
, 16, &src
[pixoff
], i_stride
);
275 cost
+= weight_slice_header_cost( h
, w
, 1 );
278 for( int y
= 0; y
< i_lines
; y
+= 16, pixoff
= y
*i_stride
)
279 for( int x
= 0; x
< i_width
; x
+= 16, pixoff
+= 16 )
280 cost
+= h
->pixf
.mbcmp
[PIXEL_16x16
]( &ref
[pixoff
], i_stride
, &src
[pixoff
], i_stride
);
285 void x264_weights_analyse( x264_t
*h
, x264_frame_t
*fenc
, x264_frame_t
*ref
, int b_lookahead
)
287 int i_delta_index
= fenc
->i_frame
- ref
->i_frame
- 1;
288 /* epsilon is chosen to require at least a numerator of 127 (with denominator = 128) */
289 const float epsilon
= 1.f
/128.f
;
290 x264_weight_t
*weights
= fenc
->weight
[0];
291 SET_WEIGHT( weights
[0], 0, 1, 0, 0 );
292 SET_WEIGHT( weights
[1], 0, 1, 0, 0 );
293 SET_WEIGHT( weights
[2], 0, 1, 0, 0 );
294 int chroma_initted
= 0;
295 float guess_scale
[3];
298 for( int plane
= 0; plane
<= 2*!b_lookahead
; plane
++ )
300 int zero_bias
= !ref
->i_pixel_ssd
[plane
];
301 float fenc_var
= fenc
->i_pixel_ssd
[plane
] + zero_bias
;
302 float ref_var
= ref
->i_pixel_ssd
[plane
] + zero_bias
;
303 guess_scale
[plane
] = sqrtf( fenc_var
/ ref_var
);
304 fenc_mean
[plane
] = (float)(fenc
->i_pixel_sum
[plane
] + zero_bias
) / (fenc
->i_lines
[!!plane
] * fenc
->i_width
[!!plane
]) / (1 << (BIT_DEPTH
- 8));
305 ref_mean
[plane
] = (float)( ref
->i_pixel_sum
[plane
] + zero_bias
) / (fenc
->i_lines
[!!plane
] * fenc
->i_width
[!!plane
]) / (1 << (BIT_DEPTH
- 8));
308 int chroma_denom
= 7;
311 /* make sure both our scale factors fit */
312 while( chroma_denom
> 0 )
314 float thresh
= 127.f
/ (1<<chroma_denom
);
315 if( guess_scale
[1] < thresh
&& guess_scale
[2] < thresh
)
321 /* Don't check chroma in lookahead, or if there wasn't a luma weight. */
322 for( int plane
= 0; plane
< (CHROMA_FORMAT
? 3 : 1) && !( plane
&& ( !weights
[0].weightfn
|| b_lookahead
) ); plane
++ )
324 int minoff
, minscale
, mindenom
;
325 unsigned int minscore
, origscore
;
329 if( fabsf( ref_mean
[plane
] - fenc_mean
[plane
] ) < 0.5f
&& fabsf( 1.f
- guess_scale
[plane
] ) < epsilon
)
331 SET_WEIGHT( weights
[plane
], 0, 1, 0, 0 );
337 weights
[plane
].i_denom
= chroma_denom
;
338 weights
[plane
].i_scale
= x264_clip3( round( guess_scale
[plane
] * (1<<chroma_denom
) ), 0, 255 );
339 if( weights
[plane
].i_scale
> 127 )
341 weights
[1].weightfn
= weights
[2].weightfn
= NULL
;
346 weight_get_h264( round( guess_scale
[plane
] * 128 ), 0, &weights
[plane
] );
349 mindenom
= weights
[plane
].i_denom
;
350 minscale
= weights
[plane
].i_scale
;
356 if( !fenc
->b_intra_calculated
)
358 x264_mb_analysis_t a
;
359 lowres_context_init( h
, &a
);
360 slicetype_frame_cost( h
, &a
, &fenc
, 0, 0, 0 );
362 mcbuf
= weight_cost_init_luma( h
, fenc
, ref
, h
->mb
.p_weight_buf
[0] );
363 origscore
= minscore
= weight_cost_luma( h
, fenc
, mcbuf
, NULL
);
369 mcbuf
= weight_cost_init_chroma444( h
, fenc
, ref
, h
->mb
.p_weight_buf
[0], plane
);
370 origscore
= minscore
= weight_cost_chroma444( h
, fenc
, mcbuf
, NULL
, plane
);
374 pixel
*dstu
= h
->mb
.p_weight_buf
[0];
375 pixel
*dstv
= h
->mb
.p_weight_buf
[0]+fenc
->i_stride
[1]*fenc
->i_lines
[1];
376 if( !chroma_initted
++ )
377 weight_cost_init_chroma( h
, fenc
, ref
, dstu
, dstv
);
378 mcbuf
= plane
== 1 ? dstu
: dstv
;
379 origscore
= minscore
= weight_cost_chroma( h
, fenc
, mcbuf
, NULL
);
386 /* Picked somewhat arbitrarily */
387 static const uint8_t weight_check_distance
[][2] =
389 {0,0},{0,0},{0,1},{0,1},
390 {0,1},{0,1},{0,1},{1,1},
391 {1,1},{2,1},{2,1},{4,2}
393 int scale_dist
= b_lookahead
? 0 : weight_check_distance
[h
->param
.analyse
.i_subpel_refine
][0];
394 int offset_dist
= b_lookahead
? 0 : weight_check_distance
[h
->param
.analyse
.i_subpel_refine
][1];
396 int start_scale
= x264_clip3( minscale
- scale_dist
, 0, 127 );
397 int end_scale
= x264_clip3( minscale
+ scale_dist
, 0, 127 );
398 for( int i_scale
= start_scale
; i_scale
<= end_scale
; i_scale
++ )
400 int cur_scale
= i_scale
;
401 int cur_offset
= fenc_mean
[plane
] - ref_mean
[plane
] * cur_scale
/ (1 << mindenom
) + 0.5f
* b_lookahead
;
402 if( cur_offset
< - 128 || cur_offset
> 127 )
404 /* Rescale considering the constraints on cur_offset. We do it in this order
405 * because scale has a much wider range than offset (because of denom), so
406 * it should almost never need to be clamped. */
407 cur_offset
= x264_clip3( cur_offset
, -128, 127 );
408 cur_scale
= (1 << mindenom
) * (fenc_mean
[plane
] - cur_offset
) / ref_mean
[plane
] + 0.5f
;
409 cur_scale
= x264_clip3( cur_scale
, 0, 127 );
411 int start_offset
= x264_clip3( cur_offset
- offset_dist
, -128, 127 );
412 int end_offset
= x264_clip3( cur_offset
+ offset_dist
, -128, 127 );
413 for( int i_off
= start_offset
; i_off
<= end_offset
; i_off
++ )
415 SET_WEIGHT( weights
[plane
], 1, cur_scale
, mindenom
, i_off
);
420 s
= weight_cost_chroma444( h
, fenc
, mcbuf
, &weights
[plane
], plane
);
422 s
= weight_cost_chroma( h
, fenc
, mcbuf
, &weights
[plane
] );
425 s
= weight_cost_luma( h
, fenc
, mcbuf
, &weights
[plane
] );
426 COPY4_IF_LT( minscore
, s
, minscale
, cur_scale
, minoff
, i_off
, found
, 1 );
428 // Don't check any more offsets if the previous one had a lower cost than the current one
429 if( minoff
== start_offset
&& i_off
!= start_offset
)
435 /* Use a smaller denominator if possible */
438 while( mindenom
> 0 && !(minscale
&1) )
445 /* FIXME: More analysis can be done here on SAD vs. SATD termination. */
446 /* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
447 if( !found
|| (minscale
== 1 << mindenom
&& minoff
== 0) || (float)minscore
/ origscore
> 0.998f
)
449 SET_WEIGHT( weights
[plane
], 0, 1, 0, 0 );
453 SET_WEIGHT( weights
[plane
], 1, minscale
, mindenom
, minoff
);
455 if( h
->param
.analyse
.i_weighted_pred
== X264_WEIGHTP_FAKE
&& weights
[0].weightfn
&& !plane
)
456 fenc
->f_weighted_cost_delta
[i_delta_index
] = (float)minscore
/ origscore
;
459 /* Optimize and unify denominator */
460 if( weights
[1].weightfn
|| weights
[2].weightfn
)
462 int denom
= weights
[1].weightfn
? weights
[1].i_denom
: weights
[2].i_denom
;
463 int both_weighted
= weights
[1].weightfn
&& weights
[2].weightfn
;
464 /* If only one plane is weighted, the other has an implicit scale of 1<<denom.
465 * With denom==7, this comes out to 128, which is invalid, so don't allow that. */
466 while( (!both_weighted
&& denom
==7) ||
467 (denom
> 0 && !(weights
[1].weightfn
&& (weights
[1].i_scale
&1))
468 && !(weights
[2].weightfn
&& (weights
[2].i_scale
&1))) )
471 for( int i
= 1; i
<= 2; i
++ )
472 if( weights
[i
].weightfn
)
474 weights
[i
].i_scale
>>= 1;
475 weights
[i
].i_denom
= denom
;
479 for( int i
= 1; i
<= 2; i
++ )
480 if( weights
[i
].weightfn
)
481 h
->mc
.weight_cache( h
, &weights
[i
] );
483 if( weights
[0].weightfn
&& b_lookahead
)
485 //scale lowres in lookahead for slicetype_frame_cost
486 pixel
*src
= ref
->buffer_lowres
;
487 pixel
*dst
= h
->mb
.p_weight_buf
[0];
488 int width
= ref
->i_width_lowres
+ PADH
*2;
489 int height
= ref
->i_lines_lowres
+ PADV
*2;
490 x264_weight_scale_plane( h
, dst
, ref
->i_stride_lowres
, src
, ref
->i_stride_lowres
,
491 width
, height
, &weights
[0] );
492 fenc
->weighted
[0] = h
->mb
.p_weight_buf
[0] + PADH
+ ref
->i_stride_lowres
* PADV
;
/* Output buffers are separated by 128 bytes to avoid false sharing of cachelines
 * in multithreaded lookahead. */
#define PAD_SIZE 32
/* cost_est, cost_est_aq, intra_mbs, num rows */
#define NUM_INTS 4
#define COST_EST 0
#define COST_EST_AQ 1
#define INTRA_MBS 2
#define NUM_ROWS 3
/* Per-row SATD slots follow the NUM_INTS header, indexed relative to the thread's first row. */
#define ROW_SATD (NUM_INTS + (h->mb.i_mb_y - h->i_threadslice_start))
507 static void slicetype_mb_cost( x264_t
*h
, x264_mb_analysis_t
*a
,
508 x264_frame_t
**frames
, int p0
, int p1
, int b
,
509 int dist_scale_factor
, int do_search
[2], const x264_weight_t
*w
,
510 int *output_inter
, int *output_intra
)
512 x264_frame_t
*fref0
= frames
[p0
];
513 x264_frame_t
*fref1
= frames
[p1
];
514 x264_frame_t
*fenc
= frames
[b
];
515 const int b_bidir
= (b
< p1
);
516 const int i_mb_x
= h
->mb
.i_mb_x
;
517 const int i_mb_y
= h
->mb
.i_mb_y
;
518 const int i_mb_stride
= h
->mb
.i_mb_width
;
519 const int i_mb_xy
= i_mb_x
+ i_mb_y
* i_mb_stride
;
520 const int i_stride
= fenc
->i_stride_lowres
;
521 const int i_pel_offset
= 8 * (i_mb_x
+ i_mb_y
* i_stride
);
522 const int i_bipred_weight
= h
->param
.analyse
.b_weighted_bipred
? 64 - (dist_scale_factor
>>2) : 32;
523 int16_t (*fenc_mvs
[2])[2] = { &fenc
->lowres_mvs
[0][b
-p0
-1][i_mb_xy
], &fenc
->lowres_mvs
[1][p1
-b
-1][i_mb_xy
] };
524 int (*fenc_costs
[2]) = { &fenc
->lowres_mv_costs
[0][b
-p0
-1][i_mb_xy
], &fenc
->lowres_mv_costs
[1][p1
-b
-1][i_mb_xy
] };
525 int b_frame_score_mb
= (i_mb_x
> 0 && i_mb_x
< h
->mb
.i_mb_width
- 1 &&
526 i_mb_y
> 0 && i_mb_y
< h
->mb
.i_mb_height
- 1) ||
527 h
->mb
.i_mb_width
<= 2 || h
->mb
.i_mb_height
<= 2;
529 ALIGNED_ARRAY_16( pixel
, pix1
,[9*FDEC_STRIDE
] );
530 pixel
*pix2
= pix1
+8;
532 int i_bcost
= COST_MAX
;
534 /* A small, arbitrary bias to avoid VBV problems caused by zero-residual lookahead blocks. */
535 int lowres_penalty
= 4;
537 h
->mb
.pic
.p_fenc
[0] = h
->mb
.pic
.fenc_buf
;
538 h
->mc
.copy
[PIXEL_8x8
]( h
->mb
.pic
.p_fenc
[0], FENC_STRIDE
, &fenc
->lowres
[0][i_pel_offset
], i_stride
, 8 );
541 goto lowres_intra_mb
;
543 int mv_range
= 2 * h
->param
.analyse
.i_mv_range
;
544 // no need for h->mb.mv_min[]
545 h
->mb
.mv_min_spel
[0] = X264_MAX( 4*(-8*h
->mb
.i_mb_x
- 12), -mv_range
);
546 h
->mb
.mv_max_spel
[0] = X264_MIN( 4*(8*(h
->mb
.i_mb_width
- h
->mb
.i_mb_x
- 1) + 12), mv_range
-1 );
547 h
->mb
.mv_limit_fpel
[0][0] = h
->mb
.mv_min_spel
[0] >> 2;
548 h
->mb
.mv_limit_fpel
[1][0] = h
->mb
.mv_max_spel
[0] >> 2;
549 if( h
->mb
.i_mb_x
>= h
->mb
.i_mb_width
- 2 )
551 h
->mb
.mv_min_spel
[1] = X264_MAX( 4*(-8*h
->mb
.i_mb_y
- 12), -mv_range
);
552 h
->mb
.mv_max_spel
[1] = X264_MIN( 4*(8*( h
->mb
.i_mb_height
- h
->mb
.i_mb_y
- 1) + 12), mv_range
-1 );
553 h
->mb
.mv_limit_fpel
[0][1] = h
->mb
.mv_min_spel
[1] >> 2;
554 h
->mb
.mv_limit_fpel
[1][1] = h
->mb
.mv_max_spel
[1] >> 2;
557 #define LOAD_HPELS_LUMA(dst, src) \
559 (dst)[0] = &(src)[0][i_pel_offset]; \
560 (dst)[1] = &(src)[1][i_pel_offset]; \
561 (dst)[2] = &(src)[2][i_pel_offset]; \
562 (dst)[3] = &(src)[3][i_pel_offset]; \
564 #define LOAD_WPELS_LUMA(dst,src) \
565 (dst) = &(src)[i_pel_offset];
567 #define CLIP_MV( mv ) \
569 mv[0] = x264_clip3( mv[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); \
570 mv[1] = x264_clip3( mv[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] ); \
572 #define TRY_BIDIR( mv0, mv1, penalty ) \
575 if( h->param.analyse.i_subpel_refine <= 1 ) \
577 int hpel_idx1 = (((mv0)[0]&2)>>1) + ((mv0)[1]&2); \
578 int hpel_idx2 = (((mv1)[0]&2)>>1) + ((mv1)[1]&2); \
579 pixel *src1 = m[0].p_fref[hpel_idx1] + ((mv0)[0]>>2) + ((mv0)[1]>>2) * m[0].i_stride[0]; \
580 pixel *src2 = m[1].p_fref[hpel_idx2] + ((mv1)[0]>>2) + ((mv1)[1]>>2) * m[1].i_stride[0]; \
581 h->mc.avg[PIXEL_8x8]( pix1, 16, src1, m[0].i_stride[0], src2, m[1].i_stride[0], i_bipred_weight ); \
585 intptr_t stride1 = 16, stride2 = 16; \
586 pixel *src1, *src2; \
587 src1 = h->mc.get_ref( pix1, &stride1, m[0].p_fref, m[0].i_stride[0], \
588 (mv0)[0], (mv0)[1], 8, 8, w ); \
589 src2 = h->mc.get_ref( pix2, &stride2, m[1].p_fref, m[1].i_stride[0], \
590 (mv1)[0], (mv1)[1], 8, 8, w ); \
591 h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
593 i_cost = penalty * a->i_lambda + h->pixf.mbcmp[PIXEL_8x8]( \
594 m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
595 COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
598 m
[0].i_pixel
= PIXEL_8x8
;
599 m
[0].p_cost_mv
= a
->p_cost_mv
;
600 m
[0].i_stride
[0] = i_stride
;
601 m
[0].p_fenc
[0] = h
->mb
.pic
.p_fenc
[0];
604 LOAD_HPELS_LUMA( m
[0].p_fref
, fref0
->lowres
);
605 m
[0].p_fref_w
= m
[0].p_fref
[0];
607 LOAD_WPELS_LUMA( m
[0].p_fref_w
, fenc
->weighted
[0] );
611 ALIGNED_ARRAY_8( int16_t, dmv
,[2],[2] );
613 m
[1].i_pixel
= PIXEL_8x8
;
614 m
[1].p_cost_mv
= a
->p_cost_mv
;
615 m
[1].i_stride
[0] = i_stride
;
616 m
[1].p_fenc
[0] = h
->mb
.pic
.p_fenc
[0];
618 m
[1].weight
= x264_weight_none
;
619 LOAD_HPELS_LUMA( m
[1].p_fref
, fref1
->lowres
);
620 m
[1].p_fref_w
= m
[1].p_fref
[0];
622 if( fref1
->lowres_mvs
[0][p1
-p0
-1][0][0] != 0x7FFF )
624 int16_t *mvr
= fref1
->lowres_mvs
[0][p1
-p0
-1][i_mb_xy
];
625 dmv
[0][0] = ( mvr
[0] * dist_scale_factor
+ 128 ) >> 8;
626 dmv
[0][1] = ( mvr
[1] * dist_scale_factor
+ 128 ) >> 8;
627 dmv
[1][0] = dmv
[0][0] - mvr
[0];
628 dmv
[1][1] = dmv
[0][1] - mvr
[1];
631 if( h
->param
.analyse
.i_subpel_refine
<= 1 )
632 M64( dmv
) &= ~0x0001000100010001ULL
; /* mv & ~1 */
637 TRY_BIDIR( dmv
[0], dmv
[1], 0 );
641 h
->mc
.avg
[PIXEL_8x8
]( pix1
, 16, m
[0].p_fref
[0], m
[0].i_stride
[0], m
[1].p_fref
[0], m
[1].i_stride
[0], i_bipred_weight
);
642 i_cost
= h
->pixf
.mbcmp
[PIXEL_8x8
]( m
[0].p_fenc
[0], FENC_STRIDE
, pix1
, 16 );
643 COPY2_IF_LT( i_bcost
, i_cost
, list_used
, 3 );
647 for( int l
= 0; l
< 1 + b_bidir
; l
++ )
652 int16_t (*fenc_mv
)[2] = fenc_mvs
[l
];
653 ALIGNED_4( int16_t mvc
[4][2] );
655 /* Reverse-order MV prediction. */
658 #define MVC(mv) { CP32( mvc[i_mvc], mv ); i_mvc++; }
659 if( i_mb_x
< h
->mb
.i_mb_width
- 1 )
661 if( i_mb_y
< h
->i_threadslice_end
- 1 )
663 MVC( fenc_mv
[i_mb_stride
] );
665 MVC( fenc_mv
[i_mb_stride
-1] );
666 if( i_mb_x
< h
->mb
.i_mb_width
- 1 )
667 MVC( fenc_mv
[i_mb_stride
+1] );
671 CP32( m
[l
].mvp
, mvc
[0] );
673 x264_median_mv( m
[l
].mvp
, mvc
[0], mvc
[1], mvc
[2] );
675 /* Fast skip for cases of near-zero residual. Shortcut: don't bother except in the mv0 case,
676 * since anything else is likely to have enough residual to not trigger the skip. */
677 if( !M32( m
[l
].mvp
) )
679 m
[l
].cost
= h
->pixf
.mbcmp
[PIXEL_8x8
]( m
[l
].p_fenc
[0], FENC_STRIDE
, m
[l
].p_fref
[0], m
[l
].i_stride
[0] );
687 x264_me_search( h
, &m
[l
], mvc
, i_mvc
);
688 m
[l
].cost
-= a
->p_cost_mv
[0]; // remove mvcost from skip mbs
690 m
[l
].cost
+= 5 * a
->i_lambda
;
693 CP32( fenc_mvs
[l
], m
[l
].mv
);
694 *fenc_costs
[l
] = m
[l
].cost
;
698 CP32( m
[l
].mv
, fenc_mvs
[l
] );
699 m
[l
].cost
= *fenc_costs
[l
];
701 COPY2_IF_LT( i_bcost
, m
[l
].cost
, list_used
, l
+1 );
704 if( b_bidir
&& ( M32( m
[0].mv
) || M32( m
[1].mv
) ) )
705 TRY_BIDIR( m
[0].mv
, m
[1].mv
, 5 );
708 if( !fenc
->b_intra_calculated
)
710 ALIGNED_ARRAY_16( pixel
, edge
,[36] );
711 pixel
*pix
= &pix1
[8+FDEC_STRIDE
];
712 pixel
*src
= &fenc
->lowres
[0][i_pel_offset
];
713 const int intra_penalty
= 5 * a
->i_lambda
;
715 int pixoff
= 4 / sizeof(pixel
);
717 /* Avoid store forwarding stalls by writing larger chunks */
718 memcpy( pix
-FDEC_STRIDE
, src
-i_stride
, 16 * sizeof(pixel
) );
719 for( int i
= -1; i
< 8; i
++ )
720 M32( &pix
[i
*FDEC_STRIDE
-pixoff
] ) = M32( &src
[i
*i_stride
-pixoff
] );
722 h
->pixf
.intra_mbcmp_x3_8x8c( h
->mb
.pic
.p_fenc
[0], pix
, satds
);
723 int i_icost
= X264_MIN3( satds
[0], satds
[1], satds
[2] );
725 if( h
->param
.analyse
.i_subpel_refine
> 1 )
727 h
->predict_8x8c
[I_PRED_CHROMA_P
]( pix
);
728 int satd
= h
->pixf
.mbcmp
[PIXEL_8x8
]( h
->mb
.pic
.p_fenc
[0], FENC_STRIDE
, pix
, FDEC_STRIDE
);
729 i_icost
= X264_MIN( i_icost
, satd
);
730 h
->predict_8x8_filter( pix
, edge
, ALL_NEIGHBORS
, ALL_NEIGHBORS
);
731 for( int i
= 3; i
< 9; i
++ )
733 h
->predict_8x8
[i
]( pix
, edge
);
734 satd
= h
->pixf
.mbcmp
[PIXEL_8x8
]( h
->mb
.pic
.p_fenc
[0], FENC_STRIDE
, pix
, FDEC_STRIDE
);
735 i_icost
= X264_MIN( i_icost
, satd
);
739 i_icost
= ((i_icost
+ intra_penalty
) >> (BIT_DEPTH
- 8)) + lowres_penalty
;
740 fenc
->i_intra_cost
[i_mb_xy
] = i_icost
;
741 int i_icost_aq
= i_icost
;
742 if( h
->param
.rc
.i_aq_mode
)
743 i_icost_aq
= (i_icost_aq
* fenc
->i_inv_qscale_factor
[i_mb_xy
] + 128) >> 8;
744 output_intra
[ROW_SATD
] += i_icost_aq
;
745 if( b_frame_score_mb
)
747 output_intra
[COST_EST
] += i_icost
;
748 output_intra
[COST_EST_AQ
] += i_icost_aq
;
751 i_bcost
= (i_bcost
>> (BIT_DEPTH
- 8)) + lowres_penalty
;
753 /* forbid intra-mbs in B-frames, because it's rare and not worth checking */
754 /* FIXME: Should we still forbid them now that we cache intra scores? */
757 int i_icost
= fenc
->i_intra_cost
[i_mb_xy
];
758 int b_intra
= i_icost
< i_bcost
;
764 if( b_frame_score_mb
)
765 output_inter
[INTRA_MBS
] += b_intra
;
768 /* In an I-frame, we've already added the results above in the intra section. */
771 int i_bcost_aq
= i_bcost
;
772 if( h
->param
.rc
.i_aq_mode
)
773 i_bcost_aq
= (i_bcost_aq
* fenc
->i_inv_qscale_factor
[i_mb_xy
] + 128) >> 8;
774 output_inter
[ROW_SATD
] += i_bcost_aq
;
775 if( b_frame_score_mb
)
777 /* Don't use AQ-weighted costs for slicetype decision, only for ratecontrol. */
778 output_inter
[COST_EST
] += i_bcost
;
779 output_inter
[COST_EST_AQ
] += i_bcost_aq
;
783 fenc
->lowres_costs
[b
-p0
][p1
-b
][i_mb_xy
] = X264_MIN( i_bcost
, LOWRES_COST_MASK
) + (list_used
<< LOWRES_COST_SHIFT
);
/* Number of macroblocks that contribute to a frame's score: the interior MBs
 * (border excluded) when the frame is larger than 2x2 MBs, otherwise all of them. */
#define NUM_MBS\
   (h->mb.i_mb_width > 2 && h->mb.i_mb_height > 2 ?\
   (h->mb.i_mb_width - 2) * (h->mb.i_mb_height - 2) :\
    h->mb.i_mb_width * h->mb.i_mb_height)
795 x264_mb_analysis_t
*a
;
796 x264_frame_t
**frames
;
800 int dist_scale_factor
;
802 const x264_weight_t
*w
;
805 } x264_slicetype_slice_t
;
807 static void slicetype_slice_cost( x264_slicetype_slice_t
*s
)
811 /* Lowres lookahead goes backwards because the MVs are used as predictors in the main encode.
812 * This considerably improves MV prediction overall. */
814 /* The edge mbs seem to reduce the predictive quality of the
815 * whole frame's score, but are needed for a spatial distribution. */
816 int do_edges
= h
->param
.rc
.b_mb_tree
|| h
->param
.rc
.i_vbv_buffer_size
|| h
->mb
.i_mb_width
<= 2 || h
->mb
.i_mb_height
<= 2;
818 int start_y
= X264_MIN( h
->i_threadslice_end
- 1, h
->mb
.i_mb_height
- 2 + do_edges
);
819 int end_y
= X264_MAX( h
->i_threadslice_start
, 1 - do_edges
);
820 int start_x
= h
->mb
.i_mb_width
- 2 + do_edges
;
821 int end_x
= 1 - do_edges
;
823 for( h
->mb
.i_mb_y
= start_y
; h
->mb
.i_mb_y
>= end_y
; h
->mb
.i_mb_y
-- )
824 for( h
->mb
.i_mb_x
= start_x
; h
->mb
.i_mb_x
>= end_x
; h
->mb
.i_mb_x
-- )
825 slicetype_mb_cost( h
, s
->a
, s
->frames
, s
->p0
, s
->p1
, s
->b
, s
->dist_scale_factor
,
826 s
->do_search
, s
->w
, s
->output_inter
, s
->output_intra
);
829 static int slicetype_frame_cost( x264_t
*h
, x264_mb_analysis_t
*a
,
830 x264_frame_t
**frames
, int p0
, int p1
, int b
)
834 const x264_weight_t
*w
= x264_weight_none
;
835 x264_frame_t
*fenc
= frames
[b
];
837 /* Check whether we already evaluated this frame
838 * If we have tried this frame as P, then we have also tried
839 * the preceding frames as B. (is this still true?) */
840 /* Also check that we already calculated the row SATDs for the current frame. */
841 if( fenc
->i_cost_est
[b
-p0
][p1
-b
] >= 0 && (!h
->param
.rc
.i_vbv_buffer_size
|| fenc
->i_row_satds
[b
-p0
][p1
-b
][0] != -1) )
842 i_score
= fenc
->i_cost_est
[b
-p0
][p1
-b
];
845 int dist_scale_factor
= 128;
847 /* For each list, check to see whether we have lowres motion-searched this reference frame before. */
848 do_search
[0] = b
!= p0
&& fenc
->lowres_mvs
[0][b
-p0
-1][0][0] == 0x7FFF;
849 do_search
[1] = b
!= p1
&& fenc
->lowres_mvs
[1][p1
-b
-1][0][0] == 0x7FFF;
852 if( h
->param
.analyse
.i_weighted_pred
&& b
== p1
)
855 x264_weights_analyse( h
, fenc
, frames
[p0
], 1 );
858 fenc
->lowres_mvs
[0][b
-p0
-1][0][0] = 0;
860 if( do_search
[1] ) fenc
->lowres_mvs
[1][p1
-b
-1][0][0] = 0;
863 dist_scale_factor
= ( ((b
-p0
) << 8) + ((p1
-p0
) >> 1) ) / (p1
-p0
);
865 int output_buf_size
= h
->mb
.i_mb_height
+ (NUM_INTS
+ PAD_SIZE
) * h
->param
.i_lookahead_threads
;
866 int *output_inter
[X264_LOOKAHEAD_THREAD_MAX
+1];
867 int *output_intra
[X264_LOOKAHEAD_THREAD_MAX
+1];
868 output_inter
[0] = h
->scratch_buffer2
;
869 output_intra
[0] = output_inter
[0] + output_buf_size
;
872 if( h
->param
.b_opencl
)
874 x264_opencl_lowres_init(h
, fenc
, a
->i_lambda
);
877 x264_opencl_lowres_init( h
, frames
[p0
], a
->i_lambda
);
878 x264_opencl_motionsearch( h
, frames
, b
, p0
, 0, a
->i_lambda
, w
);
882 x264_opencl_lowres_init( h
, frames
[p1
], a
->i_lambda
);
883 x264_opencl_motionsearch( h
, frames
, b
, p1
, 1, a
->i_lambda
, NULL
);
886 x264_opencl_finalize_cost( h
, a
->i_lambda
, frames
, p0
, p1
, b
, dist_scale_factor
);
887 x264_opencl_flush( h
);
889 i_score
= fenc
->i_cost_est
[b
-p0
][p1
-b
];
894 if( h
->param
.i_lookahead_threads
> 1 )
896 x264_slicetype_slice_t s
[X264_LOOKAHEAD_THREAD_MAX
];
898 for( int i
= 0; i
< h
->param
.i_lookahead_threads
; i
++ )
900 x264_t
*t
= h
->lookahead_thread
[i
];
902 /* FIXME move this somewhere else */
903 t
->mb
.i_me_method
= h
->mb
.i_me_method
;
904 t
->mb
.i_subpel_refine
= h
->mb
.i_subpel_refine
;
905 t
->mb
.b_chroma_me
= h
->mb
.b_chroma_me
;
907 s
[i
] = (x264_slicetype_slice_t
){ t
, a
, frames
, p0
, p1
, b
, dist_scale_factor
, do_search
, w
,
908 output_inter
[i
], output_intra
[i
] };
910 t
->i_threadslice_start
= ((h
->mb
.i_mb_height
* i
+ h
->param
.i_lookahead_threads
/2) / h
->param
.i_lookahead_threads
);
911 t
->i_threadslice_end
= ((h
->mb
.i_mb_height
* (i
+1) + h
->param
.i_lookahead_threads
/2) / h
->param
.i_lookahead_threads
);
913 int thread_height
= t
->i_threadslice_end
- t
->i_threadslice_start
;
914 int thread_output_size
= thread_height
+ NUM_INTS
;
915 memset( output_inter
[i
], 0, thread_output_size
* sizeof(int) );
916 memset( output_intra
[i
], 0, thread_output_size
* sizeof(int) );
917 output_inter
[i
][NUM_ROWS
] = output_intra
[i
][NUM_ROWS
] = thread_height
;
919 output_inter
[i
+1] = output_inter
[i
] + thread_output_size
+ PAD_SIZE
;
920 output_intra
[i
+1] = output_intra
[i
] + thread_output_size
+ PAD_SIZE
;
922 x264_threadpool_run( h
->lookaheadpool
, (void*)slicetype_slice_cost
, &s
[i
] );
924 for( int i
= 0; i
< h
->param
.i_lookahead_threads
; i
++ )
925 x264_threadpool_wait( h
->lookaheadpool
, &s
[i
] );
929 h
->i_threadslice_start
= 0;
930 h
->i_threadslice_end
= h
->mb
.i_mb_height
;
931 memset( output_inter
[0], 0, (output_buf_size
- PAD_SIZE
) * sizeof(int) );
932 memset( output_intra
[0], 0, (output_buf_size
- PAD_SIZE
) * sizeof(int) );
933 output_inter
[0][NUM_ROWS
] = output_intra
[0][NUM_ROWS
] = h
->mb
.i_mb_height
;
934 x264_slicetype_slice_t s
= (x264_slicetype_slice_t
){ h
, a
, frames
, p0
, p1
, b
, dist_scale_factor
, do_search
, w
,
935 output_inter
[0], output_intra
[0] };
936 slicetype_slice_cost( &s
);
939 /* Sum up accumulators */
941 fenc
->i_intra_mbs
[b
-p0
] = 0;
942 if( !fenc
->b_intra_calculated
)
944 fenc
->i_cost_est
[0][0] = 0;
945 fenc
->i_cost_est_aq
[0][0] = 0;
947 fenc
->i_cost_est
[b
-p0
][p1
-b
] = 0;
948 fenc
->i_cost_est_aq
[b
-p0
][p1
-b
] = 0;
950 int *row_satd_inter
= fenc
->i_row_satds
[b
-p0
][p1
-b
];
951 int *row_satd_intra
= fenc
->i_row_satds
[0][0];
952 for( int i
= 0; i
< h
->param
.i_lookahead_threads
; i
++ )
955 fenc
->i_intra_mbs
[b
-p0
] += output_inter
[i
][INTRA_MBS
];
956 if( !fenc
->b_intra_calculated
)
958 fenc
->i_cost_est
[0][0] += output_intra
[i
][COST_EST
];
959 fenc
->i_cost_est_aq
[0][0] += output_intra
[i
][COST_EST_AQ
];
962 fenc
->i_cost_est
[b
-p0
][p1
-b
] += output_inter
[i
][COST_EST
];
963 fenc
->i_cost_est_aq
[b
-p0
][p1
-b
] += output_inter
[i
][COST_EST_AQ
];
965 if( h
->param
.rc
.i_vbv_buffer_size
)
967 int row_count
= output_inter
[i
][NUM_ROWS
];
968 memcpy( row_satd_inter
, output_inter
[i
] + NUM_INTS
, row_count
* sizeof(int) );
969 if( !fenc
->b_intra_calculated
)
970 memcpy( row_satd_intra
, output_intra
[i
] + NUM_INTS
, row_count
* sizeof(int) );
971 row_satd_inter
+= row_count
;
972 row_satd_intra
+= row_count
;
976 i_score
= fenc
->i_cost_est
[b
-p0
][p1
-b
];
978 i_score
= (uint64_t)i_score
* 100 / (120 + h
->param
.i_bframe_bias
);
980 fenc
->b_intra_calculated
= 1;
982 fenc
->i_cost_est
[b
-p0
][p1
-b
] = i_score
;
990 /* If MB-tree changes the quantizers, we need to recalculate the frame cost without
991 * re-running lookahead. */
992 static int slicetype_frame_cost_recalculate( x264_t
*h
, x264_frame_t
**frames
, int p0
, int p1
, int b
)
995 int *row_satd
= frames
[b
]->i_row_satds
[b
-p0
][p1
-b
];
996 float *qp_offset
= IS_X264_TYPE_B(frames
[b
]->i_type
) ? frames
[b
]->f_qp_offset_aq
: frames
[b
]->f_qp_offset
;
998 for( h
->mb
.i_mb_y
= h
->mb
.i_mb_height
- 1; h
->mb
.i_mb_y
>= 0; h
->mb
.i_mb_y
-- )
1000 row_satd
[ h
->mb
.i_mb_y
] = 0;
1001 for( h
->mb
.i_mb_x
= h
->mb
.i_mb_width
- 1; h
->mb
.i_mb_x
>= 0; h
->mb
.i_mb_x
-- )
1003 int i_mb_xy
= h
->mb
.i_mb_x
+ h
->mb
.i_mb_y
*h
->mb
.i_mb_stride
;
1004 int i_mb_cost
= frames
[b
]->lowres_costs
[b
-p0
][p1
-b
][i_mb_xy
] & LOWRES_COST_MASK
;
1005 float qp_adj
= qp_offset
[i_mb_xy
];
1006 i_mb_cost
= (i_mb_cost
* x264_exp2fix8(qp_adj
) + 128) >> 8;
1007 row_satd
[ h
->mb
.i_mb_y
] += i_mb_cost
;
1008 if( (h
->mb
.i_mb_y
> 0 && h
->mb
.i_mb_y
< h
->mb
.i_mb_height
- 1 &&
1009 h
->mb
.i_mb_x
> 0 && h
->mb
.i_mb_x
< h
->mb
.i_mb_width
- 1) ||
1010 h
->mb
.i_mb_width
<= 2 || h
->mb
.i_mb_height
<= 2 )
1012 i_score
+= i_mb_cost
;
1019 /* Trade off precision in mbtree for increased range */
1020 #define MBTREE_PRECISION 0.5f
1022 static void macroblock_tree_finish( x264_t
*h
, x264_frame_t
*frame
, float average_duration
, int ref0_distance
)
1024 int fps_factor
= round( CLIP_DURATION(average_duration
) / CLIP_DURATION(frame
->f_duration
) * 256 / MBTREE_PRECISION
);
1025 float weightdelta
= 0.0;
1026 if( ref0_distance
&& frame
->f_weighted_cost_delta
[ref0_distance
-1] > 0 )
1027 weightdelta
= (1.0 - frame
->f_weighted_cost_delta
[ref0_distance
-1]);
1029 /* Allow the strength to be adjusted via qcompress, since the two
1030 * concepts are very similar. */
1031 float strength
= 5.0f
* (1.0f
- h
->param
.rc
.f_qcompress
);
1032 for( int mb_index
= 0; mb_index
< h
->mb
.i_mb_count
; mb_index
++ )
1034 int intra_cost
= (frame
->i_intra_cost
[mb_index
] * frame
->i_inv_qscale_factor
[mb_index
] + 128) >> 8;
1037 int propagate_cost
= (frame
->i_propagate_cost
[mb_index
] * fps_factor
+ 128) >> 8;
1038 float log2_ratio
= x264_log2(intra_cost
+ propagate_cost
) - x264_log2(intra_cost
) + weightdelta
;
1039 frame
->f_qp_offset
[mb_index
] = frame
->f_qp_offset_aq
[mb_index
] - strength
* log2_ratio
;
1044 static void macroblock_tree_propagate( x264_t
*h
, x264_frame_t
**frames
, float average_duration
, int p0
, int p1
, int b
, int referenced
)
1046 uint16_t *ref_costs
[2] = {frames
[p0
]->i_propagate_cost
,frames
[p1
]->i_propagate_cost
};
1047 int dist_scale_factor
= ( ((b
-p0
) << 8) + ((p1
-p0
) >> 1) ) / (p1
-p0
);
1048 int i_bipred_weight
= h
->param
.analyse
.b_weighted_bipred
? 64 - (dist_scale_factor
>>2) : 32;
1049 int16_t (*mvs
[2])[2] = { frames
[b
]->lowres_mvs
[0][b
-p0
-1], frames
[b
]->lowres_mvs
[1][p1
-b
-1] };
1050 int bipred_weights
[2] = {i_bipred_weight
, 64 - i_bipred_weight
};
1051 int16_t *buf
= h
->scratch_buffer
;
1052 uint16_t *propagate_cost
= frames
[b
]->i_propagate_cost
;
1053 uint16_t *lowres_costs
= frames
[b
]->lowres_costs
[b
-p0
][p1
-b
];
1056 float fps_factor
= CLIP_DURATION(frames
[b
]->f_duration
) / (CLIP_DURATION(average_duration
) * 256.0f
) * MBTREE_PRECISION
;
1058 /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
1060 memset( frames
[b
]->i_propagate_cost
, 0, h
->mb
.i_mb_width
* sizeof(uint16_t) );
1062 for( h
->mb
.i_mb_y
= 0; h
->mb
.i_mb_y
< h
->mb
.i_mb_height
; h
->mb
.i_mb_y
++ )
1064 int mb_index
= h
->mb
.i_mb_y
*h
->mb
.i_mb_stride
;
1065 h
->mc
.mbtree_propagate_cost( buf
, propagate_cost
,
1066 frames
[b
]->i_intra_cost
+mb_index
, lowres_costs
+mb_index
,
1067 frames
[b
]->i_inv_qscale_factor
+mb_index
, &fps_factor
, h
->mb
.i_mb_width
);
1069 propagate_cost
+= h
->mb
.i_mb_width
;
1071 h
->mc
.mbtree_propagate_list( h
, ref_costs
[0], &mvs
[0][mb_index
], buf
, &lowres_costs
[mb_index
],
1072 bipred_weights
[0], h
->mb
.i_mb_y
, h
->mb
.i_mb_width
, 0 );
1075 h
->mc
.mbtree_propagate_list( h
, ref_costs
[1], &mvs
[1][mb_index
], buf
, &lowres_costs
[mb_index
],
1076 bipred_weights
[1], h
->mb
.i_mb_y
, h
->mb
.i_mb_width
, 1 );
1080 if( h
->param
.rc
.i_vbv_buffer_size
&& h
->param
.rc
.i_lookahead
&& referenced
)
1081 macroblock_tree_finish( h
, frames
[b
], average_duration
, b
== p1
? b
- p0
: 0 );
1084 static void macroblock_tree( x264_t
*h
, x264_mb_analysis_t
*a
, x264_frame_t
**frames
, int num_frames
, int b_intra
)
1087 int last_nonb
, cur_nonb
= 1;
1091 float total_duration
= 0.0;
1092 for( int j
= 0; j
<= num_frames
; j
++ )
1093 total_duration
+= frames
[j
]->f_duration
;
1094 float average_duration
= total_duration
/ (num_frames
+ 1);
1099 slicetype_frame_cost( h
, a
, frames
, 0, 0, 0 );
1101 while( i
> 0 && IS_X264_TYPE_B( frames
[i
]->i_type
) )
1105 /* Lookaheadless MB-tree is not a theoretically distinct case; the same extrapolation could
1106 * be applied to the end of a lookahead buffer of any size. However, it's most needed when
1107 * lookahead=0, so that's what's currently implemented. */
1108 if( !h
->param
.rc
.i_lookahead
)
1112 memset( frames
[0]->i_propagate_cost
, 0, h
->mb
.i_mb_count
* sizeof(uint16_t) );
1113 memcpy( frames
[0]->f_qp_offset
, frames
[0]->f_qp_offset_aq
, h
->mb
.i_mb_count
* sizeof(float) );
1116 XCHG( uint16_t*, frames
[last_nonb
]->i_propagate_cost
, frames
[0]->i_propagate_cost
);
1117 memset( frames
[0]->i_propagate_cost
, 0, h
->mb
.i_mb_count
* sizeof(uint16_t) );
1121 if( last_nonb
< idx
)
1123 memset( frames
[last_nonb
]->i_propagate_cost
, 0, h
->mb
.i_mb_count
* sizeof(uint16_t) );
1129 while( IS_X264_TYPE_B( frames
[cur_nonb
]->i_type
) && cur_nonb
> 0 )
1131 if( cur_nonb
< idx
)
1133 slicetype_frame_cost( h
, a
, frames
, cur_nonb
, last_nonb
, last_nonb
);
1134 memset( frames
[cur_nonb
]->i_propagate_cost
, 0, h
->mb
.i_mb_count
* sizeof(uint16_t) );
1135 bframes
= last_nonb
- cur_nonb
- 1;
1136 if( h
->param
.i_bframe_pyramid
&& bframes
> 1 )
1138 int middle
= (bframes
+ 1)/2 + cur_nonb
;
1139 slicetype_frame_cost( h
, a
, frames
, cur_nonb
, last_nonb
, middle
);
1140 memset( frames
[middle
]->i_propagate_cost
, 0, h
->mb
.i_mb_count
* sizeof(uint16_t) );
1141 while( i
> cur_nonb
)
1143 int p0
= i
> middle
? middle
: cur_nonb
;
1144 int p1
= i
< middle
? middle
: last_nonb
;
1147 slicetype_frame_cost( h
, a
, frames
, p0
, p1
, i
);
1148 macroblock_tree_propagate( h
, frames
, average_duration
, p0
, p1
, i
, 0 );
1152 macroblock_tree_propagate( h
, frames
, average_duration
, cur_nonb
, last_nonb
, middle
, 1 );
1156 while( i
> cur_nonb
)
1158 slicetype_frame_cost( h
, a
, frames
, cur_nonb
, last_nonb
, i
);
1159 macroblock_tree_propagate( h
, frames
, average_duration
, cur_nonb
, last_nonb
, i
, 0 );
1163 macroblock_tree_propagate( h
, frames
, average_duration
, cur_nonb
, last_nonb
, last_nonb
, 1 );
1164 last_nonb
= cur_nonb
;
1167 if( !h
->param
.rc
.i_lookahead
)
1169 slicetype_frame_cost( h
, a
, frames
, 0, last_nonb
, last_nonb
);
1170 macroblock_tree_propagate( h
, frames
, average_duration
, 0, last_nonb
, last_nonb
, 1 );
1171 XCHG( uint16_t*, frames
[last_nonb
]->i_propagate_cost
, frames
[0]->i_propagate_cost
);
1174 macroblock_tree_finish( h
, frames
[last_nonb
], average_duration
, last_nonb
);
1175 if( h
->param
.i_bframe_pyramid
&& bframes
> 1 && !h
->param
.rc
.i_vbv_buffer_size
)
1176 macroblock_tree_finish( h
, frames
[last_nonb
+(bframes
+1)/2], average_duration
, 0 );
1179 static int vbv_frame_cost( x264_t
*h
, x264_mb_analysis_t
*a
, x264_frame_t
**frames
, int p0
, int p1
, int b
)
1181 int cost
= slicetype_frame_cost( h
, a
, frames
, p0
, p1
, b
);
1182 if( h
->param
.rc
.i_aq_mode
)
1184 if( h
->param
.rc
.b_mb_tree
)
1185 return slicetype_frame_cost_recalculate( h
, frames
, p0
, p1
, b
);
1187 return frames
[b
]->i_cost_est_aq
[b
-p0
][p1
-b
];
1192 static void calculate_durations( x264_t
*h
, x264_frame_t
*cur_frame
, x264_frame_t
*prev_frame
, int64_t *i_cpb_delay
, int64_t *i_coded_fields
)
1194 cur_frame
->i_cpb_delay
= *i_cpb_delay
;
1195 cur_frame
->i_dpb_output_delay
= cur_frame
->i_field_cnt
- *i_coded_fields
;
1197 // add a correction term for frame reordering
1198 cur_frame
->i_dpb_output_delay
+= h
->sps
->vui
.i_num_reorder_frames
*2;
1200 // fix possible negative dpb_output_delay because of pulldown changes and reordering
1201 if( cur_frame
->i_dpb_output_delay
< 0 )
1203 cur_frame
->i_cpb_delay
+= cur_frame
->i_dpb_output_delay
;
1204 cur_frame
->i_dpb_output_delay
= 0;
1206 prev_frame
->i_cpb_duration
+= cur_frame
->i_dpb_output_delay
;
1209 // don't reset cpb delay for IDR frames when using intra-refresh
1210 if( cur_frame
->b_keyframe
&& !h
->param
.b_intra_refresh
)
1213 *i_cpb_delay
+= cur_frame
->i_duration
;
1214 *i_coded_fields
+= cur_frame
->i_duration
;
1215 cur_frame
->i_cpb_duration
= cur_frame
->i_duration
;
1218 static void vbv_lookahead( x264_t
*h
, x264_mb_analysis_t
*a
, x264_frame_t
**frames
, int num_frames
, int keyframe
)
1220 int last_nonb
= 0, cur_nonb
= 1, idx
= 0;
1221 x264_frame_t
*prev_frame
= NULL
;
1222 int prev_frame_idx
= 0;
1223 while( cur_nonb
< num_frames
&& IS_X264_TYPE_B( frames
[cur_nonb
]->i_type
) )
1225 int next_nonb
= keyframe
? last_nonb
: cur_nonb
;
1227 if( frames
[cur_nonb
]->i_coded_fields_lookahead
>= 0 )
1229 h
->i_coded_fields_lookahead
= frames
[cur_nonb
]->i_coded_fields_lookahead
;
1230 h
->i_cpb_delay_lookahead
= frames
[cur_nonb
]->i_cpb_delay_lookahead
;
1233 while( cur_nonb
< num_frames
)
1235 /* P/I cost: This shouldn't include the cost of next_nonb */
1236 if( next_nonb
!= cur_nonb
)
1238 int p0
= IS_X264_TYPE_I( frames
[cur_nonb
]->i_type
) ? cur_nonb
: last_nonb
;
1239 frames
[next_nonb
]->i_planned_satd
[idx
] = vbv_frame_cost( h
, a
, frames
, p0
, cur_nonb
, cur_nonb
);
1240 frames
[next_nonb
]->i_planned_type
[idx
] = frames
[cur_nonb
]->i_type
;
1241 frames
[cur_nonb
]->i_coded_fields_lookahead
= h
->i_coded_fields_lookahead
;
1242 frames
[cur_nonb
]->i_cpb_delay_lookahead
= h
->i_cpb_delay_lookahead
;
1243 calculate_durations( h
, frames
[cur_nonb
], prev_frame
, &h
->i_cpb_delay_lookahead
, &h
->i_coded_fields_lookahead
);
1246 frames
[next_nonb
]->f_planned_cpb_duration
[prev_frame_idx
] = (double)prev_frame
->i_cpb_duration
*
1247 h
->sps
->vui
.i_num_units_in_tick
/ h
->sps
->vui
.i_time_scale
;
1249 frames
[next_nonb
]->f_planned_cpb_duration
[idx
] = (double)frames
[cur_nonb
]->i_cpb_duration
*
1250 h
->sps
->vui
.i_num_units_in_tick
/ h
->sps
->vui
.i_time_scale
;
1251 prev_frame
= frames
[cur_nonb
];
1252 prev_frame_idx
= idx
;
1255 /* Handle the B-frames: coded order */
1256 for( int i
= last_nonb
+1; i
< cur_nonb
; i
++, idx
++ )
1258 frames
[next_nonb
]->i_planned_satd
[idx
] = vbv_frame_cost( h
, a
, frames
, last_nonb
, cur_nonb
, i
);
1259 frames
[next_nonb
]->i_planned_type
[idx
] = X264_TYPE_B
;
1260 frames
[i
]->i_coded_fields_lookahead
= h
->i_coded_fields_lookahead
;
1261 frames
[i
]->i_cpb_delay_lookahead
= h
->i_cpb_delay_lookahead
;
1262 calculate_durations( h
, frames
[i
], prev_frame
, &h
->i_cpb_delay_lookahead
, &h
->i_coded_fields_lookahead
);
1265 frames
[next_nonb
]->f_planned_cpb_duration
[prev_frame_idx
] = (double)prev_frame
->i_cpb_duration
*
1266 h
->sps
->vui
.i_num_units_in_tick
/ h
->sps
->vui
.i_time_scale
;
1268 frames
[next_nonb
]->f_planned_cpb_duration
[idx
] = (double)frames
[i
]->i_cpb_duration
*
1269 h
->sps
->vui
.i_num_units_in_tick
/ h
->sps
->vui
.i_time_scale
;
1270 prev_frame
= frames
[i
];
1271 prev_frame_idx
= idx
;
1273 last_nonb
= cur_nonb
;
1275 while( cur_nonb
<= num_frames
&& IS_X264_TYPE_B( frames
[cur_nonb
]->i_type
) )
1278 frames
[next_nonb
]->i_planned_type
[idx
] = X264_TYPE_AUTO
;
1281 static uint64_t slicetype_path_cost( x264_t
*h
, x264_mb_analysis_t
*a
, x264_frame_t
**frames
, char *path
, uint64_t threshold
)
1286 path
--; /* Since the 1st path element is really the second frame */
1289 int next_nonb
= loc
;
1290 /* Find the location of the next non-B-frame. */
1291 while( path
[next_nonb
] == 'B' )
1294 /* Add the cost of the non-B-frame found above */
1295 if( path
[next_nonb
] == 'P' )
1296 cost
+= slicetype_frame_cost( h
, a
, frames
, cur_nonb
, next_nonb
, next_nonb
);
1298 cost
+= slicetype_frame_cost( h
, a
, frames
, next_nonb
, next_nonb
, next_nonb
);
1299 /* Early terminate if the cost we have found is larger than the best path cost so far */
1300 if( cost
> threshold
)
1303 if( h
->param
.i_bframe_pyramid
&& next_nonb
- cur_nonb
> 2 )
1305 int middle
= cur_nonb
+ (next_nonb
- cur_nonb
)/2;
1306 cost
+= slicetype_frame_cost( h
, a
, frames
, cur_nonb
, next_nonb
, middle
);
1307 for( int next_b
= loc
; next_b
< middle
&& cost
< threshold
; next_b
++ )
1308 cost
+= slicetype_frame_cost( h
, a
, frames
, cur_nonb
, middle
, next_b
);
1309 for( int next_b
= middle
+1; next_b
< next_nonb
&& cost
< threshold
; next_b
++ )
1310 cost
+= slicetype_frame_cost( h
, a
, frames
, middle
, next_nonb
, next_b
);
1313 for( int next_b
= loc
; next_b
< next_nonb
&& cost
< threshold
; next_b
++ )
1314 cost
+= slicetype_frame_cost( h
, a
, frames
, cur_nonb
, next_nonb
, next_b
);
1316 loc
= next_nonb
+ 1;
1317 cur_nonb
= next_nonb
;
1322 /* Viterbi/trellis slicetype decision algorithm. */
1323 /* Uses strings due to the fact that the speed of the control functions is
1324 negligible compared to the cost of running slicetype_frame_cost, and because
1325 it makes debugging easier. */
1326 static void slicetype_path( x264_t
*h
, x264_mb_analysis_t
*a
, x264_frame_t
**frames
, int length
, char (*best_paths
)[X264_LOOKAHEAD_MAX
+1] )
1328 char paths
[2][X264_LOOKAHEAD_MAX
+1];
1329 int num_paths
= X264_MIN( h
->param
.i_bframe
+1, length
);
1330 uint64_t best_cost
= COST_MAX64
;
1331 int best_possible
= 0;
1334 /* Iterate over all currently possible paths */
1335 for( int path
= 0; path
< num_paths
; path
++ )
1337 /* Add suffixes to the current path */
1338 int len
= length
- (path
+ 1);
1339 memcpy( paths
[idx
], best_paths
[len
% (X264_BFRAME_MAX
+1)], len
);
1340 memset( paths
[idx
]+len
, 'B', path
);
1341 strcpy( paths
[idx
]+len
+path
, "P" );
1344 for( int i
= 1; i
<= length
; i
++ )
1346 int i_type
= frames
[i
]->i_type
;
1347 if( i_type
== X264_TYPE_AUTO
)
1349 if( IS_X264_TYPE_B( i_type
) )
1350 possible
= possible
&& (i
< len
|| i
== length
|| paths
[idx
][i
-1] == 'B');
1353 possible
= possible
&& (i
< len
|| paths
[idx
][i
-1] != 'B');
1354 paths
[idx
][i
-1] = IS_X264_TYPE_I( i_type
) ? 'I' : 'P';
1358 if( possible
|| !best_possible
)
1360 if( possible
&& !best_possible
)
1361 best_cost
= COST_MAX64
;
1362 /* Calculate the actual cost of the current path */
1363 uint64_t cost
= slicetype_path_cost( h
, a
, frames
, paths
[idx
], best_cost
);
1364 if( cost
< best_cost
)
1367 best_possible
= possible
;
1373 /* Store the best path. */
1374 memcpy( best_paths
[length
% (X264_BFRAME_MAX
+1)], paths
[idx
^1], length
);
1377 static int scenecut_internal( x264_t
*h
, x264_mb_analysis_t
*a
, x264_frame_t
**frames
, int p0
, int p1
, int real_scenecut
)
1379 x264_frame_t
*frame
= frames
[p1
];
1381 /* Don't do scenecuts on the right view of a frame-packed video. */
1382 if( real_scenecut
&& h
->param
.i_frame_packing
== 5 && (frame
->i_frame
&1) )
1385 slicetype_frame_cost( h
, a
, frames
, p0
, p1
, p1
);
1387 int icost
= frame
->i_cost_est
[0][0];
1388 int pcost
= frame
->i_cost_est
[p1
-p0
][0];
1390 int i_gop_size
= frame
->i_frame
- h
->lookahead
->i_last_keyframe
;
1391 float f_thresh_max
= h
->param
.i_scenecut_threshold
/ 100.0;
1392 /* magic numbers pulled out of thin air */
1393 float f_thresh_min
= f_thresh_max
* 0.25;
1396 if( h
->param
.i_keyint_min
== h
->param
.i_keyint_max
)
1397 f_thresh_min
= f_thresh_max
;
1398 if( i_gop_size
<= h
->param
.i_keyint_min
/ 4 || h
->param
.b_intra_refresh
)
1399 f_bias
= f_thresh_min
/ 4;
1400 else if( i_gop_size
<= h
->param
.i_keyint_min
)
1401 f_bias
= f_thresh_min
* i_gop_size
/ h
->param
.i_keyint_min
;
1404 f_bias
= f_thresh_min
1405 + ( f_thresh_max
- f_thresh_min
)
1406 * ( i_gop_size
- h
->param
.i_keyint_min
)
1407 / ( h
->param
.i_keyint_max
- h
->param
.i_keyint_min
);
1410 res
= pcost
>= (1.0 - f_bias
) * icost
;
1411 if( res
&& real_scenecut
)
1413 int imb
= frame
->i_intra_mbs
[p1
-p0
];
1414 int pmb
= NUM_MBS
- imb
;
1415 x264_log( h
, X264_LOG_DEBUG
, "scene cut at %d Icost:%d Pcost:%d ratio:%.4f bias:%.4f gop:%d (imb:%d pmb:%d)\n",
1417 icost
, pcost
, 1. - (double)pcost
/ icost
,
1418 f_bias
, i_gop_size
, imb
, pmb
);
1423 static int scenecut( x264_t
*h
, x264_mb_analysis_t
*a
, x264_frame_t
**frames
, int p0
, int p1
, int real_scenecut
, int num_frames
, int i_max_search
)
1425 /* Only do analysis during a normal scenecut check. */
1426 if( real_scenecut
&& h
->param
.i_bframe
)
1428 int origmaxp1
= p0
+ 1;
1429 /* Look ahead to avoid coding short flashes as scenecuts. */
1430 if( h
->param
.i_bframe_adaptive
== X264_B_ADAPT_TRELLIS
)
1431 /* Don't analyse any more frames than the trellis would have covered. */
1432 origmaxp1
+= h
->param
.i_bframe
;
1435 int maxp1
= X264_MIN( origmaxp1
, num_frames
);
1437 /* Where A and B are scenes: AAAAAABBBAAAAAA
1438 * If BBB is shorter than (maxp1-p0), it is detected as a flash
1439 * and not considered a scenecut. */
1440 for( int curp1
= p1
; curp1
<= maxp1
; curp1
++ )
1441 if( !scenecut_internal( h
, a
, frames
, p0
, curp1
, 0 ) )
1442 /* Any frame in between p0 and cur_p1 cannot be a real scenecut. */
1443 for( int i
= curp1
; i
> p0
; i
-- )
1444 frames
[i
]->b_scenecut
= 0;
1446 /* Where A-F are scenes: AAAAABBCCDDEEFFFFFF
1447 * If each of BB ... EE are shorter than (maxp1-p0), they are
1448 * detected as flashes and not considered scenecuts.
1449 * Instead, the first F frame becomes a scenecut.
1450 * If the video ends before F, no frame becomes a scenecut. */
1451 for( int curp0
= p0
; curp0
<= maxp1
; curp0
++ )
1452 if( origmaxp1
> i_max_search
|| (curp0
< maxp1
&& scenecut_internal( h
, a
, frames
, curp0
, maxp1
, 0 )) )
1453 /* If cur_p0 is the p0 of a scenecut, it cannot be the p1 of a scenecut. */
1454 frames
[curp0
]->b_scenecut
= 0;
1457 /* Ignore frames that are part of a flash, i.e. cannot be real scenecuts. */
1458 if( !frames
[p1
]->b_scenecut
)
1460 return scenecut_internal( h
, a
, frames
, p0
, p1
, real_scenecut
);
1463 #define IS_X264_TYPE_AUTO_OR_I(x) ((x)==X264_TYPE_AUTO || IS_X264_TYPE_I(x))
1464 #define IS_X264_TYPE_AUTO_OR_B(x) ((x)==X264_TYPE_AUTO || IS_X264_TYPE_B(x))
1466 void x264_slicetype_analyse( x264_t
*h
, int intra_minigop
)
1468 x264_mb_analysis_t a
;
1469 x264_frame_t
*frames
[X264_LOOKAHEAD_MAX
+3] = { NULL
, };
1470 int num_frames
, orig_num_frames
, keyint_limit
, framecnt
;
1471 int i_max_search
= X264_MIN( h
->lookahead
->next
.i_size
, X264_LOOKAHEAD_MAX
);
1472 int b_vbv_lookahead
= h
->param
.rc
.i_vbv_buffer_size
&& h
->param
.rc
.i_lookahead
;
1473 /* For determinism we should limit the search to the number of frames lookahead has for sure
1474 * in h->lookahead->next.list buffer, except at the end of stream.
1475 * For normal calls with (intra_minigop == 0) that is h->lookahead->i_slicetype_length + 1 frames.
1476 * And for I-frame calls (intra_minigop != 0) we already removed intra_minigop frames from there. */
1477 if( h
->param
.b_deterministic
)
1478 i_max_search
= X264_MIN( i_max_search
, h
->lookahead
->i_slicetype_length
+ 1 - intra_minigop
);
1479 int keyframe
= !!intra_minigop
;
1481 assert( h
->frames
.b_have_lowres
);
1483 if( !h
->lookahead
->last_nonb
)
1485 frames
[0] = h
->lookahead
->last_nonb
;
1486 for( framecnt
= 0; framecnt
< i_max_search
; framecnt
++ )
1487 frames
[framecnt
+1] = h
->lookahead
->next
.list
[framecnt
];
1489 lowres_context_init( h
, &a
);
1493 if( h
->param
.rc
.b_mb_tree
)
1494 macroblock_tree( h
, &a
, frames
, 0, keyframe
);
1498 keyint_limit
= h
->param
.i_keyint_max
- frames
[0]->i_frame
+ h
->lookahead
->i_last_keyframe
- 1;
1499 orig_num_frames
= num_frames
= h
->param
.b_intra_refresh
? framecnt
: X264_MIN( framecnt
, keyint_limit
);
1501 /* This is important psy-wise: if we have a non-scenecut keyframe,
1502 * there will be significant visual artifacts if the frames just before
1503 * go down in quality due to being referenced less, despite it being
1504 * more RD-optimal. */
1505 if( (h
->param
.analyse
.b_psy
&& h
->param
.rc
.b_mb_tree
) || b_vbv_lookahead
)
1506 num_frames
= framecnt
;
1507 else if( h
->param
.b_open_gop
&& num_frames
< framecnt
)
1509 else if( num_frames
== 0 )
1511 frames
[1]->i_type
= X264_TYPE_I
;
1515 if( IS_X264_TYPE_AUTO_OR_I( frames
[1]->i_type
) &&
1516 h
->param
.i_scenecut_threshold
&& scenecut( h
, &a
, frames
, 0, 1, 1, orig_num_frames
, i_max_search
) )
1518 if( frames
[1]->i_type
== X264_TYPE_AUTO
)
1519 frames
[1]->i_type
= X264_TYPE_I
;
1524 x264_opencl_slicetype_prep( h
, frames
, num_frames
, a
.i_lambda
);
1527 /* Replace forced keyframes with I/IDR-frames */
1528 for( int j
= 1; j
<= num_frames
; j
++ )
1530 if( frames
[j
]->i_type
== X264_TYPE_KEYFRAME
)
1531 frames
[j
]->i_type
= h
->param
.b_open_gop
? X264_TYPE_I
: X264_TYPE_IDR
;
1534 /* Close GOP at IDR-frames */
1535 for( int j
= 2; j
<= num_frames
; j
++ )
1537 if( frames
[j
]->i_type
== X264_TYPE_IDR
&& IS_X264_TYPE_AUTO_OR_B( frames
[j
-1]->i_type
) )
1538 frames
[j
-1]->i_type
= X264_TYPE_P
;
1541 int num_analysed_frames
= num_frames
;
1544 if( h
->param
.i_bframe
)
1546 if( h
->param
.i_bframe_adaptive
== X264_B_ADAPT_TRELLIS
)
1548 if( num_frames
> 1 )
1550 char best_paths
[X264_BFRAME_MAX
+1][X264_LOOKAHEAD_MAX
+1] = {"","P"};
1551 int best_path_index
= num_frames
% (X264_BFRAME_MAX
+1);
1553 /* Perform the frametype analysis. */
1554 for( int j
= 2; j
<= num_frames
; j
++ )
1555 slicetype_path( h
, &a
, frames
, j
, best_paths
);
1557 /* Load the results of the analysis into the frame types. */
1558 for( int j
= 1; j
< num_frames
; j
++ )
1560 if( best_paths
[best_path_index
][j
-1] != 'B' )
1562 if( IS_X264_TYPE_AUTO_OR_B( frames
[j
]->i_type
) )
1563 frames
[j
]->i_type
= X264_TYPE_P
;
1567 if( frames
[j
]->i_type
== X264_TYPE_AUTO
)
1568 frames
[j
]->i_type
= X264_TYPE_B
;
1573 else if( h
->param
.i_bframe_adaptive
== X264_B_ADAPT_FAST
)
1576 int num_bframes
= h
->param
.i_bframe
;
1577 char path
[X264_LOOKAHEAD_MAX
+1];
1578 for( int j
= 1; j
< num_frames
; j
++ )
1580 if( j
-1 > 0 && IS_X264_TYPE_B( frames
[j
-1]->i_type
) )
1585 num_bframes
= h
->param
.i_bframe
;
1589 if( IS_X264_TYPE_AUTO_OR_B( frames
[j
]->i_type
) )
1590 frames
[j
]->i_type
= X264_TYPE_P
;
1594 if( frames
[j
]->i_type
!= X264_TYPE_AUTO
)
1597 if( IS_X264_TYPE_B( frames
[j
+1]->i_type
) )
1599 frames
[j
]->i_type
= X264_TYPE_P
;
1603 int bframes
= j
- last_nonb
- 1;
1604 memset( path
, 'B', bframes
);
1605 strcpy( path
+bframes
, "PP" );
1606 uint64_t cost_p
= slicetype_path_cost( h
, &a
, frames
+last_nonb
, path
, COST_MAX64
);
1607 strcpy( path
+bframes
, "BP" );
1608 uint64_t cost_b
= slicetype_path_cost( h
, &a
, frames
+last_nonb
, path
, cost_p
);
1610 if( cost_b
< cost_p
)
1611 frames
[j
]->i_type
= X264_TYPE_B
;
1613 frames
[j
]->i_type
= X264_TYPE_P
;
1618 int num_bframes
= h
->param
.i_bframe
;
1619 for( int j
= 1; j
< num_frames
; j
++ )
1623 if( IS_X264_TYPE_AUTO_OR_B( frames
[j
]->i_type
) )
1624 frames
[j
]->i_type
= X264_TYPE_P
;
1626 else if( frames
[j
]->i_type
== X264_TYPE_AUTO
)
1628 if( IS_X264_TYPE_B( frames
[j
+1]->i_type
) )
1629 frames
[j
]->i_type
= X264_TYPE_P
;
1631 frames
[j
]->i_type
= X264_TYPE_B
;
1633 if( IS_X264_TYPE_B( frames
[j
]->i_type
) )
1636 num_bframes
= h
->param
.i_bframe
;
1639 if( IS_X264_TYPE_AUTO_OR_B( frames
[num_frames
]->i_type
) )
1640 frames
[num_frames
]->i_type
= X264_TYPE_P
;
1642 int num_bframes
= 0;
1643 while( num_bframes
< num_frames
&& IS_X264_TYPE_B( frames
[num_bframes
+1]->i_type
) )
1646 /* Check scenecut on the first minigop. */
1647 for( int j
= 1; j
< num_bframes
+1; j
++ )
1649 if( frames
[j
]->i_forced_type
== X264_TYPE_AUTO
&& IS_X264_TYPE_AUTO_OR_I( frames
[j
+1]->i_forced_type
) &&
1650 h
->param
.i_scenecut_threshold
&& scenecut( h
, &a
, frames
, j
, j
+1, 0, orig_num_frames
, i_max_search
) )
1652 frames
[j
]->i_type
= X264_TYPE_P
;
1653 num_analysed_frames
= j
;
1658 reset_start
= keyframe
? 1 : X264_MIN( num_bframes
+2, num_analysed_frames
+1 );
1662 for( int j
= 1; j
<= num_frames
; j
++ )
1663 if( IS_X264_TYPE_AUTO_OR_B( frames
[j
]->i_type
) )
1664 frames
[j
]->i_type
= X264_TYPE_P
;
1665 reset_start
= !keyframe
+ 1;
1668 /* Perform the actual macroblock tree analysis.
1669 * Don't go farther than the maximum keyframe interval; this helps in short GOPs. */
1670 if( h
->param
.rc
.b_mb_tree
)
1671 macroblock_tree( h
, &a
, frames
, X264_MIN(num_frames
, h
->param
.i_keyint_max
), keyframe
);
1673 /* Enforce keyframe limit. */
1674 if( !h
->param
.b_intra_refresh
)
1676 int last_keyframe
= h
->lookahead
->i_last_keyframe
;
1677 int last_possible
= 0;
1678 for( int j
= 1; j
<= num_frames
; j
++ )
1680 x264_frame_t
*frm
= frames
[j
];
1681 int keyframe_dist
= frm
->i_frame
- last_keyframe
;
1683 if( IS_X264_TYPE_AUTO_OR_I( frm
->i_forced_type
) )
1685 if( h
->param
.b_open_gop
|| !IS_X264_TYPE_B( frames
[j
-1]->i_forced_type
) )
1688 if( keyframe_dist
>= h
->param
.i_keyint_max
)
1690 if( last_possible
!= 0 && last_possible
!= j
)
1694 keyframe_dist
= frm
->i_frame
- last_keyframe
;
1697 if( frm
->i_type
!= X264_TYPE_IDR
)
1698 frm
->i_type
= h
->param
.b_open_gop
? X264_TYPE_I
: X264_TYPE_IDR
;
1700 if( frm
->i_type
== X264_TYPE_I
&& keyframe_dist
>= h
->param
.i_keyint_min
)
1702 if( h
->param
.b_open_gop
)
1704 last_keyframe
= frm
->i_frame
;
1705 if( h
->param
.b_bluray_compat
)
1709 while( bframes
< j
-1 && IS_X264_TYPE_B( frames
[j
-1-bframes
]->i_type
) )
1711 last_keyframe
-= bframes
;
1714 else if( frm
->i_forced_type
!= X264_TYPE_I
)
1715 frm
->i_type
= X264_TYPE_IDR
;
1717 if( frm
->i_type
== X264_TYPE_IDR
)
1719 last_keyframe
= frm
->i_frame
;
1720 if( j
> 1 && IS_X264_TYPE_B( frames
[j
-1]->i_type
) )
1721 frames
[j
-1]->i_type
= X264_TYPE_P
;
1726 if( b_vbv_lookahead
)
1727 vbv_lookahead( h
, &a
, frames
, num_frames
, keyframe
);
1729 /* Restore frametypes for all frames that haven't actually been decided yet. */
1730 for( int j
= reset_start
; j
<= num_frames
; j
++ )
1731 frames
[j
]->i_type
= frames
[j
]->i_forced_type
;
1734 x264_opencl_slicetype_end( h
);
1738 void x264_slicetype_decide( x264_t
*h
)
1740 x264_frame_t
*frames
[X264_BFRAME_MAX
+2];
1745 if( !h
->lookahead
->next
.i_size
)
1748 int lookahead_size
= h
->lookahead
->next
.i_size
;
1750 for( int i
= 0; i
< h
->lookahead
->next
.i_size
; i
++ )
1752 if( h
->param
.b_vfr_input
)
1754 if( lookahead_size
-- > 1 )
1755 h
->lookahead
->next
.list
[i
]->i_duration
= 2 * (h
->lookahead
->next
.list
[i
+1]->i_pts
- h
->lookahead
->next
.list
[i
]->i_pts
);
1757 h
->lookahead
->next
.list
[i
]->i_duration
= h
->i_prev_duration
;
1760 h
->lookahead
->next
.list
[i
]->i_duration
= delta_tfi_divisor
[h
->lookahead
->next
.list
[i
]->i_pic_struct
];
1761 h
->i_prev_duration
= h
->lookahead
->next
.list
[i
]->i_duration
;
1762 h
->lookahead
->next
.list
[i
]->f_duration
= (double)h
->lookahead
->next
.list
[i
]->i_duration
1763 * h
->sps
->vui
.i_num_units_in_tick
1764 / h
->sps
->vui
.i_time_scale
;
1766 if( h
->lookahead
->next
.list
[i
]->i_frame
> h
->i_disp_fields_last_frame
&& lookahead_size
> 0 )
1768 h
->lookahead
->next
.list
[i
]->i_field_cnt
= h
->i_disp_fields
;
1769 h
->i_disp_fields
+= h
->lookahead
->next
.list
[i
]->i_duration
;
1770 h
->i_disp_fields_last_frame
= h
->lookahead
->next
.list
[i
]->i_frame
;
1772 else if( lookahead_size
== 0 )
1774 h
->lookahead
->next
.list
[i
]->i_field_cnt
= h
->i_disp_fields
;
1775 h
->lookahead
->next
.list
[i
]->i_duration
= h
->i_prev_duration
;
1779 if( h
->param
.rc
.b_stat_read
)
1781 /* Use the frame types from the first pass */
1782 for( int i
= 0; i
< h
->lookahead
->next
.i_size
; i
++ )
1783 h
->lookahead
->next
.list
[i
]->i_type
=
1784 x264_ratecontrol_slice_type( h
, h
->lookahead
->next
.list
[i
]->i_frame
);
1786 else if( (h
->param
.i_bframe
&& h
->param
.i_bframe_adaptive
)
1787 || h
->param
.i_scenecut_threshold
1788 || h
->param
.rc
.b_mb_tree
1789 || (h
->param
.rc
.i_vbv_buffer_size
&& h
->param
.rc
.i_lookahead
) )
1790 x264_slicetype_analyse( h
, 0 );
1792 for( bframes
= 0, brefs
= 0;; bframes
++ )
1794 frm
= h
->lookahead
->next
.list
[bframes
];
1796 if( frm
->i_forced_type
!= X264_TYPE_AUTO
&& frm
->i_type
!= frm
->i_forced_type
&&
1797 !(frm
->i_forced_type
== X264_TYPE_KEYFRAME
&& IS_X264_TYPE_I( frm
->i_type
)) )
1799 x264_log( h
, X264_LOG_WARNING
, "forced frame type (%d) at %d was changed to frame type (%d)\n",
1800 frm
->i_forced_type
, frm
->i_frame
, frm
->i_type
);
1803 if( frm
->i_type
== X264_TYPE_BREF
&& h
->param
.i_bframe_pyramid
< X264_B_PYRAMID_NORMAL
&&
1804 brefs
== h
->param
.i_bframe_pyramid
)
1806 frm
->i_type
= X264_TYPE_B
;
1807 x264_log( h
, X264_LOG_WARNING
, "B-ref at frame %d incompatible with B-pyramid %s \n",
1808 frm
->i_frame
, x264_b_pyramid_names
[h
->param
.i_bframe_pyramid
] );
1810 /* pyramid with multiple B-refs needs a big enough dpb that the preceding P-frame stays available.
1811 smaller dpb could be supported by smart enough use of mmco, but it's easier just to forbid it. */
1812 else if( frm
->i_type
== X264_TYPE_BREF
&& h
->param
.i_bframe_pyramid
== X264_B_PYRAMID_NORMAL
&&
1813 brefs
&& h
->param
.i_frame_reference
<= (brefs
+3) )
1815 frm
->i_type
= X264_TYPE_B
;
1816 x264_log( h
, X264_LOG_WARNING
, "B-ref at frame %d incompatible with B-pyramid %s and %d reference frames\n",
1817 frm
->i_frame
, x264_b_pyramid_names
[h
->param
.i_bframe_pyramid
], h
->param
.i_frame_reference
);
1820 if( frm
->i_type
== X264_TYPE_KEYFRAME
)
1821 frm
->i_type
= h
->param
.b_open_gop
? X264_TYPE_I
: X264_TYPE_IDR
;
1823 /* Limit GOP size */
1824 if( (!h
->param
.b_intra_refresh
|| frm
->i_frame
== 0) && frm
->i_frame
- h
->lookahead
->i_last_keyframe
>= h
->param
.i_keyint_max
)
1826 if( frm
->i_type
== X264_TYPE_AUTO
|| frm
->i_type
== X264_TYPE_I
)
1827 frm
->i_type
= h
->param
.b_open_gop
&& h
->lookahead
->i_last_keyframe
>= 0 ? X264_TYPE_I
: X264_TYPE_IDR
;
1828 int warn
= frm
->i_type
!= X264_TYPE_IDR
;
1829 if( warn
&& h
->param
.b_open_gop
)
1830 warn
&= frm
->i_type
!= X264_TYPE_I
;
1833 x264_log( h
, X264_LOG_WARNING
, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm
->i_type
, frm
->i_frame
);
1834 frm
->i_type
= h
->param
.b_open_gop
&& h
->lookahead
->i_last_keyframe
>= 0 ? X264_TYPE_I
: X264_TYPE_IDR
;
1837 if( frm
->i_type
== X264_TYPE_I
&& frm
->i_frame
- h
->lookahead
->i_last_keyframe
>= h
->param
.i_keyint_min
)
1839 if( h
->param
.b_open_gop
)
1841 h
->lookahead
->i_last_keyframe
= frm
->i_frame
; // Use display order
1842 if( h
->param
.b_bluray_compat
)
1843 h
->lookahead
->i_last_keyframe
-= bframes
; // Use bluray order
1844 frm
->b_keyframe
= 1;
1847 frm
->i_type
= X264_TYPE_IDR
;
1849 if( frm
->i_type
== X264_TYPE_IDR
)
1852 h
->lookahead
->i_last_keyframe
= frm
->i_frame
;
1853 frm
->b_keyframe
= 1;
1857 h
->lookahead
->next
.list
[bframes
]->i_type
= X264_TYPE_P
;
1861 if( bframes
== h
->param
.i_bframe
||
1862 !h
->lookahead
->next
.list
[bframes
+1] )
1864 if( IS_X264_TYPE_B( frm
->i_type
) )
1865 x264_log( h
, X264_LOG_WARNING
, "specified frame type is not compatible with max B-frames\n" );
1866 if( frm
->i_type
== X264_TYPE_AUTO
1867 || IS_X264_TYPE_B( frm
->i_type
) )
1868 frm
->i_type
= X264_TYPE_P
;
1871 if( frm
->i_type
== X264_TYPE_BREF
)
1874 if( frm
->i_type
== X264_TYPE_AUTO
)
1875 frm
->i_type
= X264_TYPE_B
;
1877 else if( !IS_X264_TYPE_B( frm
->i_type
) ) break;
1881 h
->lookahead
->next
.list
[bframes
-1]->b_last_minigop_bframe
= 1;
1882 h
->lookahead
->next
.list
[bframes
]->i_bframes
= bframes
;
1884 /* insert a bref into the sequence */
1885 if( h
->param
.i_bframe_pyramid
&& bframes
> 1 && !brefs
)
1887 h
->lookahead
->next
.list
[(bframes
-1)/2]->i_type
= X264_TYPE_BREF
;
1891 /* calculate the frame costs ahead of time for x264_rc_analyse_slice while we still have lowres */
1892 if( h
->param
.rc
.i_rc_method
!= X264_RC_CQP
)
1894 x264_mb_analysis_t a
;
1896 p1
= b
= bframes
+ 1;
1898 lowres_context_init( h
, &a
);
1900 frames
[0] = h
->lookahead
->last_nonb
;
1901 memcpy( &frames
[1], h
->lookahead
->next
.list
, (bframes
+1) * sizeof(x264_frame_t
*) );
1902 if( IS_X264_TYPE_I( h
->lookahead
->next
.list
[bframes
]->i_type
) )
1907 slicetype_frame_cost( h
, &a
, frames
, p0
, p1
, b
);
1909 if( (p0
!= p1
|| bframes
) && h
->param
.rc
.i_vbv_buffer_size
)
1911 /* We need the intra costs for row SATDs. */
1912 slicetype_frame_cost( h
, &a
, frames
, b
, b
, b
);
1914 /* We need B-frame costs for row SATDs. */
1916 for( b
= 1; b
<= bframes
; b
++ )
1918 if( frames
[b
]->i_type
== X264_TYPE_B
)
1919 for( p1
= b
; frames
[p1
]->i_type
== X264_TYPE_B
; )
1923 slicetype_frame_cost( h
, &a
, frames
, p0
, p1
, b
);
1924 if( frames
[b
]->i_type
== X264_TYPE_BREF
)
1930 /* Analyse for weighted P frames */
1931 if( !h
->param
.rc
.b_stat_read
&& h
->lookahead
->next
.list
[bframes
]->i_type
== X264_TYPE_P
1932 && h
->param
.analyse
.i_weighted_pred
>= X264_WEIGHTP_SIMPLE
)
1935 x264_weights_analyse( h
, h
->lookahead
->next
.list
[bframes
], h
->lookahead
->last_nonb
, 0 );
1938 /* shift sequence to coded order.
1939 use a small temporary list to avoid shifting the entire next buffer around */
1940 int i_coded
= h
->lookahead
->next
.list
[0]->i_frame
;
1943 int idx_list
[] = { brefs
+1, 1 };
1944 for( int i
= 0; i
< bframes
; i
++ )
1946 int idx
= idx_list
[h
->lookahead
->next
.list
[i
]->i_type
== X264_TYPE_BREF
]++;
1947 frames
[idx
] = h
->lookahead
->next
.list
[i
];
1948 frames
[idx
]->i_reordered_pts
= h
->lookahead
->next
.list
[idx
]->i_pts
;
1950 frames
[0] = h
->lookahead
->next
.list
[bframes
];
1951 frames
[0]->i_reordered_pts
= h
->lookahead
->next
.list
[0]->i_pts
;
1952 memcpy( h
->lookahead
->next
.list
, frames
, (bframes
+1) * sizeof(x264_frame_t
*) );
1955 for( int i
= 0; i
<= bframes
; i
++ )
1957 h
->lookahead
->next
.list
[i
]->i_coded
= i_coded
++;
1960 calculate_durations( h
, h
->lookahead
->next
.list
[i
], h
->lookahead
->next
.list
[i
-1], &h
->i_cpb_delay
, &h
->i_coded_fields
);
1961 h
->lookahead
->next
.list
[0]->f_planned_cpb_duration
[i
-1] = (double)h
->lookahead
->next
.list
[i
]->i_cpb_duration
*
1962 h
->sps
->vui
.i_num_units_in_tick
/ h
->sps
->vui
.i_time_scale
;
1965 calculate_durations( h
, h
->lookahead
->next
.list
[i
], NULL
, &h
->i_cpb_delay
, &h
->i_coded_fields
);
1969 int x264_rc_analyse_slice( x264_t
*h
)
1975 if( IS_X264_TYPE_I(h
->fenc
->i_type
) )
1977 else if( h
->fenc
->i_type
== X264_TYPE_P
)
1978 p1
= b
= h
->fenc
->i_bframes
+ 1;
1981 p1
= (h
->fref_nearest
[1]->i_poc
- h
->fref_nearest
[0]->i_poc
)/2;
1982 b
= (h
->fenc
->i_poc
- h
->fref_nearest
[0]->i_poc
)/2;
1984 /* We don't need to assign p0/p1 since we are not performing any real analysis here. */
1985 x264_frame_t
**frames
= &h
->fenc
- b
;
1987 /* cost should have been already calculated by x264_slicetype_decide */
1988 cost
= frames
[b
]->i_cost_est
[b
-p0
][p1
-b
];
1989 assert( cost
>= 0 );
1991 if( h
->param
.rc
.b_mb_tree
&& !h
->param
.rc
.b_stat_read
)
1993 cost
= slicetype_frame_cost_recalculate( h
, frames
, p0
, p1
, b
);
1994 if( b
&& h
->param
.rc
.i_vbv_buffer_size
)
1995 slicetype_frame_cost_recalculate( h
, frames
, b
, b
, b
);
1997 /* In AQ, use the weighted score instead. */
1998 else if( h
->param
.rc
.i_aq_mode
)
1999 cost
= frames
[b
]->i_cost_est_aq
[b
-p0
][p1
-b
];
2001 h
->fenc
->i_row_satd
= h
->fenc
->i_row_satds
[b
-p0
][p1
-b
];
2002 h
->fdec
->i_row_satd
= h
->fdec
->i_row_satds
[b
-p0
][p1
-b
];
2003 h
->fdec
->i_satd
= cost
;
2004 memcpy( h
->fdec
->i_row_satd
, h
->fenc
->i_row_satd
, h
->mb
.i_mb_height
* sizeof(int) );
2005 if( !IS_X264_TYPE_I(h
->fenc
->i_type
) )
2006 memcpy( h
->fdec
->i_row_satds
[0][0], h
->fenc
->i_row_satds
[0][0], h
->mb
.i_mb_height
* sizeof(int) );
2008 if( h
->param
.b_intra_refresh
&& h
->param
.rc
.i_vbv_buffer_size
&& h
->fenc
->i_type
== X264_TYPE_P
)
2010 int ip_factor
= 256 * h
->param
.rc
.f_ip_factor
; /* fix8 */
2011 for( int y
= 0; y
< h
->mb
.i_mb_height
; y
++ )
2013 int mb_xy
= y
* h
->mb
.i_mb_stride
+ h
->fdec
->i_pir_start_col
;
2014 for( int x
= h
->fdec
->i_pir_start_col
; x
<= h
->fdec
->i_pir_end_col
; x
++, mb_xy
++ )
2016 int intra_cost
= (h
->fenc
->i_intra_cost
[mb_xy
] * ip_factor
+ 128) >> 8;
2017 int inter_cost
= h
->fenc
->lowres_costs
[b
-p0
][p1
-b
][mb_xy
] & LOWRES_COST_MASK
;
2018 int diff
= intra_cost
- inter_cost
;
2019 if( h
->param
.rc
.i_aq_mode
)
2020 h
->fdec
->i_row_satd
[y
] += (diff
* frames
[b
]->i_inv_qscale_factor
[mb_xy
] + 128) >> 8;
2022 h
->fdec
->i_row_satd
[y
] += diff
;