/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
/*
 * This is an example demonstrating how to implement a multi-layer
 * VP9 encoding scheme based on spatial scalability for video applications
 * that benefit from a scalable bitstream.
 */
25 #include "../tools_common.h"
26 #include "../video_writer.h"
28 #include "../vpx_ports/vpx_timer.h"
29 #include "vpx/svc_context.h"
30 #include "vpx/vp8cx.h"
31 #include "vpx/vpx_encoder.h"
32 #include "../vpxstats.h"
33 #include "vp9/encoder/vp9_encoder.h"
34 #define OUTPUT_RC_STATS 1
36 static const arg_def_t skip_frames_arg
=
37 ARG_DEF("s", "skip-frames", 1, "input frames to skip");
38 static const arg_def_t frames_arg
=
39 ARG_DEF("f", "frames", 1, "number of frames to encode");
40 static const arg_def_t threads_arg
=
41 ARG_DEF("th", "threads", 1, "number of threads to use");
43 static const arg_def_t output_rc_stats_arg
=
44 ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
46 static const arg_def_t width_arg
= ARG_DEF("w", "width", 1, "source width");
47 static const arg_def_t height_arg
= ARG_DEF("h", "height", 1, "source height");
48 static const arg_def_t timebase_arg
=
49 ARG_DEF("t", "timebase", 1, "timebase (num/den)");
50 static const arg_def_t bitrate_arg
= ARG_DEF(
51 "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
52 static const arg_def_t spatial_layers_arg
=
53 ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
54 static const arg_def_t temporal_layers_arg
=
55 ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
56 static const arg_def_t temporal_layering_mode_arg
=
57 ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
58 "VP9E_TEMPORAL_LAYERING_MODE");
59 static const arg_def_t kf_dist_arg
=
60 ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
61 static const arg_def_t scale_factors_arg
=
62 ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
63 static const arg_def_t passes_arg
=
64 ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
65 static const arg_def_t pass_arg
=
66 ARG_DEF(NULL
, "pass", 1, "Pass to execute (1/2)");
67 static const arg_def_t fpf_name_arg
=
68 ARG_DEF(NULL
, "fpf", 1, "First pass statistics file name");
69 static const arg_def_t min_q_arg
=
70 ARG_DEF(NULL
, "min-q", 1, "Minimum quantizer");
71 static const arg_def_t max_q_arg
=
72 ARG_DEF(NULL
, "max-q", 1, "Maximum quantizer");
73 static const arg_def_t min_bitrate_arg
=
74 ARG_DEF(NULL
, "min-bitrate", 1, "Minimum bitrate");
75 static const arg_def_t max_bitrate_arg
=
76 ARG_DEF(NULL
, "max-bitrate", 1, "Maximum bitrate");
77 static const arg_def_t lag_in_frame_arg
=
78 ARG_DEF(NULL
, "lag-in-frames", 1, "Number of frame to input before "
79 "generating any outputs");
80 static const arg_def_t rc_end_usage_arg
=
81 ARG_DEF(NULL
, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
82 static const arg_def_t speed_arg
=
83 ARG_DEF("sp", "speed", 1, "speed configuration");
84 static const arg_def_t aqmode_arg
=
85 ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
// Bit-depth option: an enum lookup table plus the "-d/--bit-depth" option
// descriptor.  Both exist only when CONFIG_VP9_HIGHBITDEPTH is non-zero.
// NOTE(review): the rows and terminator of bitdepth_enum, and the last
// argument of ARG_DEF_ENUM, are not present in this copy of the file --
// restore them from the upstream source before building.
87 #if CONFIG_VP9_HIGHBITDEPTH
88 static const struct arg_enum_list bitdepth_enum
[] = {
// Option descriptor; the help text advertises the values 8, 10 and 12.
95 static const arg_def_t bitdepth_arg
=
96 ARG_DEF_ENUM("d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ",
98 #endif // CONFIG_VP9_HIGHBITDEPTH
// NULL-terminated table of option descriptors.  usage_exit() hands it to
// arg_show_usage() to print the option list.
// NOTE(review): speed_arg and bitdepth_arg are matched in
// parse_command_line() but do not appear in the entries visible here, and the
// closing "};" is missing from this copy -- verify against upstream.
101 static const arg_def_t
*svc_args
[] = {
102 &frames_arg
, &width_arg
, &height_arg
,
103 &timebase_arg
, &bitrate_arg
, &skip_frames_arg
, &spatial_layers_arg
,
104 &kf_dist_arg
, &scale_factors_arg
, &passes_arg
, &pass_arg
,
105 &fpf_name_arg
, &min_q_arg
, &max_q_arg
, &min_bitrate_arg
,
106 &max_bitrate_arg
, &temporal_layers_arg
, &temporal_layering_mode_arg
,
107 &lag_in_frame_arg
, &threads_arg
, &aqmode_arg
,
109 &output_rc_stats_arg
,
// NOTE(review): the body and #endif of this conditional section are not
// visible in this copy.
112 #if CONFIG_VP9_HIGHBITDEPTH
116 &rc_end_usage_arg
, NULL
// Application defaults, applied by parse_command_line() before any
// command-line option is processed.

// Input selection.
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;

// Source geometry and timebase (num/den).
static const uint32_t default_width = 1920;
static const uint32_t default_height = 1080;
static const uint32_t default_timebase_num = 1;
static const uint32_t default_timebase_den = 60;

// Rate and layer structure.
static const uint32_t default_bitrate = 1000;
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_kf_dist = 100;
static const uint32_t default_temporal_layering_mode = 0;
static const uint32_t default_output_rc_stats = 0;

// Encoder tuning.
static const int32_t default_speed = -1;    // -1 means use library default.
static const uint32_t default_threads = 0;  // zero means use library default.
// Members of the application input record (used elsewhere as AppInput).
// NOTE(review): the struct header and trailer are missing from this copy, as
// are the rc_stats, passes and pass members that parse_command_line()
// accesses -- restore from upstream.
// Path of the source video; parse_command_line() sets it from the first
// positional argument.
135 const char *input_filename
;
// Path of the encoded output; set from the second positional argument.
136 const char *output_filename
;
// Number of frames to encode (--frames).
137 uint32_t frames_to_code
;
// Number of leading input frames to skip (--skip-frames).
138 uint32_t frames_to_skip
;
// NOTE(review): not referenced anywhere in the visible code.
139 struct VpxInputContext input_ctx
;
// NOTE(review): presumably the program name substituted into the usage line
// below; the fprintf argument that would show this is missing from this copy.
145 static const char *exec_name
;
// Prints the usage line followed by the option table (svc_args) to stderr.
// NOTE(review): the remainder of the function (the "%s" argument, whatever
// terminates the process, and the closing brace) is missing from this copy.
147 void usage_exit(void) {
148 fprintf(stderr
, "Usage: %s <options> input_filename output_filename\n",
150 fprintf(stderr
, "Options:\n");
151 arg_show_usage(stderr
, svc_args
);
155 static void parse_command_line(int argc
, const char **argv_
,
156 AppInput
*app_input
, SvcContext
*svc_ctx
,
157 vpx_codec_enc_cfg_t
*enc_cfg
) {
158 struct arg arg
= {0};
165 const char *fpf_file_name
= NULL
;
166 unsigned int min_bitrate
= 0;
167 unsigned int max_bitrate
= 0;
168 char string_options
[1024] = {0};
170 // initialize SvcContext with parameters that will be passed to vpx_svc_init
171 svc_ctx
->log_level
= SVC_LOG_DEBUG
;
172 svc_ctx
->spatial_layers
= default_spatial_layers
;
173 svc_ctx
->temporal_layers
= default_temporal_layers
;
174 svc_ctx
->temporal_layering_mode
= default_temporal_layering_mode
;
176 svc_ctx
->output_rc_stat
= default_output_rc_stats
;
178 svc_ctx
->speed
= default_speed
;
179 svc_ctx
->threads
= default_threads
;
181 // start with default encoder configuration
182 res
= vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg
, 0);
184 die("Failed to get config: %s\n", vpx_codec_err_to_string(res
));
186 // update enc_cfg with app default values
187 enc_cfg
->g_w
= default_width
;
188 enc_cfg
->g_h
= default_height
;
189 enc_cfg
->g_timebase
.num
= default_timebase_num
;
190 enc_cfg
->g_timebase
.den
= default_timebase_den
;
191 enc_cfg
->rc_target_bitrate
= default_bitrate
;
192 enc_cfg
->kf_min_dist
= default_kf_dist
;
193 enc_cfg
->kf_max_dist
= default_kf_dist
;
194 enc_cfg
->rc_end_usage
= VPX_CQ
;
196 // initialize AppInput with default values
197 app_input
->frames_to_code
= default_frames_to_code
;
198 app_input
->frames_to_skip
= default_frames_to_skip
;
200 // process command line options
201 argv
= argv_dup(argc
- 1, argv_
+ 1);
202 for (argi
= argj
= argv
; (*argj
= *argi
); argi
+= arg
.argv_step
) {
205 if (arg_match(&arg
, &frames_arg
, argi
)) {
206 app_input
->frames_to_code
= arg_parse_uint(&arg
);
207 } else if (arg_match(&arg
, &width_arg
, argi
)) {
208 enc_cfg
->g_w
= arg_parse_uint(&arg
);
209 } else if (arg_match(&arg
, &height_arg
, argi
)) {
210 enc_cfg
->g_h
= arg_parse_uint(&arg
);
211 } else if (arg_match(&arg
, &timebase_arg
, argi
)) {
212 enc_cfg
->g_timebase
= arg_parse_rational(&arg
);
213 } else if (arg_match(&arg
, &bitrate_arg
, argi
)) {
214 enc_cfg
->rc_target_bitrate
= arg_parse_uint(&arg
);
215 } else if (arg_match(&arg
, &skip_frames_arg
, argi
)) {
216 app_input
->frames_to_skip
= arg_parse_uint(&arg
);
217 } else if (arg_match(&arg
, &spatial_layers_arg
, argi
)) {
218 svc_ctx
->spatial_layers
= arg_parse_uint(&arg
);
219 } else if (arg_match(&arg
, &temporal_layers_arg
, argi
)) {
220 svc_ctx
->temporal_layers
= arg_parse_uint(&arg
);
222 } else if (arg_match(&arg
, &output_rc_stats_arg
, argi
)) {
223 svc_ctx
->output_rc_stat
= arg_parse_uint(&arg
);
225 } else if (arg_match(&arg
, &speed_arg
, argi
)) {
226 svc_ctx
->speed
= arg_parse_uint(&arg
);
227 } else if (arg_match(&arg
, &aqmode_arg
, argi
)) {
228 svc_ctx
->aqmode
= arg_parse_uint(&arg
);
229 } else if (arg_match(&arg
, &threads_arg
, argi
)) {
230 svc_ctx
->threads
= arg_parse_uint(&arg
);
231 } else if (arg_match(&arg
, &temporal_layering_mode_arg
, argi
)) {
232 svc_ctx
->temporal_layering_mode
=
233 enc_cfg
->temporal_layering_mode
= arg_parse_int(&arg
);
234 if (svc_ctx
->temporal_layering_mode
) {
235 enc_cfg
->g_error_resilient
= 1;
237 } else if (arg_match(&arg
, &kf_dist_arg
, argi
)) {
238 enc_cfg
->kf_min_dist
= arg_parse_uint(&arg
);
239 enc_cfg
->kf_max_dist
= enc_cfg
->kf_min_dist
;
240 } else if (arg_match(&arg
, &scale_factors_arg
, argi
)) {
241 snprintf(string_options
, sizeof(string_options
), "%s scale-factors=%s",
242 string_options
, arg
.val
);
243 } else if (arg_match(&arg
, &passes_arg
, argi
)) {
244 passes
= arg_parse_uint(&arg
);
245 if (passes
< 1 || passes
> 2) {
246 die("Error: Invalid number of passes (%d)\n", passes
);
248 } else if (arg_match(&arg
, &pass_arg
, argi
)) {
249 pass
= arg_parse_uint(&arg
);
250 if (pass
< 1 || pass
> 2) {
251 die("Error: Invalid pass selected (%d)\n", pass
);
253 } else if (arg_match(&arg
, &fpf_name_arg
, argi
)) {
254 fpf_file_name
= arg
.val
;
255 } else if (arg_match(&arg
, &min_q_arg
, argi
)) {
256 snprintf(string_options
, sizeof(string_options
), "%s min-quantizers=%s",
257 string_options
, arg
.val
);
258 } else if (arg_match(&arg
, &max_q_arg
, argi
)) {
259 snprintf(string_options
, sizeof(string_options
), "%s max-quantizers=%s",
260 string_options
, arg
.val
);
261 } else if (arg_match(&arg
, &min_bitrate_arg
, argi
)) {
262 min_bitrate
= arg_parse_uint(&arg
);
263 } else if (arg_match(&arg
, &max_bitrate_arg
, argi
)) {
264 max_bitrate
= arg_parse_uint(&arg
);
265 } else if (arg_match(&arg
, &lag_in_frame_arg
, argi
)) {
266 enc_cfg
->g_lag_in_frames
= arg_parse_uint(&arg
);
267 } else if (arg_match(&arg
, &rc_end_usage_arg
, argi
)) {
268 enc_cfg
->rc_end_usage
= arg_parse_uint(&arg
);
269 #if CONFIG_VP9_HIGHBITDEPTH
270 } else if (arg_match(&arg
, &bitdepth_arg
, argi
)) {
271 enc_cfg
->g_bit_depth
= arg_parse_enum_or_int(&arg
);
272 switch (enc_cfg
->g_bit_depth
) {
274 enc_cfg
->g_input_bit_depth
= 8;
275 enc_cfg
->g_profile
= 0;
278 enc_cfg
->g_input_bit_depth
= 10;
279 enc_cfg
->g_profile
= 2;
282 enc_cfg
->g_input_bit_depth
= 12;
283 enc_cfg
->g_profile
= 2;
286 die("Error: Invalid bit depth selected (%d)\n", enc_cfg
->g_bit_depth
);
289 #endif // CONFIG_VP9_HIGHBITDEPTH
295 // There will be a space in front of the string options
296 if (strlen(string_options
) > 0)
297 vpx_svc_set_options(svc_ctx
, string_options
+ 1);
299 if (passes
== 0 || passes
== 1) {
301 fprintf(stderr
, "pass is ignored since there's only one pass\n");
303 enc_cfg
->g_pass
= VPX_RC_ONE_PASS
;
306 die("pass must be specified when passes is 2\n");
309 if (fpf_file_name
== NULL
) {
310 die("fpf must be specified when passes is 2\n");
314 enc_cfg
->g_pass
= VPX_RC_FIRST_PASS
;
315 if (!stats_open_file(&app_input
->rc_stats
, fpf_file_name
, 0)) {
316 fatal("Failed to open statistics store");
319 enc_cfg
->g_pass
= VPX_RC_LAST_PASS
;
320 if (!stats_open_file(&app_input
->rc_stats
, fpf_file_name
, 1)) {
321 fatal("Failed to open statistics store");
323 enc_cfg
->rc_twopass_stats_in
= stats_get(&app_input
->rc_stats
);
325 app_input
->passes
= passes
;
326 app_input
->pass
= pass
;
329 if (enc_cfg
->rc_target_bitrate
> 0) {
330 if (min_bitrate
> 0) {
331 enc_cfg
->rc_2pass_vbr_minsection_pct
=
332 min_bitrate
* 100 / enc_cfg
->rc_target_bitrate
;
334 if (max_bitrate
> 0) {
335 enc_cfg
->rc_2pass_vbr_maxsection_pct
=
336 max_bitrate
* 100 / enc_cfg
->rc_target_bitrate
;
340 // Check for unrecognized options
341 for (argi
= argv
; *argi
; ++argi
)
342 if (argi
[0][0] == '-' && strlen(argi
[0]) > 1)
343 die("Error: Unrecognized option %s\n", *argi
);
345 if (argv
[0] == NULL
|| argv
[1] == 0) {
348 app_input
->input_filename
= argv
[0];
349 app_input
->output_filename
= argv
[1];
352 if (enc_cfg
->g_w
< 16 || enc_cfg
->g_w
% 2 || enc_cfg
->g_h
< 16 ||
354 die("Invalid resolution: %d x %d\n", enc_cfg
->g_w
, enc_cfg
->g_h
);
357 "Codec %s\nframes: %d, skip: %d\n"
359 "width %d, height: %d,\n"
360 "num: %d, den: %d, bitrate: %d,\n"
362 vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input
->frames_to_code
,
363 app_input
->frames_to_skip
,
364 svc_ctx
->spatial_layers
, enc_cfg
->g_w
, enc_cfg
->g_h
,
365 enc_cfg
->g_timebase
.num
, enc_cfg
->g_timebase
.den
,
366 enc_cfg
->rc_target_bitrate
, enc_cfg
->kf_max_dist
);
// Accumulators for the optional rate-control report.  The per-layer arrays
// are indexed by  sl * ts_number_layers + tl  (see set_rate_control_stats()).
// NOTE(review): the declarations of the two members described by the last two
// comments below (used elsewhere as rc->window_size and rc->window_count) and
// the closing "};" are missing from this copy -- restore from upstream.
370 // For rate control encoding stats.
371 struct RateControlStats
{
372 // Number of input frames per layer.
373 int layer_input_frames
[VPX_MAX_LAYERS
];
374 // Total (cumulative) number of encoded frames per layer.
375 int layer_tot_enc_frames
[VPX_MAX_LAYERS
];
376 // Number of encoded non-key frames per layer.
377 int layer_enc_frames
[VPX_MAX_LAYERS
];
378 // Framerate per layer (cumulative).
379 double layer_framerate
[VPX_MAX_LAYERS
];
380 // Target average frame size per layer (per-frame-bandwidth per layer).
381 double layer_pfb
[VPX_MAX_LAYERS
];
382 // Actual average frame size per layer.
383 double layer_avg_frame_size
[VPX_MAX_LAYERS
];
384 // Average rate mismatch per layer (|target - actual| / target).
385 double layer_avg_rate_mismatch
[VPX_MAX_LAYERS
];
386 // Actual encoding bitrate per layer (cumulative).
387 double layer_encoding_bitrate
[VPX_MAX_LAYERS
];
388 // Average of the short-time encoder actual bitrate.
389 // TODO(marpan): Should we add these short-time stats for each layer?
390 double avg_st_encoding_bitrate
;
391 // Variance of the short-time encoder actual bitrate.
392 double variance_st_encoding_bitrate
;
393 // Window (number of frames) for computing short-time encoding bitrate.
395 // Number of window measurements.
399 // Note: these rate control stats assume only 1 key frame in the
400 // sequence (i.e., first frame only).
401 static void set_rate_control_stats(struct RateControlStats
*rc
,
402 vpx_codec_enc_cfg_t
*cfg
) {
404 // Set the layer (cumulative) framerate and the target layer (non-cumulative)
405 // per-frame-bandwidth, for the rate control encoding stats below.
406 const double framerate
= cfg
->g_timebase
.den
/ cfg
->g_timebase
.num
;
408 for (sl
= 0; sl
< cfg
->ss_number_layers
; ++sl
) {
409 for (tl
= 0; tl
< cfg
->ts_number_layers
; ++tl
) {
410 const int layer
= sl
* cfg
->ts_number_layers
+ tl
;
411 const int tlayer0
= sl
* cfg
->ts_number_layers
;
412 if (cfg
->ts_number_layers
== 1)
413 rc
->layer_framerate
[layer
] = framerate
;
415 rc
->layer_framerate
[layer
] =
416 framerate
/ cfg
->ts_rate_decimator
[tl
];
418 rc
->layer_pfb
[layer
] = 1000.0 *
419 (cfg
->layer_target_bitrate
[layer
] -
420 cfg
->layer_target_bitrate
[layer
- 1]) /
421 (rc
->layer_framerate
[layer
] -
422 rc
->layer_framerate
[layer
- 1]);
424 rc
->layer_pfb
[tlayer0
] = 1000.0 *
425 cfg
->layer_target_bitrate
[tlayer0
] /
426 rc
->layer_framerate
[tlayer0
];
428 rc
->layer_input_frames
[layer
] = 0;
429 rc
->layer_enc_frames
[layer
] = 0;
430 rc
->layer_tot_enc_frames
[layer
] = 0;
431 rc
->layer_encoding_bitrate
[layer
] = 0.0;
432 rc
->layer_avg_frame_size
[layer
] = 0.0;
433 rc
->layer_avg_rate_mismatch
[layer
] = 0.0;
436 rc
->window_count
= 0;
437 rc
->window_size
= 15;
438 rc
->avg_st_encoding_bitrate
= 0.0;
439 rc
->variance_st_encoding_bitrate
= 0.0;
442 static void printout_rate_control_summary(struct RateControlStats
*rc
,
443 vpx_codec_enc_cfg_t
*cfg
,
446 int tot_num_frames
= 0;
447 double perc_fluctuation
= 0.0;
448 printf("Total number of processed frames: %d\n\n", frame_cnt
- 1);
449 printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
450 cfg
->ss_number_layers
, cfg
->ts_number_layers
);
451 for (sl
= 0; sl
< cfg
->ss_number_layers
; ++sl
) {
452 for (tl
= 0; tl
< cfg
->ts_number_layers
; ++tl
) {
453 const int layer
= sl
* cfg
->ts_number_layers
+ tl
;
454 const int num_dropped
= (tl
> 0) ?
455 (rc
->layer_input_frames
[layer
] - rc
->layer_enc_frames
[layer
]) :
456 (rc
->layer_input_frames
[layer
] - rc
->layer_enc_frames
[layer
] - 1);
458 tot_num_frames
+= rc
->layer_input_frames
[layer
];
459 rc
->layer_encoding_bitrate
[layer
] = 0.001 * rc
->layer_framerate
[layer
] *
460 rc
->layer_encoding_bitrate
[layer
] / tot_num_frames
;
461 rc
->layer_avg_frame_size
[layer
] = rc
->layer_avg_frame_size
[layer
] /
462 rc
->layer_enc_frames
[layer
];
463 rc
->layer_avg_rate_mismatch
[layer
] =
464 100.0 * rc
->layer_avg_rate_mismatch
[layer
] /
465 rc
->layer_enc_frames
[layer
];
466 printf("For layer#: sl%d tl%d \n", sl
, tl
);
467 printf("Bitrate (target vs actual): %d %f.0 kbps\n",
468 cfg
->layer_target_bitrate
[layer
],
469 rc
->layer_encoding_bitrate
[layer
]);
470 printf("Average frame size (target vs actual): %f %f bits\n",
471 rc
->layer_pfb
[layer
], rc
->layer_avg_frame_size
[layer
]);
472 printf("Average rate_mismatch: %f\n",
473 rc
->layer_avg_rate_mismatch
[layer
]);
474 printf("Number of input frames, encoded (non-key) frames, "
475 "and percent dropped frames: %d %d %f.0 \n",
476 rc
->layer_input_frames
[layer
], rc
->layer_enc_frames
[layer
],
477 100.0 * num_dropped
/ rc
->layer_input_frames
[layer
]);
481 rc
->avg_st_encoding_bitrate
= rc
->avg_st_encoding_bitrate
/ rc
->window_count
;
482 rc
->variance_st_encoding_bitrate
=
483 rc
->variance_st_encoding_bitrate
/ rc
->window_count
-
484 (rc
->avg_st_encoding_bitrate
* rc
->avg_st_encoding_bitrate
);
485 perc_fluctuation
= 100.0 * sqrt(rc
->variance_st_encoding_bitrate
) /
486 rc
->avg_st_encoding_bitrate
;
487 printf("Short-time stats, for window of %d frames: \n", rc
->window_size
);
488 printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
489 rc
->avg_st_encoding_bitrate
,
490 sqrt(rc
->variance_st_encoding_bitrate
),
492 if (frame_cnt
!= tot_num_frames
)
493 die("Error: Number of input frames not equal to output encoded frames != "
494 "%d tot_num_frames = %d\n", frame_cnt
, tot_num_frames
);
497 vpx_codec_err_t
parse_superframe_index(const uint8_t *data
,
499 uint32_t sizes
[8], int *count
) {
500 // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
501 // it is a super frame index. If the last byte of real video compression
502 // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
503 // not the associated matching marker byte at the front of the index we have
504 // an invalid bitstream and need to return an error.
508 marker
= *(data
+ data_sz
- 1);
512 if ((marker
& 0xe0) == 0xc0) {
513 const uint32_t frames
= (marker
& 0x7) + 1;
514 const uint32_t mag
= ((marker
>> 3) & 0x3) + 1;
515 const size_t index_sz
= 2 + mag
* frames
;
517 // This chunk is marked as having a superframe index but doesn't have
518 // enough data for it, thus it's an invalid superframe index.
519 if (data_sz
< index_sz
)
520 return VPX_CODEC_CORRUPT_FRAME
;
523 const uint8_t marker2
= *(data
+ data_sz
- index_sz
);
525 // This chunk is marked as having a superframe index but doesn't have
526 // the matching marker byte at the front of the index therefore it's an
528 if (marker
!= marker2
)
529 return VPX_CODEC_CORRUPT_FRAME
;
533 // Found a valid superframe index.
535 const uint8_t *x
= &data
[data_sz
- index_sz
+ 1];
537 for (i
= 0; i
< frames
; ++i
) {
538 uint32_t this_sz
= 0;
540 for (j
= 0; j
< mag
; ++j
)
541 this_sz
|= (*x
++) << (j
* 8);
// Example pattern for spatial layers and 2 temporal layers used in the
// bypass/flexible mode. The pattern corresponds to the pattern
// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
// non-flexible mode.
// Fills *ref_frame_config for every spatial layer 0 .. num_spatial_layers-1:
// frame_flags[sl] receives a mask of VP8_EFLAG_NO_REF_* / VP8_EFLAG_NO_UPD_*
// bits, and lst_fb_idx / gld_fb_idx / alt_fb_idx[sl] receive buffer indices.
// The incoming value of sl is ignored: the loop below overwrites it.
// NOTE(review): this copy of the file has lost the branch conditions that
// choose between the flag sets and between the two gld_fb_idx assignments, a
// parameter line between num_spatial_layers and ref_frame_config, the last
// term of the two tl == 1 masks, and all closing braces -- restore them from
// the upstream source before relying on this function.
555 void set_frame_flags_bypass_mode(int sl
, int tl
, int num_spatial_layers
,
557 vpx_svc_ref_frame_config_t
*ref_frame_config
) {
558 for (sl
= 0; sl
< num_spatial_layers
; ++sl
) {
// Flag set: no GOLDEN/ALTREF reference, no GOLDEN/ALTREF update.
561 ref_frame_config
->frame_flags
[sl
] = VP8_EFLAG_NO_REF_GF
|
562 VP8_EFLAG_NO_REF_ARF
|
563 VP8_EFLAG_NO_UPD_GF
|
564 VP8_EFLAG_NO_UPD_ARF
;
// Flag set: no LAST/ALTREF reference, no GOLDEN/ALTREF update.
567 ref_frame_config
->frame_flags
[sl
] = VP8_EFLAG_NO_REF_LAST
|
568 VP8_EFLAG_NO_REF_ARF
|
569 VP8_EFLAG_NO_UPD_GF
|
570 VP8_EFLAG_NO_UPD_ARF
;
// Flag set: no ALTREF reference, no GOLDEN/ALTREF update.
572 ref_frame_config
->frame_flags
[sl
] = VP8_EFLAG_NO_REF_ARF
|
573 VP8_EFLAG_NO_UPD_GF
|
574 VP8_EFLAG_NO_UPD_ARF
;
// Flag sets for temporal layer 1; neither updates LAST.
577 } else if (tl
== 1) {
579 ref_frame_config
->frame_flags
[sl
] = VP8_EFLAG_NO_REF_GF
|
580 VP8_EFLAG_NO_REF_ARF
|
581 VP8_EFLAG_NO_UPD_LAST
|
584 ref_frame_config
->frame_flags
[sl
] = VP8_EFLAG_NO_REF_ARF
|
585 VP8_EFLAG_NO_UPD_LAST
|
// Buffer indices: LAST is always slot sl.  The branch guarded by the missing
// condition uses sl - 1 or 0 for GOLDEN and 0 for ALTREF.
590 ref_frame_config
->lst_fb_idx
[sl
] = sl
;
592 ref_frame_config
->gld_fb_idx
[sl
] = sl
- 1;
594 ref_frame_config
->gld_fb_idx
[sl
] = 0;
595 ref_frame_config
->alt_fb_idx
[sl
] = 0;
// Temporal layer 1 uses slots offset by num_spatial_layers for GOLDEN/ALTREF.
596 } else if (tl
== 1) {
597 ref_frame_config
->lst_fb_idx
[sl
] = sl
;
598 ref_frame_config
->gld_fb_idx
[sl
] = num_spatial_layers
+ sl
- 1;
599 ref_frame_config
->alt_fb_idx
[sl
] = num_spatial_layers
+ sl
;
604 int main(int argc
, const char **argv
) {
605 AppInput app_input
= {0};
606 VpxVideoWriter
*writer
= NULL
;
607 VpxVideoInfo info
= {0};
608 vpx_codec_ctx_t codec
;
609 vpx_codec_enc_cfg_t enc_cfg
;
612 uint32_t frame_cnt
= 0;
615 int pts
= 0; /* PTS starts at 0 */
616 int frame_duration
= 1; /* 1 timebase tick per frame */
618 int end_of_stream
= 0;
619 int frames_received
= 0;
621 VpxVideoWriter
*outfile
[VPX_TS_MAX_LAYERS
] = {NULL
};
622 struct RateControlStats rc
;
623 vpx_svc_layer_id_t layer_id
;
624 vpx_svc_ref_frame_config_t ref_frame_config
;
626 double sum_bitrate
= 0.0;
627 double sum_bitrate2
= 0.0;
628 double framerate
= 30.0;
630 struct vpx_usec_timer timer
;
632 memset(&svc_ctx
, 0, sizeof(svc_ctx
));
633 svc_ctx
.log_print
= 1;
635 parse_command_line(argc
, argv
, &app_input
, &svc_ctx
, &enc_cfg
);
637 // Allocate image buffer
638 #if CONFIG_VP9_HIGHBITDEPTH
639 if (!vpx_img_alloc(&raw
, enc_cfg
.g_input_bit_depth
== 8 ?
640 VPX_IMG_FMT_I420
: VPX_IMG_FMT_I42016
,
641 enc_cfg
.g_w
, enc_cfg
.g_h
, 32)) {
642 die("Failed to allocate image %dx%d\n", enc_cfg
.g_w
, enc_cfg
.g_h
);
645 if (!vpx_img_alloc(&raw
, VPX_IMG_FMT_I420
, enc_cfg
.g_w
, enc_cfg
.g_h
, 32)) {
646 die("Failed to allocate image %dx%d\n", enc_cfg
.g_w
, enc_cfg
.g_h
);
648 #endif // CONFIG_VP9_HIGHBITDEPTH
650 if (!(infile
= fopen(app_input
.input_filename
, "rb")))
651 die("Failed to open %s for reading\n", app_input
.input_filename
);
654 if (vpx_svc_init(&svc_ctx
, &codec
, vpx_codec_vp9_cx(), &enc_cfg
) !=
656 die("Failed to initialize encoder\n");
659 if (svc_ctx
.output_rc_stat
) {
660 set_rate_control_stats(&rc
, &enc_cfg
);
661 framerate
= enc_cfg
.g_timebase
.den
/ enc_cfg
.g_timebase
.num
;
665 info
.codec_fourcc
= VP9_FOURCC
;
666 info
.time_base
.numerator
= enc_cfg
.g_timebase
.num
;
667 info
.time_base
.denominator
= enc_cfg
.g_timebase
.den
;
669 if (!(app_input
.passes
== 2 && app_input
.pass
== 1)) {
670 // We don't save the bitstream for the 1st pass on two pass rate control
671 writer
= vpx_video_writer_open(app_input
.output_filename
, kContainerIVF
,
674 die("Failed to open %s for writing\n", app_input
.output_filename
);
677 // For now, just write temporal layer streams.
678 // TODO(wonkap): do spatial by re-writing superframe.
679 if (svc_ctx
.output_rc_stat
) {
680 for (tl
= 0; tl
< enc_cfg
.ts_number_layers
; ++tl
) {
681 char file_name
[PATH_MAX
];
683 snprintf(file_name
, sizeof(file_name
), "%s_t%d.ivf",
684 app_input
.output_filename
, tl
);
685 outfile
[tl
] = vpx_video_writer_open(file_name
, kContainerIVF
, &info
);
687 die("Failed to open %s for writing", file_name
);
692 // skip initial frames
693 for (i
= 0; i
< app_input
.frames_to_skip
; ++i
)
694 vpx_img_read(&raw
, infile
);
696 if (svc_ctx
.speed
!= -1)
697 vpx_codec_control(&codec
, VP8E_SET_CPUUSED
, svc_ctx
.speed
);
699 vpx_codec_control(&codec
, VP9E_SET_TILE_COLUMNS
, (svc_ctx
.threads
>> 1));
700 if (svc_ctx
.speed
>= 5 && svc_ctx
.aqmode
== 1)
701 vpx_codec_control(&codec
, VP9E_SET_AQ_MODE
, 3);
705 while (!end_of_stream
) {
706 vpx_codec_iter_t iter
= NULL
;
707 const vpx_codec_cx_pkt_t
*cx_pkt
;
708 if (frame_cnt
>= app_input
.frames_to_code
|| !vpx_img_read(&raw
, infile
)) {
709 // We need one extra vpx_svc_encode call at end of stream to flush
710 // encoder and get remaining data
714 // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
715 // and the buffer indices for each spatial layer of the current
716 // (super)frame to be encoded. The temporal layer_id for the current frame
717 // also needs to be set.
718 // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
719 // mode to "VP9E_LAYERING_MODE_BYPASS".
720 if (svc_ctx
.temporal_layering_mode
== VP9E_TEMPORAL_LAYERING_MODE_BYPASS
) {
721 layer_id
.spatial_layer_id
= 0;
722 // Example for 2 temporal layers.
723 if (frame_cnt
% 2 == 0)
724 layer_id
.temporal_layer_id
= 0;
726 layer_id
.temporal_layer_id
= 1;
727 // Note that we only set the temporal layer_id, since we are calling
728 // the encode for the whole superframe. The encoder will internally loop
729 // over all the spatial layers for the current superframe.
730 vpx_codec_control(&codec
, VP9E_SET_SVC_LAYER_ID
, &layer_id
);
731 set_frame_flags_bypass_mode(sl
, layer_id
.temporal_layer_id
,
732 svc_ctx
.spatial_layers
,
735 vpx_codec_control(&codec
, VP9E_SET_SVC_REF_FRAME_CONFIG
,
737 // Keep track of input frames, to account for frame drops in rate control
739 for (sl
= 0; sl
< enc_cfg
.ss_number_layers
; ++sl
) {
740 ++rc
.layer_input_frames
[sl
* enc_cfg
.ts_number_layers
+
741 layer_id
.temporal_layer_id
];
745 vpx_usec_timer_start(&timer
);
746 res
= vpx_svc_encode(&svc_ctx
, &codec
, (end_of_stream
? NULL
: &raw
),
747 pts
, frame_duration
, svc_ctx
.speed
>= 5 ?
748 VPX_DL_REALTIME
: VPX_DL_GOOD_QUALITY
);
749 vpx_usec_timer_mark(&timer
);
750 cx_time
+= vpx_usec_timer_elapsed(&timer
);
752 printf("%s", vpx_svc_get_message(&svc_ctx
));
754 if (res
!= VPX_CODEC_OK
) {
755 die_codec(&codec
, "Failed to encode frame");
758 while ((cx_pkt
= vpx_codec_get_cx_data(&codec
, &iter
)) != NULL
) {
759 switch (cx_pkt
->kind
) {
760 case VPX_CODEC_CX_FRAME_PKT
: {
761 SvcInternal_t
*const si
= (SvcInternal_t
*)svc_ctx
.internal
;
762 if (cx_pkt
->data
.frame
.sz
> 0) {
767 vpx_video_writer_write_frame(writer
,
768 cx_pkt
->data
.frame
.buf
,
769 cx_pkt
->data
.frame
.sz
,
770 cx_pkt
->data
.frame
.pts
);
772 // TODO(marpan/wonkap): Put this (to line728) in separate function.
773 if (svc_ctx
.output_rc_stat
) {
774 vpx_codec_control(&codec
, VP9E_GET_SVC_LAYER_ID
, &layer_id
);
775 parse_superframe_index(cx_pkt
->data
.frame
.buf
,
776 cx_pkt
->data
.frame
.sz
, sizes
, &count
);
777 // Note computing input_layer_frames here won't account for frame
778 // drops in rate control stats.
779 // TODO(marpan): Fix this for non-bypass mode so we can get stats
780 // for dropped frames.
781 if (svc_ctx
.temporal_layering_mode
!=
782 VP9E_TEMPORAL_LAYERING_MODE_BYPASS
) {
783 for (sl
= 0; sl
< enc_cfg
.ss_number_layers
; ++sl
) {
784 ++rc
.layer_input_frames
[sl
* enc_cfg
.ts_number_layers
+
785 layer_id
.temporal_layer_id
];
788 for (tl
= layer_id
.temporal_layer_id
;
789 tl
< enc_cfg
.ts_number_layers
; ++tl
) {
790 vpx_video_writer_write_frame(outfile
[tl
],
791 cx_pkt
->data
.frame
.buf
,
792 cx_pkt
->data
.frame
.sz
,
793 cx_pkt
->data
.frame
.pts
);
796 for (sl
= 0; sl
< enc_cfg
.ss_number_layers
; ++sl
) {
797 for (tl
= layer_id
.temporal_layer_id
;
798 tl
< enc_cfg
.ts_number_layers
; ++tl
) {
799 const int layer
= sl
* enc_cfg
.ts_number_layers
+ tl
;
800 ++rc
.layer_tot_enc_frames
[layer
];
801 rc
.layer_encoding_bitrate
[layer
] += 8.0 * sizes
[sl
];
802 // Keep count of rate control stats per layer, for non-key
804 if (tl
== layer_id
.temporal_layer_id
&&
805 !(cx_pkt
->data
.frame
.flags
& VPX_FRAME_IS_KEY
)) {
806 rc
.layer_avg_frame_size
[layer
] += 8.0 * sizes
[sl
];
807 rc
.layer_avg_rate_mismatch
[layer
] +=
808 fabs(8.0 * sizes
[sl
] - rc
.layer_pfb
[layer
]) /
810 ++rc
.layer_enc_frames
[layer
];
815 // Update for short-time encoding bitrate states, for moving
816 // window of size rc->window, shifted by rc->window / 2.
817 // Ignore first window segment, due to key frame.
818 if (frame_cnt
> rc
.window_size
) {
819 tl
= layer_id
.temporal_layer_id
;
820 for (sl
= 0; sl
< enc_cfg
.ss_number_layers
; ++sl
) {
821 sum_bitrate
+= 0.001 * 8.0 * sizes
[sl
] * framerate
;
823 if (frame_cnt
% rc
.window_size
== 0) {
824 rc
.window_count
+= 1;
825 rc
.avg_st_encoding_bitrate
+= sum_bitrate
/ rc
.window_size
;
826 rc
.variance_st_encoding_bitrate
+=
827 (sum_bitrate
/ rc
.window_size
) *
828 (sum_bitrate
/ rc
.window_size
);
833 // Second shifted window.
834 if (frame_cnt
> rc
.window_size
+ rc
.window_size
/ 2) {
835 tl
= layer_id
.temporal_layer_id
;
836 for (sl
= 0; sl
< enc_cfg
.ss_number_layers
; ++sl
) {
837 sum_bitrate2
+= 0.001 * 8.0 * sizes
[sl
] * framerate
;
840 if (frame_cnt
> 2 * rc
.window_size
&&
841 frame_cnt
% rc
.window_size
== 0) {
842 rc
.window_count
+= 1;
843 rc
.avg_st_encoding_bitrate
+= sum_bitrate2
/ rc
.window_size
;
844 rc
.variance_st_encoding_bitrate
+=
845 (sum_bitrate2
/ rc
.window_size
) *
846 (sum_bitrate2
/ rc
.window_size
);
854 printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received
,
855 !!(cx_pkt
->data
.frame
.flags
& VPX_FRAME_IS_KEY
),
856 (int)cx_pkt
->data
.frame
.sz
, (int)cx_pkt
->data
.frame
.pts
);
857 if (enc_cfg
.ss_number_layers
== 1 && enc_cfg
.ts_number_layers
== 1)
858 si
->bytes_sum
[0] += (int)cx_pkt
->data
.frame
.sz
;
862 case VPX_CODEC_STATS_PKT
: {
863 stats_write(&app_input
.rc_stats
,
864 cx_pkt
->data
.twopass_stats
.buf
,
865 cx_pkt
->data
.twopass_stats
.sz
);
874 if (!end_of_stream
) {
876 pts
+= frame_duration
;
880 // Compensate for the extra frame count for the bypass mode.
881 if (svc_ctx
.temporal_layering_mode
== VP9E_TEMPORAL_LAYERING_MODE_BYPASS
) {
882 for (sl
= 0; sl
< enc_cfg
.ss_number_layers
; ++sl
) {
883 const int layer
= sl
* enc_cfg
.ts_number_layers
+
884 layer_id
.temporal_layer_id
;
885 --rc
.layer_input_frames
[layer
];
889 printf("Processed %d frames\n", frame_cnt
);
892 if (svc_ctx
.output_rc_stat
) {
893 printout_rate_control_summary(&rc
, &enc_cfg
, frame_cnt
);
897 if (vpx_codec_destroy(&codec
)) die_codec(&codec
, "Failed to destroy codec");
898 if (app_input
.passes
== 2)
899 stats_close(&app_input
.rc_stats
, 1);
901 vpx_video_writer_close(writer
);
904 if (svc_ctx
.output_rc_stat
) {
905 for (tl
= 0; tl
< enc_cfg
.ts_number_layers
; ++tl
) {
906 vpx_video_writer_close(outfile
[tl
]);
910 printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
912 1000 * (float)cx_time
/ (double)(frame_cnt
* 1000000),
913 1000000 * (double)frame_cnt
/ (double)cx_time
);
915 // display average size, psnr
916 printf("%s", vpx_svc_dump_statistics(&svc_ctx
));
917 vpx_svc_release(&svc_ctx
);