2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
12 * This is an example demonstrating how to implement a multi-layer
13 * VP9 encoding scheme based on spatial scalability for video applications
14 * that benefit from a scalable bitstream.
25 #include "../tools_common.h"
26 #include "../video_writer.h"
28 #include "vpx/svc_context.h"
29 #include "vpx/vp8cx.h"
30 #include "vpx/vpx_encoder.h"
31 #include "../vpxstats.h"
32 #define OUTPUT_RC_STATS 1
34 static const arg_def_t skip_frames_arg
=
35 ARG_DEF("s", "skip-frames", 1, "input frames to skip");
36 static const arg_def_t frames_arg
=
37 ARG_DEF("f", "frames", 1, "number of frames to encode");
38 static const arg_def_t threads_arg
=
39 ARG_DEF("th", "threads", 1, "number of threads to use");
41 static const arg_def_t output_rc_stats_arg
=
42 ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
44 static const arg_def_t width_arg
= ARG_DEF("w", "width", 1, "source width");
45 static const arg_def_t height_arg
= ARG_DEF("h", "height", 1, "source height");
46 static const arg_def_t timebase_arg
=
47 ARG_DEF("t", "timebase", 1, "timebase (num/den)");
48 static const arg_def_t bitrate_arg
= ARG_DEF(
49 "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
50 static const arg_def_t spatial_layers_arg
=
51 ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
52 static const arg_def_t temporal_layers_arg
=
53 ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
54 static const arg_def_t temporal_layering_mode_arg
=
55 ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
56 "VP9E_TEMPORAL_LAYERING_MODE");
57 static const arg_def_t kf_dist_arg
=
58 ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
59 static const arg_def_t scale_factors_arg
=
60 ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
61 static const arg_def_t passes_arg
=
62 ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
63 static const arg_def_t pass_arg
=
64 ARG_DEF(NULL
, "pass", 1, "Pass to execute (1/2)");
65 static const arg_def_t fpf_name_arg
=
66 ARG_DEF(NULL
, "fpf", 1, "First pass statistics file name");
67 static const arg_def_t min_q_arg
=
68 ARG_DEF(NULL
, "min-q", 1, "Minimum quantizer");
69 static const arg_def_t max_q_arg
=
70 ARG_DEF(NULL
, "max-q", 1, "Maximum quantizer");
71 static const arg_def_t min_bitrate_arg
=
72 ARG_DEF(NULL
, "min-bitrate", 1, "Minimum bitrate");
73 static const arg_def_t max_bitrate_arg
=
74 ARG_DEF(NULL
, "max-bitrate", 1, "Maximum bitrate");
75 static const arg_def_t lag_in_frame_arg
=
76 ARG_DEF(NULL
, "lag-in-frames", 1, "Number of frame to input before "
77 "generating any outputs");
78 static const arg_def_t rc_end_usage_arg
=
79 ARG_DEF(NULL
, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
80 static const arg_def_t speed_arg
=
81 ARG_DEF("sp", "speed", 1, "speed configuration");
// Bit-depth option ("-d" / "--bit-depth"); only present when the build
// enables CONFIG_VP9_HIGHBITDEPTH. Its values come from bitdepth_enum.
83 #if CONFIG_VP9_HIGHBITDEPTH
84 static const struct arg_enum_list bitdepth_enum
[] = {
91 static const arg_def_t bitdepth_arg
=
92 ARG_DEF_ENUM("d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ",
94 #endif // CONFIG_VP9_HIGHBITDEPTH
// NULL-terminated table of pointers to the option definitions; this is the
// table handed to arg_show_usage() when the usage text is printed.
97 static const arg_def_t
*svc_args
[] = {
98 &frames_arg
, &width_arg
, &height_arg
,
99 &timebase_arg
, &bitrate_arg
, &skip_frames_arg
, &spatial_layers_arg
,
100 &kf_dist_arg
, &scale_factors_arg
, &passes_arg
, &pass_arg
,
101 &fpf_name_arg
, &min_q_arg
, &max_q_arg
, &min_bitrate_arg
,
102 &max_bitrate_arg
, &temporal_layers_arg
, &temporal_layering_mode_arg
,
103 &lag_in_frame_arg
, &threads_arg
,
105 &output_rc_stats_arg
,
// The bit-depth option only exists in high-bit-depth builds.
108 #if CONFIG_VP9_HIGHBITDEPTH
112 &rc_end_usage_arg
, NULL
// Built-in defaults applied by parse_command_line() before the command line
// is examined; an explicit option overrides the matching value.

// Input handling.
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;

// Source geometry and timing.
static const uint32_t default_width = 1920;
static const uint32_t default_height = 1080;
static const uint32_t default_timebase_num = 1;
static const uint32_t default_timebase_den = 60;

// Rate and layering.
static const uint32_t default_bitrate = 1000;
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_kf_dist = 100;
static const uint32_t default_temporal_layering_mode = 0;
static const uint32_t default_output_rc_stats = 0;

// Encoder tuning.
static const int32_t default_speed = -1;    // -1 means use library default.
static const uint32_t default_threads = 0;  // zero means use library default.
// Application-level input state (accessed through AppInput elsewhere in this
// file).
// Paths of the raw input and the encoded output; parse_command_line() sets
// them from the two positional arguments.
131 const char *input_filename
;
132 const char *output_filename
;
// Number of frames to encode, and number of leading input frames to discard.
133 uint32_t frames_to_code
;
134 uint32_t frames_to_skip
;
// NOTE(review): not referenced in the visible code; presumably describes the
// input stream — confirm.
135 struct VpxInputContext input_ctx
;
// NOTE(review): presumably the program name substituted for %s in the usage
// line below — confirm against the full file.
141 static const char *exec_name
;
// Writes the usage line and the list of options in svc_args to stderr.
143 void usage_exit(void) {
144 fprintf(stderr
, "Usage: %s <options> input_filename output_filename\n",
146 fprintf(stderr
, "Options:\n");
147 arg_show_usage(stderr
, svc_args
);
// Fills |app_input|, |svc_ctx| and |enc_cfg| from the program defaults and
// then from the command line in |argv_| (argv_[0] is skipped).
//
// Order of work: set SvcContext defaults, load the default VP9 encoder
// configuration, apply the application defaults, walk the options, forward
// the collected string options to vpx_svc_set_options(), resolve the
// one-pass / two-pass setup, convert min/max bitrate to percentages, reject
// unrecognized options, pick up the two file names, validate the resolution
// and print a summary of the chosen settings.
151 static void parse_command_line(int argc
, const char **argv_
,
152 AppInput
*app_input
, SvcContext
*svc_ctx
,
153 vpx_codec_enc_cfg_t
*enc_cfg
) {
154 struct arg arg
= {0};
161 const char *fpf_file_name
= NULL
;
162 unsigned int min_bitrate
= 0;
163 unsigned int max_bitrate
= 0;
// Accumulates " key=value" fragments for vpx_svc_set_options().
164 char string_options
[1024] = {0};
166 // initialize SvcContext with parameters that will be passed to vpx_svc_init
167 svc_ctx
->log_level
= SVC_LOG_DEBUG
;
168 svc_ctx
->spatial_layers
= default_spatial_layers
;
169 svc_ctx
->temporal_layers
= default_temporal_layers
;
170 svc_ctx
->temporal_layering_mode
= default_temporal_layering_mode
;
172 svc_ctx
->output_rc_stat
= default_output_rc_stats
;
174 svc_ctx
->speed
= default_speed
;
175 svc_ctx
->threads
= default_threads
;
177 // start with default encoder configuration
178 res
= vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg
, 0);
180 die("Failed to get config: %s\n", vpx_codec_err_to_string(res
));
182 // update enc_cfg with app default values
183 enc_cfg
->g_w
= default_width
;
184 enc_cfg
->g_h
= default_height
;
185 enc_cfg
->g_timebase
.num
= default_timebase_num
;
186 enc_cfg
->g_timebase
.den
= default_timebase_den
;
187 enc_cfg
->rc_target_bitrate
= default_bitrate
;
188 enc_cfg
->kf_min_dist
= default_kf_dist
;
189 enc_cfg
->kf_max_dist
= default_kf_dist
;
190 enc_cfg
->rc_end_usage
= VPX_CQ
;
192 // initialize AppInput with default values
193 app_input
->frames_to_code
= default_frames_to_code
;
194 app_input
->frames_to_skip
= default_frames_to_skip
;
196 // process command line options
// Work on a copy of the argument vector without argv_[0]. The loop copies
// each entry from argi to argj as it goes.
197 argv
= argv_dup(argc
- 1, argv_
+ 1);
198 for (argi
= argj
= argv
; (*argj
= *argi
); argi
+= arg
.argv_step
) {
201 if (arg_match(&arg
, &frames_arg
, argi
)) {
202 app_input
->frames_to_code
= arg_parse_uint(&arg
);
203 } else if (arg_match(&arg
, &width_arg
, argi
)) {
204 enc_cfg
->g_w
= arg_parse_uint(&arg
);
205 } else if (arg_match(&arg
, &height_arg
, argi
)) {
206 enc_cfg
->g_h
= arg_parse_uint(&arg
);
207 } else if (arg_match(&arg
, &timebase_arg
, argi
)) {
208 enc_cfg
->g_timebase
= arg_parse_rational(&arg
);
209 } else if (arg_match(&arg
, &bitrate_arg
, argi
)) {
210 enc_cfg
->rc_target_bitrate
= arg_parse_uint(&arg
);
211 } else if (arg_match(&arg
, &skip_frames_arg
, argi
)) {
212 app_input
->frames_to_skip
= arg_parse_uint(&arg
);
213 } else if (arg_match(&arg
, &spatial_layers_arg
, argi
)) {
214 svc_ctx
->spatial_layers
= arg_parse_uint(&arg
);
215 } else if (arg_match(&arg
, &temporal_layers_arg
, argi
)) {
216 svc_ctx
->temporal_layers
= arg_parse_uint(&arg
);
218 } else if (arg_match(&arg
, &output_rc_stats_arg
, argi
)) {
219 svc_ctx
->output_rc_stat
= arg_parse_uint(&arg
);
221 } else if (arg_match(&arg
, &speed_arg
, argi
)) {
222 svc_ctx
->speed
= arg_parse_uint(&arg
);
223 } else if (arg_match(&arg
, &threads_arg
, argi
)) {
224 svc_ctx
->threads
= arg_parse_uint(&arg
);
225 } else if (arg_match(&arg
, &temporal_layering_mode_arg
, argi
)) {
// The mode is stored in both the SVC context and the encoder config.
226 svc_ctx
->temporal_layering_mode
=
227 enc_cfg
->temporal_layering_mode
= arg_parse_int(&arg
);
// Any non-zero temporal layering mode also enables error resilience.
228 if (svc_ctx
->temporal_layering_mode
) {
229 enc_cfg
->g_error_resilient
= 1;
231 } else if (arg_match(&arg
, &kf_dist_arg
, argi
)) {
// A single option sets both the minimum and maximum keyframe distance.
232 enc_cfg
->kf_min_dist
= arg_parse_uint(&arg
);
233 enc_cfg
->kf_max_dist
= enc_cfg
->kf_min_dist
;
234 } else if (arg_match(&arg
, &scale_factors_arg
, argi
)) {
// NOTE(review): string_options is both the destination and a "%s" source of
// this snprintf (same pattern for min-q / max-q below). The C standard leaves
// copying between overlapping objects undefined; consider appending at
// string_options + strlen(string_options) instead.
235 snprintf(string_options
, sizeof(string_options
), "%s scale-factors=%s",
236 string_options
, arg
.val
);
237 } else if (arg_match(&arg
, &passes_arg
, argi
)) {
238 passes
= arg_parse_uint(&arg
);
239 if (passes
< 1 || passes
> 2) {
240 die("Error: Invalid number of passes (%d)\n", passes
);
242 } else if (arg_match(&arg
, &pass_arg
, argi
)) {
243 pass
= arg_parse_uint(&arg
);
244 if (pass
< 1 || pass
> 2) {
245 die("Error: Invalid pass selected (%d)\n", pass
);
247 } else if (arg_match(&arg
, &fpf_name_arg
, argi
)) {
248 fpf_file_name
= arg
.val
;
249 } else if (arg_match(&arg
, &min_q_arg
, argi
)) {
250 snprintf(string_options
, sizeof(string_options
), "%s min-quantizers=%s",
251 string_options
, arg
.val
);
252 } else if (arg_match(&arg
, &max_q_arg
, argi
)) {
253 snprintf(string_options
, sizeof(string_options
), "%s max-quantizers=%s",
254 string_options
, arg
.val
);
255 } else if (arg_match(&arg
, &min_bitrate_arg
, argi
)) {
256 min_bitrate
= arg_parse_uint(&arg
);
257 } else if (arg_match(&arg
, &max_bitrate_arg
, argi
)) {
258 max_bitrate
= arg_parse_uint(&arg
);
259 } else if (arg_match(&arg
, &lag_in_frame_arg
, argi
)) {
260 enc_cfg
->g_lag_in_frames
= arg_parse_uint(&arg
);
261 } else if (arg_match(&arg
, &rc_end_usage_arg
, argi
)) {
262 enc_cfg
->rc_end_usage
= arg_parse_uint(&arg
);
263 #if CONFIG_VP9_HIGHBITDEPTH
264 } else if (arg_match(&arg
, &bitdepth_arg
, argi
)) {
// The chosen bit depth also fixes the input bit depth and the profile.
265 enc_cfg
->g_bit_depth
= arg_parse_enum_or_int(&arg
);
266 switch (enc_cfg
->g_bit_depth
) {
268 enc_cfg
->g_input_bit_depth
= 8;
269 enc_cfg
->g_profile
= 0;
272 enc_cfg
->g_input_bit_depth
= 10;
273 enc_cfg
->g_profile
= 2;
276 enc_cfg
->g_input_bit_depth
= 12;
277 enc_cfg
->g_profile
= 2;
280 die("Error: Invalid bit depth selected (%d)\n", enc_cfg
->g_bit_depth
);
283 #endif // CONFIG_VP9_HIGHBITDEPTH
289 // There will be a space in front of the string options
// (hence the "+ 1" when the collected options are passed on).
290 if (strlen(string_options
) > 0)
291 vpx_svc_set_options(svc_ctx
, string_options
+ 1);
// Resolve the pass configuration: with zero or one pass the encoder runs in
// one-pass mode; with two passes both a pass number and a first-pass stats
// file are required.
293 if (passes
== 0 || passes
== 1) {
295 fprintf(stderr
, "pass is ignored since there's only one pass\n");
297 enc_cfg
->g_pass
= VPX_RC_ONE_PASS
;
300 die("pass must be specified when passes is 2\n");
303 if (fpf_file_name
== NULL
) {
304 die("fpf must be specified when passes is 2\n");
// First pass: open the statistics file with last argument 0.
308 enc_cfg
->g_pass
= VPX_RC_FIRST_PASS
;
309 if (!stats_open_file(&app_input
->rc_stats
, fpf_file_name
, 0)) {
310 fatal("Failed to open statistics store");
// Last pass: open the statistics file with last argument 1 and hand its
// contents to the encoder as two-pass input.
313 enc_cfg
->g_pass
= VPX_RC_LAST_PASS
;
314 if (!stats_open_file(&app_input
->rc_stats
, fpf_file_name
, 1)) {
315 fatal("Failed to open statistics store");
317 enc_cfg
->rc_twopass_stats_in
= stats_get(&app_input
->rc_stats
);
319 app_input
->passes
= passes
;
320 app_input
->pass
= pass
;
// Express the min/max bitrate as a percentage of the target bitrate; skipped
// when the target is zero, which also avoids dividing by zero.
323 if (enc_cfg
->rc_target_bitrate
> 0) {
324 if (min_bitrate
> 0) {
325 enc_cfg
->rc_2pass_vbr_minsection_pct
=
326 min_bitrate
* 100 / enc_cfg
->rc_target_bitrate
;
328 if (max_bitrate
> 0) {
329 enc_cfg
->rc_2pass_vbr_maxsection_pct
=
330 max_bitrate
* 100 / enc_cfg
->rc_target_bitrate
;
334 // Check for unrecognized options
// Any remaining argument that starts with '-' and is longer than one
// character is reported as an error.
335 for (argi
= argv
; *argi
; ++argi
)
336 if (argi
[0][0] == '-' && strlen(argi
[0]) > 1)
337 die("Error: Unrecognized option %s\n", *argi
);
// Exactly the input and output file names are expected to remain.
339 if (argv
[0] == NULL
|| argv
[1] == 0) {
342 app_input
->input_filename
= argv
[0];
343 app_input
->output_filename
= argv
[1];
// Resolution sanity check: the visible conditions reject a width below 16,
// an odd width, and a height below 16.
346 if (enc_cfg
->g_w
< 16 || enc_cfg
->g_w
% 2 || enc_cfg
->g_h
< 16 ||
348 die("Invalid resolution: %d x %d\n", enc_cfg
->g_w
, enc_cfg
->g_h
);
// Summary of the effective settings.
351 "Codec %s\nframes: %d, skip: %d\n"
353 "width %d, height: %d,\n"
354 "num: %d, den: %d, bitrate: %d,\n"
356 vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input
->frames_to_code
,
357 app_input
->frames_to_skip
,
358 svc_ctx
->spatial_layers
, enc_cfg
->g_w
, enc_cfg
->g_h
,
359 enc_cfg
->g_timebase
.num
, enc_cfg
->g_timebase
.den
,
360 enc_cfg
->rc_target_bitrate
, enc_cfg
->kf_max_dist
);
364 // For rate control encoding stats.
// Per-layer arrays are indexed by (spatial layer * number of temporal
// layers + temporal layer), matching the indexing used by the functions
// that fill and print them.
365 struct RateControlStats
{
366 // Number of input frames per layer.
367 int layer_input_frames
[VPX_MAX_LAYERS
];
368 // Total (cumulative) number of encoded frames per layer.
369 int layer_tot_enc_frames
[VPX_MAX_LAYERS
];
370 // Number of encoded non-key frames per layer.
371 int layer_enc_frames
[VPX_MAX_LAYERS
];
372 // Framerate per layer (cumulative).
373 double layer_framerate
[VPX_MAX_LAYERS
];
374 // Target average frame size per layer (per-frame-bandwidth per layer).
375 double layer_pfb
[VPX_MAX_LAYERS
];
376 // Actual average frame size per layer.
377 double layer_avg_frame_size
[VPX_MAX_LAYERS
];
378 // Average rate mismatch per layer (|target - actual| / target).
379 double layer_avg_rate_mismatch
[VPX_MAX_LAYERS
];
380 // Actual encoding bitrate per layer (cumulative).
381 double layer_encoding_bitrate
[VPX_MAX_LAYERS
];
382 // Average of the short-time encoder actual bitrate.
383 // TODO(marpan): Should we add these short-time stats for each layer?
384 double avg_st_encoding_bitrate
;
385 // Variance of the short-time encoder actual bitrate.
386 double variance_st_encoding_bitrate
;
387 // Window (number of frames) for computing short-time encoding bitrate.
389 // Number of window measurements.
393 // Note: these rate control stats assume only 1 key frame in the
394 // sequence (i.e., first frame only).
395 static void set_rate_control_stats(struct RateControlStats
*rc
,
396 vpx_codec_enc_cfg_t
*cfg
) {
398 // Set the layer (cumulative) framerate and the target layer (non-cumulative)
399 // per-frame-bandwidth, for the rate control encoding stats below.
400 const double framerate
= cfg
->g_timebase
.den
/ cfg
->g_timebase
.num
;
402 for (sl
= 0; sl
< cfg
->ss_number_layers
; ++sl
) {
403 for (tl
= 0; tl
< cfg
->ts_number_layers
; ++tl
) {
404 const int layer
= sl
* cfg
->ts_number_layers
+ tl
;
405 const int tlayer0
= sl
* cfg
->ts_number_layers
;
406 rc
->layer_framerate
[layer
] =
407 framerate
/ cfg
->ts_rate_decimator
[tl
];
409 rc
->layer_pfb
[layer
] = 1000.0 *
410 (cfg
->layer_target_bitrate
[layer
] -
411 cfg
->layer_target_bitrate
[layer
- 1]) /
412 (rc
->layer_framerate
[layer
] -
413 rc
->layer_framerate
[layer
- 1]);
415 rc
->layer_pfb
[tlayer0
] = 1000.0 *
416 cfg
->layer_target_bitrate
[tlayer0
] /
417 rc
->layer_framerate
[tlayer0
];
419 rc
->layer_input_frames
[layer
] = 0;
420 rc
->layer_enc_frames
[layer
] = 0;
421 rc
->layer_tot_enc_frames
[layer
] = 0;
422 rc
->layer_encoding_bitrate
[layer
] = 0.0;
423 rc
->layer_avg_frame_size
[layer
] = 0.0;
424 rc
->layer_avg_rate_mismatch
[layer
] = 0.0;
427 rc
->window_count
= 0;
428 rc
->window_size
= 15;
429 rc
->avg_st_encoding_bitrate
= 0.0;
430 rc
->variance_st_encoding_bitrate
= 0.0;
// Finalizes the statistics accumulated in |rc| (the averages are normalized
// in place) and prints a per-layer summary followed by the short-time
// bitrate statistics. Calls die() when the frame count passed in does not
// match the number of input frames counted per layer.
433 static void printout_rate_control_summary(struct RateControlStats
*rc
,
434 vpx_codec_enc_cfg_t
*cfg
,
437 int tot_num_frames
= 0;
438 double perc_fluctuation
= 0.0;
439 printf("Total number of processed frames: %d\n\n", frame_cnt
- 1);
440 printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
441 cfg
->ss_number_layers
, cfg
->ts_number_layers
);
442 for (sl
= 0; sl
< cfg
->ss_number_layers
; ++sl
) {
443 for (tl
= 0; tl
< cfg
->ts_number_layers
; ++tl
) {
444 const int layer
= sl
* cfg
->ts_number_layers
+ tl
;
// Dropped frames are input frames minus encoded non-key frames; for the
// base temporal layer one more frame is subtracted.
445 const int num_dropped
= (tl
> 0) ?
446 (rc
->layer_input_frames
[layer
] - rc
->layer_enc_frames
[layer
]) :
447 (rc
->layer_input_frames
[layer
] - rc
->layer_enc_frames
[layer
] - 1);
// NOTE(review): as visible here the total accumulates over every
// (sl, tl) pair; confirm it still equals frame_cnt when more than one
// spatial layer is configured.
449 tot_num_frames
+= rc
->layer_input_frames
[layer
];
// Convert the accumulated bits into kbps using the frames counted so far.
450 rc
->layer_encoding_bitrate
[layer
] = 0.001 * rc
->layer_framerate
[layer
] *
451 rc
->layer_encoding_bitrate
[layer
] / tot_num_frames
;
// NOTE(review): no zero check on layer_enc_frames is visible before the two
// divisions below.
452 rc
->layer_avg_frame_size
[layer
] = rc
->layer_avg_frame_size
[layer
] /
453 rc
->layer_enc_frames
[layer
];
454 rc
->layer_avg_rate_mismatch
[layer
] =
455 100.0 * rc
->layer_avg_rate_mismatch
[layer
] /
456 rc
->layer_enc_frames
[layer
];
457 printf("For layer#: sl%d tl%d \n", sl
, tl
);
// NOTE(review): "%f.0" prints the value with default precision followed by a
// literal ".0"; "%.0f" may have been intended (also in the dropped-frames
// line below).
458 printf("Bitrate (target vs actual): %d %f.0 kbps\n",
459 cfg
->layer_target_bitrate
[layer
],
460 rc
->layer_encoding_bitrate
[layer
]);
461 printf("Average frame size (target vs actual): %f %f bits\n",
462 rc
->layer_pfb
[layer
], rc
->layer_avg_frame_size
[layer
]);
463 printf("Average rate_mismatch: %f\n",
464 rc
->layer_avg_rate_mismatch
[layer
]);
465 printf("Number of input frames, encoded (non-key) frames, "
466 "and percent dropped frames: %d %d %f.0 \n",
467 rc
->layer_input_frames
[layer
], rc
->layer_enc_frames
[layer
],
468 100.0 * num_dropped
/ rc
->layer_input_frames
[layer
]);
// Short-time statistics: mean, then variance as E[x^2] - (E[x])^2, then the
// fluctuation as a percentage of the mean.
// NOTE(review): no zero check on window_count is visible here.
472 rc
->avg_st_encoding_bitrate
= rc
->avg_st_encoding_bitrate
/ rc
->window_count
;
473 rc
->variance_st_encoding_bitrate
=
474 rc
->variance_st_encoding_bitrate
/ rc
->window_count
-
475 (rc
->avg_st_encoding_bitrate
* rc
->avg_st_encoding_bitrate
);
476 perc_fluctuation
= 100.0 * sqrt(rc
->variance_st_encoding_bitrate
) /
477 rc
->avg_st_encoding_bitrate
;
478 printf("Short-time stats, for window of %d frames: \n", rc
->window_size
);
479 printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
480 rc
->avg_st_encoding_bitrate
,
481 sqrt(rc
->variance_st_encoding_bitrate
),
// Consistency check between the caller's frame count and the per-layer
// totals.
483 if (frame_cnt
!= tot_num_frames
)
484 die("Error: Number of input frames not equal to output encoded frames != "
485 "%d tot_num_frames = %d\n", frame_cnt
, tot_num_frames
);
488 vpx_codec_err_t
parse_superframe_index(const uint8_t *data
,
490 uint32_t sizes
[8], int *count
) {
491 // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
492 // it is a super frame index. If the last byte of real video compression
493 // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
494 // not the associated matching marker byte at the front of the index we have
495 // an invalid bitstream and need to return an error.
499 marker
= *(data
+ data_sz
- 1);
503 if ((marker
& 0xe0) == 0xc0) {
504 const uint32_t frames
= (marker
& 0x7) + 1;
505 const uint32_t mag
= ((marker
>> 3) & 0x3) + 1;
506 const size_t index_sz
= 2 + mag
* frames
;
508 // This chunk is marked as having a superframe index but doesn't have
509 // enough data for it, thus it's an invalid superframe index.
510 if (data_sz
< index_sz
)
511 return VPX_CODEC_CORRUPT_FRAME
;
514 const uint8_t marker2
= *(data
+ data_sz
- index_sz
);
516 // This chunk is marked as having a superframe index but doesn't have
517 // the matching marker byte at the front of the index therefore it's an
519 if (marker
!= marker2
)
520 return VPX_CODEC_CORRUPT_FRAME
;
524 // Found a valid superframe index.
526 const uint8_t *x
= &data
[data_sz
- index_sz
+ 1];
528 for (i
= 0; i
< frames
; ++i
) {
529 uint32_t this_sz
= 0;
531 for (j
= 0; j
< mag
; ++j
)
532 this_sz
|= (*x
++) << (j
* 8);
// Program entry point. Parses the command line, allocates the raw image,
// opens the input file, initializes the SVC encoder, opens the output
// writer(s), then feeds frames to vpx_svc_encode() and handles the returned
// packets until the input ends or the requested number of frames has been
// coded. Afterwards it prints the statistics and releases the codec, the
// writers and the SVC context.
542 int main(int argc
, const char **argv
) {
543 AppInput app_input
= {0};
544 VpxVideoWriter
*writer
= NULL
;
545 VpxVideoInfo info
= {0};
546 vpx_codec_ctx_t codec
;
547 vpx_codec_enc_cfg_t enc_cfg
;
550 uint32_t frame_cnt
= 0;
553 int pts
= 0; /* PTS starts at 0 */
554 int frame_duration
= 1; /* 1 timebase tick per frame */
556 int end_of_stream
= 0;
557 int frames_received
= 0;
// One extra writer per temporal layer, used only when rc stats are enabled.
559 VpxVideoWriter
*outfile
[VPX_TS_MAX_LAYERS
] = {NULL
};
560 struct RateControlStats rc
;
561 vpx_svc_layer_id_t layer_id
;
// Running sums for the two overlapping short-time bitrate windows.
563 double sum_bitrate
= 0.0;
564 double sum_bitrate2
= 0.0;
565 double framerate
= 30.0;
// Start from a zeroed SVC context with log printing switched on.
567 memset(&svc_ctx
, 0, sizeof(svc_ctx
));
568 svc_ctx
.log_print
= 1;
570 parse_command_line(argc
, argv
, &app_input
, &svc_ctx
, &enc_cfg
);
572 // Allocate image buffer
// High-bit-depth builds use the 16-bit I420 format unless the input bit
// depth is 8.
573 #if CONFIG_VP9_HIGHBITDEPTH
574 if (!vpx_img_alloc(&raw
, enc_cfg
.g_input_bit_depth
== 8 ?
575 VPX_IMG_FMT_I420
: VPX_IMG_FMT_I42016
,
576 enc_cfg
.g_w
, enc_cfg
.g_h
, 32)) {
577 die("Failed to allocate image %dx%d\n", enc_cfg
.g_w
, enc_cfg
.g_h
);
580 if (!vpx_img_alloc(&raw
, VPX_IMG_FMT_I420
, enc_cfg
.g_w
, enc_cfg
.g_h
, 32)) {
581 die("Failed to allocate image %dx%d\n", enc_cfg
.g_w
, enc_cfg
.g_h
);
583 #endif // CONFIG_VP9_HIGHBITDEPTH
// Open the raw input in binary mode.
585 if (!(infile
= fopen(app_input
.input_filename
, "rb")))
586 die("Failed to open %s for reading\n", app_input
.input_filename
);
// Initialize the SVC encoder with the VP9 interface.
589 if (vpx_svc_init(&svc_ctx
, &codec
, vpx_codec_vp9_cx(), &enc_cfg
) !=
591 die("Failed to initialize encoder\n");
594 if (svc_ctx
.output_rc_stat
) {
595 set_rate_control_stats(&rc
, &enc_cfg
);
// NOTE(review): den / num is evaluated as an integer division before the
// assignment to the double, so non-integral frame rates are truncated.
596 framerate
= enc_cfg
.g_timebase
.den
/ enc_cfg
.g_timebase
.num
;
// Container information shared by all output writers.
600 info
.codec_fourcc
= VP9_FOURCC
;
601 info
.time_base
.numerator
= enc_cfg
.g_timebase
.num
;
602 info
.time_base
.denominator
= enc_cfg
.g_timebase
.den
;
604 if (!(app_input
.passes
== 2 && app_input
.pass
== 1)) {
605 // We don't save the bitstream for the 1st pass on two pass rate control
606 writer
= vpx_video_writer_open(app_input
.output_filename
, kContainerIVF
,
609 die("Failed to open %s for writing\n", app_input
.output_filename
);
612 // For now, just write temporal layer streams.
613 // TODO(wonkap): do spatial by re-writing superframe.
// Each temporal layer gets its own "<output>_t<tl>.ivf" file.
614 if (svc_ctx
.output_rc_stat
) {
615 for (tl
= 0; tl
< enc_cfg
.ts_number_layers
; ++tl
) {
616 char file_name
[PATH_MAX
];
618 snprintf(file_name
, sizeof(file_name
), "%s_t%d.ivf",
619 app_input
.output_filename
, tl
);
620 outfile
[tl
] = vpx_video_writer_open(file_name
, kContainerIVF
, &info
);
622 die("Failed to open %s for writing", file_name
);
627 // skip initial frames
628 for (i
= 0; i
< app_input
.frames_to_skip
; ++i
)
629 vpx_img_read(&raw
, infile
);
// Apply the speed setting unless it was left at -1 (library default).
631 if (svc_ctx
.speed
!= -1)
632 vpx_codec_control(&codec
, VP8E_SET_CPUUSED
, svc_ctx
.speed
);
// The tile-column control value is taken as threads >> 1.
634 vpx_codec_control(&codec
, VP9E_SET_TILE_COLUMNS
, (svc_ctx
.threads
>> 1));
// Encode loop: runs until end of stream has been signalled.
637 while (!end_of_stream
) {
638 vpx_codec_iter_t iter
= NULL
;
639 const vpx_codec_cx_pkt_t
*cx_pkt
;
640 if (frame_cnt
>= app_input
.frames_to_code
|| !vpx_img_read(&raw
, infile
)) {
641 // We need one extra vpx_svc_encode call at end of stream to flush
642 // encoder and get remaining data
// A NULL image is passed at end of stream; the realtime deadline is used
// when speed >= 5, good quality otherwise.
646 res
= vpx_svc_encode(&svc_ctx
, &codec
, (end_of_stream
? NULL
: &raw
),
647 pts
, frame_duration
, svc_ctx
.speed
>= 5 ?
648 VPX_DL_REALTIME
: VPX_DL_GOOD_QUALITY
);
650 printf("%s", vpx_svc_get_message(&svc_ctx
));
651 if (res
!= VPX_CODEC_OK
) {
652 die_codec(&codec
, "Failed to encode frame");
// Drain every packet the encoder produced for this call.
655 while ((cx_pkt
= vpx_codec_get_cx_data(&codec
, &iter
)) != NULL
) {
656 switch (cx_pkt
->kind
) {
657 case VPX_CODEC_CX_FRAME_PKT
: {
658 if (cx_pkt
->data
.frame
.sz
> 0) {
// Write the compressed frame to the main output.
663 vpx_video_writer_write_frame(writer
,
664 cx_pkt
->data
.frame
.buf
,
665 cx_pkt
->data
.frame
.sz
,
666 cx_pkt
->data
.frame
.pts
);
668 // TODO(marpan/wonkap): Put this (through line 728) in a separate function.
669 if (svc_ctx
.output_rc_stat
) {
670 vpx_codec_control(&codec
, VP9E_GET_SVC_LAYER_ID
, &layer_id
);
// NOTE(review): the return value is ignored, so sizes[] is used below even
// if the index was reported as corrupt.
671 parse_superframe_index(cx_pkt
->data
.frame
.buf
,
672 cx_pkt
->data
.frame
.sz
, sizes
, &count
);
// Count one input frame for the current temporal layer of every spatial
// layer.
673 for (sl
= 0; sl
< enc_cfg
.ss_number_layers
; ++sl
) {
674 ++rc
.layer_input_frames
[sl
* enc_cfg
.ts_number_layers
+
675 layer_id
.temporal_layer_id
];
// The frame goes to the file of its own temporal layer and of every higher
// one.
677 for (tl
= layer_id
.temporal_layer_id
;
678 tl
< enc_cfg
.ts_number_layers
; ++tl
) {
679 vpx_video_writer_write_frame(outfile
[tl
],
680 cx_pkt
->data
.frame
.buf
,
681 cx_pkt
->data
.frame
.sz
,
682 cx_pkt
->data
.frame
.pts
);
// Accumulate per-layer totals; sizes[sl] is the size of spatial layer sl
// taken from the superframe index.
685 for (sl
= 0; sl
< enc_cfg
.ss_number_layers
; ++sl
) {
686 for (tl
= layer_id
.temporal_layer_id
;
687 tl
< enc_cfg
.ts_number_layers
; ++tl
) {
688 const int layer
= sl
* enc_cfg
.ts_number_layers
+ tl
;
689 ++rc
.layer_tot_enc_frames
[layer
];
690 rc
.layer_encoding_bitrate
[layer
] += 8.0 * sizes
[sl
];
691 // Keep count of rate control stats per layer, for non-key frames.
693 if (tl
== layer_id
.temporal_layer_id
&&
694 !(cx_pkt
->data
.frame
.flags
& VPX_FRAME_IS_KEY
)) {
695 rc
.layer_avg_frame_size
[layer
] += 8.0 * sizes
[sl
];
696 rc
.layer_avg_rate_mismatch
[layer
] +=
697 fabs(8.0 * sizes
[sl
] - rc
.layer_pfb
[layer
]) /
699 ++rc
.layer_enc_frames
[layer
];
704 // Update the short-time encoding bitrate stats, for a moving
705 // window of size rc.window_size, shifted by rc.window_size / 2.
706 // Ignore first window segment, due to key frame.
707 if (frame_cnt
> rc
.window_size
) {
708 tl
= layer_id
.temporal_layer_id
;
709 for (sl
= 0; sl
< enc_cfg
.ss_number_layers
; ++sl
) {
710 sum_bitrate
+= 0.001 * 8.0 * sizes
[sl
] * framerate
;
// At each window boundary, record the window mean and its square.
712 if (frame_cnt
% rc
.window_size
== 0) {
713 rc
.window_count
+= 1;
714 rc
.avg_st_encoding_bitrate
+= sum_bitrate
/ rc
.window_size
;
715 rc
.variance_st_encoding_bitrate
+=
716 (sum_bitrate
/ rc
.window_size
) *
717 (sum_bitrate
/ rc
.window_size
);
722 // Second shifted window.
723 if (frame_cnt
> rc
.window_size
+ rc
.window_size
/ 2) {
724 tl
= layer_id
.temporal_layer_id
;
725 for (sl
= 0; sl
< enc_cfg
.ss_number_layers
; ++sl
) {
726 sum_bitrate2
+= 0.001 * 8.0 * sizes
[sl
] * framerate
;
729 if (frame_cnt
> 2 * rc
.window_size
&&
730 frame_cnt
% rc
.window_size
== 0) {
731 rc
.window_count
+= 1;
732 rc
.avg_st_encoding_bitrate
+= sum_bitrate2
/ rc
.window_size
;
733 rc
.variance_st_encoding_bitrate
+=
734 (sum_bitrate2
/ rc
.window_size
) *
735 (sum_bitrate2
/ rc
.window_size
);
// Per-frame progress line.
743 printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received
,
744 !!(cx_pkt
->data
.frame
.flags
& VPX_FRAME_IS_KEY
),
745 (int)cx_pkt
->data
.frame
.sz
, (int)cx_pkt
->data
.frame
.pts
);
// Statistics packets are appended to the rc_stats store.
749 case VPX_CODEC_STATS_PKT
: {
750 stats_write(&app_input
.rc_stats
,
751 cx_pkt
->data
.twopass_stats
.buf
,
752 cx_pkt
->data
.twopass_stats
.sz
);
// Advance the presentation timestamp while frames are still being fed.
761 if (!end_of_stream
) {
763 pts
+= frame_duration
;
766 printf("Processed %d frames\n", frame_cnt
);
769 if (svc_ctx
.output_rc_stat
) {
770 printout_rate_control_summary(&rc
, &enc_cfg
, frame_cnt
);
// Tear down: codec first, then the stats store (two-pass only) and the
// writers.
774 if (vpx_codec_destroy(&codec
)) die_codec(&codec
, "Failed to destroy codec");
775 if (app_input
.passes
== 2)
776 stats_close(&app_input
.rc_stats
, 1);
778 vpx_video_writer_close(writer
);
781 if (svc_ctx
.output_rc_stat
) {
782 for (tl
= 0; tl
< enc_cfg
.ts_number_layers
; ++tl
) {
783 vpx_video_writer_close(outfile
[tl
]);
788 // display average size, psnr
789 printf("%s", vpx_svc_dump_statistics(&svc_ctx
));
790 vpx_svc_release(&svc_ctx
);