Bug 1882465 - Update .hg-annotate-ignore-revs and .git-blame-ignore-revs to reflect...
[gecko.git] / third_party / aom / examples / svc_encoder_rtc.cc
blobc751e9868cbe2ba40deafac64bc7d3073470e532
1 /*
2 * Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
11 // This is an example demonstrating how to implement a multi-layer AOM
12 // encoding scheme for RTC video applications.
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
21 #include <memory>
23 #include "config/aom_config.h"
25 #if CONFIG_AV1_DECODER
26 #include "aom/aom_decoder.h"
27 #endif
28 #include "aom/aom_encoder.h"
29 #include "aom/aomcx.h"
30 #include "common/args.h"
31 #include "common/tools_common.h"
32 #include "common/video_writer.h"
33 #include "examples/encoder_util.h"
34 #include "aom_ports/aom_timer.h"
35 #include "av1/ratectrl_rtc.h"
37 #define OPTION_BUFFER_SIZE 1024
39 typedef struct {
40 const char *output_filename;
41 char options[OPTION_BUFFER_SIZE];
42 struct AvxInputContext input_ctx;
43 int speed;
44 int aq_mode;
45 int layering_mode;
46 int output_obu;
47 int decode;
48 int tune_content;
49 int show_psnr;
50 bool use_external_rc;
51 } AppInput;
// Kinds of comma-separated per-layer option lists. The enumerator values
// index option_min_values[] / option_max_values[].
typedef enum {
  QUANTIZER = 0,    // single integer per layer
  BITRATE,          // single integer per (spatial, temporal) layer
  SCALE_FACTOR,     // "num/den" pair per spatial layer
  AUTO_ALT_REF,     // single integer per layer
  ALL_OPTION_TYPES  // number of option kinds; not a real option
} LAYER_OPTION_TYPE;
61 static const arg_def_t outputfile =
62 ARG_DEF("o", "output", 1, "Output filename");
63 static const arg_def_t frames_arg =
64 ARG_DEF("f", "frames", 1, "Number of frames to encode");
65 static const arg_def_t threads_arg =
66 ARG_DEF("th", "threads", 1, "Number of threads to use");
67 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
68 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
69 static const arg_def_t timebase_arg =
70 ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
71 static const arg_def_t bitrate_arg = ARG_DEF(
72 "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
73 static const arg_def_t spatial_layers_arg =
74 ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
75 static const arg_def_t temporal_layers_arg =
76 ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
77 static const arg_def_t layering_mode_arg =
78 ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
79 static const arg_def_t kf_dist_arg =
80 ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
81 static const arg_def_t scale_factors_arg =
82 ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
83 static const arg_def_t min_q_arg =
84 ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
85 static const arg_def_t max_q_arg =
86 ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
87 static const arg_def_t speed_arg =
88 ARG_DEF("sp", "speed", 1, "Speed configuration");
89 static const arg_def_t aqmode_arg =
90 ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
91 static const arg_def_t bitrates_arg =
92 ARG_DEF("bl", "bitrates", 1,
93 "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
94 static const arg_def_t dropframe_thresh_arg =
95 ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
96 static const arg_def_t error_resilient_arg =
97 ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
98 static const arg_def_t output_obu_arg =
99 ARG_DEF(NULL, "output-obu", 1,
100 "Write OBUs when set to 1. Otherwise write IVF files.");
101 static const arg_def_t test_decode_arg =
102 ARG_DEF(NULL, "test-decode", 1,
103 "Attempt to test decoding the output when set to 1. Default is 1.");
104 static const arg_def_t psnr_arg =
105 ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
106 static const arg_def_t ext_rc_arg =
107 ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
108 static const struct arg_enum_list tune_content_enum[] = {
109 { "default", AOM_CONTENT_DEFAULT },
110 { "screen", AOM_CONTENT_SCREEN },
111 { "film", AOM_CONTENT_FILM },
112 { NULL, 0 }
114 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
115 NULL, "tune-content", 1, "Tune content type", tune_content_enum);
#if CONFIG_AV1_HIGHBITDEPTH
// Bit depths selectable with -d/--bit-depth; the { NULL, 0 } entry ends the
// list.
static const struct arg_enum_list bitdepth_enum[] = {
  { "8", AOM_BITS_8 },
  { "10", AOM_BITS_10 },
  { NULL, 0 },
};

static const arg_def_t bitdepth_arg =
    ARG_DEF_ENUM("d", "bit-depth", 1, "Bit depth for codec 8 or 10. ",
                 bitdepth_enum);
#endif  // CONFIG_AV1_HIGHBITDEPTH
126 static const arg_def_t *svc_args[] = {
127 &frames_arg, &outputfile, &width_arg,
128 &height_arg, &timebase_arg, &bitrate_arg,
129 &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg,
130 &min_q_arg, &max_q_arg, &temporal_layers_arg,
131 &layering_mode_arg, &threads_arg, &aqmode_arg,
132 #if CONFIG_AV1_HIGHBITDEPTH
133 &bitdepth_arg,
134 #endif
135 &speed_arg, &bitrates_arg, &dropframe_thresh_arg,
136 &error_resilient_arg, &output_obu_arg, &test_decode_arg,
137 &tune_content_arg, &psnr_arg, NULL,
140 #define zero(Dest) memset(&(Dest), 0, sizeof(Dest))
142 static const char *exec_name;
144 void usage_exit(void) {
145 fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
146 exec_name);
147 fprintf(stderr, "Options:\n");
148 arg_show_usage(stderr, svc_args);
149 exit(EXIT_FAILURE);
// Returns 1 when the first four bytes of |detect| are the "YUV4" signature,
// 0 otherwise.
static int file_is_y4m(const char detect[4]) {
  static const char kSignature[4] = { 'Y', 'U', 'V', '4' };
  if (memcmp(detect, kSignature, sizeof(kSignature)) != 0) return 0;
  return 1;
}
// Returns 1 when the first four bytes of |detect| are the "DKIF" signature,
// 0 otherwise.
static int fourcc_is_ivf(const char detect[4]) {
  static const char kSignature[4] = { 'D', 'K', 'I', 'F' };
  return memcmp(detect, kSignature, sizeof(kSignature)) == 0 ? 1 : 0;
}
163 static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
164 1 };
166 static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
168 static void open_input_file(struct AvxInputContext *input,
169 aom_chroma_sample_position_t csp) {
170 /* Parse certain options from the input file, if possible */
171 input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
172 : set_binary_mode(stdin);
174 if (!input->file) fatal("Failed to open input file");
176 if (!fseeko(input->file, 0, SEEK_END)) {
177 /* Input file is seekable. Figure out how long it is, so we can get
178 * progress info.
180 input->length = ftello(input->file);
181 rewind(input->file);
184 /* Default to 1:1 pixel aspect ratio. */
185 input->pixel_aspect_ratio.numerator = 1;
186 input->pixel_aspect_ratio.denominator = 1;
188 /* For RAW input sources, these bytes will applied on the first frame
189 * in read_frame().
191 input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
192 input->detect.position = 0;
194 if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
195 if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
196 input->only_i420) >= 0) {
197 input->file_type = FILE_TYPE_Y4M;
198 input->width = input->y4m.pic_w;
199 input->height = input->y4m.pic_h;
200 input->pixel_aspect_ratio.numerator = input->y4m.par_n;
201 input->pixel_aspect_ratio.denominator = input->y4m.par_d;
202 input->framerate.numerator = input->y4m.fps_n;
203 input->framerate.denominator = input->y4m.fps_d;
204 input->fmt = input->y4m.aom_fmt;
205 input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
206 } else {
207 fatal("Unsupported Y4M stream.");
209 } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
210 fatal("IVF is not supported as input.");
211 } else {
212 input->file_type = FILE_TYPE_RAW;
216 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
217 int *value0, int *value1) {
218 if (type == SCALE_FACTOR) {
219 *value0 = (int)strtol(input, &input, 10);
220 if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
221 *value1 = (int)strtol(input, &input, 10);
223 if (*value0 < option_min_values[SCALE_FACTOR] ||
224 *value1 < option_min_values[SCALE_FACTOR] ||
225 *value0 > option_max_values[SCALE_FACTOR] ||
226 *value1 > option_max_values[SCALE_FACTOR] ||
227 *value0 > *value1) // num shouldn't be greater than den
228 return AOM_CODEC_INVALID_PARAM;
229 } else {
230 *value0 = atoi(input);
231 if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
232 return AOM_CODEC_INVALID_PARAM;
234 return AOM_CODEC_OK;
237 static aom_codec_err_t parse_layer_options_from_string(
238 aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
239 int *option0, int *option1) {
240 aom_codec_err_t res = AOM_CODEC_OK;
241 char *input_string;
242 char *token;
243 const char *delim = ",";
244 int num_layers = svc_params->number_spatial_layers;
245 int i = 0;
247 if (type == BITRATE)
248 num_layers =
249 svc_params->number_spatial_layers * svc_params->number_temporal_layers;
251 if (input == NULL || option0 == NULL ||
252 (option1 == NULL && type == SCALE_FACTOR))
253 return AOM_CODEC_INVALID_PARAM;
255 const size_t input_length = strlen(input);
256 input_string = reinterpret_cast<char *>(malloc(input_length + 1));
257 if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
258 memcpy(input_string, input, input_length + 1);
259 token = strtok(input_string, delim); // NOLINT
260 for (i = 0; i < num_layers; ++i) {
261 if (token != NULL) {
262 res = extract_option(type, token, option0 + i, option1 + i);
263 if (res != AOM_CODEC_OK) break;
264 token = strtok(NULL, delim); // NOLINT
265 } else {
266 res = AOM_CODEC_INVALID_PARAM;
267 break;
270 free(input_string);
271 return res;
274 static void parse_command_line(int argc, const char **argv_,
275 AppInput *app_input,
276 aom_svc_params_t *svc_params,
277 aom_codec_enc_cfg_t *enc_cfg) {
278 struct arg arg;
279 char **argv = NULL;
280 char **argi = NULL;
281 char **argj = NULL;
282 char string_options[1024] = { 0 };
284 // Default settings
285 svc_params->number_spatial_layers = 1;
286 svc_params->number_temporal_layers = 1;
287 app_input->layering_mode = 0;
288 app_input->output_obu = 0;
289 app_input->decode = 1;
290 enc_cfg->g_threads = 1;
291 enc_cfg->rc_end_usage = AOM_CBR;
293 // process command line options
294 argv = argv_dup(argc - 1, argv_ + 1);
295 if (!argv) {
296 fprintf(stderr, "Error allocating argument list\n");
297 exit(EXIT_FAILURE);
299 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
300 arg.argv_step = 1;
302 if (arg_match(&arg, &outputfile, argi)) {
303 app_input->output_filename = arg.val;
304 } else if (arg_match(&arg, &width_arg, argi)) {
305 enc_cfg->g_w = arg_parse_uint(&arg);
306 } else if (arg_match(&arg, &height_arg, argi)) {
307 enc_cfg->g_h = arg_parse_uint(&arg);
308 } else if (arg_match(&arg, &timebase_arg, argi)) {
309 enc_cfg->g_timebase = arg_parse_rational(&arg);
310 } else if (arg_match(&arg, &bitrate_arg, argi)) {
311 enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
312 } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
313 svc_params->number_spatial_layers = arg_parse_uint(&arg);
314 } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
315 svc_params->number_temporal_layers = arg_parse_uint(&arg);
316 } else if (arg_match(&arg, &speed_arg, argi)) {
317 app_input->speed = arg_parse_uint(&arg);
318 if (app_input->speed > 11) {
319 aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
321 } else if (arg_match(&arg, &aqmode_arg, argi)) {
322 app_input->aq_mode = arg_parse_uint(&arg);
323 } else if (arg_match(&arg, &threads_arg, argi)) {
324 enc_cfg->g_threads = arg_parse_uint(&arg);
325 } else if (arg_match(&arg, &layering_mode_arg, argi)) {
326 app_input->layering_mode = arg_parse_int(&arg);
327 } else if (arg_match(&arg, &kf_dist_arg, argi)) {
328 enc_cfg->kf_min_dist = arg_parse_uint(&arg);
329 enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
330 } else if (arg_match(&arg, &scale_factors_arg, argi)) {
331 aom_codec_err_t res = parse_layer_options_from_string(
332 svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
333 svc_params->scaling_factor_den);
334 if (res != AOM_CODEC_OK) {
335 die("Failed to parse scale factors: %s\n",
336 aom_codec_err_to_string(res));
338 } else if (arg_match(&arg, &min_q_arg, argi)) {
339 enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
340 } else if (arg_match(&arg, &max_q_arg, argi)) {
341 enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
342 #if CONFIG_AV1_HIGHBITDEPTH
343 } else if (arg_match(&arg, &bitdepth_arg, argi)) {
344 enc_cfg->g_bit_depth =
345 static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
346 switch (enc_cfg->g_bit_depth) {
347 case AOM_BITS_8:
348 enc_cfg->g_input_bit_depth = 8;
349 enc_cfg->g_profile = 0;
350 break;
351 case AOM_BITS_10:
352 enc_cfg->g_input_bit_depth = 10;
353 enc_cfg->g_profile = 0;
354 break;
355 default:
356 die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
358 #endif // CONFIG_VP9_HIGHBITDEPTH
359 } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
360 enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
361 } else if (arg_match(&arg, &error_resilient_arg, argi)) {
362 enc_cfg->g_error_resilient = arg_parse_uint(&arg);
363 if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
364 die("Invalid value for error resilient (0, 1): %d.",
365 enc_cfg->g_error_resilient);
366 } else if (arg_match(&arg, &output_obu_arg, argi)) {
367 app_input->output_obu = arg_parse_uint(&arg);
368 if (app_input->output_obu != 0 && app_input->output_obu != 1)
369 die("Invalid value for obu output flag (0, 1): %d.",
370 app_input->output_obu);
371 } else if (arg_match(&arg, &test_decode_arg, argi)) {
372 app_input->decode = arg_parse_uint(&arg);
373 if (app_input->decode != 0 && app_input->decode != 1)
374 die("Invalid value for test decode flag (0, 1): %d.",
375 app_input->decode);
376 } else if (arg_match(&arg, &tune_content_arg, argi)) {
377 app_input->tune_content = arg_parse_enum_or_int(&arg);
378 printf("tune content %d\n", app_input->tune_content);
379 } else if (arg_match(&arg, &psnr_arg, argi)) {
380 app_input->show_psnr = 1;
381 } else if (arg_match(&arg, &ext_rc_arg, argi)) {
382 app_input->use_external_rc = true;
383 } else {
384 ++argj;
388 // Total bitrate needs to be parsed after the number of layers.
389 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
390 arg.argv_step = 1;
391 if (arg_match(&arg, &bitrates_arg, argi)) {
392 aom_codec_err_t res = parse_layer_options_from_string(
393 svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
394 if (res != AOM_CODEC_OK) {
395 die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
397 } else {
398 ++argj;
402 // There will be a space in front of the string options
403 if (strlen(string_options) > 0)
404 strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
406 // Check for unrecognized options
407 for (argi = argv; *argi; ++argi)
408 if (argi[0][0] == '-' && strlen(argi[0]) > 1)
409 die("Error: Unrecognized option %s\n", *argi);
411 if (argv[0] == NULL) {
412 usage_exit();
415 app_input->input_ctx.filename = argv[0];
416 free(argv);
418 open_input_file(&app_input->input_ctx, AOM_CSP_UNKNOWN);
419 if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
420 enc_cfg->g_w = app_input->input_ctx.width;
421 enc_cfg->g_h = app_input->input_ctx.height;
424 if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
425 enc_cfg->g_h % 2)
426 die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
428 printf(
429 "Codec %s\n"
430 "layers: %d\n"
431 "width %u, height: %u\n"
432 "num: %d, den: %d, bitrate: %u\n"
433 "gop size: %u\n",
434 aom_codec_iface_name(aom_codec_av1_cx()),
435 svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
436 enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
437 enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
// Number of temporal layers for each layering mode (index = mode, 0..11).
static int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3,  // modes 0-3
  2, 1, 1, 3,  // modes 4-7
  3, 3, 3, 3,  // modes 8-11
};

// Number of spatial layers for each layering mode (index = mode, 0..11).
static int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1,  // modes 0-3
  1, 2, 3, 2,  // modes 4-7
  3, 3, 3, 3,  // modes 8-11
};
447 // For rate control encoding stats.
448 struct RateControlMetrics {
449 // Number of input frames per layer.
450 int layer_input_frames[AOM_MAX_TS_LAYERS];
451 // Number of encoded non-key frames per layer.
452 int layer_enc_frames[AOM_MAX_TS_LAYERS];
453 // Framerate per layer layer (cumulative).
454 double layer_framerate[AOM_MAX_TS_LAYERS];
455 // Target average frame size per layer (per-frame-bandwidth per layer).
456 double layer_pfb[AOM_MAX_LAYERS];
457 // Actual average frame size per layer.
458 double layer_avg_frame_size[AOM_MAX_LAYERS];
459 // Average rate mismatch per layer (|target - actual| / target).
460 double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
461 // Actual encoding bitrate per layer (cumulative across temporal layers).
462 double layer_encoding_bitrate[AOM_MAX_LAYERS];
463 // Average of the short-time encoder actual bitrate.
464 // TODO(marpan): Should we add these short-time stats for each layer?
465 double avg_st_encoding_bitrate;
466 // Variance of the short-time encoder actual bitrate.
467 double variance_st_encoding_bitrate;
468 // Window (number of frames) for computing short-timee encoding bitrate.
469 int window_size;
470 // Number of window measurements.
471 int window_count;
472 int layer_target_bitrate[AOM_MAX_LAYERS];
// Number of reference buffer slots; bounds the refresh[] loop in
// set_layer_pattern().
static const int REF_FRAMES = 8;

// Number of inter references per frame; bounds the ref_idx[] / reference[]
// loops in set_layer_pattern().
static const int INTER_REFS_PER_FRAME = 7;

// Reference frames used in this example encoder. The values index ref_idx[]
// and reference[].
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME,    // 1
  SVC_LAST3_FRAME,    // 2
  SVC_GOLDEN_FRAME,   // 3
  SVC_BWDREF_FRAME,   // 4
  SVC_ALTREF2_FRAME,  // 5
  SVC_ALTREF_FRAME    // 6
};
490 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
491 FILE *f = input_ctx->file;
492 y4m_input *y4m = &input_ctx->y4m;
493 int shortread = 0;
495 if (input_ctx->file_type == FILE_TYPE_Y4M) {
496 if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
497 } else {
498 shortread = read_yuv_frame(input_ctx, img);
501 return !shortread;
504 static void close_input_file(struct AvxInputContext *input) {
505 fclose(input->file);
506 if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
509 // Note: these rate control metrics assume only 1 key frame in the
510 // sequence (i.e., first frame only). So for temporal pattern# 7
511 // (which has key frame for every frame on base layer), the metrics
512 // computation will be off/wrong.
513 // TODO(marpan): Update these metrics to account for multiple key frames
514 // in the stream.
515 static void set_rate_control_metrics(struct RateControlMetrics *rc,
516 double framerate, int ss_number_layers,
517 int ts_number_layers) {
518 int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
519 ts_rate_decimator[0] = 1;
520 if (ts_number_layers == 2) {
521 ts_rate_decimator[0] = 2;
522 ts_rate_decimator[1] = 1;
524 if (ts_number_layers == 3) {
525 ts_rate_decimator[0] = 4;
526 ts_rate_decimator[1] = 2;
527 ts_rate_decimator[2] = 1;
529 // Set the layer (cumulative) framerate and the target layer (non-cumulative)
530 // per-frame-bandwidth, for the rate control encoding stats below.
531 for (int sl = 0; sl < ss_number_layers; ++sl) {
532 int i = sl * ts_number_layers;
533 rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
534 rc->layer_pfb[i] =
535 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
536 for (int tl = 0; tl < ts_number_layers; ++tl) {
537 i = sl * ts_number_layers + tl;
538 if (tl > 0) {
539 rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
540 rc->layer_pfb[i] =
541 1000.0 *
542 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
543 (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
545 rc->layer_input_frames[tl] = 0;
546 rc->layer_enc_frames[tl] = 0;
547 rc->layer_encoding_bitrate[i] = 0.0;
548 rc->layer_avg_frame_size[i] = 0.0;
549 rc->layer_avg_rate_mismatch[i] = 0.0;
552 rc->window_count = 0;
553 rc->window_size = 15;
554 rc->avg_st_encoding_bitrate = 0.0;
555 rc->variance_st_encoding_bitrate = 0.0;
558 static void printout_rate_control_summary(struct RateControlMetrics *rc,
559 int frame_cnt, int ss_number_layers,
560 int ts_number_layers) {
561 int tot_num_frames = 0;
562 double perc_fluctuation = 0.0;
563 printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
564 printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
565 for (int sl = 0; sl < ss_number_layers; ++sl) {
566 tot_num_frames = 0;
567 for (int tl = 0; tl < ts_number_layers; ++tl) {
568 int i = sl * ts_number_layers + tl;
569 const int num_dropped =
570 tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
571 : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
572 tot_num_frames += rc->layer_input_frames[tl];
573 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
574 rc->layer_encoding_bitrate[i] /
575 tot_num_frames;
576 rc->layer_avg_frame_size[i] =
577 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
578 rc->layer_avg_rate_mismatch[i] =
579 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
580 printf("For layer#: %d %d \n", sl, tl);
581 printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
582 rc->layer_encoding_bitrate[i]);
583 printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
584 rc->layer_avg_frame_size[i]);
585 printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
586 printf(
587 "Number of input frames, encoded (non-key) frames, "
588 "and perc dropped frames: %d %d %f\n",
589 rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
590 100.0 * num_dropped / rc->layer_input_frames[tl]);
591 printf("\n");
594 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
595 rc->variance_st_encoding_bitrate =
596 rc->variance_st_encoding_bitrate / rc->window_count -
597 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
598 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
599 rc->avg_st_encoding_bitrate;
600 printf("Short-time stats, for window of %d frames:\n", rc->window_size);
601 printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
602 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
603 perc_fluctuation);
604 if (frame_cnt - 1 != tot_num_frames)
605 die("Error: Number of input frames not equal to output!\n");
608 // Layer pattern configuration.
609 static void set_layer_pattern(
610 int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
611 aom_svc_ref_frame_config_t *ref_frame_config,
612 aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
613 int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
614 // Setting this flag to 1 enables simplex example of
615 // RPS (Reference Picture Selection) for 1 layer.
616 int use_rps_example = 0;
617 int i;
618 int enable_longterm_temporal_ref = 1;
619 int shift = (layering_mode == 8) ? 2 : 0;
620 int simulcast_mode = (layering_mode == 11);
621 *use_svc_control = 1;
622 layer_id->spatial_layer_id = spatial_layer_id;
623 int lag_index = 0;
624 int base_count = superframe_cnt >> 2;
625 ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
626 ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
627 ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
628 // Set the reference map buffer idx for the 7 references:
629 // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
630 // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
631 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
632 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
633 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
635 if (ksvc_mode) {
636 // Same pattern as case 9, but the reference structure will be constrained
637 // below.
638 layering_mode = 9;
640 switch (layering_mode) {
641 case 0:
642 if (use_rps_example == 0) {
643 // 1-layer: update LAST on every frame, reference LAST.
644 layer_id->temporal_layer_id = 0;
645 layer_id->spatial_layer_id = 0;
646 ref_frame_config->refresh[0] = 1;
647 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
648 } else {
649 // Pattern of 2 references (ALTREF and GOLDEN) trailing
650 // LAST by 4 and 8 frames, with some switching logic to
651 // sometimes only predict from the longer-term reference
652 //(golden here). This is simple example to test RPS
653 // (reference picture selection).
654 int last_idx = 0;
655 int last_idx_refresh = 0;
656 int gld_idx = 0;
657 int alt_ref_idx = 0;
658 int lag_alt = 4;
659 int lag_gld = 8;
660 layer_id->temporal_layer_id = 0;
661 layer_id->spatial_layer_id = 0;
662 int sh = 8; // slots 0 - 7.
663 // Moving index slot for last: 0 - (sh - 1)
664 if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
665 // Moving index for refresh of last: one ahead for next frame.
666 last_idx_refresh = superframe_cnt % sh;
667 // Moving index for gld_ref, lag behind current by lag_gld
668 if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
669 // Moving index for alt_ref, lag behind LAST by lag_alt frames.
670 if (superframe_cnt > lag_alt)
671 alt_ref_idx = (superframe_cnt - lag_alt) % sh;
672 // Set the ref_idx.
673 // Default all references to slot for last.
674 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
675 ref_frame_config->ref_idx[i] = last_idx;
676 // Set the ref_idx for the relevant references.
677 ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
678 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
679 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
680 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
681 // Refresh this slot, which will become LAST on next frame.
682 ref_frame_config->refresh[last_idx_refresh] = 1;
683 // Reference LAST, ALTREF, and GOLDEN
684 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
685 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
686 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
687 // Switch to only GOLDEN every 200 frames.
688 if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
689 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
690 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
691 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
692 // Test if the long-term is LAST instead, this is just a renaming
693 // but its tests if encoder behaves the same, whether its
694 // LAST or GOLDEN.
695 if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
696 ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
697 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
698 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
699 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
703 break;
704 case 1:
705 // 2-temporal layer.
706 // 1 3 5
707 // 0 2 4
708 // Keep golden fixed at slot 3.
709 base_count = superframe_cnt >> 1;
710 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
711 // Cyclically refresh slots 5, 6, 7, for lag alt ref.
712 lag_index = 5;
713 if (base_count > 0) {
714 lag_index = 5 + (base_count % 3);
715 if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
717 // Set the altref slot to lag_index.
718 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
719 if (superframe_cnt % 2 == 0) {
720 layer_id->temporal_layer_id = 0;
721 // Update LAST on layer 0, reference LAST.
722 ref_frame_config->refresh[0] = 1;
723 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
724 // Refresh lag_index slot, needed for lagging golden.
725 ref_frame_config->refresh[lag_index] = 1;
726 // Refresh GOLDEN every x base layer frames.
727 if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
728 } else {
729 layer_id->temporal_layer_id = 1;
730 // No updates on layer 1, reference LAST (TL0).
731 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
733 // Always reference golden and altref on TL0.
734 if (layer_id->temporal_layer_id == 0) {
735 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
736 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
738 break;
739 case 2:
740 // 3-temporal layer:
741 // 1 3 5 7
742 // 2 6
743 // 0 4 8
744 if (superframe_cnt % 4 == 0) {
745 // Base layer.
746 layer_id->temporal_layer_id = 0;
747 // Update LAST on layer 0, reference LAST.
748 ref_frame_config->refresh[0] = 1;
749 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
750 } else if ((superframe_cnt - 1) % 4 == 0) {
751 layer_id->temporal_layer_id = 2;
752 // First top layer: no updates, only reference LAST (TL0).
753 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
754 } else if ((superframe_cnt - 2) % 4 == 0) {
755 layer_id->temporal_layer_id = 1;
756 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
757 ref_frame_config->refresh[1] = 1;
758 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
759 } else if ((superframe_cnt - 3) % 4 == 0) {
760 layer_id->temporal_layer_id = 2;
761 // Second top layer: no updates, only reference LAST.
762 // Set buffer idx for LAST to slot 1, since that was the slot
763 // updated in previous frame. So LAST is TL1 frame.
764 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
765 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
766 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
768 break;
769 case 3:
770 // 3 TL, same as above, except allow for predicting
771 // off 2 more references (GOLDEN and ALTREF), with
772 // GOLDEN updated periodically, and ALTREF lagging from
773 // LAST from ~4 frames. Both GOLDEN and ALTREF
774 // can only be updated on base temporal layer.
776 // Keep golden fixed at slot 3.
777 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
778 // Cyclically refresh slots 5, 6, 7, for lag altref.
779 lag_index = 5;
780 if (base_count > 0) {
781 lag_index = 5 + (base_count % 3);
782 if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
784 // Set the altref slot to lag_index.
785 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
786 if (superframe_cnt % 4 == 0) {
787 // Base layer.
788 layer_id->temporal_layer_id = 0;
789 // Update LAST on layer 0, reference LAST.
790 ref_frame_config->refresh[0] = 1;
791 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
792 // Refresh GOLDEN every x ~10 base layer frames.
793 if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
794 // Refresh lag_index slot, needed for lagging altref.
795 ref_frame_config->refresh[lag_index] = 1;
796 } else if ((superframe_cnt - 1) % 4 == 0) {
797 layer_id->temporal_layer_id = 2;
798 // First top layer: no updates, only reference LAST (TL0).
799 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
800 } else if ((superframe_cnt - 2) % 4 == 0) {
801 layer_id->temporal_layer_id = 1;
802 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
803 ref_frame_config->refresh[1] = 1;
804 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
805 } else if ((superframe_cnt - 3) % 4 == 0) {
806 layer_id->temporal_layer_id = 2;
807 // Second top layer: no updates, only reference LAST.
808 // Set buffer idx for LAST to slot 1, since that was the slot
809 // updated in previous frame. So LAST is TL1 frame.
810 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
811 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
812 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
814 // Every frame can reference GOLDEN AND ALTREF.
815 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
816 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
817 // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
818 if (speed >= 7) {
819 ref_frame_comp_pred->use_comp_pred[2] = 1;
820 ref_frame_comp_pred->use_comp_pred[0] = 1;
822 break;
823 case 4:
824 // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
825 // only reference GF (not LAST). Other frames only reference LAST.
826 // 1 3 5 7
827 // 2 6
828 // 0 4 8
829 if (superframe_cnt % 4 == 0) {
830 // Base layer.
831 layer_id->temporal_layer_id = 0;
832 // Update LAST on layer 0, only reference LAST.
833 ref_frame_config->refresh[0] = 1;
834 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
835 } else if ((superframe_cnt - 1) % 4 == 0) {
836 layer_id->temporal_layer_id = 2;
837 // First top layer: no updates, only reference LAST (TL0).
838 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
839 } else if ((superframe_cnt - 2) % 4 == 0) {
840 layer_id->temporal_layer_id = 1;
841 // Middle layer (TL1): update GF, only reference LAST (TL0).
842 ref_frame_config->refresh[3] = 1;
843 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
844 } else if ((superframe_cnt - 3) % 4 == 0) {
845 layer_id->temporal_layer_id = 2;
846 // Second top layer: no updates, only reference GF.
847 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
849 break;
850 case 5:
851 // 2 spatial layers, 1 temporal.
852 layer_id->temporal_layer_id = 0;
853 if (layer_id->spatial_layer_id == 0) {
854 // Reference LAST, update LAST.
855 ref_frame_config->refresh[0] = 1;
856 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
857 } else if (layer_id->spatial_layer_id == 1) {
858 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
859 // and GOLDEN to slot 0. Update slot 1 (LAST).
860 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
861 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
862 ref_frame_config->refresh[1] = 1;
863 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
864 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
866 break;
867 case 6:
868 // 3 spatial layers, 1 temporal.
869 // Note for this case, we set the buffer idx for all references to be
870 // either LAST or GOLDEN, which are always valid references, since decoder
871 // will check if any of the 7 references is valid scale in
872 // valid_ref_frame_size().
873 layer_id->temporal_layer_id = 0;
874 if (layer_id->spatial_layer_id == 0) {
875 // Reference LAST, update LAST. Set all buffer_idx to 0.
876 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
877 ref_frame_config->ref_idx[i] = 0;
878 ref_frame_config->refresh[0] = 1;
879 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
880 } else if (layer_id->spatial_layer_id == 1) {
881 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
882 // and GOLDEN (and all other refs) to slot 0.
883 // Update slot 1 (LAST).
884 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
885 ref_frame_config->ref_idx[i] = 0;
886 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
887 ref_frame_config->refresh[1] = 1;
888 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
889 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
890 } else if (layer_id->spatial_layer_id == 2) {
891 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
892 // and GOLDEN (and all other refs) to slot 1.
893 // Update slot 2 (LAST).
894 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
895 ref_frame_config->ref_idx[i] = 1;
896 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
897 ref_frame_config->refresh[2] = 1;
898 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
899 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
900 // For 3 spatial layer case: allow for top spatial layer to use
901 // additional temporal reference. Update every 10 frames.
902 if (enable_longterm_temporal_ref) {
903 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
904 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
905 if (base_count % 10 == 0)
906 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
909 break;
910 case 7:
911 // 2 spatial and 3 temporal layer.
912 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
913 if (superframe_cnt % 4 == 0) {
914 // Base temporal layer
915 layer_id->temporal_layer_id = 0;
916 if (layer_id->spatial_layer_id == 0) {
917 // Reference LAST, update LAST
918 // Set all buffer_idx to 0
919 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
920 ref_frame_config->ref_idx[i] = 0;
921 ref_frame_config->refresh[0] = 1;
922 } else if (layer_id->spatial_layer_id == 1) {
923 // Reference LAST and GOLDEN.
924 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
925 ref_frame_config->ref_idx[i] = 0;
926 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
927 ref_frame_config->refresh[1] = 1;
929 } else if ((superframe_cnt - 1) % 4 == 0) {
930 // First top temporal enhancement layer.
931 layer_id->temporal_layer_id = 2;
932 if (layer_id->spatial_layer_id == 0) {
933 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
934 ref_frame_config->ref_idx[i] = 0;
935 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
936 ref_frame_config->refresh[3] = 1;
937 } else if (layer_id->spatial_layer_id == 1) {
938 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
939 // GOLDEN (and all other refs) to slot 3.
940 // No update.
941 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
942 ref_frame_config->ref_idx[i] = 3;
943 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
945 } else if ((superframe_cnt - 2) % 4 == 0) {
946 // Middle temporal enhancement layer.
947 layer_id->temporal_layer_id = 1;
948 if (layer_id->spatial_layer_id == 0) {
949 // Reference LAST.
950 // Set all buffer_idx to 0.
951 // Set GOLDEN to slot 5 and update slot 5.
952 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
953 ref_frame_config->ref_idx[i] = 0;
954 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
955 ref_frame_config->refresh[5 - shift] = 1;
956 } else if (layer_id->spatial_layer_id == 1) {
957 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
958 // GOLDEN (and all other refs) to slot 5.
959 // Set LAST3 to slot 6 and update slot 6.
960 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
961 ref_frame_config->ref_idx[i] = 5 - shift;
962 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
963 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
964 ref_frame_config->refresh[6 - shift] = 1;
966 } else if ((superframe_cnt - 3) % 4 == 0) {
967 // Second top temporal enhancement layer.
968 layer_id->temporal_layer_id = 2;
969 if (layer_id->spatial_layer_id == 0) {
970 // Set LAST to slot 5 and reference LAST.
971 // Set GOLDEN to slot 3 and update slot 3.
972 // Set all other buffer_idx to 0.
973 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
974 ref_frame_config->ref_idx[i] = 0;
975 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
976 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
977 ref_frame_config->refresh[3] = 1;
978 } else if (layer_id->spatial_layer_id == 1) {
979 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
980 // GOLDEN to slot 3. No update.
981 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
982 ref_frame_config->ref_idx[i] = 0;
983 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
984 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
987 break;
988 case 8:
989 // 3 spatial and 3 temporal layer.
990 // Same as case 9 but with overlap in the buffer slot updates.
991 // (shift = 2). The slots 3 and 4 updated by first TL2 are
992 // reused for update in TL1 superframe.
993 // Note for this case, frame order hint must be disabled for
994 // lower resolutions (operating points > 0) to be decodable.
995 case 9:
996 // 3 spatial and 3 temporal layer.
997 // No overlap in buffer updates between TL2 and TL1.
998 // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
999 // Set the references via the svc_ref_frame_config control.
1000 // Always reference LAST.
1001 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1002 if (superframe_cnt % 4 == 0) {
1003 // Base temporal layer.
1004 layer_id->temporal_layer_id = 0;
1005 if (layer_id->spatial_layer_id == 0) {
1006 // Reference LAST, update LAST.
1007 // Set all buffer_idx to 0.
1008 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1009 ref_frame_config->ref_idx[i] = 0;
1010 ref_frame_config->refresh[0] = 1;
1011 } else if (layer_id->spatial_layer_id == 1) {
1012 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1013 // GOLDEN (and all other refs) to slot 0.
1014 // Update slot 1 (LAST).
1015 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1016 ref_frame_config->ref_idx[i] = 0;
1017 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1018 ref_frame_config->refresh[1] = 1;
1019 } else if (layer_id->spatial_layer_id == 2) {
1020 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1021 // GOLDEN (and all other refs) to slot 1.
1022 // Update slot 2 (LAST).
1023 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1024 ref_frame_config->ref_idx[i] = 1;
1025 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1026 ref_frame_config->refresh[2] = 1;
1028 } else if ((superframe_cnt - 1) % 4 == 0) {
1029 // First top temporal enhancement layer.
1030 layer_id->temporal_layer_id = 2;
1031 if (layer_id->spatial_layer_id == 0) {
1032 // Reference LAST (slot 0).
1033 // Set GOLDEN to slot 3 and update slot 3.
1034 // Set all other buffer_idx to slot 0.
1035 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1036 ref_frame_config->ref_idx[i] = 0;
1037 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1038 ref_frame_config->refresh[3] = 1;
1039 } else if (layer_id->spatial_layer_id == 1) {
1040 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1041 // GOLDEN (and all other refs) to slot 3.
1042 // Set LAST2 to slot 4 and Update slot 4.
1043 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1044 ref_frame_config->ref_idx[i] = 3;
1045 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1046 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1047 ref_frame_config->refresh[4] = 1;
1048 } else if (layer_id->spatial_layer_id == 2) {
1049 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1050 // GOLDEN (and all other refs) to slot 4.
1051 // No update.
1052 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1053 ref_frame_config->ref_idx[i] = 4;
1054 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1056 } else if ((superframe_cnt - 2) % 4 == 0) {
1057 // Middle temporal enhancement layer.
1058 layer_id->temporal_layer_id = 1;
1059 if (layer_id->spatial_layer_id == 0) {
1060 // Reference LAST.
1061 // Set all buffer_idx to 0.
1062 // Set GOLDEN to slot 5 and update slot 5.
1063 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1064 ref_frame_config->ref_idx[i] = 0;
1065 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1066 ref_frame_config->refresh[5 - shift] = 1;
1067 } else if (layer_id->spatial_layer_id == 1) {
1068 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1069 // GOLDEN (and all other refs) to slot 5.
1070 // Set LAST3 to slot 6 and update slot 6.
1071 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1072 ref_frame_config->ref_idx[i] = 5 - shift;
1073 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1074 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1075 ref_frame_config->refresh[6 - shift] = 1;
1076 } else if (layer_id->spatial_layer_id == 2) {
1077 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1078 // GOLDEN (and all other refs) to slot 6.
1079 // Set LAST3 to slot 7 and update slot 7.
1080 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1081 ref_frame_config->ref_idx[i] = 6 - shift;
1082 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1083 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1084 ref_frame_config->refresh[7 - shift] = 1;
1086 } else if ((superframe_cnt - 3) % 4 == 0) {
1087 // Second top temporal enhancement layer.
1088 layer_id->temporal_layer_id = 2;
1089 if (layer_id->spatial_layer_id == 0) {
1090 // Set LAST to slot 5 and reference LAST.
1091 // Set GOLDEN to slot 3 and update slot 3.
1092 // Set all other buffer_idx to 0.
1093 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1094 ref_frame_config->ref_idx[i] = 0;
1095 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1096 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1097 ref_frame_config->refresh[3] = 1;
1098 } else if (layer_id->spatial_layer_id == 1) {
1099 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1100 // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1101 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1102 ref_frame_config->ref_idx[i] = 0;
1103 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1104 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1105 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1106 ref_frame_config->refresh[4] = 1;
1107 } else if (layer_id->spatial_layer_id == 2) {
1108 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1109 // GOLDEN to slot 4. No update.
1110 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1111 ref_frame_config->ref_idx[i] = 0;
1112 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1113 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1116 break;
1117 case 11:
1118 // Simulcast mode for 3 spatial and 3 temporal layers.
1119 // No inter-layer prediction, only prediction is temporal and single
1120 // reference (LAST).
1121 // No overlap in buffer slots between spatial layers. So for example,
1122 // SL0 only uses slots 0 and 1.
1123 // SL1 only uses slots 2 and 3.
1124 // SL2 only uses slots 4 and 5.
1125 // All 7 references for each inter-frame must only access buffer slots
1126 // for that spatial layer.
1127 // On key (super)frames: SL1 and SL2 must have no references set
1128 // and must refresh all the slots for that layer only (so 2 and 3
1129 // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1130 // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1131 // internally as Intra-only frames that allow that stream to be decoded.
1132 // These conditions will allow for each spatial stream to be
1133 // independently decodable.
1135 // Initialize all references to 0 (don't use reference).
1136 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1137 ref_frame_config->reference[i] = 0;
1138 // Initialize as no refresh/update for all slots.
1139 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1140 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1141 ref_frame_config->ref_idx[i] = 0;
1143 if (is_key_frame) {
1144 if (layer_id->spatial_layer_id == 0) {
1145 // Assign LAST/GOLDEN to slot 0/1.
1146 // Refresh slots 0 and 1 for SL0.
1147 // SL0: this will get set to KEY frame internally.
1148 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1149 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1150 ref_frame_config->refresh[0] = 1;
1151 ref_frame_config->refresh[1] = 1;
1152 } else if (layer_id->spatial_layer_id == 1) {
1153 // Assign LAST/GOLDEN to slot 2/3.
1154 // Refresh slots 2 and 3 for SL1.
1155 // This will get set to Intra-only frame internally.
1156 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1157 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1158 ref_frame_config->refresh[2] = 1;
1159 ref_frame_config->refresh[3] = 1;
1160 } else if (layer_id->spatial_layer_id == 2) {
1161 // Assign LAST/GOLDEN to slot 4/5.
1162 // Refresh slots 4 and 5 for SL2.
1163 // This will get set to Intra-only frame internally.
1164 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1165 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1166 ref_frame_config->refresh[4] = 1;
1167 ref_frame_config->refresh[5] = 1;
1169 } else if (superframe_cnt % 4 == 0) {
1170 // Base temporal layer: TL0
1171 layer_id->temporal_layer_id = 0;
1172 if (layer_id->spatial_layer_id == 0) { // SL0
1173 // Reference LAST. Assign all references to either slot
1174 // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1175 // Update slot 0 (LAST).
1176 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1177 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1178 ref_frame_config->ref_idx[i] = 1;
1179 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1180 ref_frame_config->refresh[0] = 1;
1181 } else if (layer_id->spatial_layer_id == 1) { // SL1
1182 // Reference LAST. Assign all references to either slot
1183 // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1184 // Update slot 2 (LAST).
1185 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1186 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1187 ref_frame_config->ref_idx[i] = 3;
1188 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1189 ref_frame_config->refresh[2] = 1;
1190 } else if (layer_id->spatial_layer_id == 2) { // SL2
1191 // Reference LAST. Assign all references to either slot
1192 // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1193 // Update slot 4 (LAST).
1194 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1195 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1196 ref_frame_config->ref_idx[i] = 5;
1197 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1198 ref_frame_config->refresh[4] = 1;
1200 } else if ((superframe_cnt - 1) % 4 == 0) {
1201 // First top temporal enhancement layer: TL2
1202 layer_id->temporal_layer_id = 2;
1203 if (layer_id->spatial_layer_id == 0) { // SL0
1204 // Reference LAST (slot 0). Assign other references to slot 1.
1205 // No update/refresh on any slots.
1206 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1207 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1208 ref_frame_config->ref_idx[i] = 1;
1209 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1210 } else if (layer_id->spatial_layer_id == 1) { // SL1
1211 // Reference LAST (slot 2). Assign other references to slot 3.
1212 // No update/refresh on any slots.
1213 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1214 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1215 ref_frame_config->ref_idx[i] = 3;
1216 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1217 } else if (layer_id->spatial_layer_id == 2) { // SL2
1218 // Reference LAST (slot 4). Assign other references to slot 5.
1219 // No update/refresh on any slots.
1220 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1221 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1222 ref_frame_config->ref_idx[i] = 5;
1223 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1225 } else if ((superframe_cnt - 2) % 4 == 0) {
1226 // Middle temporal enhancement layer: TL1
1227 layer_id->temporal_layer_id = 1;
1228 if (layer_id->spatial_layer_id == 0) { // SL0
1229 // Reference LAST (slot 0).
1230 // Set GOLDEN to slot 1 and update slot 1.
1231 // This will be used as reference for next TL2.
1232 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1233 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1234 ref_frame_config->ref_idx[i] = 1;
1235 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1236 ref_frame_config->refresh[1] = 1;
1237 } else if (layer_id->spatial_layer_id == 1) { // SL1
1238 // Reference LAST (slot 2).
1239 // Set GOLDEN to slot 3 and update slot 3.
1240 // This will be used as reference for next TL2.
1241 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1242 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1243 ref_frame_config->ref_idx[i] = 3;
1244 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1245 ref_frame_config->refresh[3] = 1;
1246 } else if (layer_id->spatial_layer_id == 2) { // SL2
1247 // Reference LAST (slot 4).
1248 // Set GOLDEN to slot 5 and update slot 5.
1249 // This will be used as reference for next TL2.
1250 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1251 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1252 ref_frame_config->ref_idx[i] = 5;
1253 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1254 ref_frame_config->refresh[5] = 1;
1256 } else if ((superframe_cnt - 3) % 4 == 0) {
1257 // Second top temporal enhancement layer: TL2
1258 layer_id->temporal_layer_id = 2;
1259 if (layer_id->spatial_layer_id == 0) { // SL0
1260 // Reference LAST (slot 1). Assign other references to slot 0.
1261 // No update/refresh on any slots.
1262 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1263 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1264 ref_frame_config->ref_idx[i] = 0;
1265 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1266 } else if (layer_id->spatial_layer_id == 1) { // SL1
1267 // Reference LAST (slot 3). Assign other references to slot 2.
1268 // No update/refresh on any slots.
1269 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1270 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1271 ref_frame_config->ref_idx[i] = 2;
1272 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1273 } else if (layer_id->spatial_layer_id == 2) { // SL2
1274 // Reference LAST (slot 5). Assign other references to slot 4.
1275 // No update/refresh on any slots.
1276 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1277 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1278 ref_frame_config->ref_idx[i] = 4;
1279 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1282 if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1283 // Always reference GOLDEN (inter-layer prediction).
1284 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1285 if (ksvc_mode) {
1286 // KSVC: only keep the inter-layer reference (GOLDEN) for
1287 // superframes whose base is key.
1288 if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1290 if (is_key_frame && layer_id->spatial_layer_id > 1) {
1291 // On superframes whose base is key: remove LAST to avoid prediction
1292 // off layer two levels below.
1293 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1296 // For 3 spatial layer case 8 (where there is free buffer slot):
1297 // allow for top spatial layer to use additional temporal reference.
1298 // Additional reference is only updated on base temporal layer, every
1299 // 10 TL0 frames here.
1300 if (!simulcast_mode && enable_longterm_temporal_ref &&
1301 layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1302 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1303 if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1304 if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1305 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1307 break;
1308 default: assert(0); die("Error: Unsupported temporal layering mode!\n");
#if CONFIG_AV1_DECODER
// Compares the encoder's newest internal frame against the decoder's newest
// internal frame.
//
// encoder / decoder: codec contexts queried via AV1_GET_NEW_FRAME_IMAGE.
// frames_out: frame number, used only in the diagnostic message.
//
// Returns 1 if the two images differ (after printing the mismatch location
// to stderr), 0 otherwise.
static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
                       const int frames_out) {
  aom_image_t enc_img, dec_img;
  int mismatch = 0;

  /* Get the internal new frame */
  AOM_CODEC_CONTROL_TYPECHECKED(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img);
  AOM_CODEC_CONTROL_TYPECHECKED(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img);

#if CONFIG_AV1_HIGHBITDEPTH
  // When only one side is flagged high bitdepth, replace that side with a
  // copy produced by aom_img_truncate_16_to_8() so both use the same format.
  if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
      (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
    if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
      aom_image_t enc_hbd_img;
      // The allocation result must be checked: the image is written to
      // immediately below.
      if (!aom_img_alloc(
              &enc_hbd_img,
              static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
              enc_img.d_w, enc_img.d_h, 16)) {
        die("Failed to allocate image (%dx%d)", enc_img.d_w, enc_img.d_h);
      }
      aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
      enc_img = enc_hbd_img;
    }
    if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
      aom_image_t dec_hbd_img;
      if (!aom_img_alloc(
              &dec_hbd_img,
              static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
              dec_img.d_w, dec_img.d_h, 16)) {
        die("Failed to allocate image (%dx%d)", dec_img.d_w, dec_img.d_h);
      }
      aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
      dec_img = dec_hbd_img;
    }
  }
#endif

  if (!aom_compare_img(&enc_img, &dec_img)) {
    int y[4], u[4], v[4];
#if CONFIG_AV1_HIGHBITDEPTH
    if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
      aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
    } else {
      aom_find_mismatch(&enc_img, &dec_img, y, u, v);
    }
#else
    aom_find_mismatch(&enc_img, &dec_img, y, u, v);
#endif
    fprintf(stderr,
            "Encode/decode mismatch on frame %d at"
            " Y[%d, %d] {%d/%d},"
            " U[%d, %d] {%d/%d},"
            " V[%d, %d] {%d/%d}\n",
            frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
            v[1], v[2], v[3]);
    mismatch = 1;
  }

  aom_img_free(&enc_img);
  aom_img_free(&dec_img);
  return mismatch;
}
#endif  // CONFIG_AV1_DECODER
// Accumulated PSNR statistics for a stream, as consumed by show_psnr().
struct psnr_stats {
  // The second element of these arrays is reserved for high bitdepth.
  uint64_t psnr_sse_total[2];      // Accumulated SSE, fed to sse_to_psnr().
  uint64_t psnr_samples_total[2];  // Accumulated sample count for the SSE.
  double psnr_totals[2][4];        // Sums printed as Avg/Y/U/V after division
                                   // by psnr_count.
  int psnr_count[2];               // Divisor for psnr_totals; 0 means nothing
                                   // to report.
};
1383 static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1384 double ovpsnr;
1386 if (!psnr_stream->psnr_count[0]) return;
1388 fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1389 ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1390 (double)psnr_stream->psnr_sse_total[0]);
1391 fprintf(stderr, " %.3f", ovpsnr);
1393 for (int i = 0; i < 4; i++) {
1394 fprintf(stderr, " %.3f",
1395 psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1397 fprintf(stderr, "\n");
1400 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1401 const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1402 aom::AV1RateControlRtcConfig rc_cfg;
1403 rc_cfg.width = cfg.g_w;
1404 rc_cfg.height = cfg.g_h;
1405 rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1406 rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1407 rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1408 rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1409 rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1410 rc_cfg.buf_sz = cfg.rc_buf_sz;
1411 rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1412 rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1413 // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1414 rc_cfg.max_intra_bitrate_pct = 300;
1415 rc_cfg.framerate = cfg.g_timebase.den;
1416 // TODO(jianj): Add suppor for SVC.
1417 rc_cfg.ss_number_layers = 1;
1418 rc_cfg.ts_number_layers = 1;
1419 rc_cfg.scaling_factor_num[0] = 1;
1420 rc_cfg.scaling_factor_den[0] = 1;
1421 rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1422 rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1423 rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1424 rc_cfg.aq_mode = app_input.aq_mode;
1426 return rc_cfg;
// Maps an internal qindex (0-255 range) back to the external quantizer
// (0-63 range).
//
// Returns the smallest quantizer whose table qindex is >= the given qindex,
// or the largest quantizer (63) when qindex exceeds every table entry.
static int qindex_to_quantizer(int qindex) {
  // Table that converts 0-63 range Q values passed in outside to the 0-255
  // range Qindex used internally.
  static const int quantizer_to_qindex[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  // Derive the bound from the table so the loop cannot drift out of sync
  // with it.
  const int num_quantizers =
      (int)(sizeof(quantizer_to_qindex) / sizeof(quantizer_to_qindex[0]));

  for (int quantizer = 0; quantizer < num_quantizers; ++quantizer) {
    if (quantizer_to_qindex[quantizer] >= qindex) return quantizer;
  }

  return num_quantizers - 1;
}
1445 static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1446 aom_codec_ctx_t *codec, int frame_cnt) {
1447 aom_active_map_t map = { 0, 0, 0 };
1449 map.rows = (cfg->g_h + 15) / 16;
1450 map.cols = (cfg->g_w + 15) / 16;
1452 map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1453 if (!map.active_map) die("Failed to allocate active map");
1455 // Example map for testing.
1456 for (unsigned int i = 0; i < map.rows; ++i) {
1457 for (unsigned int j = 0; j < map.cols; ++j) {
1458 int index = map.cols * i + j;
1459 map.active_map[index] = 1;
1460 if (frame_cnt < 300) {
1461 if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1462 } else if (frame_cnt >= 300) {
1463 if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1468 if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1469 die_codec(codec, "Failed to set active map");
1471 free(map.active_map);
1474 int main(int argc, const char **argv) {
1475 AppInput app_input;
1476 AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1477 FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1478 AvxVideoWriter *total_layer_file = NULL;
1479 FILE *total_layer_obu_file = NULL;
1480 aom_codec_enc_cfg_t cfg;
1481 int frame_cnt = 0;
1482 aom_image_t raw;
1483 int frame_avail;
1484 int got_data = 0;
1485 int flags = 0;
1486 int i;
1487 int pts = 0; // PTS starts at 0.
1488 int frame_duration = 1; // 1 timebase tick per frame.
1489 aom_svc_layer_id_t layer_id;
1490 aom_svc_params_t svc_params;
1491 aom_svc_ref_frame_config_t ref_frame_config;
1492 aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1494 #if CONFIG_INTERNAL_STATS
1495 FILE *stats_file = fopen("opsnr.stt", "a");
1496 if (stats_file == NULL) {
1497 die("Cannot open opsnr.stt\n");
1499 #endif
1500 #if CONFIG_AV1_DECODER
1501 aom_codec_ctx_t decoder;
1502 #endif
1504 struct RateControlMetrics rc;
1505 int64_t cx_time = 0;
1506 int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1507 int frame_cnt_layer[AOM_MAX_LAYERS];
1508 double sum_bitrate = 0.0;
1509 double sum_bitrate2 = 0.0;
1510 double framerate = 30.0;
1511 int use_svc_control = 1;
1512 int set_err_resil_frame = 0;
1513 int test_changing_bitrate = 0;
1514 zero(rc.layer_target_bitrate);
1515 memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1516 memset(&app_input, 0, sizeof(AppInput));
1517 memset(&svc_params, 0, sizeof(svc_params));
1519 // Flag to test dynamic scaling of source frames for single
1520 // spatial stream, using the scaling_mode control.
1521 const int test_dynamic_scaling_single_layer = 0;
1523 // Flag to test setting speed per layer.
1524 const int test_speed_per_layer = 0;
1526 // Flag for testing active maps.
1527 const int test_active_maps = 0;
1529 /* Setup default input stream settings */
1530 app_input.input_ctx.framerate.numerator = 30;
1531 app_input.input_ctx.framerate.denominator = 1;
1532 app_input.input_ctx.only_i420 = 0;
1533 app_input.input_ctx.bit_depth = AOM_BITS_8;
1534 app_input.speed = 7;
1535 exec_name = argv[0];
1537 // start with default encoder configuration
1538 aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg,
1539 AOM_USAGE_REALTIME);
1540 if (res != AOM_CODEC_OK) {
1541 die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1544 // Real time parameters.
1545 cfg.g_usage = AOM_USAGE_REALTIME;
1547 cfg.rc_end_usage = AOM_CBR;
1548 cfg.rc_min_quantizer = 2;
1549 cfg.rc_max_quantizer = 52;
1550 cfg.rc_undershoot_pct = 50;
1551 cfg.rc_overshoot_pct = 50;
1552 cfg.rc_buf_initial_sz = 600;
1553 cfg.rc_buf_optimal_sz = 600;
1554 cfg.rc_buf_sz = 1000;
1555 cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1556 cfg.g_lag_in_frames = 0;
1557 cfg.kf_mode = AOM_KF_AUTO;
1559 parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1561 int ts_number_layers = svc_params.number_temporal_layers;
1562 int ss_number_layers = svc_params.number_spatial_layers;
1564 unsigned int width = cfg.g_w;
1565 unsigned int height = cfg.g_h;
1567 if (app_input.layering_mode >= 0) {
1568 if (ts_number_layers !=
1569 mode_to_num_temporal_layers[app_input.layering_mode] ||
1570 ss_number_layers !=
1571 mode_to_num_spatial_layers[app_input.layering_mode]) {
1572 die("Number of layers doesn't match layering mode.");
1576 // Y4M reader has its own allocation.
1577 if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1578 if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1579 die("Failed to allocate image (%dx%d)", width, height);
1583 aom_codec_iface_t *encoder = aom_codec_av1_cx();
1585 memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1586 sizeof(svc_params.layer_target_bitrate));
1588 unsigned int total_rate = 0;
1589 for (i = 0; i < ss_number_layers; i++) {
1590 total_rate +=
1591 svc_params
1592 .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1594 if (total_rate != cfg.rc_target_bitrate) {
1595 die("Incorrect total target bitrate");
1598 svc_params.framerate_factor[0] = 1;
1599 if (ts_number_layers == 2) {
1600 svc_params.framerate_factor[0] = 2;
1601 svc_params.framerate_factor[1] = 1;
1602 } else if (ts_number_layers == 3) {
1603 svc_params.framerate_factor[0] = 4;
1604 svc_params.framerate_factor[1] = 2;
1605 svc_params.framerate_factor[2] = 1;
1608 if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) {
1609 // Override these settings with the info from Y4M file.
1610 cfg.g_w = app_input.input_ctx.width;
1611 cfg.g_h = app_input.input_ctx.height;
1612 // g_timebase is the reciprocal of frame rate.
1613 cfg.g_timebase.num = app_input.input_ctx.framerate.denominator;
1614 cfg.g_timebase.den = app_input.input_ctx.framerate.numerator;
1616 framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1617 set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1619 AvxVideoInfo info;
1620 info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1621 info.frame_width = cfg.g_w;
1622 info.frame_height = cfg.g_h;
1623 info.time_base.numerator = cfg.g_timebase.num;
1624 info.time_base.denominator = cfg.g_timebase.den;
1625 // Open an output file for each stream.
1626 for (int sl = 0; sl < ss_number_layers; ++sl) {
1627 for (int tl = 0; tl < ts_number_layers; ++tl) {
1628 i = sl * ts_number_layers + tl;
1629 char file_name[PATH_MAX];
1630 snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1631 app_input.output_filename, i);
1632 if (app_input.output_obu) {
1633 obu_files[i] = fopen(file_name, "wb");
1634 if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1635 } else {
1636 outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1637 if (!outfile[i]) die("Failed to open %s for writing", file_name);
1641 if (app_input.output_obu) {
1642 total_layer_obu_file = fopen(app_input.output_filename, "wb");
1643 if (!total_layer_obu_file)
1644 die("Failed to open %s for writing", app_input.output_filename);
1645 } else {
1646 total_layer_file =
1647 aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1648 if (!total_layer_file)
1649 die("Failed to open %s for writing", app_input.output_filename);
1652 // Initialize codec.
1653 aom_codec_ctx_t codec;
1654 aom_codec_flags_t flag = 0;
1655 flag |= cfg.g_input_bit_depth == AOM_BITS_8 ? 0 : AOM_CODEC_USE_HIGHBITDEPTH;
1656 flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1657 if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1658 die_codec(&codec, "Failed to initialize encoder");
1660 #if CONFIG_AV1_DECODER
1661 if (app_input.decode) {
1662 if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1663 die_codec(&decoder, "Failed to initialize decoder");
1665 #endif
1667 aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1668 aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1669 aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0);
1670 aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 1);
1671 aom_codec_control(&codec, AV1E_SET_LOOPFILTER_CONTROL, 1);
1672 aom_codec_control(&codec, AV1E_SET_ENABLE_WARPED_MOTION, 0);
1673 aom_codec_control(&codec, AV1E_SET_ENABLE_OBMC, 0);
1674 aom_codec_control(&codec, AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
1675 aom_codec_control(&codec, AV1E_SET_ENABLE_ORDER_HINT, 0);
1676 aom_codec_control(&codec, AV1E_SET_ENABLE_TPL_MODEL, 0);
1677 aom_codec_control(&codec, AV1E_SET_DELTAQ_MODE, 0);
1678 aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 3);
1679 aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 3);
1680 aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 3);
1681 aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 3);
1682 aom_codec_control(&codec, AV1E_SET_CDF_UPDATE_MODE, 1);
1684 // Settings to reduce key frame encoding time.
1685 aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 0);
1686 aom_codec_control(&codec, AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
1687 aom_codec_control(&codec, AV1E_SET_ENABLE_ANGLE_DELTA, 0);
1688 aom_codec_control(&codec, AV1E_SET_ENABLE_FILTER_INTRA, 0);
1689 aom_codec_control(&codec, AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
1691 if (cfg.g_threads > 1) {
1692 aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS,
1693 (unsigned int)log2(cfg.g_threads));
1696 aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1697 if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1698 aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1);
1699 aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 1);
1700 // INTRABC is currently disabled for rt mode, as it's too slow.
1701 aom_codec_control(&codec, AV1E_SET_ENABLE_INTRABC, 0);
1704 if (app_input.use_external_rc) {
1705 aom_codec_control(&codec, AV1E_SET_RTC_EXTERNAL_RC, 1);
1708 aom_codec_control(&codec, AV1E_SET_MAX_CONSEC_FRAME_DROP_CBR, INT_MAX);
1710 aom_codec_control(&codec, AV1E_SET_SVC_FRAME_DROP_MODE,
1711 AOM_FULL_SUPERFRAME_DROP);
1713 svc_params.number_spatial_layers = ss_number_layers;
1714 svc_params.number_temporal_layers = ts_number_layers;
1715 for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1716 svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1717 svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1719 for (i = 0; i < ss_number_layers; ++i) {
1720 svc_params.scaling_factor_num[i] = 1;
1721 svc_params.scaling_factor_den[i] = 1;
1723 if (ss_number_layers == 2) {
1724 svc_params.scaling_factor_num[0] = 1;
1725 svc_params.scaling_factor_den[0] = 2;
1726 } else if (ss_number_layers == 3) {
1727 svc_params.scaling_factor_num[0] = 1;
1728 svc_params.scaling_factor_den[0] = 4;
1729 svc_params.scaling_factor_num[1] = 1;
1730 svc_params.scaling_factor_den[1] = 2;
1732 aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
1733 // TODO(aomedia:3032): Configure KSVC in fixed mode.
1735 // This controls the maximum target size of the key frame.
1736 // For generating smaller key frames, use a smaller max_intra_size_pct
1737 // value, like 100 or 200.
1739 const int max_intra_size_pct = 300;
1740 aom_codec_control(&codec, AOME_SET_MAX_INTRA_BITRATE_PCT,
1741 max_intra_size_pct);
1744 for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
1745 cx_time_layer[lx] = 0;
1746 frame_cnt_layer[lx] = 0;
1749 std::unique_ptr<aom::AV1RateControlRTC> rc_api;
1750 if (app_input.use_external_rc) {
1751 const aom::AV1RateControlRtcConfig rc_cfg =
1752 create_rtc_rc_config(cfg, app_input);
1753 rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
1756 frame_avail = 1;
1757 struct psnr_stats psnr_stream;
1758 memset(&psnr_stream, 0, sizeof(psnr_stream));
1759 while (frame_avail || got_data) {
1760 struct aom_usec_timer timer;
1761 frame_avail = read_frame(&(app_input.input_ctx), &raw);
1762 // Loop over spatial layers.
1763 for (int slx = 0; slx < ss_number_layers; slx++) {
1764 aom_codec_iter_t iter = NULL;
1765 const aom_codec_cx_pkt_t *pkt;
1766 int layer = 0;
1767 // Flag for superframe whose base is key.
1768 int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
1769 // For flexible mode:
1770 if (app_input.layering_mode >= 0) {
1771 // Set the reference/update flags, layer_id, and reference_map
1772 // buffer index.
1773 set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
1774 &ref_frame_config, &ref_frame_comp_pred,
1775 &use_svc_control, slx, is_key_frame,
1776 (app_input.layering_mode == 10), app_input.speed);
1777 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1778 if (use_svc_control) {
1779 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
1780 &ref_frame_config);
1781 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
1782 &ref_frame_comp_pred);
1784 // Set the speed per layer.
1785 if (test_speed_per_layer) {
1786 int speed_per_layer = 10;
1787 if (layer_id.spatial_layer_id == 0) {
1788 if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
1789 if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
1790 if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
1791 } else if (layer_id.spatial_layer_id == 1) {
1792 if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
1793 if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
1794 if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
1795 } else if (layer_id.spatial_layer_id == 2) {
1796 if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
1797 if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
1798 if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
1800 aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
1802 } else {
1803 // Only up to 3 temporal layers supported in fixed mode.
1804 // Only need to set spatial and temporal layer_id: reference
1805 // prediction, refresh, and buffer_idx are set internally.
1806 layer_id.spatial_layer_id = slx;
1807 layer_id.temporal_layer_id = 0;
1808 if (ts_number_layers == 2) {
1809 layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
1810 } else if (ts_number_layers == 3) {
1811 if (frame_cnt % 2 != 0)
1812 layer_id.temporal_layer_id = 2;
1813 else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
1814 layer_id.temporal_layer_id = 1;
1816 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1819 if (set_err_resil_frame && cfg.g_error_resilient == 0) {
1820 // Set error_resilient per frame: off/0 for base layer and
1821 // on/1 for enhancement layer frames.
1822 // Note that this can only be done on the fly/per-frame/layer
1823 // if the config error_resilience is off/0. See the logic for updating
1824 // in set_encoder_config():
1825 // tool_cfg->error_resilient_mode =
1826 // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
1827 const int err_resil_mode =
1828 layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
1829 aom_codec_control(&codec, AV1E_SET_ERROR_RESILIENT_MODE,
1830 err_resil_mode);
1833 layer = slx * ts_number_layers + layer_id.temporal_layer_id;
1834 if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
1836 if (test_dynamic_scaling_single_layer) {
1837 // Example to scale source down by 2x2, then 4x4, and then back up to
1838 // 2x2, and then back to original.
1839 int frame_2x2 = 200;
1840 int frame_4x4 = 400;
1841 int frame_2x2up = 600;
1842 int frame_orig = 800;
1843 if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
1844 // Scale source down by 2x2.
1845 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1846 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1847 } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
1848 // Scale source down by 4x4.
1849 struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
1850 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1851 } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
1852 // Source back up to 2x2.
1853 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1854 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1855 } else if (frame_cnt >= frame_orig) {
1856 // Source back up to original resolution (no scaling).
1857 struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
1858 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1860 if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
1861 frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
1862 // For dynamic resize testing on single layer: refresh all references
1863 // on the resized frame: this is to avoid decode error:
1864 // if resize goes down by >= 4x4 then libaom decoder will throw an
1865 // error that some reference (even though not used) is beyond the
1866 // limit size (must be smaller than 4x4).
1867 for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
1868 if (use_svc_control) {
1869 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
1870 &ref_frame_config);
1871 aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
1872 &ref_frame_comp_pred);
1877 // Change target_bitrate every other frame.
1878 if (test_changing_bitrate && frame_cnt % 2 == 0) {
1879 if (frame_cnt < 500)
1880 cfg.rc_target_bitrate += 10;
1881 else
1882 cfg.rc_target_bitrate -= 10;
1883 // Do big increase and decrease.
1884 if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
1885 if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
1886 if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
1887 // Call change_config, or bypass with new control.
1888 // res = aom_codec_enc_config_set(&codec, &cfg);
1889 if (aom_codec_control(&codec, AV1E_SET_BITRATE_ONE_PASS_CBR,
1890 cfg.rc_target_bitrate))
1891 die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
1894 if (rc_api) {
1895 aom::AV1FrameParamsRTC frame_params;
1896 // TODO(jianj): Add support for SVC.
1897 frame_params.spatial_layer_id = 0;
1898 frame_params.temporal_layer_id = 0;
1899 frame_params.frame_type =
1900 is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
1901 rc_api->ComputeQP(frame_params);
1902 const int current_qp = rc_api->GetQP();
1903 if (aom_codec_control(&codec, AV1E_SET_QUANTIZER_ONE_PASS,
1904 qindex_to_quantizer(current_qp))) {
1905 die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
1909 if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
1911 // Do the layer encode.
1912 aom_usec_timer_start(&timer);
1913 if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
1914 die_codec(&codec, "Failed to encode frame");
1915 aom_usec_timer_mark(&timer);
1916 cx_time += aom_usec_timer_elapsed(&timer);
1917 cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
1918 frame_cnt_layer[layer] += 1;
1920 got_data = 0;
1921 // For simulcast (mode 11): write out each spatial layer to the file.
1922 int ss_layers_write = (app_input.layering_mode == 11)
1923 ? layer_id.spatial_layer_id + 1
1924 : ss_number_layers;
1925 while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
1926 switch (pkt->kind) {
1927 case AOM_CODEC_CX_FRAME_PKT:
1928 for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
1929 ++sl) {
1930 for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
1931 ++tl) {
1932 int j = sl * ts_number_layers + tl;
1933 if (app_input.output_obu) {
1934 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1935 obu_files[j]);
1936 } else {
1937 aom_video_writer_write_frame(
1938 outfile[j],
1939 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1940 pkt->data.frame.sz, pts);
1942 if (sl == layer_id.spatial_layer_id)
1943 rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
1946 got_data = 1;
1947 // Write everything into the top layer.
1948 if (app_input.output_obu) {
1949 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1950 total_layer_obu_file);
1951 } else {
1952 aom_video_writer_write_frame(
1953 total_layer_file,
1954 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1955 pkt->data.frame.sz, pts);
1957 // Keep count of rate control stats per layer (for non-key).
1958 if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
1959 int j = layer_id.spatial_layer_id * ts_number_layers +
1960 layer_id.temporal_layer_id;
1961 assert(j >= 0);
1962 rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
1963 rc.layer_avg_rate_mismatch[j] +=
1964 fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
1965 rc.layer_pfb[j];
1966 if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
1969 if (rc_api) {
1970 rc_api->PostEncodeUpdate(pkt->data.frame.sz);
1972 // Update for short-time encoding bitrate states, for moving window
1973 // of size rc->window, shifted by rc->window / 2.
1974 // Ignore first window segment, due to key frame.
1975 // For spatial layers: only do this for top/highest SL.
1976 if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
1977 sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1978 rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
1979 if (frame_cnt % rc.window_size == 0) {
1980 rc.window_count += 1;
1981 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
1982 rc.variance_st_encoding_bitrate +=
1983 (sum_bitrate / rc.window_size) *
1984 (sum_bitrate / rc.window_size);
1985 sum_bitrate = 0.0;
1988 // Second shifted window.
1989 if (frame_cnt > rc.window_size + rc.window_size / 2 &&
1990 slx == ss_number_layers - 1) {
1991 sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1992 if (frame_cnt > 2 * rc.window_size &&
1993 frame_cnt % rc.window_size == 0) {
1994 rc.window_count += 1;
1995 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
1996 rc.variance_st_encoding_bitrate +=
1997 (sum_bitrate2 / rc.window_size) *
1998 (sum_bitrate2 / rc.window_size);
1999 sum_bitrate2 = 0.0;
2003 #if CONFIG_AV1_DECODER
2004 if (app_input.decode) {
2005 if (aom_codec_decode(
2006 &decoder,
2007 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2008 pkt->data.frame.sz, NULL))
2009 die_codec(&decoder, "Failed to decode frame");
2011 #endif
2013 break;
2014 case AOM_CODEC_PSNR_PKT:
2015 if (app_input.show_psnr) {
2016 psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2017 psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2018 for (int plane = 0; plane < 4; plane++) {
2019 psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2021 psnr_stream.psnr_count[0]++;
2023 break;
2024 default: break;
2027 #if CONFIG_AV1_DECODER
2028 if (got_data && app_input.decode) {
2029 // Don't look for mismatch on top spatial and top temporal layers as
2030 // they are non reference frames.
2031 if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2032 !(layer_id.temporal_layer_id > 0 &&
2033 layer_id.temporal_layer_id == ts_number_layers - 1)) {
2034 if (test_decode(&codec, &decoder, frame_cnt)) {
2035 #if CONFIG_INTERNAL_STATS
2036 fprintf(stats_file, "First mismatch occurred in frame %d\n",
2037 frame_cnt);
2038 fclose(stats_file);
2039 #endif
2040 fatal("Mismatch seen");
2044 #endif
2045 } // loop over spatial layers
2046 ++frame_cnt;
2047 pts += frame_duration;
2050 close_input_file(&(app_input.input_ctx));
2051 printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2052 ts_number_layers);
2054 printf("\n");
2055 for (int slx = 0; slx < ss_number_layers; slx++)
2056 for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2057 int lx = slx * ts_number_layers + tlx;
2058 printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2059 slx, tlx, frame_cnt_layer[lx],
2060 (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2061 1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2064 printf("\n");
2065 printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2066 frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2067 1000000 * (double)frame_cnt / (double)cx_time);
2069 if (app_input.show_psnr) {
2070 show_psnr(&psnr_stream, 255.0);
2073 if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2075 #if CONFIG_AV1_DECODER
2076 if (app_input.decode) {
2077 if (aom_codec_destroy(&decoder))
2078 die_codec(&decoder, "Failed to destroy decoder");
2080 #endif
2082 #if CONFIG_INTERNAL_STATS
2083 fprintf(stats_file, "No mismatch detected in recon buffers\n");
2084 fclose(stats_file);
2085 #endif
2087 // Try to rewrite the output file headers with the actual frame count.
2088 for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2089 aom_video_writer_close(outfile[i]);
2090 aom_video_writer_close(total_layer_file);
2092 if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
2093 aom_img_free(&raw);
2095 return EXIT_SUCCESS;