BITSTREAM CLARIFICATION: Forbid referencing across color spaces.
[aom.git] / examples / vp8_multi_resolution_encoder.c
blob7c050fa3eb091f6a9ffae70c94dab2e11b80b1e1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
// This is an example demonstrating multi-resolution encoding in VP8.
// High-resolution input video is down-sampled to lower resolutions. The
// encoder then encodes the video and outputs multiple bitstreams with
// different resolutions.
//
// Configure with the --enable-multi-res-encoding flag to enable this example.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "third_party/libyuv/include/libyuv/basic_types.h"
#include "third_party/libyuv/include/libyuv/scale.h"
#include "third_party/libyuv/include/libyuv/cpu_id.h"

#define VPX_CODEC_DISABLE_COMPAT 1
#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

#include "./tools_common.h"
#include "./video_writer.h"
// The input video frame is downsampled several times to generate a
// multi-level hierarchical structure. kNumEncoders is the number of
// encoding levels required. For example, if the size of the input video is
// 1280x720, kNumEncoders is 3, and the down-sampling factor is 2, the
// encoder outputs 3 bitstreams with resolutions of 1280x720 (level 0),
// 640x360 (level 1), and 320x180 (level 2) respectively.
#define kNumEncoders 3

// Program name (argv[0]); set by main() and printed by usage_exit().
static const char *exec_name;
44 void usage_exit() {
45 fprintf(stderr,
46 "Usage: %s <width> <height> <infile> <outfile(s)> <output psnr?>\n",
47 exec_name);
48 exit(EXIT_FAILURE);
51 int main(int argc, char *argv[]) {
52 int frame_cnt = 0;
53 FILE *infile = NULL;
54 VpxVideoWriter *writers[kNumEncoders];
55 vpx_codec_ctx_t codec[kNumEncoders];
56 vpx_codec_enc_cfg_t cfg[kNumEncoders];
57 vpx_image_t raw[kNumEncoders];
58 const VpxInterface *const encoder = get_vpx_encoder_by_name("vp8");
59 // Currently, only realtime mode is supported in multi-resolution encoding.
60 const int arg_deadline = VPX_DL_REALTIME;
61 int i;
62 int width = 0;
63 int height = 0;
64 int frame_avail = 0;
65 int got_data = 0;
67 // Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you
68 // don't need to know PSNR, which will skip PSNR calculation and save
69 // encoding time.
70 int show_psnr = 0;
71 uint64_t psnr_sse_total[kNumEncoders] = {0};
72 uint64_t psnr_samples_total[kNumEncoders] = {0};
73 double psnr_totals[kNumEncoders][4] = {{0, 0}};
74 int psnr_count[kNumEncoders] = {0};
76 // Set the required target bitrates for each resolution level.
77 // If target bitrate for highest-resolution level is set to 0,
78 // (i.e. target_bitrate[0]=0), we skip encoding at that level.
79 unsigned int target_bitrate[kNumEncoders] = {1000, 500, 100};
81 // Enter the frame rate of the input video.
82 const int framerate = 30;
83 // Set down-sampling factor for each resolution level.
84 // dsf[0] controls down sampling from level 0 to level 1;
85 // dsf[1] controls down sampling from level 1 to level 2;
86 // dsf[2] is not used.
87 vpx_rational_t dsf[kNumEncoders] = {{2, 1}, {2, 1}, {1, 1}};
89 exec_name = argv[0];
91 if (!encoder)
92 die("Unsupported codec.");
94 // exe_name, input width, input height, input file,
95 // output file 1, output file 2, output file 3, psnr on/off
96 if (argc != (5 + kNumEncoders))
97 die("Invalid number of input options.");
99 printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
101 width = strtol(argv[1], NULL, 0);
102 height = strtol(argv[2], NULL, 0);
104 if (width < 16 || width % 2 || height < 16 || height % 2)
105 die("Invalid resolution: %ldx%ld", width, height);
107 // Open input video file for encoding
108 if (!(infile = fopen(argv[3], "rb")))
109 die("Failed to open %s for reading", argv[3]);
111 show_psnr = strtol(argv[kNumEncoders + 4], NULL, 0);
113 // Populate default encoder configuration
114 for (i = 0; i < kNumEncoders; ++i) {
115 vpx_codec_err_t res =
116 vpx_codec_enc_config_default(encoder->codec_interface(), &cfg[i], 0);
117 if (res != VPX_CODEC_OK) {
118 printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
119 return EXIT_FAILURE;
123 // Update the default configuration according to needs of the application.
124 // Highest-resolution encoder settings
125 cfg[0].g_w = width;
126 cfg[0].g_h = height;
127 cfg[0].g_threads = 1;
128 cfg[0].rc_dropframe_thresh = 30;
129 cfg[0].rc_end_usage = VPX_CBR;
130 cfg[0].rc_resize_allowed = 0;
131 cfg[0].rc_min_quantizer = 4;
132 cfg[0].rc_max_quantizer = 56;
133 cfg[0].rc_undershoot_pct = 98;
134 cfg[0].rc_overshoot_pct = 100;
135 cfg[0].rc_buf_initial_sz = 500;
136 cfg[0].rc_buf_optimal_sz = 600;
137 cfg[0].rc_buf_sz = 1000;
138 cfg[0].g_error_resilient = 1;
139 cfg[0].g_lag_in_frames = 0;
140 cfg[0].kf_mode = VPX_KF_AUTO; // VPX_KF_DISABLED
141 cfg[0].kf_min_dist = 3000;
142 cfg[0].kf_max_dist = 3000;
143 cfg[0].rc_target_bitrate = target_bitrate[0];
144 cfg[0].g_timebase.num = 1;
145 cfg[0].g_timebase.den = framerate;
147 // Other-resolution encoder settings
148 for (i = 1; i < kNumEncoders; ++i) {
149 cfg[i] = cfg[0];
150 cfg[i].g_threads = 1;
151 cfg[i].rc_target_bitrate = target_bitrate[i];
153 // Note: Width & height of other-resolution encoders are calculated
154 // from the highest-resolution encoder's size and the corresponding
155 // down_sampling_factor.
157 unsigned int iw = cfg[i - 1].g_w * dsf[i - 1].den + dsf[i - 1].num - 1;
158 unsigned int ih = cfg[i - 1].g_h * dsf[i - 1].den + dsf[i - 1].num - 1;
159 cfg[i].g_w = iw / dsf[i - 1].num;
160 cfg[i].g_h = ih / dsf[i - 1].num;
163 // Make width & height to be multiplier of 2.
164 if ((cfg[i].g_w) % 2)
165 cfg[i].g_w++;
167 if ((cfg[i].g_h) % 2)
168 cfg[i].g_h++;
171 // Open output file for each encoder to output bitstreams
172 for (i = 0; i < kNumEncoders; ++i) {
173 VpxVideoInfo info = {
174 encoder->fourcc,
175 cfg[i].g_w,
176 cfg[i].g_h,
177 {cfg[i].g_timebase.num, cfg[i].g_timebase.den}
180 if (!(writers[i] = vpx_video_writer_open(argv[i+4], kContainerIVF, &info)))
181 die("Failed to open %s for writing", argv[i+4]);
184 // Allocate image for each encoder
185 for (i = 0; i < kNumEncoders; ++i)
186 if (!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
187 die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
189 // Initialize multi-encoder
190 if (vpx_codec_enc_init_multi(&codec[0], encoder->codec_interface(), &cfg[0],
191 kNumEncoders,
192 show_psnr ? VPX_CODEC_USE_PSNR : 0, &dsf[0]))
193 die_codec(&codec[0], "Failed to initialize encoder");
195 // The extra encoding configuration parameters can be set as follows.
196 for (i = 0; i < kNumEncoders; i++) {
197 // Set encoding speed
198 if (vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, -6))
199 die_codec(&codec[i], "Failed to set cpu_used");
201 // Set static threshold.
202 if (vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1))
203 die_codec(&codec[i], "Failed to set static threshold");
205 // Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING
206 // Enable denoising for the highest-resolution encoder.
207 if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, i == 0))
208 die_codec(&codec[0], "Failed to set noise_sensitivity");
211 frame_avail = 1;
212 got_data = 0;
214 while (frame_avail || got_data) {
215 vpx_codec_iter_t iter[kNumEncoders] = {NULL};
216 const vpx_codec_cx_pkt_t *pkt[kNumEncoders];
218 frame_avail = vpx_img_read(&raw[0], infile);
220 if (frame_avail) {
221 for (i = 1; i < kNumEncoders; ++i) {
222 vpx_image_t *const prev = &raw[i - 1];
224 // Scale the image down a number of times by downsampling factor
225 // FilterMode 1 or 2 give better psnr than FilterMode 0.
226 I420Scale(prev->planes[VPX_PLANE_Y], prev->stride[VPX_PLANE_Y],
227 prev->planes[VPX_PLANE_U], prev->stride[VPX_PLANE_U],
228 prev->planes[VPX_PLANE_V], prev->stride[VPX_PLANE_V],
229 prev->d_w, prev->d_h,
230 raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
231 raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
232 raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
233 raw[i].d_w, raw[i].d_h, 1);
237 // Encode frame.
238 if (vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL,
239 frame_cnt, 1, 0, arg_deadline)) {
240 die_codec(&codec[0], "Failed to encode frame");
243 for (i = kNumEncoders - 1; i >= 0; i--) {
244 got_data = 0;
246 while ((pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i]))) {
247 got_data = 1;
248 switch (pkt[i]->kind) {
249 case VPX_CODEC_CX_FRAME_PKT:
250 vpx_video_writer_write_frame(writers[i], pkt[i]->data.frame.buf,
251 pkt[i]->data.frame.sz, frame_cnt - 1);
252 break;
253 case VPX_CODEC_PSNR_PKT:
254 if (show_psnr) {
255 int j;
256 psnr_sse_total[i] += pkt[i]->data.psnr.sse[0];
257 psnr_samples_total[i] += pkt[i]->data.psnr.samples[0];
258 for (j = 0; j < 4; j++)
259 psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j];
260 psnr_count[i]++;
262 break;
263 default:
264 break;
266 printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT &&
267 (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
268 fflush(stdout);
271 frame_cnt++;
273 printf("\n");
275 fclose(infile);
277 printf("Processed %d frames.\n", frame_cnt - 1);
278 for (i = 0; i < kNumEncoders; ++i) {
279 // Calculate PSNR and print it out
280 if (show_psnr && psnr_count[i] > 0) {
281 int j;
282 double ovpsnr = sse_to_psnr(psnr_samples_total[i], 255.0,
283 psnr_sse_total[i]);
285 fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i);
286 fprintf(stderr, " %.3lf", ovpsnr);
287 for (j = 0; j < 4; j++)
288 fprintf(stderr, " %.3lf", psnr_totals[i][j]/psnr_count[i]);
291 if (vpx_codec_destroy(&codec[i]))
292 die_codec(&codec[i], "Failed to destroy codec");
294 vpx_img_free(&raw[i]);
295 vpx_video_writer_close(writers[i]);
297 printf("\n");
299 return EXIT_SUCCESS;