avprobe: also output dar/par if only defined in stream
[FFMpeg-mirror/mplayer-patches.git] / libavcodec / proresenc.c
blobc4716d6c91c147e8c1ac5bec31432c5f457f8b4c
/*
 * Apple ProRes encoder
 *
 * Copyright (c) 2012 Konstantin Shishkov
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 #include "libavutil/opt.h"
24 #include "avcodec.h"
25 #include "put_bits.h"
26 #include "bytestream.h"
27 #include "internal.h"
28 #include "proresdsp.h"
29 #include "proresdata.h"
/* Chroma format codes; the encoder shifts this value into the frame flags. */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Largest accepted "mbs_per_slice" value (also sizes the block buffers). */
#define MAX_MBS_PER_SLICE 8

/* should be increased to 4 when there's AV_PIX_FMT_YUV444AP10 */
#define MAX_PLANES 3
/* Encoder profiles; the value is used as an index into prores_profile_info[]. */
enum {
    PRORES_PROFILE_PROXY    = 0,
    PRORES_PROFILE_LT       = 1,
    PRORES_PROFILE_STANDARD = 2,
    PRORES_PROFILE_HQ       = 3,
};
/* Quantisation matrix selectors; the value indexes prores_quant_matrices[]. */
enum {
    QUANT_MAT_PROXY    = 0,
    QUANT_MAT_LT       = 1,
    QUANT_MAT_STANDARD = 2,
    QUANT_MAT_HQ       = 3,
    QUANT_MAT_DEFAULT  = 4,
};
/*
 * Base 8x8 quantisation matrices, one row of the table per QUANT_MAT_*
 * selector (proxy, LT, standard, high quality, codec default).
 */
static const uint8_t prores_quant_matrices[][64] = {
    /* proxy */
    {
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    /* LT */
    {
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    /* standard */
    {
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    /* high quality */
    {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    /* codec default */
    {
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
#define NUM_MB_LIMITS 4

/*
 * Macroblock-count thresholds; encode_init() picks the first entry that is
 * not smaller than the frame's macroblock count to select a br_tab[] column.
 */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, /* up to 720x576 */
    2700, /* up to 960x720 */
    6075, /* up to 1440x1080 */
    9216, /* up to 2048x1152 */
};
114 static const struct prores_profile {
115 const char *full_name;
116 uint32_t tag;
117 int min_quant;
118 int max_quant;
119 int br_tab[NUM_MB_LIMITS];
120 int quant;
121 } prores_profile_info[4] = {
123 .full_name = "proxy",
124 .tag = MKTAG('a', 'p', 'c', 'o'),
125 .min_quant = 4,
126 .max_quant = 8,
127 .br_tab = { 300, 242, 220, 194 },
128 .quant = QUANT_MAT_PROXY,
131 .full_name = "LT",
132 .tag = MKTAG('a', 'p', 'c', 's'),
133 .min_quant = 1,
134 .max_quant = 9,
135 .br_tab = { 720, 560, 490, 440 },
136 .quant = QUANT_MAT_LT,
139 .full_name = "standard",
140 .tag = MKTAG('a', 'p', 'c', 'n'),
141 .min_quant = 1,
142 .max_quant = 6,
143 .br_tab = { 1050, 808, 710, 632 },
144 .quant = QUANT_MAT_STANDARD,
147 .full_name = "high quality",
148 .tag = MKTAG('a', 'p', 'c', 'h'),
149 .min_quant = 1,
150 .max_quant = 6,
151 .br_tab = { 1566, 1216, 1070, 950 },
152 .quant = QUANT_MAT_HQ,
154 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
/* Number of node slots reserved per slice column in the rate-control trellis. */
#define TRELLIS_WIDTH 16
/*
 * Score marking a trellis path as unusable. Parenthesised so the macro
 * expands as a single operand in any expression.
 */
#define SCORE_LIMIT   (INT_MAX / 2)

/* One state of the rate-control trellis: a quantiser choice for one slice. */
struct TrellisNode {
    int prev_node; /* index of the best predecessor node, -1 if none yet */
    int quant;     /* quantiser this node stands for */
    int bits;      /* bits accumulated along the path ending here */
    int score;     /* error accumulated along the path ending here */
};

/* Number of quantisers whose scaled matrices are precomputed in quants[]. */
#define MAX_STORED_Q 16
169 typedef struct ProresThreadData {
170 DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
171 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
172 int16_t custom_q[64];
173 struct TrellisNode *nodes;
174 } ProresThreadData;
176 typedef struct ProresContext {
177 AVClass *class;
178 DECLARE_ALIGNED(16, DCTELEM, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
179 DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
180 int16_t quants[MAX_STORED_Q][64];
181 int16_t custom_q[64];
182 const uint8_t *quant_mat;
184 ProresDSPContext dsp;
185 ScanTable scantable;
187 int mb_width, mb_height;
188 int mbs_per_slice;
189 int num_chroma_blocks, chroma_factor;
190 int slices_width;
191 int slices_per_picture;
192 int pictures_per_frame; // 1 for progressive, 2 for interlaced
193 int cur_picture_idx;
194 int num_planes;
195 int bits_per_mb;
196 int force_quant;
198 char *vendor;
199 int quant_sel;
201 int frame_size_upper_bound;
203 int profile;
204 const struct prores_profile *profile_info;
206 int *slice_q;
208 ProresThreadData *tdata;
209 } ProresContext;
211 static void get_slice_data(ProresContext *ctx, const uint16_t *src,
212 int linesize, int x, int y, int w, int h,
213 DCTELEM *blocks, uint16_t *emu_buf,
214 int mbs_per_slice, int blocks_per_mb, int is_chroma)
216 const uint16_t *esrc;
217 const int mb_width = 4 * blocks_per_mb;
218 int elinesize;
219 int i, j, k;
221 for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
222 if (x >= w) {
223 memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
224 * sizeof(*blocks));
225 return;
227 if (x + mb_width <= w && y + 16 <= h) {
228 esrc = src;
229 elinesize = linesize;
230 } else {
231 int bw, bh, pix;
233 esrc = emu_buf;
234 elinesize = 16 * sizeof(*emu_buf);
236 bw = FFMIN(w - x, mb_width);
237 bh = FFMIN(h - y, 16);
239 for (j = 0; j < bh; j++) {
240 memcpy(emu_buf + j * 16,
241 (const uint8_t*)src + j * linesize,
242 bw * sizeof(*src));
243 pix = emu_buf[j * 16 + bw - 1];
244 for (k = bw; k < mb_width; k++)
245 emu_buf[j * 16 + k] = pix;
247 for (; j < 16; j++)
248 memcpy(emu_buf + j * 16,
249 emu_buf + (bh - 1) * 16,
250 mb_width * sizeof(*emu_buf));
252 if (!is_chroma) {
253 ctx->dsp.fdct(esrc, elinesize, blocks);
254 blocks += 64;
255 if (blocks_per_mb > 2) {
256 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
257 blocks += 64;
259 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
260 blocks += 64;
261 if (blocks_per_mb > 2) {
262 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
263 blocks += 64;
265 } else {
266 ctx->dsp.fdct(esrc, elinesize, blocks);
267 blocks += 64;
268 ctx->dsp.fdct(esrc + elinesize * 4, elinesize, blocks);
269 blocks += 64;
270 if (blocks_per_mb > 2) {
271 ctx->dsp.fdct(esrc + 8, elinesize, blocks);
272 blocks += 64;
273 ctx->dsp.fdct(esrc + elinesize * 4 + 8, elinesize, blocks);
274 blocks += 64;
278 x += mb_width;
283 * Write an unsigned rice/exp golomb codeword.
285 static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
287 unsigned int rice_order, exp_order, switch_bits, switch_val;
288 int exponent;
290 /* number of prefix bits to switch between Rice and expGolomb */
291 switch_bits = (codebook & 3) + 1;
292 rice_order = codebook >> 5; /* rice code order */
293 exp_order = (codebook >> 2) & 7; /* exp golomb code order */
295 switch_val = switch_bits << rice_order;
297 if (val >= switch_val) {
298 val -= switch_val - (1 << exp_order);
299 exponent = av_log2(val);
301 put_bits(pb, exponent - exp_order + switch_bits, 0);
302 put_bits(pb, exponent + 1, val);
303 } else {
304 exponent = val >> rice_order;
306 if (exponent)
307 put_bits(pb, exponent, 0);
308 put_bits(pb, 1, 1);
309 if (rice_order)
310 put_sbits(pb, rice_order, val);
/* -1 for negative x, 0 otherwise (x is expected to be a 32-bit int). */
#define GET_SIGN(x)  ((x) >> 31)
/*
 * Map a signed value to an unsigned code: 0, -1, 1, -2, ... -> 0, 1, 2, 3, ...
 * Uses a multiplication instead of a left shift because shifting a negative
 * value left is undefined behaviour in C.
 */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
317 static void encode_dcs(PutBitContext *pb, DCTELEM *blocks,
318 int blocks_per_slice, int scale)
320 int i;
321 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
323 prev_dc = (blocks[0] - 0x4000) / scale;
324 encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
325 sign = 0;
326 codebook = 3;
327 blocks += 64;
329 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
330 dc = (blocks[0] - 0x4000) / scale;
331 delta = dc - prev_dc;
332 new_sign = GET_SIGN(delta);
333 delta = (delta ^ sign) - sign;
334 code = MAKE_CODE(delta);
335 encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
336 codebook = (code + (code & 1)) >> 1;
337 codebook = FFMIN(codebook, 3);
338 sign = new_sign;
339 prev_dc = dc;
343 static void encode_acs(PutBitContext *pb, DCTELEM *blocks,
344 int blocks_per_slice,
345 int plane_size_factor,
346 const uint8_t *scan, const int16_t *qmat)
348 int idx, i;
349 int run, level, run_cb, lev_cb;
350 int max_coeffs, abs_level;
352 max_coeffs = blocks_per_slice << 6;
353 run_cb = ff_prores_run_to_cb_index[4];
354 lev_cb = ff_prores_lev_to_cb_index[2];
355 run = 0;
357 for (i = 1; i < 64; i++) {
358 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
359 level = blocks[idx] / qmat[scan[i]];
360 if (level) {
361 abs_level = FFABS(level);
362 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
363 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
364 abs_level - 1);
365 put_sbits(pb, 1, GET_SIGN(level));
367 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
368 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
369 run = 0;
370 } else {
371 run++;
377 static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
378 const uint16_t *src, int linesize,
379 int mbs_per_slice, DCTELEM *blocks,
380 int blocks_per_mb, int plane_size_factor,
381 const int16_t *qmat)
383 int blocks_per_slice, saved_pos;
385 saved_pos = put_bits_count(pb);
386 blocks_per_slice = mbs_per_slice * blocks_per_mb;
388 encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
389 encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
390 ctx->scantable.permutated, qmat);
391 flush_put_bits(pb);
393 return (put_bits_count(pb) - saved_pos) >> 3;
396 static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
397 PutBitContext *pb,
398 int sizes[4], int x, int y, int quant,
399 int mbs_per_slice)
401 ProresContext *ctx = avctx->priv_data;
402 int i, xp, yp;
403 int total_size = 0;
404 const uint16_t *src;
405 int slice_width_factor = av_log2(mbs_per_slice);
406 int num_cblocks, pwidth, linesize, line_add;
407 int plane_factor, is_chroma;
408 uint16_t *qmat;
410 if (ctx->pictures_per_frame == 1)
411 line_add = 0;
412 else
413 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
415 if (ctx->force_quant) {
416 qmat = ctx->quants[0];
417 } else if (quant < MAX_STORED_Q) {
418 qmat = ctx->quants[quant];
419 } else {
420 qmat = ctx->custom_q;
421 for (i = 0; i < 64; i++)
422 qmat[i] = ctx->quant_mat[i] * quant;
425 for (i = 0; i < ctx->num_planes; i++) {
426 is_chroma = (i == 1 || i == 2);
427 plane_factor = slice_width_factor + 2;
428 if (is_chroma)
429 plane_factor += ctx->chroma_factor - 3;
430 if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
431 xp = x << 4;
432 yp = y << 4;
433 num_cblocks = 4;
434 pwidth = avctx->width;
435 } else {
436 xp = x << 3;
437 yp = y << 4;
438 num_cblocks = 2;
439 pwidth = avctx->width >> 1;
442 linesize = pic->linesize[i] * ctx->pictures_per_frame;
443 src = (const uint16_t*)(pic->data[i] + yp * linesize +
444 line_add * pic->linesize[i]) + xp;
446 get_slice_data(ctx, src, linesize, xp, yp,
447 pwidth, avctx->height / ctx->pictures_per_frame,
448 ctx->blocks[0], ctx->emu_buf,
449 mbs_per_slice, num_cblocks, is_chroma);
450 sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
451 mbs_per_slice, ctx->blocks[0],
452 num_cblocks, plane_factor,
453 qmat);
454 total_size += sizes[i];
456 return total_size;
/**
 * Compute the number of bits encode_vlc_codeword() would emit for val.
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    /* number of prefix bits to switch between Rice and expGolomb */
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  =  codebook >> 5;       /* rice code order */
    const unsigned int exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
    const unsigned int switch_val  = switch_bits << rice_order;

    /* Rice code: unary quotient, stop bit and remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb code: zero prefix followed by exponent + 1 value bits */
    val -= switch_val - (1 << exp_order);

    return av_log2(val) * 2 - exp_order + switch_bits + 1;
}
481 static int estimate_dcs(int *error, DCTELEM *blocks, int blocks_per_slice,
482 int scale)
484 int i;
485 int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
486 int bits;
488 prev_dc = (blocks[0] - 0x4000) / scale;
489 bits = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
490 sign = 0;
491 codebook = 3;
492 blocks += 64;
493 *error += FFABS(blocks[0] - 0x4000) % scale;
495 for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
496 dc = (blocks[0] - 0x4000) / scale;
497 *error += FFABS(blocks[0] - 0x4000) % scale;
498 delta = dc - prev_dc;
499 new_sign = GET_SIGN(delta);
500 delta = (delta ^ sign) - sign;
501 code = MAKE_CODE(delta);
502 bits += estimate_vlc(ff_prores_dc_codebook[codebook], code);
503 codebook = (code + (code & 1)) >> 1;
504 codebook = FFMIN(codebook, 3);
505 sign = new_sign;
506 prev_dc = dc;
509 return bits;
512 static int estimate_acs(int *error, DCTELEM *blocks, int blocks_per_slice,
513 int plane_size_factor,
514 const uint8_t *scan, const int16_t *qmat)
516 int idx, i;
517 int run, level, run_cb, lev_cb;
518 int max_coeffs, abs_level;
519 int bits = 0;
521 max_coeffs = blocks_per_slice << 6;
522 run_cb = ff_prores_run_to_cb_index[4];
523 lev_cb = ff_prores_lev_to_cb_index[2];
524 run = 0;
526 for (i = 1; i < 64; i++) {
527 for (idx = scan[i]; idx < max_coeffs; idx += 64) {
528 level = blocks[idx] / qmat[scan[i]];
529 *error += FFABS(blocks[idx]) % qmat[scan[i]];
530 if (level) {
531 abs_level = FFABS(level);
532 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
533 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
534 abs_level - 1) + 1;
536 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
537 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
538 run = 0;
539 } else {
540 run++;
545 return bits;
548 static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
549 const uint16_t *src, int linesize,
550 int mbs_per_slice,
551 int blocks_per_mb, int plane_size_factor,
552 const int16_t *qmat, ProresThreadData *td)
554 int blocks_per_slice;
555 int bits;
557 blocks_per_slice = mbs_per_slice * blocks_per_mb;
559 bits = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
560 bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
561 plane_size_factor, ctx->scantable.permutated, qmat);
563 return FFALIGN(bits, 8);
566 static int find_slice_quant(AVCodecContext *avctx, const AVFrame *pic,
567 int trellis_node, int x, int y, int mbs_per_slice,
568 ProresThreadData *td)
570 ProresContext *ctx = avctx->priv_data;
571 int i, q, pq, xp, yp;
572 const uint16_t *src;
573 int slice_width_factor = av_log2(mbs_per_slice);
574 int num_cblocks[MAX_PLANES], pwidth;
575 int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
576 const int min_quant = ctx->profile_info->min_quant;
577 const int max_quant = ctx->profile_info->max_quant;
578 int error, bits, bits_limit;
579 int mbs, prev, cur, new_score;
580 int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
581 int overquant;
582 uint16_t *qmat;
583 int linesize[4], line_add;
585 if (ctx->pictures_per_frame == 1)
586 line_add = 0;
587 else
588 line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
589 mbs = x + mbs_per_slice;
591 for (i = 0; i < ctx->num_planes; i++) {
592 is_chroma[i] = (i == 1 || i == 2);
593 plane_factor[i] = slice_width_factor + 2;
594 if (is_chroma[i])
595 plane_factor[i] += ctx->chroma_factor - 3;
596 if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
597 xp = x << 4;
598 yp = y << 4;
599 num_cblocks[i] = 4;
600 pwidth = avctx->width;
601 } else {
602 xp = x << 3;
603 yp = y << 4;
604 num_cblocks[i] = 2;
605 pwidth = avctx->width >> 1;
608 linesize[i] = pic->linesize[i] * ctx->pictures_per_frame;
609 src = (const uint16_t*)(pic->data[i] + yp * linesize[i] +
610 line_add * pic->linesize[i]) + xp;
612 get_slice_data(ctx, src, linesize[i], xp, yp,
613 pwidth, avctx->height / ctx->pictures_per_frame,
614 td->blocks[i], td->emu_buf,
615 mbs_per_slice, num_cblocks[i], is_chroma[i]);
618 for (q = min_quant; q < max_quant + 2; q++) {
619 td->nodes[trellis_node + q].prev_node = -1;
620 td->nodes[trellis_node + q].quant = q;
623 // todo: maybe perform coarser quantising to fit into frame size when needed
624 for (q = min_quant; q <= max_quant; q++) {
625 bits = 0;
626 error = 0;
627 for (i = 0; i < ctx->num_planes; i++) {
628 bits += estimate_slice_plane(ctx, &error, i,
629 src, linesize[i],
630 mbs_per_slice,
631 num_cblocks[i], plane_factor[i],
632 ctx->quants[q], td);
634 if (bits > 65000 * 8) {
635 error = SCORE_LIMIT;
636 break;
638 slice_bits[q] = bits;
639 slice_score[q] = error;
641 if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
642 slice_bits[max_quant + 1] = slice_bits[max_quant];
643 slice_score[max_quant + 1] = slice_score[max_quant] + 1;
644 overquant = max_quant;
645 } else {
646 for (q = max_quant + 1; q < 128; q++) {
647 bits = 0;
648 error = 0;
649 if (q < MAX_STORED_Q) {
650 qmat = ctx->quants[q];
651 } else {
652 qmat = td->custom_q;
653 for (i = 0; i < 64; i++)
654 qmat[i] = ctx->quant_mat[i] * q;
656 for (i = 0; i < ctx->num_planes; i++) {
657 bits += estimate_slice_plane(ctx, &error, i,
658 src, linesize[i],
659 mbs_per_slice,
660 num_cblocks[i], plane_factor[i],
661 qmat, td);
663 if (bits <= ctx->bits_per_mb * mbs_per_slice)
664 break;
667 slice_bits[max_quant + 1] = bits;
668 slice_score[max_quant + 1] = error;
669 overquant = q;
671 td->nodes[trellis_node + max_quant + 1].quant = overquant;
673 bits_limit = mbs * ctx->bits_per_mb;
674 for (pq = min_quant; pq < max_quant + 2; pq++) {
675 prev = trellis_node - TRELLIS_WIDTH + pq;
677 for (q = min_quant; q < max_quant + 2; q++) {
678 cur = trellis_node + q;
680 bits = td->nodes[prev].bits + slice_bits[q];
681 error = slice_score[q];
682 if (bits > bits_limit)
683 error = SCORE_LIMIT;
685 if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
686 new_score = td->nodes[prev].score + error;
687 else
688 new_score = SCORE_LIMIT;
689 if (td->nodes[cur].prev_node == -1 ||
690 td->nodes[cur].score >= new_score) {
692 td->nodes[cur].bits = bits;
693 td->nodes[cur].score = new_score;
694 td->nodes[cur].prev_node = prev;
699 error = td->nodes[trellis_node + min_quant].score;
700 pq = trellis_node + min_quant;
701 for (q = min_quant + 1; q < max_quant + 2; q++) {
702 if (td->nodes[trellis_node + q].score <= error) {
703 error = td->nodes[trellis_node + q].score;
704 pq = trellis_node + q;
708 return pq;
711 static int find_quant_thread(AVCodecContext *avctx, void *arg,
712 int jobnr, int threadnr)
714 ProresContext *ctx = avctx->priv_data;
715 ProresThreadData *td = ctx->tdata + threadnr;
716 int mbs_per_slice = ctx->mbs_per_slice;
717 int x, y = jobnr, mb, q = 0;
719 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
720 while (ctx->mb_width - x < mbs_per_slice)
721 mbs_per_slice >>= 1;
722 q = find_slice_quant(avctx, avctx->coded_frame,
723 (mb + 1) * TRELLIS_WIDTH, x, y,
724 mbs_per_slice, td);
727 for (x = ctx->slices_width - 1; x >= 0; x--) {
728 ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
729 q = td->nodes[q].prev_node;
732 return 0;
735 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
736 const AVFrame *pic, int *got_packet)
738 ProresContext *ctx = avctx->priv_data;
739 uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
740 uint8_t *picture_size_pos;
741 PutBitContext pb;
742 int x, y, i, mb, q = 0;
743 int sizes[4] = { 0 };
744 int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
745 int frame_size, picture_size, slice_size;
746 int pkt_size, ret;
747 uint8_t frame_flags;
749 *avctx->coded_frame = *pic;
750 avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
751 avctx->coded_frame->key_frame = 1;
753 pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
755 if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
756 av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
757 return ret;
760 orig_buf = pkt->data;
762 // frame atom
763 orig_buf += 4; // frame size
764 bytestream_put_be32 (&orig_buf, FRAME_ID); // frame container ID
765 buf = orig_buf;
767 // frame header
768 tmp = buf;
769 buf += 2; // frame header size will be stored here
770 bytestream_put_be16 (&buf, 0); // version 1
771 bytestream_put_buffer(&buf, ctx->vendor, 4);
772 bytestream_put_be16 (&buf, avctx->width);
773 bytestream_put_be16 (&buf, avctx->height);
775 frame_flags = ctx->chroma_factor << 6;
776 if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
777 frame_flags |= pic->top_field_first ? 0x04 : 0x08;
778 bytestream_put_byte (&buf, frame_flags);
780 bytestream_put_byte (&buf, 0); // reserved
781 bytestream_put_byte (&buf, avctx->color_primaries);
782 bytestream_put_byte (&buf, avctx->color_trc);
783 bytestream_put_byte (&buf, avctx->colorspace);
784 bytestream_put_byte (&buf, 0x40); // source format and alpha information
785 bytestream_put_byte (&buf, 0); // reserved
786 if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
787 bytestream_put_byte (&buf, 0x03); // matrix flags - both matrices are present
788 // luma quantisation matrix
789 for (i = 0; i < 64; i++)
790 bytestream_put_byte(&buf, ctx->quant_mat[i]);
791 // chroma quantisation matrix
792 for (i = 0; i < 64; i++)
793 bytestream_put_byte(&buf, ctx->quant_mat[i]);
794 } else {
795 bytestream_put_byte (&buf, 0x00); // matrix flags - default matrices are used
797 bytestream_put_be16 (&tmp, buf - orig_buf); // write back frame header size
799 for (ctx->cur_picture_idx = 0;
800 ctx->cur_picture_idx < ctx->pictures_per_frame;
801 ctx->cur_picture_idx++) {
802 // picture header
803 picture_size_pos = buf + 1;
804 bytestream_put_byte (&buf, 0x40); // picture header size (in bits)
805 buf += 4; // picture data size will be stored here
806 bytestream_put_be16 (&buf, ctx->slices_per_picture);
807 bytestream_put_byte (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
809 // seek table - will be filled during slice encoding
810 slice_sizes = buf;
811 buf += ctx->slices_per_picture * 2;
813 // slices
814 if (!ctx->force_quant) {
815 ret = avctx->execute2(avctx, find_quant_thread, NULL, NULL,
816 ctx->mb_height);
817 if (ret)
818 return ret;
821 for (y = 0; y < ctx->mb_height; y++) {
822 int mbs_per_slice = ctx->mbs_per_slice;
823 for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
824 q = ctx->force_quant ? ctx->force_quant
825 : ctx->slice_q[mb + y * ctx->slices_width];
827 while (ctx->mb_width - x < mbs_per_slice)
828 mbs_per_slice >>= 1;
830 bytestream_put_byte(&buf, slice_hdr_size << 3);
831 slice_hdr = buf;
832 buf += slice_hdr_size - 1;
833 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)) * 8);
834 encode_slice(avctx, pic, &pb, sizes, x, y, q, mbs_per_slice);
836 bytestream_put_byte(&slice_hdr, q);
837 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
838 for (i = 0; i < ctx->num_planes - 1; i++) {
839 bytestream_put_be16(&slice_hdr, sizes[i]);
840 slice_size += sizes[i];
842 bytestream_put_be16(&slice_sizes, slice_size);
843 buf += slice_size - slice_hdr_size;
847 if (ctx->pictures_per_frame == 1)
848 picture_size = buf - picture_size_pos - 6;
849 else
850 picture_size = buf - picture_size_pos + 1;
851 bytestream_put_be32(&picture_size_pos, picture_size);
854 orig_buf -= 8;
855 frame_size = buf - orig_buf;
856 bytestream_put_be32(&orig_buf, frame_size);
858 pkt->size = frame_size;
859 pkt->flags |= AV_PKT_FLAG_KEY;
860 *got_packet = 1;
862 return 0;
865 static av_cold int encode_close(AVCodecContext *avctx)
867 ProresContext *ctx = avctx->priv_data;
868 int i;
870 av_freep(&avctx->coded_frame);
872 if (ctx->tdata) {
873 for (i = 0; i < avctx->thread_count; i++)
874 av_free(ctx->tdata[i].nodes);
876 av_freep(&ctx->tdata);
877 av_freep(&ctx->slice_q);
879 return 0;
882 static av_cold int encode_init(AVCodecContext *avctx)
884 ProresContext *ctx = avctx->priv_data;
885 int mps;
886 int i, j;
887 int min_quant, max_quant;
888 int interlaced = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
890 avctx->bits_per_raw_sample = 10;
891 avctx->coded_frame = avcodec_alloc_frame();
892 if (!avctx->coded_frame)
893 return AVERROR(ENOMEM);
895 ff_proresdsp_init(&ctx->dsp);
896 ff_init_scantable(ctx->dsp.dct_permutation, &ctx->scantable,
897 interlaced ? ff_prores_interlaced_scan
898 : ff_prores_progressive_scan);
900 mps = ctx->mbs_per_slice;
901 if (mps & (mps - 1)) {
902 av_log(avctx, AV_LOG_ERROR,
903 "there should be an integer power of two MBs per slice\n");
904 return AVERROR(EINVAL);
907 ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
908 ? CFACTOR_Y422
909 : CFACTOR_Y444;
910 ctx->profile_info = prores_profile_info + ctx->profile;
911 ctx->num_planes = 3;
913 ctx->mb_width = FFALIGN(avctx->width, 16) >> 4;
915 if (interlaced)
916 ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
917 else
918 ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
920 ctx->slices_width = ctx->mb_width / mps;
921 ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
922 ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
923 ctx->pictures_per_frame = 1 + interlaced;
925 if (ctx->quant_sel == -1)
926 ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
927 else
928 ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
930 if (strlen(ctx->vendor) != 4) {
931 av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
932 return AVERROR_INVALIDDATA;
935 ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
936 if (!ctx->force_quant) {
937 if (!ctx->bits_per_mb) {
938 for (i = 0; i < NUM_MB_LIMITS - 1; i++)
939 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
940 ctx->pictures_per_frame)
941 break;
942 ctx->bits_per_mb = ctx->profile_info->br_tab[i];
943 } else if (ctx->bits_per_mb < 128) {
944 av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
945 return AVERROR_INVALIDDATA;
948 min_quant = ctx->profile_info->min_quant;
949 max_quant = ctx->profile_info->max_quant;
950 for (i = min_quant; i < MAX_STORED_Q; i++) {
951 for (j = 0; j < 64; j++)
952 ctx->quants[i][j] = ctx->quant_mat[j] * i;
955 ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
956 if (!ctx->slice_q) {
957 encode_close(avctx);
958 return AVERROR(ENOMEM);
961 ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
962 if (!ctx->tdata) {
963 encode_close(avctx);
964 return AVERROR(ENOMEM);
967 for (j = 0; j < avctx->thread_count; j++) {
968 ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
969 * TRELLIS_WIDTH
970 * sizeof(*ctx->tdata->nodes));
971 if (!ctx->tdata[j].nodes) {
972 encode_close(avctx);
973 return AVERROR(ENOMEM);
975 for (i = min_quant; i < max_quant + 2; i++) {
976 ctx->tdata[j].nodes[i].prev_node = -1;
977 ctx->tdata[j].nodes[i].bits = 0;
978 ctx->tdata[j].nodes[i].score = 0;
981 } else {
982 int ls = 0;
984 if (ctx->force_quant > 64) {
985 av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
986 return AVERROR_INVALIDDATA;
989 for (j = 0; j < 64; j++) {
990 ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
991 ls += av_log2((1 << 11) / ctx->quants[0][j]) * 2 + 1;
994 ctx->bits_per_mb = ls * 8;
995 if (ctx->chroma_factor == CFACTOR_Y444)
996 ctx->bits_per_mb += ls * 4;
997 if (ctx->num_planes == 4)
998 ctx->bits_per_mb += ls * 4;
1001 ctx->frame_size_upper_bound = ctx->pictures_per_frame *
1002 ctx->slices_per_picture *
1003 (2 + 2 * ctx->num_planes +
1004 (mps * ctx->bits_per_mb) / 8)
1005 + 200;
1007 avctx->codec_tag = ctx->profile_info->tag;
1009 av_log(avctx, AV_LOG_DEBUG,
1010 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1011 ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1012 interlaced ? "yes" : "no", ctx->bits_per_mb);
1013 av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1014 ctx->frame_size_upper_bound);
1016 return 0;
1019 #define OFFSET(x) offsetof(ProresContext, x)
1020 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1022 static const AVOption options[] = {
1023 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1024 AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1025 { "profile", NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1026 { .i64 = PRORES_PROFILE_STANDARD },
1027 PRORES_PROFILE_PROXY, PRORES_PROFILE_HQ, VE, "profile" },
1028 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1029 0, 0, VE, "profile" },
1030 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1031 0, 0, VE, "profile" },
1032 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1033 0, 0, VE, "profile" },
1034 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1035 0, 0, VE, "profile" },
1036 { "vendor", "vendor ID", OFFSET(vendor),
1037 AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1038 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1039 AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1040 { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1041 { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1042 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1043 0, 0, VE, "quant_mat" },
1044 { "proxy", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1045 0, 0, VE, "quant_mat" },
1046 { "lt", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1047 0, 0, VE, "quant_mat" },
1048 { "standard", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1049 0, 0, VE, "quant_mat" },
1050 { "hq", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1051 0, 0, VE, "quant_mat" },
1052 { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1053 0, 0, VE, "quant_mat" },
1054 { NULL }
1057 static const AVClass proresenc_class = {
1058 .class_name = "ProRes encoder",
1059 .item_name = av_default_item_name,
1060 .option = options,
1061 .version = LIBAVUTIL_VERSION_INT,
1064 AVCodec ff_prores_encoder = {
1065 .name = "prores",
1066 .type = AVMEDIA_TYPE_VIDEO,
1067 .id = AV_CODEC_ID_PRORES,
1068 .priv_data_size = sizeof(ProresContext),
1069 .init = encode_init,
1070 .close = encode_close,
1071 .encode2 = encode_frame,
1072 .capabilities = CODEC_CAP_SLICE_THREADS,
1073 .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1074 .pix_fmts = (const enum AVPixelFormat[]) {
1075 AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_NONE
1077 .priv_class = &proresenc_class,