4 * Copyright (c) 2012 Konstantin Shishkov
6 * This file is part of Libav.
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "libavutil/opt.h"
26 #include "bytestream.h"
28 #include "proresdsp.h"
29 #include "proresdata.h"
/* Chroma format codes. encode_frame() shifts ctx->chroma_factor left by 6
 * to form the frame flags byte, and the slice code compares it against
 * CFACTOR_Y444 to decide whether chroma planes are full width. */
31 #define CFACTOR_Y422 2
32 #define CFACTOR_Y444 3
/* Upper bound of the "mbs_per_slice" option; also sizes the coefficient
 * buffers (64 * 4 * MAX_MBS_PER_SLICE entries per plane). */
34 #define MAX_MBS_PER_SLICE 8
/* First dimension of the per-plane coefficient buffers. */
36 #define MAX_PLANES 3 // should be increased to 4 when there's AV_PIX_FMT_YUV444AP10
39 PRORES_PROFILE_PROXY
= 0,
41 PRORES_PROFILE_STANDARD
,
/*
 * Quantisation matrices, 64 coefficients each (five groups of eight rows
 * are visible below). encode_init() points ctx->quant_mat at one of them,
 * indexed either by the selected profile's .quant field or by the
 * "quant_mat" option value.
 * NOTE(review): the braces and labels separating the individual tables are
 * missing from this listing; the table order presumably follows the
 * QUANT_MAT_* constants - confirm against the enum.
 */
53 static const uint8_t prores_quant_matrices
[][64] = {
55 4, 7, 9, 11, 13, 14, 15, 63,
56 7, 7, 11, 12, 14, 15, 63, 63,
57 9, 11, 13, 14, 15, 63, 63, 63,
58 11, 11, 13, 14, 63, 63, 63, 63,
59 11, 13, 14, 63, 63, 63, 63, 63,
60 13, 14, 63, 63, 63, 63, 63, 63,
61 13, 63, 63, 63, 63, 63, 63, 63,
62 63, 63, 63, 63, 63, 63, 63, 63,
65 4, 5, 6, 7, 9, 11, 13, 15,
66 5, 5, 7, 8, 11, 13, 15, 17,
67 6, 7, 9, 11, 13, 15, 15, 17,
68 7, 7, 9, 11, 13, 15, 17, 19,
69 7, 9, 11, 13, 14, 16, 19, 23,
70 9, 11, 13, 14, 16, 19, 23, 29,
71 9, 11, 13, 15, 17, 21, 28, 35,
72 11, 13, 16, 17, 21, 28, 35, 41,
75 4, 4, 5, 5, 6, 7, 7, 9,
76 4, 4, 5, 6, 7, 7, 9, 9,
77 5, 5, 6, 7, 7, 9, 9, 10,
78 5, 5, 6, 7, 7, 9, 9, 10,
79 5, 6, 7, 7, 8, 9, 10, 12,
80 6, 7, 7, 8, 9, 10, 12, 15,
81 6, 7, 7, 9, 10, 11, 14, 17,
82 7, 7, 9, 10, 11, 14, 17, 21,
85 4, 4, 4, 4, 4, 4, 4, 4,
86 4, 4, 4, 4, 4, 4, 4, 4,
87 4, 4, 4, 4, 4, 4, 4, 4,
88 4, 4, 4, 4, 4, 4, 4, 5,
89 4, 4, 4, 4, 4, 4, 5, 5,
90 4, 4, 4, 4, 4, 5, 5, 6,
91 4, 4, 4, 4, 5, 5, 6, 7,
92 4, 4, 4, 4, 5, 6, 7, 7,
95 4, 4, 4, 4, 4, 4, 4, 4,
96 4, 4, 4, 4, 4, 4, 4, 4,
97 4, 4, 4, 4, 4, 4, 4, 4,
98 4, 4, 4, 4, 4, 4, 4, 4,
99 4, 4, 4, 4, 4, 4, 4, 4,
100 4, 4, 4, 4, 4, 4, 4, 4,
101 4, 4, 4, 4, 4, 4, 4, 4,
102 4, 4, 4, 4, 4, 4, 4, 4,
/*
 * Macroblock-count thresholds, ascending. When no "bits_per_mb" value is
 * given, encode_init() scans this table against
 * mb_width * mb_height * pictures_per_frame and uses the resulting index
 * to read the profile's br_tab[] entry.
 */
106 #define NUM_MB_LIMITS 4
107 static const int prores_mb_limits
[NUM_MB_LIMITS
] = {
108 1620, // up to 720x576
109 2700, // up to 960x720
110 6075, // up to 1440x1080
111 9216, // up to 2048x1152
/*
 * Per-profile parameters, indexed by the "profile" option
 * (encode_init() uses prores_profile_info + ctx->profile).
 *
 *   full_name  human-readable profile name
 *   tag        value copied to avctx->codec_tag by encode_init()
 *   br_tab     bits-per-macroblock budget, one entry per prores_mb_limits[] row
 *   quant      index into prores_quant_matrices[] used when the
 *              "quant_mat" option is left at -1
 *
 * NOTE(review): other code reads ->min_quant and ->max_quant from this
 * struct, but their declarations and initialisers (and the second entry's
 * .full_name) are missing from this listing.
 */
114 static const struct prores_profile
{
115 const char *full_name
;
119 int br_tab
[NUM_MB_LIMITS
];
121 } prores_profile_info
[4] = {
123 .full_name
= "proxy",
124 .tag
= MKTAG('a', 'p', 'c', 'o'),
127 .br_tab
= { 300, 242, 220, 194 },
128 .quant
= QUANT_MAT_PROXY
,
132 .tag
= MKTAG('a', 'p', 'c', 's'),
135 .br_tab
= { 720, 560, 490, 440 },
136 .quant
= QUANT_MAT_LT
,
139 .full_name
= "standard",
140 .tag
= MKTAG('a', 'p', 'c', 'n'),
143 .br_tab
= { 1050, 808, 710, 632 },
144 .quant
= QUANT_MAT_STANDARD
,
147 .full_name
= "high quality",
148 .tag
= MKTAG('a', 'p', 'c', 'h'),
151 .br_tab
= { 1566, 1216, 1070, 950 },
152 .quant
= QUANT_MAT_HQ
,
154 // for 4444 profile bitrate numbers are { 2350, 1828, 1600, 1425 }
/* Number of trellis node slots per slice column; find_quant_thread() places
 * slice number `mb` at node offset (mb + 1) * TRELLIS_WIDTH. */
#define TRELLIS_WIDTH 16
/* Saturation value for trellis scores. Parenthesised so the expansion is a
 * single operand regardless of the operators around it. */
#define SCORE_LIMIT   (INT_MAX / 2)
/* Number of pre-scaled quantisation matrices kept in ProresContext.quants;
 * larger quantisers are scaled on demand into a custom_q buffer. */
#define MAX_STORED_Q  16
/*
 * Per-thread scratch data used during quantiser search; encode_init()
 * allocates avctx->thread_count of these in ctx->tdata and
 * find_quant_thread() picks ctx->tdata + threadnr.
 *
 *   blocks    DCT coefficients of the current slice, one row per plane
 *             (filled by get_slice_data() in find_slice_quant())
 *   emu_buf   16x16 sample buffer for macroblocks crossing the picture edge
 *   custom_q  64-entry matrix buffer
 *             NOTE(review): its use is not visible in this listing
 *   nodes     trellis nodes, (slices_width + 1) columns allocated in
 *             encode_init()
 */
169 typedef struct ProresThreadData
{
170 DECLARE_ALIGNED(16, DCTELEM
, blocks
)[MAX_PLANES
][64 * 4 * MAX_MBS_PER_SLICE
];
171 DECLARE_ALIGNED(16, uint16_t, emu_buf
)[16 * 16];
172 int16_t custom_q
[64];
173 struct TrellisNode
*nodes
;
/*
 * Encoder private context (avctx->priv_data).
 * NOTE(review): several members referenced elsewhere in the file
 * (scantable, mbs_per_slice, slices_width, num_planes, bits_per_mb,
 * force_quant, quant_sel, vendor, profile, cur_picture_idx, slice_q) are
 * missing from this listing.
 */
176 typedef struct ProresContext
{
/* coefficient and edge-emulation scratch used by encode_slice() */
178 DECLARE_ALIGNED(16, DCTELEM
, blocks
)[MAX_PLANES
][64 * 4 * MAX_MBS_PER_SLICE
];
179 DECLARE_ALIGNED(16, uint16_t, emu_buf
)[16*16];
/* quants[q][j] = quant_mat[j] * q, precomputed in encode_init() */
180 int16_t quants
[MAX_STORED_Q
][64];
/* matrix scaled on demand in encode_slice() when quant >= MAX_STORED_Q */
181 int16_t custom_q
[64];
/* selected row of prores_quant_matrices[] */
182 const uint8_t *quant_mat
;
184 ProresDSPContext dsp
;
/* picture size in 16x16 macroblocks (height is per field when interlaced) */
187 int mb_width
, mb_height
;
189 int num_chroma_blocks
, chroma_factor
;
/* mb_height * slices_width */
191 int slices_per_picture
;
192 int pictures_per_frame
; // 1 for progressive, 2 for interlaced
/* used by encode_frame() to size the output packet */
201 int frame_size_upper_bound
;
204 const struct prores_profile
*profile_info
;
/* one entry per thread, see encode_init() */
208 ProresThreadData
*tdata
;
/*
 * Transform the samples of one slice of one plane into DCT coefficient
 * blocks.
 *
 * ctx            encoder context; only ctx->dsp.fdct is used in the visible code
 * src            first sample of the slice in the plane
 * linesize       source stride in bytes (rows are addressed through a
 *                uint8_t cast of src)
 * x, y           sample position of the slice inside the plane
 * w, h           plane dimensions in samples
 * blocks         output coefficient buffer
 * emu_buf        16x16 scratch buffer for macroblocks crossing the plane edge
 * mbs_per_slice  macroblocks in this slice
 * blocks_per_mb  8x8 blocks per macroblock; the macroblock is
 *                4 * blocks_per_mb samples wide
 * is_chroma      NOTE(review): its use is not visible in this listing;
 *                presumably it selects between the two fdct call orders
 *                at the end of the loop - confirm
 *
 * NOTE(review): this listing has gaps (local declarations, some
 * conditions and the pointer/position updates are missing).
 */
211 static void get_slice_data(ProresContext
*ctx
, const uint16_t *src
,
212 int linesize
, int x
, int y
, int w
, int h
,
213 DCTELEM
*blocks
, uint16_t *emu_buf
,
214 int mbs_per_slice
, int blocks_per_mb
, int is_chroma
)
216 const uint16_t *esrc
;
217 const int mb_width
= 4 * blocks_per_mb
;
221 for (i
= 0; i
< mbs_per_slice
; i
++, src
+= mb_width
) {
/* zeroes the coefficients of this and all remaining macroblocks
 * (the guarding condition is not visible here) */
223 memset(blocks
, 0, 64 * (mbs_per_slice
- i
) * blocks_per_mb
/* macroblock lies completely inside the plane: keep the source stride */
227 if (x
+ mb_width
<= w
&& y
+ 16 <= h
) {
229 elinesize
= linesize
;
/* otherwise the visible part is copied into emu_buf (16 samples per row)
 * and padded by replication */
234 elinesize
= 16 * sizeof(*emu_buf
);
236 bw
= FFMIN(w
- x
, mb_width
);
237 bh
= FFMIN(h
- y
, 16);
239 for (j
= 0; j
< bh
; j
++) {
240 memcpy(emu_buf
+ j
* 16,
241 (const uint8_t*)src
+ j
* linesize
,
/* repeat the last valid sample of the row up to the macroblock width */
243 pix
= emu_buf
[j
* 16 + bw
- 1];
244 for (k
= bw
; k
< mb_width
; k
++)
245 emu_buf
[j
* 16 + k
] = pix
;
/* repeat the last valid row */
248 memcpy(emu_buf
+ j
* 16,
249 emu_buf
+ (bh
- 1) * 16,
250 mb_width
* sizeof(*emu_buf
));
/* Forward DCTs. esrc is a uint16_t pointer and elinesize is in bytes, so
 * "esrc + elinesize * 4" advances by eight rows. First call order:
 * top-left, top-right, bottom-left, bottom-right. */
253 ctx
->dsp
.fdct(esrc
, elinesize
, blocks
);
255 if (blocks_per_mb
> 2) {
256 ctx
->dsp
.fdct(esrc
+ 8, elinesize
, blocks
);
259 ctx
->dsp
.fdct(esrc
+ elinesize
* 4, elinesize
, blocks
);
261 if (blocks_per_mb
> 2) {
262 ctx
->dsp
.fdct(esrc
+ elinesize
* 4 + 8, elinesize
, blocks
);
/* Second call order: top-left, bottom-left, top-right, bottom-right. */
266 ctx
->dsp
.fdct(esrc
, elinesize
, blocks
);
268 ctx
->dsp
.fdct(esrc
+ elinesize
* 4, elinesize
, blocks
);
270 if (blocks_per_mb
> 2) {
271 ctx
->dsp
.fdct(esrc
+ 8, elinesize
, blocks
);
273 ctx
->dsp
.fdct(esrc
+ elinesize
* 4 + 8, elinesize
, blocks
);
283 * Write an unsigned rice/exp golomb codeword.
/*
 * Write `val` to `pb` with the hybrid Rice / exp-Golomb code described by
 * `codebook`.
 *
 * The codebook value is unpacked as:
 *   bits 0-1   switch_bits - 1
 *   bits 2-4   exp-Golomb order
 *   bits 5+    Rice order
 * Values at or above switch_bits << rice_order use the exp-Golomb form:
 * a run of zero bits followed by the (exponent + 1)-bit value. Smaller
 * values use the Rice form.
 *
 * NOTE(review): this listing has gaps; the declaration of `exponent`, the
 * else keyword and some statements of the Rice branch are not visible.
 */
285 static inline void encode_vlc_codeword(PutBitContext
*pb
, unsigned codebook
, int val
)
287 unsigned int rice_order
, exp_order
, switch_bits
, switch_val
;
290 /* number of prefix bits to switch between Rice and expGolomb */
291 switch_bits
= (codebook
& 3) + 1;
292 rice_order
= codebook
>> 5; /* rice code order */
293 exp_order
= (codebook
>> 2) & 7; /* exp golomb code order */
295 switch_val
= switch_bits
<< rice_order
;
297 if (val
>= switch_val
) {
/* rebase so that the smallest exp-Golomb value is 1 << exp_order */
298 val
-= switch_val
- (1 << exp_order
);
299 exponent
= av_log2(val
);
301 put_bits(pb
, exponent
- exp_order
+ switch_bits
, 0);
302 put_bits(pb
, exponent
+ 1, val
);
/* Rice form: quotient as zero bits, remainder in rice_order bits */
304 exponent
= val
>> rice_order
;
307 put_bits(pb
, exponent
, 0);
310 put_sbits(pb
, rice_order
, val
);
/*
 * GET_SIGN(x): -1 (all bits set) when x is negative, 0 otherwise.
 * Expressed as a comparison rather than `x >> 31`, so the result does not
 * depend on the implementation-defined right shift of negative values or
 * on int being exactly 32 bits wide.
 */
#define GET_SIGN(x)  (-((x) < 0))
/*
 * MAKE_CODE(x): map a signed value to a non-negative code by interleaving
 * positive and negative values: 0, -1, 1, -2, 2 ... -> 0, 1, 2, 3, 4 ...
 * The doubling is done on the unsigned representation because
 * left-shifting a negative signed value is undefined behaviour.
 */
#define MAKE_CODE(x) ((int)((unsigned)(x) << 1) ^ GET_SIGN(x))
/*
 * Write the DC coefficients of all blocks of a slice.
 *
 * pb                bit writer
 * blocks            coefficient buffer, 64 entries per block, DC first
 * blocks_per_slice  number of blocks to code
 * scale             DC quantiser (the caller passes qmat[0])
 *
 * Each DC is quantised as (blocks[0] - 0x4000) / scale. The first one is
 * coded directly with FIRST_DC_CB; the following ones are coded as
 * differences from the previous DC, with the codebook for the next
 * difference derived from the code just written.
 *
 * NOTE(review): this listing has gaps; the initialisation of `sign` and
 * the updates of `sign` and `prev_dc` are not visible.
 */
317 static void encode_dcs(PutBitContext
*pb
, DCTELEM
*blocks
,
318 int blocks_per_slice
, int scale
)
321 int codebook
= 3, code
, dc
, prev_dc
, delta
, sign
, new_sign
;
323 prev_dc
= (blocks
[0] - 0x4000) / scale
;
324 encode_vlc_codeword(pb
, FIRST_DC_CB
, MAKE_CODE(prev_dc
));
329 for (i
= 1; i
< blocks_per_slice
; i
++, blocks
+= 64) {
330 dc
= (blocks
[0] - 0x4000) / scale
;
331 delta
= dc
- prev_dc
;
332 new_sign
= GET_SIGN(delta
);
/* negates delta when sign is -1, leaves it unchanged when sign is 0 */
333 delta
= (delta
^ sign
) - sign
;
334 code
= MAKE_CODE(delta
);
335 encode_vlc_codeword(pb
, ff_prores_dc_codebook
[codebook
], code
);
/* next codebook index: ceil(code / 2), capped at 3 */
336 codebook
= (code
+ (code
& 1)) >> 1;
337 codebook
= FFMIN(codebook
, 3);
/*
 * Write the AC coefficients of all blocks of a slice as run / level / sign
 * symbols.
 *
 * pb                 bit writer
 * blocks             coefficient buffer, 64 entries per block
 * blocks_per_slice   number of blocks in the buffer
 * plane_size_factor  not referenced in the visible code
 * scan               scan order table
 * qmat               quantisation matrix, indexed by coefficient position
 *
 * The outer loop walks scan positions 1..63 and the inner loop visits that
 * position in every block (idx += 64), so coefficients are interleaved
 * across the blocks of the slice. The run and level codebooks adapt to the
 * previous run (capped at 15) and previous absolute level (capped at 9).
 *
 * NOTE(review): this listing has gaps; the statements maintaining `run`,
 * the test for a non-zero level and the value passed for the level
 * codeword are not visible.
 */
343 static void encode_acs(PutBitContext
*pb
, DCTELEM
*blocks
,
344 int blocks_per_slice
,
345 int plane_size_factor
,
346 const uint8_t *scan
, const int16_t *qmat
)
349 int run
, level
, run_cb
, lev_cb
;
350 int max_coeffs
, abs_level
;
/* 64 coefficients per block */
352 max_coeffs
= blocks_per_slice
<< 6;
/* initial codebooks */
353 run_cb
= ff_prores_run_to_cb_index
[4];
354 lev_cb
= ff_prores_lev_to_cb_index
[2];
357 for (i
= 1; i
< 64; i
++) {
358 for (idx
= scan
[i
]; idx
< max_coeffs
; idx
+= 64) {
359 level
= blocks
[idx
] / qmat
[scan
[i
]];
361 abs_level
= FFABS(level
);
362 encode_vlc_codeword(pb
, ff_prores_ac_codebook
[run_cb
], run
);
363 encode_vlc_codeword(pb
, ff_prores_ac_codebook
[lev_cb
],
/* one sign bit: GET_SIGN() is 0 or -1 */
365 put_sbits(pb
, 1, GET_SIGN(level
));
367 run_cb
= ff_prores_run_to_cb_index
[FFMIN(run
, 15)];
368 lev_cb
= ff_prores_lev_to_cb_index
[FFMIN(abs_level
, 9)];
/*
 * Code one plane of a slice: DC coefficients first, then AC coefficients.
 *
 * Returns the number of bytes produced, computed as the difference of the
 * bit writer position before and after, shifted right by 3.
 *
 * The DC quantiser is qmat[0]; the AC pass uses ctx->scantable.permutated.
 * `src` and `linesize` are not referenced in the visible code.
 *
 * NOTE(review): the end of the parameter list (which declares `qmat`) and
 * any statement between the AC pass and the return are missing from this
 * listing.
 */
377 static int encode_slice_plane(ProresContext
*ctx
, PutBitContext
*pb
,
378 const uint16_t *src
, int linesize
,
379 int mbs_per_slice
, DCTELEM
*blocks
,
380 int blocks_per_mb
, int plane_size_factor
,
383 int blocks_per_slice
, saved_pos
;
385 saved_pos
= put_bits_count(pb
);
386 blocks_per_slice
= mbs_per_slice
* blocks_per_mb
;
388 encode_dcs(pb
, blocks
, blocks_per_slice
, qmat
[0]);
389 encode_acs(pb
, blocks
, blocks_per_slice
, plane_size_factor
,
390 ctx
->scantable
.permutated
, qmat
);
393 return (put_bits_count(pb
) - saved_pos
) >> 3;
/*
 * Encode every plane of one slice into the bit writer and record the byte
 * size of each plane in sizes[].
 *
 * avctx     codec context (priv_data is the ProresContext)
 * pic       source frame
 * sizes     output: coded size in bytes of each plane
 * x, y      slice position; quant is the quantiser chosen for this slice
 *
 * Quantisation matrix selection:
 *   - force_quant set: ctx->quants[0] (filled with the forced quantiser
 *     by encode_init())
 *   - quant < MAX_STORED_Q: the precomputed ctx->quants[quant]
 *   - otherwise: quant_mat[] * quant is built in ctx->custom_q
 *
 * All planes are transformed into ctx->blocks[0] using ctx->emu_buf as
 * edge scratch.
 *
 * NOTE(review): this listing has gaps; the remaining parameters, several
 * local declarations, the computation of xp/yp/num_cblocks and the return
 * statement are not visible.
 */
396 static int encode_slice(AVCodecContext
*avctx
, const AVFrame
*pic
,
398 int sizes
[4], int x
, int y
, int quant
,
401 ProresContext
*ctx
= avctx
->priv_data
;
405 int slice_width_factor
= av_log2(mbs_per_slice
);
406 int num_cblocks
, pwidth
, linesize
, line_add
;
407 int plane_factor
, is_chroma
;
/* line_add is the starting row offset of the current picture; for
 * interlaced input it depends on the field index and field order */
410 if (ctx
->pictures_per_frame
== 1)
413 line_add
= ctx
->cur_picture_idx
^ !pic
->top_field_first
;
415 if (ctx
->force_quant
) {
416 qmat
= ctx
->quants
[0];
417 } else if (quant
< MAX_STORED_Q
) {
418 qmat
= ctx
->quants
[quant
];
420 qmat
= ctx
->custom_q
;
421 for (i
= 0; i
< 64; i
++)
422 qmat
[i
] = ctx
->quant_mat
[i
] * quant
;
425 for (i
= 0; i
< ctx
->num_planes
; i
++) {
/* planes 1 and 2 are chroma */
426 is_chroma
= (i
== 1 || i
== 2);
427 plane_factor
= slice_width_factor
+ 2;
429 plane_factor
+= ctx
->chroma_factor
- 3;
/* luma, or chroma in 4:4:4: full-width plane */
430 if (!is_chroma
|| ctx
->chroma_factor
== CFACTOR_Y444
) {
434 pwidth
= avctx
->width
;
/* horizontally subsampled chroma */
439 pwidth
= avctx
->width
>> 1;
/* with two pictures per frame every other line belongs to this picture */
442 linesize
= pic
->linesize
[i
] * ctx
->pictures_per_frame
;
443 src
= (const uint16_t*)(pic
->data
[i
] + yp
* linesize
+
444 line_add
* pic
->linesize
[i
]) + xp
;
446 get_slice_data(ctx
, src
, linesize
, xp
, yp
,
447 pwidth
, avctx
->height
/ ctx
->pictures_per_frame
,
448 ctx
->blocks
[0], ctx
->emu_buf
,
449 mbs_per_slice
, num_cblocks
, is_chroma
);
450 sizes
[i
] = encode_slice_plane(ctx
, pb
, src
, linesize
,
451 mbs_per_slice
, ctx
->blocks
[0],
452 num_cblocks
, plane_factor
,
454 total_size
+= sizes
[i
];
/**
 * Estimate the number of bits needed to code a value with the hybrid
 * Rice / exp-Golomb scheme described by a codebook descriptor.
 *
 * @param codebook packed descriptor: bits 0-1 hold the switch prefix
 *                 length minus one, bits 2-4 the exp-Golomb order,
 *                 bits 5 and up the Rice order
 * @param val      value to be coded
 * @return estimated codeword length in bits
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    /* number of prefix bits to switch between Rice and expGolomb */
    const unsigned int prefix_len = (codebook & 3) + 1;
    const unsigned int rice_k     = codebook >> 5;       /* rice code order */
    const unsigned int golomb_k   = (codebook >> 2) & 7; /* exp golomb code order */
    const unsigned int threshold  = prefix_len << rice_k;

    /* small values: Rice code, unary quotient plus rice_k remainder bits */
    if (val < threshold)
        return (val >> rice_k) + rice_k + 1;

    /* large values: exp-Golomb code after rebasing to 1 << golomb_k */
    val -= threshold - (1 << golomb_k);
    return av_log2(val) * 2 - golomb_k + prefix_len + 1;
}
/*
 * Estimate the bit cost of the DC coefficients of a slice and accumulate
 * the quantisation error.
 *
 * error             in/out: receives |blocks[0] - 0x4000| % scale for
 *                   every block
 * blocks            coefficient buffer, 64 entries per block, DC first
 * blocks_per_slice  number of blocks
 *
 * The DC prediction and codebook adaptation match the visible code of
 * encode_dcs(), with estimate_vlc() in place of the bit writer.
 *
 * NOTE(review): this listing has gaps; the `scale` parameter declaration,
 * the handling of `sign` / `prev_dc` between iterations and the return
 * statement are not visible (the caller uses the result as a bit count).
 */
481 static int estimate_dcs(int *error
, DCTELEM
*blocks
, int blocks_per_slice
,
485 int codebook
= 3, code
, dc
, prev_dc
, delta
, sign
, new_sign
;
488 prev_dc
= (blocks
[0] - 0x4000) / scale
;
489 bits
= estimate_vlc(FIRST_DC_CB
, MAKE_CODE(prev_dc
));
493 *error
+= FFABS(blocks
[0] - 0x4000) % scale
;
495 for (i
= 1; i
< blocks_per_slice
; i
++, blocks
+= 64) {
496 dc
= (blocks
[0] - 0x4000) / scale
;
497 *error
+= FFABS(blocks
[0] - 0x4000) % scale
;
498 delta
= dc
- prev_dc
;
499 new_sign
= GET_SIGN(delta
);
/* negates delta when sign is -1 */
500 delta
= (delta
^ sign
) - sign
;
501 code
= MAKE_CODE(delta
);
502 bits
+= estimate_vlc(ff_prores_dc_codebook
[codebook
], code
);
/* next codebook index: ceil(code / 2), capped at 3 */
503 codebook
= (code
+ (code
& 1)) >> 1;
504 codebook
= FFMIN(codebook
, 3);
/*
 * Estimate the bit cost of the AC coefficients of a slice and accumulate
 * the quantisation error.
 *
 * error              in/out: receives |blocks[idx]| % qmat[scan[i]] for
 *                    every visited coefficient
 * blocks             coefficient buffer, 64 entries per block
 * blocks_per_slice   number of blocks in the buffer
 * plane_size_factor  not referenced in the visible code
 * scan, qmat         scan order and quantisation matrix
 *
 * The traversal order and codebook adaptation match the visible code of
 * encode_acs(), with estimate_vlc() in place of the bit writer.
 *
 * NOTE(review): this listing has gaps; the statements maintaining `run`,
 * the test for a non-zero level, the level argument of the second
 * estimate and the return statement are not visible.
 */
512 static int estimate_acs(int *error
, DCTELEM
*blocks
, int blocks_per_slice
,
513 int plane_size_factor
,
514 const uint8_t *scan
, const int16_t *qmat
)
517 int run
, level
, run_cb
, lev_cb
;
518 int max_coeffs
, abs_level
;
521 max_coeffs
= blocks_per_slice
<< 6;
522 run_cb
= ff_prores_run_to_cb_index
[4];
523 lev_cb
= ff_prores_lev_to_cb_index
[2];
526 for (i
= 1; i
< 64; i
++) {
527 for (idx
= scan
[i
]; idx
< max_coeffs
; idx
+= 64) {
528 level
= blocks
[idx
] / qmat
[scan
[i
]];
/* remainder lost by the integer division above */
529 *error
+= FFABS(blocks
[idx
]) % qmat
[scan
[i
]];
531 abs_level
= FFABS(level
);
532 bits
+= estimate_vlc(ff_prores_ac_codebook
[run_cb
], run
);
533 bits
+= estimate_vlc(ff_prores_ac_codebook
[lev_cb
],
536 run_cb
= ff_prores_run_to_cb_index
[FFMIN(run
, 15)];
537 lev_cb
= ff_prores_lev_to_cb_index
[FFMIN(abs_level
, 9)];
/*
 * Estimate the coded size of one plane of a slice for a given
 * quantisation matrix.
 *
 * ctx     encoder context (provides the scan table)
 * error   in/out quantisation error accumulator
 * plane   index into td->blocks[] holding the plane's coefficients
 * qmat    quantisation matrix; qmat[0] is the DC quantiser
 * td      per-thread scratch data
 *
 * Returns the DC + AC bit estimate rounded up to a multiple of 8.
 * `src` and `linesize` are not referenced in the visible code.
 *
 * NOTE(review): the parameter line declaring mbs_per_slice and the
 * declaration of `bits` are missing from this listing.
 */
548 static int estimate_slice_plane(ProresContext
*ctx
, int *error
, int plane
,
549 const uint16_t *src
, int linesize
,
551 int blocks_per_mb
, int plane_size_factor
,
552 const int16_t *qmat
, ProresThreadData
*td
)
554 int blocks_per_slice
;
557 blocks_per_slice
= mbs_per_slice
* blocks_per_mb
;
559 bits
= estimate_dcs(error
, td
->blocks
[plane
], blocks_per_slice
, qmat
[0]);
560 bits
+= estimate_acs(error
, td
->blocks
[plane
], blocks_per_slice
,
561 plane_size_factor
, ctx
->scantable
.permutated
, qmat
);
563 return FFALIGN(bits
, 8);
/*
 * Rate-control step for one slice: evaluate candidate quantisers and
 * extend the per-row trellis by one column.
 *
 * avctx          codec context (priv_data is the ProresContext)
 * pic            source frame
 * trellis_node   index of the first node of this slice's trellis column
 * x, y           slice position in macroblocks
 * mbs_per_slice  macroblocks in this slice
 * td             per-thread scratch (coefficients, emu buffer, nodes)
 *
 * Stages visible below:
 *   1. transform every plane of the slice into td->blocks[]
 *   2. reset the nodes of this column for q in [min_quant, max_quant + 1]
 *   3. estimate bits and error for each q in [min_quant, max_quant]
 *   4. fill the extra "overquant" slot max_quant + 1: reuse max_quant if
 *      it already fits in bits_per_mb * mbs_per_slice, otherwise try
 *      larger quantisers up to 127
 *   5. connect every node of the previous column
 *      (trellis_node - TRELLIS_WIDTH) to every node of this column,
 *      keeping the lowest accumulated score
 *   6. select the node of this column with the lowest score
 *
 * The caller assigns the result to a variable it then uses as a node
 * index (td->nodes[q]).
 *
 * NOTE(review): this listing has gaps; several local declarations, the
 * xp/yp/num_cblocks computation, loop-body resets, break statements and
 * the return statement are not visible.
 */
566 static int find_slice_quant(AVCodecContext
*avctx
, const AVFrame
*pic
,
567 int trellis_node
, int x
, int y
, int mbs_per_slice
,
568 ProresThreadData
*td
)
570 ProresContext
*ctx
= avctx
->priv_data
;
571 int i
, q
, pq
, xp
, yp
;
573 int slice_width_factor
= av_log2(mbs_per_slice
);
574 int num_cblocks
[MAX_PLANES
], pwidth
;
575 int plane_factor
[MAX_PLANES
], is_chroma
[MAX_PLANES
];
576 const int min_quant
= ctx
->profile_info
->min_quant
;
577 const int max_quant
= ctx
->profile_info
->max_quant
;
578 int error
, bits
, bits_limit
;
579 int mbs
, prev
, cur
, new_score
;
580 int slice_bits
[TRELLIS_WIDTH
], slice_score
[TRELLIS_WIDTH
];
583 int linesize
[4], line_add
;
585 if (ctx
->pictures_per_frame
== 1)
588 line_add
= ctx
->cur_picture_idx
^ !pic
->top_field_first
;
/* macroblocks of this row up to and including this slice */
589 mbs
= x
+ mbs_per_slice
;
/* stage 1: per-plane geometry and forward transform */
591 for (i
= 0; i
< ctx
->num_planes
; i
++) {
592 is_chroma
[i
] = (i
== 1 || i
== 2);
593 plane_factor
[i
] = slice_width_factor
+ 2;
595 plane_factor
[i
] += ctx
->chroma_factor
- 3;
596 if (!is_chroma
[i
] || ctx
->chroma_factor
== CFACTOR_Y444
) {
600 pwidth
= avctx
->width
;
605 pwidth
= avctx
->width
>> 1;
608 linesize
[i
] = pic
->linesize
[i
] * ctx
->pictures_per_frame
;
609 src
= (const uint16_t*)(pic
->data
[i
] + yp
* linesize
[i
] +
610 line_add
* pic
->linesize
[i
]) + xp
;
612 get_slice_data(ctx
, src
, linesize
[i
], xp
, yp
,
613 pwidth
, avctx
->height
/ ctx
->pictures_per_frame
,
614 td
->blocks
[i
], td
->emu_buf
,
615 mbs_per_slice
, num_cblocks
[i
], is_chroma
[i
]);
/* stage 2: reset this column's nodes */
618 for (q
= min_quant
; q
< max_quant
+ 2; q
++) {
619 td
->nodes
[trellis_node
+ q
].prev_node
= -1;
620 td
->nodes
[trellis_node
+ q
].quant
= q
;
623 // todo: maybe perform coarser quantising to fit into frame size when needed
/* stage 3: cost of every regular quantiser */
624 for (q
= min_quant
; q
<= max_quant
; q
++) {
627 for (i
= 0; i
< ctx
->num_planes
; i
++) {
628 bits
+= estimate_slice_plane(ctx
, &error
, i
,
631 num_cblocks
[i
], plane_factor
[i
],
634 if (bits
> 65000 * 8) {
638 slice_bits
[q
] = bits
;
639 slice_score
[q
] = error
;
/* stage 4: overquant slot */
641 if (slice_bits
[max_quant
] <= ctx
->bits_per_mb
* mbs_per_slice
) {
642 slice_bits
[max_quant
+ 1] = slice_bits
[max_quant
];
643 slice_score
[max_quant
+ 1] = slice_score
[max_quant
] + 1;
644 overquant
= max_quant
;
646 for (q
= max_quant
+ 1; q
< 128; q
++) {
649 if (q
< MAX_STORED_Q
) {
650 qmat
= ctx
->quants
[q
];
653 for (i
= 0; i
< 64; i
++)
654 qmat
[i
] = ctx
->quant_mat
[i
] * q
;
656 for (i
= 0; i
< ctx
->num_planes
; i
++) {
657 bits
+= estimate_slice_plane(ctx
, &error
, i
,
660 num_cblocks
[i
], plane_factor
[i
],
663 if (bits
<= ctx
->bits_per_mb
* mbs_per_slice
)
667 slice_bits
[max_quant
+ 1] = bits
;
668 slice_score
[max_quant
+ 1] = error
;
671 td
->nodes
[trellis_node
+ max_quant
+ 1].quant
= overquant
;
/* stage 5: trellis update; the budget covers the row up to this slice */
673 bits_limit
= mbs
* ctx
->bits_per_mb
;
674 for (pq
= min_quant
; pq
< max_quant
+ 2; pq
++) {
675 prev
= trellis_node
- TRELLIS_WIDTH
+ pq
;
677 for (q
= min_quant
; q
< max_quant
+ 2; q
++) {
678 cur
= trellis_node
+ q
;
680 bits
= td
->nodes
[prev
].bits
+ slice_bits
[q
];
681 error
= slice_score
[q
];
682 if (bits
> bits_limit
)
/* scores saturate at SCORE_LIMIT instead of overflowing */
685 if (td
->nodes
[prev
].score
< SCORE_LIMIT
&& error
< SCORE_LIMIT
)
686 new_score
= td
->nodes
[prev
].score
+ error
;
688 new_score
= SCORE_LIMIT
;
689 if (td
->nodes
[cur
].prev_node
== -1 ||
690 td
->nodes
[cur
].score
>= new_score
) {
692 td
->nodes
[cur
].bits
= bits
;
693 td
->nodes
[cur
].score
= new_score
;
694 td
->nodes
[cur
].prev_node
= prev
;
/* stage 6: lowest score wins; "<=" lets a later q win ties */
699 error
= td
->nodes
[trellis_node
+ min_quant
].score
;
700 pq
= trellis_node
+ min_quant
;
701 for (q
= min_quant
+ 1; q
< max_quant
+ 2; q
++) {
702 if (td
->nodes
[trellis_node
+ q
].score
<= error
) {
703 error
= td
->nodes
[trellis_node
+ q
].score
;
704 pq
= trellis_node
+ q
;
/*
 * Worker run through avctx->execute2(): choose the quantisers for one row
 * of slices.
 *
 * avctx     codec context
 * arg       not referenced in the visible code
 * jobnr     job index, used as the slice row y
 * threadnr  thread index, selects the ProresThreadData scratch entry
 *
 * The first loop runs find_slice_quant() for each slice of the row,
 * placing slice number `mb` at trellis offset (mb + 1) * TRELLIS_WIDTH.
 * The second loop walks the prev_node links backwards from the last
 * returned node and stores each slice's quantiser in ctx->slice_q[].
 *
 * NOTE(review): this listing has gaps; the body of the while loop
 * (presumably shrinking mbs_per_slice at the right edge - confirm), the
 * last arguments of find_slice_quant() and the return statement are not
 * visible.
 */
711 static int find_quant_thread(AVCodecContext
*avctx
, void *arg
,
712 int jobnr
, int threadnr
)
714 ProresContext
*ctx
= avctx
->priv_data
;
715 ProresThreadData
*td
= ctx
->tdata
+ threadnr
;
716 int mbs_per_slice
= ctx
->mbs_per_slice
;
717 int x
, y
= jobnr
, mb
, q
= 0;
719 for (x
= mb
= 0; x
< ctx
->mb_width
; x
+= mbs_per_slice
, mb
++) {
720 while (ctx
->mb_width
- x
< mbs_per_slice
)
722 q
= find_slice_quant(avctx
, avctx
->coded_frame
,
723 (mb
+ 1) * TRELLIS_WIDTH
, x
, y
,
/* backtrack from the last slice to the first */
727 for (x
= ctx
->slices_width
- 1; x
>= 0; x
--) {
728 ctx
->slice_q
[x
+ y
* ctx
->slices_width
] = td
->nodes
[q
].quant
;
729 q
= td
->nodes
[q
].prev_node
;
/*
 * Encode one frame into a packet (the codec's .encode2 callback).
 *
 * avctx       codec context
 * pkt         output packet, allocated here with
 *             frame_size_upper_bound + FF_MIN_BUFFER_SIZE bytes
 * pic         source frame
 * got_packet  NOTE(review): the assignment to it is not visible in this
 *             listing
 *
 * Output layout as written by the visible code:
 *   - 32-bit frame size (filled in last) and FRAME_ID
 *   - frame header: header size, version, vendor, dimensions, flags,
 *     colour information and optionally the quantisation matrix twice
 *     (luma and chroma)
 *   - per picture: picture header, a table of 16-bit slice sizes, then
 *     the slices, each with a small header holding the quantiser and the
 *     sizes of all planes but the last
 *
 * Unless a fixed quantiser is forced, find_quant_thread() is run through
 * avctx->execute2() to fill ctx->slice_q[] before the slices are written.
 *
 * NOTE(review): this listing has gaps; several declarations, pointer
 * initialisations (buf, tmp, slice_hdr, slice_sizes), error returns and
 * the final return are not visible.
 */
735 static int encode_frame(AVCodecContext
*avctx
, AVPacket
*pkt
,
736 const AVFrame
*pic
, int *got_packet
)
738 ProresContext
*ctx
= avctx
->priv_data
;
739 uint8_t *orig_buf
, *buf
, *slice_hdr
, *slice_sizes
, *tmp
;
740 uint8_t *picture_size_pos
;
742 int x
, y
, i
, mb
, q
= 0;
743 int sizes
[4] = { 0 };
/* one byte for the header size, one for the quantiser, two bytes for
 * each plane size except the last */
744 int slice_hdr_size
= 2 + 2 * (ctx
->num_planes
- 1);
745 int frame_size
, picture_size
, slice_size
;
/* every frame is coded as an intra key frame */
749 *avctx
->coded_frame
= *pic
;
750 avctx
->coded_frame
->pict_type
= AV_PICTURE_TYPE_I
;
751 avctx
->coded_frame
->key_frame
= 1;
753 pkt_size
= ctx
->frame_size_upper_bound
+ FF_MIN_BUFFER_SIZE
;
755 if ((ret
= ff_alloc_packet(pkt
, pkt_size
)) < 0) {
756 av_log(avctx
, AV_LOG_ERROR
, "Error getting output packet.\n");
760 orig_buf
= pkt
->data
;
763 orig_buf
+= 4; // frame size
764 bytestream_put_be32 (&orig_buf
, FRAME_ID
); // frame container ID
769 buf
+= 2; // frame header size will be stored here
770 bytestream_put_be16 (&buf
, 0); // version 1
771 bytestream_put_buffer(&buf
, ctx
->vendor
, 4);
772 bytestream_put_be16 (&buf
, avctx
->width
);
773 bytestream_put_be16 (&buf
, avctx
->height
);
/* chroma format in the top two bits, field order in bits 2-3 */
775 frame_flags
= ctx
->chroma_factor
<< 6;
776 if (avctx
->flags
& CODEC_FLAG_INTERLACED_DCT
)
777 frame_flags
|= pic
->top_field_first
? 0x04 : 0x08;
778 bytestream_put_byte (&buf
, frame_flags
);
780 bytestream_put_byte (&buf
, 0); // reserved
781 bytestream_put_byte (&buf
, avctx
->color_primaries
);
782 bytestream_put_byte (&buf
, avctx
->color_trc
);
783 bytestream_put_byte (&buf
, avctx
->colorspace
);
784 bytestream_put_byte (&buf
, 0x40); // source format and alpha information
785 bytestream_put_byte (&buf
, 0); // reserved
786 if (ctx
->quant_sel
!= QUANT_MAT_DEFAULT
) {
787 bytestream_put_byte (&buf
, 0x03); // matrix flags - both matrices are present
788 // luma quantisation matrix
789 for (i
= 0; i
< 64; i
++)
790 bytestream_put_byte(&buf
, ctx
->quant_mat
[i
]);
791 // chroma quantisation matrix
792 for (i
= 0; i
< 64; i
++)
793 bytestream_put_byte(&buf
, ctx
->quant_mat
[i
]);
795 bytestream_put_byte (&buf
, 0x00); // matrix flags - default matrices are used
797 bytestream_put_be16 (&tmp
, buf
- orig_buf
); // write back frame header size
/* one pass per picture: once for progressive, twice for interlaced */
799 for (ctx
->cur_picture_idx
= 0;
800 ctx
->cur_picture_idx
< ctx
->pictures_per_frame
;
801 ctx
->cur_picture_idx
++) {
/* remember where the 32-bit picture size goes (right after the
 * header-size byte written next) */
803 picture_size_pos
= buf
+ 1;
804 bytestream_put_byte (&buf
, 0x40); // picture header size (in bits)
805 buf
+= 4; // picture data size will be stored here
806 bytestream_put_be16 (&buf
, ctx
->slices_per_picture
);
807 bytestream_put_byte (&buf
, av_log2(ctx
->mbs_per_slice
) << 4); // slice width and height in MBs
809 // seek table - will be filled during slice encoding
811 buf
+= ctx
->slices_per_picture
* 2;
814 if (!ctx
->force_quant
) {
815 ret
= avctx
->execute2(avctx
, find_quant_thread
, NULL
, NULL
,
821 for (y
= 0; y
< ctx
->mb_height
; y
++) {
822 int mbs_per_slice
= ctx
->mbs_per_slice
;
823 for (x
= mb
= 0; x
< ctx
->mb_width
; x
+= mbs_per_slice
, mb
++) {
824 q
= ctx
->force_quant
? ctx
->force_quant
825 : ctx
->slice_q
[mb
+ y
* ctx
->slices_width
];
827 while (ctx
->mb_width
- x
< mbs_per_slice
)
/* slice header size is stored in bits */
830 bytestream_put_byte(&buf
, slice_hdr_size
<< 3);
832 buf
+= slice_hdr_size
- 1;
833 init_put_bits(&pb
, buf
, (pkt_size
- (buf
- orig_buf
)) * 8);
834 encode_slice(avctx
, pic
, &pb
, sizes
, x
, y
, q
, mbs_per_slice
);
/* fill in the slice header now that the plane sizes are known; the
 * size of the last plane is not stored */
836 bytestream_put_byte(&slice_hdr
, q
);
837 slice_size
= slice_hdr_size
+ sizes
[ctx
->num_planes
- 1];
838 for (i
= 0; i
< ctx
->num_planes
- 1; i
++) {
839 bytestream_put_be16(&slice_hdr
, sizes
[i
]);
840 slice_size
+= sizes
[i
];
842 bytestream_put_be16(&slice_sizes
, slice_size
);
843 buf
+= slice_size
- slice_hdr_size
;
/* write back the picture size; the two variants differ by a constant
 * offset depending on pictures_per_frame */
847 if (ctx
->pictures_per_frame
== 1)
848 picture_size
= buf
- picture_size_pos
- 6;
850 picture_size
= buf
- picture_size_pos
+ 1;
851 bytestream_put_be32(&picture_size_pos
, picture_size
);
/* write back the total frame size */
855 frame_size
= buf
- orig_buf
;
856 bytestream_put_be32(&orig_buf
, frame_size
);
858 pkt
->size
= frame_size
;
859 pkt
->flags
|= AV_PKT_FLAG_KEY
;
/*
 * Release everything allocated by encode_init(): the coded frame, the
 * trellis nodes of every thread, the per-thread data array and the slice
 * quantiser table.
 *
 * NOTE(review): this listing has gaps; the declaration of `i`, any guard
 * around the loop that dereferences ctx->tdata, and the return statement
 * are not visible.
 */
865 static av_cold
int encode_close(AVCodecContext
*avctx
)
867 ProresContext
*ctx
= avctx
->priv_data
;
870 av_freep(&avctx
->coded_frame
);
873 for (i
= 0; i
< avctx
->thread_count
; i
++)
874 av_free(ctx
->tdata
[i
].nodes
);
876 av_freep(&ctx
->tdata
);
877 av_freep(&ctx
->slice_q
);
/*
 * Initialise the encoder (the codec's .init callback).
 *
 * Steps visible below:
 *   - allocate avctx->coded_frame and set up the DSP and scan table
 *     (interlaced or progressive scan depending on
 *     CODEC_FLAG_INTERLACED_DCT)
 *   - validate the options: mbs_per_slice must be a power of two, the
 *     vendor string must be 4 characters, bits_per_mb (if given) must be
 *     at least 128, a forced quantiser must not exceed 64
 *   - derive the picture geometry in macroblocks and slices
 *   - select the quantisation matrix
 *   - rate-controlled mode (no forced quantiser): pick bits_per_mb from
 *     the profile table if unset, precompute the scaled matrices and
 *     allocate the slice quantiser table and the per-thread trellis data
 *   - fixed-quantiser mode: fill quants[0] and derive bits_per_mb from it
 *   - compute frame_size_upper_bound and set the codec tag
 *
 * Returns AVERROR(ENOMEM), AVERROR(EINVAL) or AVERROR_INVALIDDATA on the
 * failure paths visible below.
 *
 * NOTE(review): this listing has gaps; several declarations, conditions,
 * cleanup calls on the error paths and the final return are not visible.
 */
882 static av_cold
int encode_init(AVCodecContext
*avctx
)
884 ProresContext
*ctx
= avctx
->priv_data
;
887 int min_quant
, max_quant
;
888 int interlaced
= !!(avctx
->flags
& CODEC_FLAG_INTERLACED_DCT
);
890 avctx
->bits_per_raw_sample
= 10;
891 avctx
->coded_frame
= avcodec_alloc_frame();
892 if (!avctx
->coded_frame
)
893 return AVERROR(ENOMEM
);
895 ff_proresdsp_init(&ctx
->dsp
);
896 ff_init_scantable(ctx
->dsp
.dct_permutation
, &ctx
->scantable
,
897 interlaced
? ff_prores_interlaced_scan
898 : ff_prores_progressive_scan
);
900 mps
= ctx
->mbs_per_slice
;
/* a power of two has no bit in common with its predecessor */
901 if (mps
& (mps
- 1)) {
902 av_log(avctx
, AV_LOG_ERROR
,
903 "there should be an integer power of two MBs per slice\n");
904 return AVERROR(EINVAL
);
907 ctx
->chroma_factor
= avctx
->pix_fmt
== AV_PIX_FMT_YUV422P10
910 ctx
->profile_info
= prores_profile_info
+ ctx
->profile
;
913 ctx
->mb_width
= FFALIGN(avctx
->width
, 16) >> 4;
/* two variants: height in 32-line units and in 16-line units */
916 ctx
->mb_height
= FFALIGN(avctx
->height
, 32) >> 5;
918 ctx
->mb_height
= FFALIGN(avctx
->height
, 16) >> 4;
/* full slices, plus one slice for every set bit of the leftover
 * macroblock count */
920 ctx
->slices_width
= ctx
->mb_width
/ mps
;
921 ctx
->slices_width
+= av_popcount(ctx
->mb_width
- ctx
->slices_width
* mps
);
922 ctx
->slices_per_picture
= ctx
->mb_height
* ctx
->slices_width
;
923 ctx
->pictures_per_frame
= 1 + interlaced
;
/* -1 means: use the matrix associated with the profile */
925 if (ctx
->quant_sel
== -1)
926 ctx
->quant_mat
= prores_quant_matrices
[ctx
->profile_info
->quant
];
928 ctx
->quant_mat
= prores_quant_matrices
[ctx
->quant_sel
];
930 if (strlen(ctx
->vendor
) != 4) {
931 av_log(avctx
, AV_LOG_ERROR
, "vendor ID should be 4 bytes\n");
932 return AVERROR_INVALIDDATA
;
935 ctx
->force_quant
= avctx
->global_quality
/ FF_QP2LAMBDA
;
936 if (!ctx
->force_quant
) {
937 if (!ctx
->bits_per_mb
) {
938 for (i
= 0; i
< NUM_MB_LIMITS
- 1; i
++)
939 if (prores_mb_limits
[i
] >= ctx
->mb_width
* ctx
->mb_height
*
940 ctx
->pictures_per_frame
)
942 ctx
->bits_per_mb
= ctx
->profile_info
->br_tab
[i
];
943 } else if (ctx
->bits_per_mb
< 128) {
944 av_log(avctx
, AV_LOG_ERROR
, "too few bits per MB, please set at least 128\n");
945 return AVERROR_INVALIDDATA
;
948 min_quant
= ctx
->profile_info
->min_quant
;
949 max_quant
= ctx
->profile_info
->max_quant
;
/* precompute the scaled matrices for the stored quantiser range */
950 for (i
= min_quant
; i
< MAX_STORED_Q
; i
++) {
951 for (j
= 0; j
< 64; j
++)
952 ctx
->quants
[i
][j
] = ctx
->quant_mat
[j
] * i
;
955 ctx
->slice_q
= av_malloc(ctx
->slices_per_picture
* sizeof(*ctx
->slice_q
));
958 return AVERROR(ENOMEM
);
961 ctx
->tdata
= av_mallocz(avctx
->thread_count
* sizeof(*ctx
->tdata
));
964 return AVERROR(ENOMEM
);
967 for (j
= 0; j
< avctx
->thread_count
; j
++) {
968 ctx
->tdata
[j
].nodes
= av_malloc((ctx
->slices_width
+ 1)
970 * sizeof(*ctx
->tdata
->nodes
));
971 if (!ctx
->tdata
[j
].nodes
) {
973 return AVERROR(ENOMEM
);
/* nodes of the first column act as the trellis start */
975 for (i
= min_quant
; i
< max_quant
+ 2; i
++) {
976 ctx
->tdata
[j
].nodes
[i
].prev_node
= -1;
977 ctx
->tdata
[j
].nodes
[i
].bits
= 0;
978 ctx
->tdata
[j
].nodes
[i
].score
= 0;
984 if (ctx
->force_quant
> 64) {
985 av_log(avctx
, AV_LOG_ERROR
, "too large quantiser, maximum is 64\n");
986 return AVERROR_INVALIDDATA
;
989 for (j
= 0; j
< 64; j
++) {
990 ctx
->quants
[0][j
] = ctx
->quant_mat
[j
] * ctx
->force_quant
;
991 ls
+= av_log2((1 << 11) / ctx
->quants
[0][j
]) * 2 + 1;
994 ctx
->bits_per_mb
= ls
* 8;
995 if (ctx
->chroma_factor
== CFACTOR_Y444
)
996 ctx
->bits_per_mb
+= ls
* 4;
997 if (ctx
->num_planes
== 4)
998 ctx
->bits_per_mb
+= ls
* 4;
1001 ctx
->frame_size_upper_bound
= ctx
->pictures_per_frame
*
1002 ctx
->slices_per_picture
*
1003 (2 + 2 * ctx
->num_planes
+
1004 (mps
* ctx
->bits_per_mb
) / 8)
1007 avctx
->codec_tag
= ctx
->profile_info
->tag
;
1009 av_log(avctx
, AV_LOG_DEBUG
,
1010 "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1011 ctx
->profile
, ctx
->slices_per_picture
* ctx
->pictures_per_frame
,
1012 interlaced
? "yes" : "no", ctx
->bits_per_mb
);
1013 av_log(avctx
, AV_LOG_DEBUG
, "frame size upper bound: %d\n",
1014 ctx
->frame_size_upper_bound
);
/* Byte offset of a ProresContext member, for the AVOption table. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by every option of this encoder. Parenthesised so the
 * expansion stays a single operand whatever operators surround it. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
/*
 * Private options of the encoder. Entries of type AV_OPT_TYPE_CONST are
 * named values for the option sharing the same unit string ("profile" or
 * "quant_mat").
 *
 *   mbs_per_slice  1..MAX_MBS_PER_SLICE, default 8 (encode_init() also
 *                  requires a power of two)
 *   profile        PRORES_PROFILE_PROXY..PRORES_PROFILE_HQ, default standard
 *   vendor         4-character vendor ID, default "Lavc"
 *   bits_per_mb    0..8192, default 0 (encode_init() then takes the value
 *                  from the profile table)
 *   quant_mat      -1 ("auto") or a QUANT_MAT_* index, default -1
 *
 * NOTE(review): the terminating entry is missing from this listing.
 */
1022 static const AVOption options
[] = {
1023 { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice
),
1024 AV_OPT_TYPE_INT
, { .i64
= 8 }, 1, MAX_MBS_PER_SLICE
, VE
},
1025 { "profile", NULL
, OFFSET(profile
), AV_OPT_TYPE_INT
,
1026 { .i64
= PRORES_PROFILE_STANDARD
},
1027 PRORES_PROFILE_PROXY
, PRORES_PROFILE_HQ
, VE
, "profile" },
1028 { "proxy", NULL
, 0, AV_OPT_TYPE_CONST
, { .i64
= PRORES_PROFILE_PROXY
},
1029 0, 0, VE
, "profile" },
1030 { "lt", NULL
, 0, AV_OPT_TYPE_CONST
, { .i64
= PRORES_PROFILE_LT
},
1031 0, 0, VE
, "profile" },
1032 { "standard", NULL
, 0, AV_OPT_TYPE_CONST
, { .i64
= PRORES_PROFILE_STANDARD
},
1033 0, 0, VE
, "profile" },
1034 { "hq", NULL
, 0, AV_OPT_TYPE_CONST
, { .i64
= PRORES_PROFILE_HQ
},
1035 0, 0, VE
, "profile" },
1036 { "vendor", "vendor ID", OFFSET(vendor
),
1037 AV_OPT_TYPE_STRING
, { .str
= "Lavc" }, CHAR_MIN
, CHAR_MAX
, VE
},
1038 { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb
),
1039 AV_OPT_TYPE_INT
, { .i64
= 0 }, 0, 8192, VE
},
1040 { "quant_mat", "quantiser matrix", OFFSET(quant_sel
), AV_OPT_TYPE_INT
,
1041 { .i64
= -1 }, -1, QUANT_MAT_DEFAULT
, VE
, "quant_mat" },
1042 { "auto", NULL
, 0, AV_OPT_TYPE_CONST
, { .i64
= -1 },
1043 0, 0, VE
, "quant_mat" },
1044 { "proxy", NULL
, 0, AV_OPT_TYPE_CONST
, { .i64
= QUANT_MAT_PROXY
},
1045 0, 0, VE
, "quant_mat" },
1046 { "lt", NULL
, 0, AV_OPT_TYPE_CONST
, { .i64
= QUANT_MAT_LT
},
1047 0, 0, VE
, "quant_mat" },
1048 { "standard", NULL
, 0, AV_OPT_TYPE_CONST
, { .i64
= QUANT_MAT_STANDARD
},
1049 0, 0, VE
, "quant_mat" },
1050 { "hq", NULL
, 0, AV_OPT_TYPE_CONST
, { .i64
= QUANT_MAT_HQ
},
1051 0, 0, VE
, "quant_mat" },
1052 { "default", NULL
, 0, AV_OPT_TYPE_CONST
, { .i64
= QUANT_MAT_DEFAULT
},
1053 0, 0, VE
, "quant_mat" },
/*
 * AVClass describing the encoder's private context; referenced by
 * ff_prores_encoder.priv_class.
 * NOTE(review): the member that attaches the option table is missing from
 * this listing.
 */
1057 static const AVClass proresenc_class
= {
1058 .class_name
= "ProRes encoder",
1059 .item_name
= av_default_item_name
,
1061 .version
= LIBAVUTIL_VERSION_INT
,
/*
 * Codec registration entry: a video encoder for AV_CODEC_ID_PRORES that
 * advertises slice threading, accepts 10-bit 4:2:2 and 4:4:4 planar YUV
 * input, and wires up encode_init / encode_frame / encode_close.
 * NOTE(review): the .name member and the closing braces are missing from
 * this listing.
 */
1064 AVCodec ff_prores_encoder
= {
1066 .type
= AVMEDIA_TYPE_VIDEO
,
1067 .id
= AV_CODEC_ID_PRORES
,
1068 .priv_data_size
= sizeof(ProresContext
),
1069 .init
= encode_init
,
1070 .close
= encode_close
,
1071 .encode2
= encode_frame
,
1072 .capabilities
= CODEC_CAP_SLICE_THREADS
,
1073 .long_name
= NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
/* list terminated by AV_PIX_FMT_NONE */
1074 .pix_fmts
= (const enum AVPixelFormat
[]) {
1075 AV_PIX_FMT_YUV422P10
, AV_PIX_FMT_YUV444P10
, AV_PIX_FMT_NONE
1077 .priv_class
= &proresenc_class
,