3 * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
5 * VC-3 encoder funded by the British Broadcasting Corporation
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #define RC_VARIANCE 1 // use variance or ssd for fast rc
29 #include "mpegvideo.h"
30 #include "dnxhddata.h"
42 int dct_quantize_c(MpegEncContext
*s
, DCTELEM
*block
, int n
, int qscale
, int *overflow
);
44 typedef struct DNXHDEncContext
{
45 MpegEncContext m
; ///< Used for quantization dsp functions
49 const CIDEntry
*cid_table
;
50 uint8_t *msip
; ///< Macroblock Scan Indexes Payload
53 struct DNXHDEncContext
*thread
[MAX_THREADS
];
55 unsigned dct_y_offset
;
56 unsigned dct_uv_offset
;
60 DECLARE_ALIGNED_16(DCTELEM
, blocks
[8][64]);
62 int (*qmatrix_c
) [64];
63 int (*qmatrix_l
) [64];
64 uint16_t (*qmatrix_l16
)[2][64];
65 uint16_t (*qmatrix_c16
)[2][64];
86 RCEntry (*mb_rc
)[8160];
89 #define LAMBDA_FRAC_BITS 10
91 static int dnxhd_init_vlc(DNXHDEncContext
*ctx
)
94 int max_level
= 1<<(ctx
->cid_table
->bit_depth
+2);
96 CHECKED_ALLOCZ(ctx
->vlc_codes
, max_level
*4*sizeof(*ctx
->vlc_codes
));
97 CHECKED_ALLOCZ(ctx
->vlc_bits
, max_level
*4*sizeof(*ctx
->vlc_bits
));
98 CHECKED_ALLOCZ(ctx
->run_codes
, 63*2);
99 CHECKED_ALLOCZ(ctx
->run_bits
, 63);
101 ctx
->vlc_codes
+= max_level
*2;
102 ctx
->vlc_bits
+= max_level
*2;
103 for (level
= -max_level
; level
< max_level
; level
++) {
104 for (run
= 0; run
< 2; run
++) {
105 int index
= (level
<<1)|run
;
106 int sign
, offset
= 0, alevel
= level
;
108 MASK_ABS(sign
, alevel
);
110 offset
= (alevel
-1)>>6;
113 for (j
= 0; j
< 257; j
++) {
114 if (ctx
->cid_table
->ac_level
[j
] == alevel
&&
115 (!offset
|| (ctx
->cid_table
->ac_index_flag
[j
] && offset
)) &&
116 (!run
|| (ctx
->cid_table
->ac_run_flag
[j
] && run
))) {
117 assert(!ctx
->vlc_codes
[index
]);
119 ctx
->vlc_codes
[index
] = (ctx
->cid_table
->ac_codes
[j
]<<1)|(sign
&1);
120 ctx
->vlc_bits
[index
] = ctx
->cid_table
->ac_bits
[j
]+1;
122 ctx
->vlc_codes
[index
] = ctx
->cid_table
->ac_codes
[j
];
123 ctx
->vlc_bits
[index
] = ctx
->cid_table
->ac_bits
[j
];
128 assert(!alevel
|| j
< 257);
130 ctx
->vlc_codes
[index
] = (ctx
->vlc_codes
[index
]<<ctx
->cid_table
->index_bits
)|offset
;
131 ctx
->vlc_bits
[index
]+= ctx
->cid_table
->index_bits
;
135 for (i
= 0; i
< 62; i
++) {
136 int run
= ctx
->cid_table
->run
[i
];
138 ctx
->run_codes
[run
] = ctx
->cid_table
->run_codes
[i
];
139 ctx
->run_bits
[run
] = ctx
->cid_table
->run_bits
[i
];
146 static int dnxhd_init_qmat(DNXHDEncContext
*ctx
, int lbias
, int cbias
)
148 // init first elem to 1 to avoid div by 0 in convert_matrix
149 uint16_t weight_matrix
[64] = {1,}; // convert_matrix needs uint16_t*
152 CHECKED_ALLOCZ(ctx
->qmatrix_l
, (ctx
->m
.avctx
->qmax
+1) * 64 * sizeof(int));
153 CHECKED_ALLOCZ(ctx
->qmatrix_c
, (ctx
->m
.avctx
->qmax
+1) * 64 * sizeof(int));
154 CHECKED_ALLOCZ(ctx
->qmatrix_l16
, (ctx
->m
.avctx
->qmax
+1) * 64 * 2 * sizeof(uint16_t));
155 CHECKED_ALLOCZ(ctx
->qmatrix_c16
, (ctx
->m
.avctx
->qmax
+1) * 64 * 2 * sizeof(uint16_t));
157 for (i
= 1; i
< 64; i
++) {
158 int j
= ctx
->m
.dsp
.idct_permutation
[ff_zigzag_direct
[i
]];
159 weight_matrix
[j
] = ctx
->cid_table
->luma_weight
[i
];
161 ff_convert_matrix(&ctx
->m
.dsp
, ctx
->qmatrix_l
, ctx
->qmatrix_l16
, weight_matrix
,
162 ctx
->m
.intra_quant_bias
, 1, ctx
->m
.avctx
->qmax
, 1);
163 for (i
= 1; i
< 64; i
++) {
164 int j
= ctx
->m
.dsp
.idct_permutation
[ff_zigzag_direct
[i
]];
165 weight_matrix
[j
] = ctx
->cid_table
->chroma_weight
[i
];
167 ff_convert_matrix(&ctx
->m
.dsp
, ctx
->qmatrix_c
, ctx
->qmatrix_c16
, weight_matrix
,
168 ctx
->m
.intra_quant_bias
, 1, ctx
->m
.avctx
->qmax
, 1);
169 for (qscale
= 1; qscale
<= ctx
->m
.avctx
->qmax
; qscale
++) {
170 for (i
= 0; i
< 64; i
++) {
171 ctx
->qmatrix_l
[qscale
] [i
] <<= 2; ctx
->qmatrix_c
[qscale
] [i
] <<= 2;
172 ctx
->qmatrix_l16
[qscale
][0][i
] <<= 2; ctx
->qmatrix_l16
[qscale
][1][i
] <<= 2;
173 ctx
->qmatrix_c16
[qscale
][0][i
] <<= 2; ctx
->qmatrix_c16
[qscale
][1][i
] <<= 2;
181 static int dnxhd_init_rc(DNXHDEncContext
*ctx
)
183 CHECKED_ALLOCZ(ctx
->mb_rc
, 8160*ctx
->m
.avctx
->qmax
*sizeof(RCEntry
));
184 if (ctx
->m
.avctx
->mb_decision
!= FF_MB_DECISION_RD
)
185 CHECKED_ALLOCZ(ctx
->mb_cmp
, ctx
->m
.mb_num
*sizeof(RCCMPEntry
));
187 ctx
->frame_bits
= (ctx
->cid_table
->coding_unit_size
- 640 - 4) * 8;
189 ctx
->lambda
= 2<<LAMBDA_FRAC_BITS
; // qscale 2
195 static int dnxhd_encode_init(AVCodecContext
*avctx
)
197 DNXHDEncContext
*ctx
= avctx
->priv_data
;
200 ctx
->cid
= ff_dnxhd_find_cid(avctx
);
201 if (!ctx
->cid
|| avctx
->pix_fmt
!= PIX_FMT_YUV422P
) {
202 av_log(avctx
, AV_LOG_ERROR
, "video parameters incompatible with DNxHD\n");
205 av_log(avctx
, AV_LOG_DEBUG
, "cid %d\n", ctx
->cid
);
207 index
= ff_dnxhd_get_cid_table(ctx
->cid
);
208 ctx
->cid_table
= &ff_dnxhd_cid_table
[index
];
210 ctx
->m
.avctx
= avctx
;
214 dsputil_init(&ctx
->m
.dsp
, avctx
);
215 ff_dct_common_init(&ctx
->m
);
216 if (!ctx
->m
.dct_quantize
)
217 ctx
->m
.dct_quantize
= dct_quantize_c
;
219 ctx
->m
.mb_height
= (avctx
->height
+ 15) / 16;
220 ctx
->m
.mb_width
= (avctx
->width
+ 15) / 16;
222 if (avctx
->flags
& CODEC_FLAG_INTERLACED_DCT
) {
224 ctx
->m
.mb_height
/= 2;
227 ctx
->m
.mb_num
= ctx
->m
.mb_height
* ctx
->m
.mb_width
;
229 if (avctx
->intra_quant_bias
!= FF_DEFAULT_QUANT_BIAS
)
230 ctx
->m
.intra_quant_bias
= avctx
->intra_quant_bias
;
231 if (dnxhd_init_qmat(ctx
, ctx
->m
.intra_quant_bias
, 0) < 0) // XXX tune lbias/cbias
234 if (dnxhd_init_vlc(ctx
) < 0)
236 if (dnxhd_init_rc(ctx
) < 0)
239 CHECKED_ALLOCZ(ctx
->slice_size
, ctx
->m
.mb_height
*sizeof(uint32_t));
240 CHECKED_ALLOCZ(ctx
->mb_bits
, ctx
->m
.mb_num
*sizeof(uint16_t));
241 CHECKED_ALLOCZ(ctx
->mb_qscale
, ctx
->m
.mb_num
*sizeof(uint8_t));
243 ctx
->frame
.key_frame
= 1;
244 ctx
->frame
.pict_type
= FF_I_TYPE
;
245 ctx
->m
.avctx
->coded_frame
= &ctx
->frame
;
247 if (avctx
->thread_count
> MAX_THREADS
|| (avctx
->thread_count
> ctx
->m
.mb_height
)) {
248 av_log(avctx
, AV_LOG_ERROR
, "too many threads\n");
252 ctx
->thread
[0] = ctx
;
253 for (i
= 1; i
< avctx
->thread_count
; i
++) {
254 ctx
->thread
[i
] = av_malloc(sizeof(DNXHDEncContext
));
255 memcpy(ctx
->thread
[i
], ctx
, sizeof(DNXHDEncContext
));
258 for (i
= 0; i
< avctx
->thread_count
; i
++) {
259 ctx
->thread
[i
]->m
.start_mb_y
= (ctx
->m
.mb_height
*(i
) + avctx
->thread_count
/2) / avctx
->thread_count
;
260 ctx
->thread
[i
]->m
.end_mb_y
= (ctx
->m
.mb_height
*(i
+1) + avctx
->thread_count
/2) / avctx
->thread_count
;
264 fail
: //for CHECKED_ALLOCZ
268 static int dnxhd_write_header(AVCodecContext
*avctx
, uint8_t *buf
)
270 DNXHDEncContext
*ctx
= avctx
->priv_data
;
271 const uint8_t header_prefix
[5] = { 0x00,0x00,0x02,0x80,0x01 };
273 memcpy(buf
, header_prefix
, 5);
274 buf
[5] = ctx
->interlaced
? ctx
->cur_field
+2 : 0x01;
275 buf
[6] = 0x80; // crc flag off
276 buf
[7] = 0xa0; // reserved
277 AV_WB16(buf
+ 0x18, avctx
->height
); // ALPF
278 AV_WB16(buf
+ 0x1a, avctx
->width
); // SPL
279 AV_WB16(buf
+ 0x1d, avctx
->height
); // NAL
281 buf
[0x21] = 0x38; // FIXME 8 bit per comp
282 buf
[0x22] = 0x88 + (ctx
->frame
.interlaced_frame
<<2);
283 AV_WB32(buf
+ 0x28, ctx
->cid
); // CID
284 buf
[0x2c] = ctx
->interlaced
? 0 : 0x80;
286 buf
[0x5f] = 0x01; // UDL
288 buf
[0x167] = 0x02; // reserved
289 AV_WB16(buf
+ 0x16a, ctx
->m
.mb_height
* 4 + 4); // MSIPS
290 buf
[0x16d] = ctx
->m
.mb_height
; // Ns
291 buf
[0x16f] = 0x10; // reserved
293 ctx
->msip
= buf
+ 0x170;
297 static av_always_inline
void dnxhd_encode_dc(DNXHDEncContext
*ctx
, int diff
)
301 nbits
= av_log2_16bit(-2*diff
);
304 nbits
= av_log2_16bit(2*diff
);
306 put_bits(&ctx
->m
.pb
, ctx
->cid_table
->dc_bits
[nbits
] + nbits
,
307 (ctx
->cid_table
->dc_codes
[nbits
]<<nbits
) + (diff
& ((1 << nbits
) - 1)));
310 static av_always_inline
void dnxhd_encode_block(DNXHDEncContext
*ctx
, DCTELEM
*block
, int last_index
, int n
)
312 int last_non_zero
= 0;
315 dnxhd_encode_dc(ctx
, block
[0] - ctx
->m
.last_dc
[n
]);
316 ctx
->m
.last_dc
[n
] = block
[0];
318 for (i
= 1; i
<= last_index
; i
++) {
319 j
= ctx
->m
.intra_scantable
.permutated
[i
];
322 int run_level
= i
- last_non_zero
- 1;
323 int rlevel
= (slevel
<<1)|!!run_level
;
324 put_bits(&ctx
->m
.pb
, ctx
->vlc_bits
[rlevel
], ctx
->vlc_codes
[rlevel
]);
326 put_bits(&ctx
->m
.pb
, ctx
->run_bits
[run_level
], ctx
->run_codes
[run_level
]);
330 put_bits(&ctx
->m
.pb
, ctx
->vlc_bits
[0], ctx
->vlc_codes
[0]); // EOB
333 static av_always_inline
void dnxhd_unquantize_c(DNXHDEncContext
*ctx
, DCTELEM
*block
, int n
, int qscale
, int last_index
)
335 const uint8_t *weight_matrix
;
339 weight_matrix
= (n
&2) ? ctx
->cid_table
->chroma_weight
: ctx
->cid_table
->luma_weight
;
341 for (i
= 1; i
<= last_index
; i
++) {
342 int j
= ctx
->m
.intra_scantable
.permutated
[i
];
346 level
= (1-2*level
) * qscale
* weight_matrix
[i
];
347 if (weight_matrix
[i
] != 32)
352 level
= (2*level
+1) * qscale
* weight_matrix
[i
];
353 if (weight_matrix
[i
] != 32)
362 static av_always_inline
int dnxhd_ssd_block(DCTELEM
*qblock
, DCTELEM
*block
)
366 for (i
= 0; i
< 64; i
++)
367 score
+= (block
[i
]-qblock
[i
])*(block
[i
]-qblock
[i
]);
371 static av_always_inline
int dnxhd_calc_ac_bits(DNXHDEncContext
*ctx
, DCTELEM
*block
, int last_index
)
373 int last_non_zero
= 0;
376 for (i
= 1; i
<= last_index
; i
++) {
377 j
= ctx
->m
.intra_scantable
.permutated
[i
];
380 int run_level
= i
- last_non_zero
- 1;
381 bits
+= ctx
->vlc_bits
[(level
<<1)|!!run_level
]+ctx
->run_bits
[run_level
];
388 static av_always_inline
void dnxhd_get_pixels_4x8(DCTELEM
*restrict block
, const uint8_t *pixels
, int line_size
)
391 for (i
= 0; i
< 4; i
++) {
392 block
[0] = pixels
[0];
393 block
[1] = pixels
[1];
394 block
[2] = pixels
[2];
395 block
[3] = pixels
[3];
396 block
[4] = pixels
[4];
397 block
[5] = pixels
[5];
398 block
[6] = pixels
[6];
399 block
[7] = pixels
[7];
403 memcpy(block
, block
- 8, sizeof(*block
)*8);
404 memcpy(block
+ 8, block
-16, sizeof(*block
)*8);
405 memcpy(block
+16, block
-24, sizeof(*block
)*8);
406 memcpy(block
+24, block
-32, sizeof(*block
)*8);
409 static av_always_inline
void dnxhd_get_blocks(DNXHDEncContext
*ctx
, int mb_x
, int mb_y
)
411 const uint8_t *ptr_y
= ctx
->thread
[0]->src
[0] + ((mb_y
<< 4) * ctx
->m
.linesize
) + (mb_x
<< 4);
412 const uint8_t *ptr_u
= ctx
->thread
[0]->src
[1] + ((mb_y
<< 4) * ctx
->m
.uvlinesize
) + (mb_x
<< 3);
413 const uint8_t *ptr_v
= ctx
->thread
[0]->src
[2] + ((mb_y
<< 4) * ctx
->m
.uvlinesize
) + (mb_x
<< 3);
414 DSPContext
*dsp
= &ctx
->m
.dsp
;
416 dsp
->get_pixels(ctx
->blocks
[0], ptr_y
, ctx
->m
.linesize
);
417 dsp
->get_pixels(ctx
->blocks
[1], ptr_y
+ 8, ctx
->m
.linesize
);
418 dsp
->get_pixels(ctx
->blocks
[2], ptr_u
, ctx
->m
.uvlinesize
);
419 dsp
->get_pixels(ctx
->blocks
[3], ptr_v
, ctx
->m
.uvlinesize
);
421 if (mb_y
+1 == ctx
->m
.mb_height
&& ctx
->m
.avctx
->height
== 1080) {
422 if (ctx
->interlaced
) {
423 dnxhd_get_pixels_4x8(ctx
->blocks
[4], ptr_y
+ ctx
->dct_y_offset
, ctx
->m
.linesize
);
424 dnxhd_get_pixels_4x8(ctx
->blocks
[5], ptr_y
+ ctx
->dct_y_offset
+ 8, ctx
->m
.linesize
);
425 dnxhd_get_pixels_4x8(ctx
->blocks
[6], ptr_u
+ ctx
->dct_uv_offset
, ctx
->m
.uvlinesize
);
426 dnxhd_get_pixels_4x8(ctx
->blocks
[7], ptr_v
+ ctx
->dct_uv_offset
, ctx
->m
.uvlinesize
);
428 memset(ctx
->blocks
[4], 0, 4*64*sizeof(DCTELEM
));
430 dsp
->get_pixels(ctx
->blocks
[4], ptr_y
+ ctx
->dct_y_offset
, ctx
->m
.linesize
);
431 dsp
->get_pixels(ctx
->blocks
[5], ptr_y
+ ctx
->dct_y_offset
+ 8, ctx
->m
.linesize
);
432 dsp
->get_pixels(ctx
->blocks
[6], ptr_u
+ ctx
->dct_uv_offset
, ctx
->m
.uvlinesize
);
433 dsp
->get_pixels(ctx
->blocks
[7], ptr_v
+ ctx
->dct_uv_offset
, ctx
->m
.uvlinesize
);
437 static av_always_inline
int dnxhd_switch_matrix(DNXHDEncContext
*ctx
, int i
)
440 ctx
->m
.q_intra_matrix16
= ctx
->qmatrix_c16
;
441 ctx
->m
.q_intra_matrix
= ctx
->qmatrix_c
;
444 ctx
->m
.q_intra_matrix16
= ctx
->qmatrix_l16
;
445 ctx
->m
.q_intra_matrix
= ctx
->qmatrix_l
;
450 static int dnxhd_calc_bits_thread(AVCodecContext
*avctx
, void *arg
)
452 DNXHDEncContext
*ctx
= arg
;
454 int qscale
= ctx
->thread
[0]->qscale
;
456 for (mb_y
= ctx
->m
.start_mb_y
; mb_y
< ctx
->m
.end_mb_y
; mb_y
++) {
459 ctx
->m
.last_dc
[2] = 1024;
461 for (mb_x
= 0; mb_x
< ctx
->m
.mb_width
; mb_x
++) {
462 unsigned mb
= mb_y
* ctx
->m
.mb_width
+ mb_x
;
468 dnxhd_get_blocks(ctx
, mb_x
, mb_y
);
470 for (i
= 0; i
< 8; i
++) {
471 DECLARE_ALIGNED_16(DCTELEM
, block
[64]);
472 DCTELEM
*src_block
= ctx
->blocks
[i
];
473 int overflow
, nbits
, diff
, last_index
;
474 int n
= dnxhd_switch_matrix(ctx
, i
);
476 memcpy(block
, src_block
, sizeof(block
));
477 last_index
= ctx
->m
.dct_quantize((MpegEncContext
*)ctx
, block
, i
, qscale
, &overflow
);
478 ac_bits
+= dnxhd_calc_ac_bits(ctx
, block
, last_index
);
480 diff
= block
[0] - ctx
->m
.last_dc
[n
];
481 if (diff
< 0) nbits
= av_log2_16bit(-2*diff
);
482 else nbits
= av_log2_16bit( 2*diff
);
483 dc_bits
+= ctx
->cid_table
->dc_bits
[nbits
] + nbits
;
485 ctx
->m
.last_dc
[n
] = block
[0];
487 if (avctx
->mb_decision
== FF_MB_DECISION_RD
|| !RC_VARIANCE
) {
488 dnxhd_unquantize_c(ctx
, block
, i
, qscale
, last_index
);
489 ctx
->m
.dsp
.idct(block
);
490 ssd
+= dnxhd_ssd_block(block
, src_block
);
493 ctx
->mb_rc
[qscale
][mb
].ssd
= ssd
;
494 ctx
->mb_rc
[qscale
][mb
].bits
= ac_bits
+dc_bits
+12+8*ctx
->vlc_bits
[0];
500 static int dnxhd_encode_thread(AVCodecContext
*avctx
, void *arg
)
502 DNXHDEncContext
*ctx
= arg
;
505 for (mb_y
= ctx
->m
.start_mb_y
; mb_y
< ctx
->m
.end_mb_y
; mb_y
++) {
508 ctx
->m
.last_dc
[2] = 1024;
509 for (mb_x
= 0; mb_x
< ctx
->m
.mb_width
; mb_x
++) {
510 unsigned mb
= mb_y
* ctx
->m
.mb_width
+ mb_x
;
511 int qscale
= ctx
->mb_qscale
[mb
];
514 put_bits(&ctx
->m
.pb
, 12, qscale
<<1);
516 dnxhd_get_blocks(ctx
, mb_x
, mb_y
);
518 for (i
= 0; i
< 8; i
++) {
519 DCTELEM
*block
= ctx
->blocks
[i
];
520 int last_index
, overflow
;
521 int n
= dnxhd_switch_matrix(ctx
, i
);
522 last_index
= ctx
->m
.dct_quantize((MpegEncContext
*)ctx
, block
, i
, qscale
, &overflow
);
524 dnxhd_encode_block(ctx
, block
, last_index
, n
);
525 //STOP_TIMER("encode_block");
528 if (put_bits_count(&ctx
->m
.pb
)&31)
529 put_bits(&ctx
->m
.pb
, 32-(put_bits_count(&ctx
->m
.pb
)&31), 0);
531 flush_put_bits(&ctx
->m
.pb
);
535 static void dnxhd_setup_threads_slices(DNXHDEncContext
*ctx
, uint8_t *buf
)
539 for (i
= 0; i
< ctx
->m
.avctx
->thread_count
; i
++) {
541 for (mb_y
= ctx
->thread
[i
]->m
.start_mb_y
; mb_y
< ctx
->thread
[i
]->m
.end_mb_y
; mb_y
++) {
542 ctx
->slice_size
[mb_y
] = 0;
543 for (mb_x
= 0; mb_x
< ctx
->m
.mb_width
; mb_x
++) {
544 unsigned mb
= mb_y
* ctx
->m
.mb_width
+ mb_x
;
545 ctx
->slice_size
[mb_y
] += ctx
->mb_bits
[mb
];
547 ctx
->slice_size
[mb_y
] = (ctx
->slice_size
[mb_y
]+31)&~31;
548 ctx
->slice_size
[mb_y
] >>= 3;
549 thread_size
+= ctx
->slice_size
[mb_y
];
551 init_put_bits(&ctx
->thread
[i
]->m
.pb
, buf
+ 640 + offset
, thread_size
);
552 offset
+= thread_size
;
556 static int dnxhd_mb_var_thread(AVCodecContext
*avctx
, void *arg
)
558 DNXHDEncContext
*ctx
= arg
;
560 for (mb_y
= ctx
->m
.start_mb_y
; mb_y
< ctx
->m
.end_mb_y
; mb_y
++) {
561 for (mb_x
= 0; mb_x
< ctx
->m
.mb_width
; mb_x
++) {
562 unsigned mb
= mb_y
* ctx
->m
.mb_width
+ mb_x
;
563 uint8_t *pix
= ctx
->thread
[0]->src
[0] + ((mb_y
<<4) * ctx
->m
.linesize
) + (mb_x
<<4);
564 int sum
= ctx
->m
.dsp
.pix_sum(pix
, ctx
->m
.linesize
);
565 int varc
= (ctx
->m
.dsp
.pix_norm1(pix
, ctx
->m
.linesize
) - (((unsigned)(sum
*sum
))>>8)+128)>>8;
566 ctx
->mb_cmp
[mb
].value
= varc
;
567 ctx
->mb_cmp
[mb
].mb
= mb
;
573 static int dnxhd_encode_rdo(AVCodecContext
*avctx
, DNXHDEncContext
*ctx
)
575 int lambda
, up_step
, down_step
;
576 int last_lower
= INT_MAX
, last_higher
= 0;
579 for (q
= 1; q
< avctx
->qmax
; q
++) {
581 avctx
->execute(avctx
, dnxhd_calc_bits_thread
, (void**)&ctx
->thread
[0], NULL
, avctx
->thread_count
);
583 up_step
= down_step
= 2<<LAMBDA_FRAC_BITS
;
584 lambda
= ctx
->lambda
;
589 if (lambda
== last_higher
) {
591 end
= 1; // need to set final qscales/bits
593 for (y
= 0; y
< ctx
->m
.mb_height
; y
++) {
594 for (x
= 0; x
< ctx
->m
.mb_width
; x
++) {
595 unsigned min
= UINT_MAX
;
597 int mb
= y
*ctx
->m
.mb_width
+x
;
598 for (q
= 1; q
< avctx
->qmax
; q
++) {
599 unsigned score
= ctx
->mb_rc
[q
][mb
].bits
*lambda
+(ctx
->mb_rc
[q
][mb
].ssd
<<LAMBDA_FRAC_BITS
);
605 bits
+= ctx
->mb_rc
[qscale
][mb
].bits
;
606 ctx
->mb_qscale
[mb
] = qscale
;
607 ctx
->mb_bits
[mb
] = ctx
->mb_rc
[qscale
][mb
].bits
;
609 bits
= (bits
+31)&~31; // padding
610 if (bits
> ctx
->frame_bits
)
613 //dprintf(ctx->m.avctx, "lambda %d, up %u, down %u, bits %d, frame %d\n",
614 // lambda, last_higher, last_lower, bits, ctx->frame_bits);
616 if (bits
> ctx
->frame_bits
)
620 if (bits
< ctx
->frame_bits
) {
621 last_lower
= FFMIN(lambda
, last_lower
);
622 if (last_higher
!= 0)
623 lambda
= (lambda
+last_higher
)>>1;
626 down_step
*= 5; // XXX tune ?
627 up_step
= 1<<LAMBDA_FRAC_BITS
;
628 lambda
= FFMAX(1, lambda
);
629 if (lambda
== last_lower
)
632 last_higher
= FFMAX(lambda
, last_higher
);
633 if (last_lower
!= INT_MAX
)
634 lambda
= (lambda
+last_lower
)>>1;
638 down_step
= 1<<LAMBDA_FRAC_BITS
;
641 //dprintf(ctx->m.avctx, "out lambda %d\n", lambda);
642 ctx
->lambda
= lambda
;
646 static int dnxhd_find_qscale(DNXHDEncContext
*ctx
)
652 int last_lower
= INT_MAX
;
656 qscale
= ctx
->qscale
;
659 ctx
->qscale
= qscale
;
660 // XXX avoid recalculating bits
661 ctx
->m
.avctx
->execute(ctx
->m
.avctx
, dnxhd_calc_bits_thread
, (void**)&ctx
->thread
[0], NULL
, ctx
->m
.avctx
->thread_count
);
662 for (y
= 0; y
< ctx
->m
.mb_height
; y
++) {
663 for (x
= 0; x
< ctx
->m
.mb_width
; x
++)
664 bits
+= ctx
->mb_rc
[qscale
][y
*ctx
->m
.mb_width
+x
].bits
;
665 bits
= (bits
+31)&~31; // padding
666 if (bits
> ctx
->frame_bits
)
669 //dprintf(ctx->m.avctx, "%d, qscale %d, bits %d, frame %d, higher %d, lower %d\n",
670 // ctx->m.avctx->frame_number, qscale, bits, ctx->frame_bits, last_higher, last_lower);
671 if (bits
< ctx
->frame_bits
) {
674 if (last_higher
== qscale
- 1) {
675 qscale
= last_higher
;
678 last_lower
= FFMIN(qscale
, last_lower
);
679 if (last_higher
!= 0)
680 qscale
= (qscale
+last_higher
)>>1;
682 qscale
-= down_step
++;
687 if (last_lower
== qscale
+ 1)
689 last_higher
= FFMAX(qscale
, last_higher
);
690 if (last_lower
!= INT_MAX
)
691 qscale
= (qscale
+last_lower
)>>1;
695 if (qscale
>= ctx
->m
.avctx
->qmax
)
699 //dprintf(ctx->m.avctx, "out qscale %d\n", qscale);
700 ctx
->qscale
= qscale
;
704 static int dnxhd_rc_cmp(const void *a
, const void *b
)
706 return ((const RCCMPEntry
*)b
)->value
- ((const RCCMPEntry
*)a
)->value
;
709 static int dnxhd_encode_fast(AVCodecContext
*avctx
, DNXHDEncContext
*ctx
)
713 if ((ret
= dnxhd_find_qscale(ctx
)) < 0)
715 for (y
= 0; y
< ctx
->m
.mb_height
; y
++) {
716 for (x
= 0; x
< ctx
->m
.mb_width
; x
++) {
717 int mb
= y
*ctx
->m
.mb_width
+x
;
719 ctx
->mb_qscale
[mb
] = ctx
->qscale
;
720 ctx
->mb_bits
[mb
] = ctx
->mb_rc
[ctx
->qscale
][mb
].bits
;
721 max_bits
+= ctx
->mb_rc
[ctx
->qscale
][mb
].bits
;
723 delta_bits
= ctx
->mb_rc
[ctx
->qscale
][mb
].bits
-ctx
->mb_rc
[ctx
->qscale
+1][mb
].bits
;
724 ctx
->mb_cmp
[mb
].mb
= mb
;
725 ctx
->mb_cmp
[mb
].value
= delta_bits
?
726 ((ctx
->mb_rc
[ctx
->qscale
][mb
].ssd
-ctx
->mb_rc
[ctx
->qscale
+1][mb
].ssd
)*100)/delta_bits
727 : INT_MIN
; //avoid increasing qscale
730 max_bits
+= 31; //worst padding
734 avctx
->execute(avctx
, dnxhd_mb_var_thread
, (void**)&ctx
->thread
[0], NULL
, avctx
->thread_count
);
735 qsort(ctx
->mb_cmp
, ctx
->m
.mb_num
, sizeof(RCEntry
), dnxhd_rc_cmp
);
736 for (x
= 0; x
< ctx
->m
.mb_num
&& max_bits
> ctx
->frame_bits
; x
++) {
737 int mb
= ctx
->mb_cmp
[x
].mb
;
738 max_bits
-= ctx
->mb_rc
[ctx
->qscale
][mb
].bits
- ctx
->mb_rc
[ctx
->qscale
+1][mb
].bits
;
739 ctx
->mb_qscale
[mb
] = ctx
->qscale
+1;
740 ctx
->mb_bits
[mb
] = ctx
->mb_rc
[ctx
->qscale
+1][mb
].bits
;
746 static void dnxhd_load_picture(DNXHDEncContext
*ctx
, const AVFrame
*frame
)
750 for (i
= 0; i
< 3; i
++) {
751 ctx
->frame
.data
[i
] = frame
->data
[i
];
752 ctx
->frame
.linesize
[i
] = frame
->linesize
[i
];
755 for (i
= 0; i
< ctx
->m
.avctx
->thread_count
; i
++) {
756 ctx
->thread
[i
]->m
.linesize
= ctx
->frame
.linesize
[0]<<ctx
->interlaced
;
757 ctx
->thread
[i
]->m
.uvlinesize
= ctx
->frame
.linesize
[1]<<ctx
->interlaced
;
758 ctx
->thread
[i
]->dct_y_offset
= ctx
->m
.linesize
*8;
759 ctx
->thread
[i
]->dct_uv_offset
= ctx
->m
.uvlinesize
*8;
762 ctx
->frame
.interlaced_frame
= frame
->interlaced_frame
;
763 ctx
->cur_field
= frame
->interlaced_frame
&& !frame
->top_field_first
;
766 static int dnxhd_encode_picture(AVCodecContext
*avctx
, unsigned char *buf
, int buf_size
, const void *data
)
768 DNXHDEncContext
*ctx
= avctx
->priv_data
;
772 if (buf_size
< ctx
->cid_table
->frame_size
) {
773 av_log(avctx
, AV_LOG_ERROR
, "output buffer is too small to compress picture\n");
777 dnxhd_load_picture(ctx
, data
);
780 for (i
= 0; i
< 3; i
++) {
781 ctx
->src
[i
] = ctx
->frame
.data
[i
];
782 if (ctx
->interlaced
&& ctx
->cur_field
)
783 ctx
->src
[i
] += ctx
->frame
.linesize
[i
];
786 dnxhd_write_header(avctx
, buf
);
788 if (avctx
->mb_decision
== FF_MB_DECISION_RD
)
789 ret
= dnxhd_encode_rdo(avctx
, ctx
);
791 ret
= dnxhd_encode_fast(avctx
, ctx
);
793 av_log(avctx
, AV_LOG_ERROR
, "picture could not fit ratecontrol constraints\n");
797 dnxhd_setup_threads_slices(ctx
, buf
);
800 for (i
= 0; i
< ctx
->m
.mb_height
; i
++) {
801 AV_WB32(ctx
->msip
+ i
* 4, offset
);
802 offset
+= ctx
->slice_size
[i
];
803 assert(!(ctx
->slice_size
[i
] & 3));
806 avctx
->execute(avctx
, dnxhd_encode_thread
, (void**)&ctx
->thread
[0], NULL
, avctx
->thread_count
);
808 AV_WB32(buf
+ ctx
->cid_table
->coding_unit_size
- 4, 0x600DC0DE); // EOF
810 if (ctx
->interlaced
&& first_field
) {
813 buf
+= ctx
->cid_table
->coding_unit_size
;
814 buf_size
-= ctx
->cid_table
->coding_unit_size
;
815 goto encode_coding_unit
;
818 ctx
->frame
.quality
= ctx
->qscale
*FF_QP2LAMBDA
;
820 return ctx
->cid_table
->frame_size
;
823 static int dnxhd_encode_end(AVCodecContext
*avctx
)
825 DNXHDEncContext
*ctx
= avctx
->priv_data
;
826 int max_level
= 1<<(ctx
->cid_table
->bit_depth
+2);
829 av_free(ctx
->vlc_codes
-max_level
*2);
830 av_free(ctx
->vlc_bits
-max_level
*2);
831 av_freep(&ctx
->run_codes
);
832 av_freep(&ctx
->run_bits
);
834 av_freep(&ctx
->mb_bits
);
835 av_freep(&ctx
->mb_qscale
);
836 av_freep(&ctx
->mb_rc
);
837 av_freep(&ctx
->mb_cmp
);
838 av_freep(&ctx
->slice_size
);
840 av_freep(&ctx
->qmatrix_c
);
841 av_freep(&ctx
->qmatrix_l
);
842 av_freep(&ctx
->qmatrix_c16
);
843 av_freep(&ctx
->qmatrix_l16
);
845 for (i
= 1; i
< avctx
->thread_count
; i
++)
846 av_freep(&ctx
->thread
[i
]);
851 AVCodec dnxhd_encoder
= {
855 sizeof(DNXHDEncContext
),
857 dnxhd_encode_picture
,
859 .pix_fmts
= (enum PixelFormat
[]){PIX_FMT_YUV422P
, PIX_FMT_NONE
},
860 .long_name
= NULL_IF_CONFIG_SMALL("VC3/DNxHD"),