/*
 * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
 *
 * VC-3 encoder funded by the British Broadcasting Corporation
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
25 #define RC_VARIANCE 1 // use variance or ssd for fast rc
29 #include "mpegvideo.h"
32 int dct_quantize_c(MpegEncContext
*s
, DCTELEM
*block
, int n
, int qscale
, int *overflow
);
34 #define LAMBDA_FRAC_BITS 10
36 static av_always_inline
void dnxhd_get_pixels_8x4(DCTELEM
*restrict block
, const uint8_t *pixels
, int line_size
)
39 for (i
= 0; i
< 4; i
++) {
40 block
[0] = pixels
[0]; block
[1] = pixels
[1];
41 block
[2] = pixels
[2]; block
[3] = pixels
[3];
42 block
[4] = pixels
[4]; block
[5] = pixels
[5];
43 block
[6] = pixels
[6]; block
[7] = pixels
[7];
47 memcpy(block
, block
- 8, sizeof(*block
)*8);
48 memcpy(block
+ 8, block
-16, sizeof(*block
)*8);
49 memcpy(block
+16, block
-24, sizeof(*block
)*8);
50 memcpy(block
+24, block
-32, sizeof(*block
)*8);
53 static int dnxhd_init_vlc(DNXHDEncContext
*ctx
)
56 int max_level
= 1<<(ctx
->cid_table
->bit_depth
+2);
58 CHECKED_ALLOCZ(ctx
->vlc_codes
, max_level
*4*sizeof(*ctx
->vlc_codes
));
59 CHECKED_ALLOCZ(ctx
->vlc_bits
, max_level
*4*sizeof(*ctx
->vlc_bits
));
60 CHECKED_ALLOCZ(ctx
->run_codes
, 63*2);
61 CHECKED_ALLOCZ(ctx
->run_bits
, 63);
63 ctx
->vlc_codes
+= max_level
*2;
64 ctx
->vlc_bits
+= max_level
*2;
65 for (level
= -max_level
; level
< max_level
; level
++) {
66 for (run
= 0; run
< 2; run
++) {
67 int index
= (level
<<1)|run
;
68 int sign
, offset
= 0, alevel
= level
;
70 MASK_ABS(sign
, alevel
);
72 offset
= (alevel
-1)>>6;
75 for (j
= 0; j
< 257; j
++) {
76 if (ctx
->cid_table
->ac_level
[j
] == alevel
&&
77 (!offset
|| (ctx
->cid_table
->ac_index_flag
[j
] && offset
)) &&
78 (!run
|| (ctx
->cid_table
->ac_run_flag
[j
] && run
))) {
79 assert(!ctx
->vlc_codes
[index
]);
81 ctx
->vlc_codes
[index
] = (ctx
->cid_table
->ac_codes
[j
]<<1)|(sign
&1);
82 ctx
->vlc_bits
[index
] = ctx
->cid_table
->ac_bits
[j
]+1;
84 ctx
->vlc_codes
[index
] = ctx
->cid_table
->ac_codes
[j
];
85 ctx
->vlc_bits
[index
] = ctx
->cid_table
->ac_bits
[j
];
90 assert(!alevel
|| j
< 257);
92 ctx
->vlc_codes
[index
] = (ctx
->vlc_codes
[index
]<<ctx
->cid_table
->index_bits
)|offset
;
93 ctx
->vlc_bits
[index
]+= ctx
->cid_table
->index_bits
;
97 for (i
= 0; i
< 62; i
++) {
98 int run
= ctx
->cid_table
->run
[i
];
100 ctx
->run_codes
[run
] = ctx
->cid_table
->run_codes
[i
];
101 ctx
->run_bits
[run
] = ctx
->cid_table
->run_bits
[i
];
108 static int dnxhd_init_qmat(DNXHDEncContext
*ctx
, int lbias
, int cbias
)
110 // init first elem to 1 to avoid div by 0 in convert_matrix
111 uint16_t weight_matrix
[64] = {1,}; // convert_matrix needs uint16_t*
114 CHECKED_ALLOCZ(ctx
->qmatrix_l
, (ctx
->m
.avctx
->qmax
+1) * 64 * sizeof(int));
115 CHECKED_ALLOCZ(ctx
->qmatrix_c
, (ctx
->m
.avctx
->qmax
+1) * 64 * sizeof(int));
116 CHECKED_ALLOCZ(ctx
->qmatrix_l16
, (ctx
->m
.avctx
->qmax
+1) * 64 * 2 * sizeof(uint16_t));
117 CHECKED_ALLOCZ(ctx
->qmatrix_c16
, (ctx
->m
.avctx
->qmax
+1) * 64 * 2 * sizeof(uint16_t));
119 for (i
= 1; i
< 64; i
++) {
120 int j
= ctx
->m
.dsp
.idct_permutation
[ff_zigzag_direct
[i
]];
121 weight_matrix
[j
] = ctx
->cid_table
->luma_weight
[i
];
123 ff_convert_matrix(&ctx
->m
.dsp
, ctx
->qmatrix_l
, ctx
->qmatrix_l16
, weight_matrix
,
124 ctx
->m
.intra_quant_bias
, 1, ctx
->m
.avctx
->qmax
, 1);
125 for (i
= 1; i
< 64; i
++) {
126 int j
= ctx
->m
.dsp
.idct_permutation
[ff_zigzag_direct
[i
]];
127 weight_matrix
[j
] = ctx
->cid_table
->chroma_weight
[i
];
129 ff_convert_matrix(&ctx
->m
.dsp
, ctx
->qmatrix_c
, ctx
->qmatrix_c16
, weight_matrix
,
130 ctx
->m
.intra_quant_bias
, 1, ctx
->m
.avctx
->qmax
, 1);
131 for (qscale
= 1; qscale
<= ctx
->m
.avctx
->qmax
; qscale
++) {
132 for (i
= 0; i
< 64; i
++) {
133 ctx
->qmatrix_l
[qscale
] [i
] <<= 2; ctx
->qmatrix_c
[qscale
] [i
] <<= 2;
134 ctx
->qmatrix_l16
[qscale
][0][i
] <<= 2; ctx
->qmatrix_l16
[qscale
][1][i
] <<= 2;
135 ctx
->qmatrix_c16
[qscale
][0][i
] <<= 2; ctx
->qmatrix_c16
[qscale
][1][i
] <<= 2;
143 static int dnxhd_init_rc(DNXHDEncContext
*ctx
)
145 CHECKED_ALLOCZ(ctx
->mb_rc
, 8160*ctx
->m
.avctx
->qmax
*sizeof(RCEntry
));
146 if (ctx
->m
.avctx
->mb_decision
!= FF_MB_DECISION_RD
)
147 CHECKED_ALLOCZ(ctx
->mb_cmp
, ctx
->m
.mb_num
*sizeof(RCCMPEntry
));
149 ctx
->frame_bits
= (ctx
->cid_table
->coding_unit_size
- 640 - 4) * 8;
151 ctx
->lambda
= 2<<LAMBDA_FRAC_BITS
; // qscale 2
157 static int dnxhd_encode_init(AVCodecContext
*avctx
)
159 DNXHDEncContext
*ctx
= avctx
->priv_data
;
162 ctx
->cid
= ff_dnxhd_find_cid(avctx
);
163 if (!ctx
->cid
|| avctx
->pix_fmt
!= PIX_FMT_YUV422P
) {
164 av_log(avctx
, AV_LOG_ERROR
, "video parameters incompatible with DNxHD\n");
167 av_log(avctx
, AV_LOG_DEBUG
, "cid %d\n", ctx
->cid
);
169 index
= ff_dnxhd_get_cid_table(ctx
->cid
);
170 ctx
->cid_table
= &ff_dnxhd_cid_table
[index
];
172 ctx
->m
.avctx
= avctx
;
176 ctx
->get_pixels_8x4_sym
= dnxhd_get_pixels_8x4
;
178 dsputil_init(&ctx
->m
.dsp
, avctx
);
179 ff_dct_common_init(&ctx
->m
);
181 ff_dnxhd_init_mmx(ctx
);
183 if (!ctx
->m
.dct_quantize
)
184 ctx
->m
.dct_quantize
= dct_quantize_c
;
186 ctx
->m
.mb_height
= (avctx
->height
+ 15) / 16;
187 ctx
->m
.mb_width
= (avctx
->width
+ 15) / 16;
189 if (avctx
->flags
& CODEC_FLAG_INTERLACED_DCT
) {
191 ctx
->m
.mb_height
/= 2;
194 ctx
->m
.mb_num
= ctx
->m
.mb_height
* ctx
->m
.mb_width
;
196 if (avctx
->intra_quant_bias
!= FF_DEFAULT_QUANT_BIAS
)
197 ctx
->m
.intra_quant_bias
= avctx
->intra_quant_bias
;
198 if (dnxhd_init_qmat(ctx
, ctx
->m
.intra_quant_bias
, 0) < 0) // XXX tune lbias/cbias
201 if (dnxhd_init_vlc(ctx
) < 0)
203 if (dnxhd_init_rc(ctx
) < 0)
206 CHECKED_ALLOCZ(ctx
->slice_size
, ctx
->m
.mb_height
*sizeof(uint32_t));
207 CHECKED_ALLOCZ(ctx
->mb_bits
, ctx
->m
.mb_num
*sizeof(uint16_t));
208 CHECKED_ALLOCZ(ctx
->mb_qscale
, ctx
->m
.mb_num
*sizeof(uint8_t));
210 ctx
->frame
.key_frame
= 1;
211 ctx
->frame
.pict_type
= FF_I_TYPE
;
212 ctx
->m
.avctx
->coded_frame
= &ctx
->frame
;
214 if (avctx
->thread_count
> MAX_THREADS
|| (avctx
->thread_count
> ctx
->m
.mb_height
)) {
215 av_log(avctx
, AV_LOG_ERROR
, "too many threads\n");
219 ctx
->thread
[0] = ctx
;
220 for (i
= 1; i
< avctx
->thread_count
; i
++) {
221 ctx
->thread
[i
] = av_malloc(sizeof(DNXHDEncContext
));
222 memcpy(ctx
->thread
[i
], ctx
, sizeof(DNXHDEncContext
));
225 for (i
= 0; i
< avctx
->thread_count
; i
++) {
226 ctx
->thread
[i
]->m
.start_mb_y
= (ctx
->m
.mb_height
*(i
) + avctx
->thread_count
/2) / avctx
->thread_count
;
227 ctx
->thread
[i
]->m
.end_mb_y
= (ctx
->m
.mb_height
*(i
+1) + avctx
->thread_count
/2) / avctx
->thread_count
;
231 fail
: //for CHECKED_ALLOCZ
235 static int dnxhd_write_header(AVCodecContext
*avctx
, uint8_t *buf
)
237 DNXHDEncContext
*ctx
= avctx
->priv_data
;
238 const uint8_t header_prefix
[5] = { 0x00,0x00,0x02,0x80,0x01 };
240 memcpy(buf
, header_prefix
, 5);
241 buf
[5] = ctx
->interlaced
? ctx
->cur_field
+2 : 0x01;
242 buf
[6] = 0x80; // crc flag off
243 buf
[7] = 0xa0; // reserved
244 AV_WB16(buf
+ 0x18, avctx
->height
); // ALPF
245 AV_WB16(buf
+ 0x1a, avctx
->width
); // SPL
246 AV_WB16(buf
+ 0x1d, avctx
->height
); // NAL
248 buf
[0x21] = 0x38; // FIXME 8 bit per comp
249 buf
[0x22] = 0x88 + (ctx
->frame
.interlaced_frame
<<2);
250 AV_WB32(buf
+ 0x28, ctx
->cid
); // CID
251 buf
[0x2c] = ctx
->interlaced
? 0 : 0x80;
253 buf
[0x5f] = 0x01; // UDL
255 buf
[0x167] = 0x02; // reserved
256 AV_WB16(buf
+ 0x16a, ctx
->m
.mb_height
* 4 + 4); // MSIPS
257 buf
[0x16d] = ctx
->m
.mb_height
; // Ns
258 buf
[0x16f] = 0x10; // reserved
260 ctx
->msip
= buf
+ 0x170;
264 static av_always_inline
void dnxhd_encode_dc(DNXHDEncContext
*ctx
, int diff
)
268 nbits
= av_log2_16bit(-2*diff
);
271 nbits
= av_log2_16bit(2*diff
);
273 put_bits(&ctx
->m
.pb
, ctx
->cid_table
->dc_bits
[nbits
] + nbits
,
274 (ctx
->cid_table
->dc_codes
[nbits
]<<nbits
) + (diff
& ((1 << nbits
) - 1)));
277 static av_always_inline
void dnxhd_encode_block(DNXHDEncContext
*ctx
, DCTELEM
*block
, int last_index
, int n
)
279 int last_non_zero
= 0;
282 dnxhd_encode_dc(ctx
, block
[0] - ctx
->m
.last_dc
[n
]);
283 ctx
->m
.last_dc
[n
] = block
[0];
285 for (i
= 1; i
<= last_index
; i
++) {
286 j
= ctx
->m
.intra_scantable
.permutated
[i
];
289 int run_level
= i
- last_non_zero
- 1;
290 int rlevel
= (slevel
<<1)|!!run_level
;
291 put_bits(&ctx
->m
.pb
, ctx
->vlc_bits
[rlevel
], ctx
->vlc_codes
[rlevel
]);
293 put_bits(&ctx
->m
.pb
, ctx
->run_bits
[run_level
], ctx
->run_codes
[run_level
]);
297 put_bits(&ctx
->m
.pb
, ctx
->vlc_bits
[0], ctx
->vlc_codes
[0]); // EOB
300 static av_always_inline
void dnxhd_unquantize_c(DNXHDEncContext
*ctx
, DCTELEM
*block
, int n
, int qscale
, int last_index
)
302 const uint8_t *weight_matrix
;
306 weight_matrix
= (n
&2) ? ctx
->cid_table
->chroma_weight
: ctx
->cid_table
->luma_weight
;
308 for (i
= 1; i
<= last_index
; i
++) {
309 int j
= ctx
->m
.intra_scantable
.permutated
[i
];
313 level
= (1-2*level
) * qscale
* weight_matrix
[i
];
314 if (weight_matrix
[i
] != 32)
319 level
= (2*level
+1) * qscale
* weight_matrix
[i
];
320 if (weight_matrix
[i
] != 32)
329 static av_always_inline
int dnxhd_ssd_block(DCTELEM
*qblock
, DCTELEM
*block
)
333 for (i
= 0; i
< 64; i
++)
334 score
+= (block
[i
]-qblock
[i
])*(block
[i
]-qblock
[i
]);
338 static av_always_inline
int dnxhd_calc_ac_bits(DNXHDEncContext
*ctx
, DCTELEM
*block
, int last_index
)
340 int last_non_zero
= 0;
343 for (i
= 1; i
<= last_index
; i
++) {
344 j
= ctx
->m
.intra_scantable
.permutated
[i
];
347 int run_level
= i
- last_non_zero
- 1;
348 bits
+= ctx
->vlc_bits
[(level
<<1)|!!run_level
]+ctx
->run_bits
[run_level
];
355 static av_always_inline
void dnxhd_get_blocks(DNXHDEncContext
*ctx
, int mb_x
, int mb_y
)
357 const uint8_t *ptr_y
= ctx
->thread
[0]->src
[0] + ((mb_y
<< 4) * ctx
->m
.linesize
) + (mb_x
<< 4);
358 const uint8_t *ptr_u
= ctx
->thread
[0]->src
[1] + ((mb_y
<< 4) * ctx
->m
.uvlinesize
) + (mb_x
<< 3);
359 const uint8_t *ptr_v
= ctx
->thread
[0]->src
[2] + ((mb_y
<< 4) * ctx
->m
.uvlinesize
) + (mb_x
<< 3);
360 DSPContext
*dsp
= &ctx
->m
.dsp
;
362 dsp
->get_pixels(ctx
->blocks
[0], ptr_y
, ctx
->m
.linesize
);
363 dsp
->get_pixels(ctx
->blocks
[1], ptr_y
+ 8, ctx
->m
.linesize
);
364 dsp
->get_pixels(ctx
->blocks
[2], ptr_u
, ctx
->m
.uvlinesize
);
365 dsp
->get_pixels(ctx
->blocks
[3], ptr_v
, ctx
->m
.uvlinesize
);
367 if (mb_y
+1 == ctx
->m
.mb_height
&& ctx
->m
.avctx
->height
== 1080) {
368 if (ctx
->interlaced
) {
369 ctx
->get_pixels_8x4_sym(ctx
->blocks
[4], ptr_y
+ ctx
->dct_y_offset
, ctx
->m
.linesize
);
370 ctx
->get_pixels_8x4_sym(ctx
->blocks
[5], ptr_y
+ ctx
->dct_y_offset
+ 8, ctx
->m
.linesize
);
371 ctx
->get_pixels_8x4_sym(ctx
->blocks
[6], ptr_u
+ ctx
->dct_uv_offset
, ctx
->m
.uvlinesize
);
372 ctx
->get_pixels_8x4_sym(ctx
->blocks
[7], ptr_v
+ ctx
->dct_uv_offset
, ctx
->m
.uvlinesize
);
374 dsp
->clear_block(ctx
->blocks
[4]); dsp
->clear_block(ctx
->blocks
[5]);
375 dsp
->clear_block(ctx
->blocks
[6]); dsp
->clear_block(ctx
->blocks
[7]);
378 dsp
->get_pixels(ctx
->blocks
[4], ptr_y
+ ctx
->dct_y_offset
, ctx
->m
.linesize
);
379 dsp
->get_pixels(ctx
->blocks
[5], ptr_y
+ ctx
->dct_y_offset
+ 8, ctx
->m
.linesize
);
380 dsp
->get_pixels(ctx
->blocks
[6], ptr_u
+ ctx
->dct_uv_offset
, ctx
->m
.uvlinesize
);
381 dsp
->get_pixels(ctx
->blocks
[7], ptr_v
+ ctx
->dct_uv_offset
, ctx
->m
.uvlinesize
);
385 static av_always_inline
int dnxhd_switch_matrix(DNXHDEncContext
*ctx
, int i
)
388 ctx
->m
.q_intra_matrix16
= ctx
->qmatrix_c16
;
389 ctx
->m
.q_intra_matrix
= ctx
->qmatrix_c
;
392 ctx
->m
.q_intra_matrix16
= ctx
->qmatrix_l16
;
393 ctx
->m
.q_intra_matrix
= ctx
->qmatrix_l
;
398 static int dnxhd_calc_bits_thread(AVCodecContext
*avctx
, void *arg
)
400 DNXHDEncContext
*ctx
= *(void**)arg
;
402 int qscale
= ctx
->thread
[0]->qscale
;
404 for (mb_y
= ctx
->m
.start_mb_y
; mb_y
< ctx
->m
.end_mb_y
; mb_y
++) {
407 ctx
->m
.last_dc
[2] = 1024;
409 for (mb_x
= 0; mb_x
< ctx
->m
.mb_width
; mb_x
++) {
410 unsigned mb
= mb_y
* ctx
->m
.mb_width
+ mb_x
;
416 dnxhd_get_blocks(ctx
, mb_x
, mb_y
);
418 for (i
= 0; i
< 8; i
++) {
419 DECLARE_ALIGNED_16(DCTELEM
, block
[64]);
420 DCTELEM
*src_block
= ctx
->blocks
[i
];
421 int overflow
, nbits
, diff
, last_index
;
422 int n
= dnxhd_switch_matrix(ctx
, i
);
424 memcpy(block
, src_block
, sizeof(block
));
425 last_index
= ctx
->m
.dct_quantize((MpegEncContext
*)ctx
, block
, i
, qscale
, &overflow
);
426 ac_bits
+= dnxhd_calc_ac_bits(ctx
, block
, last_index
);
428 diff
= block
[0] - ctx
->m
.last_dc
[n
];
429 if (diff
< 0) nbits
= av_log2_16bit(-2*diff
);
430 else nbits
= av_log2_16bit( 2*diff
);
431 dc_bits
+= ctx
->cid_table
->dc_bits
[nbits
] + nbits
;
433 ctx
->m
.last_dc
[n
] = block
[0];
435 if (avctx
->mb_decision
== FF_MB_DECISION_RD
|| !RC_VARIANCE
) {
436 dnxhd_unquantize_c(ctx
, block
, i
, qscale
, last_index
);
437 ctx
->m
.dsp
.idct(block
);
438 ssd
+= dnxhd_ssd_block(block
, src_block
);
441 ctx
->mb_rc
[qscale
][mb
].ssd
= ssd
;
442 ctx
->mb_rc
[qscale
][mb
].bits
= ac_bits
+dc_bits
+12+8*ctx
->vlc_bits
[0];
448 static int dnxhd_encode_thread(AVCodecContext
*avctx
, void *arg
)
450 DNXHDEncContext
*ctx
= *(void**)arg
;
453 for (mb_y
= ctx
->m
.start_mb_y
; mb_y
< ctx
->m
.end_mb_y
; mb_y
++) {
456 ctx
->m
.last_dc
[2] = 1024;
457 for (mb_x
= 0; mb_x
< ctx
->m
.mb_width
; mb_x
++) {
458 unsigned mb
= mb_y
* ctx
->m
.mb_width
+ mb_x
;
459 int qscale
= ctx
->mb_qscale
[mb
];
462 put_bits(&ctx
->m
.pb
, 12, qscale
<<1);
464 dnxhd_get_blocks(ctx
, mb_x
, mb_y
);
466 for (i
= 0; i
< 8; i
++) {
467 DCTELEM
*block
= ctx
->blocks
[i
];
468 int last_index
, overflow
;
469 int n
= dnxhd_switch_matrix(ctx
, i
);
470 last_index
= ctx
->m
.dct_quantize((MpegEncContext
*)ctx
, block
, i
, qscale
, &overflow
);
472 dnxhd_encode_block(ctx
, block
, last_index
, n
);
473 //STOP_TIMER("encode_block");
476 if (put_bits_count(&ctx
->m
.pb
)&31)
477 put_bits(&ctx
->m
.pb
, 32-(put_bits_count(&ctx
->m
.pb
)&31), 0);
479 flush_put_bits(&ctx
->m
.pb
);
483 static void dnxhd_setup_threads_slices(DNXHDEncContext
*ctx
, uint8_t *buf
)
487 for (i
= 0; i
< ctx
->m
.avctx
->thread_count
; i
++) {
489 for (mb_y
= ctx
->thread
[i
]->m
.start_mb_y
; mb_y
< ctx
->thread
[i
]->m
.end_mb_y
; mb_y
++) {
490 ctx
->slice_size
[mb_y
] = 0;
491 for (mb_x
= 0; mb_x
< ctx
->m
.mb_width
; mb_x
++) {
492 unsigned mb
= mb_y
* ctx
->m
.mb_width
+ mb_x
;
493 ctx
->slice_size
[mb_y
] += ctx
->mb_bits
[mb
];
495 ctx
->slice_size
[mb_y
] = (ctx
->slice_size
[mb_y
]+31)&~31;
496 ctx
->slice_size
[mb_y
] >>= 3;
497 thread_size
+= ctx
->slice_size
[mb_y
];
499 init_put_bits(&ctx
->thread
[i
]->m
.pb
, buf
+ 640 + offset
, thread_size
);
500 offset
+= thread_size
;
504 static int dnxhd_mb_var_thread(AVCodecContext
*avctx
, void *arg
)
506 DNXHDEncContext
*ctx
= *(void**)arg
;
508 for (mb_y
= ctx
->m
.start_mb_y
; mb_y
< ctx
->m
.end_mb_y
; mb_y
++) {
509 for (mb_x
= 0; mb_x
< ctx
->m
.mb_width
; mb_x
++) {
510 unsigned mb
= mb_y
* ctx
->m
.mb_width
+ mb_x
;
511 uint8_t *pix
= ctx
->thread
[0]->src
[0] + ((mb_y
<<4) * ctx
->m
.linesize
) + (mb_x
<<4);
512 int sum
= ctx
->m
.dsp
.pix_sum(pix
, ctx
->m
.linesize
);
513 int varc
= (ctx
->m
.dsp
.pix_norm1(pix
, ctx
->m
.linesize
) - (((unsigned)(sum
*sum
))>>8)+128)>>8;
514 ctx
->mb_cmp
[mb
].value
= varc
;
515 ctx
->mb_cmp
[mb
].mb
= mb
;
521 static int dnxhd_encode_rdo(AVCodecContext
*avctx
, DNXHDEncContext
*ctx
)
523 int lambda
, up_step
, down_step
;
524 int last_lower
= INT_MAX
, last_higher
= 0;
527 for (q
= 1; q
< avctx
->qmax
; q
++) {
529 avctx
->execute(avctx
, dnxhd_calc_bits_thread
, (void**)&ctx
->thread
[0], NULL
, avctx
->thread_count
, sizeof(void*));
531 up_step
= down_step
= 2<<LAMBDA_FRAC_BITS
;
532 lambda
= ctx
->lambda
;
537 if (lambda
== last_higher
) {
539 end
= 1; // need to set final qscales/bits
541 for (y
= 0; y
< ctx
->m
.mb_height
; y
++) {
542 for (x
= 0; x
< ctx
->m
.mb_width
; x
++) {
543 unsigned min
= UINT_MAX
;
545 int mb
= y
*ctx
->m
.mb_width
+x
;
546 for (q
= 1; q
< avctx
->qmax
; q
++) {
547 unsigned score
= ctx
->mb_rc
[q
][mb
].bits
*lambda
+(ctx
->mb_rc
[q
][mb
].ssd
<<LAMBDA_FRAC_BITS
);
553 bits
+= ctx
->mb_rc
[qscale
][mb
].bits
;
554 ctx
->mb_qscale
[mb
] = qscale
;
555 ctx
->mb_bits
[mb
] = ctx
->mb_rc
[qscale
][mb
].bits
;
557 bits
= (bits
+31)&~31; // padding
558 if (bits
> ctx
->frame_bits
)
561 //dprintf(ctx->m.avctx, "lambda %d, up %u, down %u, bits %d, frame %d\n",
562 // lambda, last_higher, last_lower, bits, ctx->frame_bits);
564 if (bits
> ctx
->frame_bits
)
568 if (bits
< ctx
->frame_bits
) {
569 last_lower
= FFMIN(lambda
, last_lower
);
570 if (last_higher
!= 0)
571 lambda
= (lambda
+last_higher
)>>1;
574 down_step
*= 5; // XXX tune ?
575 up_step
= 1<<LAMBDA_FRAC_BITS
;
576 lambda
= FFMAX(1, lambda
);
577 if (lambda
== last_lower
)
580 last_higher
= FFMAX(lambda
, last_higher
);
581 if (last_lower
!= INT_MAX
)
582 lambda
= (lambda
+last_lower
)>>1;
586 down_step
= 1<<LAMBDA_FRAC_BITS
;
589 //dprintf(ctx->m.avctx, "out lambda %d\n", lambda);
590 ctx
->lambda
= lambda
;
594 static int dnxhd_find_qscale(DNXHDEncContext
*ctx
)
600 int last_lower
= INT_MAX
;
604 qscale
= ctx
->qscale
;
607 ctx
->qscale
= qscale
;
608 // XXX avoid recalculating bits
609 ctx
->m
.avctx
->execute(ctx
->m
.avctx
, dnxhd_calc_bits_thread
, (void**)&ctx
->thread
[0], NULL
, ctx
->m
.avctx
->thread_count
, sizeof(void*));
610 for (y
= 0; y
< ctx
->m
.mb_height
; y
++) {
611 for (x
= 0; x
< ctx
->m
.mb_width
; x
++)
612 bits
+= ctx
->mb_rc
[qscale
][y
*ctx
->m
.mb_width
+x
].bits
;
613 bits
= (bits
+31)&~31; // padding
614 if (bits
> ctx
->frame_bits
)
617 //dprintf(ctx->m.avctx, "%d, qscale %d, bits %d, frame %d, higher %d, lower %d\n",
618 // ctx->m.avctx->frame_number, qscale, bits, ctx->frame_bits, last_higher, last_lower);
619 if (bits
< ctx
->frame_bits
) {
622 if (last_higher
== qscale
- 1) {
623 qscale
= last_higher
;
626 last_lower
= FFMIN(qscale
, last_lower
);
627 if (last_higher
!= 0)
628 qscale
= (qscale
+last_higher
)>>1;
630 qscale
-= down_step
++;
635 if (last_lower
== qscale
+ 1)
637 last_higher
= FFMAX(qscale
, last_higher
);
638 if (last_lower
!= INT_MAX
)
639 qscale
= (qscale
+last_lower
)>>1;
643 if (qscale
>= ctx
->m
.avctx
->qmax
)
647 //dprintf(ctx->m.avctx, "out qscale %d\n", qscale);
648 ctx
->qscale
= qscale
;
652 static int dnxhd_rc_cmp(const void *a
, const void *b
)
654 return ((const RCCMPEntry
*)b
)->value
- ((const RCCMPEntry
*)a
)->value
;
657 static int dnxhd_encode_fast(AVCodecContext
*avctx
, DNXHDEncContext
*ctx
)
661 if ((ret
= dnxhd_find_qscale(ctx
)) < 0)
663 for (y
= 0; y
< ctx
->m
.mb_height
; y
++) {
664 for (x
= 0; x
< ctx
->m
.mb_width
; x
++) {
665 int mb
= y
*ctx
->m
.mb_width
+x
;
667 ctx
->mb_qscale
[mb
] = ctx
->qscale
;
668 ctx
->mb_bits
[mb
] = ctx
->mb_rc
[ctx
->qscale
][mb
].bits
;
669 max_bits
+= ctx
->mb_rc
[ctx
->qscale
][mb
].bits
;
671 delta_bits
= ctx
->mb_rc
[ctx
->qscale
][mb
].bits
-ctx
->mb_rc
[ctx
->qscale
+1][mb
].bits
;
672 ctx
->mb_cmp
[mb
].mb
= mb
;
673 ctx
->mb_cmp
[mb
].value
= delta_bits
?
674 ((ctx
->mb_rc
[ctx
->qscale
][mb
].ssd
-ctx
->mb_rc
[ctx
->qscale
+1][mb
].ssd
)*100)/delta_bits
675 : INT_MIN
; //avoid increasing qscale
678 max_bits
+= 31; //worst padding
682 avctx
->execute(avctx
, dnxhd_mb_var_thread
, (void**)&ctx
->thread
[0], NULL
, avctx
->thread_count
, sizeof(void*));
683 qsort(ctx
->mb_cmp
, ctx
->m
.mb_num
, sizeof(RCEntry
), dnxhd_rc_cmp
);
684 for (x
= 0; x
< ctx
->m
.mb_num
&& max_bits
> ctx
->frame_bits
; x
++) {
685 int mb
= ctx
->mb_cmp
[x
].mb
;
686 max_bits
-= ctx
->mb_rc
[ctx
->qscale
][mb
].bits
- ctx
->mb_rc
[ctx
->qscale
+1][mb
].bits
;
687 ctx
->mb_qscale
[mb
] = ctx
->qscale
+1;
688 ctx
->mb_bits
[mb
] = ctx
->mb_rc
[ctx
->qscale
+1][mb
].bits
;
694 static void dnxhd_load_picture(DNXHDEncContext
*ctx
, const AVFrame
*frame
)
698 for (i
= 0; i
< 3; i
++) {
699 ctx
->frame
.data
[i
] = frame
->data
[i
];
700 ctx
->frame
.linesize
[i
] = frame
->linesize
[i
];
703 for (i
= 0; i
< ctx
->m
.avctx
->thread_count
; i
++) {
704 ctx
->thread
[i
]->m
.linesize
= ctx
->frame
.linesize
[0]<<ctx
->interlaced
;
705 ctx
->thread
[i
]->m
.uvlinesize
= ctx
->frame
.linesize
[1]<<ctx
->interlaced
;
706 ctx
->thread
[i
]->dct_y_offset
= ctx
->m
.linesize
*8;
707 ctx
->thread
[i
]->dct_uv_offset
= ctx
->m
.uvlinesize
*8;
710 ctx
->frame
.interlaced_frame
= frame
->interlaced_frame
;
711 ctx
->cur_field
= frame
->interlaced_frame
&& !frame
->top_field_first
;
714 static int dnxhd_encode_picture(AVCodecContext
*avctx
, unsigned char *buf
, int buf_size
, const void *data
)
716 DNXHDEncContext
*ctx
= avctx
->priv_data
;
720 if (buf_size
< ctx
->cid_table
->frame_size
) {
721 av_log(avctx
, AV_LOG_ERROR
, "output buffer is too small to compress picture\n");
725 dnxhd_load_picture(ctx
, data
);
728 for (i
= 0; i
< 3; i
++) {
729 ctx
->src
[i
] = ctx
->frame
.data
[i
];
730 if (ctx
->interlaced
&& ctx
->cur_field
)
731 ctx
->src
[i
] += ctx
->frame
.linesize
[i
];
734 dnxhd_write_header(avctx
, buf
);
736 if (avctx
->mb_decision
== FF_MB_DECISION_RD
)
737 ret
= dnxhd_encode_rdo(avctx
, ctx
);
739 ret
= dnxhd_encode_fast(avctx
, ctx
);
741 av_log(avctx
, AV_LOG_ERROR
, "picture could not fit ratecontrol constraints\n");
745 dnxhd_setup_threads_slices(ctx
, buf
);
748 for (i
= 0; i
< ctx
->m
.mb_height
; i
++) {
749 AV_WB32(ctx
->msip
+ i
* 4, offset
);
750 offset
+= ctx
->slice_size
[i
];
751 assert(!(ctx
->slice_size
[i
] & 3));
754 avctx
->execute(avctx
, dnxhd_encode_thread
, (void**)&ctx
->thread
[0], NULL
, avctx
->thread_count
, sizeof(void*));
756 AV_WB32(buf
+ ctx
->cid_table
->coding_unit_size
- 4, 0x600DC0DE); // EOF
758 if (ctx
->interlaced
&& first_field
) {
761 buf
+= ctx
->cid_table
->coding_unit_size
;
762 buf_size
-= ctx
->cid_table
->coding_unit_size
;
763 goto encode_coding_unit
;
766 ctx
->frame
.quality
= ctx
->qscale
*FF_QP2LAMBDA
;
768 return ctx
->cid_table
->frame_size
;
771 static int dnxhd_encode_end(AVCodecContext
*avctx
)
773 DNXHDEncContext
*ctx
= avctx
->priv_data
;
774 int max_level
= 1<<(ctx
->cid_table
->bit_depth
+2);
777 av_free(ctx
->vlc_codes
-max_level
*2);
778 av_free(ctx
->vlc_bits
-max_level
*2);
779 av_freep(&ctx
->run_codes
);
780 av_freep(&ctx
->run_bits
);
782 av_freep(&ctx
->mb_bits
);
783 av_freep(&ctx
->mb_qscale
);
784 av_freep(&ctx
->mb_rc
);
785 av_freep(&ctx
->mb_cmp
);
786 av_freep(&ctx
->slice_size
);
788 av_freep(&ctx
->qmatrix_c
);
789 av_freep(&ctx
->qmatrix_l
);
790 av_freep(&ctx
->qmatrix_c16
);
791 av_freep(&ctx
->qmatrix_l16
);
793 for (i
= 1; i
< avctx
->thread_count
; i
++)
794 av_freep(&ctx
->thread
[i
]);
799 AVCodec dnxhd_encoder
= {
803 sizeof(DNXHDEncContext
),
805 dnxhd_encode_picture
,
807 .pix_fmts
= (enum PixelFormat
[]){PIX_FMT_YUV422P
, PIX_FMT_NONE
},
808 .long_name
= NULL_IF_CONFIG_SMALL("VC3/DNxHD"),