2 * H.26L/H.264/AVC/JVT/14496-10/... decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "libavutil/imgutils.h"
31 #include "cabac_functions.h"
34 #include "mpegvideo.h"
37 #include "h264chroma.h"
38 #include "h264_mvpred.h"
41 #include "rectangle.h"
44 #include "vdpau_internal.h"
45 #include "libavutil/avassert.h"
50 const uint16_t ff_h264_mb_sizes
[4] = { 256, 384, 512, 768 };
52 static const uint8_t rem6
[QP_MAX_NUM
+ 1] = {
53 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
54 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
55 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
58 static const uint8_t div6
[QP_MAX_NUM
+ 1] = {
59 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3,
60 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6,
61 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
64 static const enum AVPixelFormat hwaccel_pixfmt_list_h264_jpeg_420
[] = {
65 #if CONFIG_H264_DXVA2_HWACCEL
68 #if CONFIG_H264_VAAPI_HWACCEL
71 #if CONFIG_H264_VDA_HWACCEL
74 #if CONFIG_H264_VDPAU_HWACCEL
82 * Check if the top & left blocks are available if needed and
83 * change the dc mode so it only uses the available blocks.
85 int ff_h264_check_intra4x4_pred_mode(H264Context
*h
)
87 MpegEncContext
*const s
= &h
->s
;
88 static const int8_t top
[12] = {
89 -1, 0, LEFT_DC_PRED
, -1, -1, -1, -1, -1, 0
91 static const int8_t left
[12] = {
92 0, -1, TOP_DC_PRED
, 0, -1, -1, -1, 0, -1, DC_128_PRED
96 if (!(h
->top_samples_available
& 0x8000)) {
97 for (i
= 0; i
< 4; i
++) {
98 int status
= top
[h
->intra4x4_pred_mode_cache
[scan8
[0] + i
]];
100 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
101 "top block unavailable for requested intra4x4 mode %d at %d %d\n",
102 status
, s
->mb_x
, s
->mb_y
);
105 h
->intra4x4_pred_mode_cache
[scan8
[0] + i
] = status
;
110 if ((h
->left_samples_available
& 0x8888) != 0x8888) {
111 static const int mask
[4] = { 0x8000, 0x2000, 0x80, 0x20 };
112 for (i
= 0; i
< 4; i
++)
113 if (!(h
->left_samples_available
& mask
[i
])) {
114 int status
= left
[h
->intra4x4_pred_mode_cache
[scan8
[0] + 8 * i
]];
116 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
117 "left block unavailable for requested intra4x4 mode %d at %d %d\n",
118 status
, s
->mb_x
, s
->mb_y
);
121 h
->intra4x4_pred_mode_cache
[scan8
[0] + 8 * i
] = status
;
127 } // FIXME cleanup like ff_h264_check_intra_pred_mode
130 * Check if the top & left blocks are available if needed and
131 * change the dc mode so it only uses the available blocks.
133 int ff_h264_check_intra_pred_mode(H264Context
*h
, int mode
, int is_chroma
)
135 MpegEncContext
*const s
= &h
->s
;
136 static const int8_t top
[7] = { LEFT_DC_PRED8x8
, 1, -1, -1 };
137 static const int8_t left
[7] = { TOP_DC_PRED8x8
, -1, 2, -1, DC_128_PRED8x8
};
140 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
141 "out of range intra chroma pred mode at %d %d\n",
146 if (!(h
->top_samples_available
& 0x8000)) {
149 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
150 "top block unavailable for requested intra mode at %d %d\n",
156 if ((h
->left_samples_available
& 0x8080) != 0x8080) {
158 if (is_chroma
&& (h
->left_samples_available
& 0x8080)) {
159 // mad cow disease mode, aka MBAFF + constrained_intra_pred
160 mode
= ALZHEIMER_DC_L0T_PRED8x8
+
161 (!(h
->left_samples_available
& 0x8000)) +
162 2 * (mode
== DC_128_PRED8x8
);
165 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
166 "left block unavailable for requested intra mode at %d %d\n",
175 const uint8_t *ff_h264_decode_nal(H264Context
*h
, const uint8_t *src
,
176 int *dst_length
, int *consumed
, int length
)
182 // src[0]&0x80; // forbidden bit
183 h
->nal_ref_idc
= src
[0] >> 5;
184 h
->nal_unit_type
= src
[0] & 0x1F;
189 #define STARTCODE_TEST \
190 if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
191 if (src[i + 2] != 3) { \
192 /* startcode, so we must be past the end */ \
197 #if HAVE_FAST_UNALIGNED
198 #define FIND_FIRST_ZERO \
199 if (i > 0 && !src[i]) \
204 for (i
= 0; i
+ 1 < length
; i
+= 9) {
205 if (!((~AV_RN64A(src
+ i
) &
206 (AV_RN64A(src
+ i
) - 0x0100010001000101ULL
)) &
207 0x8000800080008080ULL
))
214 for (i
= 0; i
+ 1 < length
; i
+= 5) {
215 if (!((~AV_RN32A(src
+ i
) &
216 (AV_RN32A(src
+ i
) - 0x01000101U
)) &
225 for (i
= 0; i
+ 1 < length
; i
+= 2) {
228 if (i
> 0 && src
[i
- 1] == 0)
234 if (i
>= length
- 1) { // no escaped 0
235 *dst_length
= length
;
236 *consumed
= length
+ 1; // +1 for the header
240 // use second escape buffer for inter data
241 bufidx
= h
->nal_unit_type
== NAL_DPC
? 1 : 0;
242 av_fast_malloc(&h
->rbsp_buffer
[bufidx
], &h
->rbsp_buffer_size
[bufidx
],
243 length
+ FF_INPUT_BUFFER_PADDING_SIZE
);
244 dst
= h
->rbsp_buffer
[bufidx
];
251 while (si
+ 2 < length
) {
252 // remove escapes (very rare 1:2^22)
253 if (src
[si
+ 2] > 3) {
254 dst
[di
++] = src
[si
++];
255 dst
[di
++] = src
[si
++];
256 } else if (src
[si
] == 0 && src
[si
+ 1] == 0) {
257 if (src
[si
+ 2] == 3) { // escape
262 } else // next start code
266 dst
[di
++] = src
[si
++];
269 dst
[di
++] = src
[si
++];
272 memset(dst
+ di
, 0, FF_INPUT_BUFFER_PADDING_SIZE
);
275 *consumed
= si
+ 1; // +1 for the header
276 /* FIXME store exact number of bits in the getbitcontext
277 * (it is needed for decoding) */
282 * Identify the exact end of the bitstream
283 * @return the length of the trailing, or 0 if damaged
285 static int decode_rbsp_trailing(H264Context
*h
, const uint8_t *src
)
290 tprintf(h
->s
.avctx
, "rbsp trailing %X\n", v
);
292 for (r
= 1; r
< 9; r
++) {
300 static inline int get_lowest_part_list_y(H264Context
*h
, Picture
*pic
, int n
,
301 int height
, int y_offset
, int list
)
303 int raw_my
= h
->mv_cache
[list
][scan8
[n
]][1];
304 int filter_height_up
= (raw_my
& 3) ? 2 : 0;
305 int filter_height_down
= (raw_my
& 3) ? 3 : 0;
306 int full_my
= (raw_my
>> 2) + y_offset
;
307 int top
= full_my
- filter_height_up
;
308 int bottom
= full_my
+ filter_height_down
+ height
;
310 return FFMAX(abs(top
), bottom
);
313 static inline void get_lowest_part_y(H264Context
*h
, int refs
[2][48], int n
,
314 int height
, int y_offset
, int list0
,
315 int list1
, int *nrefs
)
317 MpegEncContext
*const s
= &h
->s
;
320 y_offset
+= 16 * (s
->mb_y
>> MB_FIELD
);
323 int ref_n
= h
->ref_cache
[0][scan8
[n
]];
324 Picture
*ref
= &h
->ref_list
[0][ref_n
];
326 // Error resilience puts the current picture in the ref list.
327 // Don't try to wait on these as it will cause a deadlock.
328 // Fields can wait on each other, though.
329 if (ref
->f
.thread_opaque
!= s
->current_picture
.f
.thread_opaque
||
330 (ref
->f
.reference
& 3) != s
->picture_structure
) {
331 my
= get_lowest_part_list_y(h
, ref
, n
, height
, y_offset
, 0);
332 if (refs
[0][ref_n
] < 0)
334 refs
[0][ref_n
] = FFMAX(refs
[0][ref_n
], my
);
339 int ref_n
= h
->ref_cache
[1][scan8
[n
]];
340 Picture
*ref
= &h
->ref_list
[1][ref_n
];
342 if (ref
->f
.thread_opaque
!= s
->current_picture
.f
.thread_opaque
||
343 (ref
->f
.reference
& 3) != s
->picture_structure
) {
344 my
= get_lowest_part_list_y(h
, ref
, n
, height
, y_offset
, 1);
345 if (refs
[1][ref_n
] < 0)
347 refs
[1][ref_n
] = FFMAX(refs
[1][ref_n
], my
);
353 * Wait until all reference frames are available for MC operations.
355 * @param h the H264 context
357 static void await_references(H264Context
*h
)
359 MpegEncContext
*const s
= &h
->s
;
360 const int mb_xy
= h
->mb_xy
;
361 const int mb_type
= s
->current_picture
.f
.mb_type
[mb_xy
];
363 int nrefs
[2] = { 0 };
366 memset(refs
, -1, sizeof(refs
));
368 if (IS_16X16(mb_type
)) {
369 get_lowest_part_y(h
, refs
, 0, 16, 0,
370 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1), nrefs
);
371 } else if (IS_16X8(mb_type
)) {
372 get_lowest_part_y(h
, refs
, 0, 8, 0,
373 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1), nrefs
);
374 get_lowest_part_y(h
, refs
, 8, 8, 8,
375 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1), nrefs
);
376 } else if (IS_8X16(mb_type
)) {
377 get_lowest_part_y(h
, refs
, 0, 16, 0,
378 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1), nrefs
);
379 get_lowest_part_y(h
, refs
, 4, 16, 0,
380 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1), nrefs
);
384 assert(IS_8X8(mb_type
));
386 for (i
= 0; i
< 4; i
++) {
387 const int sub_mb_type
= h
->sub_mb_type
[i
];
389 int y_offset
= (i
& 2) << 2;
391 if (IS_SUB_8X8(sub_mb_type
)) {
392 get_lowest_part_y(h
, refs
, n
, 8, y_offset
,
393 IS_DIR(sub_mb_type
, 0, 0),
394 IS_DIR(sub_mb_type
, 0, 1),
396 } else if (IS_SUB_8X4(sub_mb_type
)) {
397 get_lowest_part_y(h
, refs
, n
, 4, y_offset
,
398 IS_DIR(sub_mb_type
, 0, 0),
399 IS_DIR(sub_mb_type
, 0, 1),
401 get_lowest_part_y(h
, refs
, n
+ 2, 4, y_offset
+ 4,
402 IS_DIR(sub_mb_type
, 0, 0),
403 IS_DIR(sub_mb_type
, 0, 1),
405 } else if (IS_SUB_4X8(sub_mb_type
)) {
406 get_lowest_part_y(h
, refs
, n
, 8, y_offset
,
407 IS_DIR(sub_mb_type
, 0, 0),
408 IS_DIR(sub_mb_type
, 0, 1),
410 get_lowest_part_y(h
, refs
, n
+ 1, 8, y_offset
,
411 IS_DIR(sub_mb_type
, 0, 0),
412 IS_DIR(sub_mb_type
, 0, 1),
416 assert(IS_SUB_4X4(sub_mb_type
));
417 for (j
= 0; j
< 4; j
++) {
418 int sub_y_offset
= y_offset
+ 2 * (j
& 2);
419 get_lowest_part_y(h
, refs
, n
+ j
, 4, sub_y_offset
,
420 IS_DIR(sub_mb_type
, 0, 0),
421 IS_DIR(sub_mb_type
, 0, 1),
428 for (list
= h
->list_count
- 1; list
>= 0; list
--)
429 for (ref
= 0; ref
< 48 && nrefs
[list
]; ref
++) {
430 int row
= refs
[list
][ref
];
432 Picture
*ref_pic
= &h
->ref_list
[list
][ref
];
433 int ref_field
= ref_pic
->f
.reference
- 1;
434 int ref_field_picture
= ref_pic
->field_picture
;
435 int pic_height
= 16 * s
->mb_height
>> ref_field_picture
;
440 if (!FIELD_PICTURE
&& ref_field_picture
) { // frame referencing two fields
441 ff_thread_await_progress(&ref_pic
->f
,
442 FFMIN((row
>> 1) - !(row
& 1),
445 ff_thread_await_progress(&ref_pic
->f
,
446 FFMIN((row
>> 1), pic_height
- 1),
448 } else if (FIELD_PICTURE
&& !ref_field_picture
) { // field referencing one field of a frame
449 ff_thread_await_progress(&ref_pic
->f
,
450 FFMIN(row
* 2 + ref_field
,
453 } else if (FIELD_PICTURE
) {
454 ff_thread_await_progress(&ref_pic
->f
,
455 FFMIN(row
, pic_height
- 1),
458 ff_thread_await_progress(&ref_pic
->f
,
459 FFMIN(row
, pic_height
- 1),
466 static av_always_inline
void mc_dir_part(H264Context
*h
, Picture
*pic
,
467 int n
, int square
, int height
,
469 uint8_t *dest_y
, uint8_t *dest_cb
,
471 int src_x_offset
, int src_y_offset
,
472 qpel_mc_func
*qpix_op
,
473 h264_chroma_mc_func chroma_op
,
474 int pixel_shift
, int chroma_idc
)
476 MpegEncContext
*const s
= &h
->s
;
477 const int mx
= h
->mv_cache
[list
][scan8
[n
]][0] + src_x_offset
* 8;
478 int my
= h
->mv_cache
[list
][scan8
[n
]][1] + src_y_offset
* 8;
479 const int luma_xy
= (mx
& 3) + ((my
& 3) << 2);
480 int offset
= ((mx
>> 2) << pixel_shift
) + (my
>> 2) * h
->mb_linesize
;
481 uint8_t *src_y
= pic
->f
.data
[0] + offset
;
482 uint8_t *src_cb
, *src_cr
;
483 int extra_width
= h
->emu_edge_width
;
484 int extra_height
= h
->emu_edge_height
;
486 const int full_mx
= mx
>> 2;
487 const int full_my
= my
>> 2;
488 const int pic_width
= 16 * s
->mb_width
;
489 const int pic_height
= 16 * s
->mb_height
>> MB_FIELD
;
497 if (full_mx
< 0 - extra_width
||
498 full_my
< 0 - extra_height
||
499 full_mx
+ 16 /*FIXME*/ > pic_width
+ extra_width
||
500 full_my
+ 16 /*FIXME*/ > pic_height
+ extra_height
) {
501 s
->vdsp
.emulated_edge_mc(s
->edge_emu_buffer
,
502 src_y
- (2 << pixel_shift
) - 2 * h
->mb_linesize
,
504 16 + 5, 16 + 5 /*FIXME*/, full_mx
- 2,
505 full_my
- 2, pic_width
, pic_height
);
506 src_y
= s
->edge_emu_buffer
+ (2 << pixel_shift
) + 2 * h
->mb_linesize
;
510 qpix_op
[luma_xy
](dest_y
, src_y
, h
->mb_linesize
); // FIXME try variable height perhaps?
512 qpix_op
[luma_xy
](dest_y
+ delta
, src_y
+ delta
, h
->mb_linesize
);
514 if (CONFIG_GRAY
&& s
->flags
& CODEC_FLAG_GRAY
)
517 if (chroma_idc
== 3 /* yuv444 */) {
518 src_cb
= pic
->f
.data
[1] + offset
;
520 s
->vdsp
.emulated_edge_mc(s
->edge_emu_buffer
,
521 src_cb
- (2 << pixel_shift
) - 2 * h
->mb_linesize
,
523 16 + 5, 16 + 5 /*FIXME*/,
524 full_mx
- 2, full_my
- 2,
525 pic_width
, pic_height
);
526 src_cb
= s
->edge_emu_buffer
+ (2 << pixel_shift
) + 2 * h
->mb_linesize
;
528 qpix_op
[luma_xy
](dest_cb
, src_cb
, h
->mb_linesize
); // FIXME try variable height perhaps?
530 qpix_op
[luma_xy
](dest_cb
+ delta
, src_cb
+ delta
, h
->mb_linesize
);
532 src_cr
= pic
->f
.data
[2] + offset
;
534 s
->vdsp
.emulated_edge_mc(s
->edge_emu_buffer
,
535 src_cr
- (2 << pixel_shift
) - 2 * h
->mb_linesize
,
537 16 + 5, 16 + 5 /*FIXME*/,
538 full_mx
- 2, full_my
- 2,
539 pic_width
, pic_height
);
540 src_cr
= s
->edge_emu_buffer
+ (2 << pixel_shift
) + 2 * h
->mb_linesize
;
542 qpix_op
[luma_xy
](dest_cr
, src_cr
, h
->mb_linesize
); // FIXME try variable height perhaps?
544 qpix_op
[luma_xy
](dest_cr
+ delta
, src_cr
+ delta
, h
->mb_linesize
);
548 ysh
= 3 - (chroma_idc
== 2 /* yuv422 */);
549 if (chroma_idc
== 1 /* yuv420 */ && MB_FIELD
) {
550 // chroma offset when predicting from a field of opposite parity
551 my
+= 2 * ((s
->mb_y
& 1) - (pic
->f
.reference
- 1));
552 emu
|= (my
>> 3) < 0 || (my
>> 3) + 8 >= (pic_height
>> 1);
555 src_cb
= pic
->f
.data
[1] + ((mx
>> 3) << pixel_shift
) +
556 (my
>> ysh
) * h
->mb_uvlinesize
;
557 src_cr
= pic
->f
.data
[2] + ((mx
>> 3) << pixel_shift
) +
558 (my
>> ysh
) * h
->mb_uvlinesize
;
561 s
->vdsp
.emulated_edge_mc(s
->edge_emu_buffer
, src_cb
, h
->mb_uvlinesize
,
562 9, 8 * chroma_idc
+ 1, (mx
>> 3), (my
>> ysh
),
563 pic_width
>> 1, pic_height
>> (chroma_idc
== 1 /* yuv420 */));
564 src_cb
= s
->edge_emu_buffer
;
566 chroma_op(dest_cb
, src_cb
, h
->mb_uvlinesize
,
567 height
>> (chroma_idc
== 1 /* yuv420 */),
568 mx
& 7, (my
<< (chroma_idc
== 2 /* yuv422 */)) & 7);
571 s
->vdsp
.emulated_edge_mc(s
->edge_emu_buffer
, src_cr
, h
->mb_uvlinesize
,
572 9, 8 * chroma_idc
+ 1, (mx
>> 3), (my
>> ysh
),
573 pic_width
>> 1, pic_height
>> (chroma_idc
== 1 /* yuv420 */));
574 src_cr
= s
->edge_emu_buffer
;
576 chroma_op(dest_cr
, src_cr
, h
->mb_uvlinesize
, height
>> (chroma_idc
== 1 /* yuv420 */),
577 mx
& 7, (my
<< (chroma_idc
== 2 /* yuv422 */)) & 7);
580 static av_always_inline
void mc_part_std(H264Context
*h
, int n
, int square
,
581 int height
, int delta
,
582 uint8_t *dest_y
, uint8_t *dest_cb
,
584 int x_offset
, int y_offset
,
585 qpel_mc_func
*qpix_put
,
586 h264_chroma_mc_func chroma_put
,
587 qpel_mc_func
*qpix_avg
,
588 h264_chroma_mc_func chroma_avg
,
589 int list0
, int list1
,
590 int pixel_shift
, int chroma_idc
)
592 MpegEncContext
*const s
= &h
->s
;
593 qpel_mc_func
*qpix_op
= qpix_put
;
594 h264_chroma_mc_func chroma_op
= chroma_put
;
596 dest_y
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
597 if (chroma_idc
== 3 /* yuv444 */) {
598 dest_cb
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
599 dest_cr
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
600 } else if (chroma_idc
== 2 /* yuv422 */) {
601 dest_cb
+= (x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_uvlinesize
;
602 dest_cr
+= (x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_uvlinesize
;
603 } else { /* yuv420 */
604 dest_cb
+= (x_offset
<< pixel_shift
) + y_offset
* h
->mb_uvlinesize
;
605 dest_cr
+= (x_offset
<< pixel_shift
) + y_offset
* h
->mb_uvlinesize
;
607 x_offset
+= 8 * s
->mb_x
;
608 y_offset
+= 8 * (s
->mb_y
>> MB_FIELD
);
611 Picture
*ref
= &h
->ref_list
[0][h
->ref_cache
[0][scan8
[n
]]];
612 mc_dir_part(h
, ref
, n
, square
, height
, delta
, 0,
613 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
614 qpix_op
, chroma_op
, pixel_shift
, chroma_idc
);
617 chroma_op
= chroma_avg
;
621 Picture
*ref
= &h
->ref_list
[1][h
->ref_cache
[1][scan8
[n
]]];
622 mc_dir_part(h
, ref
, n
, square
, height
, delta
, 1,
623 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
624 qpix_op
, chroma_op
, pixel_shift
, chroma_idc
);
628 static av_always_inline
void mc_part_weighted(H264Context
*h
, int n
, int square
,
629 int height
, int delta
,
630 uint8_t *dest_y
, uint8_t *dest_cb
,
632 int x_offset
, int y_offset
,
633 qpel_mc_func
*qpix_put
,
634 h264_chroma_mc_func chroma_put
,
635 h264_weight_func luma_weight_op
,
636 h264_weight_func chroma_weight_op
,
637 h264_biweight_func luma_weight_avg
,
638 h264_biweight_func chroma_weight_avg
,
639 int list0
, int list1
,
640 int pixel_shift
, int chroma_idc
)
642 MpegEncContext
*const s
= &h
->s
;
645 dest_y
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
646 if (chroma_idc
== 3 /* yuv444 */) {
647 chroma_height
= height
;
648 chroma_weight_avg
= luma_weight_avg
;
649 chroma_weight_op
= luma_weight_op
;
650 dest_cb
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
651 dest_cr
+= (2 * x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_linesize
;
652 } else if (chroma_idc
== 2 /* yuv422 */) {
653 chroma_height
= height
;
654 dest_cb
+= (x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_uvlinesize
;
655 dest_cr
+= (x_offset
<< pixel_shift
) + 2 * y_offset
* h
->mb_uvlinesize
;
656 } else { /* yuv420 */
657 chroma_height
= height
>> 1;
658 dest_cb
+= (x_offset
<< pixel_shift
) + y_offset
* h
->mb_uvlinesize
;
659 dest_cr
+= (x_offset
<< pixel_shift
) + y_offset
* h
->mb_uvlinesize
;
661 x_offset
+= 8 * s
->mb_x
;
662 y_offset
+= 8 * (s
->mb_y
>> MB_FIELD
);
664 if (list0
&& list1
) {
665 /* don't optimize for luma-only case, since B-frames usually
666 * use implicit weights => chroma too. */
667 uint8_t *tmp_cb
= h
->bipred_scratchpad
;
668 uint8_t *tmp_cr
= h
->bipred_scratchpad
+ (16 << pixel_shift
);
669 uint8_t *tmp_y
= h
->bipred_scratchpad
+ 16 * h
->mb_uvlinesize
;
670 int refn0
= h
->ref_cache
[0][scan8
[n
]];
671 int refn1
= h
->ref_cache
[1][scan8
[n
]];
673 mc_dir_part(h
, &h
->ref_list
[0][refn0
], n
, square
, height
, delta
, 0,
674 dest_y
, dest_cb
, dest_cr
,
675 x_offset
, y_offset
, qpix_put
, chroma_put
,
676 pixel_shift
, chroma_idc
);
677 mc_dir_part(h
, &h
->ref_list
[1][refn1
], n
, square
, height
, delta
, 1,
678 tmp_y
, tmp_cb
, tmp_cr
,
679 x_offset
, y_offset
, qpix_put
, chroma_put
,
680 pixel_shift
, chroma_idc
);
682 if (h
->use_weight
== 2) {
683 int weight0
= h
->implicit_weight
[refn0
][refn1
][s
->mb_y
& 1];
684 int weight1
= 64 - weight0
;
685 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
,
686 height
, 5, weight0
, weight1
, 0);
687 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
,
688 chroma_height
, 5, weight0
, weight1
, 0);
689 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
,
690 chroma_height
, 5, weight0
, weight1
, 0);
692 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
, height
,
693 h
->luma_log2_weight_denom
,
694 h
->luma_weight
[refn0
][0][0],
695 h
->luma_weight
[refn1
][1][0],
696 h
->luma_weight
[refn0
][0][1] +
697 h
->luma_weight
[refn1
][1][1]);
698 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, chroma_height
,
699 h
->chroma_log2_weight_denom
,
700 h
->chroma_weight
[refn0
][0][0][0],
701 h
->chroma_weight
[refn1
][1][0][0],
702 h
->chroma_weight
[refn0
][0][0][1] +
703 h
->chroma_weight
[refn1
][1][0][1]);
704 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, chroma_height
,
705 h
->chroma_log2_weight_denom
,
706 h
->chroma_weight
[refn0
][0][1][0],
707 h
->chroma_weight
[refn1
][1][1][0],
708 h
->chroma_weight
[refn0
][0][1][1] +
709 h
->chroma_weight
[refn1
][1][1][1]);
712 int list
= list1
? 1 : 0;
713 int refn
= h
->ref_cache
[list
][scan8
[n
]];
714 Picture
*ref
= &h
->ref_list
[list
][refn
];
715 mc_dir_part(h
, ref
, n
, square
, height
, delta
, list
,
716 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
717 qpix_put
, chroma_put
, pixel_shift
, chroma_idc
);
719 luma_weight_op(dest_y
, h
->mb_linesize
, height
,
720 h
->luma_log2_weight_denom
,
721 h
->luma_weight
[refn
][list
][0],
722 h
->luma_weight
[refn
][list
][1]);
723 if (h
->use_weight_chroma
) {
724 chroma_weight_op(dest_cb
, h
->mb_uvlinesize
, chroma_height
,
725 h
->chroma_log2_weight_denom
,
726 h
->chroma_weight
[refn
][list
][0][0],
727 h
->chroma_weight
[refn
][list
][0][1]);
728 chroma_weight_op(dest_cr
, h
->mb_uvlinesize
, chroma_height
,
729 h
->chroma_log2_weight_denom
,
730 h
->chroma_weight
[refn
][list
][1][0],
731 h
->chroma_weight
[refn
][list
][1][1]);
736 static av_always_inline
void prefetch_motion(H264Context
*h
, int list
,
737 int pixel_shift
, int chroma_idc
)
739 /* fetch pixels for estimated mv 4 macroblocks ahead
740 * optimized for 64byte cache lines */
741 MpegEncContext
*const s
= &h
->s
;
742 const int refn
= h
->ref_cache
[list
][scan8
[0]];
744 const int mx
= (h
->mv_cache
[list
][scan8
[0]][0] >> 2) + 16 * s
->mb_x
+ 8;
745 const int my
= (h
->mv_cache
[list
][scan8
[0]][1] >> 2) + 16 * s
->mb_y
;
746 uint8_t **src
= h
->ref_list
[list
][refn
].f
.data
;
747 int off
= (mx
<< pixel_shift
) +
748 (my
+ (s
->mb_x
& 3) * 4) * h
->mb_linesize
+
750 s
->vdsp
.prefetch(src
[0] + off
, s
->linesize
, 4);
751 if (chroma_idc
== 3 /* yuv444 */) {
752 s
->vdsp
.prefetch(src
[1] + off
, s
->linesize
, 4);
753 s
->vdsp
.prefetch(src
[2] + off
, s
->linesize
, 4);
755 off
= ((mx
>> 1) << pixel_shift
) +
756 ((my
>> 1) + (s
->mb_x
& 7)) * s
->uvlinesize
+
758 s
->vdsp
.prefetch(src
[1] + off
, src
[2] - src
[1], 2);
763 static void free_tables(H264Context
*h
, int free_rbsp
)
768 av_freep(&h
->intra4x4_pred_mode
);
769 av_freep(&h
->chroma_pred_mode_table
);
770 av_freep(&h
->cbp_table
);
771 av_freep(&h
->mvd_table
[0]);
772 av_freep(&h
->mvd_table
[1]);
773 av_freep(&h
->direct_table
);
774 av_freep(&h
->non_zero_count
);
775 av_freep(&h
->slice_table_base
);
776 h
->slice_table
= NULL
;
777 av_freep(&h
->list_counts
);
779 av_freep(&h
->mb2b_xy
);
780 av_freep(&h
->mb2br_xy
);
782 for (i
= 0; i
< MAX_THREADS
; i
++) {
783 hx
= h
->thread_context
[i
];
786 av_freep(&hx
->top_borders
[1]);
787 av_freep(&hx
->top_borders
[0]);
788 av_freep(&hx
->bipred_scratchpad
);
790 av_freep(&hx
->rbsp_buffer
[1]);
791 av_freep(&hx
->rbsp_buffer
[0]);
792 hx
->rbsp_buffer_size
[0] = 0;
793 hx
->rbsp_buffer_size
[1] = 0;
796 av_freep(&h
->thread_context
[i
]);
800 static void init_dequant8_coeff_table(H264Context
*h
)
803 const int max_qp
= 51 + 6 * (h
->sps
.bit_depth_luma
- 8);
805 for (i
= 0; i
< 6; i
++) {
806 h
->dequant8_coeff
[i
] = h
->dequant8_buffer
[i
];
807 for (j
= 0; j
< i
; j
++)
808 if (!memcmp(h
->pps
.scaling_matrix8
[j
], h
->pps
.scaling_matrix8
[i
],
809 64 * sizeof(uint8_t))) {
810 h
->dequant8_coeff
[i
] = h
->dequant8_buffer
[j
];
816 for (q
= 0; q
< max_qp
+ 1; q
++) {
819 for (x
= 0; x
< 64; x
++)
820 h
->dequant8_coeff
[i
][q
][(x
>> 3) | ((x
& 7) << 3)] =
821 ((uint32_t)dequant8_coeff_init
[idx
][dequant8_coeff_init_scan
[((x
>> 1) & 12) | (x
& 3)]] *
822 h
->pps
.scaling_matrix8
[i
][x
]) << shift
;
827 static void init_dequant4_coeff_table(H264Context
*h
)
830 const int max_qp
= 51 + 6 * (h
->sps
.bit_depth_luma
- 8);
831 for (i
= 0; i
< 6; i
++) {
832 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[i
];
833 for (j
= 0; j
< i
; j
++)
834 if (!memcmp(h
->pps
.scaling_matrix4
[j
], h
->pps
.scaling_matrix4
[i
],
835 16 * sizeof(uint8_t))) {
836 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[j
];
842 for (q
= 0; q
< max_qp
+ 1; q
++) {
843 int shift
= div6
[q
] + 2;
845 for (x
= 0; x
< 16; x
++)
846 h
->dequant4_coeff
[i
][q
][(x
>> 2) | ((x
<< 2) & 0xF)] =
847 ((uint32_t)dequant4_coeff_init
[idx
][(x
& 1) + ((x
>> 2) & 1)] *
848 h
->pps
.scaling_matrix4
[i
][x
]) << shift
;
853 static void init_dequant_tables(H264Context
*h
)
856 init_dequant4_coeff_table(h
);
857 if (h
->pps
.transform_8x8_mode
)
858 init_dequant8_coeff_table(h
);
859 if (h
->sps
.transform_bypass
) {
860 for (i
= 0; i
< 6; i
++)
861 for (x
= 0; x
< 16; x
++)
862 h
->dequant4_coeff
[i
][0][x
] = 1 << 6;
863 if (h
->pps
.transform_8x8_mode
)
864 for (i
= 0; i
< 6; i
++)
865 for (x
= 0; x
< 64; x
++)
866 h
->dequant8_coeff
[i
][0][x
] = 1 << 6;
870 int ff_h264_alloc_tables(H264Context
*h
)
872 MpegEncContext
*const s
= &h
->s
;
873 const int big_mb_num
= s
->mb_stride
* (s
->mb_height
+ 1);
874 const int row_mb_num
= s
->mb_stride
* 2 * s
->avctx
->thread_count
;
877 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->intra4x4_pred_mode
,
878 row_mb_num
* 8 * sizeof(uint8_t), fail
)
879 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->non_zero_count
,
880 big_mb_num
* 48 * sizeof(uint8_t), fail
)
881 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->slice_table_base
,
882 (big_mb_num
+ s
->mb_stride
) * sizeof(*h
->slice_table_base
), fail
)
883 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->cbp_table
,
884 big_mb_num
* sizeof(uint16_t), fail
)
885 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->chroma_pred_mode_table
,
886 big_mb_num
* sizeof(uint8_t), fail
)
887 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->mvd_table
[0],
888 16 * row_mb_num
* sizeof(uint8_t), fail
);
889 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->mvd_table
[1],
890 16 * row_mb_num
* sizeof(uint8_t), fail
);
891 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->direct_table
,
892 4 * big_mb_num
* sizeof(uint8_t), fail
);
893 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->list_counts
,
894 big_mb_num
* sizeof(uint8_t), fail
)
896 memset(h
->slice_table_base
, -1,
897 (big_mb_num
+ s
->mb_stride
) * sizeof(*h
->slice_table_base
));
898 h
->slice_table
= h
->slice_table_base
+ s
->mb_stride
* 2 + 1;
900 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->mb2b_xy
,
901 big_mb_num
* sizeof(uint32_t), fail
);
902 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->mb2br_xy
,
903 big_mb_num
* sizeof(uint32_t), fail
);
904 for (y
= 0; y
< s
->mb_height
; y
++)
905 for (x
= 0; x
< s
->mb_width
; x
++) {
906 const int mb_xy
= x
+ y
* s
->mb_stride
;
907 const int b_xy
= 4 * x
+ 4 * y
* h
->b_stride
;
909 h
->mb2b_xy
[mb_xy
] = b_xy
;
910 h
->mb2br_xy
[mb_xy
] = 8 * (FMO
? mb_xy
: (mb_xy
% (2 * s
->mb_stride
)));
913 if (!h
->dequant4_coeff
[0])
914 init_dequant_tables(h
);
924 * Mimic alloc_tables(), but for every context thread.
926 static void clone_tables(H264Context
*dst
, H264Context
*src
, int i
)
928 MpegEncContext
*const s
= &src
->s
;
929 dst
->intra4x4_pred_mode
= src
->intra4x4_pred_mode
+ i
* 8 * 2 * s
->mb_stride
;
930 dst
->non_zero_count
= src
->non_zero_count
;
931 dst
->slice_table
= src
->slice_table
;
932 dst
->cbp_table
= src
->cbp_table
;
933 dst
->mb2b_xy
= src
->mb2b_xy
;
934 dst
->mb2br_xy
= src
->mb2br_xy
;
935 dst
->chroma_pred_mode_table
= src
->chroma_pred_mode_table
;
936 dst
->mvd_table
[0] = src
->mvd_table
[0] + i
* 8 * 2 * s
->mb_stride
;
937 dst
->mvd_table
[1] = src
->mvd_table
[1] + i
* 8 * 2 * s
->mb_stride
;
938 dst
->direct_table
= src
->direct_table
;
939 dst
->list_counts
= src
->list_counts
;
940 dst
->bipred_scratchpad
= NULL
;
941 ff_h264_pred_init(&dst
->hpc
, src
->s
.codec_id
, src
->sps
.bit_depth_luma
,
942 src
->sps
.chroma_format_idc
);
947 * Allocate buffers which are not shared amongst multiple threads.
949 static int context_init(H264Context
*h
)
951 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->top_borders
[0],
952 h
->s
.mb_width
* 16 * 3 * sizeof(uint8_t) * 2, fail
)
953 FF_ALLOCZ_OR_GOTO(h
->s
.avctx
, h
->top_borders
[1],
954 h
->s
.mb_width
* 16 * 3 * sizeof(uint8_t) * 2, fail
)
956 h
->ref_cache
[0][scan8
[5] + 1] =
957 h
->ref_cache
[0][scan8
[7] + 1] =
958 h
->ref_cache
[0][scan8
[13] + 1] =
959 h
->ref_cache
[1][scan8
[5] + 1] =
960 h
->ref_cache
[1][scan8
[7] + 1] =
961 h
->ref_cache
[1][scan8
[13] + 1] = PART_NOT_AVAILABLE
;
966 return -1; // free_tables will clean up for us
969 static int decode_nal_units(H264Context
*h
, const uint8_t *buf
, int buf_size
,
970 int parse_extradata
);
972 static av_cold
void common_init(H264Context
*h
)
974 MpegEncContext
*const s
= &h
->s
;
976 s
->width
= s
->avctx
->width
;
977 s
->height
= s
->avctx
->height
;
978 s
->codec_id
= s
->avctx
->codec
->id
;
980 ff_h264dsp_init(&h
->h264dsp
, 8, 1);
981 ff_h264chroma_init(&h
->h264chroma
, h
->sps
.bit_depth_chroma
);
982 ff_h264qpel_init(&h
->h264qpel
, 8);
983 ff_h264_pred_init(&h
->hpc
, s
->codec_id
, 8, 1);
985 h
->dequant_coeff_pps
= -1;
986 s
->unrestricted_mv
= 1;
988 /* needed so that IDCT permutation is known early */
989 ff_dsputil_init(&s
->dsp
, s
->avctx
);
990 ff_videodsp_init(&s
->vdsp
, 8);
992 memset(h
->pps
.scaling_matrix4
, 16, 6 * 16 * sizeof(uint8_t));
993 memset(h
->pps
.scaling_matrix8
, 16, 2 * 64 * sizeof(uint8_t));
996 int ff_h264_decode_extradata(H264Context
*h
)
998 AVCodecContext
*avctx
= h
->s
.avctx
;
1000 if (avctx
->extradata
[0] == 1) {
1001 int i
, cnt
, nalsize
;
1002 unsigned char *p
= avctx
->extradata
;
1006 if (avctx
->extradata_size
< 7) {
1007 av_log(avctx
, AV_LOG_ERROR
, "avcC too short\n");
1010 /* sps and pps in the avcC always have length coded with 2 bytes,
1011 * so put a fake nal_length_size = 2 while parsing them */
1012 h
->nal_length_size
= 2;
1013 // Decode sps from avcC
1014 cnt
= *(p
+ 5) & 0x1f; // Number of sps
1016 for (i
= 0; i
< cnt
; i
++) {
1017 nalsize
= AV_RB16(p
) + 2;
1018 if (p
- avctx
->extradata
+ nalsize
> avctx
->extradata_size
)
1020 if (decode_nal_units(h
, p
, nalsize
, 1) < 0) {
1021 av_log(avctx
, AV_LOG_ERROR
,
1022 "Decoding sps %d from avcC failed\n", i
);
1027 // Decode pps from avcC
1028 cnt
= *(p
++); // Number of pps
1029 for (i
= 0; i
< cnt
; i
++) {
1030 nalsize
= AV_RB16(p
) + 2;
1031 if (p
- avctx
->extradata
+ nalsize
> avctx
->extradata_size
)
1033 if (decode_nal_units(h
, p
, nalsize
, 1) < 0) {
1034 av_log(avctx
, AV_LOG_ERROR
,
1035 "Decoding pps %d from avcC failed\n", i
);
1040 // Now store right nal length size, that will be used to parse all other nals
1041 h
->nal_length_size
= (avctx
->extradata
[4] & 0x03) + 1;
1044 if (decode_nal_units(h
, avctx
->extradata
, avctx
->extradata_size
, 1) < 0)
1050 av_cold
int ff_h264_decode_init(AVCodecContext
*avctx
)
1052 H264Context
*h
= avctx
->priv_data
;
1053 MpegEncContext
*const s
= &h
->s
;
1056 ff_MPV_decode_defaults(s
);
1061 s
->out_format
= FMT_H264
;
1062 s
->workaround_bugs
= avctx
->workaround_bugs
;
1065 // s->decode_mb = ff_h263_decode_mb;
1066 s
->quarter_sample
= 1;
1067 if (!avctx
->has_b_frames
)
1070 avctx
->chroma_sample_location
= AVCHROMA_LOC_LEFT
;
1072 ff_h264_decode_init_vlc();
1075 h
->sps
.bit_depth_luma
= avctx
->bits_per_raw_sample
= 8;
1077 h
->thread_context
[0] = h
;
1078 h
->outputed_poc
= h
->next_outputed_poc
= INT_MIN
;
1079 for (i
= 0; i
< MAX_DELAYED_PIC_COUNT
; i
++)
1080 h
->last_pocs
[i
] = INT_MIN
;
1081 h
->prev_poc_msb
= 1 << 16;
1083 ff_h264_reset_sei(h
);
1084 if (avctx
->codec_id
== AV_CODEC_ID_H264
) {
1085 if (avctx
->ticks_per_frame
== 1)
1086 s
->avctx
->time_base
.den
*= 2;
1087 avctx
->ticks_per_frame
= 2;
1090 if (avctx
->extradata_size
> 0 && avctx
->extradata
&&
1091 ff_h264_decode_extradata(h
))
1094 if (h
->sps
.bitstream_restriction_flag
&&
1095 s
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
) {
1096 s
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
/**
 * Evaluate to non-zero when address a lies inside the region of size bytes
 * that starts at address b.
 *
 * Both addresses are compared as byte pointers, so size is always a byte
 * count regardless of the pointee types of a and b, and a and b may point
 * to different types. a and b are each evaluated twice.
 */
#define IN_RANGE(a, b, size)                                            \
    ((const char *)(a) >= (const char *)(b) &&                          \
     (const char *)(a) <  (const char *)(b) + (size))
1105 static void copy_picture_range(Picture
**to
, Picture
**from
, int count
,
1106 MpegEncContext
*new_base
,
1107 MpegEncContext
*old_base
)
1111 for (i
= 0; i
< count
; i
++) {
1112 assert((IN_RANGE(from
[i
], old_base
, sizeof(*old_base
)) ||
1113 IN_RANGE(from
[i
], old_base
->picture
,
1114 sizeof(Picture
) * old_base
->picture_count
) ||
1116 to
[i
] = REBASE_PICTURE(from
[i
], new_base
, old_base
);
/**
 * Make the parameter-set table to mirror the table from.
 *
 * For every slot: if from has no entry the slot in to is freed; otherwise a
 * buffer of size bytes is allocated in to when it has none yet, and the
 * source entry is copied into it.
 *
 * If an allocation fails the destination slot is left NULL and the copy for
 * that slot is skipped, so the entry simply appears as absent in to.
 *
 * @param to    destination table of count pointers
 * @param from  source table of count pointers
 * @param count number of slots in both tables
 * @param size  size in bytes of one parameter set
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i = 0; i < count; i++) {
        if (!from[i]) {
            /* av_freep() on an already empty slot is harmless */
            av_freep(&to[i]);
            continue;
        }

        if (!to[i])
            to[i] = av_malloc(size);

        /* never copy into a slot whose allocation failed */
        if (to[i])
            memcpy(to[i], from[i], size);
    }
}
1135 static int decode_init_thread_copy(AVCodecContext
*avctx
)
1137 H264Context
*h
= avctx
->priv_data
;
1139 if (!avctx
->internal
->is_copy
)
1141 memset(h
->sps_buffers
, 0, sizeof(h
->sps_buffers
));
1142 memset(h
->pps_buffers
, 0, sizeof(h
->pps_buffers
));
1144 h
->s
.context_initialized
= 0;
/**
 * Copy a run of consecutive struct members from *from to *to.
 *
 * The bytes from the start of start_field up to, but not including,
 * end_field are copied with memcpy(); end_field itself is not copied.
 * to and from may be arbitrary pointer expressions; to is evaluated
 * three times.
 */
#define copy_fields(to, from, start_field, end_field)                   \
    memcpy(&(to)->start_field, &(from)->start_field,                    \
           (char *)&(to)->end_field - (char *)&(to)->start_field)
1153 static int h264_slice_header_init(H264Context
*, int);
1155 static int h264_set_parameter_from_sps(H264Context
*h
);
1157 static int decode_update_thread_context(AVCodecContext
*dst
,
1158 const AVCodecContext
*src
)
1160 H264Context
*h
= dst
->priv_data
, *h1
= src
->priv_data
;
1161 MpegEncContext
*const s
= &h
->s
, *const s1
= &h1
->s
;
1162 int inited
= s
->context_initialized
, err
;
1165 if (dst
== src
|| !s1
->context_initialized
)
1169 (s
->width
!= s1
->width
||
1170 s
->height
!= s1
->height
||
1171 s
->mb_width
!= s1
->mb_width
||
1172 s
->mb_height
!= s1
->mb_height
||
1173 h
->sps
.bit_depth_luma
!= h1
->sps
.bit_depth_luma
||
1174 h
->sps
.chroma_format_idc
!= h1
->sps
.chroma_format_idc
||
1175 h
->sps
.colorspace
!= h1
->sps
.colorspace
)) {
1177 av_freep(&h
->bipred_scratchpad
);
1179 s
->width
= s1
->width
;
1180 s
->height
= s1
->height
;
1181 s
->mb_height
= s1
->mb_height
;
1182 h
->b_stride
= h1
->b_stride
;
1184 if ((err
= h264_slice_header_init(h
, 1)) < 0) {
1185 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "h264_slice_header_init() failed");
1188 h
->context_reinitialized
= 1;
1190 /* update linesize on resize for h264. The h264 decoder doesn't
1191 * necessarily call ff_MPV_frame_start in the new thread */
1192 s
->linesize
= s1
->linesize
;
1193 s
->uvlinesize
= s1
->uvlinesize
;
1195 /* copy block_offset since frame_start may not be called */
1196 memcpy(h
->block_offset
, h1
->block_offset
, sizeof(h
->block_offset
));
1197 h264_set_parameter_from_sps(h
);
1200 err
= ff_mpeg_update_thread_context(dst
, src
);
1205 for (i
= 0; i
< MAX_SPS_COUNT
; i
++)
1206 av_freep(h
->sps_buffers
+ i
);
1208 for (i
= 0; i
< MAX_PPS_COUNT
; i
++)
1209 av_freep(h
->pps_buffers
+ i
);
1211 // copy all fields after MpegEnc
1212 memcpy(&h
->s
+ 1, &h1
->s
+ 1,
1213 sizeof(H264Context
) - sizeof(MpegEncContext
));
1214 memset(h
->sps_buffers
, 0, sizeof(h
->sps_buffers
));
1215 memset(h
->pps_buffers
, 0, sizeof(h
->pps_buffers
));
1216 if (ff_h264_alloc_tables(h
) < 0) {
1217 av_log(dst
, AV_LOG_ERROR
, "Could not allocate memory for h264\n");
1218 return AVERROR(ENOMEM
);
1222 for (i
= 0; i
< 2; i
++) {
1223 h
->rbsp_buffer
[i
] = NULL
;
1224 h
->rbsp_buffer_size
[i
] = 0;
1226 h
->bipred_scratchpad
= NULL
;
1228 h
->thread_context
[0] = h
;
1230 s
->dsp
.clear_blocks(h
->mb
);
1231 s
->dsp
.clear_blocks(h
->mb
+ (24 * 16 << h
->pixel_shift
));
1234 /* frame_start may not be called for the next thread (if it's decoding
1235 * a bottom field) so this has to be allocated here */
1236 if (!h
->bipred_scratchpad
)
1237 h
->bipred_scratchpad
= av_malloc(16 * 6 * s
->linesize
);
1239 // extradata/NAL handling
1240 h
->is_avc
= h1
->is_avc
;
1243 copy_parameter_set((void **)h
->sps_buffers
, (void **)h1
->sps_buffers
,
1244 MAX_SPS_COUNT
, sizeof(SPS
));
1246 copy_parameter_set((void **)h
->pps_buffers
, (void **)h1
->pps_buffers
,
1247 MAX_PPS_COUNT
, sizeof(PPS
));
1250 // Dequantization matrices
1251 // FIXME these are big - can they be only copied when PPS changes?
1252 copy_fields(h
, h1
, dequant4_buffer
, dequant4_coeff
);
1254 for (i
= 0; i
< 6; i
++)
1255 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[0] +
1256 (h1
->dequant4_coeff
[i
] - h1
->dequant4_buffer
[0]);
1258 for (i
= 0; i
< 6; i
++)
1259 h
->dequant8_coeff
[i
] = h
->dequant8_buffer
[0] +
1260 (h1
->dequant8_coeff
[i
] - h1
->dequant8_buffer
[0]);
1262 h
->dequant_coeff_pps
= h1
->dequant_coeff_pps
;
1265 copy_fields(h
, h1
, poc_lsb
, redundant_pic_count
);
1268 copy_fields(h
, h1
, ref_count
, list_count
);
1269 copy_fields(h
, h1
, ref2frm
, intra_gb
);
1270 copy_fields(h
, h1
, short_ref
, cabac_init_idc
);
1272 copy_picture_range(h
->short_ref
, h1
->short_ref
, 32, s
, s1
);
1273 copy_picture_range(h
->long_ref
, h1
->long_ref
, 32, s
, s1
);
1274 copy_picture_range(h
->delayed_pic
, h1
->delayed_pic
,
1275 MAX_DELAYED_PIC_COUNT
+ 2, s
, s1
);
1277 h
->last_slice_type
= h1
->last_slice_type
;
1279 if (!s
->current_picture_ptr
)
1282 if (!s
->droppable
) {
1283 err
= ff_h264_execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
1284 h
->prev_poc_msb
= h
->poc_msb
;
1285 h
->prev_poc_lsb
= h
->poc_lsb
;
1287 h
->prev_frame_num_offset
= h
->frame_num_offset
;
1288 h
->prev_frame_num
= h
->frame_num
;
1289 h
->outputed_poc
= h
->next_outputed_poc
;
1294 int ff_h264_frame_start(H264Context
*h
)
1296 MpegEncContext
*const s
= &h
->s
;
1298 const int pixel_shift
= h
->pixel_shift
;
1300 if (ff_MPV_frame_start(s
, s
->avctx
) < 0)
1302 ff_er_frame_start(s
);
1304 * ff_MPV_frame_start uses pict_type to derive key_frame.
1305 * This is incorrect for H.264; IDR markings must be used.
1306 * Zero here; IDR markings per slice in frame or fields are ORed in later.
1307 * See decode_nal_units().
1309 s
->current_picture_ptr
->f
.key_frame
= 0;
1310 s
->current_picture_ptr
->mmco_reset
= 0;
1312 assert(s
->linesize
&& s
->uvlinesize
);
1314 for (i
= 0; i
< 16; i
++) {
1315 h
->block_offset
[i
] = (4 * ((scan8
[i
] - scan8
[0]) & 7) << pixel_shift
) + 4 * s
->linesize
* ((scan8
[i
] - scan8
[0]) >> 3);
1316 h
->block_offset
[48 + i
] = (4 * ((scan8
[i
] - scan8
[0]) & 7) << pixel_shift
) + 8 * s
->linesize
* ((scan8
[i
] - scan8
[0]) >> 3);
1318 for (i
= 0; i
< 16; i
++) {
1319 h
->block_offset
[16 + i
] =
1320 h
->block_offset
[32 + i
] = (4 * ((scan8
[i
] - scan8
[0]) & 7) << pixel_shift
) + 4 * s
->uvlinesize
* ((scan8
[i
] - scan8
[0]) >> 3);
1321 h
->block_offset
[48 + 16 + i
] =
1322 h
->block_offset
[48 + 32 + i
] = (4 * ((scan8
[i
] - scan8
[0]) & 7) << pixel_shift
) + 8 * s
->uvlinesize
* ((scan8
[i
] - scan8
[0]) >> 3);
1325 /* can't be in alloc_tables because linesize isn't known there.
1326 * FIXME: redo bipred weight to not require extra buffer? */
1327 for (i
= 0; i
< s
->slice_context_count
; i
++)
1328 if (h
->thread_context
[i
] && !h
->thread_context
[i
]->bipred_scratchpad
)
1329 h
->thread_context
[i
]->bipred_scratchpad
= av_malloc(16 * 6 * s
->linesize
);
1331 /* Some macroblocks can be accessed before they're available in case
1332 * of lost slices, MBAFF or threading. */
1333 memset(h
->slice_table
, -1,
1334 (s
->mb_height
* s
->mb_stride
- 1) * sizeof(*h
->slice_table
));
1336 // s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding ||
1337 // s->current_picture.f.reference /* || h->contains_intra */ || 1;
1339 /* We mark the current picture as non-reference after allocating it, so
1340 * that if we break out due to an error it can be released automatically
1341 * in the next ff_MPV_frame_start().
1342 * SVQ3 as well as most other codecs have only last/next/current and thus
1343 * get released even with set reference, besides SVQ3 and others do not
1344 * mark frames as reference later "naturally". */
1345 if (s
->codec_id
!= AV_CODEC_ID_SVQ3
)
1346 s
->current_picture_ptr
->f
.reference
= 0;
1348 s
->current_picture_ptr
->field_poc
[0] =
1349 s
->current_picture_ptr
->field_poc
[1] = INT_MAX
;
1351 h
->next_output_pic
= NULL
;
1353 assert(s
->current_picture_ptr
->long_ref
== 0);
1359 * Run setup operations that must be run after slice header decoding.
1360 * This includes finding the next displayed frame.
1362 * @param h h264 master context
1363 * @param setup_finished enough NALs have been read that we can call
1364 * ff_thread_finish_setup()
1366 static void decode_postinit(H264Context
*h
, int setup_finished
)
1368 MpegEncContext
*const s
= &h
->s
;
1369 Picture
*out
= s
->current_picture_ptr
;
1370 Picture
*cur
= s
->current_picture_ptr
;
1371 int i
, pics
, out_of_order
, out_idx
;
1372 int invalid
= 0, cnt
= 0;
1374 s
->current_picture_ptr
->f
.qscale_type
= FF_QSCALE_TYPE_H264
;
1375 s
->current_picture_ptr
->f
.pict_type
= s
->pict_type
;
1377 if (h
->next_output_pic
)
1380 if (cur
->field_poc
[0] == INT_MAX
|| cur
->field_poc
[1] == INT_MAX
) {
1381 /* FIXME: if we have two PAFF fields in one packet, we can't start
1382 * the next thread here. If we have one field per packet, we can.
1383 * The check in decode_nal_units() is not good enough to find this
1384 * yet, so we assume the worst for now. */
1385 // if (setup_finished)
1386 // ff_thread_finish_setup(s->avctx);
1390 cur
->f
.interlaced_frame
= 0;
1391 cur
->f
.repeat_pict
= 0;
1393 /* Signal interlacing information externally. */
1394 /* Prioritize picture timing SEI information over used
1395 * decoding process if it exists. */
1397 if (h
->sps
.pic_struct_present_flag
) {
1398 switch (h
->sei_pic_struct
) {
1399 case SEI_PIC_STRUCT_FRAME
:
1401 case SEI_PIC_STRUCT_TOP_FIELD
:
1402 case SEI_PIC_STRUCT_BOTTOM_FIELD
:
1403 cur
->f
.interlaced_frame
= 1;
1405 case SEI_PIC_STRUCT_TOP_BOTTOM
:
1406 case SEI_PIC_STRUCT_BOTTOM_TOP
:
1407 if (FIELD_OR_MBAFF_PICTURE
)
1408 cur
->f
.interlaced_frame
= 1;
1410 // try to flag soft telecine progressive
1411 cur
->f
.interlaced_frame
= h
->prev_interlaced_frame
;
1413 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP
:
1414 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM
:
1415 /* Signal the possibility of telecined film externally
1416 * (pic_struct 5,6). From these hints, let the applications
1417 * decide if they apply deinterlacing. */
1418 cur
->f
.repeat_pict
= 1;
1420 case SEI_PIC_STRUCT_FRAME_DOUBLING
:
1421 cur
->f
.repeat_pict
= 2;
1423 case SEI_PIC_STRUCT_FRAME_TRIPLING
:
1424 cur
->f
.repeat_pict
= 4;
1428 if ((h
->sei_ct_type
& 3) &&
1429 h
->sei_pic_struct
<= SEI_PIC_STRUCT_BOTTOM_TOP
)
1430 cur
->f
.interlaced_frame
= (h
->sei_ct_type
& (1 << 1)) != 0;
1432 /* Derive interlacing flag from used decoding process. */
1433 cur
->f
.interlaced_frame
= FIELD_OR_MBAFF_PICTURE
;
1435 h
->prev_interlaced_frame
= cur
->f
.interlaced_frame
;
1437 if (cur
->field_poc
[0] != cur
->field_poc
[1]) {
1438 /* Derive top_field_first from field pocs. */
1439 cur
->f
.top_field_first
= cur
->field_poc
[0] < cur
->field_poc
[1];
1441 if (cur
->f
.interlaced_frame
|| h
->sps
.pic_struct_present_flag
) {
1442 /* Use picture timing SEI information. Even if it is
1443 * information from a past frame, it is better than nothing. */
1444 if (h
->sei_pic_struct
== SEI_PIC_STRUCT_TOP_BOTTOM
||
1445 h
->sei_pic_struct
== SEI_PIC_STRUCT_TOP_BOTTOM_TOP
)
1446 cur
->f
.top_field_first
= 1;
1448 cur
->f
.top_field_first
= 0;
1450 /* Most likely progressive */
1451 cur
->f
.top_field_first
= 0;
1455 // FIXME do something with unavailable reference frames
1457 /* Sort B-frames into display order */
1459 if (h
->sps
.bitstream_restriction_flag
&&
1460 s
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
) {
1461 s
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
1465 if (s
->avctx
->strict_std_compliance
>= FF_COMPLIANCE_STRICT
&&
1466 !h
->sps
.bitstream_restriction_flag
) {
1467 s
->avctx
->has_b_frames
= MAX_DELAYED_PIC_COUNT
- 1;
1472 while (h
->delayed_pic
[pics
])
1475 assert(pics
<= MAX_DELAYED_PIC_COUNT
);
1477 h
->delayed_pic
[pics
++] = cur
;
1478 if (cur
->f
.reference
== 0)
1479 cur
->f
.reference
= DELAYED_PIC_REF
;
1481 /* Frame reordering. This code takes pictures from coding order and sorts
1482 * them by their incremental POC value into display order. It supports POC
1483 * gaps, MMCO reset codes and random resets.
1484 * A "display group" can start either with an IDR frame (f.key_frame = 1),
1485 * and/or can be closed down with an MMCO reset code. In sequences where
1486 * there is no delay, we can't detect that (since the frame was already
1487 * output to the user), so we also set h->mmco_reset to detect the MMCO reset code.
1489 * FIXME: if we detect insufficient delays (as per s->avctx->has_b_frames),
1490 * we increase the delay between input and output. All frames affected by
1491 * the lag (e.g. those that should have been output before another frame
1492 * that we already returned to the user) will be dropped. This is a bug
1493 * that we will fix later. */
1494 for (i
= 0; i
< MAX_DELAYED_PIC_COUNT
; i
++) {
1495 cnt
+= out
->poc
< h
->last_pocs
[i
];
1496 invalid
+= out
->poc
== INT_MIN
;
1498 if (!h
->mmco_reset
&& !cur
->f
.key_frame
&&
1499 cnt
+ invalid
== MAX_DELAYED_PIC_COUNT
&& cnt
> 0) {
1502 h
->delayed_pic
[pics
- 2]->mmco_reset
= 2;
1504 if (h
->mmco_reset
|| cur
->f
.key_frame
) {
1505 for (i
= 0; i
< MAX_DELAYED_PIC_COUNT
; i
++)
1506 h
->last_pocs
[i
] = INT_MIN
;
1508 invalid
= MAX_DELAYED_PIC_COUNT
;
1510 out
= h
->delayed_pic
[0];
1512 for (i
= 1; i
< MAX_DELAYED_PIC_COUNT
&&
1513 h
->delayed_pic
[i
] &&
1514 !h
->delayed_pic
[i
- 1]->mmco_reset
&&
1515 !h
->delayed_pic
[i
]->f
.key_frame
;
1517 if (h
->delayed_pic
[i
]->poc
< out
->poc
) {
1518 out
= h
->delayed_pic
[i
];
1521 if (s
->avctx
->has_b_frames
== 0 &&
1522 (h
->delayed_pic
[0]->f
.key_frame
|| h
->mmco_reset
))
1523 h
->next_outputed_poc
= INT_MIN
;
1524 out_of_order
= !out
->f
.key_frame
&& !h
->mmco_reset
&&
1525 (out
->poc
< h
->next_outputed_poc
);
1527 if (h
->sps
.bitstream_restriction_flag
&&
1528 s
->avctx
->has_b_frames
>= h
->sps
.num_reorder_frames
) {
1529 } else if (out_of_order
&& pics
- 1 == s
->avctx
->has_b_frames
&&
1530 s
->avctx
->has_b_frames
< MAX_DELAYED_PIC_COUNT
) {
1531 if (invalid
+ cnt
< MAX_DELAYED_PIC_COUNT
) {
1532 s
->avctx
->has_b_frames
= FFMAX(s
->avctx
->has_b_frames
, cnt
);
1535 } else if (s
->low_delay
&&
1536 ((h
->next_outputed_poc
!= INT_MIN
&&
1537 out
->poc
> h
->next_outputed_poc
+ 2) ||
1538 cur
->f
.pict_type
== AV_PICTURE_TYPE_B
)) {
1540 s
->avctx
->has_b_frames
++;
1543 if (pics
> s
->avctx
->has_b_frames
) {
1544 out
->f
.reference
&= ~DELAYED_PIC_REF
;
1545 // for frame threading, the owner must be the second field's thread or
1546 // else the first thread can release the picture and reuse it unsafely
1548 for (i
= out_idx
; h
->delayed_pic
[i
]; i
++)
1549 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+ 1];
1551 memmove(h
->last_pocs
, &h
->last_pocs
[1],
1552 sizeof(*h
->last_pocs
) * (MAX_DELAYED_PIC_COUNT
- 1));
1553 h
->last_pocs
[MAX_DELAYED_PIC_COUNT
- 1] = cur
->poc
;
1554 if (!out_of_order
&& pics
> s
->avctx
->has_b_frames
) {
1555 h
->next_output_pic
= out
;
1556 if (out
->mmco_reset
) {
1558 h
->next_outputed_poc
= out
->poc
;
1559 h
->delayed_pic
[out_idx
- 1]->mmco_reset
= out
->mmco_reset
;
1561 h
->next_outputed_poc
= INT_MIN
;
1564 if (out_idx
== 0 && pics
> 1 && h
->delayed_pic
[0]->f
.key_frame
) {
1565 h
->next_outputed_poc
= INT_MIN
;
1567 h
->next_outputed_poc
= out
->poc
;
1572 av_log(s
->avctx
, AV_LOG_DEBUG
, "no picture\n");
1576 ff_thread_finish_setup(s
->avctx
);
1579 static av_always_inline
void backup_mb_border(H264Context
*h
, uint8_t *src_y
,
1580 uint8_t *src_cb
, uint8_t *src_cr
,
1581 int linesize
, int uvlinesize
,
1584 MpegEncContext
*const s
= &h
->s
;
1585 uint8_t *top_border
;
1587 const int pixel_shift
= h
->pixel_shift
;
1588 int chroma444
= CHROMA444
;
1589 int chroma422
= CHROMA422
;
1592 src_cb
-= uvlinesize
;
1593 src_cr
-= uvlinesize
;
1595 if (!simple
&& FRAME_MBAFF
) {
1598 top_border
= h
->top_borders
[0][s
->mb_x
];
1599 AV_COPY128(top_border
, src_y
+ 15 * linesize
);
1601 AV_COPY128(top_border
+ 16, src_y
+ 15 * linesize
+ 16);
1602 if (simple
|| !CONFIG_GRAY
|| !(s
->flags
& CODEC_FLAG_GRAY
)) {
1605 AV_COPY128(top_border
+ 32, src_cb
+ 15 * uvlinesize
);
1606 AV_COPY128(top_border
+ 48, src_cb
+ 15 * uvlinesize
+ 16);
1607 AV_COPY128(top_border
+ 64, src_cr
+ 15 * uvlinesize
);
1608 AV_COPY128(top_border
+ 80, src_cr
+ 15 * uvlinesize
+ 16);
1610 AV_COPY128(top_border
+ 16, src_cb
+ 15 * uvlinesize
);
1611 AV_COPY128(top_border
+ 32, src_cr
+ 15 * uvlinesize
);
1613 } else if (chroma422
) {
1615 AV_COPY128(top_border
+ 32, src_cb
+ 15 * uvlinesize
);
1616 AV_COPY128(top_border
+ 48, src_cr
+ 15 * uvlinesize
);
1618 AV_COPY64(top_border
+ 16, src_cb
+ 15 * uvlinesize
);
1619 AV_COPY64(top_border
+ 24, src_cr
+ 15 * uvlinesize
);
1623 AV_COPY128(top_border
+ 32, src_cb
+ 7 * uvlinesize
);
1624 AV_COPY128(top_border
+ 48, src_cr
+ 7 * uvlinesize
);
1626 AV_COPY64(top_border
+ 16, src_cb
+ 7 * uvlinesize
);
1627 AV_COPY64(top_border
+ 24, src_cr
+ 7 * uvlinesize
);
1632 } else if (MB_MBAFF
) {
1638 top_border
= h
->top_borders
[top_idx
][s
->mb_x
];
1639 /* There are two lines saved, the line above the top macroblock
1640 * of a pair, and the line above the bottom macroblock. */
1641 AV_COPY128(top_border
, src_y
+ 16 * linesize
);
1643 AV_COPY128(top_border
+ 16, src_y
+ 16 * linesize
+ 16);
1645 if (simple
|| !CONFIG_GRAY
|| !(s
->flags
& CODEC_FLAG_GRAY
)) {
1648 AV_COPY128(top_border
+ 32, src_cb
+ 16 * linesize
);
1649 AV_COPY128(top_border
+ 48, src_cb
+ 16 * linesize
+ 16);
1650 AV_COPY128(top_border
+ 64, src_cr
+ 16 * linesize
);
1651 AV_COPY128(top_border
+ 80, src_cr
+ 16 * linesize
+ 16);
1653 AV_COPY128(top_border
+ 16, src_cb
+ 16 * linesize
);
1654 AV_COPY128(top_border
+ 32, src_cr
+ 16 * linesize
);
1656 } else if (chroma422
) {
1658 AV_COPY128(top_border
+ 32, src_cb
+ 16 * uvlinesize
);
1659 AV_COPY128(top_border
+ 48, src_cr
+ 16 * uvlinesize
);
1661 AV_COPY64(top_border
+ 16, src_cb
+ 16 * uvlinesize
);
1662 AV_COPY64(top_border
+ 24, src_cr
+ 16 * uvlinesize
);
1666 AV_COPY128(top_border
+ 32, src_cb
+ 8 * uvlinesize
);
1667 AV_COPY128(top_border
+ 48, src_cr
+ 8 * uvlinesize
);
1669 AV_COPY64(top_border
+ 16, src_cb
+ 8 * uvlinesize
);
1670 AV_COPY64(top_border
+ 24, src_cr
+ 8 * uvlinesize
);
1676 static av_always_inline
void xchg_mb_border(H264Context
*h
, uint8_t *src_y
,
1677 uint8_t *src_cb
, uint8_t *src_cr
,
1678 int linesize
, int uvlinesize
,
1679 int xchg
, int chroma444
,
1680 int simple
, int pixel_shift
)
1682 MpegEncContext
*const s
= &h
->s
;
1683 int deblock_topleft
;
1686 uint8_t *top_border_m1
;
1687 uint8_t *top_border
;
1689 if (!simple
&& FRAME_MBAFF
) {
1694 top_idx
= MB_MBAFF
? 0 : 1;
1698 if (h
->deblocking_filter
== 2) {
1699 deblock_topleft
= h
->slice_table
[h
->mb_xy
- 1 - s
->mb_stride
] == h
->slice_num
;
1700 deblock_top
= h
->top_type
;
1702 deblock_topleft
= (s
->mb_x
> 0);
1703 deblock_top
= (s
->mb_y
> !!MB_FIELD
);
1706 src_y
-= linesize
+ 1 + pixel_shift
;
1707 src_cb
-= uvlinesize
+ 1 + pixel_shift
;
1708 src_cr
-= uvlinesize
+ 1 + pixel_shift
;
1710 top_border_m1
= h
->top_borders
[top_idx
][s
->mb_x
- 1];
1711 top_border
= h
->top_borders
[top_idx
][s
->mb_x
];
1713 #define XCHG(a, b, xchg) \
1714 if (pixel_shift) { \
1716 AV_SWAP64(b + 0, a + 0); \
1717 AV_SWAP64(b + 8, a + 8); \
1727 if (deblock_topleft
) {
1728 XCHG(top_border_m1
+ (8 << pixel_shift
),
1729 src_y
- (7 << pixel_shift
), 1);
1731 XCHG(top_border
+ (0 << pixel_shift
), src_y
+ (1 << pixel_shift
), xchg
);
1732 XCHG(top_border
+ (8 << pixel_shift
), src_y
+ (9 << pixel_shift
), 1);
1733 if (s
->mb_x
+ 1 < s
->mb_width
) {
1734 XCHG(h
->top_borders
[top_idx
][s
->mb_x
+ 1],
1735 src_y
+ (17 << pixel_shift
), 1);
1738 if (simple
|| !CONFIG_GRAY
|| !(s
->flags
& CODEC_FLAG_GRAY
)) {
1740 if (deblock_topleft
) {
1741 XCHG(top_border_m1
+ (24 << pixel_shift
), src_cb
- (7 << pixel_shift
), 1);
1742 XCHG(top_border_m1
+ (40 << pixel_shift
), src_cr
- (7 << pixel_shift
), 1);
1744 XCHG(top_border
+ (16 << pixel_shift
), src_cb
+ (1 << pixel_shift
), xchg
);
1745 XCHG(top_border
+ (24 << pixel_shift
), src_cb
+ (9 << pixel_shift
), 1);
1746 XCHG(top_border
+ (32 << pixel_shift
), src_cr
+ (1 << pixel_shift
), xchg
);
1747 XCHG(top_border
+ (40 << pixel_shift
), src_cr
+ (9 << pixel_shift
), 1);
1748 if (s
->mb_x
+ 1 < s
->mb_width
) {
1749 XCHG(h
->top_borders
[top_idx
][s
->mb_x
+ 1] + (16 << pixel_shift
), src_cb
+ (17 << pixel_shift
), 1);
1750 XCHG(h
->top_borders
[top_idx
][s
->mb_x
+ 1] + (32 << pixel_shift
), src_cr
+ (17 << pixel_shift
), 1);
1754 if (deblock_topleft
) {
1755 XCHG(top_border_m1
+ (16 << pixel_shift
), src_cb
- (7 << pixel_shift
), 1);
1756 XCHG(top_border_m1
+ (24 << pixel_shift
), src_cr
- (7 << pixel_shift
), 1);
1758 XCHG(top_border
+ (16 << pixel_shift
), src_cb
+ 1 + pixel_shift
, 1);
1759 XCHG(top_border
+ (24 << pixel_shift
), src_cr
+ 1 + pixel_shift
, 1);
1765 static av_always_inline
int dctcoef_get(int16_t *mb
, int high_bit_depth
,
1768 if (high_bit_depth
) {
1769 return AV_RN32A(((int32_t *)mb
) + index
);
1771 return AV_RN16A(mb
+ index
);
1774 static av_always_inline
void dctcoef_set(int16_t *mb
, int high_bit_depth
,
1775 int index
, int value
)
1777 if (high_bit_depth
) {
1778 AV_WN32A(((int32_t *)mb
) + index
, value
);
1780 AV_WN16A(mb
+ index
, value
);
1783 static av_always_inline
void hl_decode_mb_predict_luma(H264Context
*h
,
1784 int mb_type
, int is_h264
,
1786 int transform_bypass
,
1790 uint8_t *dest_y
, int p
)
1792 MpegEncContext
*const s
= &h
->s
;
1793 void (*idct_add
)(uint8_t *dst
, int16_t *block
, int stride
);
1794 void (*idct_dc_add
)(uint8_t *dst
, int16_t *block
, int stride
);
1796 int qscale
= p
== 0 ? s
->qscale
: h
->chroma_qp
[p
- 1];
1797 block_offset
+= 16 * p
;
1798 if (IS_INTRA4x4(mb_type
)) {
1799 if (simple
|| !s
->encoding
) {
1800 if (IS_8x8DCT(mb_type
)) {
1801 if (transform_bypass
) {
1803 idct_add
= s
->dsp
.add_pixels8
;
1805 idct_dc_add
= h
->h264dsp
.h264_idct8_dc_add
;
1806 idct_add
= h
->h264dsp
.h264_idct8_add
;
1808 for (i
= 0; i
< 16; i
+= 4) {
1809 uint8_t *const ptr
= dest_y
+ block_offset
[i
];
1810 const int dir
= h
->intra4x4_pred_mode_cache
[scan8
[i
]];
1811 if (transform_bypass
&& h
->sps
.profile_idc
== 244 && dir
<= 1) {
1812 h
->hpc
.pred8x8l_add
[dir
](ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
1814 const int nnz
= h
->non_zero_count_cache
[scan8
[i
+ p
* 16]];
1815 h
->hpc
.pred8x8l
[dir
](ptr
, (h
->topleft_samples_available
<< i
) & 0x8000,
1816 (h
->topright_samples_available
<< i
) & 0x4000, linesize
);
1818 if (nnz
== 1 && dctcoef_get(h
->mb
, pixel_shift
, i
* 16 + p
* 256))
1819 idct_dc_add(ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
1821 idct_add(ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
1826 if (transform_bypass
) {
1828 idct_add
= s
->dsp
.add_pixels4
;
1830 idct_dc_add
= h
->h264dsp
.h264_idct_dc_add
;
1831 idct_add
= h
->h264dsp
.h264_idct_add
;
1833 for (i
= 0; i
< 16; i
++) {
1834 uint8_t *const ptr
= dest_y
+ block_offset
[i
];
1835 const int dir
= h
->intra4x4_pred_mode_cache
[scan8
[i
]];
1837 if (transform_bypass
&& h
->sps
.profile_idc
== 244 && dir
<= 1) {
1838 h
->hpc
.pred4x4_add
[dir
](ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
1843 if (dir
== DIAG_DOWN_LEFT_PRED
|| dir
== VERT_LEFT_PRED
) {
1844 const int topright_avail
= (h
->topright_samples_available
<< i
) & 0x8000;
1845 assert(s
->mb_y
|| linesize
<= block_offset
[i
]);
1846 if (!topright_avail
) {
1848 tr_high
= ((uint16_t *)ptr
)[3 - linesize
/ 2] * 0x0001000100010001ULL
;
1849 topright
= (uint8_t *)&tr_high
;
1851 tr
= ptr
[3 - linesize
] * 0x01010101u
;
1852 topright
= (uint8_t *)&tr
;
1855 topright
= ptr
+ (4 << pixel_shift
) - linesize
;
1859 h
->hpc
.pred4x4
[dir
](ptr
, topright
, linesize
);
1860 nnz
= h
->non_zero_count_cache
[scan8
[i
+ p
* 16]];
1863 if (nnz
== 1 && dctcoef_get(h
->mb
, pixel_shift
, i
* 16 + p
* 256))
1864 idct_dc_add(ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
1866 idct_add(ptr
, h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
), linesize
);
1867 } else if (CONFIG_SVQ3_DECODER
)
1868 ff_svq3_add_idct_c(ptr
, h
->mb
+ i
* 16 + p
* 256, linesize
, qscale
, 0);
1875 h
->hpc
.pred16x16
[h
->intra16x16_pred_mode
](dest_y
, linesize
);
1877 if (h
->non_zero_count_cache
[scan8
[LUMA_DC_BLOCK_INDEX
+ p
]]) {
1878 if (!transform_bypass
)
1879 h
->h264dsp
.h264_luma_dc_dequant_idct(h
->mb
+ (p
* 256 << pixel_shift
),
1881 h
->dequant4_coeff
[p
][qscale
][0]);
1883 static const uint8_t dc_mapping
[16] = {
1884 0 * 16, 1 * 16, 4 * 16, 5 * 16,
1885 2 * 16, 3 * 16, 6 * 16, 7 * 16,
1886 8 * 16, 9 * 16, 12 * 16, 13 * 16,
1887 10 * 16, 11 * 16, 14 * 16, 15 * 16 };
1888 for (i
= 0; i
< 16; i
++)
1889 dctcoef_set(h
->mb
+ (p
* 256 << pixel_shift
),
1890 pixel_shift
, dc_mapping
[i
],
1891 dctcoef_get(h
->mb_luma_dc
[p
],
1895 } else if (CONFIG_SVQ3_DECODER
)
1896 ff_svq3_luma_dc_dequant_idct_c(h
->mb
+ p
* 256,
1897 h
->mb_luma_dc
[p
], qscale
);
1901 static av_always_inline
void hl_decode_mb_idct_luma(H264Context
*h
, int mb_type
,
1902 int is_h264
, int simple
,
1903 int transform_bypass
,
1907 uint8_t *dest_y
, int p
)
1909 MpegEncContext
*const s
= &h
->s
;
1910 void (*idct_add
)(uint8_t *dst
, int16_t *block
, int stride
);
1912 block_offset
+= 16 * p
;
1913 if (!IS_INTRA4x4(mb_type
)) {
1915 if (IS_INTRA16x16(mb_type
)) {
1916 if (transform_bypass
) {
1917 if (h
->sps
.profile_idc
== 244 &&
1918 (h
->intra16x16_pred_mode
== VERT_PRED8x8
||
1919 h
->intra16x16_pred_mode
== HOR_PRED8x8
)) {
1920 h
->hpc
.pred16x16_add
[h
->intra16x16_pred_mode
](dest_y
, block_offset
,
1921 h
->mb
+ (p
* 256 << pixel_shift
),
1924 for (i
= 0; i
< 16; i
++)
1925 if (h
->non_zero_count_cache
[scan8
[i
+ p
* 16]] ||
1926 dctcoef_get(h
->mb
, pixel_shift
, i
* 16 + p
* 256))
1927 s
->dsp
.add_pixels4(dest_y
+ block_offset
[i
],
1928 h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
),
1932 h
->h264dsp
.h264_idct_add16intra(dest_y
, block_offset
,
1933 h
->mb
+ (p
* 256 << pixel_shift
),
1935 h
->non_zero_count_cache
+ p
* 5 * 8);
1937 } else if (h
->cbp
& 15) {
1938 if (transform_bypass
) {
1939 const int di
= IS_8x8DCT(mb_type
) ? 4 : 1;
1940 idct_add
= IS_8x8DCT(mb_type
) ? s
->dsp
.add_pixels8
1941 : s
->dsp
.add_pixels4
;
1942 for (i
= 0; i
< 16; i
+= di
)
1943 if (h
->non_zero_count_cache
[scan8
[i
+ p
* 16]])
1944 idct_add(dest_y
+ block_offset
[i
],
1945 h
->mb
+ (i
* 16 + p
* 256 << pixel_shift
),
1948 if (IS_8x8DCT(mb_type
))
1949 h
->h264dsp
.h264_idct8_add4(dest_y
, block_offset
,
1950 h
->mb
+ (p
* 256 << pixel_shift
),
1952 h
->non_zero_count_cache
+ p
* 5 * 8);
1954 h
->h264dsp
.h264_idct_add16(dest_y
, block_offset
,
1955 h
->mb
+ (p
* 256 << pixel_shift
),
1957 h
->non_zero_count_cache
+ p
* 5 * 8);
1960 } else if (CONFIG_SVQ3_DECODER
) {
1961 for (i
= 0; i
< 16; i
++)
1962 if (h
->non_zero_count_cache
[scan8
[i
+ p
* 16]] || h
->mb
[i
* 16 + p
* 256]) {
1963 // FIXME benchmark weird rule, & below
1964 uint8_t *const ptr
= dest_y
+ block_offset
[i
];
1965 ff_svq3_add_idct_c(ptr
, h
->mb
+ i
* 16 + p
* 256, linesize
,
1966 s
->qscale
, IS_INTRA(mb_type
) ? 1 : 0);
1974 #include "h264_mb_template.c"
1978 #include "h264_mb_template.c"
1982 #include "h264_mb_template.c"
1984 void ff_h264_hl_decode_mb(H264Context
*h
)
1986 MpegEncContext
*const s
= &h
->s
;
1987 const int mb_xy
= h
->mb_xy
;
1988 const int mb_type
= s
->current_picture
.f
.mb_type
[mb_xy
];
1989 int is_complex
= CONFIG_SMALL
|| h
->is_complex
|| IS_INTRA_PCM(mb_type
) || s
->qscale
== 0;
1992 if (is_complex
|| h
->pixel_shift
)
1993 hl_decode_mb_444_complex(h
);
1995 hl_decode_mb_444_simple_8(h
);
1996 } else if (is_complex
) {
1997 hl_decode_mb_complex(h
);
1998 } else if (h
->pixel_shift
) {
1999 hl_decode_mb_simple_16(h
);
2001 hl_decode_mb_simple_8(h
);
2004 static int pred_weight_table(H264Context
*h
)
2006 MpegEncContext
*const s
= &h
->s
;
2008 int luma_def
, chroma_def
;
2011 h
->use_weight_chroma
= 0;
2012 h
->luma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
2013 if (h
->sps
.chroma_format_idc
)
2014 h
->chroma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
2015 luma_def
= 1 << h
->luma_log2_weight_denom
;
2016 chroma_def
= 1 << h
->chroma_log2_weight_denom
;
2018 for (list
= 0; list
< 2; list
++) {
2019 h
->luma_weight_flag
[list
] = 0;
2020 h
->chroma_weight_flag
[list
] = 0;
2021 for (i
= 0; i
< h
->ref_count
[list
]; i
++) {
2022 int luma_weight_flag
, chroma_weight_flag
;
2024 luma_weight_flag
= get_bits1(&s
->gb
);
2025 if (luma_weight_flag
) {
2026 h
->luma_weight
[i
][list
][0] = get_se_golomb(&s
->gb
);
2027 h
->luma_weight
[i
][list
][1] = get_se_golomb(&s
->gb
);
2028 if (h
->luma_weight
[i
][list
][0] != luma_def
||
2029 h
->luma_weight
[i
][list
][1] != 0) {
2031 h
->luma_weight_flag
[list
] = 1;
2034 h
->luma_weight
[i
][list
][0] = luma_def
;
2035 h
->luma_weight
[i
][list
][1] = 0;
2038 if (h
->sps
.chroma_format_idc
) {
2039 chroma_weight_flag
= get_bits1(&s
->gb
);
2040 if (chroma_weight_flag
) {
2042 for (j
= 0; j
< 2; j
++) {
2043 h
->chroma_weight
[i
][list
][j
][0] = get_se_golomb(&s
->gb
);
2044 h
->chroma_weight
[i
][list
][j
][1] = get_se_golomb(&s
->gb
);
2045 if (h
->chroma_weight
[i
][list
][j
][0] != chroma_def
||
2046 h
->chroma_weight
[i
][list
][j
][1] != 0) {
2047 h
->use_weight_chroma
= 1;
2048 h
->chroma_weight_flag
[list
] = 1;
2053 for (j
= 0; j
< 2; j
++) {
2054 h
->chroma_weight
[i
][list
][j
][0] = chroma_def
;
2055 h
->chroma_weight
[i
][list
][j
][1] = 0;
2060 if (h
->slice_type_nos
!= AV_PICTURE_TYPE_B
)
2063 h
->use_weight
= h
->use_weight
|| h
->use_weight_chroma
;
2068 * Initialize implicit_weight table.
2069 * @param field 0/1 initialize the weight for interlaced MBAFF
2070 * -1 initializes the rest
2072 static void implicit_weight_table(H264Context
*h
, int field
)
2074 MpegEncContext
*const s
= &h
->s
;
2075 int ref0
, ref1
, i
, cur_poc
, ref_start
, ref_count0
, ref_count1
;
2077 for (i
= 0; i
< 2; i
++) {
2078 h
->luma_weight_flag
[i
] = 0;
2079 h
->chroma_weight_flag
[i
] = 0;
2083 if (s
->picture_structure
== PICT_FRAME
) {
2084 cur_poc
= s
->current_picture_ptr
->poc
;
2086 cur_poc
= s
->current_picture_ptr
->field_poc
[s
->picture_structure
- 1];
2088 if (h
->ref_count
[0] == 1 && h
->ref_count
[1] == 1 && !FRAME_MBAFF
&&
2089 h
->ref_list
[0][0].poc
+ h
->ref_list
[1][0].poc
== 2 * cur_poc
) {
2091 h
->use_weight_chroma
= 0;
2095 ref_count0
= h
->ref_count
[0];
2096 ref_count1
= h
->ref_count
[1];
2098 cur_poc
= s
->current_picture_ptr
->field_poc
[field
];
2100 ref_count0
= 16 + 2 * h
->ref_count
[0];
2101 ref_count1
= 16 + 2 * h
->ref_count
[1];
2105 h
->use_weight_chroma
= 2;
2106 h
->luma_log2_weight_denom
= 5;
2107 h
->chroma_log2_weight_denom
= 5;
2109 for (ref0
= ref_start
; ref0
< ref_count0
; ref0
++) {
2110 int poc0
= h
->ref_list
[0][ref0
].poc
;
2111 for (ref1
= ref_start
; ref1
< ref_count1
; ref1
++) {
2113 if (!h
->ref_list
[0][ref0
].long_ref
&& !h
->ref_list
[1][ref1
].long_ref
) {
2114 int poc1
= h
->ref_list
[1][ref1
].poc
;
2115 int td
= av_clip(poc1
- poc0
, -128, 127);
2117 int tb
= av_clip(cur_poc
- poc0
, -128, 127);
2118 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
2119 int dist_scale_factor
= (tb
* tx
+ 32) >> 8;
2120 if (dist_scale_factor
>= -64 && dist_scale_factor
<= 128)
2121 w
= 64 - dist_scale_factor
;
2125 h
->implicit_weight
[ref0
][ref1
][0] =
2126 h
->implicit_weight
[ref0
][ref1
][1] = w
;
2128 h
->implicit_weight
[ref0
][ref1
][field
] = w
;
2135 * instantaneous decoder refresh.
2137 static void idr(H264Context
*h
)
2139 ff_h264_remove_all_refs(h
);
2140 h
->prev_frame_num
= 0;
2141 h
->prev_frame_num_offset
= 0;
2143 h
->prev_poc_lsb
= 0;
2146 /* forget old pics after a seek */
2147 static void flush_change(H264Context
*h
)
2150 for (i
= 0; i
< MAX_DELAYED_PIC_COUNT
; i
++)
2151 h
->last_pocs
[i
] = INT_MIN
;
2152 h
->outputed_poc
= h
->next_outputed_poc
= INT_MIN
;
2153 h
->prev_interlaced_frame
= 1;
2155 if (h
->s
.current_picture_ptr
)
2156 h
->s
.current_picture_ptr
->f
.reference
= 0;
2157 h
->s
.first_field
= 0;
2158 memset(h
->ref_list
[0], 0, sizeof(h
->ref_list
[0]));
2159 memset(h
->ref_list
[1], 0, sizeof(h
->ref_list
[1]));
2160 memset(h
->default_ref_list
[0], 0, sizeof(h
->default_ref_list
[0]));
2161 memset(h
->default_ref_list
[1], 0, sizeof(h
->default_ref_list
[1]));
2162 ff_h264_reset_sei(h
);
2165 /* forget old pics after a seek */
2166 static void flush_dpb(AVCodecContext
*avctx
)
2168 H264Context
*h
= avctx
->priv_data
;
2171 for (i
= 0; i
< MAX_DELAYED_PIC_COUNT
; i
++) {
2172 if (h
->delayed_pic
[i
])
2173 h
->delayed_pic
[i
]->f
.reference
= 0;
2174 h
->delayed_pic
[i
] = NULL
;
2178 ff_mpeg_flush(avctx
);
2181 static int init_poc(H264Context
*h
)
2183 MpegEncContext
*const s
= &h
->s
;
2184 const int max_frame_num
= 1 << h
->sps
.log2_max_frame_num
;
2186 Picture
*cur
= s
->current_picture_ptr
;
2188 h
->frame_num_offset
= h
->prev_frame_num_offset
;
2189 if (h
->frame_num
< h
->prev_frame_num
)
2190 h
->frame_num_offset
+= max_frame_num
;
2192 if (h
->sps
.poc_type
== 0) {
2193 const int max_poc_lsb
= 1 << h
->sps
.log2_max_poc_lsb
;
2195 if (h
->poc_lsb
< h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
>= max_poc_lsb
/ 2)
2196 h
->poc_msb
= h
->prev_poc_msb
+ max_poc_lsb
;
2197 else if (h
->poc_lsb
> h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
< -max_poc_lsb
/ 2)
2198 h
->poc_msb
= h
->prev_poc_msb
- max_poc_lsb
;
2200 h
->poc_msb
= h
->prev_poc_msb
;
2202 field_poc
[1] = h
->poc_msb
+ h
->poc_lsb
;
2203 if (s
->picture_structure
== PICT_FRAME
)
2204 field_poc
[1] += h
->delta_poc_bottom
;
2205 } else if (h
->sps
.poc_type
== 1) {
2206 int abs_frame_num
, expected_delta_per_poc_cycle
, expectedpoc
;
2209 if (h
->sps
.poc_cycle_length
!= 0)
2210 abs_frame_num
= h
->frame_num_offset
+ h
->frame_num
;
2214 if (h
->nal_ref_idc
== 0 && abs_frame_num
> 0)
2217 expected_delta_per_poc_cycle
= 0;
2218 for (i
= 0; i
< h
->sps
.poc_cycle_length
; i
++)
2219 // FIXME integrate during sps parse
2220 expected_delta_per_poc_cycle
+= h
->sps
.offset_for_ref_frame
[i
];
2222 if (abs_frame_num
> 0) {
2223 int poc_cycle_cnt
= (abs_frame_num
- 1) / h
->sps
.poc_cycle_length
;
2224 int frame_num_in_poc_cycle
= (abs_frame_num
- 1) % h
->sps
.poc_cycle_length
;
2226 expectedpoc
= poc_cycle_cnt
* expected_delta_per_poc_cycle
;
2227 for (i
= 0; i
<= frame_num_in_poc_cycle
; i
++)
2228 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_ref_frame
[i
];
2232 if (h
->nal_ref_idc
== 0)
2233 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_non_ref_pic
;
2235 field_poc
[0] = expectedpoc
+ h
->delta_poc
[0];
2236 field_poc
[1] = field_poc
[0] + h
->sps
.offset_for_top_to_bottom_field
;
2238 if (s
->picture_structure
== PICT_FRAME
)
2239 field_poc
[1] += h
->delta_poc
[1];
2241 int poc
= 2 * (h
->frame_num_offset
+ h
->frame_num
);
2243 if (!h
->nal_ref_idc
)
2250 if (s
->picture_structure
!= PICT_BOTTOM_FIELD
)
2251 s
->current_picture_ptr
->field_poc
[0] = field_poc
[0];
2252 if (s
->picture_structure
!= PICT_TOP_FIELD
)
2253 s
->current_picture_ptr
->field_poc
[1] = field_poc
[1];
2254 cur
->poc
= FFMIN(cur
->field_poc
[0], cur
->field_poc
[1]);
2260 * initialize scan tables
2262 static void init_scan_tables(H264Context
*h
)
2265 for (i
= 0; i
< 16; i
++) {
2266 #define T(x) (x >> 2) | ((x << 2) & 0xF)
2267 h
->zigzag_scan
[i
] = T(zigzag_scan
[i
]);
2268 h
->field_scan
[i
] = T(field_scan
[i
]);
2271 for (i
= 0; i
< 64; i
++) {
2272 #define T(x) (x >> 3) | ((x & 7) << 3)
2273 h
->zigzag_scan8x8
[i
] = T(ff_zigzag_direct
[i
]);
2274 h
->zigzag_scan8x8_cavlc
[i
] = T(zigzag_scan8x8_cavlc
[i
]);
2275 h
->field_scan8x8
[i
] = T(field_scan8x8
[i
]);
2276 h
->field_scan8x8_cavlc
[i
] = T(field_scan8x8_cavlc
[i
]);
2279 if (h
->sps
.transform_bypass
) { // FIXME same ugly
2280 h
->zigzag_scan_q0
= zigzag_scan
;
2281 h
->zigzag_scan8x8_q0
= ff_zigzag_direct
;
2282 h
->zigzag_scan8x8_cavlc_q0
= zigzag_scan8x8_cavlc
;
2283 h
->field_scan_q0
= field_scan
;
2284 h
->field_scan8x8_q0
= field_scan8x8
;
2285 h
->field_scan8x8_cavlc_q0
= field_scan8x8_cavlc
;
2287 h
->zigzag_scan_q0
= h
->zigzag_scan
;
2288 h
->zigzag_scan8x8_q0
= h
->zigzag_scan8x8
;
2289 h
->zigzag_scan8x8_cavlc_q0
= h
->zigzag_scan8x8_cavlc
;
2290 h
->field_scan_q0
= h
->field_scan
;
2291 h
->field_scan8x8_q0
= h
->field_scan8x8
;
2292 h
->field_scan8x8_cavlc_q0
= h
->field_scan8x8_cavlc
;
2296 static int field_end(H264Context
*h
, int in_setup
)
2298 MpegEncContext
*const s
= &h
->s
;
2299 AVCodecContext
*const avctx
= s
->avctx
;
2303 if (!in_setup
&& !s
->droppable
)
2304 ff_thread_report_progress(&s
->current_picture_ptr
->f
, INT_MAX
,
2305 s
->picture_structure
== PICT_BOTTOM_FIELD
);
2307 if (CONFIG_H264_VDPAU_DECODER
&&
2308 s
->avctx
->codec
->capabilities
& CODEC_CAP_HWACCEL_VDPAU
)
2309 ff_vdpau_h264_set_reference_frames(s
);
2311 if (in_setup
|| !(avctx
->active_thread_type
& FF_THREAD_FRAME
)) {
2312 if (!s
->droppable
) {
2313 err
= ff_h264_execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
2314 h
->prev_poc_msb
= h
->poc_msb
;
2315 h
->prev_poc_lsb
= h
->poc_lsb
;
2317 h
->prev_frame_num_offset
= h
->frame_num_offset
;
2318 h
->prev_frame_num
= h
->frame_num
;
2319 h
->outputed_poc
= h
->next_outputed_poc
;
2322 if (avctx
->hwaccel
) {
2323 if (avctx
->hwaccel
->end_frame(avctx
) < 0)
2324 av_log(avctx
, AV_LOG_ERROR
,
2325 "hardware accelerator failed to decode picture\n");
2328 if (CONFIG_H264_VDPAU_DECODER
&&
2329 s
->avctx
->codec
->capabilities
& CODEC_CAP_HWACCEL_VDPAU
)
2330 ff_vdpau_h264_picture_complete(s
);
2333 * FIXME: Error handling code does not seem to support interlaced
2334 * when slices span multiple rows
2335 * The ff_er_add_slice calls don't work right for bottom
2336 * fields; they cause massive erroneous error concealing
2337 * Error marking covers both fields (top and bottom).
2338 * This causes a mismatched s->error_count
2339 * and a bad error table. Further, the error count goes to
2340 * INT_MAX when called for bottom field, because mb_y is
2341 * past end by one (callers fault) and resync_mb_y != 0
2342 * causes problems for the first MB line, too.
2347 ff_MPV_frame_end(s
);
2349 h
->current_slice
= 0;
2355 * Replicate H264 "master" context to thread contexts.
2357 static int clone_slice(H264Context
*dst
, H264Context
*src
)
2361 memcpy(dst
->block_offset
, src
->block_offset
, sizeof(dst
->block_offset
));
2362 dst
->s
.current_picture_ptr
= src
->s
.current_picture_ptr
;
2363 dst
->s
.current_picture
= src
->s
.current_picture
;
2364 dst
->s
.linesize
= src
->s
.linesize
;
2365 dst
->s
.uvlinesize
= src
->s
.uvlinesize
;
2366 dst
->s
.first_field
= src
->s
.first_field
;
2368 if (!dst
->s
.edge_emu_buffer
&&
2369 (ret
= ff_mpv_frame_size_alloc(&dst
->s
, dst
->s
.linesize
))) {
2370 av_log(dst
->s
.avctx
, AV_LOG_ERROR
,
2371 "Failed to allocate scratch buffers\n");
2375 dst
->prev_poc_msb
= src
->prev_poc_msb
;
2376 dst
->prev_poc_lsb
= src
->prev_poc_lsb
;
2377 dst
->prev_frame_num_offset
= src
->prev_frame_num_offset
;
2378 dst
->prev_frame_num
= src
->prev_frame_num
;
2379 dst
->short_ref_count
= src
->short_ref_count
;
2381 memcpy(dst
->short_ref
, src
->short_ref
, sizeof(dst
->short_ref
));
2382 memcpy(dst
->long_ref
, src
->long_ref
, sizeof(dst
->long_ref
));
2383 memcpy(dst
->default_ref_list
, src
->default_ref_list
, sizeof(dst
->default_ref_list
));
2385 memcpy(dst
->dequant4_coeff
, src
->dequant4_coeff
, sizeof(src
->dequant4_coeff
));
2386 memcpy(dst
->dequant8_coeff
, src
->dequant8_coeff
, sizeof(src
->dequant8_coeff
));
2392 * Compute profile from profile_idc and constraint_set?_flags.
2396 * @return profile as defined by FF_PROFILE_H264_*
2398 int ff_h264_get_profile(SPS
*sps
)
2400 int profile
= sps
->profile_idc
;
2402 switch (sps
->profile_idc
) {
2403 case FF_PROFILE_H264_BASELINE
:
2404 // constraint_set1_flag set to 1
2405 profile
|= (sps
->constraint_set_flags
& 1 << 1) ? FF_PROFILE_H264_CONSTRAINED
: 0;
2407 case FF_PROFILE_H264_HIGH_10
:
2408 case FF_PROFILE_H264_HIGH_422
:
2409 case FF_PROFILE_H264_HIGH_444_PREDICTIVE
:
2410 // constraint_set3_flag set to 1
2411 profile
|= (sps
->constraint_set_flags
& 1 << 3) ? FF_PROFILE_H264_INTRA
: 0;
2418 static int h264_set_parameter_from_sps(H264Context
*h
)
2420 MpegEncContext
*s
= &h
->s
;
2422 if (s
->flags
& CODEC_FLAG_LOW_DELAY
||
2423 (h
->sps
.bitstream_restriction_flag
&&
2424 !h
->sps
.num_reorder_frames
)) {
2425 if (s
->avctx
->has_b_frames
> 1 || h
->delayed_pic
[0])
2426 av_log(h
->s
.avctx
, AV_LOG_WARNING
, "Delayed frames seen. "
2427 "Reenabling low delay requires a codec flush.\n");
2432 if (s
->avctx
->has_b_frames
< 2)
2433 s
->avctx
->has_b_frames
= !s
->low_delay
;
2435 if (s
->avctx
->bits_per_raw_sample
!= h
->sps
.bit_depth_luma
||
2436 h
->cur_chroma_format_idc
!= h
->sps
.chroma_format_idc
) {
2437 if (s
->avctx
->codec
&&
2438 s
->avctx
->codec
->capabilities
& CODEC_CAP_HWACCEL_VDPAU
&&
2439 (h
->sps
.bit_depth_luma
!= 8 || h
->sps
.chroma_format_idc
> 1)) {
2440 av_log(s
->avctx
, AV_LOG_ERROR
,
2441 "VDPAU decoding does not support video colorspace.\n");
2442 return AVERROR_INVALIDDATA
;
2444 if (h
->sps
.bit_depth_luma
>= 8 && h
->sps
.bit_depth_luma
<= 10) {
2445 s
->avctx
->bits_per_raw_sample
= h
->sps
.bit_depth_luma
;
2446 h
->cur_chroma_format_idc
= h
->sps
.chroma_format_idc
;
2447 h
->pixel_shift
= h
->sps
.bit_depth_luma
> 8;
2449 ff_h264dsp_init(&h
->h264dsp
, h
->sps
.bit_depth_luma
,
2450 h
->sps
.chroma_format_idc
);
2451 ff_h264chroma_init(&h
->h264chroma
, h
->sps
.bit_depth_chroma
);
2452 ff_h264qpel_init(&h
->h264qpel
, h
->sps
.bit_depth_luma
);
2453 ff_h264_pred_init(&h
->hpc
, s
->codec_id
, h
->sps
.bit_depth_luma
,
2454 h
->sps
.chroma_format_idc
);
2455 s
->dsp
.dct_bits
= h
->sps
.bit_depth_luma
> 8 ? 32 : 16;
2456 ff_dsputil_init(&s
->dsp
, s
->avctx
);
2457 ff_videodsp_init(&s
->vdsp
, h
->sps
.bit_depth_luma
);
2459 av_log(s
->avctx
, AV_LOG_ERROR
, "Unsupported bit depth: %d\n",
2460 h
->sps
.bit_depth_luma
);
2461 return AVERROR_INVALIDDATA
;
2467 static enum PixelFormat
get_pixel_format(H264Context
*h
)
2469 MpegEncContext
*const s
= &h
->s
;
2470 switch (h
->sps
.bit_depth_luma
) {
2473 if (s
->avctx
->colorspace
== AVCOL_SPC_RGB
) {
2474 return AV_PIX_FMT_GBRP9
;
2476 return AV_PIX_FMT_YUV444P9
;
2477 } else if (CHROMA422
)
2478 return AV_PIX_FMT_YUV422P9
;
2480 return AV_PIX_FMT_YUV420P9
;
2484 if (s
->avctx
->colorspace
== AVCOL_SPC_RGB
) {
2485 return AV_PIX_FMT_GBRP10
;
2487 return AV_PIX_FMT_YUV444P10
;
2488 } else if (CHROMA422
)
2489 return AV_PIX_FMT_YUV422P10
;
2491 return AV_PIX_FMT_YUV420P10
;
2495 if (s
->avctx
->colorspace
== AVCOL_SPC_RGB
) {
2496 return AV_PIX_FMT_GBRP
;
2498 return s
->avctx
->color_range
== AVCOL_RANGE_JPEG
? AV_PIX_FMT_YUVJ444P
2499 : AV_PIX_FMT_YUV444P
;
2500 } else if (CHROMA422
) {
2501 return s
->avctx
->color_range
== AVCOL_RANGE_JPEG
? AV_PIX_FMT_YUVJ422P
2502 : AV_PIX_FMT_YUV422P
;
2504 return s
->avctx
->get_format(s
->avctx
, s
->avctx
->codec
->pix_fmts
?
2505 s
->avctx
->codec
->pix_fmts
:
2506 s
->avctx
->color_range
== AVCOL_RANGE_JPEG
?
2507 hwaccel_pixfmt_list_h264_jpeg_420
:
2508 ff_hwaccel_pixfmt_list_420
);
2512 av_log(s
->avctx
, AV_LOG_ERROR
,
2513 "Unsupported bit depth: %d\n", h
->sps
.bit_depth_luma
);
2514 return AVERROR_INVALIDDATA
;
2518 static int h264_slice_header_init(H264Context
*h
, int reinit
)
2520 MpegEncContext
*const s
= &h
->s
;
2523 avcodec_set_dimensions(s
->avctx
, s
->width
, s
->height
);
2524 s
->avctx
->sample_aspect_ratio
= h
->sps
.sar
;
2525 av_assert0(s
->avctx
->sample_aspect_ratio
.den
);
2527 if (h
->sps
.timing_info_present_flag
) {
2528 int64_t den
= h
->sps
.time_scale
;
2529 if (h
->x264_build
< 44U)
2531 av_reduce(&s
->avctx
->time_base
.num
, &s
->avctx
->time_base
.den
,
2532 h
->sps
.num_units_in_tick
, den
, 1 << 30);
2535 s
->avctx
->hwaccel
= ff_find_hwaccel(s
->avctx
->codec
->id
, s
->avctx
->pix_fmt
);
2539 if ((ret
= ff_MPV_common_frame_size_change(s
)) < 0) {
2540 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ff_MPV_common_frame_size_change() failed.\n");
2544 if ((ret
= ff_MPV_common_init(s
)) < 0) {
2545 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ff_MPV_common_init() failed.\n");
2550 h
->prev_interlaced_frame
= 1;
2552 init_scan_tables(h
);
2553 if (ff_h264_alloc_tables(h
) < 0) {
2554 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
2555 "Could not allocate memory for h264\n");
2556 return AVERROR(ENOMEM
);
2559 if (!HAVE_THREADS
|| !(s
->avctx
->active_thread_type
& FF_THREAD_SLICE
)) {
2560 if (context_init(h
) < 0) {
2561 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "context_init() failed.\n");
2565 for (i
= 1; i
< s
->slice_context_count
; i
++) {
2567 c
= h
->thread_context
[i
] = av_malloc(sizeof(H264Context
));
2568 memcpy(c
, h
->s
.thread_context
[i
], sizeof(MpegEncContext
));
2569 memset(&c
->s
+ 1, 0, sizeof(H264Context
) - sizeof(MpegEncContext
));
2570 c
->h264dsp
= h
->h264dsp
;
2571 c
->h264qpel
= h
->h264qpel
;
2574 c
->pixel_shift
= h
->pixel_shift
;
2575 init_scan_tables(c
);
2576 clone_tables(c
, h
, i
);
2579 for (i
= 0; i
< s
->slice_context_count
; i
++)
2580 if (context_init(h
->thread_context
[i
]) < 0) {
2581 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "context_init() failed.\n");
2590 * Decode a slice header.
2591 * This will also call ff_MPV_common_init() and frame_start() as needed.
2593 * @param h h264context
2594 * @param h0 h264 master context (differs from 'h' when doing sliced based
2595 * parallel decoding)
2597 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
2599 static int decode_slice_header(H264Context
*h
, H264Context
*h0
)
2601 MpegEncContext
*const s
= &h
->s
;
2602 MpegEncContext
*const s0
= &h0
->s
;
2603 unsigned int first_mb_in_slice
;
2604 unsigned int pps_id
;
2605 int num_ref_idx_active_override_flag
, max_refs
, ret
;
2606 unsigned int slice_type
, tmp
, i
, j
;
2607 int default_ref_list_done
= 0;
2608 int last_pic_structure
, last_pic_droppable
;
2609 int needs_reinit
= 0;
2611 s
->me
.qpel_put
= h
->h264qpel
.put_h264_qpel_pixels_tab
;
2612 s
->me
.qpel_avg
= h
->h264qpel
.avg_h264_qpel_pixels_tab
;
2614 first_mb_in_slice
= get_ue_golomb(&s
->gb
);
2616 if (first_mb_in_slice
== 0) { // FIXME better field boundary detection
2617 if (h0
->current_slice
&& FIELD_PICTURE
) {
2621 h0
->current_slice
= 0;
2622 if (!s0
->first_field
) {
2623 if (s
->current_picture_ptr
&& !s
->droppable
&&
2624 s
->current_picture_ptr
->owner2
== s
) {
2625 ff_thread_report_progress(&s
->current_picture_ptr
->f
, INT_MAX
,
2626 s
->picture_structure
== PICT_BOTTOM_FIELD
);
2628 s
->current_picture_ptr
= NULL
;
2632 slice_type
= get_ue_golomb_31(&s
->gb
);
2633 if (slice_type
> 9) {
2634 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
2635 "slice type too large (%d) at %d %d\n",
2636 h
->slice_type
, s
->mb_x
, s
->mb_y
);
2639 if (slice_type
> 4) {
2641 h
->slice_type_fixed
= 1;
2643 h
->slice_type_fixed
= 0;
2645 slice_type
= golomb_to_pict_type
[slice_type
];
2646 if (slice_type
== AV_PICTURE_TYPE_I
||
2647 (h0
->current_slice
!= 0 && slice_type
== h0
->last_slice_type
)) {
2648 default_ref_list_done
= 1;
2650 h
->slice_type
= slice_type
;
2651 h
->slice_type_nos
= slice_type
& 3;
2653 // to make a few old functions happy, it's wrong though
2654 s
->pict_type
= h
->slice_type
;
2656 pps_id
= get_ue_golomb(&s
->gb
);
2657 if (pps_id
>= MAX_PPS_COUNT
) {
2658 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "pps_id out of range\n");
2661 if (!h0
->pps_buffers
[pps_id
]) {
2662 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
2663 "non-existing PPS %u referenced\n",
2667 h
->pps
= *h0
->pps_buffers
[pps_id
];
2669 if (!h0
->sps_buffers
[h
->pps
.sps_id
]) {
2670 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
2671 "non-existing SPS %u referenced\n",
2676 if (h
->pps
.sps_id
!= h
->current_sps_id
||
2677 h
->context_reinitialized
||
2678 h0
->sps_buffers
[h
->pps
.sps_id
]->new) {
2679 SPS
*new_sps
= h0
->sps_buffers
[h
->pps
.sps_id
];
2681 h0
->sps_buffers
[h
->pps
.sps_id
]->new = 0;
2683 if (h
->sps
.chroma_format_idc
!= new_sps
->chroma_format_idc
||
2684 h
->sps
.bit_depth_luma
!= new_sps
->bit_depth_luma
)
2687 h
->current_sps_id
= h
->pps
.sps_id
;
2688 h
->sps
= *h0
->sps_buffers
[h
->pps
.sps_id
];
2690 if ((ret
= h264_set_parameter_from_sps(h
)) < 0)
2694 s
->avctx
->profile
= ff_h264_get_profile(&h
->sps
);
2695 s
->avctx
->level
= h
->sps
.level_idc
;
2696 s
->avctx
->refs
= h
->sps
.ref_frame_count
;
2698 if (s
->mb_width
!= h
->sps
.mb_width
||
2699 s
->mb_height
!= h
->sps
.mb_height
* (2 - h
->sps
.frame_mbs_only_flag
))
2702 s
->mb_width
= h
->sps
.mb_width
;
2703 s
->mb_height
= h
->sps
.mb_height
* (2 - h
->sps
.frame_mbs_only_flag
);
2705 h
->b_stride
= s
->mb_width
* 4;
2707 s
->chroma_y_shift
= h
->sps
.chroma_format_idc
<= 1; // 400 uses yuv420p
2709 s
->width
= 16 * s
->mb_width
- (2 >> CHROMA444
) * FFMIN(h
->sps
.crop_right
, (8 << CHROMA444
) - 1);
2710 if (h
->sps
.frame_mbs_only_flag
)
2711 s
->height
= 16 * s
->mb_height
- (1 << s
->chroma_y_shift
) * FFMIN(h
->sps
.crop_bottom
, (16 >> s
->chroma_y_shift
) - 1);
2713 s
->height
= 16 * s
->mb_height
- (2 << s
->chroma_y_shift
) * FFMIN(h
->sps
.crop_bottom
, (16 >> s
->chroma_y_shift
) - 1);
2715 if (FFALIGN(s
->avctx
->width
, 16) == s
->width
&&
2716 FFALIGN(s
->avctx
->height
, 16) == s
->height
) {
2717 s
->width
= s
->avctx
->width
;
2718 s
->height
= s
->avctx
->height
;
2721 if (h
->sps
.video_signal_type_present_flag
) {
2722 s
->avctx
->color_range
= h
->sps
.full_range
? AVCOL_RANGE_JPEG
2724 if (h
->sps
.colour_description_present_flag
) {
2725 if (s
->avctx
->colorspace
!= h
->sps
.colorspace
)
2727 s
->avctx
->color_primaries
= h
->sps
.color_primaries
;
2728 s
->avctx
->color_trc
= h
->sps
.color_trc
;
2729 s
->avctx
->colorspace
= h
->sps
.colorspace
;
2733 if (s
->context_initialized
&&
2734 (s
->width
!= s
->avctx
->width
||
2735 s
->height
!= s
->avctx
->height
||
2737 av_cmp_q(h
->sps
.sar
, s
->avctx
->sample_aspect_ratio
))) {
2740 av_log(s
->avctx
, AV_LOG_ERROR
, "changing width/height on "
2741 "slice %d\n", h0
->current_slice
+ 1);
2742 return AVERROR_INVALIDDATA
;
2747 if ((ret
= get_pixel_format(h
)) < 0)
2749 s
->avctx
->pix_fmt
= ret
;
2751 av_log(h
->s
.avctx
, AV_LOG_INFO
, "Reinit context to %dx%d, "
2752 "pix_fmt: %d\n", s
->width
, s
->height
, s
->avctx
->pix_fmt
);
2754 if ((ret
= h264_slice_header_init(h
, 1)) < 0) {
2755 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
2756 "h264_slice_header_init() failed\n");
2759 h
->context_reinitialized
= 1;
2761 if (!s
->context_initialized
) {
2763 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
2764 "Cannot (re-)initialize context during parallel decoding.\n");
2768 if ((ret
= get_pixel_format(h
)) < 0)
2770 s
->avctx
->pix_fmt
= ret
;
2772 if ((ret
= h264_slice_header_init(h
, 0)) < 0) {
2773 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
2774 "h264_slice_header_init() failed\n");
2779 if (h
== h0
&& h
->dequant_coeff_pps
!= pps_id
) {
2780 h
->dequant_coeff_pps
= pps_id
;
2781 init_dequant_tables(h
);
2784 h
->frame_num
= get_bits(&s
->gb
, h
->sps
.log2_max_frame_num
);
2787 h
->mb_aff_frame
= 0;
2788 last_pic_structure
= s0
->picture_structure
;
2789 last_pic_droppable
= s0
->droppable
;
2790 s
->droppable
= h
->nal_ref_idc
== 0;
2791 if (h
->sps
.frame_mbs_only_flag
) {
2792 s
->picture_structure
= PICT_FRAME
;
2794 if (get_bits1(&s
->gb
)) { // field_pic_flag
2795 s
->picture_structure
= PICT_TOP_FIELD
+ get_bits1(&s
->gb
); // bottom_field_flag
2797 s
->picture_structure
= PICT_FRAME
;
2798 h
->mb_aff_frame
= h
->sps
.mb_aff
;
2801 h
->mb_field_decoding_flag
= s
->picture_structure
!= PICT_FRAME
;
2803 if (h0
->current_slice
!= 0) {
2804 if (last_pic_structure
!= s
->picture_structure
||
2805 last_pic_droppable
!= s
->droppable
) {
2806 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
2807 "Changing field mode (%d -> %d) between slices is not allowed\n",
2808 last_pic_structure
, s
->picture_structure
);
2809 s
->picture_structure
= last_pic_structure
;
2810 s
->droppable
= last_pic_droppable
;
2811 return AVERROR_INVALIDDATA
;
2812 } else if (!s0
->current_picture_ptr
) {
2813 av_log(s
->avctx
, AV_LOG_ERROR
,
2814 "unset current_picture_ptr on %d. slice\n",
2815 h0
->current_slice
+ 1);
2816 return AVERROR_INVALIDDATA
;
2819 /* Shorten frame num gaps so we don't have to allocate reference
2820 * frames just to throw them away */
2821 if (h
->frame_num
!= h
->prev_frame_num
) {
2822 int unwrap_prev_frame_num
= h
->prev_frame_num
;
2823 int max_frame_num
= 1 << h
->sps
.log2_max_frame_num
;
2825 if (unwrap_prev_frame_num
> h
->frame_num
)
2826 unwrap_prev_frame_num
-= max_frame_num
;
2828 if ((h
->frame_num
- unwrap_prev_frame_num
) > h
->sps
.ref_frame_count
) {
2829 unwrap_prev_frame_num
= (h
->frame_num
- h
->sps
.ref_frame_count
) - 1;
2830 if (unwrap_prev_frame_num
< 0)
2831 unwrap_prev_frame_num
+= max_frame_num
;
2833 h
->prev_frame_num
= unwrap_prev_frame_num
;
2837 /* See if we have a decoded first field looking for a pair...
2838 * Here, we're using that to see if we should mark previously
2839 * decode frames as "finished".
2840 * We have to do that before the "dummy" in-between frame allocation,
2841 * since that can modify s->current_picture_ptr. */
2842 if (s0
->first_field
) {
2843 assert(s0
->current_picture_ptr
);
2844 assert(s0
->current_picture_ptr
->f
.data
[0]);
2845 assert(s0
->current_picture_ptr
->f
.reference
!= DELAYED_PIC_REF
);
2847 /* Mark old field/frame as completed */
2848 if (!last_pic_droppable
&& s0
->current_picture_ptr
->owner2
== s0
) {
2849 ff_thread_report_progress(&s0
->current_picture_ptr
->f
, INT_MAX
,
2850 last_pic_structure
== PICT_BOTTOM_FIELD
);
2853 /* figure out if we have a complementary field pair */
2854 if (!FIELD_PICTURE
|| s
->picture_structure
== last_pic_structure
) {
2855 /* Previous field is unmatched. Don't display it, but let it
2856 * remain for reference if marked as such. */
2857 if (!last_pic_droppable
&& last_pic_structure
!= PICT_FRAME
) {
2858 ff_thread_report_progress(&s0
->current_picture_ptr
->f
, INT_MAX
,
2859 last_pic_structure
== PICT_TOP_FIELD
);
2862 if (s0
->current_picture_ptr
->frame_num
!= h
->frame_num
) {
2863 /* This and previous field were reference, but had
2864 * different frame_nums. Consider this field first in
2865 * pair. Throw away previous field except for reference
2867 if (!last_pic_droppable
&& last_pic_structure
!= PICT_FRAME
) {
2868 ff_thread_report_progress(&s0
->current_picture_ptr
->f
, INT_MAX
,
2869 last_pic_structure
== PICT_TOP_FIELD
);
2872 /* Second field in complementary pair */
2873 if (!((last_pic_structure
== PICT_TOP_FIELD
&&
2874 s
->picture_structure
== PICT_BOTTOM_FIELD
) ||
2875 (last_pic_structure
== PICT_BOTTOM_FIELD
&&
2876 s
->picture_structure
== PICT_TOP_FIELD
))) {
2877 av_log(s
->avctx
, AV_LOG_ERROR
,
2878 "Invalid field mode combination %d/%d\n",
2879 last_pic_structure
, s
->picture_structure
);
2880 s
->picture_structure
= last_pic_structure
;
2881 s
->droppable
= last_pic_droppable
;
2882 return AVERROR_INVALIDDATA
;
2883 } else if (last_pic_droppable
!= s
->droppable
) {
2884 av_log(s
->avctx
, AV_LOG_ERROR
,
2885 "Cannot combine reference and non-reference fields in the same frame\n");
2886 av_log_ask_for_sample(s
->avctx
, NULL
);
2887 s
->picture_structure
= last_pic_structure
;
2888 s
->droppable
= last_pic_droppable
;
2889 return AVERROR_PATCHWELCOME
;
2892 /* Take ownership of this buffer. Note that if another thread owned
2893 * the first field of this buffer, we're not operating on that pointer,
2894 * so the original thread is still responsible for reporting progress
2895 * on that first field (or if that was us, we just did that above).
2896 * By taking ownership, we assign responsibility to ourselves to
2897 * report progress on the second field. */
2898 s0
->current_picture_ptr
->owner2
= s0
;
2903 while (h
->frame_num
!= h
->prev_frame_num
&&
2904 h
->frame_num
!= (h
->prev_frame_num
+ 1) % (1 << h
->sps
.log2_max_frame_num
)) {
2905 Picture
*prev
= h
->short_ref_count
? h
->short_ref
[0] : NULL
;
2906 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "Frame num gap %d %d\n",
2907 h
->frame_num
, h
->prev_frame_num
);
2908 if (ff_h264_frame_start(h
) < 0)
2910 h
->prev_frame_num
++;
2911 h
->prev_frame_num
%= 1 << h
->sps
.log2_max_frame_num
;
2912 s
->current_picture_ptr
->frame_num
= h
->prev_frame_num
;
2913 ff_thread_report_progress(&s
->current_picture_ptr
->f
, INT_MAX
, 0);
2914 ff_thread_report_progress(&s
->current_picture_ptr
->f
, INT_MAX
, 1);
2915 if ((ret
= ff_generate_sliding_window_mmcos(h
, 1)) < 0 &&
2916 s
->avctx
->err_recognition
& AV_EF_EXPLODE
)
2918 if (ff_h264_execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
) < 0 &&
2919 (s
->avctx
->err_recognition
& AV_EF_EXPLODE
))
2920 return AVERROR_INVALIDDATA
;
2921 /* Error concealment: if a ref is missing, copy the previous ref in its place.
2922 * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
2923 * about there being no actual duplicates.
2924 * FIXME: this doesn't copy padding for out-of-frame motion vectors. Given we're
2925 * concealing a lost frame, this probably isn't noticeable by comparison, but it should
2927 if (h
->short_ref_count
) {
2929 av_image_copy(h
->short_ref
[0]->f
.data
, h
->short_ref
[0]->f
.linesize
,
2930 (const uint8_t **)prev
->f
.data
, prev
->f
.linesize
,
2931 s
->avctx
->pix_fmt
, s
->mb_width
* 16, s
->mb_height
* 16);
2932 h
->short_ref
[0]->poc
= prev
->poc
+ 2;
2934 h
->short_ref
[0]->frame_num
= h
->prev_frame_num
;
2938 /* See if we have a decoded first field looking for a pair...
2939 * We're using that to see whether to continue decoding in that
2940 * frame, or to allocate a new one. */
2941 if (s0
->first_field
) {
2942 assert(s0
->current_picture_ptr
);
2943 assert(s0
->current_picture_ptr
->f
.data
[0]);
2944 assert(s0
->current_picture_ptr
->f
.reference
!= DELAYED_PIC_REF
);
2946 /* figure out if we have a complementary field pair */
2947 if (!FIELD_PICTURE
|| s
->picture_structure
== last_pic_structure
) {
2948 /* Previous field is unmatched. Don't display it, but let it
2949 * remain for reference if marked as such. */
2950 s0
->current_picture_ptr
= NULL
;
2951 s0
->first_field
= FIELD_PICTURE
;
2953 if (s0
->current_picture_ptr
->frame_num
!= h
->frame_num
) {
2954 /* This and the previous field had different frame_nums.
2955 * Consider this field first in pair. Throw away previous
2956 * one except for reference purposes. */
2957 s0
->first_field
= 1;
2958 s0
->current_picture_ptr
= NULL
;
2960 /* Second field in complementary pair */
2961 s0
->first_field
= 0;
2965 /* Frame or first field in a potentially complementary pair */
2966 s0
->first_field
= FIELD_PICTURE
;
2969 if (!FIELD_PICTURE
|| s0
->first_field
) {
2970 if (ff_h264_frame_start(h
) < 0) {
2971 s0
->first_field
= 0;
2975 ff_release_unused_pictures(s
, 0);
2978 if (h
!= h0
&& (ret
= clone_slice(h
, h0
)) < 0)
2981 s
->current_picture_ptr
->frame_num
= h
->frame_num
; // FIXME frame_num cleanup
2983 assert(s
->mb_num
== s
->mb_width
* s
->mb_height
);
2984 if (first_mb_in_slice
<< FIELD_OR_MBAFF_PICTURE
>= s
->mb_num
||
2985 first_mb_in_slice
>= s
->mb_num
) {
2986 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "first_mb_in_slice overflow\n");
2989 s
->resync_mb_x
= s
->mb_x
= first_mb_in_slice
% s
->mb_width
;
2990 s
->resync_mb_y
= s
->mb_y
= (first_mb_in_slice
/ s
->mb_width
) << FIELD_OR_MBAFF_PICTURE
;
2991 if (s
->picture_structure
== PICT_BOTTOM_FIELD
)
2992 s
->resync_mb_y
= s
->mb_y
= s
->mb_y
+ 1;
2993 assert(s
->mb_y
< s
->mb_height
);
2995 if (s
->picture_structure
== PICT_FRAME
) {
2996 h
->curr_pic_num
= h
->frame_num
;
2997 h
->max_pic_num
= 1 << h
->sps
.log2_max_frame_num
;
2999 h
->curr_pic_num
= 2 * h
->frame_num
+ 1;
3000 h
->max_pic_num
= 1 << (h
->sps
.log2_max_frame_num
+ 1);
3003 if (h
->nal_unit_type
== NAL_IDR_SLICE
)
3004 get_ue_golomb(&s
->gb
); /* idr_pic_id */
3006 if (h
->sps
.poc_type
== 0) {
3007 h
->poc_lsb
= get_bits(&s
->gb
, h
->sps
.log2_max_poc_lsb
);
3009 if (h
->pps
.pic_order_present
== 1 && s
->picture_structure
== PICT_FRAME
)
3010 h
->delta_poc_bottom
= get_se_golomb(&s
->gb
);
3013 if (h
->sps
.poc_type
== 1 && !h
->sps
.delta_pic_order_always_zero_flag
) {
3014 h
->delta_poc
[0] = get_se_golomb(&s
->gb
);
3016 if (h
->pps
.pic_order_present
== 1 && s
->picture_structure
== PICT_FRAME
)
3017 h
->delta_poc
[1] = get_se_golomb(&s
->gb
);
3022 if (h
->pps
.redundant_pic_cnt_present
)
3023 h
->redundant_pic_count
= get_ue_golomb(&s
->gb
);
3025 // set defaults, might be overridden a few lines later
3026 h
->ref_count
[0] = h
->pps
.ref_count
[0];
3027 h
->ref_count
[1] = h
->pps
.ref_count
[1];
3029 if (h
->slice_type_nos
!= AV_PICTURE_TYPE_I
) {
3030 if (h
->slice_type_nos
== AV_PICTURE_TYPE_B
)
3031 h
->direct_spatial_mv_pred
= get_bits1(&s
->gb
);
3032 num_ref_idx_active_override_flag
= get_bits1(&s
->gb
);
3034 if (num_ref_idx_active_override_flag
) {
3035 h
->ref_count
[0] = get_ue_golomb(&s
->gb
) + 1;
3036 if (h
->ref_count
[0] < 1)
3037 return AVERROR_INVALIDDATA
;
3038 if (h
->slice_type_nos
== AV_PICTURE_TYPE_B
) {
3039 h
->ref_count
[1] = get_ue_golomb(&s
->gb
) + 1;
3040 if (h
->ref_count
[1] < 1)
3041 return AVERROR_INVALIDDATA
;
3045 if (h
->slice_type_nos
== AV_PICTURE_TYPE_B
)
3052 max_refs
= s
->picture_structure
== PICT_FRAME
? 16 : 32;
3054 if (h
->ref_count
[0] > max_refs
|| h
->ref_count
[1] > max_refs
) {
3055 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow\n");
3056 h
->ref_count
[0] = h
->ref_count
[1] = 1;
3057 return AVERROR_INVALIDDATA
;
3060 if (!default_ref_list_done
)
3061 ff_h264_fill_default_ref_list(h
);
3063 if (h
->slice_type_nos
!= AV_PICTURE_TYPE_I
&&
3064 ff_h264_decode_ref_pic_list_reordering(h
) < 0) {
3065 h
->ref_count
[1] = h
->ref_count
[0] = 0;
3069 if (h
->slice_type_nos
!= AV_PICTURE_TYPE_I
) {
3070 s
->last_picture_ptr
= &h
->ref_list
[0][0];
3071 s
->last_picture_ptr
->owner2
= s
;
3072 ff_copy_picture(&s
->last_picture
, s
->last_picture_ptr
);
3074 if (h
->slice_type_nos
== AV_PICTURE_TYPE_B
) {
3075 s
->next_picture_ptr
= &h
->ref_list
[1][0];
3076 s
->next_picture_ptr
->owner2
= s
;
3077 ff_copy_picture(&s
->next_picture
, s
->next_picture_ptr
);
3080 if ((h
->pps
.weighted_pred
&& h
->slice_type_nos
== AV_PICTURE_TYPE_P
) ||
3081 (h
->pps
.weighted_bipred_idc
== 1 &&
3082 h
->slice_type_nos
== AV_PICTURE_TYPE_B
))
3083 pred_weight_table(h
);
3084 else if (h
->pps
.weighted_bipred_idc
== 2 &&
3085 h
->slice_type_nos
== AV_PICTURE_TYPE_B
) {
3086 implicit_weight_table(h
, -1);
3089 for (i
= 0; i
< 2; i
++) {
3090 h
->luma_weight_flag
[i
] = 0;
3091 h
->chroma_weight_flag
[i
] = 0;
3095 // If frame-mt is enabled, only update mmco tables for the first slice
3096 // in a field. Subsequent slices can temporarily clobber h->mmco_index
3097 // or h->mmco, which will cause ref list mix-ups and decoding errors
3098 // further down the line. This may break decoding if the first slice is
3099 // corrupt, thus we only do this if frame-mt is enabled.
3100 if (h
->nal_ref_idc
&&
3101 ff_h264_decode_ref_pic_marking(h0
, &s
->gb
,
3102 !(s
->avctx
->active_thread_type
& FF_THREAD_FRAME
) ||
3103 h0
->current_slice
== 0) < 0 &&
3104 (s
->avctx
->err_recognition
& AV_EF_EXPLODE
))
3105 return AVERROR_INVALIDDATA
;
3108 ff_h264_fill_mbaff_ref_list(h
);
3110 if (h
->pps
.weighted_bipred_idc
== 2 && h
->slice_type_nos
== AV_PICTURE_TYPE_B
) {
3111 implicit_weight_table(h
, 0);
3112 implicit_weight_table(h
, 1);
3116 if (h
->slice_type_nos
== AV_PICTURE_TYPE_B
&& !h
->direct_spatial_mv_pred
)
3117 ff_h264_direct_dist_scale_factor(h
);
3118 ff_h264_direct_ref_list_init(h
);
3120 if (h
->slice_type_nos
!= AV_PICTURE_TYPE_I
&& h
->pps
.cabac
) {
3121 tmp
= get_ue_golomb_31(&s
->gb
);
3123 av_log(s
->avctx
, AV_LOG_ERROR
, "cabac_init_idc overflow\n");
3126 h
->cabac_init_idc
= tmp
;
3129 h
->last_qscale_diff
= 0;
3130 tmp
= h
->pps
.init_qp
+ get_se_golomb(&s
->gb
);
3131 if (tmp
> 51 + 6 * (h
->sps
.bit_depth_luma
- 8)) {
3132 av_log(s
->avctx
, AV_LOG_ERROR
, "QP %u out of range\n", tmp
);
3136 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
3137 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
3138 // FIXME qscale / qp ... stuff
3139 if (h
->slice_type
== AV_PICTURE_TYPE_SP
)
3140 get_bits1(&s
->gb
); /* sp_for_switch_flag */
3141 if (h
->slice_type
== AV_PICTURE_TYPE_SP
||
3142 h
->slice_type
== AV_PICTURE_TYPE_SI
)
3143 get_se_golomb(&s
->gb
); /* slice_qs_delta */
3145 h
->deblocking_filter
= 1;
3146 h
->slice_alpha_c0_offset
= 52;
3147 h
->slice_beta_offset
= 52;
3148 if (h
->pps
.deblocking_filter_parameters_present
) {
3149 tmp
= get_ue_golomb_31(&s
->gb
);
3151 av_log(s
->avctx
, AV_LOG_ERROR
,
3152 "deblocking_filter_idc %u out of range\n", tmp
);
3155 h
->deblocking_filter
= tmp
;
3156 if (h
->deblocking_filter
< 2)
3157 h
->deblocking_filter
^= 1; // 1<->0
3159 if (h
->deblocking_filter
) {
3160 h
->slice_alpha_c0_offset
+= get_se_golomb(&s
->gb
) << 1;
3161 h
->slice_beta_offset
+= get_se_golomb(&s
->gb
) << 1;
3162 if (h
->slice_alpha_c0_offset
> 104U ||
3163 h
->slice_beta_offset
> 104U) {
3164 av_log(s
->avctx
, AV_LOG_ERROR
,
3165 "deblocking filter parameters %d %d out of range\n",
3166 h
->slice_alpha_c0_offset
, h
->slice_beta_offset
);
3172 if (s
->avctx
->skip_loop_filter
>= AVDISCARD_ALL
||
3173 (s
->avctx
->skip_loop_filter
>= AVDISCARD_NONKEY
&&
3174 h
->slice_type_nos
!= AV_PICTURE_TYPE_I
) ||
3175 (s
->avctx
->skip_loop_filter
>= AVDISCARD_BIDIR
&&
3176 h
->slice_type_nos
== AV_PICTURE_TYPE_B
) ||
3177 (s
->avctx
->skip_loop_filter
>= AVDISCARD_NONREF
&&
3178 h
->nal_ref_idc
== 0))
3179 h
->deblocking_filter
= 0;
3181 if (h
->deblocking_filter
== 1 && h0
->max_contexts
> 1) {
3182 if (s
->avctx
->flags2
& CODEC_FLAG2_FAST
) {
3183 /* Cheat slightly for speed:
3184 * Do not bother to deblock across slices. */
3185 h
->deblocking_filter
= 2;
3187 h0
->max_contexts
= 1;
3188 if (!h0
->single_decode_warning
) {
3189 av_log(s
->avctx
, AV_LOG_INFO
,
3190 "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3191 h0
->single_decode_warning
= 1;
3194 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3195 "Deblocking switched inside frame.\n");
3200 h
->qp_thresh
= 15 + 52 -
3201 FFMIN(h
->slice_alpha_c0_offset
, h
->slice_beta_offset
) -
3203 h
->pps
.chroma_qp_index_offset
[0],
3204 h
->pps
.chroma_qp_index_offset
[1]) +
3205 6 * (h
->sps
.bit_depth_luma
- 8);
3207 h0
->last_slice_type
= slice_type
;
3208 h
->slice_num
= ++h0
->current_slice
;
3209 if (h
->slice_num
>= MAX_SLICES
) {
3210 av_log(s
->avctx
, AV_LOG_ERROR
,
3211 "Too many slices, increase MAX_SLICES and recompile\n");
3214 for (j
= 0; j
< 2; j
++) {
3216 int *ref2frm
= h
->ref2frm
[h
->slice_num
& (MAX_SLICES
- 1)][j
];
3217 for (i
= 0; i
< 16; i
++) {
3219 if (h
->ref_list
[j
][i
].f
.data
[0]) {
3221 uint8_t *base
= h
->ref_list
[j
][i
].f
.base
[0];
3222 for (k
= 0; k
< h
->short_ref_count
; k
++)
3223 if (h
->short_ref
[k
]->f
.base
[0] == base
) {
3227 for (k
= 0; k
< h
->long_ref_count
; k
++)
3228 if (h
->long_ref
[k
] && h
->long_ref
[k
]->f
.base
[0] == base
) {
3229 id_list
[i
] = h
->short_ref_count
+ k
;
3237 for (i
= 0; i
< 16; i
++)
3238 ref2frm
[i
+ 2] = 4 * id_list
[i
] +
3239 (h
->ref_list
[j
][i
].f
.reference
& 3);
3241 ref2frm
[18 + 1] = -1;
3242 for (i
= 16; i
< 48; i
++)
3243 ref2frm
[i
+ 4] = 4 * id_list
[(i
- 16) >> 1] +
3244 (h
->ref_list
[j
][i
].f
.reference
& 3);
3247 // FIXME: fix draw_edges + PAFF + frame threads
3248 h
->emu_edge_width
= (s
->flags
& CODEC_FLAG_EMU_EDGE
||
3249 (!h
->sps
.frame_mbs_only_flag
&&
3250 s
->avctx
->active_thread_type
))
3252 h
->emu_edge_height
= (FRAME_MBAFF
|| FIELD_PICTURE
) ? 0 : h
->emu_edge_width
;
3254 if (s
->avctx
->debug
& FF_DEBUG_PICT_INFO
) {
3255 av_log(h
->s
.avctx
, AV_LOG_DEBUG
,
3256 "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3258 (s
->picture_structure
== PICT_FRAME
? "F" : s
->picture_structure
== PICT_TOP_FIELD
? "T" : "B"),
3260 av_get_picture_type_char(h
->slice_type
),
3261 h
->slice_type_fixed
? " fix" : "",
3262 h
->nal_unit_type
== NAL_IDR_SLICE
? " IDR" : "",
3263 pps_id
, h
->frame_num
,
3264 s
->current_picture_ptr
->field_poc
[0],
3265 s
->current_picture_ptr
->field_poc
[1],
3266 h
->ref_count
[0], h
->ref_count
[1],
3268 h
->deblocking_filter
,
3269 h
->slice_alpha_c0_offset
/ 2 - 26, h
->slice_beta_offset
/ 2 - 26,
3271 h
->use_weight
== 1 && h
->use_weight_chroma
? "c" : "",
3272 h
->slice_type
== AV_PICTURE_TYPE_B
? (h
->direct_spatial_mv_pred
? "SPAT" : "TEMP") : "");
3278 int ff_h264_get_slice_type(const H264Context
*h
)
3280 switch (h
->slice_type
) {
3281 case AV_PICTURE_TYPE_P
:
3283 case AV_PICTURE_TYPE_B
:
3285 case AV_PICTURE_TYPE_I
:
3287 case AV_PICTURE_TYPE_SP
:
3289 case AV_PICTURE_TYPE_SI
:
3296 static av_always_inline
void fill_filter_caches_inter(H264Context
*h
,
3297 MpegEncContext
*const s
,
3298 int mb_type
, int top_xy
,
3299 int left_xy
[LEFT_MBS
],
3301 int left_type
[LEFT_MBS
],
3302 int mb_xy
, int list
)
3304 int b_stride
= h
->b_stride
;
3305 int16_t(*mv_dst
)[2] = &h
->mv_cache
[list
][scan8
[0]];
3306 int8_t *ref_cache
= &h
->ref_cache
[list
][scan8
[0]];
3307 if (IS_INTER(mb_type
) || IS_DIRECT(mb_type
)) {
3308 if (USES_LIST(top_type
, list
)) {
3309 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3 * b_stride
;
3310 const int b8_xy
= 4 * top_xy
+ 2;
3311 int (*ref2frm
)[64] = h
->ref2frm
[h
->slice_table
[top_xy
] & (MAX_SLICES
- 1)][0] + (MB_MBAFF
? 20 : 2);
3312 AV_COPY128(mv_dst
- 1 * 8, s
->current_picture
.f
.motion_val
[list
][b_xy
+ 0]);
3313 ref_cache
[0 - 1 * 8] =
3314 ref_cache
[1 - 1 * 8] = ref2frm
[list
][s
->current_picture
.f
.ref_index
[list
][b8_xy
+ 0]];
3315 ref_cache
[2 - 1 * 8] =
3316 ref_cache
[3 - 1 * 8] = ref2frm
[list
][s
->current_picture
.f
.ref_index
[list
][b8_xy
+ 1]];
3318 AV_ZERO128(mv_dst
- 1 * 8);
3319 AV_WN32A(&ref_cache
[0 - 1 * 8], ((LIST_NOT_USED
) & 0xFF) * 0x01010101u
);
3322 if (!IS_INTERLACED(mb_type
^ left_type
[LTOP
])) {
3323 if (USES_LIST(left_type
[LTOP
], list
)) {
3324 const int b_xy
= h
->mb2b_xy
[left_xy
[LTOP
]] + 3;
3325 const int b8_xy
= 4 * left_xy
[LTOP
] + 1;
3326 int (*ref2frm
)[64] = h
->ref2frm
[h
->slice_table
[left_xy
[LTOP
]] & (MAX_SLICES
- 1)][0] + (MB_MBAFF
? 20 : 2);
3327 AV_COPY32(mv_dst
- 1 + 0, s
->current_picture
.f
.motion_val
[list
][b_xy
+ b_stride
* 0]);
3328 AV_COPY32(mv_dst
- 1 + 8, s
->current_picture
.f
.motion_val
[list
][b_xy
+ b_stride
* 1]);
3329 AV_COPY32(mv_dst
- 1 + 16, s
->current_picture
.f
.motion_val
[list
][b_xy
+ b_stride
* 2]);
3330 AV_COPY32(mv_dst
- 1 + 24, s
->current_picture
.f
.motion_val
[list
][b_xy
+ b_stride
* 3]);
3332 ref_cache
[-1 + 8] = ref2frm
[list
][s
->current_picture
.f
.ref_index
[list
][b8_xy
+ 2 * 0]];
3333 ref_cache
[-1 + 16] =
3334 ref_cache
[-1 + 24] = ref2frm
[list
][s
->current_picture
.f
.ref_index
[list
][b8_xy
+ 2 * 1]];
3336 AV_ZERO32(mv_dst
- 1 + 0);
3337 AV_ZERO32(mv_dst
- 1 + 8);
3338 AV_ZERO32(mv_dst
- 1 + 16);
3339 AV_ZERO32(mv_dst
- 1 + 24);
3342 ref_cache
[-1 + 16] =
3343 ref_cache
[-1 + 24] = LIST_NOT_USED
;
3348 if (!USES_LIST(mb_type
, list
)) {
3349 fill_rectangle(mv_dst
, 4, 4, 8, pack16to32(0, 0), 4);
3350 AV_WN32A(&ref_cache
[0 * 8], ((LIST_NOT_USED
) & 0xFF) * 0x01010101u
);
3351 AV_WN32A(&ref_cache
[1 * 8], ((LIST_NOT_USED
) & 0xFF) * 0x01010101u
);
3352 AV_WN32A(&ref_cache
[2 * 8], ((LIST_NOT_USED
) & 0xFF) * 0x01010101u
);
3353 AV_WN32A(&ref_cache
[3 * 8], ((LIST_NOT_USED
) & 0xFF) * 0x01010101u
);
3358 int8_t *ref
= &s
->current_picture
.f
.ref_index
[list
][4 * mb_xy
];
3359 int (*ref2frm
)[64] = h
->ref2frm
[h
->slice_num
& (MAX_SLICES
- 1)][0] + (MB_MBAFF
? 20 : 2);
3360 uint32_t ref01
= (pack16to32(ref2frm
[list
][ref
[0]], ref2frm
[list
][ref
[1]]) & 0x00FF00FF) * 0x0101;
3361 uint32_t ref23
= (pack16to32(ref2frm
[list
][ref
[2]], ref2frm
[list
][ref
[3]]) & 0x00FF00FF) * 0x0101;
3362 AV_WN32A(&ref_cache
[0 * 8], ref01
);
3363 AV_WN32A(&ref_cache
[1 * 8], ref01
);
3364 AV_WN32A(&ref_cache
[2 * 8], ref23
);
3365 AV_WN32A(&ref_cache
[3 * 8], ref23
);
3369 int16_t(*mv_src
)[2] = &s
->current_picture
.f
.motion_val
[list
][4 * s
->mb_x
+ 4 * s
->mb_y
* b_stride
];
3370 AV_COPY128(mv_dst
+ 8 * 0, mv_src
+ 0 * b_stride
);
3371 AV_COPY128(mv_dst
+ 8 * 1, mv_src
+ 1 * b_stride
);
3372 AV_COPY128(mv_dst
+ 8 * 2, mv_src
+ 2 * b_stride
);
3373 AV_COPY128(mv_dst
+ 8 * 3, mv_src
+ 3 * b_stride
);
3379 * @return non zero if the loop filter can be skipped
3381 static int fill_filter_caches(H264Context
*h
, int mb_type
)
3383 MpegEncContext
*const s
= &h
->s
;
3384 const int mb_xy
= h
->mb_xy
;
3385 int top_xy
, left_xy
[LEFT_MBS
];
3386 int top_type
, left_type
[LEFT_MBS
];
3390 top_xy
= mb_xy
- (s
->mb_stride
<< MB_FIELD
);
3392 /* Wow, what a mess, why didn't they simplify the interlacing & intra
3393 * stuff, I can't imagine that these complex rules are worth it. */
3395 left_xy
[LBOT
] = left_xy
[LTOP
] = mb_xy
- 1;
3397 const int left_mb_field_flag
= IS_INTERLACED(s
->current_picture
.f
.mb_type
[mb_xy
- 1]);
3398 const int curr_mb_field_flag
= IS_INTERLACED(mb_type
);
3400 if (left_mb_field_flag
!= curr_mb_field_flag
)
3401 left_xy
[LTOP
] -= s
->mb_stride
;
3403 if (curr_mb_field_flag
)
3404 top_xy
+= s
->mb_stride
&
3405 (((s
->current_picture
.f
.mb_type
[top_xy
] >> 7) & 1) - 1);
3406 if (left_mb_field_flag
!= curr_mb_field_flag
)
3407 left_xy
[LBOT
] += s
->mb_stride
;
3411 h
->top_mb_xy
= top_xy
;
3412 h
->left_mb_xy
[LTOP
] = left_xy
[LTOP
];
3413 h
->left_mb_xy
[LBOT
] = left_xy
[LBOT
];
3415 /* For sufficiently low qp, filtering wouldn't do anything.
3416 * This is a conservative estimate: could also check beta_offset
3417 * and more accurate chroma_qp. */
3418 int qp_thresh
= h
->qp_thresh
; // FIXME strictly we should store qp_thresh for each mb of a slice
3419 int qp
= s
->current_picture
.f
.qscale_table
[mb_xy
];
3420 if (qp
<= qp_thresh
&&
3421 (left_xy
[LTOP
] < 0 ||
3422 ((qp
+ s
->current_picture
.f
.qscale_table
[left_xy
[LTOP
]] + 1) >> 1) <= qp_thresh
) &&
3424 ((qp
+ s
->current_picture
.f
.qscale_table
[top_xy
] + 1) >> 1) <= qp_thresh
)) {
3427 if ((left_xy
[LTOP
] < 0 ||
3428 ((qp
+ s
->current_picture
.f
.qscale_table
[left_xy
[LBOT
]] + 1) >> 1) <= qp_thresh
) &&
3429 (top_xy
< s
->mb_stride
||
3430 ((qp
+ s
->current_picture
.f
.qscale_table
[top_xy
- s
->mb_stride
] + 1) >> 1) <= qp_thresh
))
3435 top_type
= s
->current_picture
.f
.mb_type
[top_xy
];
3436 left_type
[LTOP
] = s
->current_picture
.f
.mb_type
[left_xy
[LTOP
]];
3437 left_type
[LBOT
] = s
->current_picture
.f
.mb_type
[left_xy
[LBOT
]];
3438 if (h
->deblocking_filter
== 2) {
3439 if (h
->slice_table
[top_xy
] != h
->slice_num
)
3441 if (h
->slice_table
[left_xy
[LBOT
]] != h
->slice_num
)
3442 left_type
[LTOP
] = left_type
[LBOT
] = 0;
3444 if (h
->slice_table
[top_xy
] == 0xFFFF)
3446 if (h
->slice_table
[left_xy
[LBOT
]] == 0xFFFF)
3447 left_type
[LTOP
] = left_type
[LBOT
] = 0;
3449 h
->top_type
= top_type
;
3450 h
->left_type
[LTOP
] = left_type
[LTOP
];
3451 h
->left_type
[LBOT
] = left_type
[LBOT
];
3453 if (IS_INTRA(mb_type
))
3456 fill_filter_caches_inter(h
, s
, mb_type
, top_xy
, left_xy
,
3457 top_type
, left_type
, mb_xy
, 0);
3458 if (h
->list_count
== 2)
3459 fill_filter_caches_inter(h
, s
, mb_type
, top_xy
, left_xy
,
3460 top_type
, left_type
, mb_xy
, 1);
3462 nnz
= h
->non_zero_count
[mb_xy
];
3463 nnz_cache
= h
->non_zero_count_cache
;
3464 AV_COPY32(&nnz_cache
[4 + 8 * 1], &nnz
[0]);
3465 AV_COPY32(&nnz_cache
[4 + 8 * 2], &nnz
[4]);
3466 AV_COPY32(&nnz_cache
[4 + 8 * 3], &nnz
[8]);
3467 AV_COPY32(&nnz_cache
[4 + 8 * 4], &nnz
[12]);
3468 h
->cbp
= h
->cbp_table
[mb_xy
];
3471 nnz
= h
->non_zero_count
[top_xy
];
3472 AV_COPY32(&nnz_cache
[4 + 8 * 0], &nnz
[3 * 4]);
3475 if (left_type
[LTOP
]) {
3476 nnz
= h
->non_zero_count
[left_xy
[LTOP
]];
3477 nnz_cache
[3 + 8 * 1] = nnz
[3 + 0 * 4];
3478 nnz_cache
[3 + 8 * 2] = nnz
[3 + 1 * 4];
3479 nnz_cache
[3 + 8 * 3] = nnz
[3 + 2 * 4];
3480 nnz_cache
[3 + 8 * 4] = nnz
[3 + 3 * 4];
3483 /* CAVLC 8x8dct requires NNZ values for residual decoding that differ
3484 * from what the loop filter needs */
3485 if (!CABAC
&& h
->pps
.transform_8x8_mode
) {
3486 if (IS_8x8DCT(top_type
)) {
3487 nnz_cache
[4 + 8 * 0] =
3488 nnz_cache
[5 + 8 * 0] = (h
->cbp_table
[top_xy
] & 0x4000) >> 12;
3489 nnz_cache
[6 + 8 * 0] =
3490 nnz_cache
[7 + 8 * 0] = (h
->cbp_table
[top_xy
] & 0x8000) >> 12;
3492 if (IS_8x8DCT(left_type
[LTOP
])) {
3493 nnz_cache
[3 + 8 * 1] =
3494 nnz_cache
[3 + 8 * 2] = (h
->cbp_table
[left_xy
[LTOP
]] & 0x2000) >> 12; // FIXME check MBAFF
3496 if (IS_8x8DCT(left_type
[LBOT
])) {
3497 nnz_cache
[3 + 8 * 3] =
3498 nnz_cache
[3 + 8 * 4] = (h
->cbp_table
[left_xy
[LBOT
]] & 0x8000) >> 12; // FIXME check MBAFF
3501 if (IS_8x8DCT(mb_type
)) {
3502 nnz_cache
[scan8
[0]] =
3503 nnz_cache
[scan8
[1]] =
3504 nnz_cache
[scan8
[2]] =
3505 nnz_cache
[scan8
[3]] = (h
->cbp
& 0x1000) >> 12;
3507 nnz_cache
[scan8
[0 + 4]] =
3508 nnz_cache
[scan8
[1 + 4]] =
3509 nnz_cache
[scan8
[2 + 4]] =
3510 nnz_cache
[scan8
[3 + 4]] = (h
->cbp
& 0x2000) >> 12;
3512 nnz_cache
[scan8
[0 + 8]] =
3513 nnz_cache
[scan8
[1 + 8]] =
3514 nnz_cache
[scan8
[2 + 8]] =
3515 nnz_cache
[scan8
[3 + 8]] = (h
->cbp
& 0x4000) >> 12;
3517 nnz_cache
[scan8
[0 + 12]] =
3518 nnz_cache
[scan8
[1 + 12]] =
3519 nnz_cache
[scan8
[2 + 12]] =
3520 nnz_cache
[scan8
[3 + 12]] = (h
->cbp
& 0x8000) >> 12;
3527 static void loop_filter(H264Context
*h
, int start_x
, int end_x
)
3529 MpegEncContext
*const s
= &h
->s
;
3530 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
3531 int linesize
, uvlinesize
, mb_x
, mb_y
;
3532 const int end_mb_y
= s
->mb_y
+ FRAME_MBAFF
;
3533 const int old_slice_type
= h
->slice_type
;
3534 const int pixel_shift
= h
->pixel_shift
;
3535 const int block_h
= 16 >> s
->chroma_y_shift
;
3537 if (h
->deblocking_filter
) {
3538 for (mb_x
= start_x
; mb_x
< end_x
; mb_x
++)
3539 for (mb_y
= end_mb_y
- FRAME_MBAFF
; mb_y
<= end_mb_y
; mb_y
++) {
3541 mb_xy
= h
->mb_xy
= mb_x
+ mb_y
* s
->mb_stride
;
3542 h
->slice_num
= h
->slice_table
[mb_xy
];
3543 mb_type
= s
->current_picture
.f
.mb_type
[mb_xy
];
3544 h
->list_count
= h
->list_counts
[mb_xy
];
3548 h
->mb_field_decoding_flag
= !!IS_INTERLACED(mb_type
);
3552 dest_y
= s
->current_picture
.f
.data
[0] +
3553 ((mb_x
<< pixel_shift
) + mb_y
* s
->linesize
) * 16;
3554 dest_cb
= s
->current_picture
.f
.data
[1] +
3555 (mb_x
<< pixel_shift
) * (8 << CHROMA444
) +
3556 mb_y
* s
->uvlinesize
* block_h
;
3557 dest_cr
= s
->current_picture
.f
.data
[2] +
3558 (mb_x
<< pixel_shift
) * (8 << CHROMA444
) +
3559 mb_y
* s
->uvlinesize
* block_h
;
3560 // FIXME simplify above
3563 linesize
= h
->mb_linesize
= s
->linesize
* 2;
3564 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
* 2;
3565 if (mb_y
& 1) { // FIXME move out of this function?
3566 dest_y
-= s
->linesize
* 15;
3567 dest_cb
-= s
->uvlinesize
* (block_h
- 1);
3568 dest_cr
-= s
->uvlinesize
* (block_h
- 1);
3571 linesize
= h
->mb_linesize
= s
->linesize
;
3572 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
;
3574 backup_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
,
3576 if (fill_filter_caches(h
, mb_type
))
3578 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->current_picture
.f
.qscale_table
[mb_xy
]);
3579 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->current_picture
.f
.qscale_table
[mb_xy
]);
3582 ff_h264_filter_mb(h
, mb_x
, mb_y
, dest_y
, dest_cb
, dest_cr
,
3583 linesize
, uvlinesize
);
3585 ff_h264_filter_mb_fast(h
, mb_x
, mb_y
, dest_y
, dest_cb
,
3586 dest_cr
, linesize
, uvlinesize
);
3590 h
->slice_type
= old_slice_type
;
3592 s
->mb_y
= end_mb_y
- FRAME_MBAFF
;
3593 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
3594 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
3597 static void predict_field_decoding_flag(H264Context
*h
)
3599 MpegEncContext
*const s
= &h
->s
;
3600 const int mb_xy
= s
->mb_x
+ s
->mb_y
* s
->mb_stride
;
3601 int mb_type
= (h
->slice_table
[mb_xy
- 1] == h
->slice_num
) ?
3602 s
->current_picture
.f
.mb_type
[mb_xy
- 1] :
3603 (h
->slice_table
[mb_xy
- s
->mb_stride
] == h
->slice_num
) ?
3604 s
->current_picture
.f
.mb_type
[mb_xy
- s
->mb_stride
] : 0;
3605 h
->mb_mbaff
= h
->mb_field_decoding_flag
= IS_INTERLACED(mb_type
) ? 1 : 0;
3609 * Draw edges and report progress for the last MB row.
3611 static void decode_finish_row(H264Context
*h
)
3613 MpegEncContext
*const s
= &h
->s
;
3614 int top
= 16 * (s
->mb_y
>> FIELD_PICTURE
);
3615 int pic_height
= 16 * s
->mb_height
>> FIELD_PICTURE
;
3616 int height
= 16 << FRAME_MBAFF
;
3617 int deblock_border
= (16 + 4) << FRAME_MBAFF
;
3619 if (h
->deblocking_filter
) {
3620 if ((top
+ height
) >= pic_height
)
3621 height
+= deblock_border
;
3622 top
-= deblock_border
;
3625 if (top
>= pic_height
|| (top
+ height
) < h
->emu_edge_height
)
3628 height
= FFMIN(height
, pic_height
- top
);
3629 if (top
< h
->emu_edge_height
) {
3630 height
= top
+ height
;
3634 ff_draw_horiz_band(s
, top
, height
);
3639 ff_thread_report_progress(&s
->current_picture_ptr
->f
, top
+ height
- 1,
3640 s
->picture_structure
== PICT_BOTTOM_FIELD
);
3643 static int decode_slice(struct AVCodecContext
*avctx
, void *arg
)
3645 H264Context
*h
= *(void **)arg
;
3646 MpegEncContext
*const s
= &h
->s
;
3647 const int part_mask
= s
->partitioned_frame
? (ER_AC_END
| ER_AC_ERROR
)
3649 int lf_x_start
= s
->mb_x
;
3651 s
->mb_skip_run
= -1;
3653 h
->is_complex
= FRAME_MBAFF
|| s
->picture_structure
!= PICT_FRAME
||
3654 s
->codec_id
!= AV_CODEC_ID_H264
||
3655 (CONFIG_GRAY
&& (s
->flags
& CODEC_FLAG_GRAY
));
3659 align_get_bits(&s
->gb
);
3662 ff_init_cabac_states(&h
->cabac
);
3663 ff_init_cabac_decoder(&h
->cabac
,
3664 s
->gb
.buffer
+ get_bits_count(&s
->gb
) / 8,
3665 (get_bits_left(&s
->gb
) + 7) / 8);
3667 ff_h264_init_cabac_states(h
);
3671 int ret
= ff_h264_decode_mb_cabac(h
);
3673 // STOP_TIMER("decode_mb_cabac")
3676 ff_h264_hl_decode_mb(h
);
3678 // FIXME optimal? or let mb_decode decode 16x32 ?
3679 if (ret
>= 0 && FRAME_MBAFF
) {
3682 ret
= ff_h264_decode_mb_cabac(h
);
3685 ff_h264_hl_decode_mb(h
);
3688 eos
= get_cabac_terminate(&h
->cabac
);
3690 if ((s
->workaround_bugs
& FF_BUG_TRUNCATED
) &&
3691 h
->cabac
.bytestream
> h
->cabac
.bytestream_end
+ 2) {
3692 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
- 1,
3693 s
->mb_y
, ER_MB_END
& part_mask
);
3694 if (s
->mb_x
>= lf_x_start
)
3695 loop_filter(h
, lf_x_start
, s
->mb_x
+ 1);
3698 if (ret
< 0 || h
->cabac
.bytestream
> h
->cabac
.bytestream_end
+ 2) {
3699 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3700 "error while decoding MB %d %d, bytestream (%td)\n",
3702 h
->cabac
.bytestream_end
- h
->cabac
.bytestream
);
3703 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
,
3704 s
->mb_y
, ER_MB_ERROR
& part_mask
);
3708 if (++s
->mb_x
>= s
->mb_width
) {
3709 loop_filter(h
, lf_x_start
, s
->mb_x
);
3710 s
->mb_x
= lf_x_start
= 0;
3711 decode_finish_row(h
);
3713 if (FIELD_OR_MBAFF_PICTURE
) {
3715 if (FRAME_MBAFF
&& s
->mb_y
< s
->mb_height
)
3716 predict_field_decoding_flag(h
);
3720 if (eos
|| s
->mb_y
>= s
->mb_height
) {
3721 tprintf(s
->avctx
, "slice end %d %d\n",
3722 get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
3723 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
- 1,
3724 s
->mb_y
, ER_MB_END
& part_mask
);
3725 if (s
->mb_x
> lf_x_start
)
3726 loop_filter(h
, lf_x_start
, s
->mb_x
);
3732 int ret
= ff_h264_decode_mb_cavlc(h
);
3735 ff_h264_hl_decode_mb(h
);
3737 // FIXME optimal? or let mb_decode decode 16x32 ?
3738 if (ret
>= 0 && FRAME_MBAFF
) {
3740 ret
= ff_h264_decode_mb_cavlc(h
);
3743 ff_h264_hl_decode_mb(h
);
3748 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3749 "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
3750 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
,
3751 s
->mb_y
, ER_MB_ERROR
& part_mask
);
3755 if (++s
->mb_x
>= s
->mb_width
) {
3756 loop_filter(h
, lf_x_start
, s
->mb_x
);
3757 s
->mb_x
= lf_x_start
= 0;
3758 decode_finish_row(h
);
3760 if (FIELD_OR_MBAFF_PICTURE
) {
3762 if (FRAME_MBAFF
&& s
->mb_y
< s
->mb_height
)
3763 predict_field_decoding_flag(h
);
3765 if (s
->mb_y
>= s
->mb_height
) {
3766 tprintf(s
->avctx
, "slice end %d %d\n",
3767 get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
3769 if (get_bits_left(&s
->gb
) == 0) {
3770 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
,
3771 s
->mb_x
- 1, s
->mb_y
,
3772 ER_MB_END
& part_mask
);
3776 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
,
3777 s
->mb_x
- 1, s
->mb_y
,
3778 ER_MB_END
& part_mask
);
3785 if (get_bits_left(&s
->gb
) <= 0 && s
->mb_skip_run
<= 0) {
3786 tprintf(s
->avctx
, "slice end %d %d\n",
3787 get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
3788 if (get_bits_left(&s
->gb
) == 0) {
3789 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
,
3790 s
->mb_x
- 1, s
->mb_y
,
3791 ER_MB_END
& part_mask
);
3792 if (s
->mb_x
> lf_x_start
)
3793 loop_filter(h
, lf_x_start
, s
->mb_x
);
3797 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
,
3798 s
->mb_y
, ER_MB_ERROR
& part_mask
);
3808 * Call decode_slice() for each context.
3810 * @param h h264 master context
3811 * @param context_count number of contexts to execute
3813 static int execute_decode_slices(H264Context
*h
, int context_count
)
3815 MpegEncContext
*const s
= &h
->s
;
3816 AVCodecContext
*const avctx
= s
->avctx
;
3820 if (s
->avctx
->hwaccel
||
3821 s
->avctx
->codec
->capabilities
& CODEC_CAP_HWACCEL_VDPAU
)
3823 if (context_count
== 1) {
3824 return decode_slice(avctx
, &h
);
3826 for (i
= 1; i
< context_count
; i
++) {
3827 hx
= h
->thread_context
[i
];
3828 hx
->s
.err_recognition
= avctx
->err_recognition
;
3829 hx
->s
.error_count
= 0;
3832 avctx
->execute(avctx
, decode_slice
, h
->thread_context
,
3833 NULL
, context_count
, sizeof(void *));
3835 /* pull back stuff from slices to master context */
3836 hx
= h
->thread_context
[context_count
- 1];
3837 s
->mb_x
= hx
->s
.mb_x
;
3838 s
->mb_y
= hx
->s
.mb_y
;
3839 s
->droppable
= hx
->s
.droppable
;
3840 s
->picture_structure
= hx
->s
.picture_structure
;
3841 for (i
= 1; i
< context_count
; i
++)
3842 h
->s
.error_count
+= h
->thread_context
[i
]->s
.error_count
;
3848 static int decode_nal_units(H264Context
*h
, const uint8_t *buf
, int buf_size
,
3849 int parse_extradata
)
3851 MpegEncContext
*const s
= &h
->s
;
3852 AVCodecContext
*const avctx
= s
->avctx
;
3853 H264Context
*hx
; ///< thread context
3857 int pass
= !(avctx
->active_thread_type
& FF_THREAD_FRAME
);
3858 int nals_needed
= 0; ///< number of NALs that need decoding before the next frame thread starts
3861 h
->max_contexts
= s
->slice_context_count
;
3862 if (!(s
->flags2
& CODEC_FLAG2_CHUNKS
)) {
3863 h
->current_slice
= 0;
3864 if (!s
->first_field
)
3865 s
->current_picture_ptr
= NULL
;
3866 ff_h264_reset_sei(h
);
3869 for (; pass
<= 1; pass
++) {
3872 next_avc
= h
->is_avc
? 0 : buf_size
;
3882 if (buf_index
>= next_avc
) {
3883 if (buf_index
>= buf_size
- h
->nal_length_size
)
3886 for (i
= 0; i
< h
->nal_length_size
; i
++)
3887 nalsize
= (nalsize
<< 8) | buf
[buf_index
++];
3888 if (nalsize
<= 0 || nalsize
> buf_size
- buf_index
) {
3889 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3890 "AVC: nal size %d\n", nalsize
);
3893 next_avc
= buf_index
+ nalsize
;
3895 // start code prefix search
3896 for (; buf_index
+ 3 < next_avc
; buf_index
++)
3897 // This should always succeed in the first iteration.
3898 if (buf
[buf_index
] == 0 &&
3899 buf
[buf_index
+ 1] == 0 &&
3900 buf
[buf_index
+ 2] == 1)
3903 if (buf_index
+ 3 >= buf_size
) {
3904 buf_index
= buf_size
;
3909 if (buf_index
>= next_avc
)
3913 hx
= h
->thread_context
[context_count
];
3915 ptr
= ff_h264_decode_nal(hx
, buf
+ buf_index
, &dst_length
,
3916 &consumed
, next_avc
- buf_index
);
3917 if (ptr
== NULL
|| dst_length
< 0) {
3921 i
= buf_index
+ consumed
;
3922 if ((s
->workaround_bugs
& FF_BUG_AUTODETECT
) && i
+ 3 < next_avc
&&
3923 buf
[i
] == 0x00 && buf
[i
+ 1] == 0x00 &&
3924 buf
[i
+ 2] == 0x01 && buf
[i
+ 3] == 0xE0)
3925 s
->workaround_bugs
|= FF_BUG_TRUNCATED
;
3927 if (!(s
->workaround_bugs
& FF_BUG_TRUNCATED
))
3928 while (ptr
[dst_length
- 1] == 0 && dst_length
> 0)
3930 bit_length
= !dst_length
? 0
3932 decode_rbsp_trailing(h
, ptr
+ dst_length
- 1));
3934 if (s
->avctx
->debug
& FF_DEBUG_STARTCODE
)
3935 av_log(h
->s
.avctx
, AV_LOG_DEBUG
,
3936 "NAL %d at %d/%d length %d\n",
3937 hx
->nal_unit_type
, buf_index
, buf_size
, dst_length
);
3939 if (h
->is_avc
&& (nalsize
!= consumed
) && nalsize
)
3940 av_log(h
->s
.avctx
, AV_LOG_DEBUG
,
3941 "AVC: Consumed only %d bytes instead of %d\n",
3944 buf_index
+= consumed
;
3948 /* packets can sometimes contain multiple PPS/SPS,
3949 * e.g. two PAFF field pictures in one packet, or a demuxer
3950 * which splits NALs strangely if so, when frame threading we
3951 * can't start the next thread until we've read all of them */
3952 switch (hx
->nal_unit_type
) {
3955 nals_needed
= nal_index
;
3960 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
3961 if (!get_ue_golomb(&hx
->s
.gb
))
3962 nals_needed
= nal_index
;
3967 // FIXME do not discard SEI id
3968 if (avctx
->skip_frame
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0)
3972 /* Ignore every NAL unit type except PPS and SPS during extradata
3973 * parsing. Decoding slices is not possible in codec init
3975 if (parse_extradata
&& HAVE_THREADS
&&
3976 (s
->avctx
->active_thread_type
& FF_THREAD_FRAME
) &&
3977 (hx
->nal_unit_type
!= NAL_PPS
&&
3978 hx
->nal_unit_type
!= NAL_SPS
)) {
3979 av_log(avctx
, AV_LOG_INFO
, "Ignoring NAL unit %d during "
3980 "extradata parsing\n", hx
->nal_unit_type
);
3981 hx
->nal_unit_type
= NAL_FF_IGNORE
;
3984 switch (hx
->nal_unit_type
) {
3986 if (h
->nal_unit_type
!= NAL_IDR_SLICE
) {
3987 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3988 "Invalid mix of idr and non-idr slices\n");
3992 idr(h
); // FIXME ensure we don't lose some frames if there is reordering
3994 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
3996 hx
->inter_gb_ptr
= &hx
->s
.gb
;
3997 hx
->s
.data_partitioning
= 0;
3999 if ((err
= decode_slice_header(hx
, h
)))
4002 s
->current_picture_ptr
->f
.key_frame
|=
4003 (hx
->nal_unit_type
== NAL_IDR_SLICE
) ||
4004 (h
->sei_recovery_frame_cnt
>= 0);
4006 if (h
->current_slice
== 1) {
4007 if (!(s
->flags2
& CODEC_FLAG2_CHUNKS
))
4008 decode_postinit(h
, nal_index
>= nals_needed
);
4010 if (s
->avctx
->hwaccel
&&
4011 s
->avctx
->hwaccel
->start_frame(s
->avctx
, NULL
, 0) < 0)
4013 if (CONFIG_H264_VDPAU_DECODER
&&
4014 s
->avctx
->codec
->capabilities
& CODEC_CAP_HWACCEL_VDPAU
)
4015 ff_vdpau_h264_picture_start(s
);
4018 if (hx
->redundant_pic_count
== 0 &&
4019 (avctx
->skip_frame
< AVDISCARD_NONREF
||
4021 (avctx
->skip_frame
< AVDISCARD_BIDIR
||
4022 hx
->slice_type_nos
!= AV_PICTURE_TYPE_B
) &&
4023 (avctx
->skip_frame
< AVDISCARD_NONKEY
||
4024 hx
->slice_type_nos
== AV_PICTURE_TYPE_I
) &&
4025 avctx
->skip_frame
< AVDISCARD_ALL
) {
4026 if (avctx
->hwaccel
) {
4027 if (avctx
->hwaccel
->decode_slice(avctx
,
4028 &buf
[buf_index
- consumed
],
4031 } else if (CONFIG_H264_VDPAU_DECODER
&&
4032 s
->avctx
->codec
->capabilities
& CODEC_CAP_HWACCEL_VDPAU
) {
4033 static const uint8_t start_code
[] = {
4035 ff_vdpau_add_data_chunk(s
, start_code
,
4036 sizeof(start_code
));
4037 ff_vdpau_add_data_chunk(s
, &buf
[buf_index
- consumed
],
4044 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
4046 hx
->inter_gb_ptr
= NULL
;
4048 if ((err
= decode_slice_header(hx
, h
)) < 0)
4051 hx
->s
.data_partitioning
= 1;
4054 init_get_bits(&hx
->intra_gb
, ptr
, bit_length
);
4055 hx
->intra_gb_ptr
= &hx
->intra_gb
;
4058 init_get_bits(&hx
->inter_gb
, ptr
, bit_length
);
4059 hx
->inter_gb_ptr
= &hx
->inter_gb
;
4061 if (hx
->redundant_pic_count
== 0 &&
4063 hx
->s
.data_partitioning
&&
4064 s
->current_picture_ptr
&&
4065 s
->context_initialized
&&
4066 (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
) &&
4067 (avctx
->skip_frame
< AVDISCARD_BIDIR
||
4068 hx
->slice_type_nos
!= AV_PICTURE_TYPE_B
) &&
4069 (avctx
->skip_frame
< AVDISCARD_NONKEY
||
4070 hx
->slice_type_nos
== AV_PICTURE_TYPE_I
) &&
4071 avctx
->skip_frame
< AVDISCARD_ALL
)
4075 init_get_bits(&s
->gb
, ptr
, bit_length
);
4076 ff_h264_decode_sei(h
);
4079 init_get_bits(&s
->gb
, ptr
, bit_length
);
4080 if (ff_h264_decode_seq_parameter_set(h
) < 0 &&
4081 h
->is_avc
&& (nalsize
!= consumed
) && nalsize
) {
4082 av_log(h
->s
.avctx
, AV_LOG_DEBUG
,
4083 "SPS decoding failure, trying again with the complete NAL\n");
4084 init_get_bits(&s
->gb
, buf
+ buf_index
+ 1 - consumed
,
4086 ff_h264_decode_seq_parameter_set(h
);
4089 if (h264_set_parameter_from_sps(h
) < 0) {
4095 init_get_bits(&s
->gb
, ptr
, bit_length
);
4096 ff_h264_decode_picture_parameter_set(h
, bit_length
);
4099 case NAL_END_SEQUENCE
:
4100 case NAL_END_STREAM
:
4101 case NAL_FILLER_DATA
:
4103 case NAL_AUXILIARY_SLICE
:
4108 av_log(avctx
, AV_LOG_DEBUG
, "Unknown NAL code: %d (%d bits)\n",
4109 hx
->nal_unit_type
, bit_length
);
4112 if (context_count
== h
->max_contexts
) {
4113 execute_decode_slices(h
, context_count
);
4118 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "decode_slice_header error\n");
4119 else if (err
== 1) {
4120 /* Slice could not be decoded in parallel mode, copy down
4121 * NAL unit stuff to context 0 and restart. Note that
4122 * rbsp_buffer is not transferred, but since we no longer
4123 * run in parallel mode this should not be an issue. */
4124 h
->nal_unit_type
= hx
->nal_unit_type
;
4125 h
->nal_ref_idc
= hx
->nal_ref_idc
;
4132 execute_decode_slices(h
, context_count
);
4136 if (s
->current_picture_ptr
&& s
->current_picture_ptr
->owner2
== s
&&
4138 ff_thread_report_progress(&s
->current_picture_ptr
->f
, INT_MAX
,
4139 s
->picture_structure
== PICT_BOTTOM_FIELD
);
4146 * Return the number of bytes consumed for building the current frame.
4148 static int get_consumed_bytes(MpegEncContext
*s
, int pos
, int buf_size
)
4151 pos
= 1; // avoid infinite loops (i doubt that is needed but ...)
4152 if (pos
+ 10 > buf_size
)
4153 pos
= buf_size
; // oops ;)
4158 static int decode_frame(AVCodecContext
*avctx
, void *data
,
4159 int *got_frame
, AVPacket
*avpkt
)
4161 const uint8_t *buf
= avpkt
->data
;
4162 int buf_size
= avpkt
->size
;
4163 H264Context
*h
= avctx
->priv_data
;
4164 MpegEncContext
*s
= &h
->s
;
4165 AVFrame
*pict
= data
;
4168 s
->flags
= avctx
->flags
;
4169 s
->flags2
= avctx
->flags2
;
4171 /* end of stream, output what is still in the buffers */
4173 if (buf_size
== 0) {
4177 s
->current_picture_ptr
= NULL
;
4179 // FIXME factorize this with the output code below
4180 out
= h
->delayed_pic
[0];
4183 h
->delayed_pic
[i
] &&
4184 !h
->delayed_pic
[i
]->f
.key_frame
&&
4185 !h
->delayed_pic
[i
]->mmco_reset
;
4187 if (h
->delayed_pic
[i
]->poc
< out
->poc
) {
4188 out
= h
->delayed_pic
[i
];
4192 for (i
= out_idx
; h
->delayed_pic
[i
]; i
++)
4193 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+ 1];
4203 buf_index
= decode_nal_units(h
, buf
, buf_size
, 0);
4207 if (!s
->current_picture_ptr
&& h
->nal_unit_type
== NAL_END_SEQUENCE
) {
4212 if (!(s
->flags2
& CODEC_FLAG2_CHUNKS
) && !s
->current_picture_ptr
) {
4213 if (avctx
->skip_frame
>= AVDISCARD_NONREF
)
4215 av_log(avctx
, AV_LOG_ERROR
, "no frame!\n");
4219 if (!(s
->flags2
& CODEC_FLAG2_CHUNKS
) ||
4220 (s
->mb_y
>= s
->mb_height
&& s
->mb_height
)) {
4221 if (s
->flags2
& CODEC_FLAG2_CHUNKS
)
4222 decode_postinit(h
, 1);
4225 h
->context_reinitialized
= 0;
4227 if (!h
->next_output_pic
) {
4228 /* Wait for second field. */
4232 *pict
= h
->next_output_pic
->f
;
4236 assert(pict
->data
[0] || !*got_frame
);
4237 ff_print_debug_info(s
, pict
);
4239 return get_consumed_bytes(s
, buf_index
, buf_size
);
4242 av_cold
void ff_h264_free_context(H264Context
*h
)
4246 free_tables(h
, 1); // FIXME cleanup init stuff perhaps
4248 for (i
= 0; i
< MAX_SPS_COUNT
; i
++)
4249 av_freep(h
->sps_buffers
+ i
);
4251 for (i
= 0; i
< MAX_PPS_COUNT
; i
++)
4252 av_freep(h
->pps_buffers
+ i
);
4255 static av_cold
int h264_decode_end(AVCodecContext
*avctx
)
4257 H264Context
*h
= avctx
->priv_data
;
4258 MpegEncContext
*s
= &h
->s
;
4260 ff_h264_free_context(h
);
4262 ff_MPV_common_end(s
);
4264 // memset(h, 0, sizeof(H264Context));
4269 static const AVProfile profiles
[] = {
4270 { FF_PROFILE_H264_BASELINE
, "Baseline" },
4271 { FF_PROFILE_H264_CONSTRAINED_BASELINE
, "Constrained Baseline" },
4272 { FF_PROFILE_H264_MAIN
, "Main" },
4273 { FF_PROFILE_H264_EXTENDED
, "Extended" },
4274 { FF_PROFILE_H264_HIGH
, "High" },
4275 { FF_PROFILE_H264_HIGH_10
, "High 10" },
4276 { FF_PROFILE_H264_HIGH_10_INTRA
, "High 10 Intra" },
4277 { FF_PROFILE_H264_HIGH_422
, "High 4:2:2" },
4278 { FF_PROFILE_H264_HIGH_422_INTRA
, "High 4:2:2 Intra" },
4279 { FF_PROFILE_H264_HIGH_444
, "High 4:4:4" },
4280 { FF_PROFILE_H264_HIGH_444_PREDICTIVE
, "High 4:4:4 Predictive" },
4281 { FF_PROFILE_H264_HIGH_444_INTRA
, "High 4:4:4 Intra" },
4282 { FF_PROFILE_H264_CAVLC_444
, "CAVLC 4:4:4" },
4283 { FF_PROFILE_UNKNOWN
},
4286 AVCodec ff_h264_decoder
= {
4288 .type
= AVMEDIA_TYPE_VIDEO
,
4289 .id
= AV_CODEC_ID_H264
,
4290 .priv_data_size
= sizeof(H264Context
),
4291 .init
= ff_h264_decode_init
,
4292 .close
= h264_decode_end
,
4293 .decode
= decode_frame
,
4294 .capabilities
= /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1
|
4295 CODEC_CAP_DELAY
| CODEC_CAP_SLICE_THREADS
|
4296 CODEC_CAP_FRAME_THREADS
,
4298 .long_name
= NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
4299 .init_thread_copy
= ONLY_IF_THREADS_ENABLED(decode_init_thread_copy
),
4300 .update_thread_context
= ONLY_IF_THREADS_ENABLED(decode_update_thread_context
),
4301 .profiles
= NULL_IF_CONFIG_SMALL(profiles
),
4304 #if CONFIG_H264_VDPAU_DECODER
4305 AVCodec ff_h264_vdpau_decoder
= {
4306 .name
= "h264_vdpau",
4307 .type
= AVMEDIA_TYPE_VIDEO
,
4308 .id
= AV_CODEC_ID_H264
,
4309 .priv_data_size
= sizeof(H264Context
),
4310 .init
= ff_h264_decode_init
,
4311 .close
= h264_decode_end
,
4312 .decode
= decode_frame
,
4313 .capabilities
= CODEC_CAP_DR1
| CODEC_CAP_DELAY
| CODEC_CAP_HWACCEL_VDPAU
,
4315 .long_name
= NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
4316 .pix_fmts
= (const enum AVPixelFormat
[]) { AV_PIX_FMT_VDPAU_H264
,
4318 .profiles
= NULL_IF_CONFIG_SMALL(profiles
),