2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
31 #include "mpegvideo.h"
34 #include "h264_parser.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
42 #include "x86/h264_i386.h"
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
52 #define DELAYED_PIC_REF 4
54 static VLC coeff_token_vlc
[4];
55 static VLC_TYPE coeff_token_vlc_tables
[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size
[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc
;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table
[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size
= 256;
62 static VLC total_zeros_vlc
[15];
63 static VLC_TYPE total_zeros_vlc_tables
[15][512][2];
64 static const int total_zeros_vlc_tables_size
= 512;
66 static VLC chroma_dc_total_zeros_vlc
[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables
[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size
= 8;
70 static VLC run_vlc
[6];
71 static VLC_TYPE run_vlc_tables
[6][8][2];
72 static const int run_vlc_tables_size
= 8;
75 static VLC_TYPE run7_vlc_table
[96][2];
76 static const int run7_vlc_table_size
= 96;
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
);
79 static void svq3_add_idct_c(uint8_t *dst
, DCTELEM
*block
, int stride
, int qp
, int dc
);
80 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
81 static void filter_mb_fast( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
82 static Picture
* remove_long(H264Context
*h
, int i
, int ref_mask
);
84 static av_always_inline
uint32_t pack16to32(int a
, int b
){
85 #ifdef WORDS_BIGENDIAN
86 return (b
&0xFFFF) + (a
<<16);
88 return (a
&0xFFFF) + (b
<<16);
92 static const uint8_t rem6
[52]={
93 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
96 static const uint8_t div6
[52]={
97 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
100 static const uint8_t left_block_options
[4][8]={
107 #define LEVEL_TAB_BITS 8
108 static int8_t cavlc_level_tab
[7][1<<LEVEL_TAB_BITS
][2];
110 static void fill_caches(H264Context
*h
, int mb_type
, int for_deblock
){
111 MpegEncContext
* const s
= &h
->s
;
112 const int mb_xy
= h
->mb_xy
;
113 int topleft_xy
, top_xy
, topright_xy
, left_xy
[2];
114 int topleft_type
, top_type
, topright_type
, left_type
[2];
115 const uint8_t * left_block
;
116 int topleft_partition
= -1;
119 top_xy
= mb_xy
- (s
->mb_stride
<< FIELD_PICTURE
);
121 //FIXME deblocking could skip the intra and nnz parts.
122 if(for_deblock
&& (h
->slice_num
== 1 || h
->slice_table
[mb_xy
] == h
->slice_table
[top_xy
]) && !FRAME_MBAFF
)
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
128 topleft_xy
= top_xy
- 1;
129 topright_xy
= top_xy
+ 1;
130 left_xy
[1] = left_xy
[0] = mb_xy
-1;
131 left_block
= left_block_options
[0];
133 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
134 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
135 const int topleft_pair_xy
= top_pair_xy
- 1;
136 const int topright_pair_xy
= top_pair_xy
+ 1;
137 const int topleft_mb_field_flag
= IS_INTERLACED(s
->current_picture
.mb_type
[topleft_pair_xy
]);
138 const int top_mb_field_flag
= IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
139 const int topright_mb_field_flag
= IS_INTERLACED(s
->current_picture
.mb_type
[topright_pair_xy
]);
140 const int left_mb_field_flag
= IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
141 const int curr_mb_field_flag
= IS_INTERLACED(mb_type
);
142 const int bottom
= (s
->mb_y
& 1);
143 tprintf(s
->avctx
, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag
, left_mb_field_flag
, topleft_mb_field_flag
, top_mb_field_flag
, topright_mb_field_flag
);
145 if (curr_mb_field_flag
&& (bottom
|| top_mb_field_flag
)){
146 top_xy
-= s
->mb_stride
;
148 if (curr_mb_field_flag
&& (bottom
|| topleft_mb_field_flag
)){
149 topleft_xy
-= s
->mb_stride
;
150 } else if(bottom
&& !curr_mb_field_flag
&& left_mb_field_flag
) {
151 topleft_xy
+= s
->mb_stride
;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition
= 0;
155 if (curr_mb_field_flag
&& (bottom
|| topright_mb_field_flag
)){
156 topright_xy
-= s
->mb_stride
;
158 if (left_mb_field_flag
!= curr_mb_field_flag
) {
159 left_xy
[1] = left_xy
[0] = pair_xy
- 1;
160 if (curr_mb_field_flag
) {
161 left_xy
[1] += s
->mb_stride
;
162 left_block
= left_block_options
[3];
164 left_block
= left_block_options
[2 - bottom
];
169 h
->top_mb_xy
= top_xy
;
170 h
->left_mb_xy
[0] = left_xy
[0];
171 h
->left_mb_xy
[1] = left_xy
[1];
175 top_type
= h
->slice_table
[top_xy
] < 0xFFFF ? s
->current_picture
.mb_type
[top_xy
] : 0;
176 left_type
[0] = h
->slice_table
[left_xy
[0] ] < 0xFFFF ? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
177 left_type
[1] = h
->slice_table
[left_xy
[1] ] < 0xFFFF ? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
179 if(MB_MBAFF
&& !IS_INTRA(mb_type
)){
181 for(list
=0; list
<h
->list_count
; list
++){
182 //These values where changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type
,list
)){
186 int8_t *ref
= &s
->current_picture
.ref_index
[list
][h
->mb2b8_xy
[mb_xy
]];
187 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 0]] =
188 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 2]] = (pack16to32(ref
[0],ref
[1])&0x00FF00FF)*0x0101;
190 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 8]] =
191 *(uint32_t*)&h
->ref_cache
[list
][scan8
[10]] = (pack16to32(ref
[0],ref
[1])&0x00FF00FF)*0x0101;
196 topleft_type
= h
->slice_table
[topleft_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topleft_xy
] : 0;
197 top_type
= h
->slice_table
[top_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[top_xy
] : 0;
198 topright_type
= h
->slice_table
[topright_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topright_xy
]: 0;
199 left_type
[0] = h
->slice_table
[left_xy
[0] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
200 left_type
[1] = h
->slice_table
[left_xy
[1] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
202 if(IS_INTRA(mb_type
)){
203 int type_mask
= h
->pps
.constrained_intra_pred
? IS_INTRA(-1) : -1;
204 h
->topleft_samples_available
=
205 h
->top_samples_available
=
206 h
->left_samples_available
= 0xFFFF;
207 h
->topright_samples_available
= 0xEEEA;
209 if(!(top_type
& type_mask
)){
210 h
->topleft_samples_available
= 0xB3FF;
211 h
->top_samples_available
= 0x33FF;
212 h
->topright_samples_available
= 0x26EA;
214 if(IS_INTERLACED(mb_type
) != IS_INTERLACED(left_type
[0])){
215 if(IS_INTERLACED(mb_type
)){
216 if(!(left_type
[0] & type_mask
)){
217 h
->topleft_samples_available
&= 0xDFFF;
218 h
->left_samples_available
&= 0x5FFF;
220 if(!(left_type
[1] & type_mask
)){
221 h
->topleft_samples_available
&= 0xFF5F;
222 h
->left_samples_available
&= 0xFF5F;
225 int left_typei
= h
->slice_table
[left_xy
[0] + s
->mb_stride
] == h
->slice_num
226 ? s
->current_picture
.mb_type
[left_xy
[0] + s
->mb_stride
] : 0;
227 assert(left_xy
[0] == left_xy
[1]);
228 if(!((left_typei
& type_mask
) && (left_type
[0] & type_mask
))){
229 h
->topleft_samples_available
&= 0xDF5F;
230 h
->left_samples_available
&= 0x5F5F;
234 if(!(left_type
[0] & type_mask
)){
235 h
->topleft_samples_available
&= 0xDF5F;
236 h
->left_samples_available
&= 0x5F5F;
240 if(!(topleft_type
& type_mask
))
241 h
->topleft_samples_available
&= 0x7FFF;
243 if(!(topright_type
& type_mask
))
244 h
->topright_samples_available
&= 0xFBFF;
246 if(IS_INTRA4x4(mb_type
)){
247 if(IS_INTRA4x4(top_type
)){
248 h
->intra4x4_pred_mode_cache
[4+8*0]= h
->intra4x4_pred_mode
[top_xy
][4];
249 h
->intra4x4_pred_mode_cache
[5+8*0]= h
->intra4x4_pred_mode
[top_xy
][5];
250 h
->intra4x4_pred_mode_cache
[6+8*0]= h
->intra4x4_pred_mode
[top_xy
][6];
251 h
->intra4x4_pred_mode_cache
[7+8*0]= h
->intra4x4_pred_mode
[top_xy
][3];
254 if(!(top_type
& type_mask
))
259 h
->intra4x4_pred_mode_cache
[4+8*0]=
260 h
->intra4x4_pred_mode_cache
[5+8*0]=
261 h
->intra4x4_pred_mode_cache
[6+8*0]=
262 h
->intra4x4_pred_mode_cache
[7+8*0]= pred
;
265 if(IS_INTRA4x4(left_type
[i
])){
266 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[0+2*i
]];
267 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[1+2*i
]];
270 if(!(left_type
[i
] & type_mask
))
275 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]=
276 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= pred
;
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
294 h
->non_zero_count_cache
[4+8*0]= h
->non_zero_count
[top_xy
][4];
295 h
->non_zero_count_cache
[5+8*0]= h
->non_zero_count
[top_xy
][5];
296 h
->non_zero_count_cache
[6+8*0]= h
->non_zero_count
[top_xy
][6];
297 h
->non_zero_count_cache
[7+8*0]= h
->non_zero_count
[top_xy
][3];
299 h
->non_zero_count_cache
[1+8*0]= h
->non_zero_count
[top_xy
][9];
300 h
->non_zero_count_cache
[2+8*0]= h
->non_zero_count
[top_xy
][8];
302 h
->non_zero_count_cache
[1+8*3]= h
->non_zero_count
[top_xy
][12];
303 h
->non_zero_count_cache
[2+8*3]= h
->non_zero_count
[top_xy
][11];
306 h
->non_zero_count_cache
[4+8*0]=
307 h
->non_zero_count_cache
[5+8*0]=
308 h
->non_zero_count_cache
[6+8*0]=
309 h
->non_zero_count_cache
[7+8*0]=
311 h
->non_zero_count_cache
[1+8*0]=
312 h
->non_zero_count_cache
[2+8*0]=
314 h
->non_zero_count_cache
[1+8*3]=
315 h
->non_zero_count_cache
[2+8*3]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
319 for (i
=0; i
<2; i
++) {
321 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[0+2*i
]];
322 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[1+2*i
]];
323 h
->non_zero_count_cache
[0+8*1 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[4+2*i
]];
324 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[5+2*i
]];
326 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]=
327 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]=
328 h
->non_zero_count_cache
[0+8*1 + 8*i
]=
329 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
336 h
->top_cbp
= h
->cbp_table
[top_xy
];
337 } else if(IS_INTRA(mb_type
)) {
344 h
->left_cbp
= h
->cbp_table
[left_xy
[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type
)) {
351 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[0]]>>((left_block
[0]&(~1))+1))&0x1) << 1;
354 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[1]]>>((left_block
[2]&(~1))+1))&0x1) << 3;
359 if(IS_INTER(mb_type
) || IS_DIRECT(mb_type
)){
361 for(list
=0; list
<h
->list_count
; list
++){
362 if(!USES_LIST(mb_type
, list
) && !IS_DIRECT(mb_type
) && !h
->deblocking_filter
){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
370 h
->mv_cache_clean
[list
]= 0;
372 if(USES_LIST(top_type
, list
)){
373 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
374 const int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
375 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0];
376 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 1];
377 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2];
378 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 3];
379 h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]=
380 h
->ref_cache
[list
][scan8
[0] + 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 0];
381 h
->ref_cache
[list
][scan8
[0] + 2 - 1*8]=
382 h
->ref_cache
[list
][scan8
[0] + 3 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 1];
384 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]=
385 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]=
386 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]=
387 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]= ((top_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
)&0xFF)*0x01010101;
392 int cache_idx
= scan8
[0] - 1 + i
*2*8;
393 if(USES_LIST(left_type
[i
], list
)){
394 const int b_xy
= h
->mb2b_xy
[left_xy
[i
]] + 3;
395 const int b8_xy
= h
->mb2b8_xy
[left_xy
[i
]] + 1;
396 *(uint32_t*)h
->mv_cache
[list
][cache_idx
]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[0+i
*2]];
397 *(uint32_t*)h
->mv_cache
[list
][cache_idx
+8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[1+i
*2]];
398 h
->ref_cache
[list
][cache_idx
]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[0+i
*2]>>1)];
399 h
->ref_cache
[list
][cache_idx
+8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[1+i
*2]>>1)];
401 *(uint32_t*)h
->mv_cache
[list
][cache_idx
]=
402 *(uint32_t*)h
->mv_cache
[list
][cache_idx
+8]= 0;
403 h
->ref_cache
[list
][cache_idx
]=
404 h
->ref_cache
[list
][cache_idx
+8]= left_type
[i
] ? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
408 if(for_deblock
|| ((IS_DIRECT(mb_type
) && !h
->direct_spatial_mv_pred
) && !FRAME_MBAFF
))
411 if(USES_LIST(topleft_type
, list
)){
412 const int b_xy
= h
->mb2b_xy
[topleft_xy
] + 3 + h
->b_stride
+ (topleft_partition
& 2*h
->b_stride
);
413 const int b8_xy
= h
->mb2b8_xy
[topleft_xy
] + 1 + (topleft_partition
& h
->b8_stride
);
414 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
415 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
417 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= 0;
418 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= topleft_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
421 if(USES_LIST(topright_type
, list
)){
422 const int b_xy
= h
->mb2b_xy
[topright_xy
] + 3*h
->b_stride
;
423 const int b8_xy
= h
->mb2b8_xy
[topright_xy
] + h
->b8_stride
;
424 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
425 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
427 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= 0;
428 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= topright_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
431 if((IS_SKIP(mb_type
) || IS_DIRECT(mb_type
)) && !FRAME_MBAFF
)
434 h
->ref_cache
[list
][scan8
[5 ]+1] =
435 h
->ref_cache
[list
][scan8
[7 ]+1] =
436 h
->ref_cache
[list
][scan8
[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h
->ref_cache
[list
][scan8
[4 ]] =
438 h
->ref_cache
[list
][scan8
[12]] = PART_NOT_AVAILABLE
;
439 *(uint32_t*)h
->mv_cache
[list
][scan8
[5 ]+1]=
440 *(uint32_t*)h
->mv_cache
[list
][scan8
[7 ]+1]=
441 *(uint32_t*)h
->mv_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h
->mv_cache
[list
][scan8
[4 ]]=
443 *(uint32_t*)h
->mv_cache
[list
][scan8
[12]]= 0;
446 /* XXX beurk, Load mvd */
447 if(USES_LIST(top_type
, list
)){
448 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
449 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 0];
450 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 1];
451 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 2];
452 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 3];
454 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]=
455 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]=
456 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]=
457 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type
[0], list
)){
460 const int b_xy
= h
->mb2b_xy
[left_xy
[0]] + 3;
461 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[0]];
462 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[1]];
464 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]=
465 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type
[1], list
)){
468 const int b_xy
= h
->mb2b_xy
[left_xy
[1]] + 3;
469 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[2]];
470 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[3]];
472 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]=
473 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h
->mvd_cache
[list
][scan8
[5 ]+1]=
476 *(uint32_t*)h
->mvd_cache
[list
][scan8
[7 ]+1]=
477 *(uint32_t*)h
->mvd_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h
->mvd_cache
[list
][scan8
[4 ]]=
479 *(uint32_t*)h
->mvd_cache
[list
][scan8
[12]]= 0;
481 if(h
->slice_type_nos
== FF_B_TYPE
){
482 fill_rectangle(&h
->direct_cache
[scan8
[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type
)){
485 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type
)){
487 int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
488 h
->direct_cache
[scan8
[0] + 0 - 1*8]= h
->direct_table
[b8_xy
];
489 h
->direct_cache
[scan8
[0] + 2 - 1*8]= h
->direct_table
[b8_xy
+ 1];
491 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type
[0]))
495 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type
[0]))
497 h
->direct_cache
[scan8
[0] - 1 + 0*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[0]] + 1 + h
->b8_stride
*(left_block
[0]>>1)];
499 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type
[1]))
502 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type
[1]))
504 h
->direct_cache
[scan8
[0] - 1 + 2*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[1]] + 1 + h
->b8_stride
*(left_block
[2]>>1)];
506 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 0;
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
546 h
->neighbor_transform_size
= !!IS_8x8DCT(top_type
) + !!IS_8x8DCT(left_type
[0]);
549 static inline void write_back_intra_pred_mode(H264Context
*h
){
550 const int mb_xy
= h
->mb_xy
;
552 h
->intra4x4_pred_mode
[mb_xy
][0]= h
->intra4x4_pred_mode_cache
[7+8*1];
553 h
->intra4x4_pred_mode
[mb_xy
][1]= h
->intra4x4_pred_mode_cache
[7+8*2];
554 h
->intra4x4_pred_mode
[mb_xy
][2]= h
->intra4x4_pred_mode_cache
[7+8*3];
555 h
->intra4x4_pred_mode
[mb_xy
][3]= h
->intra4x4_pred_mode_cache
[7+8*4];
556 h
->intra4x4_pred_mode
[mb_xy
][4]= h
->intra4x4_pred_mode_cache
[4+8*4];
557 h
->intra4x4_pred_mode
[mb_xy
][5]= h
->intra4x4_pred_mode_cache
[5+8*4];
558 h
->intra4x4_pred_mode
[mb_xy
][6]= h
->intra4x4_pred_mode_cache
[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
564 static inline int check_intra4x4_pred_mode(H264Context
*h
){
565 MpegEncContext
* const s
= &h
->s
;
566 static const int8_t top
[12]= {-1, 0,LEFT_DC_PRED
,-1,-1,-1,-1,-1, 0};
567 static const int8_t left
[12]= { 0,-1, TOP_DC_PRED
, 0,-1,-1,-1, 0,-1,DC_128_PRED
};
570 if(!(h
->top_samples_available
&0x8000)){
572 int status
= top
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + i
] ];
574 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
577 h
->intra4x4_pred_mode_cache
[scan8
[0] + i
]= status
;
582 if((h
->left_samples_available
&0x8888)!=0x8888){
583 static const int mask
[4]={0x8000,0x2000,0x80,0x20};
585 if(!(h
->left_samples_available
&mask
[i
])){
586 int status
= left
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
] ];
588 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
591 h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
]= status
;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context
*h
, int mode
){
604 MpegEncContext
* const s
= &h
->s
;
605 static const int8_t top
[7]= {LEFT_DC_PRED8x8
, 1,-1,-1};
606 static const int8_t left
[7]= { TOP_DC_PRED8x8
,-1, 2,-1,DC_128_PRED8x8
};
609 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "out of range intra chroma pred mode at %d %d\n", s
->mb_x
, s
->mb_y
);
613 if(!(h
->top_samples_available
&0x8000)){
616 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
621 if((h
->left_samples_available
&0x8080) != 0x8080){
623 if(h
->left_samples_available
&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode
= ALZHEIMER_DC_L0T_PRED8x8
+ (!(h
->left_samples_available
&0x8000)) + 2*(mode
== DC_128_PRED8x8
);
627 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
636 * gets the predicted intra4x4 prediction mode.
638 static inline int pred_intra_mode(H264Context
*h
, int n
){
639 const int index8
= scan8
[n
];
640 const int left
= h
->intra4x4_pred_mode_cache
[index8
- 1];
641 const int top
= h
->intra4x4_pred_mode_cache
[index8
- 8];
642 const int min
= FFMIN(left
, top
);
644 tprintf(h
->s
.avctx
, "mode:%d %d min:%d\n", left
,top
, min
);
646 if(min
<0) return DC_PRED
;
650 static inline void write_back_non_zero_count(H264Context
*h
){
651 const int mb_xy
= h
->mb_xy
;
653 h
->non_zero_count
[mb_xy
][0]= h
->non_zero_count_cache
[7+8*1];
654 h
->non_zero_count
[mb_xy
][1]= h
->non_zero_count_cache
[7+8*2];
655 h
->non_zero_count
[mb_xy
][2]= h
->non_zero_count_cache
[7+8*3];
656 h
->non_zero_count
[mb_xy
][3]= h
->non_zero_count_cache
[7+8*4];
657 h
->non_zero_count
[mb_xy
][4]= h
->non_zero_count_cache
[4+8*4];
658 h
->non_zero_count
[mb_xy
][5]= h
->non_zero_count_cache
[5+8*4];
659 h
->non_zero_count
[mb_xy
][6]= h
->non_zero_count_cache
[6+8*4];
661 h
->non_zero_count
[mb_xy
][9]= h
->non_zero_count_cache
[1+8*2];
662 h
->non_zero_count
[mb_xy
][8]= h
->non_zero_count_cache
[2+8*2];
663 h
->non_zero_count
[mb_xy
][7]= h
->non_zero_count_cache
[2+8*1];
665 h
->non_zero_count
[mb_xy
][12]=h
->non_zero_count_cache
[1+8*5];
666 h
->non_zero_count
[mb_xy
][11]=h
->non_zero_count_cache
[2+8*5];
667 h
->non_zero_count
[mb_xy
][10]=h
->non_zero_count_cache
[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
674 static inline int pred_non_zero_count(H264Context
*h
, int n
){
675 const int index8
= scan8
[n
];
676 const int left
= h
->non_zero_count_cache
[index8
- 1];
677 const int top
= h
->non_zero_count_cache
[index8
- 8];
680 if(i
<64) i
= (i
+1)>>1;
682 tprintf(h
->s
.avctx
, "pred_nnz L%X T%X n%d s%d P%X\n", left
, top
, n
, scan8
[n
], i
&31);
687 static inline int fetch_diagonal_mv(H264Context
*h
, const int16_t **C
, int i
, int list
, int part_width
){
688 const int topright_ref
= h
->ref_cache
[list
][ i
- 8 + part_width
];
689 MpegEncContext
*s
= &h
->s
;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
694 const uint32_t *mb_types
= s
->current_picture_ptr
->mb_type
;
696 *(uint32_t*)h
->mv_cache
[list
][scan8
[0]-2] = 0;
697 *C
= h
->mv_cache
[list
][scan8
[0]-2];
700 && (s
->mb_y
&1) && i
< scan8
[0]+8 && topright_ref
!= PART_NOT_AVAILABLE
){
701 int topright_xy
= s
->mb_x
+ (s
->mb_y
-1)*s
->mb_stride
+ (i
== scan8
[0]+3);
702 if(IS_INTERLACED(mb_types
[topright_xy
])){
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
713 SET_DIAG_MV(*2, >>1, s
->mb_x
*4+(i
&7)-4+part_width
, s
->mb_y
*4-1);
716 if(topright_ref
== PART_NOT_AVAILABLE
717 && ((s
->mb_y
&1) || i
>= scan8
[0]+8) && (i
&7)==4
718 && h
->ref_cache
[list
][scan8
[0]-1] != PART_NOT_AVAILABLE
){
720 && IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])){
721 SET_DIAG_MV(*2, >>1, s
->mb_x
*4-1, (s
->mb_y
|1)*4+(s
->mb_y
&1)*2+(i
>>4)-1);
724 && !IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s
->mb_x
*4-1, (s
->mb_y
&~1)*4 - 1 + ((i
-scan8
[0])>>3)*2);
733 if(topright_ref
!= PART_NOT_AVAILABLE
){
734 *C
= h
->mv_cache
[list
][ i
- 8 + part_width
];
737 tprintf(s
->avctx
, "topright MV not available\n");
739 *C
= h
->mv_cache
[list
][ i
- 8 - 1 ];
740 return h
->ref_cache
[list
][ i
- 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
751 static inline void pred_motion(H264Context
* const h
, int n
, int part_width
, int list
, int ref
, int * const mx
, int * const my
){
752 const int index8
= scan8
[n
];
753 const int top_ref
= h
->ref_cache
[list
][ index8
- 8 ];
754 const int left_ref
= h
->ref_cache
[list
][ index8
- 1 ];
755 const int16_t * const A
= h
->mv_cache
[list
][ index8
- 1 ];
756 const int16_t * const B
= h
->mv_cache
[list
][ index8
- 8 ];
758 int diagonal_ref
, match_count
;
760 assert(part_width
==1 || part_width
==2 || part_width
==4);
770 diagonal_ref
= fetch_diagonal_mv(h
, &C
, index8
, list
, part_width
);
771 match_count
= (diagonal_ref
==ref
) + (top_ref
==ref
) + (left_ref
==ref
);
772 tprintf(h
->s
.avctx
, "pred_motion match_count=%d\n", match_count
);
773 if(match_count
> 1){ //most common
774 *mx
= mid_pred(A
[0], B
[0], C
[0]);
775 *my
= mid_pred(A
[1], B
[1], C
[1]);
776 }else if(match_count
==1){
780 }else if(top_ref
==ref
){
788 if(top_ref
== PART_NOT_AVAILABLE
&& diagonal_ref
== PART_NOT_AVAILABLE
&& left_ref
!= PART_NOT_AVAILABLE
){
792 *mx
= mid_pred(A
[0], B
[0], C
[0]);
793 *my
= mid_pred(A
[1], B
[1], C
[1]);
797 tprintf(h
->s
.avctx
, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], diagonal_ref
, C
[0], C
[1], left_ref
, A
[0], A
[1], ref
, *mx
, *my
, h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
806 static inline void pred_16x8_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
808 const int top_ref
= h
->ref_cache
[list
][ scan8
[0] - 8 ];
809 const int16_t * const B
= h
->mv_cache
[list
][ scan8
[0] - 8 ];
811 tprintf(h
->s
.avctx
, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
819 const int left_ref
= h
->ref_cache
[list
][ scan8
[8] - 1 ];
820 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[8] - 1 ];
822 tprintf(h
->s
.avctx
, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
832 pred_motion(h
, n
, 4, list
, ref
, mx
, my
);
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
841 static inline void pred_8x16_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
843 const int left_ref
= h
->ref_cache
[list
][ scan8
[0] - 1 ];
844 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[0] - 1 ];
846 tprintf(h
->s
.avctx
, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
857 diagonal_ref
= fetch_diagonal_mv(h
, &C
, scan8
[4], list
, 2);
859 tprintf(h
->s
.avctx
, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref
, C
[0], C
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
861 if(diagonal_ref
== ref
){
869 pred_motion(h
, n
, 2, list
, ref
, mx
, my
);
872 static inline void pred_pskip_motion(H264Context
* const h
, int * const mx
, int * const my
){
873 const int top_ref
= h
->ref_cache
[0][ scan8
[0] - 8 ];
874 const int left_ref
= h
->ref_cache
[0][ scan8
[0] - 1 ];
876 tprintf(h
->s
.avctx
, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref
, left_ref
, h
->s
.mb_x
, h
->s
.mb_y
);
878 if(top_ref
== PART_NOT_AVAILABLE
|| left_ref
== PART_NOT_AVAILABLE
879 || !( top_ref
| *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 8 ])
880 || !(left_ref
| *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 1 ])){
886 pred_motion(h
, 0, 4, 0, 0, mx
, my
);
891 static int get_scale_factor(H264Context
* const h
, int poc
, int poc1
, int i
){
892 int poc0
= h
->ref_list
[0][i
].poc
;
893 int td
= av_clip(poc1
- poc0
, -128, 127);
894 if(td
== 0 || h
->ref_list
[0][i
].long_ref
){
897 int tb
= av_clip(poc
- poc0
, -128, 127);
898 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
899 return av_clip((tb
*tx
+ 32) >> 6, -1024, 1023);
903 static inline void direct_dist_scale_factor(H264Context
* const h
){
904 MpegEncContext
* const s
= &h
->s
;
905 const int poc
= h
->s
.current_picture_ptr
->field_poc
[ s
->picture_structure
== PICT_BOTTOM_FIELD
];
906 const int poc1
= h
->ref_list
[1][0].poc
;
908 for(field
=0; field
<2; field
++){
909 const int poc
= h
->s
.current_picture_ptr
->field_poc
[field
];
910 const int poc1
= h
->ref_list
[1][0].field_poc
[field
];
911 for(i
=0; i
< 2*h
->ref_count
[0]; i
++)
912 h
->dist_scale_factor_field
[field
][i
^field
] = get_scale_factor(h
, poc
, poc1
, i
+16);
915 for(i
=0; i
<h
->ref_count
[0]; i
++){
916 h
->dist_scale_factor
[i
] = get_scale_factor(h
, poc
, poc1
, i
);
920 static void fill_colmap(H264Context
*h
, int map
[2][16+32], int list
, int field
, int colfield
, int mbafi
){
921 MpegEncContext
* const s
= &h
->s
;
922 Picture
* const ref1
= &h
->ref_list
[1][0];
923 int j
, old_ref
, rfield
;
924 int start
= mbafi
? 16 : 0;
925 int end
= mbafi
? 16+2*h
->ref_count
[list
] : h
->ref_count
[list
];
926 int interl
= mbafi
|| s
->picture_structure
!= PICT_FRAME
;
928 /* bogus; fills in for missing frames */
929 memset(map
[list
], 0, sizeof(map
[list
]));
931 for(rfield
=0; rfield
<2; rfield
++){
932 for(old_ref
=0; old_ref
<ref1
->ref_count
[colfield
][list
]; old_ref
++){
933 int poc
= ref1
->ref_poc
[colfield
][list
][old_ref
];
937 else if( interl
&& (poc
&3) == 3) //FIXME store all MBAFF references so this isnt needed
938 poc
= (poc
&~3) + rfield
+ 1;
940 for(j
=start
; j
<end
; j
++){
941 if(4*h
->ref_list
[list
][j
].frame_num
+ (h
->ref_list
[list
][j
].reference
&3) == poc
){
942 int cur_ref
= mbafi
? (j
-16)^field
: j
;
943 map
[list
][2*old_ref
+ (rfield
^field
) + 16] = cur_ref
;
945 map
[list
][old_ref
] = cur_ref
;
953 static inline void direct_ref_list_init(H264Context
* const h
){
954 MpegEncContext
* const s
= &h
->s
;
955 Picture
* const ref1
= &h
->ref_list
[1][0];
956 Picture
* const cur
= s
->current_picture_ptr
;
958 int sidx
= (s
->picture_structure
&1)^1;
959 int ref1sidx
= (ref1
->reference
&1)^1;
961 for(list
=0; list
<2; list
++){
962 cur
->ref_count
[sidx
][list
] = h
->ref_count
[list
];
963 for(j
=0; j
<h
->ref_count
[list
]; j
++)
964 cur
->ref_poc
[sidx
][list
][j
] = 4*h
->ref_list
[list
][j
].frame_num
+ (h
->ref_list
[list
][j
].reference
&3);
967 if(s
->picture_structure
== PICT_FRAME
){
968 memcpy(cur
->ref_count
[1], cur
->ref_count
[0], sizeof(cur
->ref_count
[0]));
969 memcpy(cur
->ref_poc
[1], cur
->ref_poc
[0], sizeof(cur
->ref_poc
[0]));
972 cur
->mbaff
= FRAME_MBAFF
;
974 if(cur
->pict_type
!= FF_B_TYPE
|| h
->direct_spatial_mv_pred
)
977 for(list
=0; list
<2; list
++){
978 fill_colmap(h
, h
->map_col_to_list0
, list
, sidx
, ref1sidx
, 0);
979 for(field
=0; field
<2; field
++)
980 fill_colmap(h
, h
->map_col_to_list0_field
[field
], list
, field
, field
, 1);
984 static inline void pred_direct_motion(H264Context
* const h
, int *mb_type
){
985 MpegEncContext
* const s
= &h
->s
;
986 int b8_stride
= h
->b8_stride
;
987 int b4_stride
= h
->b_stride
;
988 int mb_xy
= h
->mb_xy
;
990 const int16_t (*l1mv0
)[2], (*l1mv1
)[2];
991 const int8_t *l1ref0
, *l1ref1
;
992 const int is_b8x8
= IS_8X8(*mb_type
);
993 unsigned int sub_mb_type
;
996 assert(h
->ref_list
[1][0].reference
&3);
998 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1000 if(IS_INTERLACED(h
->ref_list
[1][0].mb_type
[mb_xy
])){ // AFL/AFR/FR/FL -> AFL/FL
1001 if(!IS_INTERLACED(*mb_type
)){ // AFR/FR -> AFL/FL
1002 int cur_poc
= s
->current_picture_ptr
->poc
;
1003 int *col_poc
= h
->ref_list
[1]->field_poc
;
1004 int col_parity
= FFABS(col_poc
[0] - cur_poc
) >= FFABS(col_poc
[1] - cur_poc
);
1005 mb_xy
= s
->mb_x
+ ((s
->mb_y
&~1) + col_parity
)*s
->mb_stride
;
1007 }else if(!(s
->picture_structure
& h
->ref_list
[1][0].reference
) && !h
->ref_list
[1][0].mbaff
){// FL -> FL & differ parity
1008 int fieldoff
= 2*(h
->ref_list
[1][0].reference
)-3;
1009 mb_xy
+= s
->mb_stride
*fieldoff
;
1012 }else{ // AFL/AFR/FR/FL -> AFR/FR
1013 if(IS_INTERLACED(*mb_type
)){ // AFL /FL -> AFR/FR
1014 mb_xy
= s
->mb_x
+ (s
->mb_y
&~1)*s
->mb_stride
;
1015 mb_type_col
[0] = h
->ref_list
[1][0].mb_type
[mb_xy
];
1016 mb_type_col
[1] = h
->ref_list
[1][0].mb_type
[mb_xy
+ s
->mb_stride
];
1019 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1020 if( (mb_type_col
[0] & MB_TYPE_16x16_OR_INTRA
)
1021 && (mb_type_col
[1] & MB_TYPE_16x16_OR_INTRA
)
1023 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1024 *mb_type
|= MB_TYPE_16x8
|MB_TYPE_L0L1
|MB_TYPE_DIRECT2
; /* B_16x8 */
1026 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1027 *mb_type
|= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1029 }else{ // AFR/FR -> AFR/FR
1032 mb_type_col
[1] = h
->ref_list
[1][0].mb_type
[mb_xy
];
1033 if(IS_8X8(mb_type_col
[0]) && !h
->sps
.direct_8x8_inference_flag
){
1034 /* FIXME save sub mb types from previous frames (or derive from MVs)
1035 * so we know exactly what block size to use */
1036 sub_mb_type
= MB_TYPE_8x8
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_4x4 */
1037 *mb_type
|= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1038 }else if(!is_b8x8
&& (mb_type_col
[0] & MB_TYPE_16x16_OR_INTRA
)){
1039 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1040 *mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_16x16 */
1042 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1043 *mb_type
|= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1048 l1mv0
= &h
->ref_list
[1][0].motion_val
[0][h
->mb2b_xy
[mb_xy
]];
1049 l1mv1
= &h
->ref_list
[1][0].motion_val
[1][h
->mb2b_xy
[mb_xy
]];
1050 l1ref0
= &h
->ref_list
[1][0].ref_index
[0][h
->mb2b8_xy
[mb_xy
]];
1051 l1ref1
= &h
->ref_list
[1][0].ref_index
[1][h
->mb2b8_xy
[mb_xy
]];
1054 l1ref0
+= h
->b8_stride
;
1055 l1ref1
+= h
->b8_stride
;
1056 l1mv0
+= 2*b4_stride
;
1057 l1mv1
+= 2*b4_stride
;
1061 if(h
->direct_spatial_mv_pred
){
1066 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1068 /* ref = min(neighbors) */
1069 for(list
=0; list
<2; list
++){
1070 int refa
= h
->ref_cache
[list
][scan8
[0] - 1];
1071 int refb
= h
->ref_cache
[list
][scan8
[0] - 8];
1072 int refc
= h
->ref_cache
[list
][scan8
[0] - 8 + 4];
1073 if(refc
== PART_NOT_AVAILABLE
)
1074 refc
= h
->ref_cache
[list
][scan8
[0] - 8 - 1];
1075 ref
[list
] = FFMIN3((unsigned)refa
, (unsigned)refb
, (unsigned)refc
);
1080 if(ref
[0] < 0 && ref
[1] < 0){
1081 ref
[0] = ref
[1] = 0;
1082 mv
[0][0] = mv
[0][1] =
1083 mv
[1][0] = mv
[1][1] = 0;
1085 for(list
=0; list
<2; list
++){
1087 pred_motion(h
, 0, 4, list
, ref
[list
], &mv
[list
][0], &mv
[list
][1]);
1089 mv
[list
][0] = mv
[list
][1] = 0;
1095 *mb_type
&= ~MB_TYPE_L1
;
1096 sub_mb_type
&= ~MB_TYPE_L1
;
1097 }else if(ref
[0] < 0){
1099 *mb_type
&= ~MB_TYPE_L0
;
1100 sub_mb_type
&= ~MB_TYPE_L0
;
1103 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
[0])){
1104 for(i8
=0; i8
<4; i8
++){
1107 int xy8
= x8
+y8
*b8_stride
;
1108 int xy4
= 3*x8
+y8
*b4_stride
;
1111 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1113 h
->sub_mb_type
[i8
] = sub_mb_type
;
1115 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1116 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1117 if(!IS_INTRA(mb_type_col
[y8
])
1118 && ( (l1ref0
[xy8
] == 0 && FFABS(l1mv0
[xy4
][0]) <= 1 && FFABS(l1mv0
[xy4
][1]) <= 1)
1119 || (l1ref0
[xy8
] < 0 && l1ref1
[xy8
] == 0 && FFABS(l1mv1
[xy4
][0]) <= 1 && FFABS(l1mv1
[xy4
][1]) <= 1))){
1121 a
= pack16to32(mv
[0][0],mv
[0][1]);
1123 b
= pack16to32(mv
[1][0],mv
[1][1]);
1125 a
= pack16to32(mv
[0][0],mv
[0][1]);
1126 b
= pack16to32(mv
[1][0],mv
[1][1]);
1128 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, a
, 4);
1129 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, b
, 4);
1131 }else if(IS_16X16(*mb_type
)){
1134 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, (uint8_t)ref
[0], 1);
1135 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, (uint8_t)ref
[1], 1);
1136 if(!IS_INTRA(mb_type_col
[0])
1137 && ( (l1ref0
[0] == 0 && FFABS(l1mv0
[0][0]) <= 1 && FFABS(l1mv0
[0][1]) <= 1)
1138 || (l1ref0
[0] < 0 && l1ref1
[0] == 0 && FFABS(l1mv1
[0][0]) <= 1 && FFABS(l1mv1
[0][1]) <= 1
1139 && (h
->x264_build
>33 || !h
->x264_build
)))){
1141 a
= pack16to32(mv
[0][0],mv
[0][1]);
1143 b
= pack16to32(mv
[1][0],mv
[1][1]);
1145 a
= pack16to32(mv
[0][0],mv
[0][1]);
1146 b
= pack16to32(mv
[1][0],mv
[1][1]);
1148 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, a
, 4);
1149 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, b
, 4);
1151 for(i8
=0; i8
<4; i8
++){
1152 const int x8
= i8
&1;
1153 const int y8
= i8
>>1;
1155 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1157 h
->sub_mb_type
[i8
] = sub_mb_type
;
1159 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1160 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1161 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1162 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1165 if(!IS_INTRA(mb_type_col
[0]) && ( l1ref0
[x8
+ y8
*b8_stride
] == 0
1166 || (l1ref0
[x8
+ y8
*b8_stride
] < 0 && l1ref1
[x8
+ y8
*b8_stride
] == 0
1167 && (h
->x264_build
>33 || !h
->x264_build
)))){
1168 const int16_t (*l1mv
)[2]= l1ref0
[x8
+ y8
*b8_stride
] == 0 ? l1mv0
: l1mv1
;
1169 if(IS_SUB_8X8(sub_mb_type
)){
1170 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*b4_stride
];
1171 if(FFABS(mv_col
[0]) <= 1 && FFABS(mv_col
[1]) <= 1){
1173 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1175 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1178 for(i4
=0; i4
<4; i4
++){
1179 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*b4_stride
];
1180 if(FFABS(mv_col
[0]) <= 1 && FFABS(mv_col
[1]) <= 1){
1182 *(uint32_t*)h
->mv_cache
[0][scan8
[i8
*4+i4
]] = 0;
1184 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] = 0;
1190 }else{ /* direct temporal mv pred */
1191 const int *map_col_to_list0
[2] = {h
->map_col_to_list0
[0], h
->map_col_to_list0
[1]};
1192 const int *dist_scale_factor
= h
->dist_scale_factor
;
1195 if(FRAME_MBAFF
&& IS_INTERLACED(*mb_type
)){
1196 map_col_to_list0
[0] = h
->map_col_to_list0_field
[s
->mb_y
&1][0];
1197 map_col_to_list0
[1] = h
->map_col_to_list0_field
[s
->mb_y
&1][1];
1198 dist_scale_factor
=h
->dist_scale_factor_field
[s
->mb_y
&1];
1200 if(h
->ref_list
[1][0].mbaff
&& IS_INTERLACED(mb_type_col
[0]))
1203 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
[0])){
1204 /* FIXME assumes direct_8x8_inference == 1 */
1205 int y_shift
= 2*!IS_INTERLACED(*mb_type
);
1207 for(i8
=0; i8
<4; i8
++){
1208 const int x8
= i8
&1;
1209 const int y8
= i8
>>1;
1211 const int16_t (*l1mv
)[2]= l1mv0
;
1213 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1215 h
->sub_mb_type
[i8
] = sub_mb_type
;
1217 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1218 if(IS_INTRA(mb_type_col
[y8
])){
1219 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1220 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1221 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1225 ref0
= l1ref0
[x8
+ y8
*b8_stride
];
1227 ref0
= map_col_to_list0
[0][ref0
+ ref_offset
];
1229 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ y8
*b8_stride
] + ref_offset
];
1232 scale
= dist_scale_factor
[ref0
];
1233 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1236 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*b4_stride
];
1237 int my_col
= (mv_col
[1]<<y_shift
)/2;
1238 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1239 int my
= (scale
* my_col
+ 128) >> 8;
1240 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1241 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-my_col
), 4);
1247 /* one-to-one mv scaling */
1249 if(IS_16X16(*mb_type
)){
1252 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, 0, 1);
1253 if(IS_INTRA(mb_type_col
[0])){
1256 const int ref0
= l1ref0
[0] >= 0 ? map_col_to_list0
[0][l1ref0
[0] + ref_offset
]
1257 : map_col_to_list0
[1][l1ref1
[0] + ref_offset
];
1258 const int scale
= dist_scale_factor
[ref0
];
1259 const int16_t *mv_col
= l1ref0
[0] >= 0 ? l1mv0
[0] : l1mv1
[0];
1261 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1262 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1264 mv0
= pack16to32(mv_l0
[0],mv_l0
[1]);
1265 mv1
= pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1267 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, ref
, 1);
1268 fill_rectangle(&h
-> mv_cache
[0][scan8
[0]], 4, 4, 8, mv0
, 4);
1269 fill_rectangle(&h
-> mv_cache
[1][scan8
[0]], 4, 4, 8, mv1
, 4);
1271 for(i8
=0; i8
<4; i8
++){
1272 const int x8
= i8
&1;
1273 const int y8
= i8
>>1;
1275 const int16_t (*l1mv
)[2]= l1mv0
;
1277 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1279 h
->sub_mb_type
[i8
] = sub_mb_type
;
1280 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1281 if(IS_INTRA(mb_type_col
[0])){
1282 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1283 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1284 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1288 ref0
= l1ref0
[x8
+ y8
*b8_stride
] + ref_offset
;
1290 ref0
= map_col_to_list0
[0][ref0
];
1292 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ y8
*b8_stride
] + ref_offset
];
1295 scale
= dist_scale_factor
[ref0
];
1297 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1298 if(IS_SUB_8X8(sub_mb_type
)){
1299 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*b4_stride
];
1300 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1301 int my
= (scale
* mv_col
[1] + 128) >> 8;
1302 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1303 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-mv_col
[1]), 4);
1305 for(i4
=0; i4
<4; i4
++){
1306 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*b4_stride
];
1307 int16_t *mv_l0
= h
->mv_cache
[0][scan8
[i8
*4+i4
]];
1308 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1309 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1310 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] =
1311 pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1318 static inline void write_back_motion(H264Context
*h
, int mb_type
){
1319 MpegEncContext
* const s
= &h
->s
;
1320 const int b_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
1321 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
1324 if(!USES_LIST(mb_type
, 0))
1325 fill_rectangle(&s
->current_picture
.ref_index
[0][b8_xy
], 2, 2, h
->b8_stride
, (uint8_t)LIST_NOT_USED
, 1);
1327 for(list
=0; list
<h
->list_count
; list
++){
1329 if(!USES_LIST(mb_type
, list
))
1333 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+0 + 8*y
];
1334 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+2 + 8*y
];
1336 if( h
->pps
.cabac
) {
1337 if(IS_SKIP(mb_type
))
1338 fill_rectangle(h
->mvd_table
[list
][b_xy
], 4, 4, h
->b_stride
, 0, 4);
1341 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+0 + 8*y
];
1342 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+2 + 8*y
];
1347 int8_t *ref_index
= &s
->current_picture
.ref_index
[list
][b8_xy
];
1348 ref_index
[0+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[0]];
1349 ref_index
[1+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[4]];
1350 ref_index
[0+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[8]];
1351 ref_index
[1+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[12]];
1355 if(h
->slice_type_nos
== FF_B_TYPE
&& h
->pps
.cabac
){
1356 if(IS_8X8(mb_type
)){
1357 uint8_t *direct_table
= &h
->direct_table
[b8_xy
];
1358 direct_table
[1+0*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[1]) ? 1 : 0;
1359 direct_table
[0+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[2]) ? 1 : 0;
1360 direct_table
[1+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[3]) ? 1 : 0;
1365 const uint8_t *ff_h264_decode_nal(H264Context
*h
, const uint8_t *src
, int *dst_length
, int *consumed
, int length
){
1370 // src[0]&0x80; //forbidden bit
1371 h
->nal_ref_idc
= src
[0]>>5;
1372 h
->nal_unit_type
= src
[0]&0x1F;
1376 for(i
=0; i
<length
; i
++)
1377 printf("%2X ", src
[i
]);
1380 #if HAVE_FAST_UNALIGNED
1381 # if HAVE_FAST_64BIT
1383 for(i
=0; i
+1<length
; i
+=9){
1384 if(!((~*(const uint64_t*)(src
+i
) & (*(const uint64_t*)(src
+i
) - 0x0100010001000101ULL
)) & 0x8000800080008080ULL
))
1387 for(i
=0; i
+1<length
; i
+=5){
1388 if(!((~*(const uint32_t*)(src
+i
) & (*(const uint32_t*)(src
+i
) - 0x01000101U
)) & 0x80008080U
))
1391 if(i
>0 && !src
[i
]) i
--;
1395 for(i
=0; i
+1<length
; i
+=2){
1396 if(src
[i
]) continue;
1397 if(i
>0 && src
[i
-1]==0) i
--;
1399 if(i
+2<length
&& src
[i
+1]==0 && src
[i
+2]<=3){
1401 /* startcode, so we must be past the end */
1409 if(i
>=length
-1){ //no escaped 0
1410 *dst_length
= length
;
1411 *consumed
= length
+1; //+1 for the header
1415 bufidx
= h
->nal_unit_type
== NAL_DPC
? 1 : 0; // use second escape buffer for inter data
1416 av_fast_malloc(&h
->rbsp_buffer
[bufidx
], &h
->rbsp_buffer_size
[bufidx
], length
+FF_INPUT_BUFFER_PADDING_SIZE
);
1417 dst
= h
->rbsp_buffer
[bufidx
];
1423 //printf("decoding esc\n");
1424 memcpy(dst
, src
, i
);
1427 //remove escapes (very rare 1:2^22)
1429 dst
[di
++]= src
[si
++];
1430 dst
[di
++]= src
[si
++];
1431 }else if(src
[si
]==0 && src
[si
+1]==0){
1432 if(src
[si
+2]==3){ //escape
1437 }else //next start code
1441 dst
[di
++]= src
[si
++];
1444 dst
[di
++]= src
[si
++];
1447 memset(dst
+di
, 0, FF_INPUT_BUFFER_PADDING_SIZE
);
1450 *consumed
= si
+ 1;//+1 for the header
1451 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1455 int ff_h264_decode_rbsp_trailing(H264Context
*h
, const uint8_t *src
){
1459 tprintf(h
->s
.avctx
, "rbsp trailing %X\n", v
);
1469 * IDCT transforms the 16 dc values and dequantizes them.
1470 * @param qp quantization parameter
1472 static void h264_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1475 int temp
[16]; //FIXME check if this is a good idea
1476 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1477 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1479 //memset(block, 64, 2*256);
1482 const int offset
= y_offset
[i
];
1483 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1484 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1485 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1486 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1495 const int offset
= x_offset
[i
];
1496 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1497 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1498 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1499 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1501 block
[stride
*0 +offset
]= ((((z0
+ z3
)*qmul
+ 128 ) >> 8)); //FIXME think about merging this into decode_residual
1502 block
[stride
*2 +offset
]= ((((z1
+ z2
)*qmul
+ 128 ) >> 8));
1503 block
[stride
*8 +offset
]= ((((z1
- z2
)*qmul
+ 128 ) >> 8));
1504 block
[stride
*10+offset
]= ((((z0
- z3
)*qmul
+ 128 ) >> 8));
1510 * DCT transforms the 16 dc values.
1511 * @param qp quantization parameter ??? FIXME
1513 static void h264_luma_dc_dct_c(DCTELEM
*block
/*, int qp*/){
1514 // const int qmul= dequant_coeff[qp][0];
1516 int temp
[16]; //FIXME check if this is a good idea
1517 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1518 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1521 const int offset
= y_offset
[i
];
1522 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1523 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1524 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1525 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1534 const int offset
= x_offset
[i
];
1535 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1536 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1537 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1538 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1540 block
[stride
*0 +offset
]= (z0
+ z3
)>>1;
1541 block
[stride
*2 +offset
]= (z1
+ z2
)>>1;
1542 block
[stride
*8 +offset
]= (z1
- z2
)>>1;
1543 block
[stride
*10+offset
]= (z0
- z3
)>>1;
1551 static void chroma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1552 const int stride
= 16*2;
1553 const int xStride
= 16;
1556 a
= block
[stride
*0 + xStride
*0];
1557 b
= block
[stride
*0 + xStride
*1];
1558 c
= block
[stride
*1 + xStride
*0];
1559 d
= block
[stride
*1 + xStride
*1];
1566 block
[stride
*0 + xStride
*0]= ((a
+c
)*qmul
) >> 7;
1567 block
[stride
*0 + xStride
*1]= ((e
+b
)*qmul
) >> 7;
1568 block
[stride
*1 + xStride
*0]= ((a
-c
)*qmul
) >> 7;
1569 block
[stride
*1 + xStride
*1]= ((e
-b
)*qmul
) >> 7;
1573 static void chroma_dc_dct_c(DCTELEM
*block
){
1574 const int stride
= 16*2;
1575 const int xStride
= 16;
1578 a
= block
[stride
*0 + xStride
*0];
1579 b
= block
[stride
*0 + xStride
*1];
1580 c
= block
[stride
*1 + xStride
*0];
1581 d
= block
[stride
*1 + xStride
*1];
1588 block
[stride
*0 + xStride
*0]= (a
+c
);
1589 block
[stride
*0 + xStride
*1]= (e
+b
);
1590 block
[stride
*1 + xStride
*0]= (a
-c
);
1591 block
[stride
*1 + xStride
*1]= (e
-b
);
1596 * gets the chroma qp.
1598 static inline int get_chroma_qp(H264Context
*h
, int t
, int qscale
){
1599 return h
->pps
.chroma_qp_table
[t
][qscale
];
1602 static inline void mc_dir_part(H264Context
*h
, Picture
*pic
, int n
, int square
, int chroma_height
, int delta
, int list
,
1603 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1604 int src_x_offset
, int src_y_offset
,
1605 qpel_mc_func
*qpix_op
, h264_chroma_mc_func chroma_op
){
1606 MpegEncContext
* const s
= &h
->s
;
1607 const int mx
= h
->mv_cache
[list
][ scan8
[n
] ][0] + src_x_offset
*8;
1608 int my
= h
->mv_cache
[list
][ scan8
[n
] ][1] + src_y_offset
*8;
1609 const int luma_xy
= (mx
&3) + ((my
&3)<<2);
1610 uint8_t * src_y
= pic
->data
[0] + (mx
>>2) + (my
>>2)*h
->mb_linesize
;
1611 uint8_t * src_cb
, * src_cr
;
1612 int extra_width
= h
->emu_edge_width
;
1613 int extra_height
= h
->emu_edge_height
;
1615 const int full_mx
= mx
>>2;
1616 const int full_my
= my
>>2;
1617 const int pic_width
= 16*s
->mb_width
;
1618 const int pic_height
= 16*s
->mb_height
>> MB_FIELD
;
1620 if(mx
&7) extra_width
-= 3;
1621 if(my
&7) extra_height
-= 3;
1623 if( full_mx
< 0-extra_width
1624 || full_my
< 0-extra_height
1625 || full_mx
+ 16/*FIXME*/ > pic_width
+ extra_width
1626 || full_my
+ 16/*FIXME*/ > pic_height
+ extra_height
){
1627 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_y
- 2 - 2*h
->mb_linesize
, h
->mb_linesize
, 16+5, 16+5/*FIXME*/, full_mx
-2, full_my
-2, pic_width
, pic_height
);
1628 src_y
= s
->edge_emu_buffer
+ 2 + 2*h
->mb_linesize
;
1632 qpix_op
[luma_xy
](dest_y
, src_y
, h
->mb_linesize
); //FIXME try variable height perhaps?
1634 qpix_op
[luma_xy
](dest_y
+ delta
, src_y
+ delta
, h
->mb_linesize
);
1637 if(CONFIG_GRAY
&& s
->flags
&CODEC_FLAG_GRAY
) return;
1640 // chroma offset when predicting from a field of opposite parity
1641 my
+= 2 * ((s
->mb_y
& 1) - (pic
->reference
- 1));
1642 emu
|= (my
>>3) < 0 || (my
>>3) + 8 >= (pic_height
>>1);
1644 src_cb
= pic
->data
[1] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
1645 src_cr
= pic
->data
[2] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
1648 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cb
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
1649 src_cb
= s
->edge_emu_buffer
;
1651 chroma_op(dest_cb
, src_cb
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
1654 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cr
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
1655 src_cr
= s
->edge_emu_buffer
;
1657 chroma_op(dest_cr
, src_cr
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
1660 static inline void mc_part_std(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1661 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1662 int x_offset
, int y_offset
,
1663 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1664 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
1665 int list0
, int list1
){
1666 MpegEncContext
* const s
= &h
->s
;
1667 qpel_mc_func
*qpix_op
= qpix_put
;
1668 h264_chroma_mc_func chroma_op
= chroma_put
;
1670 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
1671 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1672 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1673 x_offset
+= 8*s
->mb_x
;
1674 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
1677 Picture
*ref
= &h
->ref_list
[0][ h
->ref_cache
[0][ scan8
[n
] ] ];
1678 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 0,
1679 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1680 qpix_op
, chroma_op
);
1683 chroma_op
= chroma_avg
;
1687 Picture
*ref
= &h
->ref_list
[1][ h
->ref_cache
[1][ scan8
[n
] ] ];
1688 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 1,
1689 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1690 qpix_op
, chroma_op
);
1694 static inline void mc_part_weighted(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1695 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1696 int x_offset
, int y_offset
,
1697 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1698 h264_weight_func luma_weight_op
, h264_weight_func chroma_weight_op
,
1699 h264_biweight_func luma_weight_avg
, h264_biweight_func chroma_weight_avg
,
1700 int list0
, int list1
){
1701 MpegEncContext
* const s
= &h
->s
;
1703 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
1704 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1705 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1706 x_offset
+= 8*s
->mb_x
;
1707 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
1710 /* don't optimize for luma-only case, since B-frames usually
1711 * use implicit weights => chroma too. */
1712 uint8_t *tmp_cb
= s
->obmc_scratchpad
;
1713 uint8_t *tmp_cr
= s
->obmc_scratchpad
+ 8;
1714 uint8_t *tmp_y
= s
->obmc_scratchpad
+ 8*h
->mb_uvlinesize
;
1715 int refn0
= h
->ref_cache
[0][ scan8
[n
] ];
1716 int refn1
= h
->ref_cache
[1][ scan8
[n
] ];
1718 mc_dir_part(h
, &h
->ref_list
[0][refn0
], n
, square
, chroma_height
, delta
, 0,
1719 dest_y
, dest_cb
, dest_cr
,
1720 x_offset
, y_offset
, qpix_put
, chroma_put
);
1721 mc_dir_part(h
, &h
->ref_list
[1][refn1
], n
, square
, chroma_height
, delta
, 1,
1722 tmp_y
, tmp_cb
, tmp_cr
,
1723 x_offset
, y_offset
, qpix_put
, chroma_put
);
1725 if(h
->use_weight
== 2){
1726 int weight0
= h
->implicit_weight
[refn0
][refn1
];
1727 int weight1
= 64 - weight0
;
1728 luma_weight_avg( dest_y
, tmp_y
, h
-> mb_linesize
, 5, weight0
, weight1
, 0);
1729 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
1730 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
1732 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
1733 h
->luma_weight
[0][refn0
], h
->luma_weight
[1][refn1
],
1734 h
->luma_offset
[0][refn0
] + h
->luma_offset
[1][refn1
]);
1735 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1736 h
->chroma_weight
[0][refn0
][0], h
->chroma_weight
[1][refn1
][0],
1737 h
->chroma_offset
[0][refn0
][0] + h
->chroma_offset
[1][refn1
][0]);
1738 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1739 h
->chroma_weight
[0][refn0
][1], h
->chroma_weight
[1][refn1
][1],
1740 h
->chroma_offset
[0][refn0
][1] + h
->chroma_offset
[1][refn1
][1]);
1743 int list
= list1
? 1 : 0;
1744 int refn
= h
->ref_cache
[list
][ scan8
[n
] ];
1745 Picture
*ref
= &h
->ref_list
[list
][refn
];
1746 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, list
,
1747 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1748 qpix_put
, chroma_put
);
1750 luma_weight_op(dest_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
1751 h
->luma_weight
[list
][refn
], h
->luma_offset
[list
][refn
]);
1752 if(h
->use_weight_chroma
){
1753 chroma_weight_op(dest_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1754 h
->chroma_weight
[list
][refn
][0], h
->chroma_offset
[list
][refn
][0]);
1755 chroma_weight_op(dest_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1756 h
->chroma_weight
[list
][refn
][1], h
->chroma_offset
[list
][refn
][1]);
1761 static inline void mc_part(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1762 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1763 int x_offset
, int y_offset
,
1764 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1765 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
1766 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
,
1767 int list0
, int list1
){
1768 if((h
->use_weight
==2 && list0
&& list1
1769 && (h
->implicit_weight
[ h
->ref_cache
[0][scan8
[n
]] ][ h
->ref_cache
[1][scan8
[n
]] ] != 32))
1770 || h
->use_weight
==1)
1771 mc_part_weighted(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
1772 x_offset
, y_offset
, qpix_put
, chroma_put
,
1773 weight_op
[0], weight_op
[3], weight_avg
[0], weight_avg
[3], list0
, list1
);
1775 mc_part_std(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
1776 x_offset
, y_offset
, qpix_put
, chroma_put
, qpix_avg
, chroma_avg
, list0
, list1
);
1779 static inline void prefetch_motion(H264Context
*h
, int list
){
1780 /* fetch pixels for estimated mv 4 macroblocks ahead
1781 * optimized for 64byte cache lines */
1782 MpegEncContext
* const s
= &h
->s
;
1783 const int refn
= h
->ref_cache
[list
][scan8
[0]];
1785 const int mx
= (h
->mv_cache
[list
][scan8
[0]][0]>>2) + 16*s
->mb_x
+ 8;
1786 const int my
= (h
->mv_cache
[list
][scan8
[0]][1]>>2) + 16*s
->mb_y
;
1787 uint8_t **src
= h
->ref_list
[list
][refn
].data
;
1788 int off
= mx
+ (my
+ (s
->mb_x
&3)*4)*h
->mb_linesize
+ 64;
1789 s
->dsp
.prefetch(src
[0]+off
, s
->linesize
, 4);
1790 off
= (mx
>>1) + ((my
>>1) + (s
->mb_x
&7))*s
->uvlinesize
+ 64;
1791 s
->dsp
.prefetch(src
[1]+off
, src
[2]-src
[1], 2);
1795 static void hl_motion(H264Context
*h
, uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1796 qpel_mc_func (*qpix_put
)[16], h264_chroma_mc_func (*chroma_put
),
1797 qpel_mc_func (*qpix_avg
)[16], h264_chroma_mc_func (*chroma_avg
),
1798 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
){
1799 MpegEncContext
* const s
= &h
->s
;
1800 const int mb_xy
= h
->mb_xy
;
1801 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
1803 assert(IS_INTER(mb_type
));
1805 prefetch_motion(h
, 0);
1807 if(IS_16X16(mb_type
)){
1808 mc_part(h
, 0, 1, 8, 0, dest_y
, dest_cb
, dest_cr
, 0, 0,
1809 qpix_put
[0], chroma_put
[0], qpix_avg
[0], chroma_avg
[0],
1810 &weight_op
[0], &weight_avg
[0],
1811 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1812 }else if(IS_16X8(mb_type
)){
1813 mc_part(h
, 0, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 0,
1814 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
1815 &weight_op
[1], &weight_avg
[1],
1816 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1817 mc_part(h
, 8, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 4,
1818 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
1819 &weight_op
[1], &weight_avg
[1],
1820 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
1821 }else if(IS_8X16(mb_type
)){
1822 mc_part(h
, 0, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 0, 0,
1823 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1824 &weight_op
[2], &weight_avg
[2],
1825 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1826 mc_part(h
, 4, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 4, 0,
1827 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1828 &weight_op
[2], &weight_avg
[2],
1829 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
1833 assert(IS_8X8(mb_type
));
1836 const int sub_mb_type
= h
->sub_mb_type
[i
];
1838 int x_offset
= (i
&1)<<2;
1839 int y_offset
= (i
&2)<<1;
1841 if(IS_SUB_8X8(sub_mb_type
)){
1842 mc_part(h
, n
, 1, 4, 0, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1843 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1844 &weight_op
[3], &weight_avg
[3],
1845 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1846 }else if(IS_SUB_8X4(sub_mb_type
)){
1847 mc_part(h
, n
, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1848 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
1849 &weight_op
[4], &weight_avg
[4],
1850 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1851 mc_part(h
, n
+2, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
+2,
1852 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
1853 &weight_op
[4], &weight_avg
[4],
1854 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1855 }else if(IS_SUB_4X8(sub_mb_type
)){
1856 mc_part(h
, n
, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1857 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1858 &weight_op
[5], &weight_avg
[5],
1859 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1860 mc_part(h
, n
+1, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
+2, y_offset
,
1861 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1862 &weight_op
[5], &weight_avg
[5],
1863 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1866 assert(IS_SUB_4X4(sub_mb_type
));
1868 int sub_x_offset
= x_offset
+ 2*(j
&1);
1869 int sub_y_offset
= y_offset
+ (j
&2);
1870 mc_part(h
, n
+j
, 1, 2, 0, dest_y
, dest_cb
, dest_cr
, sub_x_offset
, sub_y_offset
,
1871 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1872 &weight_op
[6], &weight_avg
[6],
1873 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1879 prefetch_motion(h
, 1);
1882 static av_cold
void init_cavlc_level_tab(void){
1883 int suffix_length
, mask
;
1886 for(suffix_length
=0; suffix_length
<7; suffix_length
++){
1887 for(i
=0; i
<(1<<LEVEL_TAB_BITS
); i
++){
1888 int prefix
= LEVEL_TAB_BITS
- av_log2(2*i
);
1889 int level_code
= (prefix
<<suffix_length
) + (i
>>(LEVEL_TAB_BITS
-prefix
-1-suffix_length
)) - (1<<suffix_length
);
1891 mask
= -(level_code
&1);
1892 level_code
= (((2+level_code
)>>1) ^ mask
) - mask
;
1893 if(prefix
+ 1 + suffix_length
<= LEVEL_TAB_BITS
){
1894 cavlc_level_tab
[suffix_length
][i
][0]= level_code
;
1895 cavlc_level_tab
[suffix_length
][i
][1]= prefix
+ 1 + suffix_length
;
1896 }else if(prefix
+ 1 <= LEVEL_TAB_BITS
){
1897 cavlc_level_tab
[suffix_length
][i
][0]= prefix
+100;
1898 cavlc_level_tab
[suffix_length
][i
][1]= prefix
+ 1;
1900 cavlc_level_tab
[suffix_length
][i
][0]= LEVEL_TAB_BITS
+100;
1901 cavlc_level_tab
[suffix_length
][i
][1]= LEVEL_TAB_BITS
;
1907 static av_cold
void decode_init_vlc(void){
1908 static int done
= 0;
1915 chroma_dc_coeff_token_vlc
.table
= chroma_dc_coeff_token_vlc_table
;
1916 chroma_dc_coeff_token_vlc
.table_allocated
= chroma_dc_coeff_token_vlc_table_size
;
1917 init_vlc(&chroma_dc_coeff_token_vlc
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 4*5,
1918 &chroma_dc_coeff_token_len
[0], 1, 1,
1919 &chroma_dc_coeff_token_bits
[0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC
);
1924 coeff_token_vlc
[i
].table
= coeff_token_vlc_tables
+offset
;
1925 coeff_token_vlc
[i
].table_allocated
= coeff_token_vlc_tables_size
[i
];
1926 init_vlc(&coeff_token_vlc
[i
], COEFF_TOKEN_VLC_BITS
, 4*17,
1927 &coeff_token_len
[i
][0], 1, 1,
1928 &coeff_token_bits
[i
][0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC
);
1930 offset
+= coeff_token_vlc_tables_size
[i
];
1933 * This is a one time safety check to make sure that
1934 * the packed static coeff_token_vlc table sizes
1935 * were initialized correctly.
1937 assert(offset
== FF_ARRAY_ELEMS(coeff_token_vlc_tables
));
1940 chroma_dc_total_zeros_vlc
[i
].table
= chroma_dc_total_zeros_vlc_tables
[i
];
1941 chroma_dc_total_zeros_vlc
[i
].table_allocated
= chroma_dc_total_zeros_vlc_tables_size
;
1942 init_vlc(&chroma_dc_total_zeros_vlc
[i
],
1943 CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 4,
1944 &chroma_dc_total_zeros_len
[i
][0], 1, 1,
1945 &chroma_dc_total_zeros_bits
[i
][0], 1, 1,
1946 INIT_VLC_USE_NEW_STATIC
);
1948 for(i
=0; i
<15; i
++){
1949 total_zeros_vlc
[i
].table
= total_zeros_vlc_tables
[i
];
1950 total_zeros_vlc
[i
].table_allocated
= total_zeros_vlc_tables_size
;
1951 init_vlc(&total_zeros_vlc
[i
],
1952 TOTAL_ZEROS_VLC_BITS
, 16,
1953 &total_zeros_len
[i
][0], 1, 1,
1954 &total_zeros_bits
[i
][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC
);
1959 run_vlc
[i
].table
= run_vlc_tables
[i
];
1960 run_vlc
[i
].table_allocated
= run_vlc_tables_size
;
1961 init_vlc(&run_vlc
[i
],
1963 &run_len
[i
][0], 1, 1,
1964 &run_bits
[i
][0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC
);
1967 run7_vlc
.table
= run7_vlc_table
,
1968 run7_vlc
.table_allocated
= run7_vlc_table_size
;
1969 init_vlc(&run7_vlc
, RUN7_VLC_BITS
, 16,
1970 &run_len
[6][0], 1, 1,
1971 &run_bits
[6][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC
);
1974 init_cavlc_level_tab();
1978 static void free_tables(H264Context
*h
){
1981 av_freep(&h
->intra4x4_pred_mode
);
1982 av_freep(&h
->chroma_pred_mode_table
);
1983 av_freep(&h
->cbp_table
);
1984 av_freep(&h
->mvd_table
[0]);
1985 av_freep(&h
->mvd_table
[1]);
1986 av_freep(&h
->direct_table
);
1987 av_freep(&h
->non_zero_count
);
1988 av_freep(&h
->slice_table_base
);
1989 h
->slice_table
= NULL
;
1991 av_freep(&h
->mb2b_xy
);
1992 av_freep(&h
->mb2b8_xy
);
1994 for(i
= 0; i
< MAX_THREADS
; i
++) {
1995 hx
= h
->thread_context
[i
];
1997 av_freep(&hx
->top_borders
[1]);
1998 av_freep(&hx
->top_borders
[0]);
1999 av_freep(&hx
->s
.obmc_scratchpad
);
2003 static void init_dequant8_coeff_table(H264Context
*h
){
2005 const int transpose
= (h
->s
.dsp
.h264_idct8_add
!= ff_h264_idct8_add_c
); //FIXME ugly
2006 h
->dequant8_coeff
[0] = h
->dequant8_buffer
[0];
2007 h
->dequant8_coeff
[1] = h
->dequant8_buffer
[1];
2009 for(i
=0; i
<2; i
++ ){
2010 if(i
&& !memcmp(h
->pps
.scaling_matrix8
[0], h
->pps
.scaling_matrix8
[1], 64*sizeof(uint8_t))){
2011 h
->dequant8_coeff
[1] = h
->dequant8_buffer
[0];
2015 for(q
=0; q
<52; q
++){
2016 int shift
= div6
[q
];
2019 h
->dequant8_coeff
[i
][q
][transpose
? (x
>>3)|((x
&7)<<3) : x
] =
2020 ((uint32_t)dequant8_coeff_init
[idx
][ dequant8_coeff_init_scan
[((x
>>1)&12) | (x
&3)] ] *
2021 h
->pps
.scaling_matrix8
[i
][x
]) << shift
;
2026 static void init_dequant4_coeff_table(H264Context
*h
){
2028 const int transpose
= (h
->s
.dsp
.h264_idct_add
!= ff_h264_idct_add_c
); //FIXME ugly
2029 for(i
=0; i
<6; i
++ ){
2030 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[i
];
2032 if(!memcmp(h
->pps
.scaling_matrix4
[j
], h
->pps
.scaling_matrix4
[i
], 16*sizeof(uint8_t))){
2033 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[j
];
2040 for(q
=0; q
<52; q
++){
2041 int shift
= div6
[q
] + 2;
2044 h
->dequant4_coeff
[i
][q
][transpose
? (x
>>2)|((x
<<2)&0xF) : x
] =
2045 ((uint32_t)dequant4_coeff_init
[idx
][(x
&1) + ((x
>>2)&1)] *
2046 h
->pps
.scaling_matrix4
[i
][x
]) << shift
;
2051 static void init_dequant_tables(H264Context
*h
){
2053 init_dequant4_coeff_table(h
);
2054 if(h
->pps
.transform_8x8_mode
)
2055 init_dequant8_coeff_table(h
);
2056 if(h
->sps
.transform_bypass
){
2059 h
->dequant4_coeff
[i
][0][x
] = 1<<6;
2060 if(h
->pps
.transform_8x8_mode
)
2063 h
->dequant8_coeff
[i
][0][x
] = 1<<6;
2070 * needs width/height
2072 static int alloc_tables(H264Context
*h
){
2073 MpegEncContext
* const s
= &h
->s
;
2074 const int big_mb_num
= s
->mb_stride
* (s
->mb_height
+1);
2077 CHECKED_ALLOCZ(h
->intra4x4_pred_mode
, big_mb_num
* 8 * sizeof(uint8_t))
2079 CHECKED_ALLOCZ(h
->non_zero_count
, big_mb_num
* 16 * sizeof(uint8_t))
2080 CHECKED_ALLOCZ(h
->slice_table_base
, (big_mb_num
+s
->mb_stride
) * sizeof(*h
->slice_table_base
))
2081 CHECKED_ALLOCZ(h
->cbp_table
, big_mb_num
* sizeof(uint16_t))
2083 CHECKED_ALLOCZ(h
->chroma_pred_mode_table
, big_mb_num
* sizeof(uint8_t))
2084 CHECKED_ALLOCZ(h
->mvd_table
[0], 32*big_mb_num
* sizeof(uint16_t));
2085 CHECKED_ALLOCZ(h
->mvd_table
[1], 32*big_mb_num
* sizeof(uint16_t));
2086 CHECKED_ALLOCZ(h
->direct_table
, 32*big_mb_num
* sizeof(uint8_t));
2088 memset(h
->slice_table_base
, -1, (big_mb_num
+s
->mb_stride
) * sizeof(*h
->slice_table_base
));
2089 h
->slice_table
= h
->slice_table_base
+ s
->mb_stride
*2 + 1;
2091 CHECKED_ALLOCZ(h
->mb2b_xy
, big_mb_num
* sizeof(uint32_t));
2092 CHECKED_ALLOCZ(h
->mb2b8_xy
, big_mb_num
* sizeof(uint32_t));
2093 for(y
=0; y
<s
->mb_height
; y
++){
2094 for(x
=0; x
<s
->mb_width
; x
++){
2095 const int mb_xy
= x
+ y
*s
->mb_stride
;
2096 const int b_xy
= 4*x
+ 4*y
*h
->b_stride
;
2097 const int b8_xy
= 2*x
+ 2*y
*h
->b8_stride
;
2099 h
->mb2b_xy
[mb_xy
]= b_xy
;
2100 h
->mb2b8_xy
[mb_xy
]= b8_xy
;
2104 s
->obmc_scratchpad
= NULL
;
2106 if(!h
->dequant4_coeff
[0])
2107 init_dequant_tables(h
);
2116 * Mimic alloc_tables(), but for every context thread.
2118 static void clone_tables(H264Context
*dst
, H264Context
*src
){
2119 dst
->intra4x4_pred_mode
= src
->intra4x4_pred_mode
;
2120 dst
->non_zero_count
= src
->non_zero_count
;
2121 dst
->slice_table
= src
->slice_table
;
2122 dst
->cbp_table
= src
->cbp_table
;
2123 dst
->mb2b_xy
= src
->mb2b_xy
;
2124 dst
->mb2b8_xy
= src
->mb2b8_xy
;
2125 dst
->chroma_pred_mode_table
= src
->chroma_pred_mode_table
;
2126 dst
->mvd_table
[0] = src
->mvd_table
[0];
2127 dst
->mvd_table
[1] = src
->mvd_table
[1];
2128 dst
->direct_table
= src
->direct_table
;
2130 dst
->s
.obmc_scratchpad
= NULL
;
2131 ff_h264_pred_init(&dst
->hpc
, src
->s
.codec_id
);
2136 * Allocate buffers which are not shared amongst multiple threads.
2138 static int context_init(H264Context
*h
){
2139 CHECKED_ALLOCZ(h
->top_borders
[0], h
->s
.mb_width
* (16+8+8) * sizeof(uint8_t))
2140 CHECKED_ALLOCZ(h
->top_borders
[1], h
->s
.mb_width
* (16+8+8) * sizeof(uint8_t))
2144 return -1; // free_tables will clean up for us
2147 static av_cold
void common_init(H264Context
*h
){
2148 MpegEncContext
* const s
= &h
->s
;
2150 s
->width
= s
->avctx
->width
;
2151 s
->height
= s
->avctx
->height
;
2152 s
->codec_id
= s
->avctx
->codec
->id
;
2154 ff_h264_pred_init(&h
->hpc
, s
->codec_id
);
2156 h
->dequant_coeff_pps
= -1;
2157 s
->unrestricted_mv
=1;
2158 s
->decode
=1; //FIXME
2160 dsputil_init(&s
->dsp
, s
->avctx
); // needed so that idct permutation is known early
2162 memset(h
->pps
.scaling_matrix4
, 16, 6*16*sizeof(uint8_t));
2163 memset(h
->pps
.scaling_matrix8
, 16, 2*64*sizeof(uint8_t));
2167 * Reset SEI values at the beginning of the frame.
2169 * @param h H.264 context.
2171 static void reset_sei(H264Context
*h
) {
2172 h
->sei_recovery_frame_cnt
= -1;
2173 h
->sei_dpb_output_delay
= 0;
2174 h
->sei_cpb_removal_delay
= -1;
2175 h
->sei_buffering_period_present
= 0;
2178 static av_cold
int decode_init(AVCodecContext
*avctx
){
2179 H264Context
*h
= avctx
->priv_data
;
2180 MpegEncContext
* const s
= &h
->s
;
2182 MPV_decode_defaults(s
);
2187 s
->out_format
= FMT_H264
;
2188 s
->workaround_bugs
= avctx
->workaround_bugs
;
2191 // s->decode_mb= ff_h263_decode_mb;
2192 s
->quarter_sample
= 1;
2193 if(!avctx
->has_b_frames
)
2196 if(s
->avctx
->codec
->capabilities
&CODEC_CAP_HWACCEL_VDPAU
)
2197 avctx
->pix_fmt
= PIX_FMT_VDPAU_H264
;
2199 avctx
->pix_fmt
= avctx
->get_format(avctx
, avctx
->codec
->pix_fmts
);
2200 avctx
->hwaccel
= ff_find_hwaccel(avctx
->codec
->id
, avctx
->pix_fmt
);
2204 if(avctx
->extradata_size
> 0 && avctx
->extradata
&&
2205 *(char *)avctx
->extradata
== 1){
2212 h
->thread_context
[0] = h
;
2213 h
->outputed_poc
= INT_MIN
;
2214 h
->prev_poc_msb
= 1<<16;
2216 if(avctx
->codec_id
== CODEC_ID_H264
){
2217 if(avctx
->ticks_per_frame
== 1){
2218 s
->avctx
->time_base
.den
*=2;
2220 avctx
->ticks_per_frame
= 2;
2225 static int frame_start(H264Context
*h
){
2226 MpegEncContext
* const s
= &h
->s
;
2229 if(MPV_frame_start(s
, s
->avctx
) < 0)
2231 ff_er_frame_start(s
);
2233 * MPV_frame_start uses pict_type to derive key_frame.
2234 * This is incorrect for H.264; IDR markings must be used.
2235 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2236 * See decode_nal_units().
2238 s
->current_picture_ptr
->key_frame
= 0;
2240 assert(s
->linesize
&& s
->uvlinesize
);
2242 for(i
=0; i
<16; i
++){
2243 h
->block_offset
[i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
2244 h
->block_offset
[24+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
2247 h
->block_offset
[16+i
]=
2248 h
->block_offset
[20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
2249 h
->block_offset
[24+16+i
]=
2250 h
->block_offset
[24+20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
2253 /* can't be in alloc_tables because linesize isn't known there.
2254 * FIXME: redo bipred weight to not require extra buffer? */
2255 for(i
= 0; i
< s
->avctx
->thread_count
; i
++)
2256 if(!h
->thread_context
[i
]->s
.obmc_scratchpad
)
2257 h
->thread_context
[i
]->s
.obmc_scratchpad
= av_malloc(16*2*s
->linesize
+ 8*2*s
->uvlinesize
);
2259 /* some macroblocks will be accessed before they're available */
2260 if(FRAME_MBAFF
|| s
->avctx
->thread_count
> 1)
2261 memset(h
->slice_table
, -1, (s
->mb_height
*s
->mb_stride
-1) * sizeof(*h
->slice_table
));
2263 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2265 // We mark the current picture as non-reference after allocating it, so
2266 // that if we break out due to an error it can be released automatically
2267 // in the next MPV_frame_start().
2268 // SVQ3 as well as most other codecs have only last/next/current and thus
2269 // get released even with set reference, besides SVQ3 and others do not
2270 // mark frames as reference later "naturally".
2271 if(s
->codec_id
!= CODEC_ID_SVQ3
)
2272 s
->current_picture_ptr
->reference
= 0;
2274 s
->current_picture_ptr
->field_poc
[0]=
2275 s
->current_picture_ptr
->field_poc
[1]= INT_MAX
;
2276 assert(s
->current_picture_ptr
->long_ref
==0);
2281 static inline void backup_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int simple
){
2282 MpegEncContext
* const s
= &h
->s
;
2291 src_cb
-= uvlinesize
;
2292 src_cr
-= uvlinesize
;
2294 if(!simple
&& FRAME_MBAFF
){
2296 offset
= MB_MBAFF
? 1 : 17;
2297 uvoffset
= MB_MBAFF
? 1 : 9;
2299 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+ 0)= *(uint64_t*)(src_y
+ 15*linesize
);
2300 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+ 8)= *(uint64_t*)(src_y
+8+15*linesize
);
2301 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2302 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+7*uvlinesize
);
2303 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+7*uvlinesize
);
2308 h
->left_border
[0]= h
->top_borders
[0][s
->mb_x
][15];
2309 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2310 h
->left_border
[34 ]= h
->top_borders
[0][s
->mb_x
][16+7 ];
2311 h
->left_border
[34+18]= h
->top_borders
[0][s
->mb_x
][16+8+7];
2317 top_idx
= MB_MBAFF
? 0 : 1;
2319 step
= MB_MBAFF
? 2 : 1;
2322 // There are two lines saved, the line above the the top macroblock of a pair,
2323 // and the line above the bottom macroblock
2324 h
->left_border
[offset
]= h
->top_borders
[top_idx
][s
->mb_x
][15];
2325 for(i
=1; i
<17 - skiplast
; i
++){
2326 h
->left_border
[offset
+i
*step
]= src_y
[15+i
* linesize
];
2329 *(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 16*linesize
);
2330 *(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+16*linesize
);
2332 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2333 h
->left_border
[uvoffset
+34 ]= h
->top_borders
[top_idx
][s
->mb_x
][16+7];
2334 h
->left_border
[uvoffset
+34+18]= h
->top_borders
[top_idx
][s
->mb_x
][24+7];
2335 for(i
=1; i
<9 - skiplast
; i
++){
2336 h
->left_border
[uvoffset
+34 +i
*step
]= src_cb
[7+i
*uvlinesize
];
2337 h
->left_border
[uvoffset
+34+18+i
*step
]= src_cr
[7+i
*uvlinesize
];
2339 *(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+8*uvlinesize
);
2340 *(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+8*uvlinesize
);
2344 static inline void xchg_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int xchg
, int simple
){
2345 MpegEncContext
* const s
= &h
->s
;
2356 if(!simple
&& FRAME_MBAFF
){
2358 offset
= MB_MBAFF
? 1 : 17;
2359 uvoffset
= MB_MBAFF
? 1 : 9;
2363 top_idx
= MB_MBAFF
? 0 : 1;
2365 step
= MB_MBAFF
? 2 : 1;
2368 if(h
->deblocking_filter
== 2) {
2370 deblock_left
= h
->slice_table
[mb_xy
] == h
->slice_table
[mb_xy
- 1];
2371 deblock_top
= h
->slice_table
[mb_xy
] == h
->slice_table
[h
->top_mb_xy
];
2373 deblock_left
= (s
->mb_x
> 0);
2374 deblock_top
= (s
->mb_y
> !!MB_FIELD
);
2377 src_y
-= linesize
+ 1;
2378 src_cb
-= uvlinesize
+ 1;
2379 src_cr
-= uvlinesize
+ 1;
2381 #define XCHG(a,b,t,xchg)\
2388 for(i
= !deblock_top
; i
<16; i
++){
2389 XCHG(h
->left_border
[offset
+i
*step
], src_y
[i
* linesize
], temp8
, xchg
);
2391 XCHG(h
->left_border
[offset
+i
*step
], src_y
[i
* linesize
], temp8
, 1);
2395 XCHG(*(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+0), *(uint64_t*)(src_y
+1), temp64
, xchg
);
2396 XCHG(*(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+8), *(uint64_t*)(src_y
+9), temp64
, 1);
2397 if(s
->mb_x
+1 < s
->mb_width
){
2398 XCHG(*(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
+1]), *(uint64_t*)(src_y
+17), temp64
, 1);
2402 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2404 for(i
= !deblock_top
; i
<8; i
++){
2405 XCHG(h
->left_border
[uvoffset
+34 +i
*step
], src_cb
[i
*uvlinesize
], temp8
, xchg
);
2406 XCHG(h
->left_border
[uvoffset
+34+18+i
*step
], src_cr
[i
*uvlinesize
], temp8
, xchg
);
2408 XCHG(h
->left_border
[uvoffset
+34 +i
*step
], src_cb
[i
*uvlinesize
], temp8
, 1);
2409 XCHG(h
->left_border
[uvoffset
+34+18+i
*step
], src_cr
[i
*uvlinesize
], temp8
, 1);
2412 XCHG(*(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1), temp64
, 1);
2413 XCHG(*(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1), temp64
, 1);
2418 static av_always_inline
void hl_decode_mb_internal(H264Context
*h
, int simple
){
2419 MpegEncContext
* const s
= &h
->s
;
2420 const int mb_x
= s
->mb_x
;
2421 const int mb_y
= s
->mb_y
;
2422 const int mb_xy
= h
->mb_xy
;
2423 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
2424 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
2425 int linesize
, uvlinesize
/*dct_offset*/;
2427 int *block_offset
= &h
->block_offset
[0];
2428 const int transform_bypass
= !simple
&& (s
->qscale
== 0 && h
->sps
.transform_bypass
);
2429 /* is_h264 should always be true if SVQ3 is disabled. */
2430 const int is_h264
= !CONFIG_SVQ3_DECODER
|| simple
|| s
->codec_id
== CODEC_ID_H264
;
2431 void (*idct_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
2432 void (*idct_dc_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
2434 dest_y
= s
->current_picture
.data
[0] + (mb_x
+ mb_y
* s
->linesize
) * 16;
2435 dest_cb
= s
->current_picture
.data
[1] + (mb_x
+ mb_y
* s
->uvlinesize
) * 8;
2436 dest_cr
= s
->current_picture
.data
[2] + (mb_x
+ mb_y
* s
->uvlinesize
) * 8;
2438 s
->dsp
.prefetch(dest_y
+ (s
->mb_x
&3)*4*s
->linesize
+ 64, s
->linesize
, 4);
2439 s
->dsp
.prefetch(dest_cb
+ (s
->mb_x
&7)*s
->uvlinesize
+ 64, dest_cr
- dest_cb
, 2);
2441 if (!simple
&& MB_FIELD
) {
2442 linesize
= h
->mb_linesize
= s
->linesize
* 2;
2443 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
* 2;
2444 block_offset
= &h
->block_offset
[24];
2445 if(mb_y
&1){ //FIXME move out of this function?
2446 dest_y
-= s
->linesize
*15;
2447 dest_cb
-= s
->uvlinesize
*7;
2448 dest_cr
-= s
->uvlinesize
*7;
2452 for(list
=0; list
<h
->list_count
; list
++){
2453 if(!USES_LIST(mb_type
, list
))
2455 if(IS_16X16(mb_type
)){
2456 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
2457 fill_rectangle(ref
, 4, 4, 8, (16+*ref
)^(s
->mb_y
&1), 1);
2459 for(i
=0; i
<16; i
+=4){
2460 int ref
= h
->ref_cache
[list
][scan8
[i
]];
2462 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2, 8, (16+ref
)^(s
->mb_y
&1), 1);
2468 linesize
= h
->mb_linesize
= s
->linesize
;
2469 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
;
2470 // dct_offset = s->linesize * 16;
2473 if (!simple
&& IS_INTRA_PCM(mb_type
)) {
2474 for (i
=0; i
<16; i
++) {
2475 memcpy(dest_y
+ i
* linesize
, h
->mb
+ i
*8, 16);
2477 for (i
=0; i
<8; i
++) {
2478 memcpy(dest_cb
+ i
*uvlinesize
, h
->mb
+ 128 + i
*4, 8);
2479 memcpy(dest_cr
+ i
*uvlinesize
, h
->mb
+ 160 + i
*4, 8);
2482 if(IS_INTRA(mb_type
)){
2483 if(h
->deblocking_filter
)
2484 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 1, simple
);
2486 if(simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2487 h
->hpc
.pred8x8
[ h
->chroma_pred_mode
](dest_cb
, uvlinesize
);
2488 h
->hpc
.pred8x8
[ h
->chroma_pred_mode
](dest_cr
, uvlinesize
);
2491 if(IS_INTRA4x4(mb_type
)){
2492 if(simple
|| !s
->encoding
){
2493 if(IS_8x8DCT(mb_type
)){
2494 if(transform_bypass
){
2496 idct_add
= s
->dsp
.add_pixels8
;
2498 idct_dc_add
= s
->dsp
.h264_idct8_dc_add
;
2499 idct_add
= s
->dsp
.h264_idct8_add
;
2501 for(i
=0; i
<16; i
+=4){
2502 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2503 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
2504 if(transform_bypass
&& h
->sps
.profile_idc
==244 && dir
<=1){
2505 h
->hpc
.pred8x8l_add
[dir
](ptr
, h
->mb
+ i
*16, linesize
);
2507 const int nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2508 h
->hpc
.pred8x8l
[ dir
](ptr
, (h
->topleft_samples_available
<<i
)&0x8000,
2509 (h
->topright_samples_available
<<i
)&0x4000, linesize
);
2511 if(nnz
== 1 && h
->mb
[i
*16])
2512 idct_dc_add(ptr
, h
->mb
+ i
*16, linesize
);
2514 idct_add (ptr
, h
->mb
+ i
*16, linesize
);
2519 if(transform_bypass
){
2521 idct_add
= s
->dsp
.add_pixels4
;
2523 idct_dc_add
= s
->dsp
.h264_idct_dc_add
;
2524 idct_add
= s
->dsp
.h264_idct_add
;
2526 for(i
=0; i
<16; i
++){
2527 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2528 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
2530 if(transform_bypass
&& h
->sps
.profile_idc
==244 && dir
<=1){
2531 h
->hpc
.pred4x4_add
[dir
](ptr
, h
->mb
+ i
*16, linesize
);
2535 if(dir
== DIAG_DOWN_LEFT_PRED
|| dir
== VERT_LEFT_PRED
){
2536 const int topright_avail
= (h
->topright_samples_available
<<i
)&0x8000;
2537 assert(mb_y
|| linesize
<= block_offset
[i
]);
2538 if(!topright_avail
){
2539 tr
= ptr
[3 - linesize
]*0x01010101;
2540 topright
= (uint8_t*) &tr
;
2542 topright
= ptr
+ 4 - linesize
;
2546 h
->hpc
.pred4x4
[ dir
](ptr
, topright
, linesize
);
2547 nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2550 if(nnz
== 1 && h
->mb
[i
*16])
2551 idct_dc_add(ptr
, h
->mb
+ i
*16, linesize
);
2553 idct_add (ptr
, h
->mb
+ i
*16, linesize
);
2555 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, 0);
2562 h
->hpc
.pred16x16
[ h
->intra16x16_pred_mode
](dest_y
, linesize
);
2564 if(!transform_bypass
)
2565 h264_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
, h
->dequant4_coeff
[0][s
->qscale
][0]);
2567 svq3_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
);
2569 if(h
->deblocking_filter
)
2570 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 0, simple
);
2572 hl_motion(h
, dest_y
, dest_cb
, dest_cr
,
2573 s
->me
.qpel_put
, s
->dsp
.put_h264_chroma_pixels_tab
,
2574 s
->me
.qpel_avg
, s
->dsp
.avg_h264_chroma_pixels_tab
,
2575 s
->dsp
.weight_h264_pixels_tab
, s
->dsp
.biweight_h264_pixels_tab
);
2579 if(!IS_INTRA4x4(mb_type
)){
2581 if(IS_INTRA16x16(mb_type
)){
2582 if(transform_bypass
){
2583 if(h
->sps
.profile_idc
==244 && (h
->intra16x16_pred_mode
==VERT_PRED8x8
|| h
->intra16x16_pred_mode
==HOR_PRED8x8
)){
2584 h
->hpc
.pred16x16_add
[h
->intra16x16_pred_mode
](dest_y
, block_offset
, h
->mb
, linesize
);
2586 for(i
=0; i
<16; i
++){
2587 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16])
2588 s
->dsp
.add_pixels4(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2592 s
->dsp
.h264_idct_add16intra(dest_y
, block_offset
, h
->mb
, linesize
, h
->non_zero_count_cache
);
2594 }else if(h
->cbp
&15){
2595 if(transform_bypass
){
2596 const int di
= IS_8x8DCT(mb_type
) ? 4 : 1;
2597 idct_add
= IS_8x8DCT(mb_type
) ? s
->dsp
.add_pixels8
: s
->dsp
.add_pixels4
;
2598 for(i
=0; i
<16; i
+=di
){
2599 if(h
->non_zero_count_cache
[ scan8
[i
] ]){
2600 idct_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2604 if(IS_8x8DCT(mb_type
)){
2605 s
->dsp
.h264_idct8_add4(dest_y
, block_offset
, h
->mb
, linesize
, h
->non_zero_count_cache
);
2607 s
->dsp
.h264_idct_add16(dest_y
, block_offset
, h
->mb
, linesize
, h
->non_zero_count_cache
);
2612 for(i
=0; i
<16; i
++){
2613 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){ //FIXME benchmark weird rule, & below
2614 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2615 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, IS_INTRA(mb_type
) ? 1 : 0);
2621 if((simple
|| !CONFIG_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)) && (h
->cbp
&0x30)){
2622 uint8_t *dest
[2] = {dest_cb
, dest_cr
};
2623 if(transform_bypass
){
2624 if(IS_INTRA(mb_type
) && h
->sps
.profile_idc
==244 && (h
->chroma_pred_mode
==VERT_PRED8x8
|| h
->chroma_pred_mode
==HOR_PRED8x8
)){
2625 h
->hpc
.pred8x8_add
[h
->chroma_pred_mode
](dest
[0], block_offset
+ 16, h
->mb
+ 16*16, uvlinesize
);
2626 h
->hpc
.pred8x8_add
[h
->chroma_pred_mode
](dest
[1], block_offset
+ 20, h
->mb
+ 20*16, uvlinesize
);
2628 idct_add
= s
->dsp
.add_pixels4
;
2629 for(i
=16; i
<16+8; i
++){
2630 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16])
2631 idct_add (dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
2635 chroma_dc_dequant_idct_c(h
->mb
+ 16*16, h
->chroma_qp
[0], h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 1:4][h
->chroma_qp
[0]][0]);
2636 chroma_dc_dequant_idct_c(h
->mb
+ 16*16+4*16, h
->chroma_qp
[1], h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 2:5][h
->chroma_qp
[1]][0]);
2638 idct_add
= s
->dsp
.h264_idct_add
;
2639 idct_dc_add
= s
->dsp
.h264_idct_dc_add
;
2640 for(i
=16; i
<16+8; i
++){
2641 if(h
->non_zero_count_cache
[ scan8
[i
] ])
2642 idct_add (dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
2643 else if(h
->mb
[i
*16])
2644 idct_dc_add(dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
2647 for(i
=16; i
<16+8; i
++){
2648 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){
2649 uint8_t * const ptr
= dest
[(i
&4)>>2] + block_offset
[i
];
2650 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, uvlinesize
, chroma_qp
[s
->qscale
+ 12] - 12, 2);
2657 if(h
->cbp
|| IS_INTRA(mb_type
))
2658 s
->dsp
.clear_blocks(h
->mb
);
2660 if(h
->deblocking_filter
) {
2661 backup_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, simple
);
2662 fill_caches(h
, mb_type
, 1); //FIXME don't fill stuff which isn't used by filter_mb
2663 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->current_picture
.qscale_table
[mb_xy
]);
2664 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->current_picture
.qscale_table
[mb_xy
]);
2665 if (!simple
&& FRAME_MBAFF
) {
2666 filter_mb (h
, mb_x
, mb_y
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
2668 filter_mb_fast(h
, mb_x
, mb_y
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
2674 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2676 static void hl_decode_mb_simple(H264Context
*h
){
2677 hl_decode_mb_internal(h
, 1);
2681 * Process a macroblock; this handles edge cases, such as interlacing.
2683 static void av_noinline
hl_decode_mb_complex(H264Context
*h
){
2684 hl_decode_mb_internal(h
, 0);
2687 static void hl_decode_mb(H264Context
*h
){
2688 MpegEncContext
* const s
= &h
->s
;
2689 const int mb_xy
= h
->mb_xy
;
2690 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
2691 int is_complex
= CONFIG_SMALL
|| h
->is_complex
|| IS_INTRA_PCM(mb_type
) || s
->qscale
== 0;
2694 hl_decode_mb_complex(h
);
2695 else hl_decode_mb_simple(h
);
2698 static void pic_as_field(Picture
*pic
, const int parity
){
2700 for (i
= 0; i
< 4; ++i
) {
2701 if (parity
== PICT_BOTTOM_FIELD
)
2702 pic
->data
[i
] += pic
->linesize
[i
];
2703 pic
->reference
= parity
;
2704 pic
->linesize
[i
] *= 2;
2706 pic
->poc
= pic
->field_poc
[parity
== PICT_BOTTOM_FIELD
];
2709 static int split_field_copy(Picture
*dest
, Picture
*src
,
2710 int parity
, int id_add
){
2711 int match
= !!(src
->reference
& parity
);
2715 if(parity
!= PICT_FRAME
){
2716 pic_as_field(dest
, parity
);
2718 dest
->pic_id
+= id_add
;
2725 static int build_def_list(Picture
*def
, Picture
**in
, int len
, int is_long
, int sel
){
2729 while(i
[0]<len
|| i
[1]<len
){
2730 while(i
[0]<len
&& !(in
[ i
[0] ] && (in
[ i
[0] ]->reference
& sel
)))
2732 while(i
[1]<len
&& !(in
[ i
[1] ] && (in
[ i
[1] ]->reference
& (sel
^3))))
2735 in
[ i
[0] ]->pic_id
= is_long
? i
[0] : in
[ i
[0] ]->frame_num
;
2736 split_field_copy(&def
[index
++], in
[ i
[0]++ ], sel
, 1);
2739 in
[ i
[1] ]->pic_id
= is_long
? i
[1] : in
[ i
[1] ]->frame_num
;
2740 split_field_copy(&def
[index
++], in
[ i
[1]++ ], sel
^3, 0);
2747 static int add_sorted(Picture
**sorted
, Picture
**src
, int len
, int limit
, int dir
){
2752 best_poc
= dir
? INT_MIN
: INT_MAX
;
2754 for(i
=0; i
<len
; i
++){
2755 const int poc
= src
[i
]->poc
;
2756 if(((poc
> limit
) ^ dir
) && ((poc
< best_poc
) ^ dir
)){
2758 sorted
[out_i
]= src
[i
];
2761 if(best_poc
== (dir
? INT_MIN
: INT_MAX
))
2763 limit
= sorted
[out_i
++]->poc
- dir
;
2769 * fills the default_ref_list.
2771 static int fill_default_ref_list(H264Context
*h
){
2772 MpegEncContext
* const s
= &h
->s
;
2775 if(h
->slice_type_nos
==FF_B_TYPE
){
2776 Picture
*sorted
[32];
2781 cur_poc
= s
->current_picture_ptr
->field_poc
[ s
->picture_structure
== PICT_BOTTOM_FIELD
];
2783 cur_poc
= s
->current_picture_ptr
->poc
;
2785 for(list
= 0; list
<2; list
++){
2786 len
= add_sorted(sorted
, h
->short_ref
, h
->short_ref_count
, cur_poc
, 1^list
);
2787 len
+=add_sorted(sorted
+len
, h
->short_ref
, h
->short_ref_count
, cur_poc
, 0^list
);
2789 len
= build_def_list(h
->default_ref_list
[list
] , sorted
, len
, 0, s
->picture_structure
);
2790 len
+=build_def_list(h
->default_ref_list
[list
]+len
, h
->long_ref
, 16 , 1, s
->picture_structure
);
2793 if(len
< h
->ref_count
[list
])
2794 memset(&h
->default_ref_list
[list
][len
], 0, sizeof(Picture
)*(h
->ref_count
[list
] - len
));
2798 if(lens
[0] == lens
[1] && lens
[1] > 1){
2799 for(i
=0; h
->default_ref_list
[0][i
].data
[0] == h
->default_ref_list
[1][i
].data
[0] && i
<lens
[0]; i
++);
2801 FFSWAP(Picture
, h
->default_ref_list
[1][0], h
->default_ref_list
[1][1]);
2804 len
= build_def_list(h
->default_ref_list
[0] , h
->short_ref
, h
->short_ref_count
, 0, s
->picture_structure
);
2805 len
+= build_def_list(h
->default_ref_list
[0]+len
, h
-> long_ref
, 16 , 1, s
->picture_structure
);
2807 if(len
< h
->ref_count
[0])
2808 memset(&h
->default_ref_list
[0][len
], 0, sizeof(Picture
)*(h
->ref_count
[0] - len
));
2811 for (i
=0; i
<h
->ref_count
[0]; i
++) {
2812 tprintf(h
->s
.avctx
, "List0: %s fn:%d 0x%p\n", (h
->default_ref_list
[0][i
].long_ref
? "LT" : "ST"), h
->default_ref_list
[0][i
].pic_id
, h
->default_ref_list
[0][i
].data
[0]);
2814 if(h
->slice_type_nos
==FF_B_TYPE
){
2815 for (i
=0; i
<h
->ref_count
[1]; i
++) {
2816 tprintf(h
->s
.avctx
, "List1: %s fn:%d 0x%p\n", (h
->default_ref_list
[1][i
].long_ref
? "LT" : "ST"), h
->default_ref_list
[1][i
].pic_id
, h
->default_ref_list
[1][i
].data
[0]);
2823 static void print_short_term(H264Context
*h
);
2824 static void print_long_term(H264Context
*h
);
2827 * Extract structure information about the picture described by pic_num in
2828 * the current decoding context (frame or field). Note that pic_num is
2829 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2830 * @param pic_num picture number for which to extract structure information
2831 * @param structure one of PICT_XXX describing structure of picture
2833 * @return frame number (short term) or long term index of picture
2834 * described by pic_num
2836 static int pic_num_extract(H264Context
*h
, int pic_num
, int *structure
){
2837 MpegEncContext
* const s
= &h
->s
;
2839 *structure
= s
->picture_structure
;
2842 /* opposite field */
2843 *structure
^= PICT_FRAME
;
2850 static int decode_ref_pic_list_reordering(H264Context
*h
){
2851 MpegEncContext
* const s
= &h
->s
;
2852 int list
, index
, pic_structure
;
2854 print_short_term(h
);
2857 for(list
=0; list
<h
->list_count
; list
++){
2858 memcpy(h
->ref_list
[list
], h
->default_ref_list
[list
], sizeof(Picture
)*h
->ref_count
[list
]);
2860 if(get_bits1(&s
->gb
)){
2861 int pred
= h
->curr_pic_num
;
2863 for(index
=0; ; index
++){
2864 unsigned int reordering_of_pic_nums_idc
= get_ue_golomb_31(&s
->gb
);
2865 unsigned int pic_id
;
2867 Picture
*ref
= NULL
;
2869 if(reordering_of_pic_nums_idc
==3)
2872 if(index
>= h
->ref_count
[list
]){
2873 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference count overflow\n");
2877 if(reordering_of_pic_nums_idc
<3){
2878 if(reordering_of_pic_nums_idc
<2){
2879 const unsigned int abs_diff_pic_num
= get_ue_golomb(&s
->gb
) + 1;
2882 if(abs_diff_pic_num
> h
->max_pic_num
){
2883 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "abs_diff_pic_num overflow\n");
2887 if(reordering_of_pic_nums_idc
== 0) pred
-= abs_diff_pic_num
;
2888 else pred
+= abs_diff_pic_num
;
2889 pred
&= h
->max_pic_num
- 1;
2891 frame_num
= pic_num_extract(h
, pred
, &pic_structure
);
2893 for(i
= h
->short_ref_count
-1; i
>=0; i
--){
2894 ref
= h
->short_ref
[i
];
2895 assert(ref
->reference
);
2896 assert(!ref
->long_ref
);
2898 ref
->frame_num
== frame_num
&&
2899 (ref
->reference
& pic_structure
)
2907 pic_id
= get_ue_golomb(&s
->gb
); //long_term_pic_idx
2909 long_idx
= pic_num_extract(h
, pic_id
, &pic_structure
);
2912 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "long_term_pic_idx overflow\n");
2915 ref
= h
->long_ref
[long_idx
];
2916 assert(!(ref
&& !ref
->reference
));
2917 if(ref
&& (ref
->reference
& pic_structure
)){
2918 ref
->pic_id
= pic_id
;
2919 assert(ref
->long_ref
);
2927 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference picture missing during reorder\n");
2928 memset(&h
->ref_list
[list
][index
], 0, sizeof(Picture
)); //FIXME
2930 for(i
=index
; i
+1<h
->ref_count
[list
]; i
++){
2931 if(ref
->long_ref
== h
->ref_list
[list
][i
].long_ref
&& ref
->pic_id
== h
->ref_list
[list
][i
].pic_id
)
2934 for(; i
> index
; i
--){
2935 h
->ref_list
[list
][i
]= h
->ref_list
[list
][i
-1];
2937 h
->ref_list
[list
][index
]= *ref
;
2939 pic_as_field(&h
->ref_list
[list
][index
], pic_structure
);
2943 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal reordering_of_pic_nums_idc\n");
2949 for(list
=0; list
<h
->list_count
; list
++){
2950 for(index
= 0; index
< h
->ref_count
[list
]; index
++){
2951 if(!h
->ref_list
[list
][index
].data
[0]){
2952 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "Missing reference picture\n");
2953 if(h
->default_ref_list
[list
][0].data
[0])
2954 h
->ref_list
[list
][index
]= h
->default_ref_list
[list
][0];
2964 static void fill_mbaff_ref_list(H264Context
*h
){
2966 for(list
=0; list
<2; list
++){ //FIXME try list_count
2967 for(i
=0; i
<h
->ref_count
[list
]; i
++){
2968 Picture
*frame
= &h
->ref_list
[list
][i
];
2969 Picture
*field
= &h
->ref_list
[list
][16+2*i
];
2972 field
[0].linesize
[j
] <<= 1;
2973 field
[0].reference
= PICT_TOP_FIELD
;
2974 field
[0].poc
= field
[0].field_poc
[0];
2975 field
[1] = field
[0];
2977 field
[1].data
[j
] += frame
->linesize
[j
];
2978 field
[1].reference
= PICT_BOTTOM_FIELD
;
2979 field
[1].poc
= field
[1].field_poc
[1];
2981 h
->luma_weight
[list
][16+2*i
] = h
->luma_weight
[list
][16+2*i
+1] = h
->luma_weight
[list
][i
];
2982 h
->luma_offset
[list
][16+2*i
] = h
->luma_offset
[list
][16+2*i
+1] = h
->luma_offset
[list
][i
];
2984 h
->chroma_weight
[list
][16+2*i
][j
] = h
->chroma_weight
[list
][16+2*i
+1][j
] = h
->chroma_weight
[list
][i
][j
];
2985 h
->chroma_offset
[list
][16+2*i
][j
] = h
->chroma_offset
[list
][16+2*i
+1][j
] = h
->chroma_offset
[list
][i
][j
];
2989 for(j
=0; j
<h
->ref_count
[1]; j
++){
2990 for(i
=0; i
<h
->ref_count
[0]; i
++)
2991 h
->implicit_weight
[j
][16+2*i
] = h
->implicit_weight
[j
][16+2*i
+1] = h
->implicit_weight
[j
][i
];
2992 memcpy(h
->implicit_weight
[16+2*j
], h
->implicit_weight
[j
], sizeof(*h
->implicit_weight
));
2993 memcpy(h
->implicit_weight
[16+2*j
+1], h
->implicit_weight
[j
], sizeof(*h
->implicit_weight
));
2997 static int pred_weight_table(H264Context
*h
){
2998 MpegEncContext
* const s
= &h
->s
;
3000 int luma_def
, chroma_def
;
3003 h
->use_weight_chroma
= 0;
3004 h
->luma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
3005 h
->chroma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
3006 luma_def
= 1<<h
->luma_log2_weight_denom
;
3007 chroma_def
= 1<<h
->chroma_log2_weight_denom
;
3009 for(list
=0; list
<2; list
++){
3010 h
->luma_weight_flag
[list
] = 0;
3011 h
->chroma_weight_flag
[list
] = 0;
3012 for(i
=0; i
<h
->ref_count
[list
]; i
++){
3013 int luma_weight_flag
, chroma_weight_flag
;
3015 luma_weight_flag
= get_bits1(&s
->gb
);
3016 if(luma_weight_flag
){
3017 h
->luma_weight
[list
][i
]= get_se_golomb(&s
->gb
);
3018 h
->luma_offset
[list
][i
]= get_se_golomb(&s
->gb
);
3019 if( h
->luma_weight
[list
][i
] != luma_def
3020 || h
->luma_offset
[list
][i
] != 0) {
3022 h
->luma_weight_flag
[list
]= 1;
3025 h
->luma_weight
[list
][i
]= luma_def
;
3026 h
->luma_offset
[list
][i
]= 0;
3030 chroma_weight_flag
= get_bits1(&s
->gb
);
3031 if(chroma_weight_flag
){
3034 h
->chroma_weight
[list
][i
][j
]= get_se_golomb(&s
->gb
);
3035 h
->chroma_offset
[list
][i
][j
]= get_se_golomb(&s
->gb
);
3036 if( h
->chroma_weight
[list
][i
][j
] != chroma_def
3037 || h
->chroma_offset
[list
][i
][j
] != 0) {
3038 h
->use_weight_chroma
= 1;
3039 h
->chroma_weight_flag
[list
]= 1;
3045 h
->chroma_weight
[list
][i
][j
]= chroma_def
;
3046 h
->chroma_offset
[list
][i
][j
]= 0;
3051 if(h
->slice_type_nos
!= FF_B_TYPE
) break;
3053 h
->use_weight
= h
->use_weight
|| h
->use_weight_chroma
;
3057 static void implicit_weight_table(H264Context
*h
){
3058 MpegEncContext
* const s
= &h
->s
;
3060 int cur_poc
= s
->current_picture_ptr
->poc
;
3062 for (i
= 0; i
< 2; i
++) {
3063 h
->luma_weight_flag
[i
] = 0;
3064 h
->chroma_weight_flag
[i
] = 0;
3067 if( h
->ref_count
[0] == 1 && h
->ref_count
[1] == 1
3068 && h
->ref_list
[0][0].poc
+ h
->ref_list
[1][0].poc
== 2*cur_poc
){
3070 h
->use_weight_chroma
= 0;
3075 h
->use_weight_chroma
= 2;
3076 h
->luma_log2_weight_denom
= 5;
3077 h
->chroma_log2_weight_denom
= 5;
3079 for(ref0
=0; ref0
< h
->ref_count
[0]; ref0
++){
3080 int poc0
= h
->ref_list
[0][ref0
].poc
;
3081 for(ref1
=0; ref1
< h
->ref_count
[1]; ref1
++){
3082 int poc1
= h
->ref_list
[1][ref1
].poc
;
3083 int td
= av_clip(poc1
- poc0
, -128, 127);
3085 int tb
= av_clip(cur_poc
- poc0
, -128, 127);
3086 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
3087 int dist_scale_factor
= av_clip((tb
*tx
+ 32) >> 6, -1024, 1023) >> 2;
3088 if(dist_scale_factor
< -64 || dist_scale_factor
> 128)
3089 h
->implicit_weight
[ref0
][ref1
] = 32;
3091 h
->implicit_weight
[ref0
][ref1
] = 64 - dist_scale_factor
;
3093 h
->implicit_weight
[ref0
][ref1
] = 32;
3099 * Mark a picture as no longer needed for reference. The refmask
3100 * argument allows unreferencing of individual fields or the whole frame.
3101 * If the picture becomes entirely unreferenced, but is being held for
3102 * display purposes, it is marked as such.
3103 * @param refmask mask of fields to unreference; the mask is bitwise
3104 * anded with the reference marking of pic
3105 * @return non-zero if pic becomes entirely unreferenced (except possibly
3106 * for display purposes) zero if one of the fields remains in
3109 static inline int unreference_pic(H264Context
*h
, Picture
*pic
, int refmask
){
3111 if (pic
->reference
&= refmask
) {
3114 for(i
= 0; h
->delayed_pic
[i
]; i
++)
3115 if(pic
== h
->delayed_pic
[i
]){
3116 pic
->reference
=DELAYED_PIC_REF
;
3124 * instantaneous decoder refresh.
3126 static void idr(H264Context
*h
){
3129 for(i
=0; i
<16; i
++){
3130 remove_long(h
, i
, 0);
3132 assert(h
->long_ref_count
==0);
3134 for(i
=0; i
<h
->short_ref_count
; i
++){
3135 unreference_pic(h
, h
->short_ref
[i
], 0);
3136 h
->short_ref
[i
]= NULL
;
3138 h
->short_ref_count
=0;
3139 h
->prev_frame_num
= 0;
3140 h
->prev_frame_num_offset
= 0;
3145 /* forget old pics after a seek */
3146 static void flush_dpb(AVCodecContext
*avctx
){
3147 H264Context
*h
= avctx
->priv_data
;
3149 for(i
=0; i
<MAX_DELAYED_PIC_COUNT
; i
++) {
3150 if(h
->delayed_pic
[i
])
3151 h
->delayed_pic
[i
]->reference
= 0;
3152 h
->delayed_pic
[i
]= NULL
;
3154 h
->outputed_poc
= INT_MIN
;
3156 if(h
->s
.current_picture_ptr
)
3157 h
->s
.current_picture_ptr
->reference
= 0;
3158 h
->s
.first_field
= 0;
3160 ff_mpeg_flush(avctx
);
3164 * Find a Picture in the short term reference list by frame number.
3165 * @param frame_num frame number to search for
3166 * @param idx the index into h->short_ref where returned picture is found
3167 * undefined if no picture found.
3168 * @return pointer to the found picture, or NULL if no pic with the provided
3169 * frame number is found
3171 static Picture
* find_short(H264Context
*h
, int frame_num
, int *idx
){
3172 MpegEncContext
* const s
= &h
->s
;
3175 for(i
=0; i
<h
->short_ref_count
; i
++){
3176 Picture
*pic
= h
->short_ref
[i
];
3177 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3178 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d %d %p\n", i
, pic
->frame_num
, pic
);
3179 if(pic
->frame_num
== frame_num
) {
3188 * Remove a picture from the short term reference list by its index in
3189 * that list. This does no checking on the provided index; it is assumed
3190 * to be valid. Other list entries are shifted down.
3191 * @param i index into h->short_ref of picture to remove.
3193 static void remove_short_at_index(H264Context
*h
, int i
){
3194 assert(i
>= 0 && i
< h
->short_ref_count
);
3195 h
->short_ref
[i
]= NULL
;
3196 if (--h
->short_ref_count
)
3197 memmove(&h
->short_ref
[i
], &h
->short_ref
[i
+1], (h
->short_ref_count
- i
)*sizeof(Picture
*));
3202 * @return the removed picture or NULL if an error occurs
3204 static Picture
* remove_short(H264Context
*h
, int frame_num
, int ref_mask
){
3205 MpegEncContext
* const s
= &h
->s
;
3209 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3210 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "remove short %d count %d\n", frame_num
, h
->short_ref_count
);
3212 pic
= find_short(h
, frame_num
, &i
);
3214 if(unreference_pic(h
, pic
, ref_mask
))
3215 remove_short_at_index(h
, i
);
3222 * Remove a picture from the long term reference list by its index in
3224 * @return the removed picture or NULL if an error occurs
3226 static Picture
* remove_long(H264Context
*h
, int i
, int ref_mask
){
3229 pic
= h
->long_ref
[i
];
3231 if(unreference_pic(h
, pic
, ref_mask
)){
3232 assert(h
->long_ref
[i
]->long_ref
== 1);
3233 h
->long_ref
[i
]->long_ref
= 0;
3234 h
->long_ref
[i
]= NULL
;
3235 h
->long_ref_count
--;
3243 * print short term list
3245 static void print_short_term(H264Context
*h
) {
3247 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3248 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "short term list:\n");
3249 for(i
=0; i
<h
->short_ref_count
; i
++){
3250 Picture
*pic
= h
->short_ref
[i
];
3251 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3257 * print long term list
3259 static void print_long_term(H264Context
*h
) {
3261 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3262 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "long term list:\n");
3263 for(i
= 0; i
< 16; i
++){
3264 Picture
*pic
= h
->long_ref
[i
];
3266 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3273 * Executes the reference picture marking (memory management control operations).
3275 static int execute_ref_pic_marking(H264Context
*h
, MMCO
*mmco
, int mmco_count
){
3276 MpegEncContext
* const s
= &h
->s
;
3277 int i
, av_uninit(j
);
3278 int current_ref_assigned
=0;
3279 Picture
*av_uninit(pic
);
3281 if((s
->avctx
->debug
&FF_DEBUG_MMCO
) && mmco_count
==0)
3282 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "no mmco here\n");
3284 for(i
=0; i
<mmco_count
; i
++){
3285 int av_uninit(structure
), av_uninit(frame_num
);
3286 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3287 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco:%d %d %d\n", h
->mmco
[i
].opcode
, h
->mmco
[i
].short_pic_num
, h
->mmco
[i
].long_arg
);
3289 if( mmco
[i
].opcode
== MMCO_SHORT2UNUSED
3290 || mmco
[i
].opcode
== MMCO_SHORT2LONG
){
3291 frame_num
= pic_num_extract(h
, mmco
[i
].short_pic_num
, &structure
);
3292 pic
= find_short(h
, frame_num
, &j
);
3294 if(mmco
[i
].opcode
!= MMCO_SHORT2LONG
|| !h
->long_ref
[mmco
[i
].long_arg
]
3295 || h
->long_ref
[mmco
[i
].long_arg
]->frame_num
!= frame_num
)
3296 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mmco: unref short failure\n");
3301 switch(mmco
[i
].opcode
){
3302 case MMCO_SHORT2UNUSED
:
3303 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3304 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref short %d count %d\n", h
->mmco
[i
].short_pic_num
, h
->short_ref_count
);
3305 remove_short(h
, frame_num
, structure
^ PICT_FRAME
);
3307 case MMCO_SHORT2LONG
:
3308 if (h
->long_ref
[mmco
[i
].long_arg
] != pic
)
3309 remove_long(h
, mmco
[i
].long_arg
, 0);
3311 remove_short_at_index(h
, j
);
3312 h
->long_ref
[ mmco
[i
].long_arg
]= pic
;
3313 if (h
->long_ref
[ mmco
[i
].long_arg
]){
3314 h
->long_ref
[ mmco
[i
].long_arg
]->long_ref
=1;
3315 h
->long_ref_count
++;
3318 case MMCO_LONG2UNUSED
:
3319 j
= pic_num_extract(h
, mmco
[i
].long_arg
, &structure
);
3320 pic
= h
->long_ref
[j
];
3322 remove_long(h
, j
, structure
^ PICT_FRAME
);
3323 } else if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3324 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref long failure\n");
3327 // Comment below left from previous code as it is an interresting note.
3328 /* First field in pair is in short term list or
3329 * at a different long term index.
3330 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3331 * Report the problem and keep the pair where it is,
3332 * and mark this field valid.
3335 if (h
->long_ref
[mmco
[i
].long_arg
] != s
->current_picture_ptr
) {
3336 remove_long(h
, mmco
[i
].long_arg
, 0);
3338 h
->long_ref
[ mmco
[i
].long_arg
]= s
->current_picture_ptr
;
3339 h
->long_ref
[ mmco
[i
].long_arg
]->long_ref
=1;
3340 h
->long_ref_count
++;
3343 s
->current_picture_ptr
->reference
|= s
->picture_structure
;
3344 current_ref_assigned
=1;
3346 case MMCO_SET_MAX_LONG
:
3347 assert(mmco
[i
].long_arg
<= 16);
3348 // just remove the long term which index is greater than new max
3349 for(j
= mmco
[i
].long_arg
; j
<16; j
++){
3350 remove_long(h
, j
, 0);
3354 while(h
->short_ref_count
){
3355 remove_short(h
, h
->short_ref
[0]->frame_num
, 0);
3357 for(j
= 0; j
< 16; j
++) {
3358 remove_long(h
, j
, 0);
3360 s
->current_picture_ptr
->poc
=
3361 s
->current_picture_ptr
->field_poc
[0]=
3362 s
->current_picture_ptr
->field_poc
[1]=
3366 s
->current_picture_ptr
->frame_num
= 0;
3372 if (!current_ref_assigned
) {
3373 /* Second field of complementary field pair; the first field of
3374 * which is already referenced. If short referenced, it
3375 * should be first entry in short_ref. If not, it must exist
3376 * in long_ref; trying to put it on the short list here is an
3377 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3379 if (h
->short_ref_count
&& h
->short_ref
[0] == s
->current_picture_ptr
) {
3380 /* Just mark the second field valid */
3381 s
->current_picture_ptr
->reference
= PICT_FRAME
;
3382 } else if (s
->current_picture_ptr
->long_ref
) {
3383 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term reference "
3384 "assignment for second field "
3385 "in complementary field pair "
3386 "(first field is long term)\n");
3388 pic
= remove_short(h
, s
->current_picture_ptr
->frame_num
, 0);
3390 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term buffer state detected\n");
3393 if(h
->short_ref_count
)
3394 memmove(&h
->short_ref
[1], &h
->short_ref
[0], h
->short_ref_count
*sizeof(Picture
*));
3396 h
->short_ref
[0]= s
->current_picture_ptr
;
3397 h
->short_ref_count
++;
3398 s
->current_picture_ptr
->reference
|= s
->picture_structure
;
3402 if (h
->long_ref_count
+ h
->short_ref_count
> h
->sps
.ref_frame_count
){
3404 /* We have too many reference frames, probably due to corrupted
3405 * stream. Need to discard one frame. Prevents overrun of the
3406 * short_ref and long_ref buffers.
3408 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3409 "number of reference frames exceeds max (probably "
3410 "corrupt input), discarding one\n");
3412 if (h
->long_ref_count
&& !h
->short_ref_count
) {
3413 for (i
= 0; i
< 16; ++i
)
3418 remove_long(h
, i
, 0);
3420 pic
= h
->short_ref
[h
->short_ref_count
- 1];
3421 remove_short(h
, pic
->frame_num
, 0);
3425 print_short_term(h
);
3430 static int decode_ref_pic_marking(H264Context
*h
, GetBitContext
*gb
){
3431 MpegEncContext
* const s
= &h
->s
;
3435 if(h
->nal_unit_type
== NAL_IDR_SLICE
){ //FIXME fields
3436 s
->broken_link
= get_bits1(gb
) -1;
3438 h
->mmco
[0].opcode
= MMCO_LONG
;
3439 h
->mmco
[0].long_arg
= 0;
3443 if(get_bits1(gb
)){ // adaptive_ref_pic_marking_mode_flag
3444 for(i
= 0; i
<MAX_MMCO_COUNT
; i
++) {
3445 MMCOOpcode opcode
= get_ue_golomb_31(gb
);
3447 h
->mmco
[i
].opcode
= opcode
;
3448 if(opcode
==MMCO_SHORT2UNUSED
|| opcode
==MMCO_SHORT2LONG
){
3449 h
->mmco
[i
].short_pic_num
= (h
->curr_pic_num
- get_ue_golomb(gb
) - 1) & (h
->max_pic_num
- 1);
3450 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3451 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3455 if(opcode
==MMCO_SHORT2LONG
|| opcode
==MMCO_LONG2UNUSED
|| opcode
==MMCO_LONG
|| opcode
==MMCO_SET_MAX_LONG
){
3456 unsigned int long_arg
= get_ue_golomb_31(gb
);
3457 if(long_arg
>= 32 || (long_arg
>= 16 && !(opcode
== MMCO_LONG2UNUSED
&& FIELD_PICTURE
))){
3458 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal long ref in memory management control operation %d\n", opcode
);
3461 h
->mmco
[i
].long_arg
= long_arg
;
3464 if(opcode
> (unsigned)MMCO_LONG
){
3465 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal memory management control operation %d\n", opcode
);
3468 if(opcode
== MMCO_END
)
3473 assert(h
->long_ref_count
+ h
->short_ref_count
<= h
->sps
.ref_frame_count
);
3475 if(h
->short_ref_count
&& h
->long_ref_count
+ h
->short_ref_count
== h
->sps
.ref_frame_count
&&
3476 !(FIELD_PICTURE
&& !s
->first_field
&& s
->current_picture_ptr
->reference
)) {
3477 h
->mmco
[0].opcode
= MMCO_SHORT2UNUSED
;
3478 h
->mmco
[0].short_pic_num
= h
->short_ref
[ h
->short_ref_count
- 1 ]->frame_num
;
3480 if (FIELD_PICTURE
) {
3481 h
->mmco
[0].short_pic_num
*= 2;
3482 h
->mmco
[1].opcode
= MMCO_SHORT2UNUSED
;
3483 h
->mmco
[1].short_pic_num
= h
->mmco
[0].short_pic_num
+ 1;
3493 static int init_poc(H264Context
*h
){
3494 MpegEncContext
* const s
= &h
->s
;
3495 const int max_frame_num
= 1<<h
->sps
.log2_max_frame_num
;
3497 Picture
*cur
= s
->current_picture_ptr
;
3499 h
->frame_num_offset
= h
->prev_frame_num_offset
;
3500 if(h
->frame_num
< h
->prev_frame_num
)
3501 h
->frame_num_offset
+= max_frame_num
;
3503 if(h
->sps
.poc_type
==0){
3504 const int max_poc_lsb
= 1<<h
->sps
.log2_max_poc_lsb
;
3506 if (h
->poc_lsb
< h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
>= max_poc_lsb
/2)
3507 h
->poc_msb
= h
->prev_poc_msb
+ max_poc_lsb
;
3508 else if(h
->poc_lsb
> h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
< -max_poc_lsb
/2)
3509 h
->poc_msb
= h
->prev_poc_msb
- max_poc_lsb
;
3511 h
->poc_msb
= h
->prev_poc_msb
;
3512 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3514 field_poc
[1] = h
->poc_msb
+ h
->poc_lsb
;
3515 if(s
->picture_structure
== PICT_FRAME
)
3516 field_poc
[1] += h
->delta_poc_bottom
;
3517 }else if(h
->sps
.poc_type
==1){
3518 int abs_frame_num
, expected_delta_per_poc_cycle
, expectedpoc
;
3521 if(h
->sps
.poc_cycle_length
!= 0)
3522 abs_frame_num
= h
->frame_num_offset
+ h
->frame_num
;
3526 if(h
->nal_ref_idc
==0 && abs_frame_num
> 0)
3529 expected_delta_per_poc_cycle
= 0;
3530 for(i
=0; i
< h
->sps
.poc_cycle_length
; i
++)
3531 expected_delta_per_poc_cycle
+= h
->sps
.offset_for_ref_frame
[ i
]; //FIXME integrate during sps parse
3533 if(abs_frame_num
> 0){
3534 int poc_cycle_cnt
= (abs_frame_num
- 1) / h
->sps
.poc_cycle_length
;
3535 int frame_num_in_poc_cycle
= (abs_frame_num
- 1) % h
->sps
.poc_cycle_length
;
3537 expectedpoc
= poc_cycle_cnt
* expected_delta_per_poc_cycle
;
3538 for(i
= 0; i
<= frame_num_in_poc_cycle
; i
++)
3539 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_ref_frame
[ i
];
3543 if(h
->nal_ref_idc
== 0)
3544 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_non_ref_pic
;
3546 field_poc
[0] = expectedpoc
+ h
->delta_poc
[0];
3547 field_poc
[1] = field_poc
[0] + h
->sps
.offset_for_top_to_bottom_field
;
3549 if(s
->picture_structure
== PICT_FRAME
)
3550 field_poc
[1] += h
->delta_poc
[1];
3552 int poc
= 2*(h
->frame_num_offset
+ h
->frame_num
);
3561 if(s
->picture_structure
!= PICT_BOTTOM_FIELD
)
3562 s
->current_picture_ptr
->field_poc
[0]= field_poc
[0];
3563 if(s
->picture_structure
!= PICT_TOP_FIELD
)
3564 s
->current_picture_ptr
->field_poc
[1]= field_poc
[1];
3565 cur
->poc
= FFMIN(cur
->field_poc
[0], cur
->field_poc
[1]);
3572 * initialize scan tables
3574 static void init_scan_tables(H264Context
*h
){
3575 MpegEncContext
* const s
= &h
->s
;
3577 if(s
->dsp
.h264_idct_add
== ff_h264_idct_add_c
){ //FIXME little ugly
3578 memcpy(h
->zigzag_scan
, zigzag_scan
, 16*sizeof(uint8_t));
3579 memcpy(h
-> field_scan
, field_scan
, 16*sizeof(uint8_t));
3581 for(i
=0; i
<16; i
++){
3582 #define T(x) (x>>2) | ((x<<2) & 0xF)
3583 h
->zigzag_scan
[i
] = T(zigzag_scan
[i
]);
3584 h
-> field_scan
[i
] = T( field_scan
[i
]);
3588 if(s
->dsp
.h264_idct8_add
== ff_h264_idct8_add_c
){
3589 memcpy(h
->zigzag_scan8x8
, ff_zigzag_direct
, 64*sizeof(uint8_t));
3590 memcpy(h
->zigzag_scan8x8_cavlc
, zigzag_scan8x8_cavlc
, 64*sizeof(uint8_t));
3591 memcpy(h
->field_scan8x8
, field_scan8x8
, 64*sizeof(uint8_t));
3592 memcpy(h
->field_scan8x8_cavlc
, field_scan8x8_cavlc
, 64*sizeof(uint8_t));
3594 for(i
=0; i
<64; i
++){
3595 #define T(x) (x>>3) | ((x&7)<<3)
3596 h
->zigzag_scan8x8
[i
] = T(ff_zigzag_direct
[i
]);
3597 h
->zigzag_scan8x8_cavlc
[i
] = T(zigzag_scan8x8_cavlc
[i
]);
3598 h
->field_scan8x8
[i
] = T(field_scan8x8
[i
]);
3599 h
->field_scan8x8_cavlc
[i
] = T(field_scan8x8_cavlc
[i
]);
3603 if(h
->sps
.transform_bypass
){ //FIXME same ugly
3604 h
->zigzag_scan_q0
= zigzag_scan
;
3605 h
->zigzag_scan8x8_q0
= ff_zigzag_direct
;
3606 h
->zigzag_scan8x8_cavlc_q0
= zigzag_scan8x8_cavlc
;
3607 h
->field_scan_q0
= field_scan
;
3608 h
->field_scan8x8_q0
= field_scan8x8
;
3609 h
->field_scan8x8_cavlc_q0
= field_scan8x8_cavlc
;
3611 h
->zigzag_scan_q0
= h
->zigzag_scan
;
3612 h
->zigzag_scan8x8_q0
= h
->zigzag_scan8x8
;
3613 h
->zigzag_scan8x8_cavlc_q0
= h
->zigzag_scan8x8_cavlc
;
3614 h
->field_scan_q0
= h
->field_scan
;
3615 h
->field_scan8x8_q0
= h
->field_scan8x8
;
3616 h
->field_scan8x8_cavlc_q0
= h
->field_scan8x8_cavlc
;
3620 static void field_end(H264Context
*h
){
3621 MpegEncContext
* const s
= &h
->s
;
3622 AVCodecContext
* const avctx
= s
->avctx
;
3625 s
->current_picture_ptr
->qscale_type
= FF_QSCALE_TYPE_H264
;
3626 s
->current_picture_ptr
->pict_type
= s
->pict_type
;
3628 if (CONFIG_H264_VDPAU_DECODER
&& s
->avctx
->codec
->capabilities
&CODEC_CAP_HWACCEL_VDPAU
)
3629 ff_vdpau_h264_set_reference_frames(s
);
3632 execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
3633 h
->prev_poc_msb
= h
->poc_msb
;
3634 h
->prev_poc_lsb
= h
->poc_lsb
;
3636 h
->prev_frame_num_offset
= h
->frame_num_offset
;
3637 h
->prev_frame_num
= h
->frame_num
;
3639 if (avctx
->hwaccel
) {
3640 if (avctx
->hwaccel
->end_frame(avctx
) < 0)
3641 av_log(avctx
, AV_LOG_ERROR
, "hardware accelerator failed to decode picture\n");
3644 if (CONFIG_H264_VDPAU_DECODER
&& s
->avctx
->codec
->capabilities
&CODEC_CAP_HWACCEL_VDPAU
)
3645 ff_vdpau_h264_picture_complete(s
);
3648 * FIXME: Error handling code does not seem to support interlaced
3649 * when slices span multiple rows
3650 * The ff_er_add_slice calls don't work right for bottom
3651 * fields; they cause massive erroneous error concealing
3652 * Error marking covers both fields (top and bottom).
3653 * This causes a mismatched s->error_count
3654 * and a bad error table. Further, the error count goes to
3655 * INT_MAX when called for bottom field, because mb_y is
3656 * past end by one (callers fault) and resync_mb_y != 0
3657 * causes problems for the first MB line, too.
3668 * Replicates H264 "master" context to thread contexts.
3670 static void clone_slice(H264Context
*dst
, H264Context
*src
)
3672 memcpy(dst
->block_offset
, src
->block_offset
, sizeof(dst
->block_offset
));
3673 dst
->s
.current_picture_ptr
= src
->s
.current_picture_ptr
;
3674 dst
->s
.current_picture
= src
->s
.current_picture
;
3675 dst
->s
.linesize
= src
->s
.linesize
;
3676 dst
->s
.uvlinesize
= src
->s
.uvlinesize
;
3677 dst
->s
.first_field
= src
->s
.first_field
;
3679 dst
->prev_poc_msb
= src
->prev_poc_msb
;
3680 dst
->prev_poc_lsb
= src
->prev_poc_lsb
;
3681 dst
->prev_frame_num_offset
= src
->prev_frame_num_offset
;
3682 dst
->prev_frame_num
= src
->prev_frame_num
;
3683 dst
->short_ref_count
= src
->short_ref_count
;
3685 memcpy(dst
->short_ref
, src
->short_ref
, sizeof(dst
->short_ref
));
3686 memcpy(dst
->long_ref
, src
->long_ref
, sizeof(dst
->long_ref
));
3687 memcpy(dst
->default_ref_list
, src
->default_ref_list
, sizeof(dst
->default_ref_list
));
3688 memcpy(dst
->ref_list
, src
->ref_list
, sizeof(dst
->ref_list
));
3690 memcpy(dst
->dequant4_coeff
, src
->dequant4_coeff
, sizeof(src
->dequant4_coeff
));
3691 memcpy(dst
->dequant8_coeff
, src
->dequant8_coeff
, sizeof(src
->dequant8_coeff
));
3695 * decodes a slice header.
3696 * This will also call MPV_common_init() and frame_start() as needed.
3698 * @param h h264context
3699 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3701 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3703 static int decode_slice_header(H264Context
*h
, H264Context
*h0
){
3704 MpegEncContext
* const s
= &h
->s
;
3705 MpegEncContext
* const s0
= &h0
->s
;
3706 unsigned int first_mb_in_slice
;
3707 unsigned int pps_id
;
3708 int num_ref_idx_active_override_flag
;
3709 unsigned int slice_type
, tmp
, i
, j
;
3710 int default_ref_list_done
= 0;
3711 int last_pic_structure
;
3713 s
->dropable
= h
->nal_ref_idc
== 0;
3715 if((s
->avctx
->flags2
& CODEC_FLAG2_FAST
) && !h
->nal_ref_idc
){
3716 s
->me
.qpel_put
= s
->dsp
.put_2tap_qpel_pixels_tab
;
3717 s
->me
.qpel_avg
= s
->dsp
.avg_2tap_qpel_pixels_tab
;
3719 s
->me
.qpel_put
= s
->dsp
.put_h264_qpel_pixels_tab
;
3720 s
->me
.qpel_avg
= s
->dsp
.avg_h264_qpel_pixels_tab
;
3723 first_mb_in_slice
= get_ue_golomb(&s
->gb
);
3725 if(first_mb_in_slice
== 0){ //FIXME better field boundary detection
3726 if(h0
->current_slice
&& FIELD_PICTURE
){
3730 h0
->current_slice
= 0;
3731 if (!s0
->first_field
)
3732 s
->current_picture_ptr
= NULL
;
3735 slice_type
= get_ue_golomb_31(&s
->gb
);
3737 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "slice type too large (%d) at %d %d\n", h
->slice_type
, s
->mb_x
, s
->mb_y
);
3742 h
->slice_type_fixed
=1;
3744 h
->slice_type_fixed
=0;
3746 slice_type
= golomb_to_pict_type
[ slice_type
];
3747 if (slice_type
== FF_I_TYPE
3748 || (h0
->current_slice
!= 0 && slice_type
== h0
->last_slice_type
) ) {
3749 default_ref_list_done
= 1;
3751 h
->slice_type
= slice_type
;
3752 h
->slice_type_nos
= slice_type
& 3;
3754 s
->pict_type
= h
->slice_type
; // to make a few old functions happy, it's wrong though
3755 if (s
->pict_type
== FF_B_TYPE
&& s0
->last_picture_ptr
== NULL
) {
3756 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3757 "B picture before any references, skipping\n");
3761 pps_id
= get_ue_golomb(&s
->gb
);
3762 if(pps_id
>=MAX_PPS_COUNT
){
3763 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "pps_id out of range\n");
3766 if(!h0
->pps_buffers
[pps_id
]) {
3767 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non-existing PPS %u referenced\n", pps_id
);
3770 h
->pps
= *h0
->pps_buffers
[pps_id
];
3772 if(!h0
->sps_buffers
[h
->pps
.sps_id
]) {
3773 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non-existing SPS %u referenced\n", h
->pps
.sps_id
);
3776 h
->sps
= *h0
->sps_buffers
[h
->pps
.sps_id
];
3778 if(h
== h0
&& h
->dequant_coeff_pps
!= pps_id
){
3779 h
->dequant_coeff_pps
= pps_id
;
3780 init_dequant_tables(h
);
3783 s
->mb_width
= h
->sps
.mb_width
;
3784 s
->mb_height
= h
->sps
.mb_height
* (2 - h
->sps
.frame_mbs_only_flag
);
3786 h
->b_stride
= s
->mb_width
*4;
3787 h
->b8_stride
= s
->mb_width
*2;
3789 s
->width
= 16*s
->mb_width
- 2*FFMIN(h
->sps
.crop_right
, 7);
3790 if(h
->sps
.frame_mbs_only_flag
)
3791 s
->height
= 16*s
->mb_height
- 2*FFMIN(h
->sps
.crop_bottom
, 7);
3793 s
->height
= 16*s
->mb_height
- 4*FFMIN(h
->sps
.crop_bottom
, 3);
3795 if (s
->context_initialized
3796 && ( s
->width
!= s
->avctx
->width
|| s
->height
!= s
->avctx
->height
)) {
3798 return -1; // width / height changed during parallelized decoding
3800 flush_dpb(s
->avctx
);
3803 if (!s
->context_initialized
) {
3805 return -1; // we cant (re-)initialize context during parallel decoding
3806 if (MPV_common_init(s
) < 0)
3810 init_scan_tables(h
);
3813 for(i
= 1; i
< s
->avctx
->thread_count
; i
++) {
3815 c
= h
->thread_context
[i
] = av_malloc(sizeof(H264Context
));
3816 memcpy(c
, h
->s
.thread_context
[i
], sizeof(MpegEncContext
));
3817 memset(&c
->s
+ 1, 0, sizeof(H264Context
) - sizeof(MpegEncContext
));
3820 init_scan_tables(c
);
3824 for(i
= 0; i
< s
->avctx
->thread_count
; i
++)
3825 if(context_init(h
->thread_context
[i
]) < 0)
3828 s
->avctx
->width
= s
->width
;
3829 s
->avctx
->height
= s
->height
;
3830 s
->avctx
->sample_aspect_ratio
= h
->sps
.sar
;
3831 if(!s
->avctx
->sample_aspect_ratio
.den
)
3832 s
->avctx
->sample_aspect_ratio
.den
= 1;
3834 if(h
->sps
.timing_info_present_flag
){
3835 s
->avctx
->time_base
= (AVRational
){h
->sps
.num_units_in_tick
, h
->sps
.time_scale
};
3836 if(h
->x264_build
> 0 && h
->x264_build
< 44)
3837 s
->avctx
->time_base
.den
*= 2;
3838 av_reduce(&s
->avctx
->time_base
.num
, &s
->avctx
->time_base
.den
,
3839 s
->avctx
->time_base
.num
, s
->avctx
->time_base
.den
, 1<<30);
3843 h
->frame_num
= get_bits(&s
->gb
, h
->sps
.log2_max_frame_num
);
3846 h
->mb_aff_frame
= 0;
3847 last_pic_structure
= s0
->picture_structure
;
3848 if(h
->sps
.frame_mbs_only_flag
){
3849 s
->picture_structure
= PICT_FRAME
;
3851 if(get_bits1(&s
->gb
)) { //field_pic_flag
3852 s
->picture_structure
= PICT_TOP_FIELD
+ get_bits1(&s
->gb
); //bottom_field_flag
3854 s
->picture_structure
= PICT_FRAME
;
3855 h
->mb_aff_frame
= h
->sps
.mb_aff
;
3858 h
->mb_field_decoding_flag
= s
->picture_structure
!= PICT_FRAME
;
3860 if(h0
->current_slice
== 0){
3861 while(h
->frame_num
!= h
->prev_frame_num
&&
3862 h
->frame_num
!= (h
->prev_frame_num
+1)%(1<<h
->sps
.log2_max_frame_num
)){
3863 av_log(NULL
, AV_LOG_DEBUG
, "Frame num gap %d %d\n", h
->frame_num
, h
->prev_frame_num
);
3864 if (frame_start(h
) < 0)
3866 h
->prev_frame_num
++;
3867 h
->prev_frame_num
%= 1<<h
->sps
.log2_max_frame_num
;
3868 s
->current_picture_ptr
->frame_num
= h
->prev_frame_num
;
3869 execute_ref_pic_marking(h
, NULL
, 0);
3872 /* See if we have a decoded first field looking for a pair... */
3873 if (s0
->first_field
) {
3874 assert(s0
->current_picture_ptr
);
3875 assert(s0
->current_picture_ptr
->data
[0]);
3876 assert(s0
->current_picture_ptr
->reference
!= DELAYED_PIC_REF
);
3878 /* figure out if we have a complementary field pair */
3879 if (!FIELD_PICTURE
|| s
->picture_structure
== last_pic_structure
) {
3881 * Previous field is unmatched. Don't display it, but let it
3882 * remain for reference if marked as such.
3884 s0
->current_picture_ptr
= NULL
;
3885 s0
->first_field
= FIELD_PICTURE
;
3888 if (h
->nal_ref_idc
&&
3889 s0
->current_picture_ptr
->reference
&&
3890 s0
->current_picture_ptr
->frame_num
!= h
->frame_num
) {
3892 * This and previous field were reference, but had
3893 * different frame_nums. Consider this field first in
3894 * pair. Throw away previous field except for reference
3897 s0
->first_field
= 1;
3898 s0
->current_picture_ptr
= NULL
;
3901 /* Second field in complementary pair */
3902 s0
->first_field
= 0;
3907 /* Frame or first field in a potentially complementary pair */
3908 assert(!s0
->current_picture_ptr
);
3909 s0
->first_field
= FIELD_PICTURE
;
3912 if((!FIELD_PICTURE
|| s0
->first_field
) && frame_start(h
) < 0) {
3913 s0
->first_field
= 0;
3920 s
->current_picture_ptr
->frame_num
= h
->frame_num
; //FIXME frame_num cleanup
3922 assert(s
->mb_num
== s
->mb_width
* s
->mb_height
);
3923 if(first_mb_in_slice
<< FIELD_OR_MBAFF_PICTURE
>= s
->mb_num
||
3924 first_mb_in_slice
>= s
->mb_num
){
3925 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "first_mb_in_slice overflow\n");
3928 s
->resync_mb_x
= s
->mb_x
= first_mb_in_slice
% s
->mb_width
;
3929 s
->resync_mb_y
= s
->mb_y
= (first_mb_in_slice
/ s
->mb_width
) << FIELD_OR_MBAFF_PICTURE
;
3930 if (s
->picture_structure
== PICT_BOTTOM_FIELD
)
3931 s
->resync_mb_y
= s
->mb_y
= s
->mb_y
+ 1;
3932 assert(s
->mb_y
< s
->mb_height
);
3934 if(s
->picture_structure
==PICT_FRAME
){
3935 h
->curr_pic_num
= h
->frame_num
;
3936 h
->max_pic_num
= 1<< h
->sps
.log2_max_frame_num
;
3938 h
->curr_pic_num
= 2*h
->frame_num
+ 1;
3939 h
->max_pic_num
= 1<<(h
->sps
.log2_max_frame_num
+ 1);
3942 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
3943 get_ue_golomb(&s
->gb
); /* idr_pic_id */
3946 if(h
->sps
.poc_type
==0){
3947 h
->poc_lsb
= get_bits(&s
->gb
, h
->sps
.log2_max_poc_lsb
);
3949 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
){
3950 h
->delta_poc_bottom
= get_se_golomb(&s
->gb
);
3954 if(h
->sps
.poc_type
==1 && !h
->sps
.delta_pic_order_always_zero_flag
){
3955 h
->delta_poc
[0]= get_se_golomb(&s
->gb
);
3957 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
)
3958 h
->delta_poc
[1]= get_se_golomb(&s
->gb
);
3963 if(h
->pps
.redundant_pic_cnt_present
){
3964 h
->redundant_pic_count
= get_ue_golomb(&s
->gb
);
3967 //set defaults, might be overridden a few lines later
3968 h
->ref_count
[0]= h
->pps
.ref_count
[0];
3969 h
->ref_count
[1]= h
->pps
.ref_count
[1];
3971 if(h
->slice_type_nos
!= FF_I_TYPE
){
3972 if(h
->slice_type_nos
== FF_B_TYPE
){
3973 h
->direct_spatial_mv_pred
= get_bits1(&s
->gb
);
3975 num_ref_idx_active_override_flag
= get_bits1(&s
->gb
);
3977 if(num_ref_idx_active_override_flag
){
3978 h
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
3979 if(h
->slice_type_nos
==FF_B_TYPE
)
3980 h
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
3982 if(h
->ref_count
[0]-1 > 32-1 || h
->ref_count
[1]-1 > 32-1){
3983 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow\n");
3984 h
->ref_count
[0]= h
->ref_count
[1]= 1;
3988 if(h
->slice_type_nos
== FF_B_TYPE
)
3995 if(!default_ref_list_done
){
3996 fill_default_ref_list(h
);
3999 if(h
->slice_type_nos
!=FF_I_TYPE
&& decode_ref_pic_list_reordering(h
) < 0)
4002 if(h
->slice_type_nos
!=FF_I_TYPE
){
4003 s
->last_picture_ptr
= &h
->ref_list
[0][0];
4004 ff_copy_picture(&s
->last_picture
, s
->last_picture_ptr
);
4006 if(h
->slice_type_nos
==FF_B_TYPE
){
4007 s
->next_picture_ptr
= &h
->ref_list
[1][0];
4008 ff_copy_picture(&s
->next_picture
, s
->next_picture_ptr
);
4011 if( (h
->pps
.weighted_pred
&& h
->slice_type_nos
== FF_P_TYPE
)
4012 || (h
->pps
.weighted_bipred_idc
==1 && h
->slice_type_nos
== FF_B_TYPE
) )
4013 pred_weight_table(h
);
4014 else if(h
->pps
.weighted_bipred_idc
==2 && h
->slice_type_nos
== FF_B_TYPE
)
4015 implicit_weight_table(h
);
4018 for (i
= 0; i
< 2; i
++) {
4019 h
->luma_weight_flag
[i
] = 0;
4020 h
->chroma_weight_flag
[i
] = 0;
4025 decode_ref_pic_marking(h0
, &s
->gb
);
4028 fill_mbaff_ref_list(h
);
4030 if(h
->slice_type_nos
==FF_B_TYPE
&& !h
->direct_spatial_mv_pred
)
4031 direct_dist_scale_factor(h
);
4032 direct_ref_list_init(h
);
4034 if( h
->slice_type_nos
!= FF_I_TYPE
&& h
->pps
.cabac
){
4035 tmp
= get_ue_golomb_31(&s
->gb
);
4037 av_log(s
->avctx
, AV_LOG_ERROR
, "cabac_init_idc overflow\n");
4040 h
->cabac_init_idc
= tmp
;
4043 h
->last_qscale_diff
= 0;
4044 tmp
= h
->pps
.init_qp
+ get_se_golomb(&s
->gb
);
4046 av_log(s
->avctx
, AV_LOG_ERROR
, "QP %u out of range\n", tmp
);
4050 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
4051 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
4052 //FIXME qscale / qp ... stuff
4053 if(h
->slice_type
== FF_SP_TYPE
){
4054 get_bits1(&s
->gb
); /* sp_for_switch_flag */
4056 if(h
->slice_type
==FF_SP_TYPE
|| h
->slice_type
== FF_SI_TYPE
){
4057 get_se_golomb(&s
->gb
); /* slice_qs_delta */
4060 h
->deblocking_filter
= 1;
4061 h
->slice_alpha_c0_offset
= 0;
4062 h
->slice_beta_offset
= 0;
4063 if( h
->pps
.deblocking_filter_parameters_present
) {
4064 tmp
= get_ue_golomb_31(&s
->gb
);
4066 av_log(s
->avctx
, AV_LOG_ERROR
, "deblocking_filter_idc %u out of range\n", tmp
);
4069 h
->deblocking_filter
= tmp
;
4070 if(h
->deblocking_filter
< 2)
4071 h
->deblocking_filter
^= 1; // 1<->0
4073 if( h
->deblocking_filter
) {
4074 h
->slice_alpha_c0_offset
= get_se_golomb(&s
->gb
) << 1;
4075 h
->slice_beta_offset
= get_se_golomb(&s
->gb
) << 1;
4079 if( s
->avctx
->skip_loop_filter
>= AVDISCARD_ALL
4080 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONKEY
&& h
->slice_type_nos
!= FF_I_TYPE
)
4081 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_BIDIR
&& h
->slice_type_nos
== FF_B_TYPE
)
4082 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
4083 h
->deblocking_filter
= 0;
4085 if(h
->deblocking_filter
== 1 && h0
->max_contexts
> 1) {
4086 if(s
->avctx
->flags2
& CODEC_FLAG2_FAST
) {
4087 /* Cheat slightly for speed:
4088 Do not bother to deblock across slices. */
4089 h
->deblocking_filter
= 2;
4091 h0
->max_contexts
= 1;
4092 if(!h0
->single_decode_warning
) {
4093 av_log(s
->avctx
, AV_LOG_INFO
, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4094 h0
->single_decode_warning
= 1;
4097 return 1; // deblocking switched inside frame
4102 if( h
->pps
.num_slice_groups
> 1 && h
->pps
.mb_slice_group_map_type
>= 3 && h
->pps
.mb_slice_group_map_type
<= 5)
4103 slice_group_change_cycle
= get_bits(&s
->gb
, ?);
4106 h0
->last_slice_type
= slice_type
;
4107 h
->slice_num
= ++h0
->current_slice
;
4108 if(h
->slice_num
>= MAX_SLICES
){
4109 av_log(s
->avctx
, AV_LOG_ERROR
, "Too many slices, increase MAX_SLICES and recompile\n");
4113 int *ref2frm
= h
->ref2frm
[h
->slice_num
&(MAX_SLICES
-1)][j
];
4117 ref2frm
[i
+2]= 4*h
->ref_list
[j
][i
].frame_num
4118 +(h
->ref_list
[j
][i
].reference
&3);
4121 for(i
=16; i
<48; i
++)
4122 ref2frm
[i
+4]= 4*h
->ref_list
[j
][i
].frame_num
4123 +(h
->ref_list
[j
][i
].reference
&3);
4126 h
->emu_edge_width
= (s
->flags
&CODEC_FLAG_EMU_EDGE
) ? 0 : 16;
4127 h
->emu_edge_height
= (FRAME_MBAFF
|| FIELD_PICTURE
) ? 0 : h
->emu_edge_width
;
4129 s
->avctx
->refs
= h
->sps
.ref_frame_count
;
4131 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
4132 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4134 (s
->picture_structure
==PICT_FRAME
? "F" : s
->picture_structure
==PICT_TOP_FIELD
? "T" : "B"),
4136 av_get_pict_type_char(h
->slice_type
), h
->slice_type_fixed
? " fix" : "", h
->nal_unit_type
== NAL_IDR_SLICE
? " IDR" : "",
4137 pps_id
, h
->frame_num
,
4138 s
->current_picture_ptr
->field_poc
[0], s
->current_picture_ptr
->field_poc
[1],
4139 h
->ref_count
[0], h
->ref_count
[1],
4141 h
->deblocking_filter
, h
->slice_alpha_c0_offset
/2, h
->slice_beta_offset
/2,
4143 h
->use_weight
==1 && h
->use_weight_chroma
? "c" : "",
4144 h
->slice_type
== FF_B_TYPE
? (h
->direct_spatial_mv_pred
? "SPAT" : "TEMP") : ""
4154 static inline int get_level_prefix(GetBitContext
*gb
){
4158 OPEN_READER(re
, gb
);
4159 UPDATE_CACHE(re
, gb
);
4160 buf
=GET_CACHE(re
, gb
);
4162 log
= 32 - av_log2(buf
);
4164 print_bin(buf
>>(32-log
), log
);
4165 av_log(NULL
, AV_LOG_DEBUG
, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf
>>(32-log
), log
, log
-1, get_bits_count(gb
), __FILE__
);
4168 LAST_SKIP_BITS(re
, gb
, log
);
4169 CLOSE_READER(re
, gb
);
4174 static inline int get_dct8x8_allowed(H264Context
*h
){
4175 if(h
->sps
.direct_8x8_inference_flag
)
4176 return !(*(uint64_t*)h
->sub_mb_type
& ((MB_TYPE_16x8
|MB_TYPE_8x16
|MB_TYPE_8x8
)*0x0001000100010001ULL
));
4178 return !(*(uint64_t*)h
->sub_mb_type
& ((MB_TYPE_16x8
|MB_TYPE_8x16
|MB_TYPE_8x8
|MB_TYPE_DIRECT2
)*0x0001000100010001ULL
));
4182 * decodes a residual block.
4183 * @param n block index
4184 * @param scantable scantable
4185 * @param max_coeff number of coefficients in the block
4186 * @return <0 if an error occurred
4188 static int decode_residual(H264Context
*h
, GetBitContext
*gb
, DCTELEM
*block
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
){
4189 MpegEncContext
* const s
= &h
->s
;
4190 static const int coeff_token_table_index
[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4192 int zeros_left
, coeff_num
, coeff_token
, total_coeff
, i
, j
, trailing_ones
, run_before
;
4194 //FIXME put trailing_onex into the context
4196 if(n
== CHROMA_DC_BLOCK_INDEX
){
4197 coeff_token
= get_vlc2(gb
, chroma_dc_coeff_token_vlc
.table
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 1);
4198 total_coeff
= coeff_token
>>2;
4200 if(n
== LUMA_DC_BLOCK_INDEX
){
4201 total_coeff
= pred_non_zero_count(h
, 0);
4202 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4203 total_coeff
= coeff_token
>>2;
4205 total_coeff
= pred_non_zero_count(h
, n
);
4206 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4207 total_coeff
= coeff_token
>>2;
4208 h
->non_zero_count_cache
[ scan8
[n
] ]= total_coeff
;
4212 //FIXME set last_non_zero?
4216 if(total_coeff
> (unsigned)max_coeff
) {
4217 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "corrupted macroblock %d %d (total_coeff=%d)\n", s
->mb_x
, s
->mb_y
, total_coeff
);
4221 trailing_ones
= coeff_token
&3;
4222 tprintf(h
->s
.avctx
, "trailing:%d, total:%d\n", trailing_ones
, total_coeff
);
4223 assert(total_coeff
<=16);
4225 i
= show_bits(gb
, 3);
4226 skip_bits(gb
, trailing_ones
);
4227 level
[0] = 1-((i
&4)>>1);
4228 level
[1] = 1-((i
&2) );
4229 level
[2] = 1-((i
&1)<<1);
4231 if(trailing_ones
<total_coeff
) {
4233 int suffix_length
= total_coeff
> 10 && trailing_ones
< 3;
4234 int bitsi
= show_bits(gb
, LEVEL_TAB_BITS
);
4235 int level_code
= cavlc_level_tab
[suffix_length
][bitsi
][0];
4237 skip_bits(gb
, cavlc_level_tab
[suffix_length
][bitsi
][1]);
4238 if(level_code
>= 100){
4239 prefix
= level_code
- 100;
4240 if(prefix
== LEVEL_TAB_BITS
)
4241 prefix
+= get_level_prefix(gb
);
4243 //first coefficient has suffix_length equal to 0 or 1
4244 if(prefix
<14){ //FIXME try to build a large unified VLC table for all this
4246 level_code
= (prefix
<<1) + get_bits1(gb
); //part
4248 level_code
= prefix
; //part
4249 }else if(prefix
==14){
4251 level_code
= (prefix
<<1) + get_bits1(gb
); //part
4253 level_code
= prefix
+ get_bits(gb
, 4); //part
4255 level_code
= 30 + get_bits(gb
, prefix
-3); //part
4257 level_code
+= (1<<(prefix
-3))-4096;
4260 if(trailing_ones
< 3) level_code
+= 2;
4263 mask
= -(level_code
&1);
4264 level
[trailing_ones
]= (((2+level_code
)>>1) ^ mask
) - mask
;
4266 if(trailing_ones
< 3) level_code
+= (level_code
>>31)|1;
4269 if(level_code
+ 3U > 6U)
4271 level
[trailing_ones
]= level_code
;
4274 //remaining coefficients have suffix_length > 0
4275 for(i
=trailing_ones
+1;i
<total_coeff
;i
++) {
4276 static const unsigned int suffix_limit
[7] = {0,3,6,12,24,48,INT_MAX
};
4277 int bitsi
= show_bits(gb
, LEVEL_TAB_BITS
);
4278 level_code
= cavlc_level_tab
[suffix_length
][bitsi
][0];
4280 skip_bits(gb
, cavlc_level_tab
[suffix_length
][bitsi
][1]);
4281 if(level_code
>= 100){
4282 prefix
= level_code
- 100;
4283 if(prefix
== LEVEL_TAB_BITS
){
4284 prefix
+= get_level_prefix(gb
);
4287 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
);
4289 level_code
= (15<<suffix_length
) + get_bits(gb
, prefix
-3);
4291 level_code
+= (1<<(prefix
-3))-4096;
4293 mask
= -(level_code
&1);
4294 level_code
= (((2+level_code
)>>1) ^ mask
) - mask
;
4296 level
[i
]= level_code
;
4298 if(suffix_limit
[suffix_length
] + level_code
> 2U*suffix_limit
[suffix_length
])
4303 if(total_coeff
== max_coeff
)
4306 if(n
== CHROMA_DC_BLOCK_INDEX
)
4307 zeros_left
= get_vlc2(gb
, chroma_dc_total_zeros_vlc
[ total_coeff
-1 ].table
, CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 1);
4309 zeros_left
= get_vlc2(gb
, total_zeros_vlc
[ total_coeff
-1 ].table
, TOTAL_ZEROS_VLC_BITS
, 1);
4312 coeff_num
= zeros_left
+ total_coeff
- 1;
4313 j
= scantable
[coeff_num
];
4315 block
[j
] = level
[0];
4316 for(i
=1;i
<total_coeff
;i
++) {
4319 else if(zeros_left
< 7){
4320 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4322 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4324 zeros_left
-= run_before
;
4325 coeff_num
-= 1 + run_before
;
4326 j
= scantable
[ coeff_num
];
4331 block
[j
] = (level
[0] * qmul
[j
] + 32)>>6;
4332 for(i
=1;i
<total_coeff
;i
++) {
4335 else if(zeros_left
< 7){
4336 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4338 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4340 zeros_left
-= run_before
;
4341 coeff_num
-= 1 + run_before
;
4342 j
= scantable
[ coeff_num
];
4344 block
[j
]= (level
[i
] * qmul
[j
] + 32)>>6;
4349 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "negative number of zero coeffs at %d %d\n", s
->mb_x
, s
->mb_y
);
4356 static void predict_field_decoding_flag(H264Context
*h
){
4357 MpegEncContext
* const s
= &h
->s
;
4358 const int mb_xy
= h
->mb_xy
;
4359 int mb_type
= (h
->slice_table
[mb_xy
-1] == h
->slice_num
)
4360 ? s
->current_picture
.mb_type
[mb_xy
-1]
4361 : (h
->slice_table
[mb_xy
-s
->mb_stride
] == h
->slice_num
)
4362 ? s
->current_picture
.mb_type
[mb_xy
-s
->mb_stride
]
4364 h
->mb_mbaff
= h
->mb_field_decoding_flag
= IS_INTERLACED(mb_type
) ? 1 : 0;
4368 * decodes a P_SKIP or B_SKIP macroblock
4370 static void decode_mb_skip(H264Context
*h
){
4371 MpegEncContext
* const s
= &h
->s
;
4372 const int mb_xy
= h
->mb_xy
;
4375 memset(h
->non_zero_count
[mb_xy
], 0, 16);
4376 memset(h
->non_zero_count_cache
+ 8, 0, 8*5); //FIXME ugly, remove pfui
4379 mb_type
|= MB_TYPE_INTERLACED
;
4381 if( h
->slice_type_nos
== FF_B_TYPE
)
4383 // just for fill_caches. pred_direct_motion will set the real mb_type
4384 mb_type
|= MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
|MB_TYPE_SKIP
;
4386 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4387 pred_direct_motion(h
, &mb_type
);
4388 mb_type
|= MB_TYPE_SKIP
;
4393 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P1L0
|MB_TYPE_SKIP
;
4395 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4396 pred_pskip_motion(h
, &mx
, &my
);
4397 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, 0, 1);
4398 fill_rectangle( h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mx
,my
), 4);
4401 write_back_motion(h
, mb_type
);
4402 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4403 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4404 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4405 h
->prev_mb_skipped
= 1;
4409 * decodes a macroblock
4410 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4412 static int decode_mb_cavlc(H264Context
*h
){
4413 MpegEncContext
* const s
= &h
->s
;
4415 int partition_count
;
4416 unsigned int mb_type
, cbp
;
4417 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
4419 mb_xy
= h
->mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
4421 tprintf(s
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
4422 cbp
= 0; /* avoid warning. FIXME: find a solution without slowing
4424 if(h
->slice_type_nos
!= FF_I_TYPE
){
4425 if(s
->mb_skip_run
==-1)
4426 s
->mb_skip_run
= get_ue_golomb(&s
->gb
);
4428 if (s
->mb_skip_run
--) {
4429 if(FRAME_MBAFF
&& (s
->mb_y
&1) == 0){
4430 if(s
->mb_skip_run
==0)
4431 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4433 predict_field_decoding_flag(h
);
4440 if( (s
->mb_y
&1) == 0 )
4441 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4444 h
->prev_mb_skipped
= 0;
4446 mb_type
= get_ue_golomb(&s
->gb
);
4447 if(h
->slice_type_nos
== FF_B_TYPE
){
4449 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
4450 mb_type
= b_mb_type_info
[mb_type
].type
;
4453 goto decode_intra_mb
;
4455 }else if(h
->slice_type_nos
== FF_P_TYPE
){
4457 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
4458 mb_type
= p_mb_type_info
[mb_type
].type
;
4461 goto decode_intra_mb
;
4464 assert(h
->slice_type_nos
== FF_I_TYPE
);
4465 if(h
->slice_type
== FF_SI_TYPE
&& mb_type
)
4469 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_type %d in %c slice too large at %d %d\n", mb_type
, av_get_pict_type_char(h
->slice_type
), s
->mb_x
, s
->mb_y
);
4473 cbp
= i_mb_type_info
[mb_type
].cbp
;
4474 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
4475 mb_type
= i_mb_type_info
[mb_type
].type
;
4479 mb_type
|= MB_TYPE_INTERLACED
;
4481 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4483 if(IS_INTRA_PCM(mb_type
)){
4486 // We assume these blocks are very rare so we do not optimize it.
4487 align_get_bits(&s
->gb
);
4489 // The pixels are stored in the same order as levels in h->mb array.
4490 for(x
=0; x
< (CHROMA
? 384 : 256); x
++){
4491 ((uint8_t*)h
->mb
)[x
]= get_bits(&s
->gb
, 8);
4494 // In deblocking, the quantizer is 0
4495 s
->current_picture
.qscale_table
[mb_xy
]= 0;
4496 // All coeffs are present
4497 memset(h
->non_zero_count
[mb_xy
], 16, 16);
4499 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4504 h
->ref_count
[0] <<= 1;
4505 h
->ref_count
[1] <<= 1;
4508 fill_caches(h
, mb_type
, 0);
4511 if(IS_INTRA(mb_type
)){
4513 // init_top_left_availability(h);
4514 if(IS_INTRA4x4(mb_type
)){
4517 if(dct8x8_allowed
&& get_bits1(&s
->gb
)){
4518 mb_type
|= MB_TYPE_8x8DCT
;
4522 // fill_intra4x4_pred_table(h);
4523 for(i
=0; i
<16; i
+=di
){
4524 int mode
= pred_intra_mode(h
, i
);
4526 if(!get_bits1(&s
->gb
)){
4527 const int rem_mode
= get_bits(&s
->gb
, 3);
4528 mode
= rem_mode
+ (rem_mode
>= mode
);
4532 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
4534 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = mode
;
4536 write_back_intra_pred_mode(h
);
4537 if( check_intra4x4_pred_mode(h
) < 0)
4540 h
->intra16x16_pred_mode
= check_intra_pred_mode(h
, h
->intra16x16_pred_mode
);
4541 if(h
->intra16x16_pred_mode
< 0)
4545 pred_mode
= check_intra_pred_mode(h
, get_ue_golomb_31(&s
->gb
));
4548 h
->chroma_pred_mode
= pred_mode
;
4550 }else if(partition_count
==4){
4551 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
4553 if(h
->slice_type_nos
== FF_B_TYPE
){
4555 h
->sub_mb_type
[i
]= get_ue_golomb_31(&s
->gb
);
4556 if(h
->sub_mb_type
[i
] >=13){
4557 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "B sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4560 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4561 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4563 if( IS_DIRECT(h
->sub_mb_type
[0]) || IS_DIRECT(h
->sub_mb_type
[1])
4564 || IS_DIRECT(h
->sub_mb_type
[2]) || IS_DIRECT(h
->sub_mb_type
[3])) {
4565 pred_direct_motion(h
, &mb_type
);
4566 h
->ref_cache
[0][scan8
[4]] =
4567 h
->ref_cache
[1][scan8
[4]] =
4568 h
->ref_cache
[0][scan8
[12]] =
4569 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
4572 assert(h
->slice_type_nos
== FF_P_TYPE
); //FIXME SP correct ?
4574 h
->sub_mb_type
[i
]= get_ue_golomb_31(&s
->gb
);
4575 if(h
->sub_mb_type
[i
] >=4){
4576 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "P sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4579 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4580 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4584 for(list
=0; list
<h
->list_count
; list
++){
4585 int ref_count
= IS_REF0(mb_type
) ? 1 : h
->ref_count
[list
];
4587 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
4588 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4592 }else if(ref_count
== 2){
4593 tmp
= get_bits1(&s
->gb
)^1;
4595 tmp
= get_ue_golomb_31(&s
->gb
);
4597 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", tmp
);
4610 dct8x8_allowed
= get_dct8x8_allowed(h
);
4612 for(list
=0; list
<h
->list_count
; list
++){
4614 if(IS_DIRECT(h
->sub_mb_type
[i
])) {
4615 h
->ref_cache
[list
][ scan8
[4*i
] ] = h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
4618 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
4619 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
4621 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4622 const int sub_mb_type
= h
->sub_mb_type
[i
];
4623 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
4624 for(j
=0; j
<sub_partition_count
[i
]; j
++){
4626 const int index
= 4*i
+ block_width
*j
;
4627 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
4628 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mx
, &my
);
4629 mx
+= get_se_golomb(&s
->gb
);
4630 my
+= get_se_golomb(&s
->gb
);
4631 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4633 if(IS_SUB_8X8(sub_mb_type
)){
4635 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
4637 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
4638 }else if(IS_SUB_8X4(sub_mb_type
)){
4639 mv_cache
[ 1 ][0]= mx
;
4640 mv_cache
[ 1 ][1]= my
;
4641 }else if(IS_SUB_4X8(sub_mb_type
)){
4642 mv_cache
[ 8 ][0]= mx
;
4643 mv_cache
[ 8 ][1]= my
;
4645 mv_cache
[ 0 ][0]= mx
;
4646 mv_cache
[ 0 ][1]= my
;
4649 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
4655 }else if(IS_DIRECT(mb_type
)){
4656 pred_direct_motion(h
, &mb_type
);
4657 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
4659 int list
, mx
, my
, i
;
4660 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4661 if(IS_16X16(mb_type
)){
4662 for(list
=0; list
<h
->list_count
; list
++){
4664 if(IS_DIR(mb_type
, 0, list
)){
4665 if(h
->ref_count
[list
]==1){
4667 }else if(h
->ref_count
[list
]==2){
4668 val
= get_bits1(&s
->gb
)^1;
4670 val
= get_ue_golomb_31(&s
->gb
);
4671 if(val
>= h
->ref_count
[list
]){
4672 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4677 val
= LIST_NOT_USED
&0xFF;
4678 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 1);
4680 for(list
=0; list
<h
->list_count
; list
++){
4682 if(IS_DIR(mb_type
, 0, list
)){
4683 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mx
, &my
);
4684 mx
+= get_se_golomb(&s
->gb
);
4685 my
+= get_se_golomb(&s
->gb
);
4686 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4688 val
= pack16to32(mx
,my
);
4691 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 4);
4694 else if(IS_16X8(mb_type
)){
4695 for(list
=0; list
<h
->list_count
; list
++){
4698 if(IS_DIR(mb_type
, i
, list
)){
4699 if(h
->ref_count
[list
] == 1){
4701 }else if(h
->ref_count
[list
] == 2){
4702 val
= get_bits1(&s
->gb
)^1;
4704 val
= get_ue_golomb_31(&s
->gb
);
4705 if(val
>= h
->ref_count
[list
]){
4706 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4711 val
= LIST_NOT_USED
&0xFF;
4712 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 1);
4715 for(list
=0; list
<h
->list_count
; list
++){
4718 if(IS_DIR(mb_type
, i
, list
)){
4719 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mx
, &my
);
4720 mx
+= get_se_golomb(&s
->gb
);
4721 my
+= get_se_golomb(&s
->gb
);
4722 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4724 val
= pack16to32(mx
,my
);
4727 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 4);
4731 assert(IS_8X16(mb_type
));
4732 for(list
=0; list
<h
->list_count
; list
++){
4735 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
4736 if(h
->ref_count
[list
]==1){
4738 }else if(h
->ref_count
[list
]==2){
4739 val
= get_bits1(&s
->gb
)^1;
4741 val
= get_ue_golomb_31(&s
->gb
);
4742 if(val
>= h
->ref_count
[list
]){
4743 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4748 val
= LIST_NOT_USED
&0xFF;
4749 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 1);
4752 for(list
=0; list
<h
->list_count
; list
++){
4755 if(IS_DIR(mb_type
, i
, list
)){
4756 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mx
, &my
);
4757 mx
+= get_se_golomb(&s
->gb
);
4758 my
+= get_se_golomb(&s
->gb
);
4759 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4761 val
= pack16to32(mx
,my
);
4764 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 4);
4770 if(IS_INTER(mb_type
))
4771 write_back_motion(h
, mb_type
);
4773 if(!IS_INTRA16x16(mb_type
)){
4774 cbp
= get_ue_golomb(&s
->gb
);
4776 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cbp too large (%u) at %d %d\n", cbp
, s
->mb_x
, s
->mb_y
);
4781 if(IS_INTRA4x4(mb_type
)) cbp
= golomb_to_intra4x4_cbp
[cbp
];
4782 else cbp
= golomb_to_inter_cbp
[cbp
];
4784 if(IS_INTRA4x4(mb_type
)) cbp
= golomb_to_intra4x4_cbp_gray
[cbp
];
4785 else cbp
= golomb_to_inter_cbp_gray
[cbp
];
4790 if(dct8x8_allowed
&& (cbp
&15) && !IS_INTRA(mb_type
)){
4791 if(get_bits1(&s
->gb
)){
4792 mb_type
|= MB_TYPE_8x8DCT
;
4793 h
->cbp_table
[mb_xy
]= cbp
;
4796 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4798 if(cbp
|| IS_INTRA16x16(mb_type
)){
4799 int i8x8
, i4x4
, chroma_idx
;
4801 GetBitContext
*gb
= IS_INTRA(mb_type
) ? h
->intra_gb_ptr
: h
->inter_gb_ptr
;
4802 const uint8_t *scan
, *scan8x8
, *dc_scan
;
4804 // fill_non_zero_count_cache(h);
4806 if(IS_INTERLACED(mb_type
)){
4807 scan8x8
= s
->qscale
? h
->field_scan8x8_cavlc
: h
->field_scan8x8_cavlc_q0
;
4808 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
4809 dc_scan
= luma_dc_field_scan
;
4811 scan8x8
= s
->qscale
? h
->zigzag_scan8x8_cavlc
: h
->zigzag_scan8x8_cavlc_q0
;
4812 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
4813 dc_scan
= luma_dc_zigzag_scan
;
4816 dquant
= get_se_golomb(&s
->gb
);
4818 if( dquant
> 25 || dquant
< -26 ){
4819 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "dquant out of range (%d) at %d %d\n", dquant
, s
->mb_x
, s
->mb_y
);
4823 s
->qscale
+= dquant
;
4824 if(((unsigned)s
->qscale
) > 51){
4825 if(s
->qscale
<0) s
->qscale
+= 52;
4826 else s
->qscale
-= 52;
4829 h
->chroma_qp
[0]= get_chroma_qp(h
, 0, s
->qscale
);
4830 h
->chroma_qp
[1]= get_chroma_qp(h
, 1, s
->qscale
);
4831 if(IS_INTRA16x16(mb_type
)){
4832 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
, LUMA_DC_BLOCK_INDEX
, dc_scan
, h
->dequant4_coeff
[0][s
->qscale
], 16) < 0){
4833 return -1; //FIXME continue if partitioned and other return -1 too
4836 assert((cbp
&15) == 0 || (cbp
&15) == 15);
4839 for(i8x8
=0; i8x8
<4; i8x8
++){
4840 for(i4x4
=0; i4x4
<4; i4x4
++){
4841 const int index
= i4x4
+ 4*i8x8
;
4842 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
+ 16*index
, index
, scan
+ 1, h
->dequant4_coeff
[0][s
->qscale
], 15) < 0 ){
4848 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
4851 for(i8x8
=0; i8x8
<4; i8x8
++){
4852 if(cbp
& (1<<i8x8
)){
4853 if(IS_8x8DCT(mb_type
)){
4854 DCTELEM
*buf
= &h
->mb
[64*i8x8
];
4856 for(i4x4
=0; i4x4
<4; i4x4
++){
4857 if( decode_residual(h
, gb
, buf
, i4x4
+4*i8x8
, scan8x8
+16*i4x4
,
4858 h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 16) <0 )
4861 nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
4862 nnz
[0] += nnz
[1] + nnz
[8] + nnz
[9];
4864 for(i4x4
=0; i4x4
<4; i4x4
++){
4865 const int index
= i4x4
+ 4*i8x8
;
4867 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
, h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
], 16) <0 ){
4873 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
4874 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
4880 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++)
4881 if( decode_residual(h
, gb
, h
->mb
+ 256 + 16*4*chroma_idx
, CHROMA_DC_BLOCK_INDEX
, chroma_dc_scan
, NULL
, 4) < 0){
4887 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++){
4888 const uint32_t *qmul
= h
->dequant4_coeff
[chroma_idx
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[chroma_idx
]];
4889 for(i4x4
=0; i4x4
<4; i4x4
++){
4890 const int index
= 16 + 4*chroma_idx
+ i4x4
;
4891 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
+ 1, qmul
, 15) < 0){
4897 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
4898 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
4899 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
4902 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
4903 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
4904 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
4905 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
4907 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4908 write_back_non_zero_count(h
);
4911 h
->ref_count
[0] >>= 1;
4912 h
->ref_count
[1] >>= 1;
4918 static int decode_cabac_field_decoding_flag(H264Context
*h
) {
4919 MpegEncContext
* const s
= &h
->s
;
4920 const int mb_x
= s
->mb_x
;
4921 const int mb_y
= s
->mb_y
& ~1;
4922 const int mba_xy
= mb_x
- 1 + mb_y
*s
->mb_stride
;
4923 const int mbb_xy
= mb_x
+ (mb_y
-2)*s
->mb_stride
;
4925 unsigned int ctx
= 0;
4927 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) ) {
4930 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) ) {
4934 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[70 + ctx
] );
4937 static int decode_cabac_intra_mb_type(H264Context
*h
, int ctx_base
, int intra_slice
) {
4938 uint8_t *state
= &h
->cabac_state
[ctx_base
];
4942 MpegEncContext
* const s
= &h
->s
;
4943 const int mba_xy
= h
->left_mb_xy
[0];
4944 const int mbb_xy
= h
->top_mb_xy
;
4946 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mba_xy
] ) )
4948 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mbb_xy
] ) )
4950 if( get_cabac_noinline( &h
->cabac
, &state
[ctx
] ) == 0 )
4951 return 0; /* I4x4 */
4954 if( get_cabac_noinline( &h
->cabac
, &state
[0] ) == 0 )
4955 return 0; /* I4x4 */
4958 if( get_cabac_terminate( &h
->cabac
) )
4959 return 25; /* PCM */
4961 mb_type
= 1; /* I16x16 */
4962 mb_type
+= 12 * get_cabac_noinline( &h
->cabac
, &state
[1] ); /* cbp_luma != 0 */
4963 if( get_cabac_noinline( &h
->cabac
, &state
[2] ) ) /* cbp_chroma */
4964 mb_type
+= 4 + 4 * get_cabac_noinline( &h
->cabac
, &state
[2+intra_slice
] );
4965 mb_type
+= 2 * get_cabac_noinline( &h
->cabac
, &state
[3+intra_slice
] );
4966 mb_type
+= 1 * get_cabac_noinline( &h
->cabac
, &state
[3+2*intra_slice
] );
4970 static int decode_cabac_mb_type_b( H264Context
*h
) {
4971 MpegEncContext
* const s
= &h
->s
;
4973 const int mba_xy
= h
->left_mb_xy
[0];
4974 const int mbb_xy
= h
->top_mb_xy
;
4977 assert(h
->slice_type_nos
== FF_B_TYPE
);
4979 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mba_xy
] ) )
4981 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mbb_xy
] ) )
4984 if( !get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+ctx
] ) )
4985 return 0; /* B_Direct_16x16 */
4987 if( !get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+3] ) ) {
4988 return 1 + get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ); /* B_L[01]_16x16 */
4991 bits
= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+4] ) << 3;
4992 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ) << 2;
4993 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ) << 1;
4994 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] );
4996 return bits
+ 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4997 else if( bits
== 13 ) {
4998 return decode_cabac_intra_mb_type(h
, 32, 0) + 23;
4999 } else if( bits
== 14 )
5000 return 11; /* B_L1_L0_8x16 */
5001 else if( bits
== 15 )
5002 return 22; /* B_8x8 */
5004 bits
= ( bits
<<1 ) | get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] );
5005 return bits
- 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5008 static int decode_cabac_mb_skip( H264Context
*h
, int mb_x
, int mb_y
) {
5009 MpegEncContext
* const s
= &h
->s
;
5013 if(FRAME_MBAFF
){ //FIXME merge with the stuff in fill_caches?
5014 int mb_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
5017 && h
->slice_table
[mba_xy
] == h
->slice_num
5018 && MB_FIELD
== !!IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) )
5019 mba_xy
+= s
->mb_stride
;
5021 mbb_xy
= mb_xy
- s
->mb_stride
;
5023 && h
->slice_table
[mbb_xy
] == h
->slice_num
5024 && IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) )
5025 mbb_xy
-= s
->mb_stride
;
5027 mbb_xy
= mb_x
+ (mb_y
-1)*s
->mb_stride
;
5029 int mb_xy
= h
->mb_xy
;
5031 mbb_xy
= mb_xy
- (s
->mb_stride
<< FIELD_PICTURE
);
5034 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mba_xy
] ))
5036 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mbb_xy
] ))
5039 if( h
->slice_type_nos
== FF_B_TYPE
)
5041 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[11+ctx
] );
5044 static int decode_cabac_mb_intra4x4_pred_mode( H264Context
*h
, int pred_mode
) {
5047 if( get_cabac( &h
->cabac
, &h
->cabac_state
[68] ) )
5050 mode
+= 1 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5051 mode
+= 2 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5052 mode
+= 4 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5054 if( mode
>= pred_mode
)
5060 static int decode_cabac_mb_chroma_pre_mode( H264Context
*h
) {
5061 const int mba_xy
= h
->left_mb_xy
[0];
5062 const int mbb_xy
= h
->top_mb_xy
;
5066 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5067 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mba_xy
] != 0 )
5070 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mbb_xy
] != 0 )
5073 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+ctx
] ) == 0 )
5076 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
5078 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
5084 static int decode_cabac_mb_cbp_luma( H264Context
*h
) {
5085 int cbp_b
, cbp_a
, ctx
, cbp
= 0;
5087 cbp_a
= h
->slice_table
[h
->left_mb_xy
[0]] == h
->slice_num
? h
->left_cbp
: -1;
5088 cbp_b
= h
->slice_table
[h
->top_mb_xy
] == h
->slice_num
? h
->top_cbp
: -1;
5090 ctx
= !(cbp_a
& 0x02) + 2 * !(cbp_b
& 0x04);
5091 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]);
5092 ctx
= !(cbp
& 0x01) + 2 * !(cbp_b
& 0x08);
5093 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 1;
5094 ctx
= !(cbp_a
& 0x08) + 2 * !(cbp
& 0x01);
5095 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 2;
5096 ctx
= !(cbp
& 0x04) + 2 * !(cbp
& 0x02);
5097 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 3;
5100 static int decode_cabac_mb_cbp_chroma( H264Context
*h
) {
5104 cbp_a
= (h
->left_cbp
>>4)&0x03;
5105 cbp_b
= (h
-> top_cbp
>>4)&0x03;
5108 if( cbp_a
> 0 ) ctx
++;
5109 if( cbp_b
> 0 ) ctx
+= 2;
5110 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[77 + ctx
] ) == 0 )
5114 if( cbp_a
== 2 ) ctx
++;
5115 if( cbp_b
== 2 ) ctx
+= 2;
5116 return 1 + get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[77 + ctx
] );
5118 static int decode_cabac_mb_dqp( H264Context
*h
) {
5119 int ctx
= h
->last_qscale_diff
!= 0;
5122 while( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[60 + ctx
] ) ) {
5125 if(val
> 102) //prevent infinite loop
5130 return (val
+ 1)>>1 ;
5132 return -((val
+ 1)>>1);
5134 static int decode_cabac_p_mb_sub_type( H264Context
*h
) {
5135 if( get_cabac( &h
->cabac
, &h
->cabac_state
[21] ) )
5137 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[22] ) )
5139 if( get_cabac( &h
->cabac
, &h
->cabac_state
[23] ) )
5143 static int decode_cabac_b_mb_sub_type( H264Context
*h
) {
5145 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[36] ) )
5146 return 0; /* B_Direct_8x8 */
5147 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[37] ) )
5148 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L0_8x8, B_L1_8x8 */
5150 if( get_cabac( &h
->cabac
, &h
->cabac_state
[38] ) ) {
5151 if( get_cabac( &h
->cabac
, &h
->cabac_state
[39] ) )
5152 return 11 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L1_4x4, B_Bi_4x4 */
5155 type
+= 2*get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5156 type
+= get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5160 static inline int decode_cabac_mb_transform_size( H264Context
*h
) {
5161 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[399 + h
->neighbor_transform_size
] );
5164 static int decode_cabac_mb_ref( H264Context
*h
, int list
, int n
) {
5165 int refa
= h
->ref_cache
[list
][scan8
[n
] - 1];
5166 int refb
= h
->ref_cache
[list
][scan8
[n
] - 8];
5170 if( h
->slice_type_nos
== FF_B_TYPE
) {
5171 if( refa
> 0 && !h
->direct_cache
[scan8
[n
] - 1] )
5173 if( refb
> 0 && !h
->direct_cache
[scan8
[n
] - 8] )
5182 while( get_cabac( &h
->cabac
, &h
->cabac_state
[54+ctx
] ) ) {
5185 if(ref
>= 32 /*h->ref_list[list]*/){
5192 static int decode_cabac_mb_mvd( H264Context
*h
, int list
, int n
, int l
) {
5193 int amvd
= abs( h
->mvd_cache
[list
][scan8
[n
] - 1][l
] ) +
5194 abs( h
->mvd_cache
[list
][scan8
[n
] - 8][l
] );
5195 int ctxbase
= (l
== 0) ? 40 : 47;
5197 int ctx
= (amvd
>2) + (amvd
>32);
5199 if(!get_cabac(&h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
]))
5204 while( mvd
< 9 && get_cabac( &h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
] ) ) {
5212 while( get_cabac_bypass( &h
->cabac
) ) {
5216 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "overflow in decode_cabac_mb_mvd\n");
5221 if( get_cabac_bypass( &h
->cabac
) )
5225 return get_cabac_bypass_sign( &h
->cabac
, -mvd
);
5228 static av_always_inline
int get_cabac_cbf_ctx( H264Context
*h
, int cat
, int idx
, int is_dc
) {
5234 nza
= h
->left_cbp
&0x100;
5235 nzb
= h
-> top_cbp
&0x100;
5237 nza
= (h
->left_cbp
>>(6+idx
))&0x01;
5238 nzb
= (h
-> top_cbp
>>(6+idx
))&0x01;
5241 assert(cat
== 1 || cat
== 2 || cat
== 4);
5242 nza
= h
->non_zero_count_cache
[scan8
[idx
] - 1];
5243 nzb
= h
->non_zero_count_cache
[scan8
[idx
] - 8];
5252 return ctx
+ 4 * cat
;
5255 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8
[63]) = {
5256 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5257 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5258 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5259 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5262 static av_always_inline
void decode_cabac_residual_internal( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
, int is_dc
) {
5263 static const int significant_coeff_flag_offset
[2][6] = {
5264 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5265 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5267 static const int last_coeff_flag_offset
[2][6] = {
5268 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5269 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5271 static const int coeff_abs_level_m1_offset
[6] = {
5272 227+0, 227+10, 227+20, 227+30, 227+39, 426
5274 static const uint8_t significant_coeff_flag_offset_8x8
[2][63] = {
5275 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5276 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5277 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5278 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5279 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5280 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5281 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5282 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5284 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5285 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5286 * map node ctx => cabac ctx for level=1 */
5287 static const uint8_t coeff_abs_level1_ctx
[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5288 /* map node ctx => cabac ctx for level>1 */
5289 static const uint8_t coeff_abs_levelgt1_ctx
[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5290 static const uint8_t coeff_abs_level_transition
[2][8] = {
5291 /* update node ctx after decoding a level=1 */
5292 { 1, 2, 3, 3, 4, 5, 6, 7 },
5293 /* update node ctx after decoding a level>1 */
5294 { 4, 4, 4, 4, 5, 6, 7, 7 }
5300 int coeff_count
= 0;
5303 uint8_t *significant_coeff_ctx_base
;
5304 uint8_t *last_coeff_ctx_base
;
5305 uint8_t *abs_level_m1_ctx_base
;
5308 #define CABAC_ON_STACK
5310 #ifdef CABAC_ON_STACK
5313 cc
.range
= h
->cabac
.range
;
5314 cc
.low
= h
->cabac
.low
;
5315 cc
.bytestream
= h
->cabac
.bytestream
;
5317 #define CC &h->cabac
5321 /* cat: 0-> DC 16x16 n = 0
5322 * 1-> AC 16x16 n = luma4x4idx
5323 * 2-> Luma4x4 n = luma4x4idx
5324 * 3-> DC Chroma n = iCbCr
5325 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5326 * 5-> Luma8x8 n = 4 * luma8x8idx
5329 /* read coded block flag */
5330 if( is_dc
|| cat
!= 5 ) {
5331 if( get_cabac( CC
, &h
->cabac_state
[85 + get_cabac_cbf_ctx( h
, cat
, n
, is_dc
) ] ) == 0 ) {
5333 h
->non_zero_count_cache
[scan8
[n
]] = 0;
5335 #ifdef CABAC_ON_STACK
5336 h
->cabac
.range
= cc
.range
;
5337 h
->cabac
.low
= cc
.low
;
5338 h
->cabac
.bytestream
= cc
.bytestream
;
5344 significant_coeff_ctx_base
= h
->cabac_state
5345 + significant_coeff_flag_offset
[MB_FIELD
][cat
];
5346 last_coeff_ctx_base
= h
->cabac_state
5347 + last_coeff_flag_offset
[MB_FIELD
][cat
];
5348 abs_level_m1_ctx_base
= h
->cabac_state
5349 + coeff_abs_level_m1_offset
[cat
];
5351 if( !is_dc
&& cat
== 5 ) {
5352 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5353 for(last= 0; last < coefs; last++) { \
5354 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5355 if( get_cabac( CC, sig_ctx )) { \
5356 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5357 index[coeff_count++] = last; \
5358 if( get_cabac( CC, last_ctx ) ) { \
5364 if( last == max_coeff -1 ) {\
5365 index[coeff_count++] = last;\
5367 const uint8_t *sig_off
= significant_coeff_flag_offset_8x8
[MB_FIELD
];
5368 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5369 coeff_count
= decode_significance_8x8_x86(CC
, significant_coeff_ctx_base
, index
, sig_off
);
5371 coeff_count
= decode_significance_x86(CC
, max_coeff
, significant_coeff_ctx_base
, index
);
5373 DECODE_SIGNIFICANCE( 63, sig_off
[last
], last_coeff_flag_offset_8x8
[last
] );
5375 DECODE_SIGNIFICANCE( max_coeff
- 1, last
, last
);
5378 assert(coeff_count
> 0);
5382 h
->cbp_table
[h
->mb_xy
] |= 0x100;
5384 h
->cbp_table
[h
->mb_xy
] |= 0x40 << n
;
5387 fill_rectangle(&h
->non_zero_count_cache
[scan8
[n
]], 2, 2, 8, coeff_count
, 1);
5389 assert( cat
== 1 || cat
== 2 || cat
== 4 );
5390 h
->non_zero_count_cache
[scan8
[n
]] = coeff_count
;
5395 uint8_t *ctx
= coeff_abs_level1_ctx
[node_ctx
] + abs_level_m1_ctx_base
;
5397 int j
= scantable
[index
[--coeff_count
]];
5399 if( get_cabac( CC
, ctx
) == 0 ) {
5400 node_ctx
= coeff_abs_level_transition
[0][node_ctx
];
5402 block
[j
] = get_cabac_bypass_sign( CC
, -1);
5404 block
[j
] = (get_cabac_bypass_sign( CC
, -qmul
[j
]) + 32) >> 6;
5408 ctx
= coeff_abs_levelgt1_ctx
[node_ctx
] + abs_level_m1_ctx_base
;
5409 node_ctx
= coeff_abs_level_transition
[1][node_ctx
];
5411 while( coeff_abs
< 15 && get_cabac( CC
, ctx
) ) {
5415 if( coeff_abs
>= 15 ) {
5417 while( get_cabac_bypass( CC
) ) {
5423 coeff_abs
+= coeff_abs
+ get_cabac_bypass( CC
);
5429 block
[j
] = get_cabac_bypass_sign( CC
, -coeff_abs
);
5431 block
[j
] = (get_cabac_bypass_sign( CC
, -coeff_abs
) * qmul
[j
] + 32) >> 6;
5434 } while( coeff_count
);
5435 #ifdef CABAC_ON_STACK
5436 h
->cabac
.range
= cc
.range
;
5437 h
->cabac
.low
= cc
.low
;
5438 h
->cabac
.bytestream
= cc
.bytestream
;
5444 static void decode_cabac_residual_dc( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
5445 decode_cabac_residual_internal(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
, 1);
5448 static void decode_cabac_residual_nondc( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
5449 decode_cabac_residual_internal(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
, 0);
5453 static void decode_cabac_residual( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
5455 decode_cabac_residual_internal(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
, cat
== 0 || cat
== 3);
5457 if( cat
== 0 || cat
== 3 ) decode_cabac_residual_dc(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
);
5458 else decode_cabac_residual_nondc(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
);
5462 static inline void compute_mb_neighbors(H264Context
*h
)
5464 MpegEncContext
* const s
= &h
->s
;
5465 const int mb_xy
= h
->mb_xy
;
5466 h
->top_mb_xy
= mb_xy
- s
->mb_stride
;
5467 h
->left_mb_xy
[0] = mb_xy
- 1;
5469 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
5470 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
5471 const int top_mb_field_flag
= IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
5472 const int left_mb_field_flag
= IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
5473 const int curr_mb_field_flag
= MB_FIELD
;
5474 const int bottom
= (s
->mb_y
& 1);
5476 if (curr_mb_field_flag
&& (bottom
|| top_mb_field_flag
)){
5477 h
->top_mb_xy
-= s
->mb_stride
;
5479 if (!left_mb_field_flag
== curr_mb_field_flag
) {
5480 h
->left_mb_xy
[0] = pair_xy
- 1;
5482 } else if (FIELD_PICTURE
) {
5483 h
->top_mb_xy
-= s
->mb_stride
;
5489 * decodes a macroblock
5490 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5492 static int decode_mb_cabac(H264Context
*h
) {
5493 MpegEncContext
* const s
= &h
->s
;
5495 int mb_type
, partition_count
, cbp
= 0;
5496 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
5498 mb_xy
= h
->mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5500 tprintf(s
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
5501 if( h
->slice_type_nos
!= FF_I_TYPE
) {
5503 /* a skipped mb needs the aff flag from the following mb */
5504 if( FRAME_MBAFF
&& s
->mb_x
==0 && (s
->mb_y
&1)==0 )
5505 predict_field_decoding_flag(h
);
5506 if( FRAME_MBAFF
&& (s
->mb_y
&1)==1 && h
->prev_mb_skipped
)
5507 skip
= h
->next_mb_skipped
;
5509 skip
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
);
5510 /* read skip flags */
5512 if( FRAME_MBAFF
&& (s
->mb_y
&1)==0 ){
5513 s
->current_picture
.mb_type
[mb_xy
] = MB_TYPE_SKIP
;
5514 h
->next_mb_skipped
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
+1 );
5515 if(!h
->next_mb_skipped
)
5516 h
->mb_mbaff
= h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5521 h
->cbp_table
[mb_xy
] = 0;
5522 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5523 h
->last_qscale_diff
= 0;
5530 if( (s
->mb_y
&1) == 0 )
5532 h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5535 h
->prev_mb_skipped
= 0;
5537 compute_mb_neighbors(h
);
5539 if( h
->slice_type_nos
== FF_B_TYPE
) {
5540 mb_type
= decode_cabac_mb_type_b( h
);
5542 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
5543 mb_type
= b_mb_type_info
[mb_type
].type
;
5546 goto decode_intra_mb
;
5548 } else if( h
->slice_type_nos
== FF_P_TYPE
) {
5549 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[14] ) == 0 ) {
5551 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[15] ) == 0 ) {
5552 /* P_L0_D16x16, P_8x8 */
5553 mb_type
= 3 * get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[16] );
5555 /* P_L0_D8x16, P_L0_D16x8 */
5556 mb_type
= 2 - get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[17] );
5558 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
5559 mb_type
= p_mb_type_info
[mb_type
].type
;
5561 mb_type
= decode_cabac_intra_mb_type(h
, 17, 0);
5562 goto decode_intra_mb
;
5565 mb_type
= decode_cabac_intra_mb_type(h
, 3, 1);
5566 if(h
->slice_type
== FF_SI_TYPE
&& mb_type
)
5568 assert(h
->slice_type_nos
== FF_I_TYPE
);
5570 partition_count
= 0;
5571 cbp
= i_mb_type_info
[mb_type
].cbp
;
5572 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
5573 mb_type
= i_mb_type_info
[mb_type
].type
;
5576 mb_type
|= MB_TYPE_INTERLACED
;
5578 h
->slice_table
[ mb_xy
]= h
->slice_num
;
5580 if(IS_INTRA_PCM(mb_type
)) {
5583 // We assume these blocks are very rare so we do not optimize it.
5584 // FIXME The two following lines get the bitstream position in the cabac
5585 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5586 ptr
= h
->cabac
.bytestream
;
5587 if(h
->cabac
.low
&0x1) ptr
--;
5589 if(h
->cabac
.low
&0x1FF) ptr
--;
5592 // The pixels are stored in the same order as levels in h->mb array.
5593 memcpy(h
->mb
, ptr
, 256); ptr
+=256;
5595 memcpy(h
->mb
+128, ptr
, 128); ptr
+=128;
5598 ff_init_cabac_decoder(&h
->cabac
, ptr
, h
->cabac
.bytestream_end
- ptr
);
5600 // All blocks are present
5601 h
->cbp_table
[mb_xy
] = 0x1ef;
5602 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5603 // In deblocking, the quantizer is 0
5604 s
->current_picture
.qscale_table
[mb_xy
]= 0;
5605 // All coeffs are present
5606 memset(h
->non_zero_count
[mb_xy
], 16, 16);
5607 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5608 h
->last_qscale_diff
= 0;
5613 h
->ref_count
[0] <<= 1;
5614 h
->ref_count
[1] <<= 1;
5617 fill_caches(h
, mb_type
, 0);
5619 if( IS_INTRA( mb_type
) ) {
5621 if( IS_INTRA4x4( mb_type
) ) {
5622 if( dct8x8_allowed
&& decode_cabac_mb_transform_size( h
) ) {
5623 mb_type
|= MB_TYPE_8x8DCT
;
5624 for( i
= 0; i
< 16; i
+=4 ) {
5625 int pred
= pred_intra_mode( h
, i
);
5626 int mode
= decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5627 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
5630 for( i
= 0; i
< 16; i
++ ) {
5631 int pred
= pred_intra_mode( h
, i
);
5632 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5634 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5637 write_back_intra_pred_mode(h
);
5638 if( check_intra4x4_pred_mode(h
) < 0 ) return -1;
5640 h
->intra16x16_pred_mode
= check_intra_pred_mode( h
, h
->intra16x16_pred_mode
);
5641 if( h
->intra16x16_pred_mode
< 0 ) return -1;
5644 h
->chroma_pred_mode_table
[mb_xy
] =
5645 pred_mode
= decode_cabac_mb_chroma_pre_mode( h
);
5647 pred_mode
= check_intra_pred_mode( h
, pred_mode
);
5648 if( pred_mode
< 0 ) return -1;
5649 h
->chroma_pred_mode
= pred_mode
;
5651 } else if( partition_count
== 4 ) {
5652 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
5654 if( h
->slice_type_nos
== FF_B_TYPE
) {
5655 for( i
= 0; i
< 4; i
++ ) {
5656 h
->sub_mb_type
[i
] = decode_cabac_b_mb_sub_type( h
);
5657 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5658 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5660 if( IS_DIRECT(h
->sub_mb_type
[0] | h
->sub_mb_type
[1] |
5661 h
->sub_mb_type
[2] | h
->sub_mb_type
[3]) ) {
5662 pred_direct_motion(h
, &mb_type
);
5663 h
->ref_cache
[0][scan8
[4]] =
5664 h
->ref_cache
[1][scan8
[4]] =
5665 h
->ref_cache
[0][scan8
[12]] =
5666 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
5667 if( h
->ref_count
[0] > 1 || h
->ref_count
[1] > 1 ) {
5668 for( i
= 0; i
< 4; i
++ )
5669 if( IS_DIRECT(h
->sub_mb_type
[i
]) )
5670 fill_rectangle( &h
->direct_cache
[scan8
[4*i
]], 2, 2, 8, 1, 1 );
5674 for( i
= 0; i
< 4; i
++ ) {
5675 h
->sub_mb_type
[i
] = decode_cabac_p_mb_sub_type( h
);
5676 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5677 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5681 for( list
= 0; list
< h
->list_count
; list
++ ) {
5682 for( i
= 0; i
< 4; i
++ ) {
5683 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
5684 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
5685 if( h
->ref_count
[list
] > 1 ){
5686 ref
[list
][i
] = decode_cabac_mb_ref( h
, list
, 4*i
);
5687 if(ref
[list
][i
] >= (unsigned)h
->ref_count
[list
]){
5688 av_log(s
->avctx
, AV_LOG_ERROR
, "Reference %d >= %d\n", ref
[list
][i
], h
->ref_count
[list
]);
5696 h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
5697 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
5702 dct8x8_allowed
= get_dct8x8_allowed(h
);
5704 for(list
=0; list
<h
->list_count
; list
++){
5706 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
5707 if(IS_DIRECT(h
->sub_mb_type
[i
])){
5708 fill_rectangle(h
->mvd_cache
[list
][scan8
[4*i
]], 2, 2, 8, 0, 4);
5712 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
) && !IS_DIRECT(h
->sub_mb_type
[i
])){
5713 const int sub_mb_type
= h
->sub_mb_type
[i
];
5714 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
5715 for(j
=0; j
<sub_partition_count
[i
]; j
++){
5718 const int index
= 4*i
+ block_width
*j
;
5719 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
5720 int16_t (* mvd_cache
)[2]= &h
->mvd_cache
[list
][ scan8
[index
] ];
5721 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mpx
, &mpy
);
5723 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, index
, 0 );
5724 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, index
, 1 );
5725 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5727 if(IS_SUB_8X8(sub_mb_type
)){
5729 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
5731 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
5734 mvd_cache
[ 8 ][0]= mvd_cache
[ 9 ][0]= mx
- mpx
;
5736 mvd_cache
[ 8 ][1]= mvd_cache
[ 9 ][1]= my
- mpy
;
5737 }else if(IS_SUB_8X4(sub_mb_type
)){
5738 mv_cache
[ 1 ][0]= mx
;
5739 mv_cache
[ 1 ][1]= my
;
5741 mvd_cache
[ 1 ][0]= mx
- mpx
;
5742 mvd_cache
[ 1 ][1]= my
- mpy
;
5743 }else if(IS_SUB_4X8(sub_mb_type
)){
5744 mv_cache
[ 8 ][0]= mx
;
5745 mv_cache
[ 8 ][1]= my
;
5747 mvd_cache
[ 8 ][0]= mx
- mpx
;
5748 mvd_cache
[ 8 ][1]= my
- mpy
;
5750 mv_cache
[ 0 ][0]= mx
;
5751 mv_cache
[ 0 ][1]= my
;
5753 mvd_cache
[ 0 ][0]= mx
- mpx
;
5754 mvd_cache
[ 0 ][1]= my
- mpy
;
5757 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
5758 uint32_t *pd
= (uint32_t *)&h
->mvd_cache
[list
][ scan8
[4*i
] ][0];
5759 p
[0] = p
[1] = p
[8] = p
[9] = 0;
5760 pd
[0]= pd
[1]= pd
[8]= pd
[9]= 0;
5764 } else if( IS_DIRECT(mb_type
) ) {
5765 pred_direct_motion(h
, &mb_type
);
5766 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
5767 fill_rectangle(h
->mvd_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
5768 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
5770 int list
, mx
, my
, i
, mpx
, mpy
;
5771 if(IS_16X16(mb_type
)){
5772 for(list
=0; list
<h
->list_count
; list
++){
5773 if(IS_DIR(mb_type
, 0, list
)){
5775 if(h
->ref_count
[list
] > 1){
5776 ref
= decode_cabac_mb_ref(h
, list
, 0);
5777 if(ref
>= (unsigned)h
->ref_count
[list
]){
5778 av_log(s
->avctx
, AV_LOG_ERROR
, "Reference %d >= %d\n", ref
, h
->ref_count
[list
]);
5783 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, ref
, 1);
5785 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED
, 1); //FIXME factorize and the other fill_rect below too
5787 for(list
=0; list
<h
->list_count
; list
++){
5788 if(IS_DIR(mb_type
, 0, list
)){
5789 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mpx
, &mpy
);
5791 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 0, 0 );
5792 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 0, 1 );
5793 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5795 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5796 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
,my
), 4);
5798 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, 0, 4);
5801 else if(IS_16X8(mb_type
)){
5802 for(list
=0; list
<h
->list_count
; list
++){
5804 if(IS_DIR(mb_type
, i
, list
)){
5806 if(h
->ref_count
[list
] > 1){
5807 ref
= decode_cabac_mb_ref( h
, list
, 8*i
);
5808 if(ref
>= (unsigned)h
->ref_count
[list
]){
5809 av_log(s
->avctx
, AV_LOG_ERROR
, "Reference %d >= %d\n", ref
, h
->ref_count
[list
]);
5814 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, ref
, 1);
5816 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, (LIST_NOT_USED
&0xFF), 1);
5819 for(list
=0; list
<h
->list_count
; list
++){
5821 if(IS_DIR(mb_type
, i
, list
)){
5822 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mpx
, &mpy
);
5823 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 0 );
5824 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 1 );
5825 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5827 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5828 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
,my
), 4);
5830 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5831 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5836 assert(IS_8X16(mb_type
));
5837 for(list
=0; list
<h
->list_count
; list
++){
5839 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
5841 if(h
->ref_count
[list
] > 1){
5842 ref
= decode_cabac_mb_ref( h
, list
, 4*i
);
5843 if(ref
>= (unsigned)h
->ref_count
[list
]){
5844 av_log(s
->avctx
, AV_LOG_ERROR
, "Reference %d >= %d\n", ref
, h
->ref_count
[list
]);
5849 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, ref
, 1);
5851 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, (LIST_NOT_USED
&0xFF), 1);
5854 for(list
=0; list
<h
->list_count
; list
++){
5856 if(IS_DIR(mb_type
, i
, list
)){
5857 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mpx
, &mpy
);
5858 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 0 );
5859 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 1 );
5861 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5862 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5863 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
,my
), 4);
5865 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5866 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5873 if( IS_INTER( mb_type
) ) {
5874 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5875 write_back_motion( h
, mb_type
);
5878 if( !IS_INTRA16x16( mb_type
) ) {
5879 cbp
= decode_cabac_mb_cbp_luma( h
);
5881 cbp
|= decode_cabac_mb_cbp_chroma( h
) << 4;
5884 h
->cbp_table
[mb_xy
] = h
->cbp
= cbp
;
5886 if( dct8x8_allowed
&& (cbp
&15) && !IS_INTRA( mb_type
) ) {
5887 if( decode_cabac_mb_transform_size( h
) )
5888 mb_type
|= MB_TYPE_8x8DCT
;
5890 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5892 if( cbp
|| IS_INTRA16x16( mb_type
) ) {
5893 const uint8_t *scan
, *scan8x8
, *dc_scan
;
5894 const uint32_t *qmul
;
5897 if(IS_INTERLACED(mb_type
)){
5898 scan8x8
= s
->qscale
? h
->field_scan8x8
: h
->field_scan8x8_q0
;
5899 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
5900 dc_scan
= luma_dc_field_scan
;
5902 scan8x8
= s
->qscale
? h
->zigzag_scan8x8
: h
->zigzag_scan8x8_q0
;
5903 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
5904 dc_scan
= luma_dc_zigzag_scan
;
5907 h
->last_qscale_diff
= dqp
= decode_cabac_mb_dqp( h
);
5908 if( dqp
== INT_MIN
){
5909 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cabac decode of qscale diff failed at %d %d\n", s
->mb_x
, s
->mb_y
);
5913 if(((unsigned)s
->qscale
) > 51){
5914 if(s
->qscale
<0) s
->qscale
+= 52;
5915 else s
->qscale
-= 52;
5917 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
5918 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
5920 if( IS_INTRA16x16( mb_type
) ) {
5922 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5923 decode_cabac_residual( h
, h
->mb
, 0, 0, dc_scan
, NULL
, 16);
5926 qmul
= h
->dequant4_coeff
[0][s
->qscale
];
5927 for( i
= 0; i
< 16; i
++ ) {
5928 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5929 decode_cabac_residual(h
, h
->mb
+ 16*i
, 1, i
, scan
+ 1, qmul
, 15);
5932 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
5936 for( i8x8
= 0; i8x8
< 4; i8x8
++ ) {
5937 if( cbp
& (1<<i8x8
) ) {
5938 if( IS_8x8DCT(mb_type
) ) {
5939 decode_cabac_residual(h
, h
->mb
+ 64*i8x8
, 5, 4*i8x8
,
5940 scan8x8
, h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 64);
5942 qmul
= h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
];
5943 for( i4x4
= 0; i4x4
< 4; i4x4
++ ) {
5944 const int index
= 4*i8x8
+ i4x4
;
5945 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5947 decode_cabac_residual(h
, h
->mb
+ 16*index
, 2, index
, scan
, qmul
, 16);
5948 //STOP_TIMER("decode_residual")
5952 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
5953 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
5960 for( c
= 0; c
< 2; c
++ ) {
5961 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5962 decode_cabac_residual(h
, h
->mb
+ 256 + 16*4*c
, 3, c
, chroma_dc_scan
, NULL
, 4);
5968 for( c
= 0; c
< 2; c
++ ) {
5969 qmul
= h
->dequant4_coeff
[c
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[c
]];
5970 for( i
= 0; i
< 4; i
++ ) {
5971 const int index
= 16 + 4 * c
+ i
;
5972 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5973 decode_cabac_residual(h
, h
->mb
+ 16*index
, 4, index
, scan
+ 1, qmul
, 15);
5977 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
5978 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
5979 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
5982 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
5983 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
5984 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
5985 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
5986 h
->last_qscale_diff
= 0;
5989 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
5990 write_back_non_zero_count(h
);
5993 h
->ref_count
[0] >>= 1;
5994 h
->ref_count
[1] >>= 1;
6001 static void filter_mb_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6002 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6003 const int alpha
= (alpha_table
+52)[index_a
];
6004 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6008 tc
[0] = (tc0_table
+52)[index_a
][bS
[0]];
6009 tc
[1] = (tc0_table
+52)[index_a
][bS
[1]];
6010 tc
[2] = (tc0_table
+52)[index_a
][bS
[2]];
6011 tc
[3] = (tc0_table
+52)[index_a
][bS
[3]];
6012 h
->s
.dsp
.h264_h_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6014 h
->s
.dsp
.h264_h_loop_filter_luma_intra(pix
, stride
, alpha
, beta
);
6017 static void filter_mb_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6018 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6019 const int alpha
= (alpha_table
+52)[index_a
];
6020 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6024 tc
[0] = (tc0_table
+52)[index_a
][bS
[0]]+1;
6025 tc
[1] = (tc0_table
+52)[index_a
][bS
[1]]+1;
6026 tc
[2] = (tc0_table
+52)[index_a
][bS
[2]]+1;
6027 tc
[3] = (tc0_table
+52)[index_a
][bS
[3]]+1;
6028 h
->s
.dsp
.h264_h_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
6030 h
->s
.dsp
.h264_h_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
6034 static void filter_mb_mbaff_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[8], int qp
[2] ) {
6036 for( i
= 0; i
< 16; i
++, pix
+= stride
) {
6042 int bS_index
= (i
>> 1);
6045 bS_index
|= (i
& 1);
6048 if( bS
[bS_index
] == 0 ) {
6052 qp_index
= MB_FIELD
? (i
>> 3) : (i
& 1);
6053 index_a
= qp
[qp_index
] + h
->slice_alpha_c0_offset
;
6054 alpha
= (alpha_table
+52)[index_a
];
6055 beta
= (beta_table
+52)[qp
[qp_index
] + h
->slice_beta_offset
];
6057 if( bS
[bS_index
] < 4 ) {
6058 const int tc0
= (tc0_table
+52)[index_a
][bS
[bS_index
]];
6059 const int p0
= pix
[-1];
6060 const int p1
= pix
[-2];
6061 const int p2
= pix
[-3];
6062 const int q0
= pix
[0];
6063 const int q1
= pix
[1];
6064 const int q2
= pix
[2];
6066 if( FFABS( p0
- q0
) < alpha
&&
6067 FFABS( p1
- p0
) < beta
&&
6068 FFABS( q1
- q0
) < beta
) {
6072 if( FFABS( p2
- p0
) < beta
) {
6073 pix
[-2] = p1
+ av_clip( ( p2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( p1
<< 1 ) ) >> 1, -tc0
, tc0
);
6076 if( FFABS( q2
- q0
) < beta
) {
6077 pix
[1] = q1
+ av_clip( ( q2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( q1
<< 1 ) ) >> 1, -tc0
, tc0
);
6081 i_delta
= av_clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6082 pix
[-1] = av_clip_uint8( p0
+ i_delta
); /* p0' */
6083 pix
[0] = av_clip_uint8( q0
- i_delta
); /* q0' */
6084 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6087 const int p0
= pix
[-1];
6088 const int p1
= pix
[-2];
6089 const int p2
= pix
[-3];
6091 const int q0
= pix
[0];
6092 const int q1
= pix
[1];
6093 const int q2
= pix
[2];
6095 if( FFABS( p0
- q0
) < alpha
&&
6096 FFABS( p1
- p0
) < beta
&&
6097 FFABS( q1
- q0
) < beta
) {
6099 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6100 if( FFABS( p2
- p0
) < beta
)
6102 const int p3
= pix
[-4];
6104 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6105 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6106 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6109 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6111 if( FFABS( q2
- q0
) < beta
)
6113 const int q3
= pix
[3];
6115 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6116 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6117 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6120 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6124 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6125 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6127 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6132 static void filter_mb_mbaff_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[8], int qp
[2] ) {
6134 for( i
= 0; i
< 8; i
++, pix
+= stride
) {
6142 if( bS
[bS_index
] == 0 ) {
6146 qp_index
= MB_FIELD
? (i
>> 2) : (i
& 1);
6147 index_a
= qp
[qp_index
] + h
->slice_alpha_c0_offset
;
6148 alpha
= (alpha_table
+52)[index_a
];
6149 beta
= (beta_table
+52)[qp
[qp_index
] + h
->slice_beta_offset
];
6151 if( bS
[bS_index
] < 4 ) {
6152 const int tc
= (tc0_table
+52)[index_a
][bS
[bS_index
]] + 1;
6153 const int p0
= pix
[-1];
6154 const int p1
= pix
[-2];
6155 const int q0
= pix
[0];
6156 const int q1
= pix
[1];
6158 if( FFABS( p0
- q0
) < alpha
&&
6159 FFABS( p1
- p0
) < beta
&&
6160 FFABS( q1
- q0
) < beta
) {
6161 const int i_delta
= av_clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6163 pix
[-1] = av_clip_uint8( p0
+ i_delta
); /* p0' */
6164 pix
[0] = av_clip_uint8( q0
- i_delta
); /* q0' */
6165 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6168 const int p0
= pix
[-1];
6169 const int p1
= pix
[-2];
6170 const int q0
= pix
[0];
6171 const int q1
= pix
[1];
6173 if( FFABS( p0
- q0
) < alpha
&&
6174 FFABS( p1
- p0
) < beta
&&
6175 FFABS( q1
- q0
) < beta
) {
6177 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2; /* p0' */
6178 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2; /* q0' */
6179 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, pix
[-3], p1
, p0
, q0
, q1
, pix
[2], pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6185 static void filter_mb_edgeh( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6186 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6187 const int alpha
= (alpha_table
+52)[index_a
];
6188 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6192 tc
[0] = (tc0_table
+52)[index_a
][bS
[0]];
6193 tc
[1] = (tc0_table
+52)[index_a
][bS
[1]];
6194 tc
[2] = (tc0_table
+52)[index_a
][bS
[2]];
6195 tc
[3] = (tc0_table
+52)[index_a
][bS
[3]];
6196 h
->s
.dsp
.h264_v_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6198 h
->s
.dsp
.h264_v_loop_filter_luma_intra(pix
, stride
, alpha
, beta
);
6202 static void filter_mb_edgech( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6203 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6204 const int alpha
= (alpha_table
+52)[index_a
];
6205 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6209 tc
[0] = (tc0_table
+52)[index_a
][bS
[0]]+1;
6210 tc
[1] = (tc0_table
+52)[index_a
][bS
[1]]+1;
6211 tc
[2] = (tc0_table
+52)[index_a
][bS
[2]]+1;
6212 tc
[3] = (tc0_table
+52)[index_a
][bS
[3]]+1;
6213 h
->s
.dsp
.h264_v_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
6215 h
->s
.dsp
.h264_v_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
6219 static void filter_mb_fast( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6220 MpegEncContext
* const s
= &h
->s
;
6221 int mb_y_firstrow
= s
->picture_structure
== PICT_BOTTOM_FIELD
;
6223 int qp
, qp0
, qp1
, qpc
, qpc0
, qpc1
, qp_thresh
;
6227 if(mb_x
==0 || mb_y
==mb_y_firstrow
|| !s
->dsp
.h264_loop_filter_strength
|| h
->pps
.chroma_qp_diff
||
6228 !(s
->flags2
& CODEC_FLAG2_FAST
) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6229 (h
->deblocking_filter
== 2 && (h
->slice_table
[mb_xy
] != h
->slice_table
[h
->top_mb_xy
] ||
6230 h
->slice_table
[mb_xy
] != h
->slice_table
[mb_xy
- 1]))) {
6231 filter_mb(h
, mb_x
, mb_y
, img_y
, img_cb
, img_cr
, linesize
, uvlinesize
);
6234 assert(!FRAME_MBAFF
);
6236 mb_type
= s
->current_picture
.mb_type
[mb_xy
];
6237 qp
= s
->current_picture
.qscale_table
[mb_xy
];
6238 qp0
= s
->current_picture
.qscale_table
[mb_xy
-1];
6239 qp1
= s
->current_picture
.qscale_table
[h
->top_mb_xy
];
6240 qpc
= get_chroma_qp( h
, 0, qp
);
6241 qpc0
= get_chroma_qp( h
, 0, qp0
);
6242 qpc1
= get_chroma_qp( h
, 0, qp1
);
6243 qp0
= (qp
+ qp0
+ 1) >> 1;
6244 qp1
= (qp
+ qp1
+ 1) >> 1;
6245 qpc0
= (qpc
+ qpc0
+ 1) >> 1;
6246 qpc1
= (qpc
+ qpc1
+ 1) >> 1;
6247 qp_thresh
= 15 - h
->slice_alpha_c0_offset
;
6248 if(qp
<= qp_thresh
&& qp0
<= qp_thresh
&& qp1
<= qp_thresh
&&
6249 qpc
<= qp_thresh
&& qpc0
<= qp_thresh
&& qpc1
<= qp_thresh
)
6252 if( IS_INTRA(mb_type
) ) {
6253 int16_t bS4
[4] = {4,4,4,4};
6254 int16_t bS3
[4] = {3,3,3,3};
6255 int16_t *bSH
= FIELD_PICTURE
? bS3
: bS4
;
6256 if( IS_8x8DCT(mb_type
) ) {
6257 filter_mb_edgev( h
, &img_y
[4*0], linesize
, bS4
, qp0
);
6258 filter_mb_edgev( h
, &img_y
[4*2], linesize
, bS3
, qp
);
6259 filter_mb_edgeh( h
, &img_y
[4*0*linesize
], linesize
, bSH
, qp1
);
6260 filter_mb_edgeh( h
, &img_y
[4*2*linesize
], linesize
, bS3
, qp
);
6262 filter_mb_edgev( h
, &img_y
[4*0], linesize
, bS4
, qp0
);
6263 filter_mb_edgev( h
, &img_y
[4*1], linesize
, bS3
, qp
);
6264 filter_mb_edgev( h
, &img_y
[4*2], linesize
, bS3
, qp
);
6265 filter_mb_edgev( h
, &img_y
[4*3], linesize
, bS3
, qp
);
6266 filter_mb_edgeh( h
, &img_y
[4*0*linesize
], linesize
, bSH
, qp1
);
6267 filter_mb_edgeh( h
, &img_y
[4*1*linesize
], linesize
, bS3
, qp
);
6268 filter_mb_edgeh( h
, &img_y
[4*2*linesize
], linesize
, bS3
, qp
);
6269 filter_mb_edgeh( h
, &img_y
[4*3*linesize
], linesize
, bS3
, qp
);
6271 filter_mb_edgecv( h
, &img_cb
[2*0], uvlinesize
, bS4
, qpc0
);
6272 filter_mb_edgecv( h
, &img_cb
[2*2], uvlinesize
, bS3
, qpc
);
6273 filter_mb_edgecv( h
, &img_cr
[2*0], uvlinesize
, bS4
, qpc0
);
6274 filter_mb_edgecv( h
, &img_cr
[2*2], uvlinesize
, bS3
, qpc
);
6275 filter_mb_edgech( h
, &img_cb
[2*0*uvlinesize
], uvlinesize
, bSH
, qpc1
);
6276 filter_mb_edgech( h
, &img_cb
[2*2*uvlinesize
], uvlinesize
, bS3
, qpc
);
6277 filter_mb_edgech( h
, &img_cr
[2*0*uvlinesize
], uvlinesize
, bSH
, qpc1
);
6278 filter_mb_edgech( h
, &img_cr
[2*2*uvlinesize
], uvlinesize
, bS3
, qpc
);
6281 DECLARE_ALIGNED_8(int16_t, bS
[2][4][4]);
6282 uint64_t (*bSv
)[4] = (uint64_t(*)[4])bS
;
6284 if( IS_8x8DCT(mb_type
) && (h
->cbp
&7) == 7 ) {
6286 bSv
[0][0] = bSv
[0][2] = bSv
[1][0] = bSv
[1][2] = 0x0002000200020002ULL
;
6288 int mask_edge1
= (mb_type
& (MB_TYPE_16x16
| MB_TYPE_8x16
)) ? 3 :
6289 (mb_type
& MB_TYPE_16x8
) ? 1 : 0;
6290 int mask_edge0
= (mb_type
& (MB_TYPE_16x16
| MB_TYPE_8x16
))
6291 && (s
->current_picture
.mb_type
[mb_xy
-1] & (MB_TYPE_16x16
| MB_TYPE_8x16
))
6293 int step
= IS_8x8DCT(mb_type
) ? 2 : 1;
6294 edges
= (mb_type
& MB_TYPE_16x16
) && !(h
->cbp
& 15) ? 1 : 4;
6295 s
->dsp
.h264_loop_filter_strength( bS
, h
->non_zero_count_cache
, h
->ref_cache
, h
->mv_cache
,
6296 (h
->slice_type_nos
== FF_B_TYPE
), edges
, step
, mask_edge0
, mask_edge1
, FIELD_PICTURE
);
6298 if( IS_INTRA(s
->current_picture
.mb_type
[mb_xy
-1]) )
6299 bSv
[0][0] = 0x0004000400040004ULL
;
6300 if( IS_INTRA(s
->current_picture
.mb_type
[h
->top_mb_xy
]) )
6301 bSv
[1][0] = FIELD_PICTURE
? 0x0003000300030003ULL
: 0x0004000400040004ULL
;
6303 #define FILTER(hv,dir,edge)\
6304 if(bSv[dir][edge]) {\
6305 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6307 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6308 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6314 } else if( IS_8x8DCT(mb_type
) ) {
6334 static av_always_inline
void filter_mb_dir(H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
, int mb_xy
, int mb_type
, int mvy_limit
, int first_vertical_edge_done
, int dir
) {
6335 MpegEncContext
* const s
= &h
->s
;
6337 const int mbm_xy
= dir
== 0 ? mb_xy
-1 : h
->top_mb_xy
;
6338 const int mbm_type
= s
->current_picture
.mb_type
[mbm_xy
];
6339 int (*ref2frm
) [64] = h
->ref2frm
[ h
->slice_num
&(MAX_SLICES
-1) ][0] + (MB_MBAFF
? 20 : 2);
6340 int (*ref2frmm
)[64] = h
->ref2frm
[ h
->slice_table
[mbm_xy
]&(MAX_SLICES
-1) ][0] + (MB_MBAFF
? 20 : 2);
6341 int start
= h
->slice_table
[mbm_xy
] == 0xFFFF ? 1 : 0;
6343 const int edges
= (mb_type
& (MB_TYPE_16x16
|MB_TYPE_SKIP
))
6344 == (MB_TYPE_16x16
|MB_TYPE_SKIP
) ? 1 : 4;
6345 // how often to recheck mv-based bS when iterating between edges
6346 const int mask_edge
= (mb_type
& (MB_TYPE_16x16
| (MB_TYPE_16x8
<< dir
))) ? 3 :
6347 (mb_type
& (MB_TYPE_8x16
>> dir
)) ? 1 : 0;
6348 // how often to recheck mv-based bS when iterating along each edge
6349 const int mask_par0
= mb_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
));
6351 if (first_vertical_edge_done
) {
6355 if (h
->deblocking_filter
==2 && h
->slice_table
[mbm_xy
] != h
->slice_table
[mb_xy
])
6358 if (FRAME_MBAFF
&& (dir
== 1) && ((mb_y
&1) == 0) && start
== 0
6359 && !IS_INTERLACED(mb_type
)
6360 && IS_INTERLACED(mbm_type
)
6362 // This is a special case in the norm where the filtering must
6363 // be done twice (one each of the field) even if we are in a
6364 // frame macroblock.
6366 static const int nnz_idx
[4] = {4,5,6,3};
6367 unsigned int tmp_linesize
= 2 * linesize
;
6368 unsigned int tmp_uvlinesize
= 2 * uvlinesize
;
6369 int mbn_xy
= mb_xy
- 2 * s
->mb_stride
;
6374 for(j
=0; j
<2; j
++, mbn_xy
+= s
->mb_stride
){
6375 if( IS_INTRA(mb_type
) ||
6376 IS_INTRA(s
->current_picture
.mb_type
[mbn_xy
]) ) {
6377 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 3;
6379 const uint8_t *mbn_nnz
= h
->non_zero_count
[mbn_xy
];
6380 for( i
= 0; i
< 4; i
++ ) {
6381 if( h
->non_zero_count_cache
[scan8
[0]+i
] != 0 ||
6382 mbn_nnz
[nnz_idx
[i
]] != 0 )
6388 // Do not use s->qscale as luma quantizer because it has not the same
6389 // value in IPCM macroblocks.
6390 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6391 tprintf(s
->avctx
, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, tmp_linesize
, tmp_uvlinesize
);
6392 { int i
; for (i
= 0; i
< 4; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6393 filter_mb_edgeh( h
, &img_y
[j
*linesize
], tmp_linesize
, bS
, qp
);
6394 filter_mb_edgech( h
, &img_cb
[j
*uvlinesize
], tmp_uvlinesize
, bS
,
6395 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6396 filter_mb_edgech( h
, &img_cr
[j
*uvlinesize
], tmp_uvlinesize
, bS
,
6397 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6404 for( edge
= start
; edge
< edges
; edge
++ ) {
6405 /* mbn_xy: neighbor macroblock */
6406 const int mbn_xy
= edge
> 0 ? mb_xy
: mbm_xy
;
6407 const int mbn_type
= s
->current_picture
.mb_type
[mbn_xy
];
6408 int (*ref2frmn
)[64] = edge
> 0 ? ref2frm
: ref2frmm
;
6412 if( (edge
&1) && IS_8x8DCT(mb_type
) )
6415 if( IS_INTRA(mb_type
) ||
6416 IS_INTRA(mbn_type
) ) {
6419 if ( (!IS_INTERLACED(mb_type
) && !IS_INTERLACED(mbm_type
))
6420 || ((FRAME_MBAFF
|| (s
->picture_structure
!= PICT_FRAME
)) && (dir
== 0))
6429 bS
[0] = bS
[1] = bS
[2] = bS
[3] = value
;
6434 if( edge
& mask_edge
) {
6435 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 0;
6438 else if( FRAME_MBAFF
&& IS_INTERLACED(mb_type
^ mbn_type
)) {
6439 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 1;
6442 else if( mask_par0
&& (edge
|| (mbn_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
)))) ) {
6443 int b_idx
= 8 + 4 + edge
* (dir
? 8:1);
6444 int bn_idx
= b_idx
- (dir
? 8:1);
6447 for( l
= 0; !v
&& l
< 1 + (h
->slice_type_nos
== FF_B_TYPE
); l
++ ) {
6448 v
|= ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[l
][h
->ref_cache
[l
][bn_idx
]] ||
6449 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6450 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
;
6453 if(h
->slice_type_nos
== FF_B_TYPE
&& v
){
6455 for( l
= 0; !v
&& l
< 2; l
++ ) {
6457 v
|= ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[ln
][h
->ref_cache
[ln
][bn_idx
]] ||
6458 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[ln
][bn_idx
][0] ) >= 4 ||
6459 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[ln
][bn_idx
][1] ) >= mvy_limit
;
6463 bS
[0] = bS
[1] = bS
[2] = bS
[3] = v
;
6469 for( i
= 0; i
< 4; i
++ ) {
6470 int x
= dir
== 0 ? edge
: i
;
6471 int y
= dir
== 0 ? i
: edge
;
6472 int b_idx
= 8 + 4 + x
+ 8*y
;
6473 int bn_idx
= b_idx
- (dir
? 8:1);
6475 if( h
->non_zero_count_cache
[b_idx
] |
6476 h
->non_zero_count_cache
[bn_idx
] ) {
6482 for( l
= 0; l
< 1 + (h
->slice_type_nos
== FF_B_TYPE
); l
++ ) {
6483 if( ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[l
][h
->ref_cache
[l
][bn_idx
]] ||
6484 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6485 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
) {
6491 if(h
->slice_type_nos
== FF_B_TYPE
&& bS
[i
]){
6493 for( l
= 0; l
< 2; l
++ ) {
6495 if( ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[ln
][h
->ref_cache
[ln
][bn_idx
]] ||
6496 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[ln
][bn_idx
][0] ) >= 4 ||
6497 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[ln
][bn_idx
][1] ) >= mvy_limit
) {
6506 if(bS
[0]+bS
[1]+bS
[2]+bS
[3] == 0)
6511 // Do not use s->qscale as luma quantizer because it has not the same
6512 // value in IPCM macroblocks.
6513 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6514 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6515 tprintf(s
->avctx
, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, linesize
, uvlinesize
);
6516 { int i
; for (i
= 0; i
< 4; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6518 filter_mb_edgev( h
, &img_y
[4*edge
], linesize
, bS
, qp
);
6519 if( (edge
&1) == 0 ) {
6520 filter_mb_edgecv( h
, &img_cb
[2*edge
], uvlinesize
, bS
,
6521 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6522 filter_mb_edgecv( h
, &img_cr
[2*edge
], uvlinesize
, bS
,
6523 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6526 filter_mb_edgeh( h
, &img_y
[4*edge
*linesize
], linesize
, bS
, qp
);
6527 if( (edge
&1) == 0 ) {
6528 filter_mb_edgech( h
, &img_cb
[2*edge
*uvlinesize
], uvlinesize
, bS
,
6529 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6530 filter_mb_edgech( h
, &img_cr
[2*edge
*uvlinesize
], uvlinesize
, bS
,
6531 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6537 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6538 MpegEncContext
* const s
= &h
->s
;
6539 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
6540 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
6541 const int mvy_limit
= IS_INTERLACED(mb_type
) ? 2 : 4;
6542 int first_vertical_edge_done
= 0;
6545 //for sufficiently low qp, filtering wouldn't do anything
6546 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6548 int qp_thresh
= 15 - h
->slice_alpha_c0_offset
- FFMAX3(0, h
->pps
.chroma_qp_index_offset
[0], h
->pps
.chroma_qp_index_offset
[1]);
6549 int qp
= s
->current_picture
.qscale_table
[mb_xy
];
6551 && (mb_x
== 0 || ((qp
+ s
->current_picture
.qscale_table
[mb_xy
-1] + 1)>>1) <= qp_thresh
)
6552 && (h
->top_mb_xy
< 0 || ((qp
+ s
->current_picture
.qscale_table
[h
->top_mb_xy
] + 1)>>1) <= qp_thresh
)){
6557 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6558 if(!h
->pps
.cabac
&& h
->pps
.transform_8x8_mode
){
6559 int top_type
, left_type
[2];
6560 top_type
= s
->current_picture
.mb_type
[h
->top_mb_xy
] ;
6561 left_type
[0] = s
->current_picture
.mb_type
[h
->left_mb_xy
[0]];
6562 left_type
[1] = s
->current_picture
.mb_type
[h
->left_mb_xy
[1]];
6564 if(IS_8x8DCT(top_type
)){
6565 h
->non_zero_count_cache
[4+8*0]=
6566 h
->non_zero_count_cache
[5+8*0]= h
->cbp_table
[h
->top_mb_xy
] & 4;
6567 h
->non_zero_count_cache
[6+8*0]=
6568 h
->non_zero_count_cache
[7+8*0]= h
->cbp_table
[h
->top_mb_xy
] & 8;
6570 if(IS_8x8DCT(left_type
[0])){
6571 h
->non_zero_count_cache
[3+8*1]=
6572 h
->non_zero_count_cache
[3+8*2]= h
->cbp_table
[h
->left_mb_xy
[0]]&2; //FIXME check MBAFF
6574 if(IS_8x8DCT(left_type
[1])){
6575 h
->non_zero_count_cache
[3+8*3]=
6576 h
->non_zero_count_cache
[3+8*4]= h
->cbp_table
[h
->left_mb_xy
[1]]&8; //FIXME check MBAFF
6579 if(IS_8x8DCT(mb_type
)){
6580 h
->non_zero_count_cache
[scan8
[0 ]]= h
->non_zero_count_cache
[scan8
[1 ]]=
6581 h
->non_zero_count_cache
[scan8
[2 ]]= h
->non_zero_count_cache
[scan8
[3 ]]= h
->cbp
& 1;
6583 h
->non_zero_count_cache
[scan8
[0+ 4]]= h
->non_zero_count_cache
[scan8
[1+ 4]]=
6584 h
->non_zero_count_cache
[scan8
[2+ 4]]= h
->non_zero_count_cache
[scan8
[3+ 4]]= h
->cbp
& 2;
6586 h
->non_zero_count_cache
[scan8
[0+ 8]]= h
->non_zero_count_cache
[scan8
[1+ 8]]=
6587 h
->non_zero_count_cache
[scan8
[2+ 8]]= h
->non_zero_count_cache
[scan8
[3+ 8]]= h
->cbp
& 4;
6589 h
->non_zero_count_cache
[scan8
[0+12]]= h
->non_zero_count_cache
[scan8
[1+12]]=
6590 h
->non_zero_count_cache
[scan8
[2+12]]= h
->non_zero_count_cache
[scan8
[3+12]]= h
->cbp
& 8;
6595 // left mb is in picture
6596 && h
->slice_table
[mb_xy
-1] != 0xFFFF
6597 // and current and left pair do not have the same interlaced type
6598 && (IS_INTERLACED(mb_type
) != IS_INTERLACED(s
->current_picture
.mb_type
[mb_xy
-1]))
6599 // and left mb is in the same slice if deblocking_filter == 2
6600 && (h
->deblocking_filter
!=2 || h
->slice_table
[mb_xy
-1] == h
->slice_table
[mb_xy
])) {
6601 /* First vertical edge is different in MBAFF frames
6602 * There are 8 different bS to compute and 2 different Qp
6604 const int pair_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
6605 const int left_mb_xy
[2] = { pair_xy
-1, pair_xy
-1+s
->mb_stride
};
6610 int mb_qp
, mbn0_qp
, mbn1_qp
;
6612 first_vertical_edge_done
= 1;
6614 if( IS_INTRA(mb_type
) )
6615 bS
[0] = bS
[1] = bS
[2] = bS
[3] = bS
[4] = bS
[5] = bS
[6] = bS
[7] = 4;
6617 for( i
= 0; i
< 8; i
++ ) {
6618 int mbn_xy
= MB_FIELD
? left_mb_xy
[i
>>2] : left_mb_xy
[i
&1];
6620 if( IS_INTRA( s
->current_picture
.mb_type
[mbn_xy
] ) )
6622 else if( h
->non_zero_count_cache
[12+8*(i
>>1)] != 0 ||
6623 ((!h
->pps
.cabac
&& IS_8x8DCT(s
->current_picture
.mb_type
[mbn_xy
])) ?
6624 (h
->cbp_table
[mbn_xy
] & ((MB_FIELD
? (i
&2) : (mb_y
&1)) ? 8 : 2))
6626 h
->non_zero_count
[mbn_xy
][MB_FIELD
? i
&3 : (i
>>2)+(mb_y
&1)*2]))
6633 mb_qp
= s
->current_picture
.qscale_table
[mb_xy
];
6634 mbn0_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[0]];
6635 mbn1_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[1]];
6636 qp
[0] = ( mb_qp
+ mbn0_qp
+ 1 ) >> 1;
6637 bqp
[0] = ( get_chroma_qp( h
, 0, mb_qp
) +
6638 get_chroma_qp( h
, 0, mbn0_qp
) + 1 ) >> 1;
6639 rqp
[0] = ( get_chroma_qp( h
, 1, mb_qp
) +
6640 get_chroma_qp( h
, 1, mbn0_qp
) + 1 ) >> 1;
6641 qp
[1] = ( mb_qp
+ mbn1_qp
+ 1 ) >> 1;
6642 bqp
[1] = ( get_chroma_qp( h
, 0, mb_qp
) +
6643 get_chroma_qp( h
, 0, mbn1_qp
) + 1 ) >> 1;
6644 rqp
[1] = ( get_chroma_qp( h
, 1, mb_qp
) +
6645 get_chroma_qp( h
, 1, mbn1_qp
) + 1 ) >> 1;
6648 tprintf(s
->avctx
, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x
, mb_y
, qp
[0], qp
[1], bqp
[0], bqp
[1], rqp
[0], rqp
[1], linesize
, uvlinesize
);
6649 { int i
; for (i
= 0; i
< 8; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6650 filter_mb_mbaff_edgev ( h
, &img_y
[0], linesize
, bS
, qp
);
6651 filter_mb_mbaff_edgecv( h
, &img_cb
[0], uvlinesize
, bS
, bqp
);
6652 filter_mb_mbaff_edgecv( h
, &img_cr
[0], uvlinesize
, bS
, rqp
);
6656 for( dir
= 0; dir
< 2; dir
++ )
6657 filter_mb_dir(h
, mb_x
, mb_y
, img_y
, img_cb
, img_cr
, linesize
, uvlinesize
, mb_xy
, mb_type
, mvy_limit
, dir
? 0 : first_vertical_edge_done
, dir
);
6659 filter_mb_dir(h
, mb_x
, mb_y
, img_y
, img_cb
, img_cr
, linesize
, uvlinesize
, mb_xy
, mb_type
, mvy_limit
, first_vertical_edge_done
, 0);
6660 filter_mb_dir(h
, mb_x
, mb_y
, img_y
, img_cb
, img_cr
, linesize
, uvlinesize
, mb_xy
, mb_type
, mvy_limit
, 0, 1);
6664 static int decode_slice(struct AVCodecContext
*avctx
, void *arg
){
6665 H264Context
*h
= *(void**)arg
;
6666 MpegEncContext
* const s
= &h
->s
;
6667 const int part_mask
= s
->partitioned_frame
? (AC_END
|AC_ERROR
) : 0x7F;
6671 h
->is_complex
= FRAME_MBAFF
|| s
->picture_structure
!= PICT_FRAME
|| s
->codec_id
!= CODEC_ID_H264
||
6672 (CONFIG_GRAY
&& (s
->flags
&CODEC_FLAG_GRAY
));
6674 if( h
->pps
.cabac
) {
6678 align_get_bits( &s
->gb
);
6681 ff_init_cabac_states( &h
->cabac
);
6682 ff_init_cabac_decoder( &h
->cabac
,
6683 s
->gb
.buffer
+ get_bits_count(&s
->gb
)/8,
6684 ( s
->gb
.size_in_bits
- get_bits_count(&s
->gb
) + 7)/8);
6685 /* calculate pre-state */
6686 for( i
= 0; i
< 460; i
++ ) {
6688 if( h
->slice_type_nos
== FF_I_TYPE
)
6689 pre
= av_clip( ((cabac_context_init_I
[i
][0] * s
->qscale
) >>4 ) + cabac_context_init_I
[i
][1], 1, 126 );
6691 pre
= av_clip( ((cabac_context_init_PB
[h
->cabac_init_idc
][i
][0] * s
->qscale
) >>4 ) + cabac_context_init_PB
[h
->cabac_init_idc
][i
][1], 1, 126 );
6694 h
->cabac_state
[i
] = 2 * ( 63 - pre
) + 0;
6696 h
->cabac_state
[i
] = 2 * ( pre
- 64 ) + 1;
6701 int ret
= decode_mb_cabac(h
);
6703 //STOP_TIMER("decode_mb_cabac")
6705 if(ret
>=0) hl_decode_mb(h
);
6707 if( ret
>= 0 && FRAME_MBAFF
) { //FIXME optimal? or let mb_decode decode 16x32 ?
6710 ret
= decode_mb_cabac(h
);
6712 if(ret
>=0) hl_decode_mb(h
);
6715 eos
= get_cabac_terminate( &h
->cabac
);
6717 if( ret
< 0 || h
->cabac
.bytestream
> h
->cabac
.bytestream_end
+ 2) {
6718 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d, bytestream (%td)\n", s
->mb_x
, s
->mb_y
, h
->cabac
.bytestream_end
- h
->cabac
.bytestream
);
6719 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6723 if( ++s
->mb_x
>= s
->mb_width
) {
6725 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6727 if(FIELD_OR_MBAFF_PICTURE
) {
6732 if( eos
|| s
->mb_y
>= s
->mb_height
) {
6733 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6734 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6741 int ret
= decode_mb_cavlc(h
);
6743 if(ret
>=0) hl_decode_mb(h
);
6745 if(ret
>=0 && FRAME_MBAFF
){ //FIXME optimal? or let mb_decode decode 16x32 ?
6747 ret
= decode_mb_cavlc(h
);
6749 if(ret
>=0) hl_decode_mb(h
);
6754 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6755 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6760 if(++s
->mb_x
>= s
->mb_width
){
6762 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6764 if(FIELD_OR_MBAFF_PICTURE
) {
6767 if(s
->mb_y
>= s
->mb_height
){
6768 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6770 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
) {
6771 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6775 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6782 if(get_bits_count(&s
->gb
) >= s
->gb
.size_in_bits
&& s
->mb_skip_run
<=0){
6783 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6784 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
){
6785 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6789 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6798 for(;s
->mb_y
< s
->mb_height
; s
->mb_y
++){
6799 for(;s
->mb_x
< s
->mb_width
; s
->mb_x
++){
6800 int ret
= decode_mb(h
);
6805 av_log(s
->avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6806 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6811 if(++s
->mb_x
>= s
->mb_width
){
6813 if(++s
->mb_y
>= s
->mb_height
){
6814 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6815 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6819 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6826 if(get_bits_count(s
->?gb
) >= s
->gb
?.size_in_bits
){
6827 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6828 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6832 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6839 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6842 return -1; //not reached
6845 static int decode_picture_timing(H264Context
*h
){
6846 MpegEncContext
* const s
= &h
->s
;
6847 if(h
->sps
.nal_hrd_parameters_present_flag
|| h
->sps
.vcl_hrd_parameters_present_flag
){
6848 h
->sei_cpb_removal_delay
= get_bits(&s
->gb
, h
->sps
.cpb_removal_delay_length
);
6849 h
->sei_dpb_output_delay
= get_bits(&s
->gb
, h
->sps
.dpb_output_delay_length
);
6851 if(h
->sps
.pic_struct_present_flag
){
6852 unsigned int i
, num_clock_ts
;
6853 h
->sei_pic_struct
= get_bits(&s
->gb
, 4);
6856 if (h
->sei_pic_struct
> SEI_PIC_STRUCT_FRAME_TRIPLING
)
6859 num_clock_ts
= sei_num_clock_ts_table
[h
->sei_pic_struct
];
6861 for (i
= 0 ; i
< num_clock_ts
; i
++){
6862 if(get_bits(&s
->gb
, 1)){ /* clock_timestamp_flag */
6863 unsigned int full_timestamp_flag
;
6864 h
->sei_ct_type
|= 1<<get_bits(&s
->gb
, 2);
6865 skip_bits(&s
->gb
, 1); /* nuit_field_based_flag */
6866 skip_bits(&s
->gb
, 5); /* counting_type */
6867 full_timestamp_flag
= get_bits(&s
->gb
, 1);
6868 skip_bits(&s
->gb
, 1); /* discontinuity_flag */
6869 skip_bits(&s
->gb
, 1); /* cnt_dropped_flag */
6870 skip_bits(&s
->gb
, 8); /* n_frames */
6871 if(full_timestamp_flag
){
6872 skip_bits(&s
->gb
, 6); /* seconds_value 0..59 */
6873 skip_bits(&s
->gb
, 6); /* minutes_value 0..59 */
6874 skip_bits(&s
->gb
, 5); /* hours_value 0..23 */
6876 if(get_bits(&s
->gb
, 1)){ /* seconds_flag */
6877 skip_bits(&s
->gb
, 6); /* seconds_value range 0..59 */
6878 if(get_bits(&s
->gb
, 1)){ /* minutes_flag */
6879 skip_bits(&s
->gb
, 6); /* minutes_value 0..59 */
6880 if(get_bits(&s
->gb
, 1)) /* hours_flag */
6881 skip_bits(&s
->gb
, 5); /* hours_value 0..23 */
6885 if(h
->sps
.time_offset_length
> 0)
6886 skip_bits(&s
->gb
, h
->sps
.time_offset_length
); /* time_offset */
6893 static int decode_unregistered_user_data(H264Context
*h
, int size
){
6894 MpegEncContext
* const s
= &h
->s
;
6895 uint8_t user_data
[16+256];
6901 for(i
=0; i
<sizeof(user_data
)-1 && i
<size
; i
++){
6902 user_data
[i
]= get_bits(&s
->gb
, 8);
6906 e
= sscanf(user_data
+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build
);
6907 if(e
==1 && build
>=0)
6908 h
->x264_build
= build
;
6910 if(s
->avctx
->debug
& FF_DEBUG_BUGS
)
6911 av_log(s
->avctx
, AV_LOG_DEBUG
, "user data:\"%s\"\n", user_data
+16);
6914 skip_bits(&s
->gb
, 8);
6919 static int decode_recovery_point(H264Context
*h
){
6920 MpegEncContext
* const s
= &h
->s
;
6922 h
->sei_recovery_frame_cnt
= get_ue_golomb(&s
->gb
);
6923 skip_bits(&s
->gb
, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
6928 static int decode_buffering_period(H264Context
*h
){
6929 MpegEncContext
* const s
= &h
->s
;
6930 unsigned int sps_id
;
6934 sps_id
= get_ue_golomb_31(&s
->gb
);
6935 if(sps_id
> 31 || !h
->sps_buffers
[sps_id
]) {
6936 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non-existing SPS %d referenced in buffering period\n", sps_id
);
6939 sps
= h
->sps_buffers
[sps_id
];
6941 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6942 if (sps
->nal_hrd_parameters_present_flag
) {
6943 for (sched_sel_idx
= 0; sched_sel_idx
< sps
->cpb_cnt
; sched_sel_idx
++) {
6944 h
->initial_cpb_removal_delay
[sched_sel_idx
] = get_bits(&s
->gb
, sps
->initial_cpb_removal_delay_length
);
6945 skip_bits(&s
->gb
, sps
->initial_cpb_removal_delay_length
); // initial_cpb_removal_delay_offset
6948 if (sps
->vcl_hrd_parameters_present_flag
) {
6949 for (sched_sel_idx
= 0; sched_sel_idx
< sps
->cpb_cnt
; sched_sel_idx
++) {
6950 h
->initial_cpb_removal_delay
[sched_sel_idx
] = get_bits(&s
->gb
, sps
->initial_cpb_removal_delay_length
);
6951 skip_bits(&s
->gb
, sps
->initial_cpb_removal_delay_length
); // initial_cpb_removal_delay_offset
6955 h
->sei_buffering_period_present
= 1;
6959 int ff_h264_decode_sei(H264Context
*h
){
6960 MpegEncContext
* const s
= &h
->s
;
6962 while(get_bits_count(&s
->gb
) + 16 < s
->gb
.size_in_bits
){
6967 type
+= show_bits(&s
->gb
, 8);
6968 }while(get_bits(&s
->gb
, 8) == 255);
6972 size
+= show_bits(&s
->gb
, 8);
6973 }while(get_bits(&s
->gb
, 8) == 255);
6976 case SEI_TYPE_PIC_TIMING
: // Picture timing SEI
6977 if(decode_picture_timing(h
) < 0)
6980 case SEI_TYPE_USER_DATA_UNREGISTERED
:
6981 if(decode_unregistered_user_data(h
, size
) < 0)
6984 case SEI_TYPE_RECOVERY_POINT
:
6985 if(decode_recovery_point(h
) < 0)
6988 case SEI_BUFFERING_PERIOD
:
6989 if(decode_buffering_period(h
) < 0)
6993 skip_bits(&s
->gb
, 8*size
);
6996 //FIXME check bits here
6997 align_get_bits(&s
->gb
);
7003 static inline int decode_hrd_parameters(H264Context
*h
, SPS
*sps
){
7004 MpegEncContext
* const s
= &h
->s
;
7006 cpb_count
= get_ue_golomb_31(&s
->gb
) + 1;
7008 if(cpb_count
> 32U){
7009 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cpb_count %d invalid\n", cpb_count
);
7013 get_bits(&s
->gb
, 4); /* bit_rate_scale */
7014 get_bits(&s
->gb
, 4); /* cpb_size_scale */
7015 for(i
=0; i
<cpb_count
; i
++){
7016 get_ue_golomb(&s
->gb
); /* bit_rate_value_minus1 */
7017 get_ue_golomb(&s
->gb
); /* cpb_size_value_minus1 */
7018 get_bits1(&s
->gb
); /* cbr_flag */
7020 sps
->initial_cpb_removal_delay_length
= get_bits(&s
->gb
, 5) + 1;
7021 sps
->cpb_removal_delay_length
= get_bits(&s
->gb
, 5) + 1;
7022 sps
->dpb_output_delay_length
= get_bits(&s
->gb
, 5) + 1;
7023 sps
->time_offset_length
= get_bits(&s
->gb
, 5);
7024 sps
->cpb_cnt
= cpb_count
;
7028 static inline int decode_vui_parameters(H264Context
*h
, SPS
*sps
){
7029 MpegEncContext
* const s
= &h
->s
;
7030 int aspect_ratio_info_present_flag
;
7031 unsigned int aspect_ratio_idc
;
7033 aspect_ratio_info_present_flag
= get_bits1(&s
->gb
);
7035 if( aspect_ratio_info_present_flag
) {
7036 aspect_ratio_idc
= get_bits(&s
->gb
, 8);
7037 if( aspect_ratio_idc
== EXTENDED_SAR
) {
7038 sps
->sar
.num
= get_bits(&s
->gb
, 16);
7039 sps
->sar
.den
= get_bits(&s
->gb
, 16);
7040 }else if(aspect_ratio_idc
< FF_ARRAY_ELEMS(pixel_aspect
)){
7041 sps
->sar
= pixel_aspect
[aspect_ratio_idc
];
7043 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal aspect ratio\n");
7050 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7052 if(get_bits1(&s
->gb
)){ /* overscan_info_present_flag */
7053 get_bits1(&s
->gb
); /* overscan_appropriate_flag */
7056 if(get_bits1(&s
->gb
)){ /* video_signal_type_present_flag */
7057 get_bits(&s
->gb
, 3); /* video_format */
7058 get_bits1(&s
->gb
); /* video_full_range_flag */
7059 if(get_bits1(&s
->gb
)){ /* colour_description_present_flag */
7060 get_bits(&s
->gb
, 8); /* colour_primaries */
7061 get_bits(&s
->gb
, 8); /* transfer_characteristics */
7062 get_bits(&s
->gb
, 8); /* matrix_coefficients */
7066 if(get_bits1(&s
->gb
)){ /* chroma_location_info_present_flag */
7067 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_top_field */
7068 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_bottom_field */
7071 sps
->timing_info_present_flag
= get_bits1(&s
->gb
);
7072 if(sps
->timing_info_present_flag
){
7073 sps
->num_units_in_tick
= get_bits_long(&s
->gb
, 32);
7074 sps
->time_scale
= get_bits_long(&s
->gb
, 32);
7075 sps
->fixed_frame_rate_flag
= get_bits1(&s
->gb
);
7078 sps
->nal_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
7079 if(sps
->nal_hrd_parameters_present_flag
)
7080 if(decode_hrd_parameters(h
, sps
) < 0)
7082 sps
->vcl_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
7083 if(sps
->vcl_hrd_parameters_present_flag
)
7084 if(decode_hrd_parameters(h
, sps
) < 0)
7086 if(sps
->nal_hrd_parameters_present_flag
|| sps
->vcl_hrd_parameters_present_flag
)
7087 get_bits1(&s
->gb
); /* low_delay_hrd_flag */
7088 sps
->pic_struct_present_flag
= get_bits1(&s
->gb
);
7090 sps
->bitstream_restriction_flag
= get_bits1(&s
->gb
);
7091 if(sps
->bitstream_restriction_flag
){
7092 get_bits1(&s
->gb
); /* motion_vectors_over_pic_boundaries_flag */
7093 get_ue_golomb(&s
->gb
); /* max_bytes_per_pic_denom */
7094 get_ue_golomb(&s
->gb
); /* max_bits_per_mb_denom */
7095 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_horizontal */
7096 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_vertical */
7097 sps
->num_reorder_frames
= get_ue_golomb(&s
->gb
);
7098 get_ue_golomb(&s
->gb
); /*max_dec_frame_buffering*/
7100 if(sps
->num_reorder_frames
> 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7101 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal num_reorder_frames %d\n", sps
->num_reorder_frames
);
7109 static void decode_scaling_list(H264Context
*h
, uint8_t *factors
, int size
,
7110 const uint8_t *jvt_list
, const uint8_t *fallback_list
){
7111 MpegEncContext
* const s
= &h
->s
;
7112 int i
, last
= 8, next
= 8;
7113 const uint8_t *scan
= size
== 16 ? zigzag_scan
: ff_zigzag_direct
;
7114 if(!get_bits1(&s
->gb
)) /* matrix not written, we use the predicted one */
7115 memcpy(factors
, fallback_list
, size
*sizeof(uint8_t));
7117 for(i
=0;i
<size
;i
++){
7119 next
= (last
+ get_se_golomb(&s
->gb
)) & 0xff;
7120 if(!i
&& !next
){ /* matrix not written, we use the preset one */
7121 memcpy(factors
, jvt_list
, size
*sizeof(uint8_t));
7124 last
= factors
[scan
[i
]] = next
? next
: last
;
7128 static void decode_scaling_matrices(H264Context
*h
, SPS
*sps
, PPS
*pps
, int is_sps
,
7129 uint8_t (*scaling_matrix4
)[16], uint8_t (*scaling_matrix8
)[64]){
7130 MpegEncContext
* const s
= &h
->s
;
7131 int fallback_sps
= !is_sps
&& sps
->scaling_matrix_present
;
7132 const uint8_t *fallback
[4] = {
7133 fallback_sps
? sps
->scaling_matrix4
[0] : default_scaling4
[0],
7134 fallback_sps
? sps
->scaling_matrix4
[3] : default_scaling4
[1],
7135 fallback_sps
? sps
->scaling_matrix8
[0] : default_scaling8
[0],
7136 fallback_sps
? sps
->scaling_matrix8
[1] : default_scaling8
[1]
7138 if(get_bits1(&s
->gb
)){
7139 sps
->scaling_matrix_present
|= is_sps
;
7140 decode_scaling_list(h
,scaling_matrix4
[0],16,default_scaling4
[0],fallback
[0]); // Intra, Y
7141 decode_scaling_list(h
,scaling_matrix4
[1],16,default_scaling4
[0],scaling_matrix4
[0]); // Intra, Cr
7142 decode_scaling_list(h
,scaling_matrix4
[2],16,default_scaling4
[0],scaling_matrix4
[1]); // Intra, Cb
7143 decode_scaling_list(h
,scaling_matrix4
[3],16,default_scaling4
[1],fallback
[1]); // Inter, Y
7144 decode_scaling_list(h
,scaling_matrix4
[4],16,default_scaling4
[1],scaling_matrix4
[3]); // Inter, Cr
7145 decode_scaling_list(h
,scaling_matrix4
[5],16,default_scaling4
[1],scaling_matrix4
[4]); // Inter, Cb
7146 if(is_sps
|| pps
->transform_8x8_mode
){
7147 decode_scaling_list(h
,scaling_matrix8
[0],64,default_scaling8
[0],fallback
[2]); // Intra, Y
7148 decode_scaling_list(h
,scaling_matrix8
[1],64,default_scaling8
[1],fallback
[3]); // Inter, Y
7153 int ff_h264_decode_seq_parameter_set(H264Context
*h
){
7154 MpegEncContext
* const s
= &h
->s
;
7155 int profile_idc
, level_idc
;
7156 unsigned int sps_id
;
7160 profile_idc
= get_bits(&s
->gb
, 8);
7161 get_bits1(&s
->gb
); //constraint_set0_flag
7162 get_bits1(&s
->gb
); //constraint_set1_flag
7163 get_bits1(&s
->gb
); //constraint_set2_flag
7164 get_bits1(&s
->gb
); //constraint_set3_flag
7165 get_bits(&s
->gb
, 4); // reserved
7166 level_idc
= get_bits(&s
->gb
, 8);
7167 sps_id
= get_ue_golomb_31(&s
->gb
);
7169 if(sps_id
>= MAX_SPS_COUNT
) {
7170 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "sps_id (%d) out of range\n", sps_id
);
7173 sps
= av_mallocz(sizeof(SPS
));
7177 sps
->profile_idc
= profile_idc
;
7178 sps
->level_idc
= level_idc
;
7180 memset(sps
->scaling_matrix4
, 16, sizeof(sps
->scaling_matrix4
));
7181 memset(sps
->scaling_matrix8
, 16, sizeof(sps
->scaling_matrix8
));
7182 sps
->scaling_matrix_present
= 0;
7184 if(sps
->profile_idc
>= 100){ //high profile
7185 sps
->chroma_format_idc
= get_ue_golomb_31(&s
->gb
);
7186 if(sps
->chroma_format_idc
== 3)
7187 sps
->residual_color_transform_flag
= get_bits1(&s
->gb
);
7188 sps
->bit_depth_luma
= get_ue_golomb(&s
->gb
) + 8;
7189 sps
->bit_depth_chroma
= get_ue_golomb(&s
->gb
) + 8;
7190 sps
->transform_bypass
= get_bits1(&s
->gb
);
7191 decode_scaling_matrices(h
, sps
, NULL
, 1, sps
->scaling_matrix4
, sps
->scaling_matrix8
);
7193 sps
->chroma_format_idc
= 1;
7196 sps
->log2_max_frame_num
= get_ue_golomb(&s
->gb
) + 4;
7197 sps
->poc_type
= get_ue_golomb_31(&s
->gb
);
7199 if(sps
->poc_type
== 0){ //FIXME #define
7200 sps
->log2_max_poc_lsb
= get_ue_golomb(&s
->gb
) + 4;
7201 } else if(sps
->poc_type
== 1){//FIXME #define
7202 sps
->delta_pic_order_always_zero_flag
= get_bits1(&s
->gb
);
7203 sps
->offset_for_non_ref_pic
= get_se_golomb(&s
->gb
);
7204 sps
->offset_for_top_to_bottom_field
= get_se_golomb(&s
->gb
);
7205 sps
->poc_cycle_length
= get_ue_golomb(&s
->gb
);
7207 if((unsigned)sps
->poc_cycle_length
>= FF_ARRAY_ELEMS(sps
->offset_for_ref_frame
)){
7208 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "poc_cycle_length overflow %u\n", sps
->poc_cycle_length
);
7212 for(i
=0; i
<sps
->poc_cycle_length
; i
++)
7213 sps
->offset_for_ref_frame
[i
]= get_se_golomb(&s
->gb
);
7214 }else if(sps
->poc_type
!= 2){
7215 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal POC type %d\n", sps
->poc_type
);
7219 sps
->ref_frame_count
= get_ue_golomb_31(&s
->gb
);
7220 if(sps
->ref_frame_count
> MAX_PICTURE_COUNT
-2 || sps
->ref_frame_count
>= 32U){
7221 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "too many reference frames\n");
7224 sps
->gaps_in_frame_num_allowed_flag
= get_bits1(&s
->gb
);
7225 sps
->mb_width
= get_ue_golomb(&s
->gb
) + 1;
7226 sps
->mb_height
= get_ue_golomb(&s
->gb
) + 1;
7227 if((unsigned)sps
->mb_width
>= INT_MAX
/16 || (unsigned)sps
->mb_height
>= INT_MAX
/16 ||
7228 avcodec_check_dimensions(NULL
, 16*sps
->mb_width
, 16*sps
->mb_height
)){
7229 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_width/height overflow\n");
7233 sps
->frame_mbs_only_flag
= get_bits1(&s
->gb
);
7234 if(!sps
->frame_mbs_only_flag
)
7235 sps
->mb_aff
= get_bits1(&s
->gb
);
7239 sps
->direct_8x8_inference_flag
= get_bits1(&s
->gb
);
7241 #ifndef ALLOW_INTERLACE
7243 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "MBAFF support not included; enable it at compile-time.\n");
7245 sps
->crop
= get_bits1(&s
->gb
);
7247 sps
->crop_left
= get_ue_golomb(&s
->gb
);
7248 sps
->crop_right
= get_ue_golomb(&s
->gb
);
7249 sps
->crop_top
= get_ue_golomb(&s
->gb
);
7250 sps
->crop_bottom
= get_ue_golomb(&s
->gb
);
7251 if(sps
->crop_left
|| sps
->crop_top
){
7252 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "insane cropping not completely supported, this could look slightly wrong ...\n");
7254 if(sps
->crop_right
>= 8 || sps
->crop_bottom
>= (8>> !sps
->frame_mbs_only_flag
)){
7255 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "brainfart cropping not supported, this could look slightly wrong ...\n");
7261 sps
->crop_bottom
= 0;
7264 sps
->vui_parameters_present_flag
= get_bits1(&s
->gb
);
7265 if( sps
->vui_parameters_present_flag
)
7266 decode_vui_parameters(h
, sps
);
7268 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7269 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s %d/%d\n",
7270 sps_id
, sps
->profile_idc
, sps
->level_idc
,
7272 sps
->ref_frame_count
,
7273 sps
->mb_width
, sps
->mb_height
,
7274 sps
->frame_mbs_only_flag
? "FRM" : (sps
->mb_aff
? "MB-AFF" : "PIC-AFF"),
7275 sps
->direct_8x8_inference_flag
? "8B8" : "",
7276 sps
->crop_left
, sps
->crop_right
,
7277 sps
->crop_top
, sps
->crop_bottom
,
7278 sps
->vui_parameters_present_flag
? "VUI" : "",
7279 ((const char*[]){"Gray","420","422","444"})[sps
->chroma_format_idc
],
7280 sps
->timing_info_present_flag
? sps
->num_units_in_tick
: 0,
7281 sps
->timing_info_present_flag
? sps
->time_scale
: 0
7285 av_free(h
->sps_buffers
[sps_id
]);
7286 h
->sps_buffers
[sps_id
]= sps
;
7295 build_qp_table(PPS
*pps
, int t
, int index
)
7298 for(i
= 0; i
< 52; i
++)
7299 pps
->chroma_qp_table
[t
][i
] = chroma_qp
[av_clip(i
+ index
, 0, 51)];
7302 int ff_h264_decode_picture_parameter_set(H264Context
*h
, int bit_length
){
7303 MpegEncContext
* const s
= &h
->s
;
7304 unsigned int pps_id
= get_ue_golomb(&s
->gb
);
7307 if(pps_id
>= MAX_PPS_COUNT
) {
7308 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "pps_id (%d) out of range\n", pps_id
);
7312 pps
= av_mallocz(sizeof(PPS
));
7315 pps
->sps_id
= get_ue_golomb_31(&s
->gb
);
7316 if((unsigned)pps
->sps_id
>=MAX_SPS_COUNT
|| h
->sps_buffers
[pps
->sps_id
] == NULL
){
7317 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "sps_id out of range\n");
7321 pps
->cabac
= get_bits1(&s
->gb
);
7322 pps
->pic_order_present
= get_bits1(&s
->gb
);
7323 pps
->slice_group_count
= get_ue_golomb(&s
->gb
) + 1;
7324 if(pps
->slice_group_count
> 1 ){
7325 pps
->mb_slice_group_map_type
= get_ue_golomb(&s
->gb
);
7326 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "FMO not supported\n");
7327 switch(pps
->mb_slice_group_map_type
){
7330 | for( i
= 0; i
<= num_slice_groups_minus1
; i
++ ) | | |
7331 | run_length
[ i
] |1 |ue(v
) |
7336 | for( i
= 0; i
< num_slice_groups_minus1
; i
++ ) | | |
7338 | top_left_mb
[ i
] |1 |ue(v
) |
7339 | bottom_right_mb
[ i
] |1 |ue(v
) |
7347 | slice_group_change_direction_flag
|1 |u(1) |
7348 | slice_group_change_rate_minus1
|1 |ue(v
) |
7353 | slice_group_id_cnt_minus1
|1 |ue(v
) |
7354 | for( i
= 0; i
<= slice_group_id_cnt_minus1
; i
++ | | |
7356 | slice_group_id
[ i
] |1 |u(v
) |
7361 pps
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
7362 pps
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
7363 if(pps
->ref_count
[0]-1 > 32-1 || pps
->ref_count
[1]-1 > 32-1){
7364 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow (pps)\n");
7368 pps
->weighted_pred
= get_bits1(&s
->gb
);
7369 pps
->weighted_bipred_idc
= get_bits(&s
->gb
, 2);
7370 pps
->init_qp
= get_se_golomb(&s
->gb
) + 26;
7371 pps
->init_qs
= get_se_golomb(&s
->gb
) + 26;
7372 pps
->chroma_qp_index_offset
[0]= get_se_golomb(&s
->gb
);
7373 pps
->deblocking_filter_parameters_present
= get_bits1(&s
->gb
);
7374 pps
->constrained_intra_pred
= get_bits1(&s
->gb
);
7375 pps
->redundant_pic_cnt_present
= get_bits1(&s
->gb
);
7377 pps
->transform_8x8_mode
= 0;
7378 h
->dequant_coeff_pps
= -1; //contents of sps/pps can change even if id doesn't, so reinit
7379 memcpy(pps
->scaling_matrix4
, h
->sps_buffers
[pps
->sps_id
]->scaling_matrix4
, sizeof(pps
->scaling_matrix4
));
7380 memcpy(pps
->scaling_matrix8
, h
->sps_buffers
[pps
->sps_id
]->scaling_matrix8
, sizeof(pps
->scaling_matrix8
));
7382 if(get_bits_count(&s
->gb
) < bit_length
){
7383 pps
->transform_8x8_mode
= get_bits1(&s
->gb
);
7384 decode_scaling_matrices(h
, h
->sps_buffers
[pps
->sps_id
], pps
, 0, pps
->scaling_matrix4
, pps
->scaling_matrix8
);
7385 pps
->chroma_qp_index_offset
[1]= get_se_golomb(&s
->gb
); //second_chroma_qp_index_offset
7387 pps
->chroma_qp_index_offset
[1]= pps
->chroma_qp_index_offset
[0];
7390 build_qp_table(pps
, 0, pps
->chroma_qp_index_offset
[0]);
7391 build_qp_table(pps
, 1, pps
->chroma_qp_index_offset
[1]);
7392 if(pps
->chroma_qp_index_offset
[0] != pps
->chroma_qp_index_offset
[1])
7393 h
->pps
.chroma_qp_diff
= 1;
7395 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7396 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7397 pps_id
, pps
->sps_id
,
7398 pps
->cabac
? "CABAC" : "CAVLC",
7399 pps
->slice_group_count
,
7400 pps
->ref_count
[0], pps
->ref_count
[1],
7401 pps
->weighted_pred
? "weighted" : "",
7402 pps
->init_qp
, pps
->init_qs
, pps
->chroma_qp_index_offset
[0], pps
->chroma_qp_index_offset
[1],
7403 pps
->deblocking_filter_parameters_present
? "LPAR" : "",
7404 pps
->constrained_intra_pred
? "CONSTR" : "",
7405 pps
->redundant_pic_cnt_present
? "REDU" : "",
7406 pps
->transform_8x8_mode
? "8x8DCT" : ""
7410 av_free(h
->pps_buffers
[pps_id
]);
7411 h
->pps_buffers
[pps_id
]= pps
;
7419 * Call decode_slice() for each context.
7421 * @param h h264 master context
7422 * @param context_count number of contexts to execute
7424 static void execute_decode_slices(H264Context
*h
, int context_count
){
7425 MpegEncContext
* const s
= &h
->s
;
7426 AVCodecContext
* const avctx
= s
->avctx
;
7430 if (s
->avctx
->hwaccel
)
7432 if(s
->avctx
->codec
->capabilities
&CODEC_CAP_HWACCEL_VDPAU
)
7434 if(context_count
== 1) {
7435 decode_slice(avctx
, &h
);
7437 for(i
= 1; i
< context_count
; i
++) {
7438 hx
= h
->thread_context
[i
];
7439 hx
->s
.error_recognition
= avctx
->error_recognition
;
7440 hx
->s
.error_count
= 0;
7443 avctx
->execute(avctx
, (void *)decode_slice
,
7444 (void **)h
->thread_context
, NULL
, context_count
, sizeof(void*));
7446 /* pull back stuff from slices to master context */
7447 hx
= h
->thread_context
[context_count
- 1];
7448 s
->mb_x
= hx
->s
.mb_x
;
7449 s
->mb_y
= hx
->s
.mb_y
;
7450 s
->dropable
= hx
->s
.dropable
;
7451 s
->picture_structure
= hx
->s
.picture_structure
;
7452 for(i
= 1; i
< context_count
; i
++)
7453 h
->s
.error_count
+= h
->thread_context
[i
]->s
.error_count
;
7458 static int decode_nal_units(H264Context
*h
, const uint8_t *buf
, int buf_size
){
7459 MpegEncContext
* const s
= &h
->s
;
7460 AVCodecContext
* const avctx
= s
->avctx
;
7462 H264Context
*hx
; ///< thread context
7463 int context_count
= 0;
7464 int next_avc
= h
->is_avc
? 0 : buf_size
;
7466 h
->max_contexts
= avctx
->thread_count
;
7469 for(i
=0; i
<50; i
++){
7470 av_log(NULL
, AV_LOG_ERROR
,"%02X ", buf
[i
]);
7473 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
)){
7474 h
->current_slice
= 0;
7475 if (!s
->first_field
)
7476 s
->current_picture_ptr
= NULL
;
7488 if(buf_index
>= next_avc
) {
7489 if(buf_index
>= buf_size
) break;
7491 for(i
= 0; i
< h
->nal_length_size
; i
++)
7492 nalsize
= (nalsize
<< 8) | buf
[buf_index
++];
7493 if(nalsize
<= 1 || (nalsize
+buf_index
> buf_size
)){
7498 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "AVC: nal size %d\n", nalsize
);
7502 next_avc
= buf_index
+ nalsize
;
7504 // start code prefix search
7505 for(; buf_index
+ 3 < buf_size
; buf_index
++){
7506 // This should always succeed in the first iteration.
7507 if(buf
[buf_index
] == 0 && buf
[buf_index
+1] == 0 && buf
[buf_index
+2] == 1)
7511 if(buf_index
+3 >= buf_size
) break;
7516 hx
= h
->thread_context
[context_count
];
7518 ptr
= ff_h264_decode_nal(hx
, buf
+ buf_index
, &dst_length
, &consumed
, next_avc
- buf_index
);
7519 if (ptr
==NULL
|| dst_length
< 0){
7522 while(ptr
[dst_length
- 1] == 0 && dst_length
> 0)
7524 bit_length
= !dst_length
? 0 : (8*dst_length
- ff_h264_decode_rbsp_trailing(h
, ptr
+ dst_length
- 1));
7526 if(s
->avctx
->debug
&FF_DEBUG_STARTCODE
){
7527 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "NAL %d at %d/%d length %d\n", hx
->nal_unit_type
, buf_index
, buf_size
, dst_length
);
7530 if (h
->is_avc
&& (nalsize
!= consumed
) && nalsize
){
7531 int i
, debug_level
= AV_LOG_DEBUG
;
7532 for (i
= consumed
; i
< nalsize
; i
++)
7533 if (buf
[buf_index
+i
])
7534 debug_level
= AV_LOG_ERROR
;
7535 av_log(h
->s
.avctx
, debug_level
, "AVC: Consumed only %d bytes instead of %d\n", consumed
, nalsize
);
7538 buf_index
+= consumed
;
7540 if( (s
->hurry_up
== 1 && h
->nal_ref_idc
== 0) //FIXME do not discard SEI id
7541 ||(avctx
->skip_frame
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
7546 switch(hx
->nal_unit_type
){
7548 if (h
->nal_unit_type
!= NAL_IDR_SLICE
) {
7549 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "Invalid mix of idr and non-idr slices");
7552 idr(h
); //FIXME ensure we don't loose some frames if there is reordering
7554 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
7556 hx
->inter_gb_ptr
= &hx
->s
.gb
;
7557 hx
->s
.data_partitioning
= 0;
7559 if((err
= decode_slice_header(hx
, h
)))
7562 if (s
->avctx
->hwaccel
&& h
->current_slice
== 1) {
7563 if (s
->avctx
->hwaccel
->start_frame(s
->avctx
, NULL
, 0) < 0)
7567 s
->current_picture_ptr
->key_frame
|=
7568 (hx
->nal_unit_type
== NAL_IDR_SLICE
) ||
7569 (h
->sei_recovery_frame_cnt
>= 0);
7570 if(hx
->redundant_pic_count
==0 && hx
->s
.hurry_up
< 5
7571 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
)
7572 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| hx
->slice_type_nos
!=FF_B_TYPE
)
7573 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| hx
->slice_type_nos
==FF_I_TYPE
)
7574 && avctx
->skip_frame
< AVDISCARD_ALL
){
7575 if(avctx
->hwaccel
) {
7576 if (avctx
->hwaccel
->decode_slice(avctx
, &buf
[buf_index
- consumed
], consumed
) < 0)
7579 if(CONFIG_H264_VDPAU_DECODER
&& s
->avctx
->codec
->capabilities
&CODEC_CAP_HWACCEL_VDPAU
){
7580 static const uint8_t start_code
[] = {0x00, 0x00, 0x01};
7581 ff_vdpau_add_data_chunk(s
, start_code
, sizeof(start_code
));
7582 ff_vdpau_add_data_chunk(s
, &buf
[buf_index
- consumed
], consumed
);
7588 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
7590 hx
->inter_gb_ptr
= NULL
;
7591 hx
->s
.data_partitioning
= 1;
7593 err
= decode_slice_header(hx
, h
);
7596 init_get_bits(&hx
->intra_gb
, ptr
, bit_length
);
7597 hx
->intra_gb_ptr
= &hx
->intra_gb
;
7600 init_get_bits(&hx
->inter_gb
, ptr
, bit_length
);
7601 hx
->inter_gb_ptr
= &hx
->inter_gb
;
7603 if(hx
->redundant_pic_count
==0 && hx
->intra_gb_ptr
&& hx
->s
.data_partitioning
7604 && s
->context_initialized
7606 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
)
7607 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| hx
->slice_type_nos
!=FF_B_TYPE
)
7608 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| hx
->slice_type_nos
==FF_I_TYPE
)
7609 && avctx
->skip_frame
< AVDISCARD_ALL
)
7613 init_get_bits(&s
->gb
, ptr
, bit_length
);
7614 ff_h264_decode_sei(h
);
7617 init_get_bits(&s
->gb
, ptr
, bit_length
);
7618 ff_h264_decode_seq_parameter_set(h
);
7620 if(s
->flags
& CODEC_FLAG_LOW_DELAY
)
7623 if(avctx
->has_b_frames
< 2)
7624 avctx
->has_b_frames
= !s
->low_delay
;
7627 init_get_bits(&s
->gb
, ptr
, bit_length
);
7629 ff_h264_decode_picture_parameter_set(h
, bit_length
);
7633 case NAL_END_SEQUENCE
:
7634 case NAL_END_STREAM
:
7635 case NAL_FILLER_DATA
:
7637 case NAL_AUXILIARY_SLICE
:
7640 av_log(avctx
, AV_LOG_DEBUG
, "Unknown NAL code: %d (%d bits)\n", h
->nal_unit_type
, bit_length
);
7643 if(context_count
== h
->max_contexts
) {
7644 execute_decode_slices(h
, context_count
);
7649 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "decode_slice_header error\n");
7651 /* Slice could not be decoded in parallel mode, copy down
7652 * NAL unit stuff to context 0 and restart. Note that
7653 * rbsp_buffer is not transferred, but since we no longer
7654 * run in parallel mode this should not be an issue. */
7655 h
->nal_unit_type
= hx
->nal_unit_type
;
7656 h
->nal_ref_idc
= hx
->nal_ref_idc
;
7662 execute_decode_slices(h
, context_count
);
7667 * returns the number of bytes consumed for building the current frame
7669 static int get_consumed_bytes(MpegEncContext
*s
, int pos
, int buf_size
){
7670 if(pos
==0) pos
=1; //avoid infinite loops (i doubt that is needed but ...)
7671 if(pos
+10>buf_size
) pos
=buf_size
; // oops ;)
7676 static int decode_frame(AVCodecContext
*avctx
,
7677 void *data
, int *data_size
,
7680 const uint8_t *buf
= avpkt
->data
;
7681 int buf_size
= avpkt
->size
;
7682 H264Context
*h
= avctx
->priv_data
;
7683 MpegEncContext
*s
= &h
->s
;
7684 AVFrame
*pict
= data
;
7687 s
->flags
= avctx
->flags
;
7688 s
->flags2
= avctx
->flags2
;
7690 /* end of stream, output what is still in the buffers */
7691 if (buf_size
== 0) {
7695 //FIXME factorize this with the output code below
7696 out
= h
->delayed_pic
[0];
7698 for(i
=1; h
->delayed_pic
[i
] && (h
->delayed_pic
[i
]->poc
&& !h
->delayed_pic
[i
]->key_frame
); i
++)
7699 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7700 out
= h
->delayed_pic
[i
];
7704 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7705 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7708 *data_size
= sizeof(AVFrame
);
7709 *pict
= *(AVFrame
*)out
;
7715 if(h
->is_avc
&& !h
->got_avcC
) {
7716 int i
, cnt
, nalsize
;
7717 unsigned char *p
= avctx
->extradata
;
7718 if(avctx
->extradata_size
< 7) {
7719 av_log(avctx
, AV_LOG_ERROR
, "avcC too short\n");
7723 av_log(avctx
, AV_LOG_ERROR
, "Unknown avcC version %d\n", *p
);
7726 /* sps and pps in the avcC always have length coded with 2 bytes,
7727 so put a fake nal_length_size = 2 while parsing them */
7728 h
->nal_length_size
= 2;
7729 // Decode sps from avcC
7730 cnt
= *(p
+5) & 0x1f; // Number of sps
7732 for (i
= 0; i
< cnt
; i
++) {
7733 nalsize
= AV_RB16(p
) + 2;
7734 if(decode_nal_units(h
, p
, nalsize
) < 0) {
7735 av_log(avctx
, AV_LOG_ERROR
, "Decoding sps %d from avcC failed\n", i
);
7740 // Decode pps from avcC
7741 cnt
= *(p
++); // Number of pps
7742 for (i
= 0; i
< cnt
; i
++) {
7743 nalsize
= AV_RB16(p
) + 2;
7744 if(decode_nal_units(h
, p
, nalsize
) != nalsize
) {
7745 av_log(avctx
, AV_LOG_ERROR
, "Decoding pps %d from avcC failed\n", i
);
7750 // Now store right nal length size, that will be use to parse all other nals
7751 h
->nal_length_size
= ((*(((char*)(avctx
->extradata
))+4))&0x03)+1;
7752 // Do not reparse avcC
7756 if(!h
->got_avcC
&& !h
->is_avc
&& s
->avctx
->extradata_size
){
7757 if(decode_nal_units(h
, s
->avctx
->extradata
, s
->avctx
->extradata_size
) < 0)
7762 buf_index
=decode_nal_units(h
, buf
, buf_size
);
7766 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
) && !s
->current_picture_ptr
){
7767 if (avctx
->skip_frame
>= AVDISCARD_NONREF
|| s
->hurry_up
) return 0;
7768 av_log(avctx
, AV_LOG_ERROR
, "no frame!\n");
7772 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
) || (s
->mb_y
>= s
->mb_height
&& s
->mb_height
)){
7773 Picture
*out
= s
->current_picture_ptr
;
7774 Picture
*cur
= s
->current_picture_ptr
;
7775 int i
, pics
, cross_idr
, out_of_order
, out_idx
;
7779 if (cur
->field_poc
[0]==INT_MAX
|| cur
->field_poc
[1]==INT_MAX
) {
7780 /* Wait for second field. */
7784 cur
->repeat_pict
= 0;
7786 /* Signal interlacing information externally. */
7787 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7789 cur
->interlaced_frame
= (h
->sei_ct_type
& (1<<1)) != 0;
7791 cur
->interlaced_frame
= FIELD_OR_MBAFF_PICTURE
;
7793 if(h
->sps
.pic_struct_present_flag
){
7794 switch (h
->sei_pic_struct
)
7796 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP
:
7797 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM
:
7798 // Signal the possibility of telecined film externally (pic_struct 5,6)
7799 // From these hints, let the applications decide if they apply deinterlacing.
7800 cur
->repeat_pict
= 1;
7802 case SEI_PIC_STRUCT_FRAME_DOUBLING
:
7803 // Force progressive here, as doubling interlaced frame is a bad idea.
7804 cur
->interlaced_frame
= 0;
7805 cur
->repeat_pict
= 2;
7807 case SEI_PIC_STRUCT_FRAME_TRIPLING
:
7808 cur
->interlaced_frame
= 0;
7809 cur
->repeat_pict
= 4;
7813 /* Derive interlacing flag from used decoding process. */
7814 cur
->interlaced_frame
= FIELD_OR_MBAFF_PICTURE
;
7817 if (cur
->field_poc
[0] != cur
->field_poc
[1]){
7818 /* Derive top_field_first from field pocs. */
7819 cur
->top_field_first
= cur
->field_poc
[0] < cur
->field_poc
[1];
7821 if(cur
->interlaced_frame
|| h
->sps
.pic_struct_present_flag
){
7822 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7823 if(h
->sei_pic_struct
== SEI_PIC_STRUCT_TOP_BOTTOM
7824 || h
->sei_pic_struct
== SEI_PIC_STRUCT_TOP_BOTTOM_TOP
)
7825 cur
->top_field_first
= 1;
7827 cur
->top_field_first
= 0;
7829 /* Most likely progressive */
7830 cur
->top_field_first
= 0;
7834 //FIXME do something with unavailable reference frames
7836 /* Sort B-frames into display order */
7838 if(h
->sps
.bitstream_restriction_flag
7839 && s
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
){
7840 s
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
7844 if( s
->avctx
->strict_std_compliance
>= FF_COMPLIANCE_STRICT
7845 && !h
->sps
.bitstream_restriction_flag
){
7846 s
->avctx
->has_b_frames
= MAX_DELAYED_PIC_COUNT
;
7851 while(h
->delayed_pic
[pics
]) pics
++;
7853 assert(pics
<= MAX_DELAYED_PIC_COUNT
);
7855 h
->delayed_pic
[pics
++] = cur
;
7856 if(cur
->reference
== 0)
7857 cur
->reference
= DELAYED_PIC_REF
;
7859 out
= h
->delayed_pic
[0];
7861 for(i
=1; h
->delayed_pic
[i
] && (h
->delayed_pic
[i
]->poc
&& !h
->delayed_pic
[i
]->key_frame
); i
++)
7862 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7863 out
= h
->delayed_pic
[i
];
7866 cross_idr
= !h
->delayed_pic
[0]->poc
|| !!h
->delayed_pic
[i
] || h
->delayed_pic
[0]->key_frame
;
7868 out_of_order
= !cross_idr
&& out
->poc
< h
->outputed_poc
;
7870 if(h
->sps
.bitstream_restriction_flag
&& s
->avctx
->has_b_frames
>= h
->sps
.num_reorder_frames
)
7872 else if((out_of_order
&& pics
-1 == s
->avctx
->has_b_frames
&& s
->avctx
->has_b_frames
< MAX_DELAYED_PIC_COUNT
)
7874 ((!cross_idr
&& out
->poc
> h
->outputed_poc
+ 2)
7875 || cur
->pict_type
== FF_B_TYPE
)))
7878 s
->avctx
->has_b_frames
++;
7881 if(out_of_order
|| pics
> s
->avctx
->has_b_frames
){
7882 out
->reference
&= ~DELAYED_PIC_REF
;
7883 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7884 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7886 if(!out_of_order
&& pics
> s
->avctx
->has_b_frames
){
7887 *data_size
= sizeof(AVFrame
);
7889 h
->outputed_poc
= out
->poc
;
7890 *pict
= *(AVFrame
*)out
;
7892 av_log(avctx
, AV_LOG_DEBUG
, "no picture\n");
7897 assert(pict
->data
[0] || !*data_size
);
7898 ff_print_debug_info(s
, pict
);
7899 //printf("out %d\n", (int)pict->data[0]);
7902 /* Return the Picture timestamp as the frame number */
7903 /* we subtract 1 because it is added on utils.c */
7904 avctx
->frame_number
= s
->picture_number
- 1;
7906 return get_consumed_bytes(s
, buf_index
, buf_size
);
7909 static inline void fill_mb_avail(H264Context
*h
){
7910 MpegEncContext
* const s
= &h
->s
;
7911 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
7914 h
->mb_avail
[0]= s
->mb_x
&& h
->slice_table
[mb_xy
- s
->mb_stride
- 1] == h
->slice_num
;
7915 h
->mb_avail
[1]= h
->slice_table
[mb_xy
- s
->mb_stride
] == h
->slice_num
;
7916 h
->mb_avail
[2]= s
->mb_x
+1 < s
->mb_width
&& h
->slice_table
[mb_xy
- s
->mb_stride
+ 1] == h
->slice_num
;
7922 h
->mb_avail
[3]= s
->mb_x
&& h
->slice_table
[mb_xy
- 1] == h
->slice_num
;
7923 h
->mb_avail
[4]= 1; //FIXME move out
7924 h
->mb_avail
[5]= 0; //FIXME move out
7932 #define SIZE (COUNT*40)
7938 // int int_temp[10000];
7940 AVCodecContext avctx
;
7942 dsputil_init(&dsp
, &avctx
);
7944 init_put_bits(&pb
, temp
, SIZE
);
7945 printf("testing unsigned exp golomb\n");
7946 for(i
=0; i
<COUNT
; i
++){
7948 set_ue_golomb(&pb
, i
);
7949 STOP_TIMER("set_ue_golomb");
7951 flush_put_bits(&pb
);
7953 init_get_bits(&gb
, temp
, 8*SIZE
);
7954 for(i
=0; i
<COUNT
; i
++){
7957 s
= show_bits(&gb
, 24);
7960 j
= get_ue_golomb(&gb
);
7962 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7965 STOP_TIMER("get_ue_golomb");
7969 init_put_bits(&pb
, temp
, SIZE
);
7970 printf("testing signed exp golomb\n");
7971 for(i
=0; i
<COUNT
; i
++){
7973 set_se_golomb(&pb
, i
- COUNT
/2);
7974 STOP_TIMER("set_se_golomb");
7976 flush_put_bits(&pb
);
7978 init_get_bits(&gb
, temp
, 8*SIZE
);
7979 for(i
=0; i
<COUNT
; i
++){
7982 s
= show_bits(&gb
, 24);
7985 j
= get_se_golomb(&gb
);
7986 if(j
!= i
- COUNT
/2){
7987 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7990 STOP_TIMER("get_se_golomb");
7994 printf("testing 4x4 (I)DCT\n");
7997 uint8_t src
[16], ref
[16];
7998 uint64_t error
= 0, max_error
=0;
8000 for(i
=0; i
<COUNT
; i
++){
8002 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8003 for(j
=0; j
<16; j
++){
8004 ref
[j
]= random()%255;
8005 src
[j
]= random()%255;
8008 h264_diff_dct_c(block
, src
, ref
, 4);
8011 for(j
=0; j
<16; j
++){
8012 // printf("%d ", block[j]);
8013 block
[j
]= block
[j
]*4;
8014 if(j
&1) block
[j
]= (block
[j
]*4 + 2)/5;
8015 if(j
&4) block
[j
]= (block
[j
]*4 + 2)/5;
8019 s
->dsp
.h264_idct_add(ref
, block
, 4);
8020 /* for(j=0; j<16; j++){
8021 printf("%d ", ref[j]);
8025 for(j
=0; j
<16; j
++){
8026 int diff
= FFABS(src
[j
] - ref
[j
]);
8029 max_error
= FFMAX(max_error
, diff
);
8032 printf("error=%f max_error=%d\n", ((float)error
)/COUNT
/16, (int)max_error
);
8033 printf("testing quantizer\n");
8034 for(qp
=0; qp
<52; qp
++){
8036 src1_block
[i
]= src2_block
[i
]= random()%255;
8039 printf("Testing NAL layer\n");
8041 uint8_t bitstream
[COUNT
];
8042 uint8_t nal
[COUNT
*2];
8044 memset(&h
, 0, sizeof(H264Context
));
8046 for(i
=0; i
<COUNT
; i
++){
8054 for(j
=0; j
<COUNT
; j
++){
8055 bitstream
[j
]= (random() % 255) + 1;
8058 for(j
=0; j
<zeros
; j
++){
8059 int pos
= random() % COUNT
;
8060 while(bitstream
[pos
] == 0){
8069 nal_length
= encode_nal(&h
, nal
, bitstream
, COUNT
, COUNT
*2);
8071 printf("encoding failed\n");
8075 out
= ff_h264_decode_nal(&h
, nal
, &out_length
, &consumed
, nal_length
);
8079 if(out_length
!= COUNT
){
8080 printf("incorrect length %d %d\n", out_length
, COUNT
);
8084 if(consumed
!= nal_length
){
8085 printf("incorrect consumed length %d %d\n", nal_length
, consumed
);
8089 if(memcmp(bitstream
, out
, COUNT
)){
8090 printf("mismatch\n");
8096 printf("Testing RBSP\n");
8104 av_cold
void ff_h264_free_context(H264Context
*h
)
8108 av_freep(&h
->rbsp_buffer
[0]);
8109 av_freep(&h
->rbsp_buffer
[1]);
8110 free_tables(h
); //FIXME cleanup init stuff perhaps
8112 for(i
= 0; i
< MAX_SPS_COUNT
; i
++)
8113 av_freep(h
->sps_buffers
+ i
);
8115 for(i
= 0; i
< MAX_PPS_COUNT
; i
++)
8116 av_freep(h
->pps_buffers
+ i
);
8119 static av_cold
int decode_end(AVCodecContext
*avctx
)
8121 H264Context
*h
= avctx
->priv_data
;
8122 MpegEncContext
*s
= &h
->s
;
8124 ff_h264_free_context(h
);
8128 // memset(h, 0, sizeof(H264Context));
8134 AVCodec h264_decoder
= {
8138 sizeof(H264Context
),
8143 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1
| CODEC_CAP_DELAY
,
8145 .long_name
= NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8146 .pix_fmts
= ff_hwaccel_pixfmt_list_420
,
#if CONFIG_H264_VDPAU_DECODER
/* VDPAU-accelerated H.264 decoder registration.
 * NOTE(review): positional fields reconstructed (dropped by the extraction) —
 * confirm the codec id and pix_fmts against the original revision. */
AVCodec h264_vdpau_decoder = {
    "h264_vdpau",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H264,
    sizeof(H264Context),
    decode_init,
    NULL,
    decode_end,
    decode_frame,
    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
};
#endif
8165 #if CONFIG_SVQ3_DECODER