2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
51 static VLC coeff_token_vlc
[4];
52 static VLC_TYPE coeff_token_vlc_tables
[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size
[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc
;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table
[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size
= 256;
59 static VLC total_zeros_vlc
[15];
60 static VLC_TYPE total_zeros_vlc_tables
[15][512][2];
61 static const int total_zeros_vlc_tables_size
= 512;
63 static VLC chroma_dc_total_zeros_vlc
[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables
[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size
= 8;
67 static VLC run_vlc
[6];
68 static VLC_TYPE run_vlc_tables
[6][8][2];
69 static const int run_vlc_tables_size
= 8;
72 static VLC_TYPE run7_vlc_table
[96][2];
73 static const int run7_vlc_table_size
= 96;
75 extern int ff_VDPAU_h264_set_reference_frames(H264Context
*h
);
76 extern int ff_VDPAU_h264_picture_complete(H264Context
*h
, const uint8_t *buf
, int buf_size
);
77 extern void ff_VDPAU_h264_set_reference_frames_count(H264Context
*h
);
79 static void svq3_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
);
80 static void svq3_add_idct_c(uint8_t *dst
, DCTELEM
*block
, int stride
, int qp
, int dc
);
81 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
82 static void filter_mb_fast( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
83 static Picture
* remove_long(H264Context
*h
, int i
, int ref_mask
);
85 static av_always_inline
uint32_t pack16to32(int a
, int b
){
86 #ifdef WORDS_BIGENDIAN
87 return (b
&0xFFFF) + (a
<<16);
89 return (a
&0xFFFF) + (b
<<16);
/* rem6[i] == i % 6 for the 52 legal QP values, avoiding a runtime modulo. */
static const uint8_t rem6[52]={
    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
    0, 1, 2, 3,
};
/* div6[i] == i / 6 for the 52 legal QP values, avoiding a runtime division. */
static const uint8_t div6[52]={
    0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7,
    8, 8, 8, 8,
};
101 static const int left_block_options
[4][8]={
108 static const enum PixelFormat pixfmt_vdpau_h264_baseline_420
[] = {
109 PIX_FMT_VDPAU_H264_BASELINE
,
111 static const enum PixelFormat pixfmt_vdpau_h264_main_420
[] = {
112 PIX_FMT_VDPAU_H264_MAIN
,
114 static const enum PixelFormat pixfmt_vdpau_h264_high_420
[] = {
115 PIX_FMT_VDPAU_H264_HIGH
,
118 static void fill_caches(H264Context
*h
, int mb_type
, int for_deblock
){
119 MpegEncContext
* const s
= &h
->s
;
120 const int mb_xy
= h
->mb_xy
;
121 int topleft_xy
, top_xy
, topright_xy
, left_xy
[2];
122 int topleft_type
, top_type
, topright_type
, left_type
[2];
124 int topleft_partition
= -1;
127 top_xy
= mb_xy
- (s
->mb_stride
<< FIELD_PICTURE
);
129 //FIXME deblocking could skip the intra and nnz parts.
130 if(for_deblock
&& (h
->slice_num
== 1 || h
->slice_table
[mb_xy
] == h
->slice_table
[top_xy
]) && !FRAME_MBAFF
)
133 /* Wow, what a mess, why didn't they simplify the interlacing & intra
134 * stuff, I can't imagine that these complex rules are worth it. */
136 topleft_xy
= top_xy
- 1;
137 topright_xy
= top_xy
+ 1;
138 left_xy
[1] = left_xy
[0] = mb_xy
-1;
139 left_block
= left_block_options
[0];
141 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
142 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
143 const int topleft_pair_xy
= top_pair_xy
- 1;
144 const int topright_pair_xy
= top_pair_xy
+ 1;
145 const int topleft_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topleft_pair_xy
]);
146 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
147 const int topright_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topright_pair_xy
]);
148 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
149 const int curr_mb_frame_flag
= !IS_INTERLACED(mb_type
);
150 const int bottom
= (s
->mb_y
& 1);
151 tprintf(s
->avctx
, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag
, left_mb_frame_flag
, topleft_mb_frame_flag
, top_mb_frame_flag
, topright_mb_frame_flag
);
153 ? !curr_mb_frame_flag
// bottom macroblock
154 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
156 top_xy
-= s
->mb_stride
;
159 ? !curr_mb_frame_flag
// bottom macroblock
160 : (!curr_mb_frame_flag
&& !topleft_mb_frame_flag
) // top macroblock
162 topleft_xy
-= s
->mb_stride
;
163 } else if(bottom
&& curr_mb_frame_flag
&& !left_mb_frame_flag
) {
164 topleft_xy
+= s
->mb_stride
;
165 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
166 topleft_partition
= 0;
169 ? !curr_mb_frame_flag
// bottom macroblock
170 : (!curr_mb_frame_flag
&& !topright_mb_frame_flag
) // top macroblock
172 topright_xy
-= s
->mb_stride
;
174 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
175 left_xy
[1] = left_xy
[0] = pair_xy
- 1;
176 if (curr_mb_frame_flag
) {
178 left_block
= left_block_options
[1];
180 left_block
= left_block_options
[2];
183 left_xy
[1] += s
->mb_stride
;
184 left_block
= left_block_options
[3];
189 h
->top_mb_xy
= top_xy
;
190 h
->left_mb_xy
[0] = left_xy
[0];
191 h
->left_mb_xy
[1] = left_xy
[1];
195 top_type
= h
->slice_table
[top_xy
] < 0xFFFF ? s
->current_picture
.mb_type
[top_xy
] : 0;
196 left_type
[0] = h
->slice_table
[left_xy
[0] ] < 0xFFFF ? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
197 left_type
[1] = h
->slice_table
[left_xy
[1] ] < 0xFFFF ? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
199 if(MB_MBAFF
&& !IS_INTRA(mb_type
)){
201 for(list
=0; list
<h
->list_count
; list
++){
202 //These values where changed for ease of performing MC, we need to change them back
203 //FIXME maybe we can make MC and loop filter use the same values or prevent
204 //the MC code from changing ref_cache and rather use a temporary array.
205 if(USES_LIST(mb_type
,list
)){
206 int8_t *ref
= &s
->current_picture
.ref_index
[list
][h
->mb2b8_xy
[mb_xy
]];
207 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 0]] =
208 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 2]] = (pack16to32(ref
[0],ref
[1])&0x00FF00FF)*0x0101;
210 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 8]] =
211 *(uint32_t*)&h
->ref_cache
[list
][scan8
[10]] = (pack16to32(ref
[0],ref
[1])&0x00FF00FF)*0x0101;
216 topleft_type
= h
->slice_table
[topleft_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topleft_xy
] : 0;
217 top_type
= h
->slice_table
[top_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[top_xy
] : 0;
218 topright_type
= h
->slice_table
[topright_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topright_xy
]: 0;
219 left_type
[0] = h
->slice_table
[left_xy
[0] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
220 left_type
[1] = h
->slice_table
[left_xy
[1] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
222 if(IS_INTRA(mb_type
)){
223 int type_mask
= h
->pps
.constrained_intra_pred
? IS_INTRA(-1) : -1;
224 h
->topleft_samples_available
=
225 h
->top_samples_available
=
226 h
->left_samples_available
= 0xFFFF;
227 h
->topright_samples_available
= 0xEEEA;
229 if(!(top_type
& type_mask
)){
230 h
->topleft_samples_available
= 0xB3FF;
231 h
->top_samples_available
= 0x33FF;
232 h
->topright_samples_available
= 0x26EA;
234 if(IS_INTERLACED(mb_type
) != IS_INTERLACED(left_type
[0])){
235 if(IS_INTERLACED(mb_type
)){
236 if(!(left_type
[0] & type_mask
)){
237 h
->topleft_samples_available
&= 0xDFFF;
238 h
->left_samples_available
&= 0x5FFF;
240 if(!(left_type
[1] & type_mask
)){
241 h
->topleft_samples_available
&= 0xFF5F;
242 h
->left_samples_available
&= 0xFF5F;
245 int left_typei
= h
->slice_table
[left_xy
[0] + s
->mb_stride
] == h
->slice_num
246 ? s
->current_picture
.mb_type
[left_xy
[0] + s
->mb_stride
] : 0;
247 assert(left_xy
[0] == left_xy
[1]);
248 if(!((left_typei
& type_mask
) && (left_type
[0] & type_mask
))){
249 h
->topleft_samples_available
&= 0xDF5F;
250 h
->left_samples_available
&= 0x5F5F;
254 if(!(left_type
[0] & type_mask
)){
255 h
->topleft_samples_available
&= 0xDF5F;
256 h
->left_samples_available
&= 0x5F5F;
260 if(!(topleft_type
& type_mask
))
261 h
->topleft_samples_available
&= 0x7FFF;
263 if(!(topright_type
& type_mask
))
264 h
->topright_samples_available
&= 0xFBFF;
266 if(IS_INTRA4x4(mb_type
)){
267 if(IS_INTRA4x4(top_type
)){
268 h
->intra4x4_pred_mode_cache
[4+8*0]= h
->intra4x4_pred_mode
[top_xy
][4];
269 h
->intra4x4_pred_mode_cache
[5+8*0]= h
->intra4x4_pred_mode
[top_xy
][5];
270 h
->intra4x4_pred_mode_cache
[6+8*0]= h
->intra4x4_pred_mode
[top_xy
][6];
271 h
->intra4x4_pred_mode_cache
[7+8*0]= h
->intra4x4_pred_mode
[top_xy
][3];
274 if(!(top_type
& type_mask
))
279 h
->intra4x4_pred_mode_cache
[4+8*0]=
280 h
->intra4x4_pred_mode_cache
[5+8*0]=
281 h
->intra4x4_pred_mode_cache
[6+8*0]=
282 h
->intra4x4_pred_mode_cache
[7+8*0]= pred
;
285 if(IS_INTRA4x4(left_type
[i
])){
286 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[0+2*i
]];
287 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[1+2*i
]];
290 if(!(left_type
[i
] & type_mask
))
295 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]=
296 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= pred
;
312 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
314 h
->non_zero_count_cache
[4+8*0]= h
->non_zero_count
[top_xy
][4];
315 h
->non_zero_count_cache
[5+8*0]= h
->non_zero_count
[top_xy
][5];
316 h
->non_zero_count_cache
[6+8*0]= h
->non_zero_count
[top_xy
][6];
317 h
->non_zero_count_cache
[7+8*0]= h
->non_zero_count
[top_xy
][3];
319 h
->non_zero_count_cache
[1+8*0]= h
->non_zero_count
[top_xy
][9];
320 h
->non_zero_count_cache
[2+8*0]= h
->non_zero_count
[top_xy
][8];
322 h
->non_zero_count_cache
[1+8*3]= h
->non_zero_count
[top_xy
][12];
323 h
->non_zero_count_cache
[2+8*3]= h
->non_zero_count
[top_xy
][11];
326 h
->non_zero_count_cache
[4+8*0]=
327 h
->non_zero_count_cache
[5+8*0]=
328 h
->non_zero_count_cache
[6+8*0]=
329 h
->non_zero_count_cache
[7+8*0]=
331 h
->non_zero_count_cache
[1+8*0]=
332 h
->non_zero_count_cache
[2+8*0]=
334 h
->non_zero_count_cache
[1+8*3]=
335 h
->non_zero_count_cache
[2+8*3]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
339 for (i
=0; i
<2; i
++) {
341 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[0+2*i
]];
342 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[1+2*i
]];
343 h
->non_zero_count_cache
[0+8*1 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[4+2*i
]];
344 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[5+2*i
]];
346 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]=
347 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]=
348 h
->non_zero_count_cache
[0+8*1 + 8*i
]=
349 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
356 h
->top_cbp
= h
->cbp_table
[top_xy
];
357 } else if(IS_INTRA(mb_type
)) {
364 h
->left_cbp
= h
->cbp_table
[left_xy
[0]] & 0x1f0;
365 } else if(IS_INTRA(mb_type
)) {
371 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[0]]>>((left_block
[0]&(~1))+1))&0x1) << 1;
374 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[1]]>>((left_block
[2]&(~1))+1))&0x1) << 3;
379 if(IS_INTER(mb_type
) || IS_DIRECT(mb_type
)){
381 for(list
=0; list
<h
->list_count
; list
++){
382 if(!USES_LIST(mb_type
, list
) && !IS_DIRECT(mb_type
) && !h
->deblocking_filter
){
383 /*if(!h->mv_cache_clean[list]){
384 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
385 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
386 h->mv_cache_clean[list]= 1;
390 h
->mv_cache_clean
[list
]= 0;
392 if(USES_LIST(top_type
, list
)){
393 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
394 const int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
395 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0];
396 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 1];
397 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2];
398 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 3];
399 h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]=
400 h
->ref_cache
[list
][scan8
[0] + 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 0];
401 h
->ref_cache
[list
][scan8
[0] + 2 - 1*8]=
402 h
->ref_cache
[list
][scan8
[0] + 3 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 1];
404 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]=
405 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]=
406 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]=
407 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
408 *(uint32_t*)&h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]= ((top_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
)&0xFF)*0x01010101;
412 int cache_idx
= scan8
[0] - 1 + i
*2*8;
413 if(USES_LIST(left_type
[i
], list
)){
414 const int b_xy
= h
->mb2b_xy
[left_xy
[i
]] + 3;
415 const int b8_xy
= h
->mb2b8_xy
[left_xy
[i
]] + 1;
416 *(uint32_t*)h
->mv_cache
[list
][cache_idx
]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[0+i
*2]];
417 *(uint32_t*)h
->mv_cache
[list
][cache_idx
+8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[1+i
*2]];
418 h
->ref_cache
[list
][cache_idx
]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[0+i
*2]>>1)];
419 h
->ref_cache
[list
][cache_idx
+8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[1+i
*2]>>1)];
421 *(uint32_t*)h
->mv_cache
[list
][cache_idx
]=
422 *(uint32_t*)h
->mv_cache
[list
][cache_idx
+8]= 0;
423 h
->ref_cache
[list
][cache_idx
]=
424 h
->ref_cache
[list
][cache_idx
+8]= left_type
[i
] ? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
428 if(for_deblock
|| ((IS_DIRECT(mb_type
) && !h
->direct_spatial_mv_pred
) && !FRAME_MBAFF
))
431 if(USES_LIST(topleft_type
, list
)){
432 const int b_xy
= h
->mb2b_xy
[topleft_xy
] + 3 + h
->b_stride
+ (topleft_partition
& 2*h
->b_stride
);
433 const int b8_xy
= h
->mb2b8_xy
[topleft_xy
] + 1 + (topleft_partition
& h
->b8_stride
);
434 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
435 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
437 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= 0;
438 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= topleft_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
441 if(USES_LIST(topright_type
, list
)){
442 const int b_xy
= h
->mb2b_xy
[topright_xy
] + 3*h
->b_stride
;
443 const int b8_xy
= h
->mb2b8_xy
[topright_xy
] + h
->b8_stride
;
444 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
445 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
447 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= 0;
448 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= topright_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
451 if((IS_SKIP(mb_type
) || IS_DIRECT(mb_type
)) && !FRAME_MBAFF
)
454 h
->ref_cache
[list
][scan8
[5 ]+1] =
455 h
->ref_cache
[list
][scan8
[7 ]+1] =
456 h
->ref_cache
[list
][scan8
[13]+1] = //FIXME remove past 3 (init somewhere else)
457 h
->ref_cache
[list
][scan8
[4 ]] =
458 h
->ref_cache
[list
][scan8
[12]] = PART_NOT_AVAILABLE
;
459 *(uint32_t*)h
->mv_cache
[list
][scan8
[5 ]+1]=
460 *(uint32_t*)h
->mv_cache
[list
][scan8
[7 ]+1]=
461 *(uint32_t*)h
->mv_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
462 *(uint32_t*)h
->mv_cache
[list
][scan8
[4 ]]=
463 *(uint32_t*)h
->mv_cache
[list
][scan8
[12]]= 0;
466 /* XXX beurk, Load mvd */
467 if(USES_LIST(top_type
, list
)){
468 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
469 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 0];
470 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 1];
471 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 2];
472 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 3];
474 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]=
475 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]=
476 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]=
477 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
479 if(USES_LIST(left_type
[0], list
)){
480 const int b_xy
= h
->mb2b_xy
[left_xy
[0]] + 3;
481 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[0]];
482 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[1]];
484 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]=
485 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= 0;
487 if(USES_LIST(left_type
[1], list
)){
488 const int b_xy
= h
->mb2b_xy
[left_xy
[1]] + 3;
489 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[2]];
490 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[3]];
492 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]=
493 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= 0;
495 *(uint32_t*)h
->mvd_cache
[list
][scan8
[5 ]+1]=
496 *(uint32_t*)h
->mvd_cache
[list
][scan8
[7 ]+1]=
497 *(uint32_t*)h
->mvd_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
498 *(uint32_t*)h
->mvd_cache
[list
][scan8
[4 ]]=
499 *(uint32_t*)h
->mvd_cache
[list
][scan8
[12]]= 0;
501 if(h
->slice_type_nos
== FF_B_TYPE
){
502 fill_rectangle(&h
->direct_cache
[scan8
[0]], 4, 4, 8, 0, 1);
504 if(IS_DIRECT(top_type
)){
505 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0x01010101;
506 }else if(IS_8X8(top_type
)){
507 int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
508 h
->direct_cache
[scan8
[0] + 0 - 1*8]= h
->direct_table
[b8_xy
];
509 h
->direct_cache
[scan8
[0] + 2 - 1*8]= h
->direct_table
[b8_xy
+ 1];
511 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0;
514 if(IS_DIRECT(left_type
[0]))
515 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 1;
516 else if(IS_8X8(left_type
[0]))
517 h
->direct_cache
[scan8
[0] - 1 + 0*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[0]] + 1 + h
->b8_stride
*(left_block
[0]>>1)];
519 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 0;
521 if(IS_DIRECT(left_type
[1]))
522 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 1;
523 else if(IS_8X8(left_type
[1]))
524 h
->direct_cache
[scan8
[0] - 1 + 2*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[1]] + 1 + h
->b8_stride
*(left_block
[2]>>1)];
526 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 0;
532 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
533 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
534 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
535 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
536 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
537 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
538 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
539 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
540 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
541 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
543 #define MAP_F2F(idx, mb_type)\
544 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
545 h->ref_cache[list][idx] <<= 1;\
546 h->mv_cache[list][idx][1] /= 2;\
547 h->mvd_cache[list][idx][1] /= 2;\
552 #define MAP_F2F(idx, mb_type)\
553 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
554 h->ref_cache[list][idx] >>= 1;\
555 h->mv_cache[list][idx][1] <<= 1;\
556 h->mvd_cache[list][idx][1] <<= 1;\
566 h
->neighbor_transform_size
= !!IS_8x8DCT(top_type
) + !!IS_8x8DCT(left_type
[0]);
569 static inline void write_back_intra_pred_mode(H264Context
*h
){
570 const int mb_xy
= h
->mb_xy
;
572 h
->intra4x4_pred_mode
[mb_xy
][0]= h
->intra4x4_pred_mode_cache
[7+8*1];
573 h
->intra4x4_pred_mode
[mb_xy
][1]= h
->intra4x4_pred_mode_cache
[7+8*2];
574 h
->intra4x4_pred_mode
[mb_xy
][2]= h
->intra4x4_pred_mode_cache
[7+8*3];
575 h
->intra4x4_pred_mode
[mb_xy
][3]= h
->intra4x4_pred_mode_cache
[7+8*4];
576 h
->intra4x4_pred_mode
[mb_xy
][4]= h
->intra4x4_pred_mode_cache
[4+8*4];
577 h
->intra4x4_pred_mode
[mb_xy
][5]= h
->intra4x4_pred_mode_cache
[5+8*4];
578 h
->intra4x4_pred_mode
[mb_xy
][6]= h
->intra4x4_pred_mode_cache
[6+8*4];
582 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
584 static inline int check_intra4x4_pred_mode(H264Context
*h
){
585 MpegEncContext
* const s
= &h
->s
;
586 static const int8_t top
[12]= {-1, 0,LEFT_DC_PRED
,-1,-1,-1,-1,-1, 0};
587 static const int8_t left
[12]= { 0,-1, TOP_DC_PRED
, 0,-1,-1,-1, 0,-1,DC_128_PRED
};
590 if(!(h
->top_samples_available
&0x8000)){
592 int status
= top
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + i
] ];
594 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
597 h
->intra4x4_pred_mode_cache
[scan8
[0] + i
]= status
;
602 if((h
->left_samples_available
&0x8888)!=0x8888){
603 static const int mask
[4]={0x8000,0x2000,0x80,0x20};
605 if(!(h
->left_samples_available
&mask
[i
])){
606 int status
= left
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
] ];
608 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
611 h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
]= status
;
618 } //FIXME cleanup like next
621 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
623 static inline int check_intra_pred_mode(H264Context
*h
, int mode
){
624 MpegEncContext
* const s
= &h
->s
;
625 static const int8_t top
[7]= {LEFT_DC_PRED8x8
, 1,-1,-1};
626 static const int8_t left
[7]= { TOP_DC_PRED8x8
,-1, 2,-1,DC_128_PRED8x8
};
629 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "out of range intra chroma pred mode at %d %d\n", s
->mb_x
, s
->mb_y
);
633 if(!(h
->top_samples_available
&0x8000)){
636 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
641 if((h
->left_samples_available
&0x8080) != 0x8080){
643 if(h
->left_samples_available
&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
644 mode
= ALZHEIMER_DC_L0T_PRED8x8
+ (!(h
->left_samples_available
&0x8000)) + 2*(mode
== DC_128_PRED8x8
);
647 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
656 * gets the predicted intra4x4 prediction mode.
658 static inline int pred_intra_mode(H264Context
*h
, int n
){
659 const int index8
= scan8
[n
];
660 const int left
= h
->intra4x4_pred_mode_cache
[index8
- 1];
661 const int top
= h
->intra4x4_pred_mode_cache
[index8
- 8];
662 const int min
= FFMIN(left
, top
);
664 tprintf(h
->s
.avctx
, "mode:%d %d min:%d\n", left
,top
, min
);
666 if(min
<0) return DC_PRED
;
670 static inline void write_back_non_zero_count(H264Context
*h
){
671 const int mb_xy
= h
->mb_xy
;
673 h
->non_zero_count
[mb_xy
][0]= h
->non_zero_count_cache
[7+8*1];
674 h
->non_zero_count
[mb_xy
][1]= h
->non_zero_count_cache
[7+8*2];
675 h
->non_zero_count
[mb_xy
][2]= h
->non_zero_count_cache
[7+8*3];
676 h
->non_zero_count
[mb_xy
][3]= h
->non_zero_count_cache
[7+8*4];
677 h
->non_zero_count
[mb_xy
][4]= h
->non_zero_count_cache
[4+8*4];
678 h
->non_zero_count
[mb_xy
][5]= h
->non_zero_count_cache
[5+8*4];
679 h
->non_zero_count
[mb_xy
][6]= h
->non_zero_count_cache
[6+8*4];
681 h
->non_zero_count
[mb_xy
][9]= h
->non_zero_count_cache
[1+8*2];
682 h
->non_zero_count
[mb_xy
][8]= h
->non_zero_count_cache
[2+8*2];
683 h
->non_zero_count
[mb_xy
][7]= h
->non_zero_count_cache
[2+8*1];
685 h
->non_zero_count
[mb_xy
][12]=h
->non_zero_count_cache
[1+8*5];
686 h
->non_zero_count
[mb_xy
][11]=h
->non_zero_count_cache
[2+8*5];
687 h
->non_zero_count
[mb_xy
][10]=h
->non_zero_count_cache
[2+8*4];
691 * gets the predicted number of non-zero coefficients.
692 * @param n block index
694 static inline int pred_non_zero_count(H264Context
*h
, int n
){
695 const int index8
= scan8
[n
];
696 const int left
= h
->non_zero_count_cache
[index8
- 1];
697 const int top
= h
->non_zero_count_cache
[index8
- 8];
700 if(i
<64) i
= (i
+1)>>1;
702 tprintf(h
->s
.avctx
, "pred_nnz L%X T%X n%d s%d P%X\n", left
, top
, n
, scan8
[n
], i
&31);
707 static inline int fetch_diagonal_mv(H264Context
*h
, const int16_t **C
, int i
, int list
, int part_width
){
708 const int topright_ref
= h
->ref_cache
[list
][ i
- 8 + part_width
];
709 MpegEncContext
*s
= &h
->s
;
711 /* there is no consistent mapping of mvs to neighboring locations that will
712 * make mbaff happy, so we can't move all this logic to fill_caches */
714 const uint32_t *mb_types
= s
->current_picture_ptr
->mb_type
;
716 *(uint32_t*)h
->mv_cache
[list
][scan8
[0]-2] = 0;
717 *C
= h
->mv_cache
[list
][scan8
[0]-2];
720 && (s
->mb_y
&1) && i
< scan8
[0]+8 && topright_ref
!= PART_NOT_AVAILABLE
){
721 int topright_xy
= s
->mb_x
+ (s
->mb_y
-1)*s
->mb_stride
+ (i
== scan8
[0]+3);
722 if(IS_INTERLACED(mb_types
[topright_xy
])){
723 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
724 const int x4 = X4, y4 = Y4;\
725 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
726 if(!USES_LIST(mb_type,list))\
727 return LIST_NOT_USED;\
728 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
729 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
730 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
731 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
733 SET_DIAG_MV(*2, >>1, s
->mb_x
*4+(i
&7)-4+part_width
, s
->mb_y
*4-1);
736 if(topright_ref
== PART_NOT_AVAILABLE
737 && ((s
->mb_y
&1) || i
>= scan8
[0]+8) && (i
&7)==4
738 && h
->ref_cache
[list
][scan8
[0]-1] != PART_NOT_AVAILABLE
){
740 && IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])){
741 SET_DIAG_MV(*2, >>1, s
->mb_x
*4-1, (s
->mb_y
|1)*4+(s
->mb_y
&1)*2+(i
>>4)-1);
744 && !IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])
746 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
747 SET_DIAG_MV(/2, <<1, s
->mb_x
*4-1, (s
->mb_y
&~1)*4 - 1 + ((i
-scan8
[0])>>3)*2);
753 if(topright_ref
!= PART_NOT_AVAILABLE
){
754 *C
= h
->mv_cache
[list
][ i
- 8 + part_width
];
757 tprintf(s
->avctx
, "topright MV not available\n");
759 *C
= h
->mv_cache
[list
][ i
- 8 - 1 ];
760 return h
->ref_cache
[list
][ i
- 8 - 1 ];
765 * gets the predicted MV.
766 * @param n the block index
767 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
768 * @param mx the x component of the predicted motion vector
769 * @param my the y component of the predicted motion vector
771 static inline void pred_motion(H264Context
* const h
, int n
, int part_width
, int list
, int ref
, int * const mx
, int * const my
){
772 const int index8
= scan8
[n
];
773 const int top_ref
= h
->ref_cache
[list
][ index8
- 8 ];
774 const int left_ref
= h
->ref_cache
[list
][ index8
- 1 ];
775 const int16_t * const A
= h
->mv_cache
[list
][ index8
- 1 ];
776 const int16_t * const B
= h
->mv_cache
[list
][ index8
- 8 ];
778 int diagonal_ref
, match_count
;
780 assert(part_width
==1 || part_width
==2 || part_width
==4);
790 diagonal_ref
= fetch_diagonal_mv(h
, &C
, index8
, list
, part_width
);
791 match_count
= (diagonal_ref
==ref
) + (top_ref
==ref
) + (left_ref
==ref
);
792 tprintf(h
->s
.avctx
, "pred_motion match_count=%d\n", match_count
);
793 if(match_count
> 1){ //most common
794 *mx
= mid_pred(A
[0], B
[0], C
[0]);
795 *my
= mid_pred(A
[1], B
[1], C
[1]);
796 }else if(match_count
==1){
800 }else if(top_ref
==ref
){
808 if(top_ref
== PART_NOT_AVAILABLE
&& diagonal_ref
== PART_NOT_AVAILABLE
&& left_ref
!= PART_NOT_AVAILABLE
){
812 *mx
= mid_pred(A
[0], B
[0], C
[0]);
813 *my
= mid_pred(A
[1], B
[1], C
[1]);
817 tprintf(h
->s
.avctx
, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], diagonal_ref
, C
[0], C
[1], left_ref
, A
[0], A
[1], ref
, *mx
, *my
, h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
821 * gets the directionally predicted 16x8 MV.
822 * @param n the block index
823 * @param mx the x component of the predicted motion vector
824 * @param my the y component of the predicted motion vector
826 static inline void pred_16x8_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
828 const int top_ref
= h
->ref_cache
[list
][ scan8
[0] - 8 ];
829 const int16_t * const B
= h
->mv_cache
[list
][ scan8
[0] - 8 ];
831 tprintf(h
->s
.avctx
, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
839 const int left_ref
= h
->ref_cache
[list
][ scan8
[8] - 1 ];
840 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[8] - 1 ];
842 tprintf(h
->s
.avctx
, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
852 pred_motion(h
, n
, 4, list
, ref
, mx
, my
);
856 * gets the directionally predicted 8x16 MV.
857 * @param n the block index
858 * @param mx the x component of the predicted motion vector
859 * @param my the y component of the predicted motion vector
861 static inline void pred_8x16_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
863 const int left_ref
= h
->ref_cache
[list
][ scan8
[0] - 1 ];
864 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[0] - 1 ];
866 tprintf(h
->s
.avctx
, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
877 diagonal_ref
= fetch_diagonal_mv(h
, &C
, scan8
[4], list
, 2);
879 tprintf(h
->s
.avctx
, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref
, C
[0], C
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
881 if(diagonal_ref
== ref
){
889 pred_motion(h
, n
, 2, list
, ref
, mx
, my
);
892 static inline void pred_pskip_motion(H264Context
* const h
, int * const mx
, int * const my
){
893 const int top_ref
= h
->ref_cache
[0][ scan8
[0] - 8 ];
894 const int left_ref
= h
->ref_cache
[0][ scan8
[0] - 1 ];
896 tprintf(h
->s
.avctx
, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref
, left_ref
, h
->s
.mb_x
, h
->s
.mb_y
);
898 if(top_ref
== PART_NOT_AVAILABLE
|| left_ref
== PART_NOT_AVAILABLE
899 || (top_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 8 ] == 0)
900 || (left_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 1 ] == 0)){
906 pred_motion(h
, 0, 4, 0, 0, mx
, my
);
911 static int get_scale_factor(H264Context
* const h
, int poc
, int poc1
, int i
){
912 int poc0
= h
->ref_list
[0][i
].poc
;
913 int td
= av_clip(poc1
- poc0
, -128, 127);
914 if(td
== 0 || h
->ref_list
[0][i
].long_ref
){
917 int tb
= av_clip(poc
- poc0
, -128, 127);
918 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
919 return av_clip((tb
*tx
+ 32) >> 6, -1024, 1023);
923 static inline void direct_dist_scale_factor(H264Context
* const h
){
924 MpegEncContext
* const s
= &h
->s
;
925 const int poc
= h
->s
.current_picture_ptr
->field_poc
[ s
->picture_structure
== PICT_BOTTOM_FIELD
];
926 const int poc1
= h
->ref_list
[1][0].poc
;
928 for(field
=0; field
<2; field
++){
929 const int poc
= h
->s
.current_picture_ptr
->field_poc
[field
];
930 const int poc1
= h
->ref_list
[1][0].field_poc
[field
];
931 for(i
=0; i
< 2*h
->ref_count
[0]; i
++)
932 h
->dist_scale_factor_field
[field
][i
^field
] = get_scale_factor(h
, poc
, poc1
, i
+16);
935 for(i
=0; i
<h
->ref_count
[0]; i
++){
936 h
->dist_scale_factor
[i
] = get_scale_factor(h
, poc
, poc1
, i
);
940 static void fill_colmap(H264Context
*h
, int map
[2][16+32], int list
, int field
, int colfield
, int mbafi
){
941 MpegEncContext
* const s
= &h
->s
;
942 Picture
* const ref1
= &h
->ref_list
[1][0];
943 int j
, old_ref
, rfield
;
944 int start
= mbafi
? 16 : 0;
945 int end
= mbafi
? 16+2*h
->ref_count
[list
] : h
->ref_count
[list
];
946 int interl
= mbafi
|| s
->picture_structure
!= PICT_FRAME
;
948 /* bogus; fills in for missing frames */
949 memset(map
[list
], 0, sizeof(map
[list
]));
951 for(rfield
=0; rfield
<2; rfield
++){
952 for(old_ref
=0; old_ref
<ref1
->ref_count
[colfield
][list
]; old_ref
++){
953 int poc
= ref1
->ref_poc
[colfield
][list
][old_ref
];
957 else if( interl
&& (poc
&3) == 3) //FIXME store all MBAFF references so this isnt needed
958 poc
= (poc
&~3) + rfield
+ 1;
960 for(j
=start
; j
<end
; j
++){
961 if(4*h
->ref_list
[list
][j
].frame_num
+ (h
->ref_list
[list
][j
].reference
&3) == poc
){
962 int cur_ref
= mbafi
? (j
-16)^field
: j
;
963 map
[list
][2*old_ref
+ (rfield
^field
) + 16] = cur_ref
;
965 map
[list
][old_ref
] = cur_ref
;
973 static inline void direct_ref_list_init(H264Context
* const h
){
974 MpegEncContext
* const s
= &h
->s
;
975 Picture
* const ref1
= &h
->ref_list
[1][0];
976 Picture
* const cur
= s
->current_picture_ptr
;
978 int sidx
= (s
->picture_structure
&1)^1;
979 int ref1sidx
= (ref1
->reference
&1)^1;
981 for(list
=0; list
<2; list
++){
982 cur
->ref_count
[sidx
][list
] = h
->ref_count
[list
];
983 for(j
=0; j
<h
->ref_count
[list
]; j
++)
984 cur
->ref_poc
[sidx
][list
][j
] = 4*h
->ref_list
[list
][j
].frame_num
+ (h
->ref_list
[list
][j
].reference
&3);
987 if(s
->picture_structure
== PICT_FRAME
){
988 memcpy(cur
->ref_count
[1], cur
->ref_count
[0], sizeof(cur
->ref_count
[0]));
989 memcpy(cur
->ref_poc
[1], cur
->ref_poc
[0], sizeof(cur
->ref_poc
[0]));
992 cur
->mbaff
= FRAME_MBAFF
;
994 if(cur
->pict_type
!= FF_B_TYPE
|| h
->direct_spatial_mv_pred
)
997 for(list
=0; list
<2; list
++){
998 fill_colmap(h
, h
->map_col_to_list0
, list
, sidx
, ref1sidx
, 0);
999 for(field
=0; field
<2; field
++)
1000 fill_colmap(h
, h
->map_col_to_list0_field
[field
], list
, field
, field
, 1);
1004 static inline void pred_direct_motion(H264Context
* const h
, int *mb_type
){
1005 MpegEncContext
* const s
= &h
->s
;
1006 int b8_stride
= h
->b8_stride
;
1007 int b4_stride
= h
->b_stride
;
1008 int mb_xy
= h
->mb_xy
;
1010 const int16_t (*l1mv0
)[2], (*l1mv1
)[2];
1011 const int8_t *l1ref0
, *l1ref1
;
1012 const int is_b8x8
= IS_8X8(*mb_type
);
1013 unsigned int sub_mb_type
;
1016 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1018 if(IS_INTERLACED(h
->ref_list
[1][0].mb_type
[mb_xy
])){ // AFL/AFR/FR/FL -> AFL/FL
1019 if(!IS_INTERLACED(*mb_type
)){ // AFR/FR -> AFL/FL
1020 int cur_poc
= s
->current_picture_ptr
->poc
;
1021 int *col_poc
= h
->ref_list
[1]->field_poc
;
1022 int col_parity
= FFABS(col_poc
[0] - cur_poc
) >= FFABS(col_poc
[1] - cur_poc
);
1023 mb_xy
= s
->mb_x
+ ((s
->mb_y
&~1) + col_parity
)*s
->mb_stride
;
1025 }else if(!(s
->picture_structure
& h
->ref_list
[1][0].reference
) && !h
->ref_list
[1][0].mbaff
){// FL -> FL & differ parity
1026 int fieldoff
= 2*(h
->ref_list
[1][0].reference
)-3;
1027 mb_xy
+= s
->mb_stride
*fieldoff
;
1030 }else{ // AFL/AFR/FR/FL -> AFR/FR
1031 if(IS_INTERLACED(*mb_type
)){ // AFL /FL -> AFR/FR
1032 mb_xy
= s
->mb_x
+ (s
->mb_y
&~1)*s
->mb_stride
;
1033 mb_type_col
[0] = h
->ref_list
[1][0].mb_type
[mb_xy
];
1034 mb_type_col
[1] = h
->ref_list
[1][0].mb_type
[mb_xy
+ s
->mb_stride
];
1037 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1038 if( (mb_type_col
[0] & MB_TYPE_16x16_OR_INTRA
)
1039 && (mb_type_col
[1] & MB_TYPE_16x16_OR_INTRA
)
1041 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1042 *mb_type
|= MB_TYPE_16x8
|MB_TYPE_L0L1
|MB_TYPE_DIRECT2
; /* B_16x8 */
1044 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1045 *mb_type
|= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1047 }else{ // AFR/FR -> AFR/FR
1050 mb_type_col
[1] = h
->ref_list
[1][0].mb_type
[mb_xy
];
1051 if(IS_8X8(mb_type_col
[0]) && !h
->sps
.direct_8x8_inference_flag
){
1052 /* FIXME save sub mb types from previous frames (or derive from MVs)
1053 * so we know exactly what block size to use */
1054 sub_mb_type
= MB_TYPE_8x8
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_4x4 */
1055 *mb_type
|= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1056 }else if(!is_b8x8
&& (mb_type_col
[0] & MB_TYPE_16x16_OR_INTRA
)){
1057 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1058 *mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_16x16 */
1060 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1061 *mb_type
|= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1066 l1mv0
= &h
->ref_list
[1][0].motion_val
[0][h
->mb2b_xy
[mb_xy
]];
1067 l1mv1
= &h
->ref_list
[1][0].motion_val
[1][h
->mb2b_xy
[mb_xy
]];
1068 l1ref0
= &h
->ref_list
[1][0].ref_index
[0][h
->mb2b8_xy
[mb_xy
]];
1069 l1ref1
= &h
->ref_list
[1][0].ref_index
[1][h
->mb2b8_xy
[mb_xy
]];
1072 l1ref0
+= h
->b8_stride
;
1073 l1ref1
+= h
->b8_stride
;
1074 l1mv0
+= 2*b4_stride
;
1075 l1mv1
+= 2*b4_stride
;
1079 if(h
->direct_spatial_mv_pred
){
1084 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1086 /* ref = min(neighbors) */
1087 for(list
=0; list
<2; list
++){
1088 int refa
= h
->ref_cache
[list
][scan8
[0] - 1];
1089 int refb
= h
->ref_cache
[list
][scan8
[0] - 8];
1090 int refc
= h
->ref_cache
[list
][scan8
[0] - 8 + 4];
1091 if(refc
== PART_NOT_AVAILABLE
)
1092 refc
= h
->ref_cache
[list
][scan8
[0] - 8 - 1];
1093 ref
[list
] = FFMIN3((unsigned)refa
, (unsigned)refb
, (unsigned)refc
);
1098 if(ref
[0] < 0 && ref
[1] < 0){
1099 ref
[0] = ref
[1] = 0;
1100 mv
[0][0] = mv
[0][1] =
1101 mv
[1][0] = mv
[1][1] = 0;
1103 for(list
=0; list
<2; list
++){
1105 pred_motion(h
, 0, 4, list
, ref
[list
], &mv
[list
][0], &mv
[list
][1]);
1107 mv
[list
][0] = mv
[list
][1] = 0;
1113 *mb_type
&= ~MB_TYPE_L1
;
1114 sub_mb_type
&= ~MB_TYPE_L1
;
1115 }else if(ref
[0] < 0){
1117 *mb_type
&= ~MB_TYPE_L0
;
1118 sub_mb_type
&= ~MB_TYPE_L0
;
1121 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
[0])){
1122 for(i8
=0; i8
<4; i8
++){
1125 int xy8
= x8
+y8
*b8_stride
;
1126 int xy4
= 3*x8
+y8
*b4_stride
;
1129 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1131 h
->sub_mb_type
[i8
] = sub_mb_type
;
1133 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1134 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1135 if(!IS_INTRA(mb_type_col
[y8
])
1136 && ( (l1ref0
[xy8
] == 0 && FFABS(l1mv0
[xy4
][0]) <= 1 && FFABS(l1mv0
[xy4
][1]) <= 1)
1137 || (l1ref0
[xy8
] < 0 && l1ref1
[xy8
] == 0 && FFABS(l1mv1
[xy4
][0]) <= 1 && FFABS(l1mv1
[xy4
][1]) <= 1))){
1139 a
= pack16to32(mv
[0][0],mv
[0][1]);
1141 b
= pack16to32(mv
[1][0],mv
[1][1]);
1143 a
= pack16to32(mv
[0][0],mv
[0][1]);
1144 b
= pack16to32(mv
[1][0],mv
[1][1]);
1146 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, a
, 4);
1147 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, b
, 4);
1149 }else if(IS_16X16(*mb_type
)){
1152 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, (uint8_t)ref
[0], 1);
1153 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, (uint8_t)ref
[1], 1);
1154 if(!IS_INTRA(mb_type_col
[0])
1155 && ( (l1ref0
[0] == 0 && FFABS(l1mv0
[0][0]) <= 1 && FFABS(l1mv0
[0][1]) <= 1)
1156 || (l1ref0
[0] < 0 && l1ref1
[0] == 0 && FFABS(l1mv1
[0][0]) <= 1 && FFABS(l1mv1
[0][1]) <= 1
1157 && (h
->x264_build
>33 || !h
->x264_build
)))){
1159 a
= pack16to32(mv
[0][0],mv
[0][1]);
1161 b
= pack16to32(mv
[1][0],mv
[1][1]);
1163 a
= pack16to32(mv
[0][0],mv
[0][1]);
1164 b
= pack16to32(mv
[1][0],mv
[1][1]);
1166 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, a
, 4);
1167 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, b
, 4);
1169 for(i8
=0; i8
<4; i8
++){
1170 const int x8
= i8
&1;
1171 const int y8
= i8
>>1;
1173 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1175 h
->sub_mb_type
[i8
] = sub_mb_type
;
1177 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1178 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1179 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1180 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1183 if(!IS_INTRA(mb_type_col
[0]) && ( l1ref0
[x8
+ y8
*b8_stride
] == 0
1184 || (l1ref0
[x8
+ y8
*b8_stride
] < 0 && l1ref1
[x8
+ y8
*b8_stride
] == 0
1185 && (h
->x264_build
>33 || !h
->x264_build
)))){
1186 const int16_t (*l1mv
)[2]= l1ref0
[x8
+ y8
*b8_stride
] == 0 ? l1mv0
: l1mv1
;
1187 if(IS_SUB_8X8(sub_mb_type
)){
1188 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*b4_stride
];
1189 if(FFABS(mv_col
[0]) <= 1 && FFABS(mv_col
[1]) <= 1){
1191 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1193 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1196 for(i4
=0; i4
<4; i4
++){
1197 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*b4_stride
];
1198 if(FFABS(mv_col
[0]) <= 1 && FFABS(mv_col
[1]) <= 1){
1200 *(uint32_t*)h
->mv_cache
[0][scan8
[i8
*4+i4
]] = 0;
1202 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] = 0;
1208 }else{ /* direct temporal mv pred */
1209 const int *map_col_to_list0
[2] = {h
->map_col_to_list0
[0], h
->map_col_to_list0
[1]};
1210 const int *dist_scale_factor
= h
->dist_scale_factor
;
1213 if(FRAME_MBAFF
&& IS_INTERLACED(*mb_type
)){
1214 map_col_to_list0
[0] = h
->map_col_to_list0_field
[s
->mb_y
&1][0];
1215 map_col_to_list0
[1] = h
->map_col_to_list0_field
[s
->mb_y
&1][1];
1216 dist_scale_factor
=h
->dist_scale_factor_field
[s
->mb_y
&1];
1218 if(h
->ref_list
[1][0].mbaff
&& IS_INTERLACED(mb_type_col
[0]))
1221 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
[0])){
1222 /* FIXME assumes direct_8x8_inference == 1 */
1223 int y_shift
= 2*!IS_INTERLACED(*mb_type
);
1225 for(i8
=0; i8
<4; i8
++){
1226 const int x8
= i8
&1;
1227 const int y8
= i8
>>1;
1229 const int16_t (*l1mv
)[2]= l1mv0
;
1231 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1233 h
->sub_mb_type
[i8
] = sub_mb_type
;
1235 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1236 if(IS_INTRA(mb_type_col
[y8
])){
1237 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1238 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1239 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1243 ref0
= l1ref0
[x8
+ y8
*b8_stride
];
1245 ref0
= map_col_to_list0
[0][ref0
+ ref_offset
];
1247 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ y8
*b8_stride
] + ref_offset
];
1250 scale
= dist_scale_factor
[ref0
];
1251 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1254 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*b4_stride
];
1255 int my_col
= (mv_col
[1]<<y_shift
)/2;
1256 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1257 int my
= (scale
* my_col
+ 128) >> 8;
1258 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1259 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-my_col
), 4);
1265 /* one-to-one mv scaling */
1267 if(IS_16X16(*mb_type
)){
1270 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, 0, 1);
1271 if(IS_INTRA(mb_type_col
[0])){
1274 const int ref0
= l1ref0
[0] >= 0 ? map_col_to_list0
[0][l1ref0
[0] + ref_offset
]
1275 : map_col_to_list0
[1][l1ref1
[0] + ref_offset
];
1276 const int scale
= dist_scale_factor
[ref0
];
1277 const int16_t *mv_col
= l1ref0
[0] >= 0 ? l1mv0
[0] : l1mv1
[0];
1279 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1280 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1282 mv0
= pack16to32(mv_l0
[0],mv_l0
[1]);
1283 mv1
= pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1285 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, ref
, 1);
1286 fill_rectangle(&h
-> mv_cache
[0][scan8
[0]], 4, 4, 8, mv0
, 4);
1287 fill_rectangle(&h
-> mv_cache
[1][scan8
[0]], 4, 4, 8, mv1
, 4);
1289 for(i8
=0; i8
<4; i8
++){
1290 const int x8
= i8
&1;
1291 const int y8
= i8
>>1;
1293 const int16_t (*l1mv
)[2]= l1mv0
;
1295 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1297 h
->sub_mb_type
[i8
] = sub_mb_type
;
1298 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1299 if(IS_INTRA(mb_type_col
[0])){
1300 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1301 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1302 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1306 ref0
= l1ref0
[x8
+ y8
*b8_stride
] + ref_offset
;
1308 ref0
= map_col_to_list0
[0][ref0
];
1310 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ y8
*b8_stride
] + ref_offset
];
1313 scale
= dist_scale_factor
[ref0
];
1315 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1316 if(IS_SUB_8X8(sub_mb_type
)){
1317 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*b4_stride
];
1318 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1319 int my
= (scale
* mv_col
[1] + 128) >> 8;
1320 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1321 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-mv_col
[1]), 4);
1323 for(i4
=0; i4
<4; i4
++){
1324 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*b4_stride
];
1325 int16_t *mv_l0
= h
->mv_cache
[0][scan8
[i8
*4+i4
]];
1326 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1327 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1328 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] =
1329 pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1336 static inline void write_back_motion(H264Context
*h
, int mb_type
){
1337 MpegEncContext
* const s
= &h
->s
;
1338 const int b_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
1339 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
1342 if(!USES_LIST(mb_type
, 0))
1343 fill_rectangle(&s
->current_picture
.ref_index
[0][b8_xy
], 2, 2, h
->b8_stride
, (uint8_t)LIST_NOT_USED
, 1);
1345 for(list
=0; list
<h
->list_count
; list
++){
1347 if(!USES_LIST(mb_type
, list
))
1351 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+0 + 8*y
];
1352 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+2 + 8*y
];
1354 if( h
->pps
.cabac
) {
1355 if(IS_SKIP(mb_type
))
1356 fill_rectangle(h
->mvd_table
[list
][b_xy
], 4, 4, h
->b_stride
, 0, 4);
1359 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+0 + 8*y
];
1360 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+2 + 8*y
];
1365 int8_t *ref_index
= &s
->current_picture
.ref_index
[list
][b8_xy
];
1366 ref_index
[0+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[0]];
1367 ref_index
[1+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[4]];
1368 ref_index
[0+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[8]];
1369 ref_index
[1+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[12]];
1373 if(h
->slice_type_nos
== FF_B_TYPE
&& h
->pps
.cabac
){
1374 if(IS_8X8(mb_type
)){
1375 uint8_t *direct_table
= &h
->direct_table
[b8_xy
];
1376 direct_table
[1+0*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[1]) ? 1 : 0;
1377 direct_table
[0+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[2]) ? 1 : 0;
1378 direct_table
[1+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[3]) ? 1 : 0;
1384 * Decodes a network abstraction layer unit.
1385 * @param consumed is the number of bytes used as input
1386 * @param length is the length of the array
1387 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1388 * @returns decoded bytes, might be src+1 if no escapes
1390 static const uint8_t *decode_nal(H264Context
*h
, const uint8_t *src
, int *dst_length
, int *consumed
, int length
){
1395 // src[0]&0x80; //forbidden bit
1396 h
->nal_ref_idc
= src
[0]>>5;
1397 h
->nal_unit_type
= src
[0]&0x1F;
1401 for(i
=0; i
<length
; i
++)
1402 printf("%2X ", src
[i
]);
1404 for(i
=0; i
+1<length
; i
+=2){
1405 if(src
[i
]) continue;
1406 if(i
>0 && src
[i
-1]==0) i
--;
1407 if(i
+2<length
&& src
[i
+1]==0 && src
[i
+2]<=3){
1409 /* startcode, so we must be past the end */
1416 if(i
>=length
-1){ //no escaped 0
1417 *dst_length
= length
;
1418 *consumed
= length
+1; //+1 for the header
1422 bufidx
= h
->nal_unit_type
== NAL_DPC
? 1 : 0; // use second escape buffer for inter data
1423 h
->rbsp_buffer
[bufidx
]= av_fast_realloc(h
->rbsp_buffer
[bufidx
], &h
->rbsp_buffer_size
[bufidx
], length
);
1424 dst
= h
->rbsp_buffer
[bufidx
];
1430 //printf("decoding esc\n");
1433 //remove escapes (very rare 1:2^22)
1434 if(si
+2<length
&& src
[si
]==0 && src
[si
+1]==0 && src
[si
+2]<=3){
1435 if(src
[si
+2]==3){ //escape
1440 }else //next start code
1444 dst
[di
++]= src
[si
++];
1448 *consumed
= si
+ 1;//+1 for the header
1449 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1454 * identifies the exact end of the bitstream
1455 * @return the length of the trailing, or 0 if damaged
1457 static int decode_rbsp_trailing(H264Context
*h
, const uint8_t *src
){
1461 tprintf(h
->s
.avctx
, "rbsp trailing %X\n", v
);
1471 * IDCT transforms the 16 dc values and dequantizes them.
1472 * @param qp quantization parameter
1474 static void h264_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1477 int temp
[16]; //FIXME check if this is a good idea
1478 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1479 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1481 //memset(block, 64, 2*256);
1484 const int offset
= y_offset
[i
];
1485 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1486 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1487 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1488 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1497 const int offset
= x_offset
[i
];
1498 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1499 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1500 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1501 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1503 block
[stride
*0 +offset
]= ((((z0
+ z3
)*qmul
+ 128 ) >> 8)); //FIXME think about merging this into decode_residual
1504 block
[stride
*2 +offset
]= ((((z1
+ z2
)*qmul
+ 128 ) >> 8));
1505 block
[stride
*8 +offset
]= ((((z1
- z2
)*qmul
+ 128 ) >> 8));
1506 block
[stride
*10+offset
]= ((((z0
- z3
)*qmul
+ 128 ) >> 8));
1512 * DCT transforms the 16 dc values.
1513 * @param qp quantization parameter ??? FIXME
1515 static void h264_luma_dc_dct_c(DCTELEM
*block
/*, int qp*/){
1516 // const int qmul= dequant_coeff[qp][0];
1518 int temp
[16]; //FIXME check if this is a good idea
1519 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1520 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1523 const int offset
= y_offset
[i
];
1524 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1525 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1526 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1527 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1536 const int offset
= x_offset
[i
];
1537 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1538 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1539 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1540 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1542 block
[stride
*0 +offset
]= (z0
+ z3
)>>1;
1543 block
[stride
*2 +offset
]= (z1
+ z2
)>>1;
1544 block
[stride
*8 +offset
]= (z1
- z2
)>>1;
1545 block
[stride
*10+offset
]= (z0
- z3
)>>1;
1553 static void chroma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1554 const int stride
= 16*2;
1555 const int xStride
= 16;
1558 a
= block
[stride
*0 + xStride
*0];
1559 b
= block
[stride
*0 + xStride
*1];
1560 c
= block
[stride
*1 + xStride
*0];
1561 d
= block
[stride
*1 + xStride
*1];
1568 block
[stride
*0 + xStride
*0]= ((a
+c
)*qmul
) >> 7;
1569 block
[stride
*0 + xStride
*1]= ((e
+b
)*qmul
) >> 7;
1570 block
[stride
*1 + xStride
*0]= ((a
-c
)*qmul
) >> 7;
1571 block
[stride
*1 + xStride
*1]= ((e
-b
)*qmul
) >> 7;
1575 static void chroma_dc_dct_c(DCTELEM
*block
){
1576 const int stride
= 16*2;
1577 const int xStride
= 16;
1580 a
= block
[stride
*0 + xStride
*0];
1581 b
= block
[stride
*0 + xStride
*1];
1582 c
= block
[stride
*1 + xStride
*0];
1583 d
= block
[stride
*1 + xStride
*1];
1590 block
[stride
*0 + xStride
*0]= (a
+c
);
1591 block
[stride
*0 + xStride
*1]= (e
+b
);
1592 block
[stride
*1 + xStride
*0]= (a
-c
);
1593 block
[stride
*1 + xStride
*1]= (e
-b
);
1598 * gets the chroma qp.
1600 static inline int get_chroma_qp(H264Context
*h
, int t
, int qscale
){
1601 return h
->pps
.chroma_qp_table
[t
][qscale
];
1604 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1605 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1606 static inline int quantize_c(DCTELEM
*block
, uint8_t *scantable
, int qscale
, int intra
, int separate_dc
){
1608 const int * const quant_table
= quant_coeff
[qscale
];
1609 const int bias
= intra
? (1<<QUANT_SHIFT
)/3 : (1<<QUANT_SHIFT
)/6;
1610 const unsigned int threshold1
= (1<<QUANT_SHIFT
) - bias
- 1;
1611 const unsigned int threshold2
= (threshold1
<<1);
1617 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
-2))/3 : (1<<(QUANT_SHIFT
-2))/6;
1618 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
-2)) - dc_bias
- 1;
1619 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
1621 int level
= block
[0]*quant_coeff
[qscale
+18][0];
1622 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
1624 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
-2);
1627 level
= (dc_bias
- level
)>>(QUANT_SHIFT
-2);
1630 // last_non_zero = i;
1635 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
+1))/3 : (1<<(QUANT_SHIFT
+1))/6;
1636 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
+1)) - dc_bias
- 1;
1637 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
1639 int level
= block
[0]*quant_table
[0];
1640 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
1642 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
+1);
1645 level
= (dc_bias
- level
)>>(QUANT_SHIFT
+1);
1648 // last_non_zero = i;
1661 const int j
= scantable
[i
];
1662 int level
= block
[j
]*quant_table
[j
];
1664 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1665 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1666 if(((unsigned)(level
+threshold1
))>threshold2
){
1668 level
= (bias
+ level
)>>QUANT_SHIFT
;
1671 level
= (bias
- level
)>>QUANT_SHIFT
;
1680 return last_non_zero
;
1683 static inline void mc_dir_part(H264Context
*h
, Picture
*pic
, int n
, int square
, int chroma_height
, int delta
, int list
,
1684 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1685 int src_x_offset
, int src_y_offset
,
1686 qpel_mc_func
*qpix_op
, h264_chroma_mc_func chroma_op
){
1687 MpegEncContext
* const s
= &h
->s
;
1688 const int mx
= h
->mv_cache
[list
][ scan8
[n
] ][0] + src_x_offset
*8;
1689 int my
= h
->mv_cache
[list
][ scan8
[n
] ][1] + src_y_offset
*8;
1690 const int luma_xy
= (mx
&3) + ((my
&3)<<2);
1691 uint8_t * src_y
= pic
->data
[0] + (mx
>>2) + (my
>>2)*h
->mb_linesize
;
1692 uint8_t * src_cb
, * src_cr
;
1693 int extra_width
= h
->emu_edge_width
;
1694 int extra_height
= h
->emu_edge_height
;
1696 const int full_mx
= mx
>>2;
1697 const int full_my
= my
>>2;
1698 const int pic_width
= 16*s
->mb_width
;
1699 const int pic_height
= 16*s
->mb_height
>> MB_FIELD
;
1701 if(!pic
->data
[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
1704 if(mx
&7) extra_width
-= 3;
1705 if(my
&7) extra_height
-= 3;
1707 if( full_mx
< 0-extra_width
1708 || full_my
< 0-extra_height
1709 || full_mx
+ 16/*FIXME*/ > pic_width
+ extra_width
1710 || full_my
+ 16/*FIXME*/ > pic_height
+ extra_height
){
1711 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_y
- 2 - 2*h
->mb_linesize
, h
->mb_linesize
, 16+5, 16+5/*FIXME*/, full_mx
-2, full_my
-2, pic_width
, pic_height
);
1712 src_y
= s
->edge_emu_buffer
+ 2 + 2*h
->mb_linesize
;
1716 qpix_op
[luma_xy
](dest_y
, src_y
, h
->mb_linesize
); //FIXME try variable height perhaps?
1718 qpix_op
[luma_xy
](dest_y
+ delta
, src_y
+ delta
, h
->mb_linesize
);
1721 if(ENABLE_GRAY
&& s
->flags
&CODEC_FLAG_GRAY
) return;
1724 // chroma offset when predicting from a field of opposite parity
1725 my
+= 2 * ((s
->mb_y
& 1) - (pic
->reference
- 1));
1726 emu
|= (my
>>3) < 0 || (my
>>3) + 8 >= (pic_height
>>1);
1728 src_cb
= pic
->data
[1] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
1729 src_cr
= pic
->data
[2] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
1732 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cb
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
1733 src_cb
= s
->edge_emu_buffer
;
1735 chroma_op(dest_cb
, src_cb
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
1738 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cr
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
1739 src_cr
= s
->edge_emu_buffer
;
1741 chroma_op(dest_cr
, src_cr
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
1744 static inline void mc_part_std(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1745 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1746 int x_offset
, int y_offset
,
1747 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1748 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
1749 int list0
, int list1
){
1750 MpegEncContext
* const s
= &h
->s
;
1751 qpel_mc_func
*qpix_op
= qpix_put
;
1752 h264_chroma_mc_func chroma_op
= chroma_put
;
1754 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
1755 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1756 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1757 x_offset
+= 8*s
->mb_x
;
1758 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
1761 Picture
*ref
= &h
->ref_list
[0][ h
->ref_cache
[0][ scan8
[n
] ] ];
1762 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 0,
1763 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1764 qpix_op
, chroma_op
);
1767 chroma_op
= chroma_avg
;
1771 Picture
*ref
= &h
->ref_list
[1][ h
->ref_cache
[1][ scan8
[n
] ] ];
1772 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 1,
1773 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1774 qpix_op
, chroma_op
);
1778 static inline void mc_part_weighted(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1779 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1780 int x_offset
, int y_offset
,
1781 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1782 h264_weight_func luma_weight_op
, h264_weight_func chroma_weight_op
,
1783 h264_biweight_func luma_weight_avg
, h264_biweight_func chroma_weight_avg
,
1784 int list0
, int list1
){
1785 MpegEncContext
* const s
= &h
->s
;
1787 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
1788 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1789 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1790 x_offset
+= 8*s
->mb_x
;
1791 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
1794 /* don't optimize for luma-only case, since B-frames usually
1795 * use implicit weights => chroma too. */
1796 uint8_t *tmp_cb
= s
->obmc_scratchpad
;
1797 uint8_t *tmp_cr
= s
->obmc_scratchpad
+ 8;
1798 uint8_t *tmp_y
= s
->obmc_scratchpad
+ 8*h
->mb_uvlinesize
;
1799 int refn0
= h
->ref_cache
[0][ scan8
[n
] ];
1800 int refn1
= h
->ref_cache
[1][ scan8
[n
] ];
1802 mc_dir_part(h
, &h
->ref_list
[0][refn0
], n
, square
, chroma_height
, delta
, 0,
1803 dest_y
, dest_cb
, dest_cr
,
1804 x_offset
, y_offset
, qpix_put
, chroma_put
);
1805 mc_dir_part(h
, &h
->ref_list
[1][refn1
], n
, square
, chroma_height
, delta
, 1,
1806 tmp_y
, tmp_cb
, tmp_cr
,
1807 x_offset
, y_offset
, qpix_put
, chroma_put
);
1809 if(h
->use_weight
== 2){
1810 int weight0
= h
->implicit_weight
[refn0
][refn1
];
1811 int weight1
= 64 - weight0
;
1812 luma_weight_avg( dest_y
, tmp_y
, h
-> mb_linesize
, 5, weight0
, weight1
, 0);
1813 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
1814 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
1816 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
1817 h
->luma_weight
[0][refn0
], h
->luma_weight
[1][refn1
],
1818 h
->luma_offset
[0][refn0
] + h
->luma_offset
[1][refn1
]);
1819 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1820 h
->chroma_weight
[0][refn0
][0], h
->chroma_weight
[1][refn1
][0],
1821 h
->chroma_offset
[0][refn0
][0] + h
->chroma_offset
[1][refn1
][0]);
1822 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1823 h
->chroma_weight
[0][refn0
][1], h
->chroma_weight
[1][refn1
][1],
1824 h
->chroma_offset
[0][refn0
][1] + h
->chroma_offset
[1][refn1
][1]);
1827 int list
= list1
? 1 : 0;
1828 int refn
= h
->ref_cache
[list
][ scan8
[n
] ];
1829 Picture
*ref
= &h
->ref_list
[list
][refn
];
1830 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, list
,
1831 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1832 qpix_put
, chroma_put
);
1834 luma_weight_op(dest_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
1835 h
->luma_weight
[list
][refn
], h
->luma_offset
[list
][refn
]);
1836 if(h
->use_weight_chroma
){
1837 chroma_weight_op(dest_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1838 h
->chroma_weight
[list
][refn
][0], h
->chroma_offset
[list
][refn
][0]);
1839 chroma_weight_op(dest_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1840 h
->chroma_weight
[list
][refn
][1], h
->chroma_offset
[list
][refn
][1]);
1845 static inline void mc_part(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1846 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1847 int x_offset
, int y_offset
,
1848 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1849 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
1850 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
,
1851 int list0
, int list1
){
1852 if((h
->use_weight
==2 && list0
&& list1
1853 && (h
->implicit_weight
[ h
->ref_cache
[0][scan8
[n
]] ][ h
->ref_cache
[1][scan8
[n
]] ] != 32))
1854 || h
->use_weight
==1)
1855 mc_part_weighted(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
1856 x_offset
, y_offset
, qpix_put
, chroma_put
,
1857 weight_op
[0], weight_op
[3], weight_avg
[0], weight_avg
[3], list0
, list1
);
1859 mc_part_std(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
1860 x_offset
, y_offset
, qpix_put
, chroma_put
, qpix_avg
, chroma_avg
, list0
, list1
);
1863 static inline void prefetch_motion(H264Context
*h
, int list
){
1864 /* fetch pixels for estimated mv 4 macroblocks ahead
1865 * optimized for 64byte cache lines */
1866 MpegEncContext
* const s
= &h
->s
;
1867 const int refn
= h
->ref_cache
[list
][scan8
[0]];
1869 const int mx
= (h
->mv_cache
[list
][scan8
[0]][0]>>2) + 16*s
->mb_x
+ 8;
1870 const int my
= (h
->mv_cache
[list
][scan8
[0]][1]>>2) + 16*s
->mb_y
;
1871 uint8_t **src
= h
->ref_list
[list
][refn
].data
;
1872 int off
= mx
+ (my
+ (s
->mb_x
&3)*4)*h
->mb_linesize
+ 64;
1873 s
->dsp
.prefetch(src
[0]+off
, s
->linesize
, 4);
1874 off
= (mx
>>1) + ((my
>>1) + (s
->mb_x
&7))*s
->uvlinesize
+ 64;
1875 s
->dsp
.prefetch(src
[1]+off
, src
[2]-src
[1], 2);
1879 static void hl_motion(H264Context
*h
, uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1880 qpel_mc_func (*qpix_put
)[16], h264_chroma_mc_func (*chroma_put
),
1881 qpel_mc_func (*qpix_avg
)[16], h264_chroma_mc_func (*chroma_avg
),
1882 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
){
1883 MpegEncContext
* const s
= &h
->s
;
1884 const int mb_xy
= h
->mb_xy
;
1885 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
1887 assert(IS_INTER(mb_type
));
1889 prefetch_motion(h
, 0);
1891 if(IS_16X16(mb_type
)){
1892 mc_part(h
, 0, 1, 8, 0, dest_y
, dest_cb
, dest_cr
, 0, 0,
1893 qpix_put
[0], chroma_put
[0], qpix_avg
[0], chroma_avg
[0],
1894 &weight_op
[0], &weight_avg
[0],
1895 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1896 }else if(IS_16X8(mb_type
)){
1897 mc_part(h
, 0, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 0,
1898 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
1899 &weight_op
[1], &weight_avg
[1],
1900 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1901 mc_part(h
, 8, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 4,
1902 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
1903 &weight_op
[1], &weight_avg
[1],
1904 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
1905 }else if(IS_8X16(mb_type
)){
1906 mc_part(h
, 0, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 0, 0,
1907 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1908 &weight_op
[2], &weight_avg
[2],
1909 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1910 mc_part(h
, 4, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 4, 0,
1911 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1912 &weight_op
[2], &weight_avg
[2],
1913 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
1917 assert(IS_8X8(mb_type
));
1920 const int sub_mb_type
= h
->sub_mb_type
[i
];
1922 int x_offset
= (i
&1)<<2;
1923 int y_offset
= (i
&2)<<1;
1925 if(IS_SUB_8X8(sub_mb_type
)){
1926 mc_part(h
, n
, 1, 4, 0, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1927 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1928 &weight_op
[3], &weight_avg
[3],
1929 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1930 }else if(IS_SUB_8X4(sub_mb_type
)){
1931 mc_part(h
, n
, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1932 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
1933 &weight_op
[4], &weight_avg
[4],
1934 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1935 mc_part(h
, n
+2, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
+2,
1936 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
1937 &weight_op
[4], &weight_avg
[4],
1938 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1939 }else if(IS_SUB_4X8(sub_mb_type
)){
1940 mc_part(h
, n
, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1941 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1942 &weight_op
[5], &weight_avg
[5],
1943 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1944 mc_part(h
, n
+1, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
+2, y_offset
,
1945 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1946 &weight_op
[5], &weight_avg
[5],
1947 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1950 assert(IS_SUB_4X4(sub_mb_type
));
1952 int sub_x_offset
= x_offset
+ 2*(j
&1);
1953 int sub_y_offset
= y_offset
+ (j
&2);
1954 mc_part(h
, n
+j
, 1, 2, 0, dest_y
, dest_cb
, dest_cr
, sub_x_offset
, sub_y_offset
,
1955 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1956 &weight_op
[6], &weight_avg
[6],
1957 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1963 prefetch_motion(h
, 1);
1966 static av_cold
void decode_init_vlc(void){
1967 static int done
= 0;
1974 chroma_dc_coeff_token_vlc
.table
= chroma_dc_coeff_token_vlc_table
;
1975 chroma_dc_coeff_token_vlc
.table_allocated
= chroma_dc_coeff_token_vlc_table_size
;
1976 init_vlc(&chroma_dc_coeff_token_vlc
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 4*5,
1977 &chroma_dc_coeff_token_len
[0], 1, 1,
1978 &chroma_dc_coeff_token_bits
[0], 1, 1,
1979 INIT_VLC_USE_NEW_STATIC
);
1983 coeff_token_vlc
[i
].table
= coeff_token_vlc_tables
+offset
;
1984 coeff_token_vlc
[i
].table_allocated
= coeff_token_vlc_tables_size
[i
];
1985 init_vlc(&coeff_token_vlc
[i
], COEFF_TOKEN_VLC_BITS
, 4*17,
1986 &coeff_token_len
[i
][0], 1, 1,
1987 &coeff_token_bits
[i
][0], 1, 1,
1988 INIT_VLC_USE_NEW_STATIC
);
1989 offset
+= coeff_token_vlc_tables_size
[i
];
1992 * This is a one time safety check to make sure that
1993 * the packed static coeff_token_vlc table sizes
1994 * were initialized correctly.
1996 assert(offset
== FF_ARRAY_ELEMS(coeff_token_vlc_tables
));
1999 chroma_dc_total_zeros_vlc
[i
].table
= chroma_dc_total_zeros_vlc_tables
[i
];
2000 chroma_dc_total_zeros_vlc
[i
].table_allocated
= chroma_dc_total_zeros_vlc_tables_size
;
2001 init_vlc(&chroma_dc_total_zeros_vlc
[i
],
2002 CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 4,
2003 &chroma_dc_total_zeros_len
[i
][0], 1, 1,
2004 &chroma_dc_total_zeros_bits
[i
][0], 1, 1,
2005 INIT_VLC_USE_NEW_STATIC
);
2007 for(i
=0; i
<15; i
++){
2008 total_zeros_vlc
[i
].table
= total_zeros_vlc_tables
[i
];
2009 total_zeros_vlc
[i
].table_allocated
= total_zeros_vlc_tables_size
;
2010 init_vlc(&total_zeros_vlc
[i
],
2011 TOTAL_ZEROS_VLC_BITS
, 16,
2012 &total_zeros_len
[i
][0], 1, 1,
2013 &total_zeros_bits
[i
][0], 1, 1,
2014 INIT_VLC_USE_NEW_STATIC
);
2018 run_vlc
[i
].table
= run_vlc_tables
[i
];
2019 run_vlc
[i
].table_allocated
= run_vlc_tables_size
;
2020 init_vlc(&run_vlc
[i
],
2022 &run_len
[i
][0], 1, 1,
2023 &run_bits
[i
][0], 1, 1,
2024 INIT_VLC_USE_NEW_STATIC
);
2026 run7_vlc
.table
= run7_vlc_table
,
2027 run7_vlc
.table_allocated
= run7_vlc_table_size
;
2028 init_vlc(&run7_vlc
, RUN7_VLC_BITS
, 16,
2029 &run_len
[6][0], 1, 1,
2030 &run_bits
[6][0], 1, 1,
2031 INIT_VLC_USE_NEW_STATIC
);
2035 static void free_tables(H264Context
*h
){
2038 av_freep(&h
->intra4x4_pred_mode
);
2039 av_freep(&h
->chroma_pred_mode_table
);
2040 av_freep(&h
->cbp_table
);
2041 av_freep(&h
->mvd_table
[0]);
2042 av_freep(&h
->mvd_table
[1]);
2043 av_freep(&h
->direct_table
);
2044 av_freep(&h
->non_zero_count
);
2045 av_freep(&h
->slice_table_base
);
2046 h
->slice_table
= NULL
;
2048 av_freep(&h
->mb2b_xy
);
2049 av_freep(&h
->mb2b8_xy
);
2051 for(i
= 0; i
< h
->s
.avctx
->thread_count
; i
++) {
2052 hx
= h
->thread_context
[i
];
2054 av_freep(&hx
->top_borders
[1]);
2055 av_freep(&hx
->top_borders
[0]);
2056 av_freep(&hx
->s
.obmc_scratchpad
);
2060 static void init_dequant8_coeff_table(H264Context
*h
){
2062 const int transpose
= (h
->s
.dsp
.h264_idct8_add
!= ff_h264_idct8_add_c
); //FIXME ugly
2063 h
->dequant8_coeff
[0] = h
->dequant8_buffer
[0];
2064 h
->dequant8_coeff
[1] = h
->dequant8_buffer
[1];
2066 for(i
=0; i
<2; i
++ ){
2067 if(i
&& !memcmp(h
->pps
.scaling_matrix8
[0], h
->pps
.scaling_matrix8
[1], 64*sizeof(uint8_t))){
2068 h
->dequant8_coeff
[1] = h
->dequant8_buffer
[0];
2072 for(q
=0; q
<52; q
++){
2073 int shift
= div6
[q
];
2076 h
->dequant8_coeff
[i
][q
][transpose
? (x
>>3)|((x
&7)<<3) : x
] =
2077 ((uint32_t)dequant8_coeff_init
[idx
][ dequant8_coeff_init_scan
[((x
>>1)&12) | (x
&3)] ] *
2078 h
->pps
.scaling_matrix8
[i
][x
]) << shift
;
2083 static void init_dequant4_coeff_table(H264Context
*h
){
2085 const int transpose
= (h
->s
.dsp
.h264_idct_add
!= ff_h264_idct_add_c
); //FIXME ugly
2086 for(i
=0; i
<6; i
++ ){
2087 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[i
];
2089 if(!memcmp(h
->pps
.scaling_matrix4
[j
], h
->pps
.scaling_matrix4
[i
], 16*sizeof(uint8_t))){
2090 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[j
];
2097 for(q
=0; q
<52; q
++){
2098 int shift
= div6
[q
] + 2;
2101 h
->dequant4_coeff
[i
][q
][transpose
? (x
>>2)|((x
<<2)&0xF) : x
] =
2102 ((uint32_t)dequant4_coeff_init
[idx
][(x
&1) + ((x
>>2)&1)] *
2103 h
->pps
.scaling_matrix4
[i
][x
]) << shift
;
2108 static void init_dequant_tables(H264Context
*h
){
2110 init_dequant4_coeff_table(h
);
2111 if(h
->pps
.transform_8x8_mode
)
2112 init_dequant8_coeff_table(h
);
2113 if(h
->sps
.transform_bypass
){
2116 h
->dequant4_coeff
[i
][0][x
] = 1<<6;
2117 if(h
->pps
.transform_8x8_mode
)
2120 h
->dequant8_coeff
[i
][0][x
] = 1<<6;
2127 * needs width/height
2129 static int alloc_tables(H264Context
*h
){
2130 MpegEncContext
* const s
= &h
->s
;
2131 const int big_mb_num
= s
->mb_stride
* (s
->mb_height
+1);
2134 CHECKED_ALLOCZ(h
->intra4x4_pred_mode
, big_mb_num
* 8 * sizeof(uint8_t))
2136 CHECKED_ALLOCZ(h
->non_zero_count
, big_mb_num
* 16 * sizeof(uint8_t))
2137 CHECKED_ALLOCZ(h
->slice_table_base
, (big_mb_num
+s
->mb_stride
) * sizeof(*h
->slice_table_base
))
2138 CHECKED_ALLOCZ(h
->cbp_table
, big_mb_num
* sizeof(uint16_t))
2140 CHECKED_ALLOCZ(h
->chroma_pred_mode_table
, big_mb_num
* sizeof(uint8_t))
2141 CHECKED_ALLOCZ(h
->mvd_table
[0], 32*big_mb_num
* sizeof(uint16_t));
2142 CHECKED_ALLOCZ(h
->mvd_table
[1], 32*big_mb_num
* sizeof(uint16_t));
2143 CHECKED_ALLOCZ(h
->direct_table
, 32*big_mb_num
* sizeof(uint8_t));
2145 memset(h
->slice_table_base
, -1, (big_mb_num
+s
->mb_stride
) * sizeof(*h
->slice_table_base
));
2146 h
->slice_table
= h
->slice_table_base
+ s
->mb_stride
*2 + 1;
2148 CHECKED_ALLOCZ(h
->mb2b_xy
, big_mb_num
* sizeof(uint32_t));
2149 CHECKED_ALLOCZ(h
->mb2b8_xy
, big_mb_num
* sizeof(uint32_t));
2150 for(y
=0; y
<s
->mb_height
; y
++){
2151 for(x
=0; x
<s
->mb_width
; x
++){
2152 const int mb_xy
= x
+ y
*s
->mb_stride
;
2153 const int b_xy
= 4*x
+ 4*y
*h
->b_stride
;
2154 const int b8_xy
= 2*x
+ 2*y
*h
->b8_stride
;
2156 h
->mb2b_xy
[mb_xy
]= b_xy
;
2157 h
->mb2b8_xy
[mb_xy
]= b8_xy
;
2161 s
->obmc_scratchpad
= NULL
;
2163 if(!h
->dequant4_coeff
[0])
2164 init_dequant_tables(h
);
2173 * Mimic alloc_tables(), but for every context thread.
2175 static void clone_tables(H264Context
*dst
, H264Context
*src
){
2176 dst
->intra4x4_pred_mode
= src
->intra4x4_pred_mode
;
2177 dst
->non_zero_count
= src
->non_zero_count
;
2178 dst
->slice_table
= src
->slice_table
;
2179 dst
->cbp_table
= src
->cbp_table
;
2180 dst
->mb2b_xy
= src
->mb2b_xy
;
2181 dst
->mb2b8_xy
= src
->mb2b8_xy
;
2182 dst
->chroma_pred_mode_table
= src
->chroma_pred_mode_table
;
2183 dst
->mvd_table
[0] = src
->mvd_table
[0];
2184 dst
->mvd_table
[1] = src
->mvd_table
[1];
2185 dst
->direct_table
= src
->direct_table
;
2187 dst
->s
.obmc_scratchpad
= NULL
;
2188 ff_h264_pred_init(&dst
->hpc
, src
->s
.codec_id
);
2193 * Allocate buffers which are not shared amongst multiple threads.
2195 static int context_init(H264Context
*h
){
2196 CHECKED_ALLOCZ(h
->top_borders
[0], h
->s
.mb_width
* (16+8+8) * sizeof(uint8_t))
2197 CHECKED_ALLOCZ(h
->top_borders
[1], h
->s
.mb_width
* (16+8+8) * sizeof(uint8_t))
2201 return -1; // free_tables will clean up for us
2204 static av_cold
void common_init(H264Context
*h
){
2205 MpegEncContext
* const s
= &h
->s
;
2207 s
->width
= s
->avctx
->width
;
2208 s
->height
= s
->avctx
->height
;
2209 s
->codec_id
= s
->avctx
->codec
->id
;
2211 ff_h264_pred_init(&h
->hpc
, s
->codec_id
);
2213 h
->dequant_coeff_pps
= -1;
2214 s
->unrestricted_mv
=1;
2215 s
->decode
=1; //FIXME
2217 memset(h
->pps
.scaling_matrix4
, 16, 6*16*sizeof(uint8_t));
2218 memset(h
->pps
.scaling_matrix8
, 16, 2*64*sizeof(uint8_t));
2221 static av_cold
int decode_init(AVCodecContext
*avctx
){
2222 H264Context
*h
= avctx
->priv_data
;
2223 MpegEncContext
* const s
= &h
->s
;
2225 MPV_decode_defaults(s
);
2230 s
->out_format
= FMT_H264
;
2231 s
->workaround_bugs
= avctx
->workaround_bugs
;
2234 // s->decode_mb= ff_h263_decode_mb;
2235 s
->quarter_sample
= 1;
2238 // Set in decode_postinit() once initial parsing is complete
2239 avctx
->pix_fmt
= PIX_FMT_NONE
;
2243 if(avctx
->extradata_size
> 0 && avctx
->extradata
&&
2244 *(char *)avctx
->extradata
== 1){
2251 h
->thread_context
[0] = h
;
2252 h
->outputed_poc
= INT_MIN
;
2253 h
->prev_poc_msb
= 1<<16;
2257 static int decode_postinit(H264Context
*h
, SPS
*sps
){
2258 AVCodecContext
* const avctx
= h
->s
.avctx
;
2260 if (avctx
->pix_fmt
!= PIX_FMT_NONE
){
2264 if (avctx
->vdpau_acceleration
) {
2265 if(h
->s
.chroma_format
>= 2) {
2268 if (sps
->profile_idc
== 66) {
2269 avctx
->pix_fmt
= avctx
->get_format(avctx
, pixfmt_vdpau_h264_baseline_420
);
2270 } else if (sps
->profile_idc
== 77) {
2271 avctx
->pix_fmt
= avctx
->get_format(avctx
, pixfmt_vdpau_h264_main_420
);
2272 } else if (sps
->profile_idc
== 100) {
2273 avctx
->pix_fmt
= avctx
->get_format(avctx
, pixfmt_vdpau_h264_high_420
);
2277 } else if (avctx
->codec_id
== CODEC_ID_SVQ3
) {
2278 avctx
->pix_fmt
= PIX_FMT_YUVJ420P
;
2280 avctx
->pix_fmt
= PIX_FMT_YUV420P
;
2286 static int frame_start(H264Context
*h
){
2287 MpegEncContext
* const s
= &h
->s
;
2291 ff_VDPAU_h264_set_reference_frames_count(h
);
2294 if(MPV_frame_start(s
, s
->avctx
) < 0)
2296 ff_er_frame_start(s
);
2298 * MPV_frame_start uses pict_type to derive key_frame.
2299 * This is incorrect for H.264; IDR markings must be used.
2300 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2301 * See decode_nal_units().
2303 s
->current_picture_ptr
->key_frame
= 0;
2305 assert(s
->linesize
&& s
->uvlinesize
);
2307 for(i
=0; i
<16; i
++){
2308 h
->block_offset
[i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
2309 h
->block_offset
[24+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
2312 h
->block_offset
[16+i
]=
2313 h
->block_offset
[20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
2314 h
->block_offset
[24+16+i
]=
2315 h
->block_offset
[24+20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
2318 /* can't be in alloc_tables because linesize isn't known there.
2319 * FIXME: redo bipred weight to not require extra buffer? */
2320 for(i
= 0; i
< s
->avctx
->thread_count
; i
++)
2321 if(!h
->thread_context
[i
]->s
.obmc_scratchpad
)
2322 h
->thread_context
[i
]->s
.obmc_scratchpad
= av_malloc(16*2*s
->linesize
+ 8*2*s
->uvlinesize
);
2324 /* some macroblocks will be accessed before they're available */
2325 if(FRAME_MBAFF
|| s
->avctx
->thread_count
> 1)
2326 memset(h
->slice_table
, -1, (s
->mb_height
*s
->mb_stride
-1) * sizeof(*h
->slice_table
));
2328 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2330 // We mark the current picture as non-reference after allocating it, so
2331 // that if we break out due to an error it can be released automatically
2332 // in the next MPV_frame_start().
2333 // SVQ3 as well as most other codecs have only last/next/current and thus
2334 // get released even with set reference, besides SVQ3 and others do not
2335 // mark frames as reference later "naturally".
2336 if(s
->codec_id
!= CODEC_ID_SVQ3
)
2337 s
->current_picture_ptr
->reference
= 0;
2339 s
->current_picture_ptr
->field_poc
[0]=
2340 s
->current_picture_ptr
->field_poc
[1]= INT_MAX
;
2341 assert(s
->current_picture_ptr
->long_ref
==0);
2346 static inline void backup_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int simple
){
2347 MpegEncContext
* const s
= &h
->s
;
2356 src_cb
-= uvlinesize
;
2357 src_cr
-= uvlinesize
;
2359 if(!simple
&& FRAME_MBAFF
){
2361 offset
= MB_MBAFF
? 1 : 17;
2362 uvoffset
= MB_MBAFF
? 1 : 9;
2364 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+ 0)= *(uint64_t*)(src_y
+ 15*linesize
);
2365 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+ 8)= *(uint64_t*)(src_y
+8+15*linesize
);
2366 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2367 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+7*uvlinesize
);
2368 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+7*uvlinesize
);
2373 h
->left_border
[0]= h
->top_borders
[0][s
->mb_x
][15];
2374 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2375 h
->left_border
[34 ]= h
->top_borders
[0][s
->mb_x
][16+7 ];
2376 h
->left_border
[34+18]= h
->top_borders
[0][s
->mb_x
][16+8+7];
2382 top_idx
= MB_MBAFF
? 0 : 1;
2384 step
= MB_MBAFF
? 2 : 1;
2387 // There are two lines saved, the line above the the top macroblock of a pair,
2388 // and the line above the bottom macroblock
2389 h
->left_border
[offset
]= h
->top_borders
[top_idx
][s
->mb_x
][15];
2390 for(i
=1; i
<17 - skiplast
; i
++){
2391 h
->left_border
[offset
+i
*step
]= src_y
[15+i
* linesize
];
2394 *(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 16*linesize
);
2395 *(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+16*linesize
);
2397 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2398 h
->left_border
[uvoffset
+34 ]= h
->top_borders
[top_idx
][s
->mb_x
][16+7];
2399 h
->left_border
[uvoffset
+34+18]= h
->top_borders
[top_idx
][s
->mb_x
][24+7];
2400 for(i
=1; i
<9 - skiplast
; i
++){
2401 h
->left_border
[uvoffset
+34 +i
*step
]= src_cb
[7+i
*uvlinesize
];
2402 h
->left_border
[uvoffset
+34+18+i
*step
]= src_cr
[7+i
*uvlinesize
];
2404 *(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+8*uvlinesize
);
2405 *(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+8*uvlinesize
);
2409 static inline void xchg_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int xchg
, int simple
){
2410 MpegEncContext
* const s
= &h
->s
;
2421 if(!simple
&& FRAME_MBAFF
){
2423 offset
= MB_MBAFF
? 1 : 17;
2424 uvoffset
= MB_MBAFF
? 1 : 9;
2428 top_idx
= MB_MBAFF
? 0 : 1;
2430 step
= MB_MBAFF
? 2 : 1;
2433 if(h
->deblocking_filter
== 2) {
2435 deblock_left
= h
->slice_table
[mb_xy
] == h
->slice_table
[mb_xy
- 1];
2436 deblock_top
= h
->slice_table
[mb_xy
] == h
->slice_table
[h
->top_mb_xy
];
2438 deblock_left
= (s
->mb_x
> 0);
2439 deblock_top
= (s
->mb_y
> !!MB_FIELD
);
2442 src_y
-= linesize
+ 1;
2443 src_cb
-= uvlinesize
+ 1;
2444 src_cr
-= uvlinesize
+ 1;
2446 #define XCHG(a,b,t,xchg)\
2453 for(i
= !deblock_top
; i
<16; i
++){
2454 XCHG(h
->left_border
[offset
+i
*step
], src_y
[i
* linesize
], temp8
, xchg
);
2456 XCHG(h
->left_border
[offset
+i
*step
], src_y
[i
* linesize
], temp8
, 1);
2460 XCHG(*(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+0), *(uint64_t*)(src_y
+1), temp64
, xchg
);
2461 XCHG(*(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+8), *(uint64_t*)(src_y
+9), temp64
, 1);
2462 if(s
->mb_x
+1 < s
->mb_width
){
2463 XCHG(*(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
+1]), *(uint64_t*)(src_y
+17), temp64
, 1);
2467 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2469 for(i
= !deblock_top
; i
<8; i
++){
2470 XCHG(h
->left_border
[uvoffset
+34 +i
*step
], src_cb
[i
*uvlinesize
], temp8
, xchg
);
2471 XCHG(h
->left_border
[uvoffset
+34+18+i
*step
], src_cr
[i
*uvlinesize
], temp8
, xchg
);
2473 XCHG(h
->left_border
[uvoffset
+34 +i
*step
], src_cb
[i
*uvlinesize
], temp8
, 1);
2474 XCHG(h
->left_border
[uvoffset
+34+18+i
*step
], src_cr
[i
*uvlinesize
], temp8
, 1);
2477 XCHG(*(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1), temp64
, 1);
2478 XCHG(*(uint64_t*)(h
->top_borders
[top_idx
][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1), temp64
, 1);
2483 static av_always_inline
void hl_decode_mb_internal(H264Context
*h
, int simple
){
2484 MpegEncContext
* const s
= &h
->s
;
2485 const int mb_x
= s
->mb_x
;
2486 const int mb_y
= s
->mb_y
;
2487 const int mb_xy
= h
->mb_xy
;
2488 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
2489 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
2490 int linesize
, uvlinesize
/*dct_offset*/;
2492 int *block_offset
= &h
->block_offset
[0];
2493 const int transform_bypass
= (s
->qscale
== 0 && h
->sps
.transform_bypass
), is_h264
= (simple
|| s
->codec_id
== CODEC_ID_H264
);
2494 void (*idct_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
2495 void (*idct_dc_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
2497 dest_y
= s
->current_picture
.data
[0] + (mb_y
* 16* s
->linesize
) + mb_x
* 16;
2498 dest_cb
= s
->current_picture
.data
[1] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2499 dest_cr
= s
->current_picture
.data
[2] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2501 s
->dsp
.prefetch(dest_y
+ (s
->mb_x
&3)*4*s
->linesize
+ 64, s
->linesize
, 4);
2502 s
->dsp
.prefetch(dest_cb
+ (s
->mb_x
&7)*s
->uvlinesize
+ 64, dest_cr
- dest_cb
, 2);
2504 if (!simple
&& MB_FIELD
) {
2505 linesize
= h
->mb_linesize
= s
->linesize
* 2;
2506 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
* 2;
2507 block_offset
= &h
->block_offset
[24];
2508 if(mb_y
&1){ //FIXME move out of this function?
2509 dest_y
-= s
->linesize
*15;
2510 dest_cb
-= s
->uvlinesize
*7;
2511 dest_cr
-= s
->uvlinesize
*7;
2515 for(list
=0; list
<h
->list_count
; list
++){
2516 if(!USES_LIST(mb_type
, list
))
2518 if(IS_16X16(mb_type
)){
2519 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
2520 fill_rectangle(ref
, 4, 4, 8, (16+*ref
)^(s
->mb_y
&1), 1);
2522 for(i
=0; i
<16; i
+=4){
2523 int ref
= h
->ref_cache
[list
][scan8
[i
]];
2525 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2, 8, (16+ref
)^(s
->mb_y
&1), 1);
2531 linesize
= h
->mb_linesize
= s
->linesize
;
2532 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
;
2533 // dct_offset = s->linesize * 16;
2536 if(transform_bypass
){
2538 idct_add
= IS_8x8DCT(mb_type
) ? s
->dsp
.add_pixels8
: s
->dsp
.add_pixels4
;
2539 }else if(IS_8x8DCT(mb_type
)){
2540 idct_dc_add
= s
->dsp
.h264_idct8_dc_add
;
2541 idct_add
= s
->dsp
.h264_idct8_add
;
2543 idct_dc_add
= s
->dsp
.h264_idct_dc_add
;
2544 idct_add
= s
->dsp
.h264_idct_add
;
2547 if (!simple
&& IS_INTRA_PCM(mb_type
)) {
2548 for (i
=0; i
<16; i
++) {
2549 memcpy(dest_y
+ i
* linesize
, h
->mb
+ i
*8, 16);
2551 for (i
=0; i
<8; i
++) {
2552 memcpy(dest_cb
+ i
*uvlinesize
, h
->mb
+ 128 + i
*4, 8);
2553 memcpy(dest_cr
+ i
*uvlinesize
, h
->mb
+ 160 + i
*4, 8);
2556 if(IS_INTRA(mb_type
)){
2557 if(h
->deblocking_filter
)
2558 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 1, simple
);
2560 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2561 h
->hpc
.pred8x8
[ h
->chroma_pred_mode
](dest_cb
, uvlinesize
);
2562 h
->hpc
.pred8x8
[ h
->chroma_pred_mode
](dest_cr
, uvlinesize
);
2565 if(IS_INTRA4x4(mb_type
)){
2566 if(simple
|| !s
->encoding
){
2567 if(IS_8x8DCT(mb_type
)){
2568 for(i
=0; i
<16; i
+=4){
2569 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2570 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
2571 const int nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2572 h
->hpc
.pred8x8l
[ dir
](ptr
, (h
->topleft_samples_available
<<i
)&0x8000,
2573 (h
->topright_samples_available
<<i
)&0x4000, linesize
);
2575 if(nnz
== 1 && h
->mb
[i
*16])
2576 idct_dc_add(ptr
, h
->mb
+ i
*16, linesize
);
2578 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
2582 for(i
=0; i
<16; i
++){
2583 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2585 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
2588 if(dir
== DIAG_DOWN_LEFT_PRED
|| dir
== VERT_LEFT_PRED
){
2589 const int topright_avail
= (h
->topright_samples_available
<<i
)&0x8000;
2590 assert(mb_y
|| linesize
<= block_offset
[i
]);
2591 if(!topright_avail
){
2592 tr
= ptr
[3 - linesize
]*0x01010101;
2593 topright
= (uint8_t*) &tr
;
2595 topright
= ptr
+ 4 - linesize
;
2599 h
->hpc
.pred4x4
[ dir
](ptr
, topright
, linesize
);
2600 nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2603 if(nnz
== 1 && h
->mb
[i
*16])
2604 idct_dc_add(ptr
, h
->mb
+ i
*16, linesize
);
2606 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
2608 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, 0);
2613 h
->hpc
.pred16x16
[ h
->intra16x16_pred_mode
](dest_y
, linesize
);
2615 if(!transform_bypass
)
2616 h264_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
, h
->dequant4_coeff
[0][s
->qscale
][0]);
2618 svq3_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
);
2620 if(h
->deblocking_filter
)
2621 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 0, simple
);
2623 hl_motion(h
, dest_y
, dest_cb
, dest_cr
,
2624 s
->me
.qpel_put
, s
->dsp
.put_h264_chroma_pixels_tab
,
2625 s
->me
.qpel_avg
, s
->dsp
.avg_h264_chroma_pixels_tab
,
2626 s
->dsp
.weight_h264_pixels_tab
, s
->dsp
.biweight_h264_pixels_tab
);
2630 if(!IS_INTRA4x4(mb_type
)){
2632 if(IS_INTRA16x16(mb_type
)){
2633 for(i
=0; i
<16; i
++){
2634 if(h
->non_zero_count_cache
[ scan8
[i
] ])
2635 idct_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2636 else if(h
->mb
[i
*16])
2637 idct_dc_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2640 const int di
= IS_8x8DCT(mb_type
) ? 4 : 1;
2641 for(i
=0; i
<16; i
+=di
){
2642 int nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2644 if(nnz
==1 && h
->mb
[i
*16])
2645 idct_dc_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2647 idct_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2652 for(i
=0; i
<16; i
++){
2653 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){ //FIXME benchmark weird rule, & below
2654 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2655 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, IS_INTRA(mb_type
) ? 1 : 0);
2661 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2662 uint8_t *dest
[2] = {dest_cb
, dest_cr
};
2663 if(transform_bypass
){
2664 idct_add
= idct_dc_add
= s
->dsp
.add_pixels4
;
2666 idct_add
= s
->dsp
.h264_idct_add
;
2667 idct_dc_add
= s
->dsp
.h264_idct_dc_add
;
2668 chroma_dc_dequant_idct_c(h
->mb
+ 16*16, h
->chroma_qp
[0], h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 1:4][h
->chroma_qp
[0]][0]);
2669 chroma_dc_dequant_idct_c(h
->mb
+ 16*16+4*16, h
->chroma_qp
[1], h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 2:5][h
->chroma_qp
[1]][0]);
2672 for(i
=16; i
<16+8; i
++){
2673 if(h
->non_zero_count_cache
[ scan8
[i
] ])
2674 idct_add(dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
2675 else if(h
->mb
[i
*16])
2676 idct_dc_add(dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
2679 for(i
=16; i
<16+8; i
++){
2680 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){
2681 uint8_t * const ptr
= dest
[(i
&4)>>2] + block_offset
[i
];
2682 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, uvlinesize
, chroma_qp
[s
->qscale
+ 12] - 12, 2);
2688 if(h
->deblocking_filter
) {
2689 backup_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, simple
);
2690 fill_caches(h
, mb_type
, 1); //FIXME don't fill stuff which isn't used by filter_mb
2691 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->current_picture
.qscale_table
[mb_xy
]);
2692 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->current_picture
.qscale_table
[mb_xy
]);
2693 if (!simple
&& FRAME_MBAFF
) {
2694 filter_mb (h
, mb_x
, mb_y
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
2696 filter_mb_fast(h
, mb_x
, mb_y
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
2702 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2704 static void hl_decode_mb_simple(H264Context
*h
){
2705 hl_decode_mb_internal(h
, 1);
2709 * Process a macroblock; this handles edge cases, such as interlacing.
2711 static void av_noinline
hl_decode_mb_complex(H264Context
*h
){
2712 hl_decode_mb_internal(h
, 0);
2715 static void hl_decode_mb(H264Context
*h
){
2716 MpegEncContext
* const s
= &h
->s
;
2717 const int mb_xy
= h
->mb_xy
;
2718 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
2719 int is_complex
= FRAME_MBAFF
|| MB_FIELD
|| IS_INTRA_PCM(mb_type
) || s
->codec_id
!= CODEC_ID_H264
||
2720 (ENABLE_GRAY
&& (s
->flags
&CODEC_FLAG_GRAY
)) || (ENABLE_H264_ENCODER
&& s
->encoding
) || ENABLE_SMALL
;
2722 if(ENABLE_H264_ENCODER
&& !s
->decode
)
2726 hl_decode_mb_complex(h
);
2727 else hl_decode_mb_simple(h
);
2730 static void pic_as_field(Picture
*pic
, const int parity
){
2732 for (i
= 0; i
< 4; ++i
) {
2733 if (parity
== PICT_BOTTOM_FIELD
)
2734 pic
->data
[i
] += pic
->linesize
[i
];
2735 pic
->reference
= parity
;
2736 pic
->linesize
[i
] *= 2;
2738 pic
->poc
= pic
->field_poc
[parity
== PICT_BOTTOM_FIELD
];
2741 static int split_field_copy(Picture
*dest
, Picture
*src
,
2742 int parity
, int id_add
){
2743 int match
= !!(src
->reference
& parity
);
2747 if(parity
!= PICT_FRAME
){
2748 pic_as_field(dest
, parity
);
2750 dest
->pic_id
+= id_add
;
2757 static int build_def_list(Picture
*def
, Picture
**in
, int len
, int is_long
, int sel
){
2761 while(i
[0]<len
|| i
[1]<len
){
2762 while(i
[0]<len
&& !(in
[ i
[0] ] && (in
[ i
[0] ]->reference
& sel
)))
2764 while(i
[1]<len
&& !(in
[ i
[1] ] && (in
[ i
[1] ]->reference
& (sel
^3))))
2767 in
[ i
[0] ]->pic_id
= is_long
? i
[0] : in
[ i
[0] ]->frame_num
;
2768 split_field_copy(&def
[index
++], in
[ i
[0]++ ], sel
, 1);
2771 in
[ i
[1] ]->pic_id
= is_long
? i
[1] : in
[ i
[1] ]->frame_num
;
2772 split_field_copy(&def
[index
++], in
[ i
[1]++ ], sel
^3, 0);
2779 static int add_sorted(Picture
**sorted
, Picture
**src
, int len
, int limit
, int dir
){
2784 best_poc
= dir
? INT_MIN
: INT_MAX
;
2786 for(i
=0; i
<len
; i
++){
2787 const int poc
= src
[i
]->poc
;
2788 if(((poc
> limit
) ^ dir
) && ((poc
< best_poc
) ^ dir
)){
2790 sorted
[out_i
]= src
[i
];
2793 if(best_poc
== (dir
? INT_MIN
: INT_MAX
))
2795 limit
= sorted
[out_i
++]->poc
- dir
;
2801 * fills the default_ref_list.
2803 static int fill_default_ref_list(H264Context
*h
){
2804 MpegEncContext
* const s
= &h
->s
;
2807 if(h
->slice_type_nos
==FF_B_TYPE
){
2808 Picture
*sorted
[32];
2813 cur_poc
= s
->current_picture_ptr
->field_poc
[ s
->picture_structure
== PICT_BOTTOM_FIELD
];
2815 cur_poc
= s
->current_picture_ptr
->poc
;
2817 for(list
= 0; list
<2; list
++){
2818 len
= add_sorted(sorted
, h
->short_ref
, h
->short_ref_count
, cur_poc
, 1^list
);
2819 len
+=add_sorted(sorted
+len
, h
->short_ref
, h
->short_ref_count
, cur_poc
, 0^list
);
2821 len
= build_def_list(h
->default_ref_list
[list
] , sorted
, len
, 0, s
->picture_structure
);
2822 len
+=build_def_list(h
->default_ref_list
[list
]+len
, h
->long_ref
, 16 , 1, s
->picture_structure
);
2825 if(len
< h
->ref_count
[list
])
2826 memset(&h
->default_ref_list
[list
][len
], 0, sizeof(Picture
)*(h
->ref_count
[list
] - len
));
2830 if(lens
[0] == lens
[1] && lens
[1] > 1){
2831 for(i
=0; h
->default_ref_list
[0][i
].data
[0] == h
->default_ref_list
[1][i
].data
[0] && i
<lens
[0]; i
++);
2833 FFSWAP(Picture
, h
->default_ref_list
[1][0], h
->default_ref_list
[1][1]);
2836 len
= build_def_list(h
->default_ref_list
[0] , h
->short_ref
, h
->short_ref_count
, 0, s
->picture_structure
);
2837 len
+= build_def_list(h
->default_ref_list
[0]+len
, h
-> long_ref
, 16 , 1, s
->picture_structure
);
2839 if(len
< h
->ref_count
[0])
2840 memset(&h
->default_ref_list
[0][len
], 0, sizeof(Picture
)*(h
->ref_count
[0] - len
));
2843 for (i
=0; i
<h
->ref_count
[0]; i
++) {
2844 tprintf(h
->s
.avctx
, "List0: %s fn:%d 0x%p\n", (h
->default_ref_list
[0][i
].long_ref
? "LT" : "ST"), h
->default_ref_list
[0][i
].pic_id
, h
->default_ref_list
[0][i
].data
[0]);
2846 if(h
->slice_type_nos
==FF_B_TYPE
){
2847 for (i
=0; i
<h
->ref_count
[1]; i
++) {
2848 tprintf(h
->s
.avctx
, "List1: %s fn:%d 0x%p\n", (h
->default_ref_list
[1][i
].long_ref
? "LT" : "ST"), h
->default_ref_list
[1][i
].pic_id
, h
->default_ref_list
[1][i
].data
[0]);
2855 static void print_short_term(H264Context
*h
);
2856 static void print_long_term(H264Context
*h
);
2859 * Extract structure information about the picture described by pic_num in
2860 * the current decoding context (frame or field). Note that pic_num is
2861 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2862 * @param pic_num picture number for which to extract structure information
2863 * @param structure one of PICT_XXX describing structure of picture
2865 * @return frame number (short term) or long term index of picture
2866 * described by pic_num
2868 static int pic_num_extract(H264Context
*h
, int pic_num
, int *structure
){
2869 MpegEncContext
* const s
= &h
->s
;
2871 *structure
= s
->picture_structure
;
2874 /* opposite field */
2875 *structure
^= PICT_FRAME
;
2882 static int decode_ref_pic_list_reordering(H264Context
*h
){
2883 MpegEncContext
* const s
= &h
->s
;
2884 int list
, index
, pic_structure
;
2886 print_short_term(h
);
2889 for(list
=0; list
<h
->list_count
; list
++){
2890 memcpy(h
->ref_list
[list
], h
->default_ref_list
[list
], sizeof(Picture
)*h
->ref_count
[list
]);
2892 if(get_bits1(&s
->gb
)){
2893 int pred
= h
->curr_pic_num
;
2895 for(index
=0; ; index
++){
2896 unsigned int reordering_of_pic_nums_idc
= get_ue_golomb(&s
->gb
);
2897 unsigned int pic_id
;
2899 Picture
*ref
= NULL
;
2901 if(reordering_of_pic_nums_idc
==3)
2904 if(index
>= h
->ref_count
[list
]){
2905 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference count overflow\n");
2909 if(reordering_of_pic_nums_idc
<3){
2910 if(reordering_of_pic_nums_idc
<2){
2911 const unsigned int abs_diff_pic_num
= get_ue_golomb(&s
->gb
) + 1;
2914 if(abs_diff_pic_num
> h
->max_pic_num
){
2915 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "abs_diff_pic_num overflow\n");
2919 if(reordering_of_pic_nums_idc
== 0) pred
-= abs_diff_pic_num
;
2920 else pred
+= abs_diff_pic_num
;
2921 pred
&= h
->max_pic_num
- 1;
2923 frame_num
= pic_num_extract(h
, pred
, &pic_structure
);
2925 for(i
= h
->short_ref_count
-1; i
>=0; i
--){
2926 ref
= h
->short_ref
[i
];
2927 assert(ref
->reference
);
2928 assert(!ref
->long_ref
);
2930 ref
->frame_num
== frame_num
&&
2931 (ref
->reference
& pic_structure
)
2939 pic_id
= get_ue_golomb(&s
->gb
); //long_term_pic_idx
2941 long_idx
= pic_num_extract(h
, pic_id
, &pic_structure
);
2944 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "long_term_pic_idx overflow\n");
2947 ref
= h
->long_ref
[long_idx
];
2948 assert(!(ref
&& !ref
->reference
));
2949 if(ref
&& (ref
->reference
& pic_structure
)){
2950 ref
->pic_id
= pic_id
;
2951 assert(ref
->long_ref
);
2959 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference picture missing during reorder\n");
2960 memset(&h
->ref_list
[list
][index
], 0, sizeof(Picture
)); //FIXME
2962 for(i
=index
; i
+1<h
->ref_count
[list
]; i
++){
2963 if(ref
->long_ref
== h
->ref_list
[list
][i
].long_ref
&& ref
->pic_id
== h
->ref_list
[list
][i
].pic_id
)
2966 for(; i
> index
; i
--){
2967 h
->ref_list
[list
][i
]= h
->ref_list
[list
][i
-1];
2969 h
->ref_list
[list
][index
]= *ref
;
2971 pic_as_field(&h
->ref_list
[list
][index
], pic_structure
);
2975 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal reordering_of_pic_nums_idc\n");
2981 for(list
=0; list
<h
->list_count
; list
++){
2982 for(index
= 0; index
< h
->ref_count
[list
]; index
++){
2983 if(!h
->ref_list
[list
][index
].data
[0]){
2984 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "Missing reference picture\n");
2985 h
->ref_list
[list
][index
]= s
->current_picture
; //FIXME this is not a sensible solution
2993 static void fill_mbaff_ref_list(H264Context
*h
){
2995 for(list
=0; list
<2; list
++){ //FIXME try list_count
2996 for(i
=0; i
<h
->ref_count
[list
]; i
++){
2997 Picture
*frame
= &h
->ref_list
[list
][i
];
2998 Picture
*field
= &h
->ref_list
[list
][16+2*i
];
3001 field
[0].linesize
[j
] <<= 1;
3002 field
[0].reference
= PICT_TOP_FIELD
;
3003 field
[0].poc
= field
[0].field_poc
[0];
3004 field
[1] = field
[0];
3006 field
[1].data
[j
] += frame
->linesize
[j
];
3007 field
[1].reference
= PICT_BOTTOM_FIELD
;
3008 field
[1].poc
= field
[1].field_poc
[1];
3010 h
->luma_weight
[list
][16+2*i
] = h
->luma_weight
[list
][16+2*i
+1] = h
->luma_weight
[list
][i
];
3011 h
->luma_offset
[list
][16+2*i
] = h
->luma_offset
[list
][16+2*i
+1] = h
->luma_offset
[list
][i
];
3013 h
->chroma_weight
[list
][16+2*i
][j
] = h
->chroma_weight
[list
][16+2*i
+1][j
] = h
->chroma_weight
[list
][i
][j
];
3014 h
->chroma_offset
[list
][16+2*i
][j
] = h
->chroma_offset
[list
][16+2*i
+1][j
] = h
->chroma_offset
[list
][i
][j
];
3018 for(j
=0; j
<h
->ref_count
[1]; j
++){
3019 for(i
=0; i
<h
->ref_count
[0]; i
++)
3020 h
->implicit_weight
[j
][16+2*i
] = h
->implicit_weight
[j
][16+2*i
+1] = h
->implicit_weight
[j
][i
];
3021 memcpy(h
->implicit_weight
[16+2*j
], h
->implicit_weight
[j
], sizeof(*h
->implicit_weight
));
3022 memcpy(h
->implicit_weight
[16+2*j
+1], h
->implicit_weight
[j
], sizeof(*h
->implicit_weight
));
3026 static int pred_weight_table(H264Context
*h
){
3027 MpegEncContext
* const s
= &h
->s
;
3029 int luma_def
, chroma_def
;
3032 h
->use_weight_chroma
= 0;
3033 h
->luma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
3034 h
->chroma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
3035 luma_def
= 1<<h
->luma_log2_weight_denom
;
3036 chroma_def
= 1<<h
->chroma_log2_weight_denom
;
3038 for(list
=0; list
<2; list
++){
3039 for(i
=0; i
<h
->ref_count
[list
]; i
++){
3040 int luma_weight_flag
, chroma_weight_flag
;
3042 luma_weight_flag
= get_bits1(&s
->gb
);
3043 if(luma_weight_flag
){
3044 h
->luma_weight
[list
][i
]= get_se_golomb(&s
->gb
);
3045 h
->luma_offset
[list
][i
]= get_se_golomb(&s
->gb
);
3046 if( h
->luma_weight
[list
][i
] != luma_def
3047 || h
->luma_offset
[list
][i
] != 0)
3050 h
->luma_weight
[list
][i
]= luma_def
;
3051 h
->luma_offset
[list
][i
]= 0;
3055 chroma_weight_flag
= get_bits1(&s
->gb
);
3056 if(chroma_weight_flag
){
3059 h
->chroma_weight
[list
][i
][j
]= get_se_golomb(&s
->gb
);
3060 h
->chroma_offset
[list
][i
][j
]= get_se_golomb(&s
->gb
);
3061 if( h
->chroma_weight
[list
][i
][j
] != chroma_def
3062 || h
->chroma_offset
[list
][i
][j
] != 0)
3063 h
->use_weight_chroma
= 1;
3068 h
->chroma_weight
[list
][i
][j
]= chroma_def
;
3069 h
->chroma_offset
[list
][i
][j
]= 0;
3074 if(h
->slice_type_nos
!= FF_B_TYPE
) break;
3076 h
->use_weight
= h
->use_weight
|| h
->use_weight_chroma
;
3080 static void implicit_weight_table(H264Context
*h
){
3081 MpegEncContext
* const s
= &h
->s
;
3083 int cur_poc
= s
->current_picture_ptr
->poc
;
3085 if( h
->ref_count
[0] == 1 && h
->ref_count
[1] == 1
3086 && h
->ref_list
[0][0].poc
+ h
->ref_list
[1][0].poc
== 2*cur_poc
){
3088 h
->use_weight_chroma
= 0;
3093 h
->use_weight_chroma
= 2;
3094 h
->luma_log2_weight_denom
= 5;
3095 h
->chroma_log2_weight_denom
= 5;
3097 for(ref0
=0; ref0
< h
->ref_count
[0]; ref0
++){
3098 int poc0
= h
->ref_list
[0][ref0
].poc
;
3099 for(ref1
=0; ref1
< h
->ref_count
[1]; ref1
++){
3100 int poc1
= h
->ref_list
[1][ref1
].poc
;
3101 int td
= av_clip(poc1
- poc0
, -128, 127);
3103 int tb
= av_clip(cur_poc
- poc0
, -128, 127);
3104 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
3105 int dist_scale_factor
= av_clip((tb
*tx
+ 32) >> 6, -1024, 1023) >> 2;
3106 if(dist_scale_factor
< -64 || dist_scale_factor
> 128)
3107 h
->implicit_weight
[ref0
][ref1
] = 32;
3109 h
->implicit_weight
[ref0
][ref1
] = 64 - dist_scale_factor
;
3111 h
->implicit_weight
[ref0
][ref1
] = 32;
3117 * Mark a picture as no longer needed for reference. The refmask
3118 * argument allows unreferencing of individual fields or the whole frame.
3119 * If the picture becomes entirely unreferenced, but is being held for
3120 * display purposes, it is marked as such.
3121 * @param refmask mask of fields to unreference; the mask is bitwise
3122 * anded with the reference marking of pic
3123 * @return non-zero if pic becomes entirely unreferenced (except possibly
3124 * for display purposes) zero if one of the fields remains in
3127 static inline int unreference_pic(H264Context
*h
, Picture
*pic
, int refmask
){
3129 if (pic
->reference
&= refmask
) {
3132 for(i
= 0; h
->delayed_pic
[i
]; i
++)
3133 if(pic
== h
->delayed_pic
[i
]){
3134 pic
->reference
=DELAYED_PIC_REF
;
3142 * instantaneous decoder refresh.
3144 static void idr(H264Context
*h
){
3147 for(i
=0; i
<16; i
++){
3148 remove_long(h
, i
, 0);
3150 assert(h
->long_ref_count
==0);
3152 for(i
=0; i
<h
->short_ref_count
; i
++){
3153 unreference_pic(h
, h
->short_ref
[i
], 0);
3154 h
->short_ref
[i
]= NULL
;
3156 h
->short_ref_count
=0;
3157 h
->prev_frame_num
= 0;
3158 h
->prev_frame_num_offset
= 0;
3163 /* forget old pics after a seek */
3164 static void flush_dpb(AVCodecContext
*avctx
){
3165 H264Context
*h
= avctx
->priv_data
;
3167 for(i
=0; i
<MAX_DELAYED_PIC_COUNT
; i
++) {
3168 if(h
->delayed_pic
[i
])
3169 h
->delayed_pic
[i
]->reference
= 0;
3170 h
->delayed_pic
[i
]= NULL
;
3172 h
->outputed_poc
= INT_MIN
;
3174 if(h
->s
.current_picture_ptr
)
3175 h
->s
.current_picture_ptr
->reference
= 0;
3176 h
->s
.first_field
= 0;
3177 ff_mpeg_flush(avctx
);
3181 * Find a Picture in the short term reference list by frame number.
3182 * @param frame_num frame number to search for
3183 * @param idx the index into h->short_ref where returned picture is found
3184 * undefined if no picture found.
3185 * @return pointer to the found picture, or NULL if no pic with the provided
3186 * frame number is found
3188 static Picture
* find_short(H264Context
*h
, int frame_num
, int *idx
){
3189 MpegEncContext
* const s
= &h
->s
;
3192 for(i
=0; i
<h
->short_ref_count
; i
++){
3193 Picture
*pic
= h
->short_ref
[i
];
3194 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3195 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d %d %p\n", i
, pic
->frame_num
, pic
);
3196 if(pic
->frame_num
== frame_num
) {
3205 * Remove a picture from the short term reference list by its index in
3206 * that list. This does no checking on the provided index; it is assumed
3207 * to be valid. Other list entries are shifted down.
3208 * @param i index into h->short_ref of picture to remove.
3210 static void remove_short_at_index(H264Context
*h
, int i
){
3211 assert(i
>= 0 && i
< h
->short_ref_count
);
3212 h
->short_ref
[i
]= NULL
;
3213 if (--h
->short_ref_count
)
3214 memmove(&h
->short_ref
[i
], &h
->short_ref
[i
+1], (h
->short_ref_count
- i
)*sizeof(Picture
*));
3219 * @return the removed picture or NULL if an error occurs
3221 static Picture
* remove_short(H264Context
*h
, int frame_num
, int ref_mask
){
3222 MpegEncContext
* const s
= &h
->s
;
3226 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3227 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "remove short %d count %d\n", frame_num
, h
->short_ref_count
);
3229 pic
= find_short(h
, frame_num
, &i
);
3231 if(unreference_pic(h
, pic
, ref_mask
))
3232 remove_short_at_index(h
, i
);
3239 * Remove a picture from the long term reference list by its index in
3241 * @return the removed picture or NULL if an error occurs
3243 static Picture
* remove_long(H264Context
*h
, int i
, int ref_mask
){
3246 pic
= h
->long_ref
[i
];
3248 if(unreference_pic(h
, pic
, ref_mask
)){
3249 assert(h
->long_ref
[i
]->long_ref
== 1);
3250 h
->long_ref
[i
]->long_ref
= 0;
3251 h
->long_ref
[i
]= NULL
;
3252 h
->long_ref_count
--;
3260 * print short term list
3262 static void print_short_term(H264Context
*h
) {
3264 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3265 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "short term list:\n");
3266 for(i
=0; i
<h
->short_ref_count
; i
++){
3267 Picture
*pic
= h
->short_ref
[i
];
3268 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3274 * print long term list
3276 static void print_long_term(H264Context
*h
) {
3278 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3279 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "long term list:\n");
3280 for(i
= 0; i
< 16; i
++){
3281 Picture
*pic
= h
->long_ref
[i
];
3283 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3290 * Executes the reference picture marking (memory management control operations).
3292 static int execute_ref_pic_marking(H264Context
*h
, MMCO
*mmco
, int mmco_count
){
3293 MpegEncContext
* const s
= &h
->s
;
3295 int current_ref_assigned
=0;
3298 if((s
->avctx
->debug
&FF_DEBUG_MMCO
) && mmco_count
==0)
3299 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "no mmco here\n");
3301 for(i
=0; i
<mmco_count
; i
++){
3302 int structure
, frame_num
;
3303 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3304 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco:%d %d %d\n", h
->mmco
[i
].opcode
, h
->mmco
[i
].short_pic_num
, h
->mmco
[i
].long_arg
);
3306 if( mmco
[i
].opcode
== MMCO_SHORT2UNUSED
3307 || mmco
[i
].opcode
== MMCO_SHORT2LONG
){
3308 frame_num
= pic_num_extract(h
, mmco
[i
].short_pic_num
, &structure
);
3309 pic
= find_short(h
, frame_num
, &j
);
3311 if(mmco
[i
].opcode
!= MMCO_SHORT2LONG
|| !h
->long_ref
[mmco
[i
].long_arg
]
3312 || h
->long_ref
[mmco
[i
].long_arg
]->frame_num
!= frame_num
)
3313 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mmco: unref short failure\n");
3318 switch(mmco
[i
].opcode
){
3319 case MMCO_SHORT2UNUSED
:
3320 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3321 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref short %d count %d\n", h
->mmco
[i
].short_pic_num
, h
->short_ref_count
);
3322 remove_short(h
, frame_num
, structure
^ PICT_FRAME
);
3324 case MMCO_SHORT2LONG
:
3325 if (h
->long_ref
[mmco
[i
].long_arg
] != pic
)
3326 remove_long(h
, mmco
[i
].long_arg
, 0);
3328 remove_short_at_index(h
, j
);
3329 h
->long_ref
[ mmco
[i
].long_arg
]= pic
;
3330 if (h
->long_ref
[ mmco
[i
].long_arg
]){
3331 h
->long_ref
[ mmco
[i
].long_arg
]->long_ref
=1;
3332 h
->long_ref_count
++;
3335 case MMCO_LONG2UNUSED
:
3336 j
= pic_num_extract(h
, mmco
[i
].long_arg
, &structure
);
3337 pic
= h
->long_ref
[j
];
3339 remove_long(h
, j
, structure
^ PICT_FRAME
);
3340 } else if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3341 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref long failure\n");
3344 // Comment below left from previous code as it is an interesting note.
3345 /* First field in pair is in short term list or
3346 * at a different long term index.
3347 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3348 * Report the problem and keep the pair where it is,
3349 * and mark this field valid.
3352 if (h
->long_ref
[mmco
[i
].long_arg
] != s
->current_picture_ptr
) {
3353 remove_long(h
, mmco
[i
].long_arg
, 0);
3355 h
->long_ref
[ mmco
[i
].long_arg
]= s
->current_picture_ptr
;
3356 h
->long_ref
[ mmco
[i
].long_arg
]->long_ref
=1;
3357 h
->long_ref_count
++;
3360 s
->current_picture_ptr
->reference
|= s
->picture_structure
;
3361 current_ref_assigned
=1;
3363 case MMCO_SET_MAX_LONG
:
3364 assert(mmco
[i
].long_arg
<= 16);
3365 // just remove the long term whose index is greater than the new max
3366 for(j
= mmco
[i
].long_arg
; j
<16; j
++){
3367 remove_long(h
, j
, 0);
3371 while(h
->short_ref_count
){
3372 remove_short(h
, h
->short_ref
[0]->frame_num
, 0);
3374 for(j
= 0; j
< 16; j
++) {
3375 remove_long(h
, j
, 0);
3377 s
->current_picture_ptr
->poc
=
3378 s
->current_picture_ptr
->field_poc
[0]=
3379 s
->current_picture_ptr
->field_poc
[1]=
3383 s
->current_picture_ptr
->frame_num
= 0;
3389 if (!current_ref_assigned
) {
3390 /* Second field of complementary field pair; the first field of
3391 * which is already referenced. If short referenced, it
3392 * should be first entry in short_ref. If not, it must exist
3393 * in long_ref; trying to put it on the short list here is an
3394 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3396 if (h
->short_ref_count
&& h
->short_ref
[0] == s
->current_picture_ptr
) {
3397 /* Just mark the second field valid */
3398 s
->current_picture_ptr
->reference
= PICT_FRAME
;
3399 } else if (s
->current_picture_ptr
->long_ref
) {
3400 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term reference "
3401 "assignment for second field "
3402 "in complementary field pair "
3403 "(first field is long term)\n");
3405 pic
= remove_short(h
, s
->current_picture_ptr
->frame_num
, 0);
3407 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term buffer state detected\n");
3410 if(h
->short_ref_count
)
3411 memmove(&h
->short_ref
[1], &h
->short_ref
[0], h
->short_ref_count
*sizeof(Picture
*));
3413 h
->short_ref
[0]= s
->current_picture_ptr
;
3414 h
->short_ref_count
++;
3415 s
->current_picture_ptr
->reference
|= s
->picture_structure
;
3419 if (h
->long_ref_count
+ h
->short_ref_count
> h
->sps
.ref_frame_count
){
3421 /* We have too many reference frames, probably due to corrupted
3422 * stream. Need to discard one frame. Prevents overrun of the
3423 * short_ref and long_ref buffers.
3425 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3426 "number of reference frames exceeds max (probably "
3427 "corrupt input), discarding one\n");
3429 if (h
->long_ref_count
&& !h
->short_ref_count
) {
3430 for (i
= 0; i
< 16; ++i
)
3435 remove_long(h
, i
, 0);
3437 pic
= h
->short_ref
[h
->short_ref_count
- 1];
3438 remove_short(h
, pic
->frame_num
, 0);
3442 print_short_term(h
);
3447 static int decode_ref_pic_marking(H264Context
*h
, GetBitContext
*gb
){
3448 MpegEncContext
* const s
= &h
->s
;
3452 if(h
->nal_unit_type
== NAL_IDR_SLICE
){ //FIXME fields
3453 s
->broken_link
= get_bits1(gb
) -1;
3455 h
->mmco
[0].opcode
= MMCO_LONG
;
3456 h
->mmco
[0].long_arg
= 0;
3460 if(get_bits1(gb
)){ // adaptive_ref_pic_marking_mode_flag
3461 for(i
= 0; i
<MAX_MMCO_COUNT
; i
++) {
3462 MMCOOpcode opcode
= get_ue_golomb(gb
);
3464 h
->mmco
[i
].opcode
= opcode
;
3465 if(opcode
==MMCO_SHORT2UNUSED
|| opcode
==MMCO_SHORT2LONG
){
3466 h
->mmco
[i
].short_pic_num
= (h
->curr_pic_num
- get_ue_golomb(gb
) - 1) & (h
->max_pic_num
- 1);
3467 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3468 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3472 if(opcode
==MMCO_SHORT2LONG
|| opcode
==MMCO_LONG2UNUSED
|| opcode
==MMCO_LONG
|| opcode
==MMCO_SET_MAX_LONG
){
3473 unsigned int long_arg
= get_ue_golomb(gb
);
3474 if(long_arg
>= 32 || (long_arg
>= 16 && !(opcode
== MMCO_LONG2UNUSED
&& FIELD_PICTURE
))){
3475 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal long ref in memory management control operation %d\n", opcode
);
3478 h
->mmco
[i
].long_arg
= long_arg
;
3481 if(opcode
> (unsigned)MMCO_LONG
){
3482 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal memory management control operation %d\n", opcode
);
3485 if(opcode
== MMCO_END
)
3490 assert(h
->long_ref_count
+ h
->short_ref_count
<= h
->sps
.ref_frame_count
);
3492 if(h
->short_ref_count
&& h
->long_ref_count
+ h
->short_ref_count
== h
->sps
.ref_frame_count
&&
3493 !(FIELD_PICTURE
&& !s
->first_field
&& s
->current_picture_ptr
->reference
)) {
3494 h
->mmco
[0].opcode
= MMCO_SHORT2UNUSED
;
3495 h
->mmco
[0].short_pic_num
= h
->short_ref
[ h
->short_ref_count
- 1 ]->frame_num
;
3497 if (FIELD_PICTURE
) {
3498 h
->mmco
[0].short_pic_num
*= 2;
3499 h
->mmco
[1].opcode
= MMCO_SHORT2UNUSED
;
3500 h
->mmco
[1].short_pic_num
= h
->mmco
[0].short_pic_num
+ 1;
3510 static int init_poc(H264Context
*h
){
3511 MpegEncContext
* const s
= &h
->s
;
3512 const int max_frame_num
= 1<<h
->sps
.log2_max_frame_num
;
3514 Picture
*cur
= s
->current_picture_ptr
;
3516 h
->frame_num_offset
= h
->prev_frame_num_offset
;
3517 if(h
->frame_num
< h
->prev_frame_num
)
3518 h
->frame_num_offset
+= max_frame_num
;
3520 if(h
->sps
.poc_type
==0){
3521 const int max_poc_lsb
= 1<<h
->sps
.log2_max_poc_lsb
;
3523 if (h
->poc_lsb
< h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
>= max_poc_lsb
/2)
3524 h
->poc_msb
= h
->prev_poc_msb
+ max_poc_lsb
;
3525 else if(h
->poc_lsb
> h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
< -max_poc_lsb
/2)
3526 h
->poc_msb
= h
->prev_poc_msb
- max_poc_lsb
;
3528 h
->poc_msb
= h
->prev_poc_msb
;
3529 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3531 field_poc
[1] = h
->poc_msb
+ h
->poc_lsb
;
3532 if(s
->picture_structure
== PICT_FRAME
)
3533 field_poc
[1] += h
->delta_poc_bottom
;
3534 }else if(h
->sps
.poc_type
==1){
3535 int abs_frame_num
, expected_delta_per_poc_cycle
, expectedpoc
;
3538 if(h
->sps
.poc_cycle_length
!= 0)
3539 abs_frame_num
= h
->frame_num_offset
+ h
->frame_num
;
3543 if(h
->nal_ref_idc
==0 && abs_frame_num
> 0)
3546 expected_delta_per_poc_cycle
= 0;
3547 for(i
=0; i
< h
->sps
.poc_cycle_length
; i
++)
3548 expected_delta_per_poc_cycle
+= h
->sps
.offset_for_ref_frame
[ i
]; //FIXME integrate during sps parse
3550 if(abs_frame_num
> 0){
3551 int poc_cycle_cnt
= (abs_frame_num
- 1) / h
->sps
.poc_cycle_length
;
3552 int frame_num_in_poc_cycle
= (abs_frame_num
- 1) % h
->sps
.poc_cycle_length
;
3554 expectedpoc
= poc_cycle_cnt
* expected_delta_per_poc_cycle
;
3555 for(i
= 0; i
<= frame_num_in_poc_cycle
; i
++)
3556 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_ref_frame
[ i
];
3560 if(h
->nal_ref_idc
== 0)
3561 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_non_ref_pic
;
3563 field_poc
[0] = expectedpoc
+ h
->delta_poc
[0];
3564 field_poc
[1] = field_poc
[0] + h
->sps
.offset_for_top_to_bottom_field
;
3566 if(s
->picture_structure
== PICT_FRAME
)
3567 field_poc
[1] += h
->delta_poc
[1];
3569 int poc
= 2*(h
->frame_num_offset
+ h
->frame_num
);
3578 if(s
->picture_structure
!= PICT_BOTTOM_FIELD
)
3579 s
->current_picture_ptr
->field_poc
[0]= field_poc
[0];
3580 if(s
->picture_structure
!= PICT_TOP_FIELD
)
3581 s
->current_picture_ptr
->field_poc
[1]= field_poc
[1];
3582 cur
->poc
= FFMIN(cur
->field_poc
[0], cur
->field_poc
[1]);
3589 * initialize scan tables
3591 static void init_scan_tables(H264Context
*h
){
3592 MpegEncContext
* const s
= &h
->s
;
3594 if(s
->dsp
.h264_idct_add
== ff_h264_idct_add_c
){ //FIXME little ugly
3595 memcpy(h
->zigzag_scan
, zigzag_scan
, 16*sizeof(uint8_t));
3596 memcpy(h
-> field_scan
, field_scan
, 16*sizeof(uint8_t));
3598 for(i
=0; i
<16; i
++){
3599 #define T(x) (x>>2) | ((x<<2) & 0xF)
3600 h
->zigzag_scan
[i
] = T(zigzag_scan
[i
]);
3601 h
-> field_scan
[i
] = T( field_scan
[i
]);
3605 if(s
->dsp
.h264_idct8_add
== ff_h264_idct8_add_c
){
3606 memcpy(h
->zigzag_scan8x8
, zigzag_scan8x8
, 64*sizeof(uint8_t));
3607 memcpy(h
->zigzag_scan8x8_cavlc
, zigzag_scan8x8_cavlc
, 64*sizeof(uint8_t));
3608 memcpy(h
->field_scan8x8
, field_scan8x8
, 64*sizeof(uint8_t));
3609 memcpy(h
->field_scan8x8_cavlc
, field_scan8x8_cavlc
, 64*sizeof(uint8_t));
3611 for(i
=0; i
<64; i
++){
3612 #define T(x) (x>>3) | ((x&7)<<3)
3613 h
->zigzag_scan8x8
[i
] = T(zigzag_scan8x8
[i
]);
3614 h
->zigzag_scan8x8_cavlc
[i
] = T(zigzag_scan8x8_cavlc
[i
]);
3615 h
->field_scan8x8
[i
] = T(field_scan8x8
[i
]);
3616 h
->field_scan8x8_cavlc
[i
] = T(field_scan8x8_cavlc
[i
]);
3620 if(h
->sps
.transform_bypass
){ //FIXME same ugly
3621 h
->zigzag_scan_q0
= zigzag_scan
;
3622 h
->zigzag_scan8x8_q0
= zigzag_scan8x8
;
3623 h
->zigzag_scan8x8_cavlc_q0
= zigzag_scan8x8_cavlc
;
3624 h
->field_scan_q0
= field_scan
;
3625 h
->field_scan8x8_q0
= field_scan8x8
;
3626 h
->field_scan8x8_cavlc_q0
= field_scan8x8_cavlc
;
3628 h
->zigzag_scan_q0
= h
->zigzag_scan
;
3629 h
->zigzag_scan8x8_q0
= h
->zigzag_scan8x8
;
3630 h
->zigzag_scan8x8_cavlc_q0
= h
->zigzag_scan8x8_cavlc
;
3631 h
->field_scan_q0
= h
->field_scan
;
3632 h
->field_scan8x8_q0
= h
->field_scan8x8
;
3633 h
->field_scan8x8_cavlc_q0
= h
->field_scan8x8_cavlc
;
3638 * Replicates H264 "master" context to thread contexts.
3640 static void clone_slice(H264Context
*dst
, H264Context
*src
)
3642 memcpy(dst
->block_offset
, src
->block_offset
, sizeof(dst
->block_offset
));
3643 dst
->s
.current_picture_ptr
= src
->s
.current_picture_ptr
;
3644 dst
->s
.current_picture
= src
->s
.current_picture
;
3645 dst
->s
.linesize
= src
->s
.linesize
;
3646 dst
->s
.uvlinesize
= src
->s
.uvlinesize
;
3647 dst
->s
.first_field
= src
->s
.first_field
;
3649 dst
->prev_poc_msb
= src
->prev_poc_msb
;
3650 dst
->prev_poc_lsb
= src
->prev_poc_lsb
;
3651 dst
->prev_frame_num_offset
= src
->prev_frame_num_offset
;
3652 dst
->prev_frame_num
= src
->prev_frame_num
;
3653 dst
->short_ref_count
= src
->short_ref_count
;
3655 memcpy(dst
->short_ref
, src
->short_ref
, sizeof(dst
->short_ref
));
3656 memcpy(dst
->long_ref
, src
->long_ref
, sizeof(dst
->long_ref
));
3657 memcpy(dst
->default_ref_list
, src
->default_ref_list
, sizeof(dst
->default_ref_list
));
3658 memcpy(dst
->ref_list
, src
->ref_list
, sizeof(dst
->ref_list
));
3660 memcpy(dst
->dequant4_coeff
, src
->dequant4_coeff
, sizeof(src
->dequant4_coeff
));
3661 memcpy(dst
->dequant8_coeff
, src
->dequant8_coeff
, sizeof(src
->dequant8_coeff
));
3665 * decodes a slice header.
3666 * This will also call MPV_common_init() and frame_start() as needed.
3668 * @param h h264context
3669 * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
3673 static int decode_slice_header(H264Context
*h
, H264Context
*h0
){
3674 MpegEncContext
* const s
= &h
->s
;
3675 MpegEncContext
* const s0
= &h0
->s
;
3676 unsigned int first_mb_in_slice
;
3677 unsigned int pps_id
;
3678 int num_ref_idx_active_override_flag
;
3679 unsigned int slice_type
, tmp
, i
, j
;
3680 int default_ref_list_done
= 0;
3681 int last_pic_structure
;
3683 s
->dropable
= h
->nal_ref_idc
== 0;
3685 if((s
->avctx
->flags2
& CODEC_FLAG2_FAST
) && !h
->nal_ref_idc
){
3686 s
->me
.qpel_put
= s
->dsp
.put_2tap_qpel_pixels_tab
;
3687 s
->me
.qpel_avg
= s
->dsp
.avg_2tap_qpel_pixels_tab
;
3689 s
->me
.qpel_put
= s
->dsp
.put_h264_qpel_pixels_tab
;
3690 s
->me
.qpel_avg
= s
->dsp
.avg_h264_qpel_pixels_tab
;
3693 first_mb_in_slice
= get_ue_golomb(&s
->gb
);
3695 if((s
->flags2
& CODEC_FLAG2_CHUNKS
) && first_mb_in_slice
== 0){
3696 h0
->current_slice
= 0;
3697 if (!s0
->first_field
)
3698 s
->current_picture_ptr
= NULL
;
3701 slice_type
= get_ue_golomb(&s
->gb
);
3703 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "slice type too large (%d) at %d %d\n", h
->slice_type
, s
->mb_x
, s
->mb_y
);
3708 h
->slice_type_fixed
=1;
3710 h
->slice_type_fixed
=0;
3712 slice_type
= golomb_to_pict_type
[ slice_type
];
3713 if (slice_type
== FF_I_TYPE
3714 || (h0
->current_slice
!= 0 && slice_type
== h0
->last_slice_type
) ) {
3715 default_ref_list_done
= 1;
3717 h
->slice_type
= slice_type
;
3718 h
->slice_type_nos
= slice_type
& 3;
3720 s
->pict_type
= h
->slice_type
; // to make a few old functions happy, it's wrong though
3721 if (s
->pict_type
== FF_B_TYPE
&& s0
->last_picture_ptr
== NULL
) {
3722 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3723 "B picture before any references, skipping\n");
3727 pps_id
= get_ue_golomb(&s
->gb
);
3728 if(pps_id
>=MAX_PPS_COUNT
){
3729 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "pps_id out of range\n");
3732 if(!h0
->pps_buffers
[pps_id
]) {
3733 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non-existing PPS referenced\n");
3736 h
->pps
= *h0
->pps_buffers
[pps_id
];
3738 if(!h0
->sps_buffers
[h
->pps
.sps_id
]) {
3739 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non-existing SPS referenced\n");
3742 h
->sps
= *h0
->sps_buffers
[h
->pps
.sps_id
];
3744 if(h
== h0
&& h
->dequant_coeff_pps
!= pps_id
){
3745 h
->dequant_coeff_pps
= pps_id
;
3746 init_dequant_tables(h
);
3749 s
->mb_width
= h
->sps
.mb_width
;
3750 s
->mb_height
= h
->sps
.mb_height
* (2 - h
->sps
.frame_mbs_only_flag
);
3752 h
->b_stride
= s
->mb_width
*4;
3753 h
->b8_stride
= s
->mb_width
*2;
3755 s
->width
= 16*s
->mb_width
- 2*FFMIN(h
->sps
.crop_right
, 7);
3756 if(h
->sps
.frame_mbs_only_flag
)
3757 s
->height
= 16*s
->mb_height
- 2*FFMIN(h
->sps
.crop_bottom
, 7);
3759 s
->height
= 16*s
->mb_height
- 4*FFMIN(h
->sps
.crop_bottom
, 3);
3761 if (s
->context_initialized
3762 && ( s
->width
!= s
->avctx
->width
|| s
->height
!= s
->avctx
->height
)) {
3764 return -1; // width / height changed during parallelized decoding
3768 if (!s
->context_initialized
) {
3770 return -1; // we cant (re-)initialize context during parallel decoding
3771 if (MPV_common_init(s
) < 0)
3775 init_scan_tables(h
);
3778 for(i
= 1; i
< s
->avctx
->thread_count
; i
++) {
3780 c
= h
->thread_context
[i
] = av_malloc(sizeof(H264Context
));
3781 memcpy(c
, h
->s
.thread_context
[i
], sizeof(MpegEncContext
));
3782 memset(&c
->s
+ 1, 0, sizeof(H264Context
) - sizeof(MpegEncContext
));
3785 init_scan_tables(c
);
3789 for(i
= 0; i
< s
->avctx
->thread_count
; i
++)
3790 if(context_init(h
->thread_context
[i
]) < 0)
3793 s
->avctx
->width
= s
->width
;
3794 s
->avctx
->height
= s
->height
;
3795 s
->avctx
->sample_aspect_ratio
= h
->sps
.sar
;
3796 if(!s
->avctx
->sample_aspect_ratio
.den
)
3797 s
->avctx
->sample_aspect_ratio
.den
= 1;
3799 if(h
->sps
.timing_info_present_flag
){
3800 s
->avctx
->time_base
= (AVRational
){h
->sps
.num_units_in_tick
* 2, h
->sps
.time_scale
};
3801 if(h
->x264_build
> 0 && h
->x264_build
< 44)
3802 s
->avctx
->time_base
.den
*= 2;
3803 av_reduce(&s
->avctx
->time_base
.num
, &s
->avctx
->time_base
.den
,
3804 s
->avctx
->time_base
.num
, s
->avctx
->time_base
.den
, 1<<30);
3808 h
->frame_num
= get_bits(&s
->gb
, h
->sps
.log2_max_frame_num
);
3811 h
->mb_aff_frame
= 0;
3812 last_pic_structure
= s0
->picture_structure
;
3813 if(h
->sps
.frame_mbs_only_flag
){
3814 s
->picture_structure
= PICT_FRAME
;
3816 if(get_bits1(&s
->gb
)) { //field_pic_flag
3817 s
->picture_structure
= PICT_TOP_FIELD
+ get_bits1(&s
->gb
); //bottom_field_flag
3819 s
->picture_structure
= PICT_FRAME
;
3820 h
->mb_aff_frame
= h
->sps
.mb_aff
;
3823 h
->mb_field_decoding_flag
= s
->picture_structure
!= PICT_FRAME
;
3825 if(h0
->current_slice
== 0){
3826 while(h
->frame_num
!= h
->prev_frame_num
&&
3827 h
->frame_num
!= (h
->prev_frame_num
+1)%(1<<h
->sps
.log2_max_frame_num
)){
3828 av_log(NULL
, AV_LOG_DEBUG
, "Frame num gap %d %d\n", h
->frame_num
, h
->prev_frame_num
);
3830 h
->prev_frame_num
++;
3831 h
->prev_frame_num
%= 1<<h
->sps
.log2_max_frame_num
;
3832 s
->current_picture_ptr
->frame_num
= h
->prev_frame_num
;
3833 execute_ref_pic_marking(h
, NULL
, 0);
3836 /* See if we have a decoded first field looking for a pair... */
3837 if (s0
->first_field
) {
3838 assert(s0
->current_picture_ptr
);
3839 assert(s0
->current_picture_ptr
->data
[0]);
3840 assert(s0
->current_picture_ptr
->reference
!= DELAYED_PIC_REF
);
3842 /* figure out if we have a complementary field pair */
3843 if (!FIELD_PICTURE
|| s
->picture_structure
== last_pic_structure
) {
3845 * Previous field is unmatched. Don't display it, but let it
3846 * remain for reference if marked as such.
3848 s0
->current_picture_ptr
= NULL
;
3849 s0
->first_field
= FIELD_PICTURE
;
3852 if (h
->nal_ref_idc
&&
3853 s0
->current_picture_ptr
->reference
&&
3854 s0
->current_picture_ptr
->frame_num
!= h
->frame_num
) {
3856 * This and previous field were reference, but had
3857 * different frame_nums. Consider this field first in
3858 * pair. Throw away previous field except for reference
3861 s0
->first_field
= 1;
3862 s0
->current_picture_ptr
= NULL
;
3865 /* Second field in complementary pair */
3866 s0
->first_field
= 0;
3871 /* Frame or first field in a potentially complementary pair */
3872 assert(!s0
->current_picture_ptr
);
3873 s0
->first_field
= FIELD_PICTURE
;
3876 if((!FIELD_PICTURE
|| s0
->first_field
) && frame_start(h
) < 0) {
3877 s0
->first_field
= 0;
3884 s
->current_picture_ptr
->frame_num
= h
->frame_num
; //FIXME frame_num cleanup
3886 assert(s
->mb_num
== s
->mb_width
* s
->mb_height
);
3887 if(first_mb_in_slice
<< FIELD_OR_MBAFF_PICTURE
>= s
->mb_num
||
3888 first_mb_in_slice
>= s
->mb_num
){
3889 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "first_mb_in_slice overflow\n");
3892 s
->resync_mb_x
= s
->mb_x
= first_mb_in_slice
% s
->mb_width
;
3893 s
->resync_mb_y
= s
->mb_y
= (first_mb_in_slice
/ s
->mb_width
) << FIELD_OR_MBAFF_PICTURE
;
3894 if (s
->picture_structure
== PICT_BOTTOM_FIELD
)
3895 s
->resync_mb_y
= s
->mb_y
= s
->mb_y
+ 1;
3896 assert(s
->mb_y
< s
->mb_height
);
3898 if(s
->picture_structure
==PICT_FRAME
){
3899 h
->curr_pic_num
= h
->frame_num
;
3900 h
->max_pic_num
= 1<< h
->sps
.log2_max_frame_num
;
3902 h
->curr_pic_num
= 2*h
->frame_num
+ 1;
3903 h
->max_pic_num
= 1<<(h
->sps
.log2_max_frame_num
+ 1);
3906 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
3907 get_ue_golomb(&s
->gb
); /* idr_pic_id */
3910 if(h
->sps
.poc_type
==0){
3911 h
->poc_lsb
= get_bits(&s
->gb
, h
->sps
.log2_max_poc_lsb
);
3913 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
){
3914 h
->delta_poc_bottom
= get_se_golomb(&s
->gb
);
3918 if(h
->sps
.poc_type
==1 && !h
->sps
.delta_pic_order_always_zero_flag
){
3919 h
->delta_poc
[0]= get_se_golomb(&s
->gb
);
3921 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
)
3922 h
->delta_poc
[1]= get_se_golomb(&s
->gb
);
3927 if(h
->pps
.redundant_pic_cnt_present
){
3928 h
->redundant_pic_count
= get_ue_golomb(&s
->gb
);
3931 //set defaults, might be overridden a few lines later
3932 h
->ref_count
[0]= h
->pps
.ref_count
[0];
3933 h
->ref_count
[1]= h
->pps
.ref_count
[1];
3935 if(h
->slice_type_nos
!= FF_I_TYPE
){
3936 if(h
->slice_type_nos
== FF_B_TYPE
){
3937 h
->direct_spatial_mv_pred
= get_bits1(&s
->gb
);
3939 num_ref_idx_active_override_flag
= get_bits1(&s
->gb
);
3941 if(num_ref_idx_active_override_flag
){
3942 h
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
3943 if(h
->slice_type_nos
==FF_B_TYPE
)
3944 h
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
3946 if(h
->ref_count
[0]-1 > 32-1 || h
->ref_count
[1]-1 > 32-1){
3947 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow\n");
3948 h
->ref_count
[0]= h
->ref_count
[1]= 1;
3952 if(h
->slice_type_nos
== FF_B_TYPE
)
3959 if(!default_ref_list_done
){
3960 fill_default_ref_list(h
);
3963 if(h
->slice_type_nos
!=FF_I_TYPE
&& decode_ref_pic_list_reordering(h
) < 0)
3966 if(h
->slice_type_nos
!=FF_I_TYPE
){
3967 s
->last_picture_ptr
= &h
->ref_list
[0][0];
3968 ff_copy_picture(&s
->last_picture
, s
->last_picture_ptr
);
3970 if(h
->slice_type_nos
==FF_B_TYPE
){
3971 s
->next_picture_ptr
= &h
->ref_list
[1][0];
3972 ff_copy_picture(&s
->next_picture
, s
->next_picture_ptr
);
3975 if( (h
->pps
.weighted_pred
&& h
->slice_type_nos
== FF_P_TYPE
)
3976 || (h
->pps
.weighted_bipred_idc
==1 && h
->slice_type_nos
== FF_B_TYPE
) )
3977 pred_weight_table(h
);
3978 else if(h
->pps
.weighted_bipred_idc
==2 && h
->slice_type_nos
== FF_B_TYPE
)
3979 implicit_weight_table(h
);
3984 decode_ref_pic_marking(h0
, &s
->gb
);
3987 fill_mbaff_ref_list(h
);
3989 if(h
->slice_type_nos
==FF_B_TYPE
&& !h
->direct_spatial_mv_pred
)
3990 direct_dist_scale_factor(h
);
3991 direct_ref_list_init(h
);
3993 if( h
->slice_type_nos
!= FF_I_TYPE
&& h
->pps
.cabac
){
3994 tmp
= get_ue_golomb(&s
->gb
);
3996 av_log(s
->avctx
, AV_LOG_ERROR
, "cabac_init_idc overflow\n");
3999 h
->cabac_init_idc
= tmp
;
4002 h
->last_qscale_diff
= 0;
4003 tmp
= h
->pps
.init_qp
+ get_se_golomb(&s
->gb
);
4005 av_log(s
->avctx
, AV_LOG_ERROR
, "QP %u out of range\n", tmp
);
4009 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
4010 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
4011 //FIXME qscale / qp ... stuff
4012 if(h
->slice_type
== FF_SP_TYPE
){
4013 get_bits1(&s
->gb
); /* sp_for_switch_flag */
4015 if(h
->slice_type
==FF_SP_TYPE
|| h
->slice_type
== FF_SI_TYPE
){
4016 get_se_golomb(&s
->gb
); /* slice_qs_delta */
4019 h
->deblocking_filter
= 1;
4020 h
->slice_alpha_c0_offset
= 0;
4021 h
->slice_beta_offset
= 0;
4022 if( h
->pps
.deblocking_filter_parameters_present
) {
4023 tmp
= get_ue_golomb(&s
->gb
);
4025 av_log(s
->avctx
, AV_LOG_ERROR
, "deblocking_filter_idc %u out of range\n", tmp
);
4028 h
->deblocking_filter
= tmp
;
4029 if(h
->deblocking_filter
< 2)
4030 h
->deblocking_filter
^= 1; // 1<->0
4032 if( h
->deblocking_filter
) {
4033 h
->slice_alpha_c0_offset
= get_se_golomb(&s
->gb
) << 1;
4034 h
->slice_beta_offset
= get_se_golomb(&s
->gb
) << 1;
4038 if( s
->avctx
->skip_loop_filter
>= AVDISCARD_ALL
4039 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONKEY
&& h
->slice_type_nos
!= FF_I_TYPE
)
4040 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_BIDIR
&& h
->slice_type_nos
== FF_B_TYPE
)
4041 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
4042 h
->deblocking_filter
= 0;
4044 if(h
->deblocking_filter
== 1 && h0
->max_contexts
> 1) {
4045 if(s
->avctx
->flags2
& CODEC_FLAG2_FAST
) {
4046 /* Cheat slightly for speed:
4047 Do not bother to deblock across slices. */
4048 h
->deblocking_filter
= 2;
4050 h0
->max_contexts
= 1;
4051 if(!h0
->single_decode_warning
) {
4052 av_log(s
->avctx
, AV_LOG_INFO
, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4053 h0
->single_decode_warning
= 1;
4056 return 1; // deblocking switched inside frame
4061 if( h
->pps
.num_slice_groups
> 1 && h
->pps
.mb_slice_group_map_type
>= 3 && h
->pps
.mb_slice_group_map_type
<= 5)
4062 slice_group_change_cycle
= get_bits(&s
->gb
, ?);
4065 h0
->last_slice_type
= slice_type
;
4066 h
->slice_num
= ++h0
->current_slice
;
4067 if(h
->slice_num
>= MAX_SLICES
){
4068 av_log(s
->avctx
, AV_LOG_ERROR
, "Too many slices, increase MAX_SLICES and recompile\n");
4072 int *ref2frm
= h
->ref2frm
[h
->slice_num
&(MAX_SLICES
-1)][j
];
4076 ref2frm
[i
+2]= 4*h
->ref_list
[j
][i
].frame_num
4077 +(h
->ref_list
[j
][i
].reference
&3);
4080 for(i
=16; i
<48; i
++)
4081 ref2frm
[i
+4]= 4*h
->ref_list
[j
][i
].frame_num
4082 +(h
->ref_list
[j
][i
].reference
&3);
4085 h
->emu_edge_width
= (s
->flags
&CODEC_FLAG_EMU_EDGE
) ? 0 : 16;
4086 h
->emu_edge_height
= (FRAME_MBAFF
|| FIELD_PICTURE
) ? 0 : h
->emu_edge_width
;
4088 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
4089 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4091 (s
->picture_structure
==PICT_FRAME
? "F" : s
->picture_structure
==PICT_TOP_FIELD
? "T" : "B"),
4093 av_get_pict_type_char(h
->slice_type
), h
->slice_type_fixed
? " fix" : "", h
->nal_unit_type
== NAL_IDR_SLICE
? " IDR" : "",
4094 pps_id
, h
->frame_num
,
4095 s
->current_picture_ptr
->field_poc
[0], s
->current_picture_ptr
->field_poc
[1],
4096 h
->ref_count
[0], h
->ref_count
[1],
4098 h
->deblocking_filter
, h
->slice_alpha_c0_offset
/2, h
->slice_beta_offset
/2,
4100 h
->use_weight
==1 && h
->use_weight_chroma
? "c" : "",
4101 h
->slice_type
== FF_B_TYPE
? (h
->direct_spatial_mv_pred
? "SPAT" : "TEMP") : ""
4111 static inline int get_level_prefix(GetBitContext
*gb
){
4115 OPEN_READER(re
, gb
);
4116 UPDATE_CACHE(re
, gb
);
4117 buf
=GET_CACHE(re
, gb
);
4119 log
= 32 - av_log2(buf
);
4121 print_bin(buf
>>(32-log
), log
);
4122 av_log(NULL
, AV_LOG_DEBUG
, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf
>>(32-log
), log
, log
-1, get_bits_count(gb
), __FILE__
);
4125 LAST_SKIP_BITS(re
, gb
, log
);
4126 CLOSE_READER(re
, gb
);
4131 static inline int get_dct8x8_allowed(H264Context
*h
){
4134 if(!IS_SUB_8X8(h
->sub_mb_type
[i
])
4135 || (!h
->sps
.direct_8x8_inference_flag
&& IS_DIRECT(h
->sub_mb_type
[i
])))
4142 * decodes a residual block.
4143 * @param n block index
4144 * @param scantable scantable
4145 * @param max_coeff number of coefficients in the block
4146 * @return <0 if an error occurred
4148 static int decode_residual(H264Context
*h
, GetBitContext
*gb
, DCTELEM
*block
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
){
4149 MpegEncContext
* const s
= &h
->s
;
4150 static const int coeff_token_table_index
[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4152 int zeros_left
, coeff_num
, coeff_token
, total_coeff
, i
, j
, trailing_ones
, run_before
;
4154 //FIXME put trailing_onex into the context
4156 if(n
== CHROMA_DC_BLOCK_INDEX
){
4157 coeff_token
= get_vlc2(gb
, chroma_dc_coeff_token_vlc
.table
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 1);
4158 total_coeff
= coeff_token
>>2;
4160 if(n
== LUMA_DC_BLOCK_INDEX
){
4161 total_coeff
= pred_non_zero_count(h
, 0);
4162 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4163 total_coeff
= coeff_token
>>2;
4165 total_coeff
= pred_non_zero_count(h
, n
);
4166 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4167 total_coeff
= coeff_token
>>2;
4168 h
->non_zero_count_cache
[ scan8
[n
] ]= total_coeff
;
4172 //FIXME set last_non_zero?
4176 if(total_coeff
> (unsigned)max_coeff
) {
4177 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "corrupted macroblock %d %d (total_coeff=%d)\n", s
->mb_x
, s
->mb_y
, total_coeff
);
4181 trailing_ones
= coeff_token
&3;
4182 tprintf(h
->s
.avctx
, "trailing:%d, total:%d\n", trailing_ones
, total_coeff
);
4183 assert(total_coeff
<=16);
4185 for(i
=0; i
<trailing_ones
; i
++){
4186 level
[i
]= 1 - 2*get_bits1(gb
);
4190 int level_code
, mask
;
4191 int suffix_length
= total_coeff
> 10 && trailing_ones
< 3;
4192 int prefix
= get_level_prefix(gb
);
4194 //first coefficient has suffix_length equal to 0 or 1
4195 if(prefix
<14){ //FIXME try to build a large unified VLC table for all this
4197 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4199 level_code
= (prefix
<<suffix_length
); //part
4200 }else if(prefix
==14){
4202 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4204 level_code
= prefix
+ get_bits(gb
, 4); //part
4206 level_code
= (15<<suffix_length
) + get_bits(gb
, prefix
-3); //part
4207 if(suffix_length
==0) level_code
+=15; //FIXME doesn't make (much)sense
4209 level_code
+= (1<<(prefix
-3))-4096;
4212 if(trailing_ones
< 3) level_code
+= 2;
4217 mask
= -(level_code
&1);
4218 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
4221 //remaining coefficients have suffix_length > 0
4222 for(;i
<total_coeff
;i
++) {
4223 static const int suffix_limit
[7] = {0,5,11,23,47,95,INT_MAX
};
4224 prefix
= get_level_prefix(gb
);
4226 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
);
4228 level_code
= (15<<suffix_length
) + get_bits(gb
, prefix
-3);
4230 level_code
+= (1<<(prefix
-3))-4096;
4232 mask
= -(level_code
&1);
4233 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
4234 if(level_code
> suffix_limit
[suffix_length
])
4239 if(total_coeff
== max_coeff
)
4242 if(n
== CHROMA_DC_BLOCK_INDEX
)
4243 zeros_left
= get_vlc2(gb
, chroma_dc_total_zeros_vlc
[ total_coeff
-1 ].table
, CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 1);
4245 zeros_left
= get_vlc2(gb
, total_zeros_vlc
[ total_coeff
-1 ].table
, TOTAL_ZEROS_VLC_BITS
, 1);
4248 coeff_num
= zeros_left
+ total_coeff
- 1;
4249 j
= scantable
[coeff_num
];
4251 block
[j
] = level
[0];
4252 for(i
=1;i
<total_coeff
;i
++) {
4255 else if(zeros_left
< 7){
4256 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4258 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4260 zeros_left
-= run_before
;
4261 coeff_num
-= 1 + run_before
;
4262 j
= scantable
[ coeff_num
];
4267 block
[j
] = (level
[0] * qmul
[j
] + 32)>>6;
4268 for(i
=1;i
<total_coeff
;i
++) {
4271 else if(zeros_left
< 7){
4272 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4274 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4276 zeros_left
-= run_before
;
4277 coeff_num
-= 1 + run_before
;
4278 j
= scantable
[ coeff_num
];
4280 block
[j
]= (level
[i
] * qmul
[j
] + 32)>>6;
4285 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "negative number of zero coeffs at %d %d\n", s
->mb_x
, s
->mb_y
);
4292 static void predict_field_decoding_flag(H264Context
*h
){
4293 MpegEncContext
* const s
= &h
->s
;
4294 const int mb_xy
= h
->mb_xy
;
4295 int mb_type
= (h
->slice_table
[mb_xy
-1] == h
->slice_num
)
4296 ? s
->current_picture
.mb_type
[mb_xy
-1]
4297 : (h
->slice_table
[mb_xy
-s
->mb_stride
] == h
->slice_num
)
4298 ? s
->current_picture
.mb_type
[mb_xy
-s
->mb_stride
]
4300 h
->mb_mbaff
= h
->mb_field_decoding_flag
= IS_INTERLACED(mb_type
) ? 1 : 0;
4304 * decodes a P_SKIP or B_SKIP macroblock
4306 static void decode_mb_skip(H264Context
*h
){
4307 MpegEncContext
* const s
= &h
->s
;
4308 const int mb_xy
= h
->mb_xy
;
4311 memset(h
->non_zero_count
[mb_xy
], 0, 16);
4312 memset(h
->non_zero_count_cache
+ 8, 0, 8*5); //FIXME ugly, remove pfui
4315 mb_type
|= MB_TYPE_INTERLACED
;
4317 if( h
->slice_type_nos
== FF_B_TYPE
)
4319 // just for fill_caches. pred_direct_motion will set the real mb_type
4320 mb_type
|= MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
|MB_TYPE_SKIP
;
4322 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4323 pred_direct_motion(h
, &mb_type
);
4324 mb_type
|= MB_TYPE_SKIP
;
4329 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P1L0
|MB_TYPE_SKIP
;
4331 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4332 pred_pskip_motion(h
, &mx
, &my
);
4333 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, 0, 1);
4334 fill_rectangle( h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mx
,my
), 4);
4337 write_back_motion(h
, mb_type
);
4338 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4339 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4340 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4341 h
->prev_mb_skipped
= 1;
4345 * decodes a macroblock
4346 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4348 static int decode_mb_cavlc(H264Context
*h
){
4349 MpegEncContext
* const s
= &h
->s
;
4351 int partition_count
;
4352 unsigned int mb_type
, cbp
;
4353 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
4355 mb_xy
= h
->mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
4357 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?
4359 tprintf(s
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
4360 cbp
= 0; /* avoid warning. FIXME: find a solution without slowing
4362 if(h
->slice_type_nos
!= FF_I_TYPE
){
4363 if(s
->mb_skip_run
==-1)
4364 s
->mb_skip_run
= get_ue_golomb(&s
->gb
);
4366 if (s
->mb_skip_run
--) {
4367 if(FRAME_MBAFF
&& (s
->mb_y
&1) == 0){
4368 if(s
->mb_skip_run
==0)
4369 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4371 predict_field_decoding_flag(h
);
4378 if( (s
->mb_y
&1) == 0 )
4379 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4382 h
->prev_mb_skipped
= 0;
4384 mb_type
= get_ue_golomb(&s
->gb
);
4385 if(h
->slice_type_nos
== FF_B_TYPE
){
4387 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
4388 mb_type
= b_mb_type_info
[mb_type
].type
;
4391 goto decode_intra_mb
;
4393 }else if(h
->slice_type_nos
== FF_P_TYPE
){
4395 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
4396 mb_type
= p_mb_type_info
[mb_type
].type
;
4399 goto decode_intra_mb
;
4402 assert(h
->slice_type_nos
== FF_I_TYPE
);
4403 if(h
->slice_type
== FF_SI_TYPE
&& mb_type
)
4407 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_type %d in %c slice too large at %d %d\n", mb_type
, av_get_pict_type_char(h
->slice_type
), s
->mb_x
, s
->mb_y
);
4411 cbp
= i_mb_type_info
[mb_type
].cbp
;
4412 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
4413 mb_type
= i_mb_type_info
[mb_type
].type
;
4417 mb_type
|= MB_TYPE_INTERLACED
;
4419 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4421 if(IS_INTRA_PCM(mb_type
)){
4424 // We assume these blocks are very rare so we do not optimize it.
4425 align_get_bits(&s
->gb
);
4427 // The pixels are stored in the same order as levels in h->mb array.
4428 for(x
=0; x
< (CHROMA
? 384 : 256); x
++){
4429 ((uint8_t*)h
->mb
)[x
]= get_bits(&s
->gb
, 8);
4432 // In deblocking, the quantizer is 0
4433 s
->current_picture
.qscale_table
[mb_xy
]= 0;
4434 // All coeffs are present
4435 memset(h
->non_zero_count
[mb_xy
], 16, 16);
4437 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4442 h
->ref_count
[0] <<= 1;
4443 h
->ref_count
[1] <<= 1;
4446 fill_caches(h
, mb_type
, 0);
4449 if(IS_INTRA(mb_type
)){
4451 // init_top_left_availability(h);
4452 if(IS_INTRA4x4(mb_type
)){
4455 if(dct8x8_allowed
&& get_bits1(&s
->gb
)){
4456 mb_type
|= MB_TYPE_8x8DCT
;
4460 // fill_intra4x4_pred_table(h);
4461 for(i
=0; i
<16; i
+=di
){
4462 int mode
= pred_intra_mode(h
, i
);
4464 if(!get_bits1(&s
->gb
)){
4465 const int rem_mode
= get_bits(&s
->gb
, 3);
4466 mode
= rem_mode
+ (rem_mode
>= mode
);
4470 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
4472 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = mode
;
4474 write_back_intra_pred_mode(h
);
4475 if( check_intra4x4_pred_mode(h
) < 0)
4478 h
->intra16x16_pred_mode
= check_intra_pred_mode(h
, h
->intra16x16_pred_mode
);
4479 if(h
->intra16x16_pred_mode
< 0)
4483 pred_mode
= check_intra_pred_mode(h
, get_ue_golomb(&s
->gb
));
4486 h
->chroma_pred_mode
= pred_mode
;
4488 }else if(partition_count
==4){
4489 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
4491 if(h
->slice_type_nos
== FF_B_TYPE
){
4493 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
4494 if(h
->sub_mb_type
[i
] >=13){
4495 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "B sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4498 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4499 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4501 if( IS_DIRECT(h
->sub_mb_type
[0]) || IS_DIRECT(h
->sub_mb_type
[1])
4502 || IS_DIRECT(h
->sub_mb_type
[2]) || IS_DIRECT(h
->sub_mb_type
[3])) {
4503 pred_direct_motion(h
, &mb_type
);
4504 h
->ref_cache
[0][scan8
[4]] =
4505 h
->ref_cache
[1][scan8
[4]] =
4506 h
->ref_cache
[0][scan8
[12]] =
4507 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
4510 assert(h
->slice_type_nos
== FF_P_TYPE
); //FIXME SP correct ?
4512 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
4513 if(h
->sub_mb_type
[i
] >=4){
4514 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "P sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4517 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4518 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4522 for(list
=0; list
<h
->list_count
; list
++){
4523 int ref_count
= IS_REF0(mb_type
) ? 1 : h
->ref_count
[list
];
4525 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
4526 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4527 unsigned int tmp
= get_te0_golomb(&s
->gb
, ref_count
); //FIXME init to 0 before and skip?
4529 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", tmp
);
4541 dct8x8_allowed
= get_dct8x8_allowed(h
);
4543 for(list
=0; list
<h
->list_count
; list
++){
4545 if(IS_DIRECT(h
->sub_mb_type
[i
])) {
4546 h
->ref_cache
[list
][ scan8
[4*i
] ] = h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
4549 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
4550 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
4552 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4553 const int sub_mb_type
= h
->sub_mb_type
[i
];
4554 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
4555 for(j
=0; j
<sub_partition_count
[i
]; j
++){
4557 const int index
= 4*i
+ block_width
*j
;
4558 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
4559 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mx
, &my
);
4560 mx
+= get_se_golomb(&s
->gb
);
4561 my
+= get_se_golomb(&s
->gb
);
4562 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4564 if(IS_SUB_8X8(sub_mb_type
)){
4566 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
4568 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
4569 }else if(IS_SUB_8X4(sub_mb_type
)){
4570 mv_cache
[ 1 ][0]= mx
;
4571 mv_cache
[ 1 ][1]= my
;
4572 }else if(IS_SUB_4X8(sub_mb_type
)){
4573 mv_cache
[ 8 ][0]= mx
;
4574 mv_cache
[ 8 ][1]= my
;
4576 mv_cache
[ 0 ][0]= mx
;
4577 mv_cache
[ 0 ][1]= my
;
4580 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
4586 }else if(IS_DIRECT(mb_type
)){
4587 pred_direct_motion(h
, &mb_type
);
4588 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
4590 int list
, mx
, my
, i
;
4591 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4592 if(IS_16X16(mb_type
)){
4593 for(list
=0; list
<h
->list_count
; list
++){
4595 if(IS_DIR(mb_type
, 0, list
)){
4596 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4597 if(val
>= h
->ref_count
[list
]){
4598 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4602 val
= LIST_NOT_USED
&0xFF;
4603 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 1);
4605 for(list
=0; list
<h
->list_count
; list
++){
4607 if(IS_DIR(mb_type
, 0, list
)){
4608 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mx
, &my
);
4609 mx
+= get_se_golomb(&s
->gb
);
4610 my
+= get_se_golomb(&s
->gb
);
4611 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4613 val
= pack16to32(mx
,my
);
4616 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 4);
4619 else if(IS_16X8(mb_type
)){
4620 for(list
=0; list
<h
->list_count
; list
++){
4623 if(IS_DIR(mb_type
, i
, list
)){
4624 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4625 if(val
>= h
->ref_count
[list
]){
4626 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4630 val
= LIST_NOT_USED
&0xFF;
4631 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 1);
4634 for(list
=0; list
<h
->list_count
; list
++){
4637 if(IS_DIR(mb_type
, i
, list
)){
4638 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mx
, &my
);
4639 mx
+= get_se_golomb(&s
->gb
);
4640 my
+= get_se_golomb(&s
->gb
);
4641 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4643 val
= pack16to32(mx
,my
);
4646 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 4);
4650 assert(IS_8X16(mb_type
));
4651 for(list
=0; list
<h
->list_count
; list
++){
4654 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
4655 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4656 if(val
>= h
->ref_count
[list
]){
4657 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4661 val
= LIST_NOT_USED
&0xFF;
4662 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 1);
4665 for(list
=0; list
<h
->list_count
; list
++){
4668 if(IS_DIR(mb_type
, i
, list
)){
4669 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mx
, &my
);
4670 mx
+= get_se_golomb(&s
->gb
);
4671 my
+= get_se_golomb(&s
->gb
);
4672 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4674 val
= pack16to32(mx
,my
);
4677 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 4);
4683 if(IS_INTER(mb_type
))
4684 write_back_motion(h
, mb_type
);
4686 if(!IS_INTRA16x16(mb_type
)){
4687 cbp
= get_ue_golomb(&s
->gb
);
4689 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cbp too large (%u) at %d %d\n", cbp
, s
->mb_x
, s
->mb_y
);
4694 if(IS_INTRA4x4(mb_type
)) cbp
= golomb_to_intra4x4_cbp
[cbp
];
4695 else cbp
= golomb_to_inter_cbp
[cbp
];
4697 if(IS_INTRA4x4(mb_type
)) cbp
= golomb_to_intra4x4_cbp_gray
[cbp
];
4698 else cbp
= golomb_to_inter_cbp_gray
[cbp
];
4703 if(dct8x8_allowed
&& (cbp
&15) && !IS_INTRA(mb_type
)){
4704 if(get_bits1(&s
->gb
)){
4705 mb_type
|= MB_TYPE_8x8DCT
;
4706 h
->cbp_table
[mb_xy
]= cbp
;
4709 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4711 if(cbp
|| IS_INTRA16x16(mb_type
)){
4712 int i8x8
, i4x4
, chroma_idx
;
4714 GetBitContext
*gb
= IS_INTRA(mb_type
) ? h
->intra_gb_ptr
: h
->inter_gb_ptr
;
4715 const uint8_t *scan
, *scan8x8
, *dc_scan
;
4717 // fill_non_zero_count_cache(h);
4719 if(IS_INTERLACED(mb_type
)){
4720 scan8x8
= s
->qscale
? h
->field_scan8x8_cavlc
: h
->field_scan8x8_cavlc_q0
;
4721 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
4722 dc_scan
= luma_dc_field_scan
;
4724 scan8x8
= s
->qscale
? h
->zigzag_scan8x8_cavlc
: h
->zigzag_scan8x8_cavlc_q0
;
4725 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
4726 dc_scan
= luma_dc_zigzag_scan
;
4729 dquant
= get_se_golomb(&s
->gb
);
4731 if( dquant
> 25 || dquant
< -26 ){
4732 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "dquant out of range (%d) at %d %d\n", dquant
, s
->mb_x
, s
->mb_y
);
4736 s
->qscale
+= dquant
;
4737 if(((unsigned)s
->qscale
) > 51){
4738 if(s
->qscale
<0) s
->qscale
+= 52;
4739 else s
->qscale
-= 52;
4742 h
->chroma_qp
[0]= get_chroma_qp(h
, 0, s
->qscale
);
4743 h
->chroma_qp
[1]= get_chroma_qp(h
, 1, s
->qscale
);
4744 if(IS_INTRA16x16(mb_type
)){
4745 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
, LUMA_DC_BLOCK_INDEX
, dc_scan
, h
->dequant4_coeff
[0][s
->qscale
], 16) < 0){
4746 return -1; //FIXME continue if partitioned and other return -1 too
4749 assert((cbp
&15) == 0 || (cbp
&15) == 15);
4752 for(i8x8
=0; i8x8
<4; i8x8
++){
4753 for(i4x4
=0; i4x4
<4; i4x4
++){
4754 const int index
= i4x4
+ 4*i8x8
;
4755 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
+ 16*index
, index
, scan
+ 1, h
->dequant4_coeff
[0][s
->qscale
], 15) < 0 ){
4761 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
4764 for(i8x8
=0; i8x8
<4; i8x8
++){
4765 if(cbp
& (1<<i8x8
)){
4766 if(IS_8x8DCT(mb_type
)){
4767 DCTELEM
*buf
= &h
->mb
[64*i8x8
];
4769 for(i4x4
=0; i4x4
<4; i4x4
++){
4770 if( decode_residual(h
, gb
, buf
, i4x4
+4*i8x8
, scan8x8
+16*i4x4
,
4771 h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 16) <0 )
4774 nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
4775 nnz
[0] += nnz
[1] + nnz
[8] + nnz
[9];
4777 for(i4x4
=0; i4x4
<4; i4x4
++){
4778 const int index
= i4x4
+ 4*i8x8
;
4780 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
, h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
], 16) <0 ){
4786 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
4787 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
4793 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++)
4794 if( decode_residual(h
, gb
, h
->mb
+ 256 + 16*4*chroma_idx
, CHROMA_DC_BLOCK_INDEX
, chroma_dc_scan
, NULL
, 4) < 0){
4800 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++){
4801 const uint32_t *qmul
= h
->dequant4_coeff
[chroma_idx
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[chroma_idx
]];
4802 for(i4x4
=0; i4x4
<4; i4x4
++){
4803 const int index
= 16 + 4*chroma_idx
+ i4x4
;
4804 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
+ 1, qmul
, 15) < 0){
4810 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
4811 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
4812 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
4815 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
4816 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
4817 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
4818 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
4820 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4821 write_back_non_zero_count(h
);
4824 h
->ref_count
[0] >>= 1;
4825 h
->ref_count
[1] >>= 1;
4831 static int decode_cabac_field_decoding_flag(H264Context
*h
) {
4832 MpegEncContext
* const s
= &h
->s
;
4833 const int mb_x
= s
->mb_x
;
4834 const int mb_y
= s
->mb_y
& ~1;
4835 const int mba_xy
= mb_x
- 1 + mb_y
*s
->mb_stride
;
4836 const int mbb_xy
= mb_x
+ (mb_y
-2)*s
->mb_stride
;
4838 unsigned int ctx
= 0;
4840 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) ) {
4843 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) ) {
4847 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[70 + ctx
] );
4850 static int decode_cabac_intra_mb_type(H264Context
*h
, int ctx_base
, int intra_slice
) {
4851 uint8_t *state
= &h
->cabac_state
[ctx_base
];
4855 MpegEncContext
* const s
= &h
->s
;
4856 const int mba_xy
= h
->left_mb_xy
[0];
4857 const int mbb_xy
= h
->top_mb_xy
;
4859 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mba_xy
] ) )
4861 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mbb_xy
] ) )
4863 if( get_cabac_noinline( &h
->cabac
, &state
[ctx
] ) == 0 )
4864 return 0; /* I4x4 */
4867 if( get_cabac_noinline( &h
->cabac
, &state
[0] ) == 0 )
4868 return 0; /* I4x4 */
4871 if( get_cabac_terminate( &h
->cabac
) )
4872 return 25; /* PCM */
4874 mb_type
= 1; /* I16x16 */
4875 mb_type
+= 12 * get_cabac_noinline( &h
->cabac
, &state
[1] ); /* cbp_luma != 0 */
4876 if( get_cabac_noinline( &h
->cabac
, &state
[2] ) ) /* cbp_chroma */
4877 mb_type
+= 4 + 4 * get_cabac_noinline( &h
->cabac
, &state
[2+intra_slice
] );
4878 mb_type
+= 2 * get_cabac_noinline( &h
->cabac
, &state
[3+intra_slice
] );
4879 mb_type
+= 1 * get_cabac_noinline( &h
->cabac
, &state
[3+2*intra_slice
] );
4883 static int decode_cabac_mb_type( H264Context
*h
) {
4884 MpegEncContext
* const s
= &h
->s
;
4886 if( h
->slice_type_nos
== FF_I_TYPE
) {
4887 return decode_cabac_intra_mb_type(h
, 3, 1);
4888 } else if( h
->slice_type_nos
== FF_P_TYPE
) {
4889 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[14] ) == 0 ) {
4891 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[15] ) == 0 ) {
4892 /* P_L0_D16x16, P_8x8 */
4893 return 3 * get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[16] );
4895 /* P_L0_D8x16, P_L0_D16x8 */
4896 return 2 - get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[17] );
4899 return decode_cabac_intra_mb_type(h
, 17, 0) + 5;
4901 } else if( h
->slice_type_nos
== FF_B_TYPE
) {
4902 const int mba_xy
= h
->left_mb_xy
[0];
4903 const int mbb_xy
= h
->top_mb_xy
;
4907 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mba_xy
] ) )
4909 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mbb_xy
] ) )
4912 if( !get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+ctx
] ) )
4913 return 0; /* B_Direct_16x16 */
4915 if( !get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+3] ) ) {
4916 return 1 + get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ); /* B_L[01]_16x16 */
4919 bits
= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+4] ) << 3;
4920 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ) << 2;
4921 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ) << 1;
4922 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] );
4924 return bits
+ 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4925 else if( bits
== 13 ) {
4926 return decode_cabac_intra_mb_type(h
, 32, 0) + 23;
4927 } else if( bits
== 14 )
4928 return 11; /* B_L1_L0_8x16 */
4929 else if( bits
== 15 )
4930 return 22; /* B_8x8 */
4932 bits
= ( bits
<<1 ) | get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] );
4933 return bits
- 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4935 /* TODO SI/SP frames? */
/**
 * Decode the CABAC mb_skip_flag for the macroblock at (mb_x, mb_y).
 * The CABAC context (cabac_state[11+ctx]) is selected from the skip state
 * of the left (A) and top (B) neighbour MBs that lie in the same slice.
 * NOTE(review): this extract is missing several original lines (the embedded
 * line numbers jump, e.g. 4941->4945), so the declarations of mba_xy,
 * mbb_xy and ctx, plus some statements, are not visible here.
 */
4940 static int decode_cabac_mb_skip( H264Context
*h
, int mb_x
, int mb_y
) {
4941 MpegEncContext
* const s
= &h
->s
;
/* MBAFF frame: neighbour indices must be derived per MB *pair*. */
4945 if(FRAME_MBAFF
){ //FIXME merge with the stuff in fill_caches?
/* Index of the top MB of the current field/frame pair. */
4946 int mb_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
/* Left neighbour: usable only if in the same slice; when the current
 * field mode matches the left pair's interlacing, step down one row to
 * pick the bottom MB of the left pair. (Leading condition lines are in
 * the missing part of the extract.) */
4949 && h
->slice_table
[mba_xy
] == h
->slice_num
4950 && MB_FIELD
== !!IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) )
4951 mba_xy
+= s
->mb_stride
;
/* Top neighbour starts one MB row above the pair... */
4953 mbb_xy
= mb_xy
- s
->mb_stride
;
/* ...and moves one more row up when the pair above is interlaced. */
4955 && h
->slice_table
[mbb_xy
] == h
->slice_num
4956 && IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) )
4957 mbb_xy
-= s
->mb_stride
;
4959 mbb_xy
= mb_x
+ (mb_y
-1)*s
->mb_stride
;
/* Non-MBAFF: plain left/top neighbours. FIELD_PICTURE doubles the
 * stride so the top neighbour stays within the same field. */
4961 int mb_xy
= h
->mb_xy
;
4963 mbb_xy
= mb_xy
- (s
->mb_stride
<< FIELD_PICTURE
);
/* Presumably ctx is incremented per available non-skipped neighbour;
 * the increments themselves are in the missing lines -- TODO confirm
 * against the full source. */
4966 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mba_xy
] ))
4968 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mbb_xy
] ))
/* B slices use a different context base (adjustment is in a missing line). */
4971 if( h
->slice_type_nos
== FF_B_TYPE
)
4973 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[11+ctx
] );
/**
 * Decode the intra4x4 prediction mode of one 4x4 block with CABAC.
 * First bin (state 68) signals "reuse the predicted mode"; otherwise
 * three bins (state 69) build a 3-bit remaining-mode value, LSB first.
 * NOTE(review): the declaration of `mode`, the early return and the final
 * return statements fall in lines missing from this extract (original
 * lines 4977-4981, 4985, 4987+).
 */
4976 static int decode_cabac_mb_intra4x4_pred_mode( H264Context
*h
, int pred_mode
) {
/* prev_intra4x4_pred_mode_flag: 1 -> use the predicted mode as-is. */
4979 if( get_cabac( &h
->cabac
, &h
->cabac_state
[68] ) )
/* rem_intra4x4_pred_mode: three fixed-length bins, all sharing state 69. */
4982 mode
+= 1 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
4983 mode
+= 2 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
4984 mode
+= 4 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
/* Presumably skips over pred_mode so all modes stay reachable
 * (the adjustment itself is in a missing line) -- TODO confirm. */
4986 if( mode
>= pred_mode
)
/**
 * Decode intra_chroma_pred_mode with CABAC (truncated unary, max 3 bins).
 * ctx is derived from whether the left (A) / top (B) neighbours in the
 * same slice have a non-zero stored chroma prediction mode; the first bin
 * uses state 64+ctx, the remaining bins use state 64+3.
 * NOTE(review): the ctx increments and the return statements fall in
 * lines missing from this extract.
 */
4992 static int decode_cabac_mb_chroma_pre_mode( H264Context
*h
) {
4993 const int mba_xy
= h
->left_mb_xy
[0];
4994 const int mbb_xy
= h
->top_mb_xy
;
4998 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4999 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mba_xy
] != 0 )
5002 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mbb_xy
] != 0 )
/* First bin: 0 -> mode 0 (DC). */
5005 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+ctx
] ) == 0 )
/* Subsequent bins of the truncated-unary code share context 64+3. */
5008 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
5010 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
/**
 * Decode the 4-bit luma coded_block_pattern with CABAC, one bin per 8x8
 * block (contexts 73..75). For each bin, ctx is built from whether the
 * spatially adjacent 8x8 blocks (left/top, possibly from neighbour MBs)
 * were coded. Neighbour CBPs come from left_cbp/top_cbp; -1 is used when
 * the neighbour lies in another slice, which makes every !(x & mask)
 * test evaluate to 0, i.e. the neighbour counts as "coded".
 * NOTE(review): the `return cbp;` and closing brace are in lines missing
 * from this extract (original 5030-5031).
 */
5016 static int decode_cabac_mb_cbp_luma( H264Context
*h
) {
5017 int cbp_b
, cbp_a
, ctx
, cbp
= 0;
5019 cbp_a
= h
->slice_table
[h
->left_mb_xy
[0]] == h
->slice_num
? h
->left_cbp
: -1;
5020 cbp_b
= h
->slice_table
[h
->top_mb_xy
] == h
->slice_num
? h
->top_cbp
: -1;
/* Bit 0 (top-left 8x8): left neighbour's right half, top neighbour's
 * bottom-left quarter. */
5022 ctx
= !(cbp_a
& 0x02) + 2 * !(cbp_b
& 0x04);
5023 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]);
/* Bit 1 (top-right 8x8): depends on the just-decoded bit 0 and top CBP. */
5024 ctx
= !(cbp
& 0x01) + 2 * !(cbp_b
& 0x08);
5025 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 1;
/* Bit 2 (bottom-left 8x8). */
5026 ctx
= !(cbp_a
& 0x08) + 2 * !(cbp
& 0x01);
5027 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 2;
/* Bit 3 (bottom-right 8x8): both neighbours are inside this MB. */
5028 ctx
= !(cbp
& 0x04) + 2 * !(cbp
& 0x02);
5029 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 3;
/**
 * Decode the chroma part of the coded_block_pattern with CABAC
 * (contexts 77..80): first bin chooses 0 vs >0; a second bin (with ctx
 * rebuilt from neighbours having cbp==2) distinguishes 1 from 2.
 * NOTE(review): local declarations and the intermediate ctx reset fall
 * in lines missing from this extract.
 */
5032 static int decode_cabac_mb_cbp_chroma( H264Context
*h
) {
/* Neighbour chroma CBP values (2 bits each), taken from the cached
 * left/top CBPs. */
5036 cbp_a
= (h
->left_cbp
>>4)&0x03;
5037 cbp_b
= (h
-> top_cbp
>>4)&0x03;
/* ctx for the first bin: neighbours with any chroma coefficients. */
5040 if( cbp_a
> 0 ) ctx
++;
5041 if( cbp_b
> 0 ) ctx
+= 2;
5042 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[77 + ctx
] ) == 0 )
/* ctx for the second bin: neighbours with AC chroma coefficients (==2). */
5046 if( cbp_a
== 2 ) ctx
++;
5047 if( cbp_b
== 2 ) ctx
+= 2;
5048 return 1 + get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[77 + ctx
] );
/**
 * Decode mb_qp_delta with CABAC (contexts 60+): a unary-coded magnitude
 * whose first-bin context depends on whether the previous MB had a
 * non-zero QP delta, then mapped back to a signed delta.
 * NOTE(review): many lines are missing from this extract (ctx/val
 * declarations, the loop body, the positive-value return path).
 */
5050 static int decode_cabac_mb_dqp( H264Context
*h
) {
/* Non-zero delta on the previous MB selects a different first-bin ctx. */
5054 if( h
->last_qscale_diff
!= 0 )
/* Unary magnitude: count 1-bins. */
5057 while( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[60 + ctx
] ) ) {
5063 if(val
> 102) //prevent infinite loop
/* Odd magnitudes map to negative deltas. */
5070 return -(val
+ 1)/2;
/**
 * Decode sub_mb_type for one 8x8 partition of a P macroblock with CABAC
 * (contexts 21..23), following the spec's sub_mb_type binarization tree.
 * NOTE(review): all return statements fall in lines missing from this
 * extract, so the leaf values are not visible here.
 */
5072 static int decode_cabac_p_mb_sub_type( H264Context
*h
) {
5073 if( get_cabac( &h
->cabac
, &h
->cabac_state
[21] ) )
5075 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[22] ) )
5077 if( get_cabac( &h
->cabac
, &h
->cabac_state
[23] ) )
/**
 * Decode sub_mb_type for one 8x8 partition of a B macroblock with CABAC
 * (contexts 36..39). The binarization tree is walked bin by bin; the
 * comments on the returns give the corresponding sub-partition names.
 * NOTE(review): the declaration/initialization of `type` and the final
 * return fall in lines missing from this extract.
 */
5081 static int decode_cabac_b_mb_sub_type( H264Context
*h
) {
5083 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[36] ) )
5084 return 0; /* B_Direct_8x8 */
5085 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[37] ) )
5086 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L0_8x8, B_L1_8x8 */
5088 if( get_cabac( &h
->cabac
, &h
->cabac_state
[38] ) ) {
5089 if( get_cabac( &h
->cabac
, &h
->cabac_state
[39] ) )
5090 return 11 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L1_4x4, B_Bi_4x4 */
/* Remaining types: two more bins refine `type` (suffix bits). */
5093 type
+= 2*get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5094 type
+= get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
/**
 * Decode transform_size_8x8_flag with CABAC; the context (399 + n) is
 * selected by how many neighbouring MBs use the 8x8 transform, as cached
 * in h->neighbor_transform_size.
 */
5098 static inline int decode_cabac_mb_transform_size( H264Context
*h
) {
5099 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[399 + h
->neighbor_transform_size
] );
/**
 * Decode a reference index (ref_idx_l0/l1) for block n of list `list`
 * with CABAC: unary code, first-bin context from the neighbouring
 * blocks' ref indices, continuation bins at contexts 54+4/54+5.
 * Returns the decoded index, or 0 on overflow (>= 32).
 * NOTE(review): the ctx computation and the loop body (ref++, ctx
 * updates) fall in lines missing from this extract.
 */
5102 static int decode_cabac_mb_ref( H264Context
*h
, int list
, int n
) {
/* Ref indices of the left and top 4x4 neighbours from the cache. */
5103 int refa
= h
->ref_cache
[list
][scan8
[n
] - 1];
5104 int refb
= h
->ref_cache
[list
][scan8
[n
] - 8];
/* In B slices a neighbour coded in direct mode does not raise ctx. */
5108 if( h
->slice_type_nos
== FF_B_TYPE
) {
5109 if( refa
> 0 && !h
->direct_cache
[scan8
[n
] - 1] )
5111 if( refb
> 0 && !h
->direct_cache
[scan8
[n
] - 8] )
/* Unary continuation: keep reading 1-bins. */
5120 while( get_cabac( &h
->cabac
, &h
->cabac_state
[54+ctx
] ) ) {
/* Sanity bound: a conforming stream never needs >= 32 refs here. */
5126 if(ref
>= 32 /*h->ref_list[list]*/){
5127 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "overflow in decode_cabac_mb_ref\n");
5128 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decode one motion vector difference component (l=0: x ctxbase 40,
 * l=1: y ctxbase 47) with CABAC: first-bin context from the summed
 * absolute MVDs of the left/top neighbours, then UEG3 binarization
 * (unary prefix up to 9, exp-Golomb suffix in bypass bins) and a
 * bypass-coded sign.
 * NOTE(review): ctx updates, mvd accumulation, the overflow clamp value
 * and the suffix assembly fall in lines missing from this extract.
 */
5134 static int decode_cabac_mb_mvd( H264Context
*h
, int list
, int n
, int l
) {
/* Context selector: sum of neighbour |mvd| for this component. */
5135 int amvd
= abs( h
->mvd_cache
[list
][scan8
[n
] - 1][l
] ) +
5136 abs( h
->mvd_cache
[list
][scan8
[n
] - 8][l
] );
5137 int ctxbase
= (l
== 0) ? 40 : 47;
5142 else if( amvd
> 32 )
/* First bin == 0 -> mvd is zero. */
5147 if(!get_cabac(&h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
]))
/* Unary prefix, at most 9 context-coded bins. */
5152 while( mvd
< 9 && get_cabac( &h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
] ) ) {
/* Exp-Golomb suffix length, bypass coded. */
5160 while( get_cabac_bypass( &h
->cabac
) ) {
5164 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "overflow in decode_cabac_mb_mvd\n");
5169 if( get_cabac_bypass( &h
->cabac
) )
/* Bypass-coded sign applied to the magnitude. */
5173 return get_cabac_bypass_sign( &h
->cabac
, -mvd
);
/**
 * Compute the coded_block_flag CABAC context for block `idx` of
 * category `cat`: ctx (0..3, from whether the left/top neighbour blocks
 * have non-zero coefficients) plus 4*cat. The nza/nzb sources depend on
 * the category: DC flags live in bit 8 of the cached neighbour CBPs,
 * chroma DC flags in bits 6+idx, and AC/luma flags come from
 * non_zero_count_cache.
 * NOTE(review): the cat dispatch (if/else chain) and the ctx assembly
 * from nza/nzb fall in lines missing from this extract.
 */
5176 static av_always_inline
int get_cabac_cbf_ctx( H264Context
*h
, int cat
, int idx
, int is_dc
) {
/* Luma DC (cat 0): bit 8 of the neighbour CBP caches. */
5182 nza
= h
->left_cbp
&0x100;
5183 nzb
= h
-> top_cbp
&0x100;
/* Chroma DC (cat 3): per-component flag at bit 6+idx. */
5185 nza
= (h
->left_cbp
>>(6+idx
))&0x01;
5186 nzb
= (h
-> top_cbp
>>(6+idx
))&0x01;
/* Chroma AC (cat 4): left/top 4x4 neighbours in the nnz cache. */
5190 nza
= h
->non_zero_count_cache
[scan8
[16+idx
] - 1];
5191 nzb
= h
->non_zero_count_cache
[scan8
[16+idx
] - 8];
/* Luma AC / luma 4x4 (cat 1 or 2). */
5193 assert(cat
== 1 || cat
== 2);
5194 nza
= h
->non_zero_count_cache
[scan8
[idx
] - 1];
5195 nzb
= h
->non_zero_count_cache
[scan8
[idx
] - 8];
5205 return ctx
+ 4 * cat
;
/* Maps an 8x8-block scan position (0..62) to the context offset used for
 * last_significant_coeff_flag; byte-aligned for use from asm. */
5208 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8
[63]) = {
5209 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5210 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5211 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5212 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5215 static av_always_inline
void decode_cabac_residual_internal( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
, int is_dc
) {
5216 static const int significant_coeff_flag_offset
[2][6] = {
5217 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5218 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5220 static const int last_coeff_flag_offset
[2][6] = {
5221 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5222 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5224 static const int coeff_abs_level_m1_offset
[6] = {
5225 227+0, 227+10, 227+20, 227+30, 227+39, 426
5227 static const uint8_t significant_coeff_flag_offset_8x8
[2][63] = {
5228 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5229 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5230 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5231 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5232 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5233 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5234 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5235 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5237 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5238 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5239 * map node ctx => cabac ctx for level=1 */
5240 static const uint8_t coeff_abs_level1_ctx
[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5241 /* map node ctx => cabac ctx for level>1 */
5242 static const uint8_t coeff_abs_levelgt1_ctx
[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5243 static const uint8_t coeff_abs_level_transition
[2][8] = {
5244 /* update node ctx after decoding a level=1 */
5245 { 1, 2, 3, 3, 4, 5, 6, 7 },
5246 /* update node ctx after decoding a level>1 */
5247 { 4, 4, 4, 4, 5, 6, 7, 7 }
5253 int coeff_count
= 0;
5256 uint8_t *significant_coeff_ctx_base
;
5257 uint8_t *last_coeff_ctx_base
;
5258 uint8_t *abs_level_m1_ctx_base
;
5261 #define CABAC_ON_STACK
5263 #ifdef CABAC_ON_STACK
5266 cc
.range
= h
->cabac
.range
;
5267 cc
.low
= h
->cabac
.low
;
5268 cc
.bytestream
= h
->cabac
.bytestream
;
5270 #define CC &h->cabac
5274 /* cat: 0-> DC 16x16 n = 0
5275 * 1-> AC 16x16 n = luma4x4idx
5276 * 2-> Luma4x4 n = luma4x4idx
5277 * 3-> DC Chroma n = iCbCr
5278 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5279 * 5-> Luma8x8 n = 4 * luma8x8idx
5282 /* read coded block flag */
5283 if( is_dc
|| cat
!= 5 ) {
5284 if( get_cabac( CC
, &h
->cabac_state
[85 + get_cabac_cbf_ctx( h
, cat
, n
, is_dc
) ] ) == 0 ) {
5287 h
->non_zero_count_cache
[scan8
[16+n
]] = 0;
5289 h
->non_zero_count_cache
[scan8
[n
]] = 0;
5292 #ifdef CABAC_ON_STACK
5293 h
->cabac
.range
= cc
.range
;
5294 h
->cabac
.low
= cc
.low
;
5295 h
->cabac
.bytestream
= cc
.bytestream
;
5301 significant_coeff_ctx_base
= h
->cabac_state
5302 + significant_coeff_flag_offset
[MB_FIELD
][cat
];
5303 last_coeff_ctx_base
= h
->cabac_state
5304 + last_coeff_flag_offset
[MB_FIELD
][cat
];
5305 abs_level_m1_ctx_base
= h
->cabac_state
5306 + coeff_abs_level_m1_offset
[cat
];
5308 if( !is_dc
&& cat
== 5 ) {
5309 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5310 for(last= 0; last < coefs; last++) { \
5311 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5312 if( get_cabac( CC, sig_ctx )) { \
5313 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5314 index[coeff_count++] = last; \
5315 if( get_cabac( CC, last_ctx ) ) { \
5321 if( last == max_coeff -1 ) {\
5322 index[coeff_count++] = last;\
5324 const uint8_t *sig_off
= significant_coeff_flag_offset_8x8
[MB_FIELD
];
5325 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5326 coeff_count
= decode_significance_8x8_x86(CC
, significant_coeff_ctx_base
, index
, sig_off
);
5328 coeff_count
= decode_significance_x86(CC
, max_coeff
, significant_coeff_ctx_base
, index
);
5330 DECODE_SIGNIFICANCE( 63, sig_off
[last
], last_coeff_flag_offset_8x8
[last
] );
5332 DECODE_SIGNIFICANCE( max_coeff
- 1, last
, last
);
5335 assert(coeff_count
> 0);
5339 h
->cbp_table
[h
->mb_xy
] |= 0x100;
5341 h
->cbp_table
[h
->mb_xy
] |= 0x40 << n
;
5344 fill_rectangle(&h
->non_zero_count_cache
[scan8
[n
]], 2, 2, 8, coeff_count
, 1);
5346 h
->non_zero_count_cache
[scan8
[16+n
]] = coeff_count
;
5348 assert( cat
== 1 || cat
== 2 );
5349 h
->non_zero_count_cache
[scan8
[n
]] = coeff_count
;
5354 uint8_t *ctx
= coeff_abs_level1_ctx
[node_ctx
] + abs_level_m1_ctx_base
;
5356 int j
= scantable
[index
[--coeff_count
]];
5358 if( get_cabac( CC
, ctx
) == 0 ) {
5359 node_ctx
= coeff_abs_level_transition
[0][node_ctx
];
5361 block
[j
] = get_cabac_bypass_sign( CC
, -1);
5363 block
[j
] = (get_cabac_bypass_sign( CC
, -qmul
[j
]) + 32) >> 6;
5367 ctx
= coeff_abs_levelgt1_ctx
[node_ctx
] + abs_level_m1_ctx_base
;
5368 node_ctx
= coeff_abs_level_transition
[1][node_ctx
];
5370 while( coeff_abs
< 15 && get_cabac( CC
, ctx
) ) {
5374 if( coeff_abs
>= 15 ) {
5376 while( get_cabac_bypass( CC
) ) {
5382 coeff_abs
+= coeff_abs
+ get_cabac_bypass( CC
);
5388 block
[j
] = get_cabac_bypass_sign( CC
, -coeff_abs
);
5390 block
[j
] = (get_cabac_bypass_sign( CC
, -coeff_abs
) * qmul
[j
] + 32) >> 6;
5393 } while( coeff_count
);
5394 #ifdef CABAC_ON_STACK
5395 h
->cabac
.range
= cc
.range
;
5396 h
->cabac
.low
= cc
.low
;
5397 h
->cabac
.bytestream
= cc
.bytestream
;
5402 #ifndef CONFIG_SMALL
/* Non-CONFIG_SMALL wrapper: forces the is_dc=1 specialization of
 * decode_cabac_residual_internal so the compiler can fold the DC-only
 * branches. (Closing brace is in a line missing from this extract.) */
5403 static void decode_cabac_residual_dc( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
5404 decode_cabac_residual_internal(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
, 1);
/* Non-CONFIG_SMALL wrapper: is_dc=0 specialization of
 * decode_cabac_residual_internal (AC/4x4/8x8 residual paths). */
5407 static void decode_cabac_residual_nondc( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
5408 decode_cabac_residual_internal(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
, 0);
/**
 * Dispatch residual decoding: DC categories (0: luma DC, 3: chroma DC)
 * vs everything else. Two alternative bodies are visible here — a single
 * call passing is_dc as a runtime flag, and a branch to the dc/nondc
 * specializations; the `#ifdef CONFIG_SMALL`/`#else`/`#endif` lines
 * that select between them (original 5413, 5415, 5418) are missing from
 * this extract.
 */
5412 static void decode_cabac_residual( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
/* Size-optimized variant: one shared body, is_dc decided at runtime. */
5414 decode_cabac_residual_internal(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
, cat
== 0 || cat
== 3);
/* Speed-optimized variant: jump to the pre-specialized wrappers. */
5416 if( cat
== 0 || cat
== 3 ) decode_cabac_residual_dc(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
);
5417 else decode_cabac_residual_nondc(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
);
/**
 * Compute h->top_mb_xy and h->left_mb_xy[0] for the current MB.
 * Default is the raster neighbours (mb_xy - mb_stride, mb_xy - 1); the
 * MBAFF branch then corrects both for field/frame MB pairs, and field
 * pictures double the top offset.
 * NOTE(review): the `if(FRAME_MBAFF){` header, parts of the ternary that
 * decides the top adjustment, and closing braces fall in lines missing
 * from this extract.
 */
5421 static inline void compute_mb_neighbors(H264Context
*h
)
5423 MpegEncContext
* const s
= &h
->s
;
5424 const int mb_xy
= h
->mb_xy
;
/* Raster-order defaults. */
5425 h
->top_mb_xy
= mb_xy
- s
->mb_stride
;
5426 h
->left_mb_xy
[0] = mb_xy
- 1;
/* MBAFF: work in units of MB pairs. */
5428 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
5429 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
/* frame_flag == 1 means the pair is coded as frame MBs. */
5430 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
5431 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
5432 const int curr_mb_frame_flag
= !MB_FIELD
;
5433 const int bottom
= (s
->mb_y
& 1);
/* Top neighbour moves up one extra row depending on whether this is the
 * top or bottom MB of the pair and on the frame/field mix. */
5435 ? !curr_mb_frame_flag
// bottom macroblock
5436 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
5438 h
->top_mb_xy
-= s
->mb_stride
;
/* Mixed frame/field pair on the left: point at the pair's top MB. */
5440 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
5441 h
->left_mb_xy
[0] = pair_xy
- 1;
/* Field pictures: top neighbour is two rows up in frame coordinates. */
5443 } else if (FIELD_PICTURE
) {
5444 h
->top_mb_xy
-= s
->mb_stride
;
5450 * decodes a macroblock
5451 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5453 static int decode_mb_cabac(H264Context
*h
) {
5454 MpegEncContext
* const s
= &h
->s
;
5456 int mb_type
, partition_count
, cbp
= 0;
5457 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
5459 mb_xy
= h
->mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5461 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?)
5463 tprintf(s
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
5464 if( h
->slice_type_nos
!= FF_I_TYPE
) {
5466 /* a skipped mb needs the aff flag from the following mb */
5467 if( FRAME_MBAFF
&& s
->mb_x
==0 && (s
->mb_y
&1)==0 )
5468 predict_field_decoding_flag(h
);
5469 if( FRAME_MBAFF
&& (s
->mb_y
&1)==1 && h
->prev_mb_skipped
)
5470 skip
= h
->next_mb_skipped
;
5472 skip
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
);
5473 /* read skip flags */
5475 if( FRAME_MBAFF
&& (s
->mb_y
&1)==0 ){
5476 s
->current_picture
.mb_type
[mb_xy
] = MB_TYPE_SKIP
;
5477 h
->next_mb_skipped
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
+1 );
5478 if(h
->next_mb_skipped
)
5479 predict_field_decoding_flag(h
);
5481 h
->mb_mbaff
= h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5486 h
->cbp_table
[mb_xy
] = 0;
5487 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5488 h
->last_qscale_diff
= 0;
5495 if( (s
->mb_y
&1) == 0 )
5497 h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5500 h
->prev_mb_skipped
= 0;
5502 compute_mb_neighbors(h
);
5503 if( ( mb_type
= decode_cabac_mb_type( h
) ) < 0 ) {
5504 av_log( h
->s
.avctx
, AV_LOG_ERROR
, "decode_cabac_mb_type failed\n" );
5508 if( h
->slice_type_nos
== FF_B_TYPE
) {
5510 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
5511 mb_type
= b_mb_type_info
[mb_type
].type
;
5514 goto decode_intra_mb
;
5516 } else if( h
->slice_type_nos
== FF_P_TYPE
) {
5518 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
5519 mb_type
= p_mb_type_info
[mb_type
].type
;
5522 goto decode_intra_mb
;
5525 if(h
->slice_type
== FF_SI_TYPE
&& mb_type
)
5527 assert(h
->slice_type_nos
== FF_I_TYPE
);
5529 partition_count
= 0;
5530 cbp
= i_mb_type_info
[mb_type
].cbp
;
5531 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
5532 mb_type
= i_mb_type_info
[mb_type
].type
;
5535 mb_type
|= MB_TYPE_INTERLACED
;
5537 h
->slice_table
[ mb_xy
]= h
->slice_num
;
5539 if(IS_INTRA_PCM(mb_type
)) {
5542 // We assume these blocks are very rare so we do not optimize it.
5543 // FIXME The two following lines get the bitstream position in the cabac
5544 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5545 ptr
= h
->cabac
.bytestream
;
5546 if(h
->cabac
.low
&0x1) ptr
--;
5548 if(h
->cabac
.low
&0x1FF) ptr
--;
5551 // The pixels are stored in the same order as levels in h->mb array.
5552 memcpy(h
->mb
, ptr
, 256); ptr
+=256;
5554 memcpy(h
->mb
+128, ptr
, 128); ptr
+=128;
5557 ff_init_cabac_decoder(&h
->cabac
, ptr
, h
->cabac
.bytestream_end
- ptr
);
5559 // All blocks are present
5560 h
->cbp_table
[mb_xy
] = 0x1ef;
5561 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5562 // In deblocking, the quantizer is 0
5563 s
->current_picture
.qscale_table
[mb_xy
]= 0;
5564 // All coeffs are present
5565 memset(h
->non_zero_count
[mb_xy
], 16, 16);
5566 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5567 h
->last_qscale_diff
= 0;
5572 h
->ref_count
[0] <<= 1;
5573 h
->ref_count
[1] <<= 1;
5576 fill_caches(h
, mb_type
, 0);
5578 if( IS_INTRA( mb_type
) ) {
5580 if( IS_INTRA4x4( mb_type
) ) {
5581 if( dct8x8_allowed
&& decode_cabac_mb_transform_size( h
) ) {
5582 mb_type
|= MB_TYPE_8x8DCT
;
5583 for( i
= 0; i
< 16; i
+=4 ) {
5584 int pred
= pred_intra_mode( h
, i
);
5585 int mode
= decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5586 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
5589 for( i
= 0; i
< 16; i
++ ) {
5590 int pred
= pred_intra_mode( h
, i
);
5591 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5593 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5596 write_back_intra_pred_mode(h
);
5597 if( check_intra4x4_pred_mode(h
) < 0 ) return -1;
5599 h
->intra16x16_pred_mode
= check_intra_pred_mode( h
, h
->intra16x16_pred_mode
);
5600 if( h
->intra16x16_pred_mode
< 0 ) return -1;
5603 h
->chroma_pred_mode_table
[mb_xy
] =
5604 pred_mode
= decode_cabac_mb_chroma_pre_mode( h
);
5606 pred_mode
= check_intra_pred_mode( h
, pred_mode
);
5607 if( pred_mode
< 0 ) return -1;
5608 h
->chroma_pred_mode
= pred_mode
;
5610 } else if( partition_count
== 4 ) {
5611 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
5613 if( h
->slice_type_nos
== FF_B_TYPE
) {
5614 for( i
= 0; i
< 4; i
++ ) {
5615 h
->sub_mb_type
[i
] = decode_cabac_b_mb_sub_type( h
);
5616 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5617 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5619 if( IS_DIRECT(h
->sub_mb_type
[0] | h
->sub_mb_type
[1] |
5620 h
->sub_mb_type
[2] | h
->sub_mb_type
[3]) ) {
5621 pred_direct_motion(h
, &mb_type
);
5622 h
->ref_cache
[0][scan8
[4]] =
5623 h
->ref_cache
[1][scan8
[4]] =
5624 h
->ref_cache
[0][scan8
[12]] =
5625 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
5626 if( h
->ref_count
[0] > 1 || h
->ref_count
[1] > 1 ) {
5627 for( i
= 0; i
< 4; i
++ )
5628 if( IS_DIRECT(h
->sub_mb_type
[i
]) )
5629 fill_rectangle( &h
->direct_cache
[scan8
[4*i
]], 2, 2, 8, 1, 1 );
5633 for( i
= 0; i
< 4; i
++ ) {
5634 h
->sub_mb_type
[i
] = decode_cabac_p_mb_sub_type( h
);
5635 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5636 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5640 for( list
= 0; list
< h
->list_count
; list
++ ) {
5641 for( i
= 0; i
< 4; i
++ ) {
5642 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
5643 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
5644 if( h
->ref_count
[list
] > 1 )
5645 ref
[list
][i
] = decode_cabac_mb_ref( h
, list
, 4*i
);
5651 h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
5652 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
5657 dct8x8_allowed
= get_dct8x8_allowed(h
);
5659 for(list
=0; list
<h
->list_count
; list
++){
5661 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
5662 if(IS_DIRECT(h
->sub_mb_type
[i
])){
5663 fill_rectangle(h
->mvd_cache
[list
][scan8
[4*i
]], 2, 2, 8, 0, 4);
5667 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
) && !IS_DIRECT(h
->sub_mb_type
[i
])){
5668 const int sub_mb_type
= h
->sub_mb_type
[i
];
5669 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
5670 for(j
=0; j
<sub_partition_count
[i
]; j
++){
5673 const int index
= 4*i
+ block_width
*j
;
5674 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
5675 int16_t (* mvd_cache
)[2]= &h
->mvd_cache
[list
][ scan8
[index
] ];
5676 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mpx
, &mpy
);
5678 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, index
, 0 );
5679 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, index
, 1 );
5680 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5682 if(IS_SUB_8X8(sub_mb_type
)){
5684 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
5686 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
5689 mvd_cache
[ 8 ][0]= mvd_cache
[ 9 ][0]= mx
- mpx
;
5691 mvd_cache
[ 8 ][1]= mvd_cache
[ 9 ][1]= my
- mpy
;
5692 }else if(IS_SUB_8X4(sub_mb_type
)){
5693 mv_cache
[ 1 ][0]= mx
;
5694 mv_cache
[ 1 ][1]= my
;
5696 mvd_cache
[ 1 ][0]= mx
- mpx
;
5697 mvd_cache
[ 1 ][1]= my
- mpy
;
5698 }else if(IS_SUB_4X8(sub_mb_type
)){
5699 mv_cache
[ 8 ][0]= mx
;
5700 mv_cache
[ 8 ][1]= my
;
5702 mvd_cache
[ 8 ][0]= mx
- mpx
;
5703 mvd_cache
[ 8 ][1]= my
- mpy
;
5705 mv_cache
[ 0 ][0]= mx
;
5706 mv_cache
[ 0 ][1]= my
;
5708 mvd_cache
[ 0 ][0]= mx
- mpx
;
5709 mvd_cache
[ 0 ][1]= my
- mpy
;
5712 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
5713 uint32_t *pd
= (uint32_t *)&h
->mvd_cache
[list
][ scan8
[4*i
] ][0];
5714 p
[0] = p
[1] = p
[8] = p
[9] = 0;
5715 pd
[0]= pd
[1]= pd
[8]= pd
[9]= 0;
5719 } else if( IS_DIRECT(mb_type
) ) {
5720 pred_direct_motion(h
, &mb_type
);
5721 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
5722 fill_rectangle(h
->mvd_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
5723 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
5725 int list
, mx
, my
, i
, mpx
, mpy
;
5726 if(IS_16X16(mb_type
)){
5727 for(list
=0; list
<h
->list_count
; list
++){
5728 if(IS_DIR(mb_type
, 0, list
)){
5729 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 0 ) : 0;
5730 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, ref
, 1);
5732 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED
, 1); //FIXME factorize and the other fill_rect below too
5734 for(list
=0; list
<h
->list_count
; list
++){
5735 if(IS_DIR(mb_type
, 0, list
)){
5736 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mpx
, &mpy
);
5738 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 0, 0 );
5739 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 0, 1 );
5740 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5742 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5743 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
,my
), 4);
5745 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, 0, 4);
5748 else if(IS_16X8(mb_type
)){
5749 for(list
=0; list
<h
->list_count
; list
++){
5751 if(IS_DIR(mb_type
, i
, list
)){
5752 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 8*i
) : 0;
5753 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, ref
, 1);
5755 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, (LIST_NOT_USED
&0xFF), 1);
5758 for(list
=0; list
<h
->list_count
; list
++){
5760 if(IS_DIR(mb_type
, i
, list
)){
5761 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mpx
, &mpy
);
5762 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 0 );
5763 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 1 );
5764 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5766 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5767 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
,my
), 4);
5769 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5770 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5775 assert(IS_8X16(mb_type
));
5776 for(list
=0; list
<h
->list_count
; list
++){
5778 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
5779 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 4*i
) : 0;
5780 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, ref
, 1);
5782 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, (LIST_NOT_USED
&0xFF), 1);
5785 for(list
=0; list
<h
->list_count
; list
++){
5787 if(IS_DIR(mb_type
, i
, list
)){
5788 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mpx
, &mpy
);
5789 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 0 );
5790 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 1 );
5792 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5793 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5794 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
,my
), 4);
5796 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5797 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5804 if( IS_INTER( mb_type
) ) {
5805 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5806 write_back_motion( h
, mb_type
);
5809 if( !IS_INTRA16x16( mb_type
) ) {
5810 cbp
= decode_cabac_mb_cbp_luma( h
);
5812 cbp
|= decode_cabac_mb_cbp_chroma( h
) << 4;
5815 h
->cbp_table
[mb_xy
] = h
->cbp
= cbp
;
5817 if( dct8x8_allowed
&& (cbp
&15) && !IS_INTRA( mb_type
) ) {
5818 if( decode_cabac_mb_transform_size( h
) )
5819 mb_type
|= MB_TYPE_8x8DCT
;
5821 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5823 if( cbp
|| IS_INTRA16x16( mb_type
) ) {
5824 const uint8_t *scan
, *scan8x8
, *dc_scan
;
5825 const uint32_t *qmul
;
5828 if(IS_INTERLACED(mb_type
)){
5829 scan8x8
= s
->qscale
? h
->field_scan8x8
: h
->field_scan8x8_q0
;
5830 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
5831 dc_scan
= luma_dc_field_scan
;
5833 scan8x8
= s
->qscale
? h
->zigzag_scan8x8
: h
->zigzag_scan8x8_q0
;
5834 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
5835 dc_scan
= luma_dc_zigzag_scan
;
5838 h
->last_qscale_diff
= dqp
= decode_cabac_mb_dqp( h
);
5839 if( dqp
== INT_MIN
){
5840 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cabac decode of qscale diff failed at %d %d\n", s
->mb_x
, s
->mb_y
);
5844 if(((unsigned)s
->qscale
) > 51){
5845 if(s
->qscale
<0) s
->qscale
+= 52;
5846 else s
->qscale
-= 52;
5848 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
5849 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
5851 if( IS_INTRA16x16( mb_type
) ) {
5853 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5854 decode_cabac_residual( h
, h
->mb
, 0, 0, dc_scan
, NULL
, 16);
5857 qmul
= h
->dequant4_coeff
[0][s
->qscale
];
5858 for( i
= 0; i
< 16; i
++ ) {
5859 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5860 decode_cabac_residual(h
, h
->mb
+ 16*i
, 1, i
, scan
+ 1, qmul
, 15);
5863 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
5867 for( i8x8
= 0; i8x8
< 4; i8x8
++ ) {
5868 if( cbp
& (1<<i8x8
) ) {
5869 if( IS_8x8DCT(mb_type
) ) {
5870 decode_cabac_residual(h
, h
->mb
+ 64*i8x8
, 5, 4*i8x8
,
5871 scan8x8
, h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 64);
5873 qmul
= h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
];
5874 for( i4x4
= 0; i4x4
< 4; i4x4
++ ) {
5875 const int index
= 4*i8x8
+ i4x4
;
5876 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5878 decode_cabac_residual(h
, h
->mb
+ 16*index
, 2, index
, scan
, qmul
, 16);
5879 //STOP_TIMER("decode_residual")
5883 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
5884 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
5891 for( c
= 0; c
< 2; c
++ ) {
5892 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5893 decode_cabac_residual(h
, h
->mb
+ 256 + 16*4*c
, 3, c
, chroma_dc_scan
, NULL
, 4);
5899 for( c
= 0; c
< 2; c
++ ) {
5900 qmul
= h
->dequant4_coeff
[c
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[c
]];
5901 for( i
= 0; i
< 4; i
++ ) {
5902 const int index
= 16 + 4 * c
+ i
;
5903 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5904 decode_cabac_residual(h
, h
->mb
+ 16*index
, 4, index
- 16, scan
+ 1, qmul
, 15);
5908 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
5909 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
5910 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
5913 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
5914 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
5915 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
5916 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
5917 h
->last_qscale_diff
= 0;
5920 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
5921 write_back_non_zero_count(h
);
5924 h
->ref_count
[0] >>= 1;
5925 h
->ref_count
[1] >>= 1;
5932 static void filter_mb_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
5934 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
5935 const int alpha
= (alpha_table
+52)[index_a
];
5936 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
5941 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] : -1;
5942 h
->s
.dsp
.h264_h_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
5944 /* 16px edge length, because bS=4 is triggered by being at
5945 * the edge of an intra MB, so all 4 bS are the same */
5946 for( d
= 0; d
< 16; d
++ ) {
5947 const int p0
= pix
[-1];
5948 const int p1
= pix
[-2];
5949 const int p2
= pix
[-3];
5951 const int q0
= pix
[0];
5952 const int q1
= pix
[1];
5953 const int q2
= pix
[2];
5955 if( FFABS( p0
- q0
) < alpha
&&
5956 FFABS( p1
- p0
) < beta
&&
5957 FFABS( q1
- q0
) < beta
) {
5959 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
5960 if( FFABS( p2
- p0
) < beta
)
5962 const int p3
= pix
[-4];
5964 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
5965 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
5966 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
5969 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
5971 if( FFABS( q2
- q0
) < beta
)
5973 const int q3
= pix
[3];
5975 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
5976 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
5977 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
5980 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
5984 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
5985 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
5987 tprintf(h
->s
.avctx
, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2], pix
[-1], pix
[0], pix
[1]);
/**
 * Deblock one vertical chroma edge. Computes alpha/beta thresholds from
 * qp and the slice offsets, then calls the DSP chroma loop filter:
 * the tc-clipped variant for bS < 4, the intra (strong) variant
 * otherwise.
 * NOTE(review): the `int i`/`tc[]` declarations and the bS<4 branch
 * header fall in lines missing from this extract.
 */
5993 static void filter_mb_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
/* Tables are biased by 52 so negative qp+offset indices stay in range. */
5995 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
5996 const int alpha
= (alpha_table
+52)[index_a
];
5997 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
/* tc per edge segment; +1 is the chroma adjustment, 0 disables. */
6002 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] + 1 : 0;
6003 h
->s
.dsp
.h264_h_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
/* bS == 4 path: strong (intra) chroma filter, no tc clipping. */
6005 h
->s
.dsp
.h264_h_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
6009 static void filter_mb_mbaff_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[8], int qp
[2] ) {
6011 for( i
= 0; i
< 16; i
++, pix
+= stride
) {
6017 int bS_index
= (i
>> 1);
6020 bS_index
|= (i
& 1);
6023 if( bS
[bS_index
] == 0 ) {
6027 qp_index
= MB_FIELD
? (i
>> 3) : (i
& 1);
6028 index_a
= qp
[qp_index
] + h
->slice_alpha_c0_offset
;
6029 alpha
= (alpha_table
+52)[index_a
];
6030 beta
= (beta_table
+52)[qp
[qp_index
] + h
->slice_beta_offset
];
6032 if( bS
[bS_index
] < 4 ) {
6033 const int tc0
= (tc0_table
+52)[index_a
][bS
[bS_index
] - 1];
6034 const int p0
= pix
[-1];
6035 const int p1
= pix
[-2];
6036 const int p2
= pix
[-3];
6037 const int q0
= pix
[0];
6038 const int q1
= pix
[1];
6039 const int q2
= pix
[2];
6041 if( FFABS( p0
- q0
) < alpha
&&
6042 FFABS( p1
- p0
) < beta
&&
6043 FFABS( q1
- q0
) < beta
) {
6047 if( FFABS( p2
- p0
) < beta
) {
6048 pix
[-2] = p1
+ av_clip( ( p2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( p1
<< 1 ) ) >> 1, -tc0
, tc0
);
6051 if( FFABS( q2
- q0
) < beta
) {
6052 pix
[1] = q1
+ av_clip( ( q2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( q1
<< 1 ) ) >> 1, -tc0
, tc0
);
6056 i_delta
= av_clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6057 pix
[-1] = av_clip_uint8( p0
+ i_delta
); /* p0' */
6058 pix
[0] = av_clip_uint8( q0
- i_delta
); /* q0' */
6059 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6062 const int p0
= pix
[-1];
6063 const int p1
= pix
[-2];
6064 const int p2
= pix
[-3];
6066 const int q0
= pix
[0];
6067 const int q1
= pix
[1];
6068 const int q2
= pix
[2];
6070 if( FFABS( p0
- q0
) < alpha
&&
6071 FFABS( p1
- p0
) < beta
&&
6072 FFABS( q1
- q0
) < beta
) {
6074 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6075 if( FFABS( p2
- p0
) < beta
)
6077 const int p3
= pix
[-4];
6079 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6080 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6081 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6084 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6086 if( FFABS( q2
- q0
) < beta
)
6088 const int q3
= pix
[3];
6090 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6091 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6092 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6095 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6099 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6100 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6102 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6107 static void filter_mb_mbaff_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[8], int qp
[2] ) {
6109 for( i
= 0; i
< 8; i
++, pix
+= stride
) {
6117 if( bS
[bS_index
] == 0 ) {
6121 qp_index
= MB_FIELD
? (i
>> 2) : (i
& 1);
6122 index_a
= qp
[qp_index
] + h
->slice_alpha_c0_offset
;
6123 alpha
= (alpha_table
+52)[index_a
];
6124 beta
= (beta_table
+52)[qp
[qp_index
] + h
->slice_beta_offset
];
6126 if( bS
[bS_index
] < 4 ) {
6127 const int tc
= (tc0_table
+52)[index_a
][bS
[bS_index
] - 1] + 1;
6128 const int p0
= pix
[-1];
6129 const int p1
= pix
[-2];
6130 const int q0
= pix
[0];
6131 const int q1
= pix
[1];
6133 if( FFABS( p0
- q0
) < alpha
&&
6134 FFABS( p1
- p0
) < beta
&&
6135 FFABS( q1
- q0
) < beta
) {
6136 const int i_delta
= av_clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6138 pix
[-1] = av_clip_uint8( p0
+ i_delta
); /* p0' */
6139 pix
[0] = av_clip_uint8( q0
- i_delta
); /* q0' */
6140 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6143 const int p0
= pix
[-1];
6144 const int p1
= pix
[-2];
6145 const int q0
= pix
[0];
6146 const int q1
= pix
[1];
6148 if( FFABS( p0
- q0
) < alpha
&&
6149 FFABS( p1
- p0
) < beta
&&
6150 FFABS( q1
- q0
) < beta
) {
6152 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2; /* p0' */
6153 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2; /* q0' */
6154 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, pix
[-3], p1
, p0
, q0
, q1
, pix
[2], pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6160 static void filter_mb_edgeh( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6162 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6163 const int alpha
= (alpha_table
+52)[index_a
];
6164 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6165 const int pix_next
= stride
;
6170 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] : -1;
6171 h
->s
.dsp
.h264_v_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6173 /* 16px edge length, see filter_mb_edgev */
6174 for( d
= 0; d
< 16; d
++ ) {
6175 const int p0
= pix
[-1*pix_next
];
6176 const int p1
= pix
[-2*pix_next
];
6177 const int p2
= pix
[-3*pix_next
];
6178 const int q0
= pix
[0];
6179 const int q1
= pix
[1*pix_next
];
6180 const int q2
= pix
[2*pix_next
];
6182 if( FFABS( p0
- q0
) < alpha
&&
6183 FFABS( p1
- p0
) < beta
&&
6184 FFABS( q1
- q0
) < beta
) {
6186 const int p3
= pix
[-4*pix_next
];
6187 const int q3
= pix
[ 3*pix_next
];
6189 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6190 if( FFABS( p2
- p0
) < beta
) {
6192 pix
[-1*pix_next
] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6193 pix
[-2*pix_next
] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6194 pix
[-3*pix_next
] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6197 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6199 if( FFABS( q2
- q0
) < beta
) {
6201 pix
[0*pix_next
] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6202 pix
[1*pix_next
] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6203 pix
[2*pix_next
] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6206 pix
[0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6210 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6211 pix
[ 0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6213 tprintf(h
->s
.avctx
, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, qp
, index_a
, alpha
, beta
, bS
[i
], p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2*pix_next
], pix
[-pix_next
], pix
[0], pix
[pix_next
]);
6220 static void filter_mb_edgech( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6222 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6223 const int alpha
= (alpha_table
+52)[index_a
];
6224 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6229 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] + 1 : 0;
6230 h
->s
.dsp
.h264_v_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
6232 h
->s
.dsp
.h264_v_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
6236 static void filter_mb_fast( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6237 MpegEncContext
* const s
= &h
->s
;
6238 int mb_y_firstrow
= s
->picture_structure
== PICT_BOTTOM_FIELD
;
6240 int qp
, qp0
, qp1
, qpc
, qpc0
, qpc1
, qp_thresh
;
6244 if(mb_x
==0 || mb_y
==mb_y_firstrow
|| !s
->dsp
.h264_loop_filter_strength
|| h
->pps
.chroma_qp_diff
||
6246 (h
->deblocking_filter
== 2 && (h
->slice_table
[mb_xy
] != h
->slice_table
[h
->top_mb_xy
] ||
6247 h
->slice_table
[mb_xy
] != h
->slice_table
[mb_xy
- 1]))) {
6248 filter_mb(h
, mb_x
, mb_y
, img_y
, img_cb
, img_cr
, linesize
, uvlinesize
);
6251 assert(!FRAME_MBAFF
);
6253 mb_type
= s
->current_picture
.mb_type
[mb_xy
];
6254 qp
= s
->current_picture
.qscale_table
[mb_xy
];
6255 qp0
= s
->current_picture
.qscale_table
[mb_xy
-1];
6256 qp1
= s
->current_picture
.qscale_table
[h
->top_mb_xy
];
6257 qpc
= get_chroma_qp( h
, 0, qp
);
6258 qpc0
= get_chroma_qp( h
, 0, qp0
);
6259 qpc1
= get_chroma_qp( h
, 0, qp1
);
6260 qp0
= (qp
+ qp0
+ 1) >> 1;
6261 qp1
= (qp
+ qp1
+ 1) >> 1;
6262 qpc0
= (qpc
+ qpc0
+ 1) >> 1;
6263 qpc1
= (qpc
+ qpc1
+ 1) >> 1;
6264 qp_thresh
= 15 - h
->slice_alpha_c0_offset
;
6265 if(qp
<= qp_thresh
&& qp0
<= qp_thresh
&& qp1
<= qp_thresh
&&
6266 qpc
<= qp_thresh
&& qpc0
<= qp_thresh
&& qpc1
<= qp_thresh
)
6269 if( IS_INTRA(mb_type
) ) {
6270 int16_t bS4
[4] = {4,4,4,4};
6271 int16_t bS3
[4] = {3,3,3,3};
6272 int16_t *bSH
= FIELD_PICTURE
? bS3
: bS4
;
6273 if( IS_8x8DCT(mb_type
) ) {
6274 filter_mb_edgev( h
, &img_y
[4*0], linesize
, bS4
, qp0
);
6275 filter_mb_edgev( h
, &img_y
[4*2], linesize
, bS3
, qp
);
6276 filter_mb_edgeh( h
, &img_y
[4*0*linesize
], linesize
, bSH
, qp1
);
6277 filter_mb_edgeh( h
, &img_y
[4*2*linesize
], linesize
, bS3
, qp
);
6279 filter_mb_edgev( h
, &img_y
[4*0], linesize
, bS4
, qp0
);
6280 filter_mb_edgev( h
, &img_y
[4*1], linesize
, bS3
, qp
);
6281 filter_mb_edgev( h
, &img_y
[4*2], linesize
, bS3
, qp
);
6282 filter_mb_edgev( h
, &img_y
[4*3], linesize
, bS3
, qp
);
6283 filter_mb_edgeh( h
, &img_y
[4*0*linesize
], linesize
, bSH
, qp1
);
6284 filter_mb_edgeh( h
, &img_y
[4*1*linesize
], linesize
, bS3
, qp
);
6285 filter_mb_edgeh( h
, &img_y
[4*2*linesize
], linesize
, bS3
, qp
);
6286 filter_mb_edgeh( h
, &img_y
[4*3*linesize
], linesize
, bS3
, qp
);
6288 filter_mb_edgecv( h
, &img_cb
[2*0], uvlinesize
, bS4
, qpc0
);
6289 filter_mb_edgecv( h
, &img_cb
[2*2], uvlinesize
, bS3
, qpc
);
6290 filter_mb_edgecv( h
, &img_cr
[2*0], uvlinesize
, bS4
, qpc0
);
6291 filter_mb_edgecv( h
, &img_cr
[2*2], uvlinesize
, bS3
, qpc
);
6292 filter_mb_edgech( h
, &img_cb
[2*0*uvlinesize
], uvlinesize
, bSH
, qpc1
);
6293 filter_mb_edgech( h
, &img_cb
[2*2*uvlinesize
], uvlinesize
, bS3
, qpc
);
6294 filter_mb_edgech( h
, &img_cr
[2*0*uvlinesize
], uvlinesize
, bSH
, qpc1
);
6295 filter_mb_edgech( h
, &img_cr
[2*2*uvlinesize
], uvlinesize
, bS3
, qpc
);
6298 DECLARE_ALIGNED_8(int16_t, bS
[2][4][4]);
6299 uint64_t (*bSv
)[4] = (uint64_t(*)[4])bS
;
6301 if( IS_8x8DCT(mb_type
) && (h
->cbp
&7) == 7 ) {
6303 bSv
[0][0] = bSv
[0][2] = bSv
[1][0] = bSv
[1][2] = 0x0002000200020002ULL
;
6305 int mask_edge1
= (mb_type
& (MB_TYPE_16x16
| MB_TYPE_8x16
)) ? 3 :
6306 (mb_type
& MB_TYPE_16x8
) ? 1 : 0;
6307 int mask_edge0
= (mb_type
& (MB_TYPE_16x16
| MB_TYPE_8x16
))
6308 && (s
->current_picture
.mb_type
[mb_xy
-1] & (MB_TYPE_16x16
| MB_TYPE_8x16
))
6310 int step
= IS_8x8DCT(mb_type
) ? 2 : 1;
6311 edges
= (mb_type
& MB_TYPE_16x16
) && !(h
->cbp
& 15) ? 1 : 4;
6312 s
->dsp
.h264_loop_filter_strength( bS
, h
->non_zero_count_cache
, h
->ref_cache
, h
->mv_cache
,
6313 (h
->slice_type_nos
== FF_B_TYPE
), edges
, step
, mask_edge0
, mask_edge1
, FIELD_PICTURE
);
6315 if( IS_INTRA(s
->current_picture
.mb_type
[mb_xy
-1]) )
6316 bSv
[0][0] = 0x0004000400040004ULL
;
6317 if( IS_INTRA(s
->current_picture
.mb_type
[h
->top_mb_xy
]) )
6318 bSv
[1][0] = FIELD_PICTURE
? 0x0003000300030003ULL
: 0x0004000400040004ULL
;
6320 #define FILTER(hv,dir,edge)\
6321 if(bSv[dir][edge]) {\
6322 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6324 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6325 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6331 } else if( IS_8x8DCT(mb_type
) ) {
6350 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6351 MpegEncContext
* const s
= &h
->s
;
6352 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
6353 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
6354 const int mvy_limit
= IS_INTERLACED(mb_type
) ? 2 : 4;
6355 int first_vertical_edge_done
= 0;
6358 //for sufficiently low qp, filtering wouldn't do anything
6359 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6361 int qp_thresh
= 15 - h
->slice_alpha_c0_offset
- FFMAX3(0, h
->pps
.chroma_qp_index_offset
[0], h
->pps
.chroma_qp_index_offset
[1]);
6362 int qp
= s
->current_picture
.qscale_table
[mb_xy
];
6364 && (mb_x
== 0 || ((qp
+ s
->current_picture
.qscale_table
[mb_xy
-1] + 1)>>1) <= qp_thresh
)
6365 && (mb_y
== 0 || ((qp
+ s
->current_picture
.qscale_table
[h
->top_mb_xy
] + 1)>>1) <= qp_thresh
)){
6370 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6371 if(!h
->pps
.cabac
&& h
->pps
.transform_8x8_mode
){
6372 int top_type
, left_type
[2];
6373 top_type
= s
->current_picture
.mb_type
[h
->top_mb_xy
] ;
6374 left_type
[0] = s
->current_picture
.mb_type
[h
->left_mb_xy
[0]];
6375 left_type
[1] = s
->current_picture
.mb_type
[h
->left_mb_xy
[1]];
6377 if(IS_8x8DCT(top_type
)){
6378 h
->non_zero_count_cache
[4+8*0]=
6379 h
->non_zero_count_cache
[5+8*0]= h
->cbp_table
[h
->top_mb_xy
] & 4;
6380 h
->non_zero_count_cache
[6+8*0]=
6381 h
->non_zero_count_cache
[7+8*0]= h
->cbp_table
[h
->top_mb_xy
] & 8;
6383 if(IS_8x8DCT(left_type
[0])){
6384 h
->non_zero_count_cache
[3+8*1]=
6385 h
->non_zero_count_cache
[3+8*2]= h
->cbp_table
[h
->left_mb_xy
[0]]&2; //FIXME check MBAFF
6387 if(IS_8x8DCT(left_type
[1])){
6388 h
->non_zero_count_cache
[3+8*3]=
6389 h
->non_zero_count_cache
[3+8*4]= h
->cbp_table
[h
->left_mb_xy
[1]]&8; //FIXME check MBAFF
6392 if(IS_8x8DCT(mb_type
)){
6393 h
->non_zero_count_cache
[scan8
[0 ]]= h
->non_zero_count_cache
[scan8
[1 ]]=
6394 h
->non_zero_count_cache
[scan8
[2 ]]= h
->non_zero_count_cache
[scan8
[3 ]]= h
->cbp_table
[mb_xy
] & 1;
6396 h
->non_zero_count_cache
[scan8
[0+ 4]]= h
->non_zero_count_cache
[scan8
[1+ 4]]=
6397 h
->non_zero_count_cache
[scan8
[2+ 4]]= h
->non_zero_count_cache
[scan8
[3+ 4]]= h
->cbp_table
[mb_xy
] & 2;
6399 h
->non_zero_count_cache
[scan8
[0+ 8]]= h
->non_zero_count_cache
[scan8
[1+ 8]]=
6400 h
->non_zero_count_cache
[scan8
[2+ 8]]= h
->non_zero_count_cache
[scan8
[3+ 8]]= h
->cbp_table
[mb_xy
] & 4;
6402 h
->non_zero_count_cache
[scan8
[0+12]]= h
->non_zero_count_cache
[scan8
[1+12]]=
6403 h
->non_zero_count_cache
[scan8
[2+12]]= h
->non_zero_count_cache
[scan8
[3+12]]= h
->cbp_table
[mb_xy
] & 8;
6408 // left mb is in picture
6409 && h
->slice_table
[mb_xy
-1] != 0xFFFF
6410 // and current and left pair do not have the same interlaced type
6411 && (IS_INTERLACED(mb_type
) != IS_INTERLACED(s
->current_picture
.mb_type
[mb_xy
-1]))
6412 // and left mb is in the same slice if deblocking_filter == 2
6413 && (h
->deblocking_filter
!=2 || h
->slice_table
[mb_xy
-1] == h
->slice_table
[mb_xy
])) {
6414 /* First vertical edge is different in MBAFF frames
6415 * There are 8 different bS to compute and 2 different Qp
6417 const int pair_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
6418 const int left_mb_xy
[2] = { pair_xy
-1, pair_xy
-1+s
->mb_stride
};
6423 int mb_qp
, mbn0_qp
, mbn1_qp
;
6425 first_vertical_edge_done
= 1;
6427 if( IS_INTRA(mb_type
) )
6428 bS
[0] = bS
[1] = bS
[2] = bS
[3] = bS
[4] = bS
[5] = bS
[6] = bS
[7] = 4;
6430 for( i
= 0; i
< 8; i
++ ) {
6431 int mbn_xy
= MB_FIELD
? left_mb_xy
[i
>>2] : left_mb_xy
[i
&1];
6433 if( IS_INTRA( s
->current_picture
.mb_type
[mbn_xy
] ) )
6435 else if( h
->non_zero_count_cache
[12+8*(i
>>1)] != 0 ||
6436 ((!h
->pps
.cabac
&& IS_8x8DCT(s
->current_picture
.mb_type
[mbn_xy
])) ?
6437 (h
->cbp_table
[mbn_xy
] & ((MB_FIELD
? (i
&2) : (mb_y
&1)) ? 8 : 2))
6439 h
->non_zero_count
[mbn_xy
][MB_FIELD
? i
&3 : (i
>>2)+(mb_y
&1)*2]))
6446 mb_qp
= s
->current_picture
.qscale_table
[mb_xy
];
6447 mbn0_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[0]];
6448 mbn1_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[1]];
6449 qp
[0] = ( mb_qp
+ mbn0_qp
+ 1 ) >> 1;
6450 bqp
[0] = ( get_chroma_qp( h
, 0, mb_qp
) +
6451 get_chroma_qp( h
, 0, mbn0_qp
) + 1 ) >> 1;
6452 rqp
[0] = ( get_chroma_qp( h
, 1, mb_qp
) +
6453 get_chroma_qp( h
, 1, mbn0_qp
) + 1 ) >> 1;
6454 qp
[1] = ( mb_qp
+ mbn1_qp
+ 1 ) >> 1;
6455 bqp
[1] = ( get_chroma_qp( h
, 0, mb_qp
) +
6456 get_chroma_qp( h
, 0, mbn1_qp
) + 1 ) >> 1;
6457 rqp
[1] = ( get_chroma_qp( h
, 1, mb_qp
) +
6458 get_chroma_qp( h
, 1, mbn1_qp
) + 1 ) >> 1;
6461 tprintf(s
->avctx
, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x
, mb_y
, qp
[0], qp
[1], bqp
[0], bqp
[1], rqp
[0], rqp
[1], linesize
, uvlinesize
);
6462 { int i
; for (i
= 0; i
< 8; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6463 filter_mb_mbaff_edgev ( h
, &img_y
[0], linesize
, bS
, qp
);
6464 filter_mb_mbaff_edgecv( h
, &img_cb
[0], uvlinesize
, bS
, bqp
);
6465 filter_mb_mbaff_edgecv( h
, &img_cr
[0], uvlinesize
, bS
, rqp
);
6467 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6468 for( dir
= 0; dir
< 2; dir
++ )
6471 const int mbm_xy
= dir
== 0 ? mb_xy
-1 : h
->top_mb_xy
;
6472 const int mbm_type
= s
->current_picture
.mb_type
[mbm_xy
];
6473 int (*ref2frm
) [64] = h
->ref2frm
[ h
->slice_num
&(MAX_SLICES
-1) ][0] + (MB_MBAFF
? 20 : 2);
6474 int (*ref2frmm
)[64] = h
->ref2frm
[ h
->slice_table
[mbm_xy
]&(MAX_SLICES
-1) ][0] + (MB_MBAFF
? 20 : 2);
6475 int start
= h
->slice_table
[mbm_xy
] == 0xFFFF ? 1 : 0;
6477 const int edges
= (mb_type
& (MB_TYPE_16x16
|MB_TYPE_SKIP
))
6478 == (MB_TYPE_16x16
|MB_TYPE_SKIP
) ? 1 : 4;
6479 // how often to recheck mv-based bS when iterating between edges
6480 const int mask_edge
= (mb_type
& (MB_TYPE_16x16
| (MB_TYPE_16x8
<< dir
))) ? 3 :
6481 (mb_type
& (MB_TYPE_8x16
>> dir
)) ? 1 : 0;
6482 // how often to recheck mv-based bS when iterating along each edge
6483 const int mask_par0
= mb_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
));
6485 if (first_vertical_edge_done
) {
6487 first_vertical_edge_done
= 0;
6490 if (h
->deblocking_filter
==2 && h
->slice_table
[mbm_xy
] != h
->slice_table
[mb_xy
])
6493 if (FRAME_MBAFF
&& (dir
== 1) && ((mb_y
&1) == 0) && start
== 0
6494 && !IS_INTERLACED(mb_type
)
6495 && IS_INTERLACED(mbm_type
)
6497 // This is a special case in the norm where the filtering must
6498 // be done twice (one each of the field) even if we are in a
6499 // frame macroblock.
6501 static const int nnz_idx
[4] = {4,5,6,3};
6502 unsigned int tmp_linesize
= 2 * linesize
;
6503 unsigned int tmp_uvlinesize
= 2 * uvlinesize
;
6504 int mbn_xy
= mb_xy
- 2 * s
->mb_stride
;
6509 for(j
=0; j
<2; j
++, mbn_xy
+= s
->mb_stride
){
6510 if( IS_INTRA(mb_type
) ||
6511 IS_INTRA(s
->current_picture
.mb_type
[mbn_xy
]) ) {
6512 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 3;
6514 const uint8_t *mbn_nnz
= h
->non_zero_count
[mbn_xy
];
6515 for( i
= 0; i
< 4; i
++ ) {
6516 if( h
->non_zero_count_cache
[scan8
[0]+i
] != 0 ||
6517 mbn_nnz
[nnz_idx
[i
]] != 0 )
6523 // Do not use s->qscale as luma quantizer because it has not the same
6524 // value in IPCM macroblocks.
6525 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6526 tprintf(s
->avctx
, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, tmp_linesize
, tmp_uvlinesize
);
6527 { int i
; for (i
= 0; i
< 4; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6528 filter_mb_edgeh( h
, &img_y
[j
*linesize
], tmp_linesize
, bS
, qp
);
6529 filter_mb_edgech( h
, &img_cb
[j
*uvlinesize
], tmp_uvlinesize
, bS
,
6530 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6531 filter_mb_edgech( h
, &img_cr
[j
*uvlinesize
], tmp_uvlinesize
, bS
,
6532 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6539 for( edge
= start
; edge
< edges
; edge
++ ) {
6540 /* mbn_xy: neighbor macroblock */
6541 const int mbn_xy
= edge
> 0 ? mb_xy
: mbm_xy
;
6542 const int mbn_type
= s
->current_picture
.mb_type
[mbn_xy
];
6543 int (*ref2frmn
)[64] = edge
> 0 ? ref2frm
: ref2frmm
;
6547 if( (edge
&1) && IS_8x8DCT(mb_type
) )
6550 if( IS_INTRA(mb_type
) ||
6551 IS_INTRA(mbn_type
) ) {
6554 if ( (!IS_INTERLACED(mb_type
) && !IS_INTERLACED(mbm_type
))
6555 || ((FRAME_MBAFF
|| (s
->picture_structure
!= PICT_FRAME
)) && (dir
== 0))
6564 bS
[0] = bS
[1] = bS
[2] = bS
[3] = value
;
6569 if( edge
& mask_edge
) {
6570 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 0;
6573 else if( FRAME_MBAFF
&& IS_INTERLACED(mb_type
^ mbn_type
)) {
6574 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 1;
6577 else if( mask_par0
&& (edge
|| (mbn_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
)))) ) {
6578 int b_idx
= 8 + 4 + edge
* (dir
? 8:1);
6579 int bn_idx
= b_idx
- (dir
? 8:1);
6582 for( l
= 0; !v
&& l
< 1 + (h
->slice_type_nos
== FF_B_TYPE
); l
++ ) {
6583 v
|= ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[l
][h
->ref_cache
[l
][bn_idx
]] ||
6584 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6585 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
;
6588 if(h
->slice_type_nos
== FF_B_TYPE
&& v
){
6590 for( l
= 0; !v
&& l
< 2; l
++ ) {
6592 v
|= ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[ln
][h
->ref_cache
[ln
][bn_idx
]] ||
6593 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[ln
][bn_idx
][0] ) >= 4 ||
6594 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[ln
][bn_idx
][1] ) >= mvy_limit
;
6598 bS
[0] = bS
[1] = bS
[2] = bS
[3] = v
;
6604 for( i
= 0; i
< 4; i
++ ) {
6605 int x
= dir
== 0 ? edge
: i
;
6606 int y
= dir
== 0 ? i
: edge
;
6607 int b_idx
= 8 + 4 + x
+ 8*y
;
6608 int bn_idx
= b_idx
- (dir
? 8:1);
6610 if( h
->non_zero_count_cache
[b_idx
] != 0 ||
6611 h
->non_zero_count_cache
[bn_idx
] != 0 ) {
6617 for( l
= 0; l
< 1 + (h
->slice_type_nos
== FF_B_TYPE
); l
++ ) {
6618 if( ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[l
][h
->ref_cache
[l
][bn_idx
]] ||
6619 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6620 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
) {
6626 if(h
->slice_type_nos
== FF_B_TYPE
&& bS
[i
]){
6628 for( l
= 0; l
< 2; l
++ ) {
6630 if( ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[ln
][h
->ref_cache
[ln
][bn_idx
]] ||
6631 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[ln
][bn_idx
][0] ) >= 4 ||
6632 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[ln
][bn_idx
][1] ) >= mvy_limit
) {
6641 if(bS
[0]+bS
[1]+bS
[2]+bS
[3] == 0)
6646 // Do not use s->qscale as luma quantizer because it has not the same
6647 // value in IPCM macroblocks.
6648 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6649 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6650 tprintf(s
->avctx
, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, linesize
, uvlinesize
);
6651 { int i
; for (i
= 0; i
< 4; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6653 filter_mb_edgev( h
, &img_y
[4*edge
], linesize
, bS
, qp
);
6654 if( (edge
&1) == 0 ) {
6655 filter_mb_edgecv( h
, &img_cb
[2*edge
], uvlinesize
, bS
,
6656 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6657 filter_mb_edgecv( h
, &img_cr
[2*edge
], uvlinesize
, bS
,
6658 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6661 filter_mb_edgeh( h
, &img_y
[4*edge
*linesize
], linesize
, bS
, qp
);
6662 if( (edge
&1) == 0 ) {
6663 filter_mb_edgech( h
, &img_cb
[2*edge
*uvlinesize
], uvlinesize
, bS
,
6664 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6665 filter_mb_edgech( h
, &img_cr
[2*edge
*uvlinesize
], uvlinesize
, bS
,
6666 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6673 static int decode_slice(struct AVCodecContext
*avctx
, void *arg
){
6674 H264Context
*h
= *(void**)arg
;
6675 MpegEncContext
* const s
= &h
->s
;
6676 const int part_mask
= s
->partitioned_frame
? (AC_END
|AC_ERROR
) : 0x7F;
6680 if( h
->pps
.cabac
) {
6684 align_get_bits( &s
->gb
);
6687 ff_init_cabac_states( &h
->cabac
);
6688 ff_init_cabac_decoder( &h
->cabac
,
6689 s
->gb
.buffer
+ get_bits_count(&s
->gb
)/8,
6690 ( s
->gb
.size_in_bits
- get_bits_count(&s
->gb
) + 7)/8);
6691 /* calculate pre-state */
6692 for( i
= 0; i
< 460; i
++ ) {
6694 if( h
->slice_type_nos
== FF_I_TYPE
)
6695 pre
= av_clip( ((cabac_context_init_I
[i
][0] * s
->qscale
) >>4 ) + cabac_context_init_I
[i
][1], 1, 126 );
6697 pre
= av_clip( ((cabac_context_init_PB
[h
->cabac_init_idc
][i
][0] * s
->qscale
) >>4 ) + cabac_context_init_PB
[h
->cabac_init_idc
][i
][1], 1, 126 );
6700 h
->cabac_state
[i
] = 2 * ( 63 - pre
) + 0;
6702 h
->cabac_state
[i
] = 2 * ( pre
- 64 ) + 1;
6707 int ret
= decode_mb_cabac(h
);
6709 //STOP_TIMER("decode_mb_cabac")
6711 if(ret
>=0) hl_decode_mb(h
);
6713 if( ret
>= 0 && FRAME_MBAFF
) { //FIXME optimal? or let mb_decode decode 16x32 ?
6716 if(ret
>=0) ret
= decode_mb_cabac(h
);
6718 if(ret
>=0) hl_decode_mb(h
);
6721 eos
= get_cabac_terminate( &h
->cabac
);
6723 if( ret
< 0 || h
->cabac
.bytestream
> h
->cabac
.bytestream_end
+ 2) {
6724 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d, bytestream (%td)\n", s
->mb_x
, s
->mb_y
, h
->cabac
.bytestream_end
- h
->cabac
.bytestream
);
6725 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6729 if( ++s
->mb_x
>= s
->mb_width
) {
6731 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6733 if(FIELD_OR_MBAFF_PICTURE
) {
6738 if( eos
|| s
->mb_y
>= s
->mb_height
) {
6739 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6740 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6747 int ret
= decode_mb_cavlc(h
);
6749 if(ret
>=0) hl_decode_mb(h
);
6751 if(ret
>=0 && FRAME_MBAFF
){ //FIXME optimal? or let mb_decode decode 16x32 ?
6753 ret
= decode_mb_cavlc(h
);
6755 if(ret
>=0) hl_decode_mb(h
);
6760 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6761 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6766 if(++s
->mb_x
>= s
->mb_width
){
6768 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6770 if(FIELD_OR_MBAFF_PICTURE
) {
6773 if(s
->mb_y
>= s
->mb_height
){
6774 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6776 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
) {
6777 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6781 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6788 if(get_bits_count(&s
->gb
) >= s
->gb
.size_in_bits
&& s
->mb_skip_run
<=0){
6789 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6790 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
){
6791 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6795 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6804 for(;s
->mb_y
< s
->mb_height
; s
->mb_y
++){
6805 for(;s
->mb_x
< s
->mb_width
; s
->mb_x
++){
6806 int ret
= decode_mb(h
);
6811 av_log(s
->avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6812 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6817 if(++s
->mb_x
>= s
->mb_width
){
6819 if(++s
->mb_y
>= s
->mb_height
){
6820 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6821 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6825 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6832 if(get_bits_count(s
->?gb
) >= s
->gb
?.size_in_bits
){
6833 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6834 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6838 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6845 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6848 return -1; //not reached
6851 static int decode_picture_timing(H264Context
*h
){
6852 MpegEncContext
* const s
= &h
->s
;
6853 if(h
->sps
.nal_hrd_parameters_present_flag
|| h
->sps
.vcl_hrd_parameters_present_flag
){
6854 skip_bits(&s
->gb
, h
->sps
.cpb_removal_delay_length
); /* cpb_removal_delay */
6855 skip_bits(&s
->gb
, h
->sps
.dpb_output_delay_length
); /* dpb_output_delay */
6857 if(h
->sps
.pic_struct_present_flag
){
6858 unsigned int i
, num_clock_ts
;
6859 h
->sei_pic_struct
= get_bits(&s
->gb
, 4);
6861 if (h
->sei_pic_struct
> SEI_PIC_STRUCT_FRAME_TRIPLING
)
6864 num_clock_ts
= sei_num_clock_ts_table
[h
->sei_pic_struct
];
6866 for (i
= 0 ; i
< num_clock_ts
; i
++){
6867 if(get_bits(&s
->gb
, 1)){ /* clock_timestamp_flag */
6868 unsigned int full_timestamp_flag
;
6869 skip_bits(&s
->gb
, 2); /* ct_type */
6870 skip_bits(&s
->gb
, 1); /* nuit_field_based_flag */
6871 skip_bits(&s
->gb
, 5); /* counting_type */
6872 full_timestamp_flag
= get_bits(&s
->gb
, 1);
6873 skip_bits(&s
->gb
, 1); /* discontinuity_flag */
6874 skip_bits(&s
->gb
, 1); /* cnt_dropped_flag */
6875 skip_bits(&s
->gb
, 8); /* n_frames */
6876 if(full_timestamp_flag
){
6877 skip_bits(&s
->gb
, 6); /* seconds_value 0..59 */
6878 skip_bits(&s
->gb
, 6); /* minutes_value 0..59 */
6879 skip_bits(&s
->gb
, 5); /* hours_value 0..23 */
6881 if(get_bits(&s
->gb
, 1)){ /* seconds_flag */
6882 skip_bits(&s
->gb
, 6); /* seconds_value range 0..59 */
6883 if(get_bits(&s
->gb
, 1)){ /* minutes_flag */
6884 skip_bits(&s
->gb
, 6); /* minutes_value 0..59 */
6885 if(get_bits(&s
->gb
, 1)) /* hours_flag */
6886 skip_bits(&s
->gb
, 5); /* hours_value 0..23 */
6890 if(h
->sps
.time_offset_length
> 0)
6891 skip_bits(&s
->gb
, h
->sps
.time_offset_length
); /* time_offset */
6898 static int decode_unregistered_user_data(H264Context
*h
, int size
){
6899 MpegEncContext
* const s
= &h
->s
;
6900 uint8_t user_data
[16+256];
6906 for(i
=0; i
<sizeof(user_data
)-1 && i
<size
; i
++){
6907 user_data
[i
]= get_bits(&s
->gb
, 8);
6911 e
= sscanf(user_data
+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build
);
6912 if(e
==1 && build
>=0)
6913 h
->x264_build
= build
;
6915 if(s
->avctx
->debug
& FF_DEBUG_BUGS
)
6916 av_log(s
->avctx
, AV_LOG_DEBUG
, "user data:\"%s\"\n", user_data
+16);
6919 skip_bits(&s
->gb
, 8);
6924 static int decode_sei(H264Context
*h
){
6925 MpegEncContext
* const s
= &h
->s
;
6927 while(get_bits_count(&s
->gb
) + 16 < s
->gb
.size_in_bits
){
6932 type
+= show_bits(&s
->gb
, 8);
6933 }while(get_bits(&s
->gb
, 8) == 255);
6937 size
+= show_bits(&s
->gb
, 8);
6938 }while(get_bits(&s
->gb
, 8) == 255);
6941 case 1: // Picture timing SEI
6942 if(decode_picture_timing(h
) < 0)
6946 if(decode_unregistered_user_data(h
, size
) < 0)
6950 skip_bits(&s
->gb
, 8*size
);
6953 //FIXME check bits here
6954 align_get_bits(&s
->gb
);
6960 static inline void decode_hrd_parameters(H264Context
*h
, SPS
*sps
){
6961 MpegEncContext
* const s
= &h
->s
;
6963 cpb_count
= get_ue_golomb(&s
->gb
) + 1;
6964 get_bits(&s
->gb
, 4); /* bit_rate_scale */
6965 get_bits(&s
->gb
, 4); /* cpb_size_scale */
6966 for(i
=0; i
<cpb_count
; i
++){
6967 get_ue_golomb(&s
->gb
); /* bit_rate_value_minus1 */
6968 get_ue_golomb(&s
->gb
); /* cpb_size_value_minus1 */
6969 get_bits1(&s
->gb
); /* cbr_flag */
6971 get_bits(&s
->gb
, 5); /* initial_cpb_removal_delay_length_minus1 */
6972 sps
->cpb_removal_delay_length
= get_bits(&s
->gb
, 5) + 1;
6973 sps
->dpb_output_delay_length
= get_bits(&s
->gb
, 5) + 1;
6974 sps
->time_offset_length
= get_bits(&s
->gb
, 5);
6977 static inline int decode_vui_parameters(H264Context
*h
, SPS
*sps
){
6978 MpegEncContext
* const s
= &h
->s
;
6979 int aspect_ratio_info_present_flag
;
6980 unsigned int aspect_ratio_idc
;
6982 aspect_ratio_info_present_flag
= get_bits1(&s
->gb
);
6984 if( aspect_ratio_info_present_flag
) {
6985 aspect_ratio_idc
= get_bits(&s
->gb
, 8);
6986 if( aspect_ratio_idc
== EXTENDED_SAR
) {
6987 sps
->sar
.num
= get_bits(&s
->gb
, 16);
6988 sps
->sar
.den
= get_bits(&s
->gb
, 16);
6989 }else if(aspect_ratio_idc
< FF_ARRAY_ELEMS(pixel_aspect
)){
6990 sps
->sar
= pixel_aspect
[aspect_ratio_idc
];
6992 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal aspect ratio\n");
6999 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7001 if(get_bits1(&s
->gb
)){ /* overscan_info_present_flag */
7002 get_bits1(&s
->gb
); /* overscan_appropriate_flag */
7005 if(get_bits1(&s
->gb
)){ /* video_signal_type_present_flag */
7006 get_bits(&s
->gb
, 3); /* video_format */
7007 get_bits1(&s
->gb
); /* video_full_range_flag */
7008 if(get_bits1(&s
->gb
)){ /* colour_description_present_flag */
7009 get_bits(&s
->gb
, 8); /* colour_primaries */
7010 get_bits(&s
->gb
, 8); /* transfer_characteristics */
7011 get_bits(&s
->gb
, 8); /* matrix_coefficients */
7015 if(get_bits1(&s
->gb
)){ /* chroma_location_info_present_flag */
7016 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_top_field */
7017 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_bottom_field */
7020 sps
->timing_info_present_flag
= get_bits1(&s
->gb
);
7021 if(sps
->timing_info_present_flag
){
7022 sps
->num_units_in_tick
= get_bits_long(&s
->gb
, 32);
7023 sps
->time_scale
= get_bits_long(&s
->gb
, 32);
7024 sps
->fixed_frame_rate_flag
= get_bits1(&s
->gb
);
7027 sps
->nal_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
7028 if(sps
->nal_hrd_parameters_present_flag
)
7029 decode_hrd_parameters(h
, sps
);
7030 sps
->vcl_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
7031 if(sps
->vcl_hrd_parameters_present_flag
)
7032 decode_hrd_parameters(h
, sps
);
7033 if(sps
->nal_hrd_parameters_present_flag
|| sps
->vcl_hrd_parameters_present_flag
)
7034 get_bits1(&s
->gb
); /* low_delay_hrd_flag */
7035 sps
->pic_struct_present_flag
= get_bits1(&s
->gb
);
7037 sps
->bitstream_restriction_flag
= get_bits1(&s
->gb
);
7038 if(sps
->bitstream_restriction_flag
){
7039 unsigned int num_reorder_frames
;
7040 get_bits1(&s
->gb
); /* motion_vectors_over_pic_boundaries_flag */
7041 get_ue_golomb(&s
->gb
); /* max_bytes_per_pic_denom */
7042 get_ue_golomb(&s
->gb
); /* max_bits_per_mb_denom */
7043 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_horizontal */
7044 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_vertical */
7045 num_reorder_frames
= get_ue_golomb(&s
->gb
);
7046 get_ue_golomb(&s
->gb
); /*max_dec_frame_buffering*/
7048 if(num_reorder_frames
> 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7049 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal num_reorder_frames %d\n", num_reorder_frames
);
7053 sps
->num_reorder_frames
= num_reorder_frames
;
7059 static void decode_scaling_list(H264Context
*h
, uint8_t *factors
, int size
,
7060 const uint8_t *jvt_list
, const uint8_t *fallback_list
){
7061 MpegEncContext
* const s
= &h
->s
;
7062 int i
, last
= 8, next
= 8;
7063 const uint8_t *scan
= size
== 16 ? zigzag_scan
: zigzag_scan8x8
;
7064 if(!get_bits1(&s
->gb
)) /* matrix not written, we use the predicted one */
7065 memcpy(factors
, fallback_list
, size
*sizeof(uint8_t));
7067 for(i
=0;i
<size
;i
++){
7069 next
= (last
+ get_se_golomb(&s
->gb
)) & 0xff;
7070 if(!i
&& !next
){ /* matrix not written, we use the preset one */
7071 memcpy(factors
, jvt_list
, size
*sizeof(uint8_t));
7074 last
= factors
[scan
[i
]] = next
? next
: last
;
7078 static void decode_scaling_matrices(H264Context
*h
, SPS
*sps
, PPS
*pps
, int is_sps
,
7079 uint8_t (*scaling_matrix4
)[16], uint8_t (*scaling_matrix8
)[64]){
7080 MpegEncContext
* const s
= &h
->s
;
7081 int fallback_sps
= !is_sps
&& sps
->scaling_matrix_present
;
7082 const uint8_t *fallback
[4] = {
7083 fallback_sps
? sps
->scaling_matrix4
[0] : default_scaling4
[0],
7084 fallback_sps
? sps
->scaling_matrix4
[3] : default_scaling4
[1],
7085 fallback_sps
? sps
->scaling_matrix8
[0] : default_scaling8
[0],
7086 fallback_sps
? sps
->scaling_matrix8
[1] : default_scaling8
[1]
7088 if(get_bits1(&s
->gb
)){
7089 sps
->scaling_matrix_present
|= is_sps
;
7090 decode_scaling_list(h
,scaling_matrix4
[0],16,default_scaling4
[0],fallback
[0]); // Intra, Y
7091 decode_scaling_list(h
,scaling_matrix4
[1],16,default_scaling4
[0],scaling_matrix4
[0]); // Intra, Cr
7092 decode_scaling_list(h
,scaling_matrix4
[2],16,default_scaling4
[0],scaling_matrix4
[1]); // Intra, Cb
7093 decode_scaling_list(h
,scaling_matrix4
[3],16,default_scaling4
[1],fallback
[1]); // Inter, Y
7094 decode_scaling_list(h
,scaling_matrix4
[4],16,default_scaling4
[1],scaling_matrix4
[3]); // Inter, Cr
7095 decode_scaling_list(h
,scaling_matrix4
[5],16,default_scaling4
[1],scaling_matrix4
[4]); // Inter, Cb
7096 if(is_sps
|| pps
->transform_8x8_mode
){
7097 decode_scaling_list(h
,scaling_matrix8
[0],64,default_scaling8
[0],fallback
[2]); // Intra, Y
7098 decode_scaling_list(h
,scaling_matrix8
[1],64,default_scaling8
[1],fallback
[3]); // Inter, Y
7104 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7107 alloc_parameter_set(H264Context
*h
, void **vec
, const unsigned int id
, const unsigned int max
,
7108 const size_t size
, const char *name
)
7111 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "%s_id (%d) out of range\n", name
, id
);
7116 vec
[id
] = av_mallocz(size
);
7118 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cannot allocate memory for %s\n", name
);
7123 static inline int decode_seq_parameter_set(H264Context
*h
){
7124 MpegEncContext
* const s
= &h
->s
;
7125 int profile_idc
, level_idc
;
7126 unsigned int sps_id
, tmp
, mb_width
, mb_height
;
7130 profile_idc
= get_bits(&s
->gb
, 8);
7131 get_bits1(&s
->gb
); //constraint_set0_flag
7132 get_bits1(&s
->gb
); //constraint_set1_flag
7133 get_bits1(&s
->gb
); //constraint_set2_flag
7134 get_bits1(&s
->gb
); //constraint_set3_flag
7135 get_bits(&s
->gb
, 4); // reserved
7136 level_idc
= get_bits(&s
->gb
, 8);
7137 sps_id
= get_ue_golomb(&s
->gb
);
7139 sps
= alloc_parameter_set(h
, (void **)h
->sps_buffers
, sps_id
, MAX_SPS_COUNT
, sizeof(SPS
), "sps");
7143 sps
->profile_idc
= profile_idc
;
7144 sps
->level_idc
= level_idc
;
7146 memset(sps
->scaling_matrix4
, 16, sizeof(sps
->scaling_matrix4
));
7147 memset(sps
->scaling_matrix8
, 16, sizeof(sps
->scaling_matrix8
));
7148 sps
->scaling_matrix_present
= 0;
7150 if(sps
->profile_idc
>= 100){ //high profile
7151 sps
->chroma_format_idc
= get_ue_golomb(&s
->gb
);
7152 if(sps
->chroma_format_idc
== 3)
7153 get_bits1(&s
->gb
); //residual_color_transform_flag
7154 get_ue_golomb(&s
->gb
); //bit_depth_luma_minus8
7155 get_ue_golomb(&s
->gb
); //bit_depth_chroma_minus8
7156 sps
->transform_bypass
= get_bits1(&s
->gb
);
7157 decode_scaling_matrices(h
, sps
, NULL
, 1, sps
->scaling_matrix4
, sps
->scaling_matrix8
);
7159 sps
->chroma_format_idc
= 1;
7162 sps
->log2_max_frame_num
= get_ue_golomb(&s
->gb
) + 4;
7163 sps
->poc_type
= get_ue_golomb(&s
->gb
);
7165 if(sps
->poc_type
== 0){ //FIXME #define
7166 sps
->log2_max_poc_lsb
= get_ue_golomb(&s
->gb
) + 4;
7167 } else if(sps
->poc_type
== 1){//FIXME #define
7168 sps
->delta_pic_order_always_zero_flag
= get_bits1(&s
->gb
);
7169 sps
->offset_for_non_ref_pic
= get_se_golomb(&s
->gb
);
7170 sps
->offset_for_top_to_bottom_field
= get_se_golomb(&s
->gb
);
7171 tmp
= get_ue_golomb(&s
->gb
);
7173 if(tmp
>= FF_ARRAY_ELEMS(sps
->offset_for_ref_frame
)){
7174 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "poc_cycle_length overflow %u\n", tmp
);
7177 sps
->poc_cycle_length
= tmp
;
7179 for(i
=0; i
<sps
->poc_cycle_length
; i
++)
7180 sps
->offset_for_ref_frame
[i
]= get_se_golomb(&s
->gb
);
7181 }else if(sps
->poc_type
!= 2){
7182 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal POC type %d\n", sps
->poc_type
);
7186 tmp
= get_ue_golomb(&s
->gb
);
7187 if(tmp
> MAX_PICTURE_COUNT
-2 || tmp
>= 32){
7188 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "too many reference frames\n");
7191 sps
->ref_frame_count
= tmp
;
7192 sps
->gaps_in_frame_num_allowed_flag
= get_bits1(&s
->gb
);
7193 mb_width
= get_ue_golomb(&s
->gb
) + 1;
7194 mb_height
= get_ue_golomb(&s
->gb
) + 1;
7195 if(mb_width
>= INT_MAX
/16 || mb_height
>= INT_MAX
/16 ||
7196 avcodec_check_dimensions(NULL
, 16*mb_width
, 16*mb_height
)){
7197 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_width/height overflow\n");
7200 sps
->mb_width
= mb_width
;
7201 sps
->mb_height
= mb_height
;
7203 sps
->frame_mbs_only_flag
= get_bits1(&s
->gb
);
7204 if(!sps
->frame_mbs_only_flag
)
7205 sps
->mb_aff
= get_bits1(&s
->gb
);
7209 sps
->direct_8x8_inference_flag
= get_bits1(&s
->gb
);
7211 #ifndef ALLOW_INTERLACE
7213 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "MBAFF support not included; enable it at compile-time.\n");
7215 sps
->crop
= get_bits1(&s
->gb
);
7217 sps
->crop_left
= get_ue_golomb(&s
->gb
);
7218 sps
->crop_right
= get_ue_golomb(&s
->gb
);
7219 sps
->crop_top
= get_ue_golomb(&s
->gb
);
7220 sps
->crop_bottom
= get_ue_golomb(&s
->gb
);
7221 if(sps
->crop_left
|| sps
->crop_top
){
7222 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "insane cropping not completely supported, this could look slightly wrong ...\n");
7224 if(sps
->crop_right
>= 8 || sps
->crop_bottom
>= (8>> !sps
->frame_mbs_only_flag
)){
7225 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "brainfart cropping not supported, this could look slightly wrong ...\n");
7231 sps
->crop_bottom
= 0;
7234 sps
->vui_parameters_present_flag
= get_bits1(&s
->gb
);
7235 if( sps
->vui_parameters_present_flag
)
7236 decode_vui_parameters(h
, sps
);
7238 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7239 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7240 sps_id
, sps
->profile_idc
, sps
->level_idc
,
7242 sps
->ref_frame_count
,
7243 sps
->mb_width
, sps
->mb_height
,
7244 sps
->frame_mbs_only_flag
? "FRM" : (sps
->mb_aff
? "MB-AFF" : "PIC-AFF"),
7245 sps
->direct_8x8_inference_flag
? "8B8" : "",
7246 sps
->crop_left
, sps
->crop_right
,
7247 sps
->crop_top
, sps
->crop_bottom
,
7248 sps
->vui_parameters_present_flag
? "VUI" : "",
7249 ((const char*[]){"Gray","420","422","444"})[sps
->chroma_format_idc
]
7253 if (decode_postinit(h
, sps
) < 0)
7260 build_qp_table(PPS
*pps
, int t
, int index
)
7263 for(i
= 0; i
< 52; i
++)
7264 pps
->chroma_qp_table
[t
][i
] = chroma_qp
[av_clip(i
+ index
, 0, 51)];
7267 static inline int decode_picture_parameter_set(H264Context
*h
, int bit_length
){
7268 MpegEncContext
* const s
= &h
->s
;
7269 unsigned int tmp
, pps_id
= get_ue_golomb(&s
->gb
);
7272 pps
= alloc_parameter_set(h
, (void **)h
->pps_buffers
, pps_id
, MAX_PPS_COUNT
, sizeof(PPS
), "pps");
7276 tmp
= get_ue_golomb(&s
->gb
);
7277 if(tmp
>=MAX_SPS_COUNT
|| h
->sps_buffers
[tmp
] == NULL
){
7278 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "sps_id out of range\n");
7283 pps
->cabac
= get_bits1(&s
->gb
);
7284 pps
->pic_order_present
= get_bits1(&s
->gb
);
7285 pps
->slice_group_count
= get_ue_golomb(&s
->gb
) + 1;
7286 if(pps
->slice_group_count
> 1 ){
7287 pps
->mb_slice_group_map_type
= get_ue_golomb(&s
->gb
);
7288 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "FMO not supported\n");
7289 switch(pps
->mb_slice_group_map_type
){
7292 | for( i
= 0; i
<= num_slice_groups_minus1
; i
++ ) | | |
7293 | run_length
[ i
] |1 |ue(v
) |
7298 | for( i
= 0; i
< num_slice_groups_minus1
; i
++ ) | | |
7300 | top_left_mb
[ i
] |1 |ue(v
) |
7301 | bottom_right_mb
[ i
] |1 |ue(v
) |
7309 | slice_group_change_direction_flag
|1 |u(1) |
7310 | slice_group_change_rate_minus1
|1 |ue(v
) |
7315 | slice_group_id_cnt_minus1
|1 |ue(v
) |
7316 | for( i
= 0; i
<= slice_group_id_cnt_minus1
; i
++ | | |
7318 | slice_group_id
[ i
] |1 |u(v
) |
7323 pps
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
7324 pps
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
7325 if(pps
->ref_count
[0]-1 > 32-1 || pps
->ref_count
[1]-1 > 32-1){
7326 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow (pps)\n");
7327 pps
->ref_count
[0]= pps
->ref_count
[1]= 1;
7331 pps
->weighted_pred
= get_bits1(&s
->gb
);
7332 pps
->weighted_bipred_idc
= get_bits(&s
->gb
, 2);
7333 pps
->init_qp
= get_se_golomb(&s
->gb
) + 26;
7334 pps
->init_qs
= get_se_golomb(&s
->gb
) + 26;
7335 pps
->chroma_qp_index_offset
[0]= get_se_golomb(&s
->gb
);
7336 pps
->deblocking_filter_parameters_present
= get_bits1(&s
->gb
);
7337 pps
->constrained_intra_pred
= get_bits1(&s
->gb
);
7338 pps
->redundant_pic_cnt_present
= get_bits1(&s
->gb
);
7340 pps
->transform_8x8_mode
= 0;
7341 h
->dequant_coeff_pps
= -1; //contents of sps/pps can change even if id doesn't, so reinit
7342 memcpy(pps
->scaling_matrix4
, h
->sps_buffers
[pps
->sps_id
]->scaling_matrix4
, sizeof(pps
->scaling_matrix4
));
7343 memcpy(pps
->scaling_matrix8
, h
->sps_buffers
[pps
->sps_id
]->scaling_matrix8
, sizeof(pps
->scaling_matrix8
));
7345 if(get_bits_count(&s
->gb
) < bit_length
){
7346 pps
->transform_8x8_mode
= get_bits1(&s
->gb
);
7347 decode_scaling_matrices(h
, h
->sps_buffers
[pps
->sps_id
], pps
, 0, pps
->scaling_matrix4
, pps
->scaling_matrix8
);
7348 pps
->chroma_qp_index_offset
[1]= get_se_golomb(&s
->gb
); //second_chroma_qp_index_offset
7350 pps
->chroma_qp_index_offset
[1]= pps
->chroma_qp_index_offset
[0];
7353 build_qp_table(pps
, 0, pps
->chroma_qp_index_offset
[0]);
7354 build_qp_table(pps
, 1, pps
->chroma_qp_index_offset
[1]);
7355 if(pps
->chroma_qp_index_offset
[0] != pps
->chroma_qp_index_offset
[1])
7356 h
->pps
.chroma_qp_diff
= 1;
7358 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7359 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7360 pps_id
, pps
->sps_id
,
7361 pps
->cabac
? "CABAC" : "CAVLC",
7362 pps
->slice_group_count
,
7363 pps
->ref_count
[0], pps
->ref_count
[1],
7364 pps
->weighted_pred
? "weighted" : "",
7365 pps
->init_qp
, pps
->init_qs
, pps
->chroma_qp_index_offset
[0], pps
->chroma_qp_index_offset
[1],
7366 pps
->deblocking_filter_parameters_present
? "LPAR" : "",
7367 pps
->constrained_intra_pred
? "CONSTR" : "",
7368 pps
->redundant_pic_cnt_present
? "REDU" : "",
7369 pps
->transform_8x8_mode
? "8x8DCT" : ""
7377 * Call decode_slice() for each context.
7379 * @param h h264 master context
7380 * @param context_count number of contexts to execute
7382 static void execute_decode_slices(H264Context
*h
, int context_count
){
7383 MpegEncContext
* const s
= &h
->s
;
7384 AVCodecContext
* const avctx
= s
->avctx
;
7388 if(avctx
->vdpau_acceleration
) {
7391 if(context_count
== 1) {
7392 decode_slice(avctx
, &h
);
7394 for(i
= 1; i
< context_count
; i
++) {
7395 hx
= h
->thread_context
[i
];
7396 hx
->s
.error_recognition
= avctx
->error_recognition
;
7397 hx
->s
.error_count
= 0;
7400 avctx
->execute(avctx
, (void *)decode_slice
,
7401 (void **)h
->thread_context
, NULL
, context_count
, sizeof(void*));
7403 /* pull back stuff from slices to master context */
7404 hx
= h
->thread_context
[context_count
- 1];
7405 s
->mb_x
= hx
->s
.mb_x
;
7406 s
->mb_y
= hx
->s
.mb_y
;
7407 s
->dropable
= hx
->s
.dropable
;
7408 s
->picture_structure
= hx
->s
.picture_structure
;
7409 for(i
= 1; i
< context_count
; i
++)
7410 h
->s
.error_count
+= h
->thread_context
[i
]->s
.error_count
;
7415 static int decode_nal_units(H264Context
*h
, const uint8_t *buf
, int buf_size
){
7416 MpegEncContext
* const s
= &h
->s
;
7417 AVCodecContext
* const avctx
= s
->avctx
;
7419 H264Context
*hx
; ///< thread context
7420 int context_count
= 0;
7422 h
->max_contexts
= avctx
->thread_count
;
7425 for(i
=0; i
<50; i
++){
7426 av_log(NULL
, AV_LOG_ERROR
,"%02X ", buf
[i
]);
7429 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
)){
7430 h
->current_slice
= 0;
7431 if (!s
->first_field
)
7432 s
->current_picture_ptr
= NULL
;
7444 if(buf_index
>= buf_size
) break;
7446 for(i
= 0; i
< h
->nal_length_size
; i
++)
7447 nalsize
= (nalsize
<< 8) | buf
[buf_index
++];
7448 if(nalsize
<= 1 || (nalsize
+buf_index
> buf_size
)){
7453 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "AVC: nal size %d\n", nalsize
);
7458 // start code prefix search
7459 for(; buf_index
+ 3 < buf_size
; buf_index
++){
7460 // This should always succeed in the first iteration.
7461 if(buf
[buf_index
] == 0 && buf
[buf_index
+1] == 0 && buf
[buf_index
+2] == 1)
7465 if(buf_index
+3 >= buf_size
) break;
7470 hx
= h
->thread_context
[context_count
];
7472 ptr
= decode_nal(hx
, buf
+ buf_index
, &dst_length
, &consumed
, h
->is_avc
? nalsize
: buf_size
- buf_index
);
7473 if (ptr
==NULL
|| dst_length
< 0){
7476 while(ptr
[dst_length
- 1] == 0 && dst_length
> 0)
7478 bit_length
= !dst_length
? 0 : (8*dst_length
- decode_rbsp_trailing(h
, ptr
+ dst_length
- 1));
7480 if(s
->avctx
->debug
&FF_DEBUG_STARTCODE
){
7481 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "NAL %d at %d/%d length %d\n", hx
->nal_unit_type
, buf_index
, buf_size
, dst_length
);
7484 if (h
->is_avc
&& (nalsize
!= consumed
)){
7485 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "AVC: Consumed only %d bytes instead of %d\n", consumed
, nalsize
);
7489 buf_index
+= consumed
;
7491 if( (s
->hurry_up
== 1 && h
->nal_ref_idc
== 0) //FIXME do not discard SEI id
7492 ||(avctx
->skip_frame
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
7497 switch(hx
->nal_unit_type
){
7499 if (h
->nal_unit_type
!= NAL_IDR_SLICE
) {
7500 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "Invalid mix of idr and non-idr slices");
7503 idr(h
); //FIXME ensure we don't loose some frames if there is reordering
7505 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
7507 hx
->inter_gb_ptr
= &hx
->s
.gb
;
7508 hx
->s
.data_partitioning
= 0;
7510 if((err
= decode_slice_header(hx
, h
)))
7513 s
->current_picture_ptr
->key_frame
|= (hx
->nal_unit_type
== NAL_IDR_SLICE
);
7514 if(hx
->redundant_pic_count
==0 && hx
->s
.hurry_up
< 5
7515 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
)
7516 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| hx
->slice_type_nos
!=FF_B_TYPE
)
7517 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| hx
->slice_type_nos
==FF_I_TYPE
)
7518 && avctx
->skip_frame
< AVDISCARD_ALL
)
7522 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
7524 hx
->inter_gb_ptr
= NULL
;
7525 hx
->s
.data_partitioning
= 1;
7527 err
= decode_slice_header(hx
, h
);
7530 init_get_bits(&hx
->intra_gb
, ptr
, bit_length
);
7531 hx
->intra_gb_ptr
= &hx
->intra_gb
;
7534 init_get_bits(&hx
->inter_gb
, ptr
, bit_length
);
7535 hx
->inter_gb_ptr
= &hx
->inter_gb
;
7537 if(hx
->redundant_pic_count
==0 && hx
->intra_gb_ptr
&& hx
->s
.data_partitioning
7538 && s
->context_initialized
7540 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
)
7541 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| hx
->slice_type_nos
!=FF_B_TYPE
)
7542 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| hx
->slice_type_nos
==FF_I_TYPE
)
7543 && avctx
->skip_frame
< AVDISCARD_ALL
)
7547 init_get_bits(&s
->gb
, ptr
, bit_length
);
7551 init_get_bits(&s
->gb
, ptr
, bit_length
);
7552 decode_seq_parameter_set(h
);
7554 if(s
->flags
& CODEC_FLAG_LOW_DELAY
)
7557 if(avctx
->has_b_frames
< 2)
7558 avctx
->has_b_frames
= !s
->low_delay
;
7561 init_get_bits(&s
->gb
, ptr
, bit_length
);
7563 decode_picture_parameter_set(h
, bit_length
);
7567 case NAL_END_SEQUENCE
:
7568 case NAL_END_STREAM
:
7569 case NAL_FILLER_DATA
:
7571 case NAL_AUXILIARY_SLICE
:
7574 av_log(avctx
, AV_LOG_DEBUG
, "Unknown NAL code: %d (%d bits)\n", h
->nal_unit_type
, bit_length
);
7577 if(context_count
== h
->max_contexts
) {
7578 execute_decode_slices(h
, context_count
);
7583 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "decode_slice_header error\n");
7585 /* Slice could not be decoded in parallel mode, copy down
7586 * NAL unit stuff to context 0 and restart. Note that
7587 * rbsp_buffer is not transferred, but since we no longer
7588 * run in parallel mode this should not be an issue. */
7589 h
->nal_unit_type
= hx
->nal_unit_type
;
7590 h
->nal_ref_idc
= hx
->nal_ref_idc
;
7596 execute_decode_slices(h
, context_count
);
7601 * returns the number of bytes consumed for building the current frame
7603 static int get_consumed_bytes(MpegEncContext
*s
, int pos
, int buf_size
){
7604 if(pos
==0) pos
=1; //avoid infinite loops (i doubt that is needed but ...)
7605 if(pos
+10>buf_size
) pos
=buf_size
; // oops ;)
7610 static int decode_frame(AVCodecContext
*avctx
,
7611 void *data
, int *data_size
,
7612 const uint8_t *buf
, int buf_size
)
7614 H264Context
*h
= avctx
->priv_data
;
7615 MpegEncContext
*s
= &h
->s
;
7616 AVFrame
*pict
= data
;
7619 s
->flags
= avctx
->flags
;
7620 s
->flags2
= avctx
->flags2
;
7622 /* end of stream, output what is still in the buffers */
7623 if (buf_size
== 0) {
7627 //FIXME factorize this with the output code below
7628 out
= h
->delayed_pic
[0];
7630 for(i
=1; h
->delayed_pic
[i
] && (h
->delayed_pic
[i
]->poc
&& !h
->delayed_pic
[i
]->key_frame
); i
++)
7631 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7632 out
= h
->delayed_pic
[i
];
7636 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7637 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7640 *data_size
= sizeof(AVFrame
);
7641 *pict
= *(AVFrame
*)out
;
7647 if(h
->is_avc
&& !h
->got_avcC
) {
7648 int i
, cnt
, nalsize
;
7649 unsigned char *p
= avctx
->extradata
;
7650 if(avctx
->extradata_size
< 7) {
7651 av_log(avctx
, AV_LOG_ERROR
, "avcC too short\n");
7655 av_log(avctx
, AV_LOG_ERROR
, "Unknown avcC version %d\n", *p
);
7658 /* sps and pps in the avcC always have length coded with 2 bytes,
7659 so put a fake nal_length_size = 2 while parsing them */
7660 h
->nal_length_size
= 2;
7661 // Decode sps from avcC
7662 cnt
= *(p
+5) & 0x1f; // Number of sps
7664 for (i
= 0; i
< cnt
; i
++) {
7665 nalsize
= AV_RB16(p
) + 2;
7666 if(decode_nal_units(h
, p
, nalsize
) < 0) {
7667 av_log(avctx
, AV_LOG_ERROR
, "Decoding sps %d from avcC failed\n", i
);
7672 // Decode pps from avcC
7673 cnt
= *(p
++); // Number of pps
7674 for (i
= 0; i
< cnt
; i
++) {
7675 nalsize
= AV_RB16(p
) + 2;
7676 if(decode_nal_units(h
, p
, nalsize
) != nalsize
) {
7677 av_log(avctx
, AV_LOG_ERROR
, "Decoding pps %d from avcC failed\n", i
);
7682 // Now store right nal length size, that will be use to parse all other nals
7683 h
->nal_length_size
= ((*(((char*)(avctx
->extradata
))+4))&0x03)+1;
7684 // Do not reparse avcC
7688 if(!h
->got_avcC
&& !h
->is_avc
&& s
->avctx
->extradata_size
){
7689 if(decode_nal_units(h
, s
->avctx
->extradata
, s
->avctx
->extradata_size
) < 0)
7694 buf_index
=decode_nal_units(h
, buf
, buf_size
);
7698 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
) && !s
->current_picture_ptr
){
7699 if (avctx
->skip_frame
>= AVDISCARD_NONREF
|| s
->hurry_up
) return 0;
7700 av_log(avctx
, AV_LOG_ERROR
, "no frame!\n");
7704 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
) || (s
->mb_y
>= s
->mb_height
&& s
->mb_height
)){
7705 Picture
*out
= s
->current_picture_ptr
;
7706 Picture
*cur
= s
->current_picture_ptr
;
7707 int i
, pics
, cross_idr
, out_of_order
, out_idx
;
7711 s
->current_picture_ptr
->qscale_type
= FF_QSCALE_TYPE_H264
;
7712 s
->current_picture_ptr
->pict_type
= s
->pict_type
;
7715 if (avctx
->vdpau_acceleration
) {
7716 ff_VDPAU_h264_set_reference_frames(h
);
7721 execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
7722 h
->prev_poc_msb
= h
->poc_msb
;
7723 h
->prev_poc_lsb
= h
->poc_lsb
;
7725 h
->prev_frame_num_offset
= h
->frame_num_offset
;
7726 h
->prev_frame_num
= h
->frame_num
;
7729 if (avctx
->vdpau_acceleration
) {
7730 ff_VDPAU_h264_picture_complete(h
, buf
, buf_size
);
7735 * FIXME: Error handling code does not seem to support interlaced
7736 * when slices span multiple rows
7737 * The ff_er_add_slice calls don't work right for bottom
7738 * fields; they cause massive erroneous error concealing
7739 * Error marking covers both fields (top and bottom).
7740 * This causes a mismatched s->error_count
7741 * and a bad error table. Further, the error count goes to
7742 * INT_MAX when called for bottom field, because mb_y is
7743 * past end by one (callers fault) and resync_mb_y != 0
7744 * causes problems for the first MB line, too.
7747 if (!avctx
->vdpau_acceleration
)
7754 if (cur
->field_poc
[0]==INT_MAX
|| cur
->field_poc
[1]==INT_MAX
) {
7755 /* Wait for second field. */
7759 cur
->repeat_pict
= 0;
7761 /* Signal interlacing information externally. */
7762 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7763 if(h
->sps
.pic_struct_present_flag
){
7764 switch (h
->sei_pic_struct
)
7766 case SEI_PIC_STRUCT_FRAME
:
7767 cur
->interlaced_frame
= 0;
7769 case SEI_PIC_STRUCT_TOP_FIELD
:
7770 case SEI_PIC_STRUCT_BOTTOM_FIELD
:
7771 case SEI_PIC_STRUCT_TOP_BOTTOM
:
7772 case SEI_PIC_STRUCT_BOTTOM_TOP
:
7773 cur
->interlaced_frame
= 1;
7775 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP
:
7776 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM
:
7777 // Signal the possibility of telecined film externally (pic_struct 5,6)
7778 // From these hints, let the applications decide if they apply deinterlacing.
7779 cur
->repeat_pict
= 1;
7780 cur
->interlaced_frame
= FIELD_OR_MBAFF_PICTURE
;
7782 case SEI_PIC_STRUCT_FRAME_DOUBLING
:
7783 // Force progressive here, as doubling interlaced frame is a bad idea.
7784 cur
->interlaced_frame
= 0;
7785 cur
->repeat_pict
= 2;
7787 case SEI_PIC_STRUCT_FRAME_TRIPLING
:
7788 cur
->interlaced_frame
= 0;
7789 cur
->repeat_pict
= 4;
7793 /* Derive interlacing flag from used decoding process. */
7794 cur
->interlaced_frame
= FIELD_OR_MBAFF_PICTURE
;
7797 if (cur
->field_poc
[0] != cur
->field_poc
[1]){
7798 /* Derive top_field_first from field pocs. */
7799 cur
->top_field_first
= cur
->field_poc
[0] < cur
->field_poc
[1];
7801 if(cur
->interlaced_frame
|| h
->sps
.pic_struct_present_flag
){
7802 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7803 if(h
->sei_pic_struct
== SEI_PIC_STRUCT_TOP_BOTTOM
7804 || h
->sei_pic_struct
== SEI_PIC_STRUCT_TOP_BOTTOM_TOP
)
7805 cur
->top_field_first
= 1;
7807 cur
->top_field_first
= 0;
7809 /* Most likely progressive */
7810 cur
->top_field_first
= 0;
7814 //FIXME do something with unavailable reference frames
7816 /* Sort B-frames into display order */
7818 if(h
->sps
.bitstream_restriction_flag
7819 && s
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
){
7820 s
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
7824 if( s
->avctx
->strict_std_compliance
>= FF_COMPLIANCE_STRICT
7825 && !h
->sps
.bitstream_restriction_flag
){
7826 s
->avctx
->has_b_frames
= MAX_DELAYED_PIC_COUNT
;
7831 while(h
->delayed_pic
[pics
]) pics
++;
7833 assert(pics
<= MAX_DELAYED_PIC_COUNT
);
7835 h
->delayed_pic
[pics
++] = cur
;
7836 if(cur
->reference
== 0)
7837 cur
->reference
= DELAYED_PIC_REF
;
7839 out
= h
->delayed_pic
[0];
7841 for(i
=1; h
->delayed_pic
[i
] && (h
->delayed_pic
[i
]->poc
&& !h
->delayed_pic
[i
]->key_frame
); i
++)
7842 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7843 out
= h
->delayed_pic
[i
];
7846 cross_idr
= !h
->delayed_pic
[0]->poc
|| !!h
->delayed_pic
[i
] || h
->delayed_pic
[0]->key_frame
;
7848 out_of_order
= !cross_idr
&& out
->poc
< h
->outputed_poc
;
7850 if(h
->sps
.bitstream_restriction_flag
&& s
->avctx
->has_b_frames
>= h
->sps
.num_reorder_frames
)
7852 else if((out_of_order
&& pics
-1 == s
->avctx
->has_b_frames
&& s
->avctx
->has_b_frames
< MAX_DELAYED_PIC_COUNT
)
7854 ((!cross_idr
&& out
->poc
> h
->outputed_poc
+ 2)
7855 || cur
->pict_type
== FF_B_TYPE
)))
7858 s
->avctx
->has_b_frames
++;
7861 if(out_of_order
|| pics
> s
->avctx
->has_b_frames
){
7862 out
->reference
&= ~DELAYED_PIC_REF
;
7863 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7864 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7866 if(!out_of_order
&& pics
> s
->avctx
->has_b_frames
){
7867 *data_size
= sizeof(AVFrame
);
7869 h
->outputed_poc
= out
->poc
;
7870 *pict
= *(AVFrame
*)out
;
7872 av_log(avctx
, AV_LOG_DEBUG
, "no picture\n");
7877 assert(pict
->data
[0] || !*data_size
);
7878 ff_print_debug_info(s
, pict
);
7879 //printf("out %d\n", (int)pict->data[0]);
7882 /* Return the Picture timestamp as the frame number */
7883 /* we subtract 1 because it is added on utils.c */
7884 avctx
->frame_number
= s
->picture_number
- 1;
7886 return get_consumed_bytes(s
, buf_index
, buf_size
);
7889 static inline void fill_mb_avail(H264Context
*h
){
7890 MpegEncContext
* const s
= &h
->s
;
7891 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
7894 h
->mb_avail
[0]= s
->mb_x
&& h
->slice_table
[mb_xy
- s
->mb_stride
- 1] == h
->slice_num
;
7895 h
->mb_avail
[1]= h
->slice_table
[mb_xy
- s
->mb_stride
] == h
->slice_num
;
7896 h
->mb_avail
[2]= s
->mb_x
+1 < s
->mb_width
&& h
->slice_table
[mb_xy
- s
->mb_stride
+ 1] == h
->slice_num
;
7902 h
->mb_avail
[3]= s
->mb_x
&& h
->slice_table
[mb_xy
- 1] == h
->slice_num
;
7903 h
->mb_avail
[4]= 1; //FIXME move out
7904 h
->mb_avail
[5]= 0; //FIXME move out
7912 #define SIZE (COUNT*40)
7918 // int int_temp[10000];
7920 AVCodecContext avctx
;
7922 dsputil_init(&dsp
, &avctx
);
7924 init_put_bits(&pb
, temp
, SIZE
);
7925 printf("testing unsigned exp golomb\n");
7926 for(i
=0; i
<COUNT
; i
++){
7928 set_ue_golomb(&pb
, i
);
7929 STOP_TIMER("set_ue_golomb");
7931 flush_put_bits(&pb
);
7933 init_get_bits(&gb
, temp
, 8*SIZE
);
7934 for(i
=0; i
<COUNT
; i
++){
7937 s
= show_bits(&gb
, 24);
7940 j
= get_ue_golomb(&gb
);
7942 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7945 STOP_TIMER("get_ue_golomb");
7949 init_put_bits(&pb
, temp
, SIZE
);
7950 printf("testing signed exp golomb\n");
7951 for(i
=0; i
<COUNT
; i
++){
7953 set_se_golomb(&pb
, i
- COUNT
/2);
7954 STOP_TIMER("set_se_golomb");
7956 flush_put_bits(&pb
);
7958 init_get_bits(&gb
, temp
, 8*SIZE
);
7959 for(i
=0; i
<COUNT
; i
++){
7962 s
= show_bits(&gb
, 24);
7965 j
= get_se_golomb(&gb
);
7966 if(j
!= i
- COUNT
/2){
7967 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7970 STOP_TIMER("get_se_golomb");
7974 printf("testing 4x4 (I)DCT\n");
7977 uint8_t src
[16], ref
[16];
7978 uint64_t error
= 0, max_error
=0;
7980 for(i
=0; i
<COUNT
; i
++){
7982 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7983 for(j
=0; j
<16; j
++){
7984 ref
[j
]= random()%255;
7985 src
[j
]= random()%255;
7988 h264_diff_dct_c(block
, src
, ref
, 4);
7991 for(j
=0; j
<16; j
++){
7992 // printf("%d ", block[j]);
7993 block
[j
]= block
[j
]*4;
7994 if(j
&1) block
[j
]= (block
[j
]*4 + 2)/5;
7995 if(j
&4) block
[j
]= (block
[j
]*4 + 2)/5;
7999 s
->dsp
.h264_idct_add(ref
, block
, 4);
8000 /* for(j=0; j<16; j++){
8001 printf("%d ", ref[j]);
8005 for(j
=0; j
<16; j
++){
8006 int diff
= FFABS(src
[j
] - ref
[j
]);
8009 max_error
= FFMAX(max_error
, diff
);
8012 printf("error=%f max_error=%d\n", ((float)error
)/COUNT
/16, (int)max_error
);
8013 printf("testing quantizer\n");
8014 for(qp
=0; qp
<52; qp
++){
8016 src1_block
[i
]= src2_block
[i
]= random()%255;
8019 printf("Testing NAL layer\n");
8021 uint8_t bitstream
[COUNT
];
8022 uint8_t nal
[COUNT
*2];
8024 memset(&h
, 0, sizeof(H264Context
));
8026 for(i
=0; i
<COUNT
; i
++){
8034 for(j
=0; j
<COUNT
; j
++){
8035 bitstream
[j
]= (random() % 255) + 1;
8038 for(j
=0; j
<zeros
; j
++){
8039 int pos
= random() % COUNT
;
8040 while(bitstream
[pos
] == 0){
8049 nal_length
= encode_nal(&h
, nal
, bitstream
, COUNT
, COUNT
*2);
8051 printf("encoding failed\n");
8055 out
= decode_nal(&h
, nal
, &out_length
, &consumed
, nal_length
);
8059 if(out_length
!= COUNT
){
8060 printf("incorrect length %d %d\n", out_length
, COUNT
);
8064 if(consumed
!= nal_length
){
8065 printf("incorrect consumed length %d %d\n", nal_length
, consumed
);
8069 if(memcmp(bitstream
, out
, COUNT
)){
8070 printf("mismatch\n");
8076 printf("Testing RBSP\n");
8084 static av_cold
int decode_end(AVCodecContext
*avctx
)
8086 H264Context
*h
= avctx
->priv_data
;
8087 MpegEncContext
*s
= &h
->s
;
8090 av_freep(&h
->rbsp_buffer
[0]);
8091 av_freep(&h
->rbsp_buffer
[1]);
8092 free_tables(h
); //FIXME cleanup init stuff perhaps
8094 for(i
= 0; i
< MAX_SPS_COUNT
; i
++)
8095 av_freep(h
->sps_buffers
+ i
);
8097 for(i
= 0; i
< MAX_PPS_COUNT
; i
++)
8098 av_freep(h
->pps_buffers
+ i
);
8102 // memset(h, 0, sizeof(H264Context));
8108 AVCodec h264_decoder
= {
8112 sizeof(H264Context
),
8117 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1
| CODEC_CAP_DELAY
,
8119 .long_name
= NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8123 static av_cold
int h264_vdpau_decode_init(AVCodecContext
*avctx
){
8124 if( avctx
->thread_count
> 1)
8126 if( !(avctx
->slice_flags
& SLICE_FLAG_CODED_ORDER
) )
8128 if( !(avctx
->slice_flags
& SLICE_FLAG_ALLOW_FIELD
) ){
8129 dprintf(avctx
, "h264.c: VDPAU decoder does not set SLICE_FLAG_ALLOW_FIELD\n");
8133 avctx
->vdpau_acceleration
= 1;
8138 AVCodec h264_vdpau_decoder
= {
8141 CODEC_ID_H264_VDPAU
,
8142 sizeof(H264Context
),
8143 h264_vdpau_decode_init
,
8147 CODEC_CAP_DR1
| CODEC_CAP_DELAY
| CODEC_CAP_HWACCEL_VDPAU
,
8149 .long_name
= NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),