2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
51 static VLC coeff_token_vlc
[4];
52 static VLC_TYPE coeff_token_vlc_tables
[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size
[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc
;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table
[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size
= 256;
59 static VLC total_zeros_vlc
[15];
60 static VLC_TYPE total_zeros_vlc_tables
[15][512][2];
61 static const int total_zeros_vlc_tables_size
= 512;
63 static VLC chroma_dc_total_zeros_vlc
[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables
[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size
= 8;
67 static VLC run_vlc
[6];
68 static VLC_TYPE run_vlc_tables
[6][8][2];
69 static const int run_vlc_tables_size
= 8;
72 static VLC_TYPE run7_vlc_table
[96][2];
73 static const int run7_vlc_table_size
= 96;
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
);
76 static void svq3_add_idct_c(uint8_t *dst
, DCTELEM
*block
, int stride
, int qp
, int dc
);
77 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
78 static void filter_mb_fast( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
79 static Picture
* remove_long(H264Context
*h
, int i
, int ref_mask
);
/* Fallback so this block is self-contained if the project macro is not yet
 * defined (in the real build, common.h defines av_always_inline first and
 * this guard is a no-op). */
#ifndef av_always_inline
#define av_always_inline inline
#endif

/**
 * Packs two 16-bit values into one 32-bit word, low/high halves chosen by
 * host endianness so that the pair round-trips through byte-wise stores.
 * Computed in uint32_t: the original added '(a<<16)' on a signed int, which
 * is undefined behavior when a is negative; masking/shifting unsigned gives
 * the identical bit pattern without UB.
 * @param a first 16-bit value (only the low 16 bits are used)
 * @param b second 16-bit value (only the low 16 bits are used)
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    return ((uint32_t)b & 0xFFFF) | ((uint32_t)a << 16);
#else
    return ((uint32_t)a & 0xFFFF) | ((uint32_t)b << 16);
#endif
}
89 const uint8_t ff_rem6
[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
93 const uint8_t ff_div6
[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
97 static const int left_block_options
[4][8]={
104 static void fill_caches(H264Context
*h
, int mb_type
, int for_deblock
){
105 MpegEncContext
* const s
= &h
->s
;
106 const int mb_xy
= h
->mb_xy
;
107 int topleft_xy
, top_xy
, topright_xy
, left_xy
[2];
108 int topleft_type
, top_type
, topright_type
, left_type
[2];
110 int topleft_partition
= -1;
113 top_xy
= mb_xy
- (s
->mb_stride
<< FIELD_PICTURE
);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock
&& (h
->slice_num
== 1 || h
->slice_table
[mb_xy
] == h
->slice_table
[top_xy
]) && !FRAME_MBAFF
)
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy
= top_xy
- 1;
123 topright_xy
= top_xy
+ 1;
124 left_xy
[1] = left_xy
[0] = mb_xy
-1;
125 left_block
= left_block_options
[0];
127 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
128 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
129 const int topleft_pair_xy
= top_pair_xy
- 1;
130 const int topright_pair_xy
= top_pair_xy
+ 1;
131 const int topleft_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topleft_pair_xy
]);
132 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
133 const int topright_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topright_pair_xy
]);
134 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
135 const int curr_mb_frame_flag
= !IS_INTERLACED(mb_type
);
136 const int bottom
= (s
->mb_y
& 1);
137 tprintf(s
->avctx
, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag
, left_mb_frame_flag
, topleft_mb_frame_flag
, top_mb_frame_flag
, topright_mb_frame_flag
);
139 ? !curr_mb_frame_flag
// bottom macroblock
140 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
142 top_xy
-= s
->mb_stride
;
145 ? !curr_mb_frame_flag
// bottom macroblock
146 : (!curr_mb_frame_flag
&& !topleft_mb_frame_flag
) // top macroblock
148 topleft_xy
-= s
->mb_stride
;
149 } else if(bottom
&& curr_mb_frame_flag
&& !left_mb_frame_flag
) {
150 topleft_xy
+= s
->mb_stride
;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition
= 0;
155 ? !curr_mb_frame_flag
// bottom macroblock
156 : (!curr_mb_frame_flag
&& !topright_mb_frame_flag
) // top macroblock
158 topright_xy
-= s
->mb_stride
;
160 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
161 left_xy
[1] = left_xy
[0] = pair_xy
- 1;
162 if (curr_mb_frame_flag
) {
164 left_block
= left_block_options
[1];
166 left_block
= left_block_options
[2];
169 left_xy
[1] += s
->mb_stride
;
170 left_block
= left_block_options
[3];
175 h
->top_mb_xy
= top_xy
;
176 h
->left_mb_xy
[0] = left_xy
[0];
177 h
->left_mb_xy
[1] = left_xy
[1];
181 top_type
= h
->slice_table
[top_xy
] < 255 ? s
->current_picture
.mb_type
[top_xy
] : 0;
182 left_type
[0] = h
->slice_table
[left_xy
[0] ] < 255 ? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
183 left_type
[1] = h
->slice_table
[left_xy
[1] ] < 255 ? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
185 if(MB_MBAFF
&& !IS_INTRA(mb_type
)){
187 for(list
=0; list
<h
->list_count
; list
++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type
,list
)){
192 int8_t *ref
= &s
->current_picture
.ref_index
[list
][h
->mb2b8_xy
[mb_xy
]];
193 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 0]] =
194 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 2]] = (pack16to32(ref
[0],ref
[1])&0x00FF00FF)*0x0101;
196 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 8]] =
197 *(uint32_t*)&h
->ref_cache
[list
][scan8
[10]] = (pack16to32(ref
[0],ref
[1])&0x00FF00FF)*0x0101;
202 topleft_type
= h
->slice_table
[topleft_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topleft_xy
] : 0;
203 top_type
= h
->slice_table
[top_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[top_xy
] : 0;
204 topright_type
= h
->slice_table
[topright_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topright_xy
]: 0;
205 left_type
[0] = h
->slice_table
[left_xy
[0] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
206 left_type
[1] = h
->slice_table
[left_xy
[1] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
208 if(IS_INTRA(mb_type
)){
209 int type_mask
= h
->pps
.constrained_intra_pred
? IS_INTRA(-1) : -1;
210 h
->topleft_samples_available
=
211 h
->top_samples_available
=
212 h
->left_samples_available
= 0xFFFF;
213 h
->topright_samples_available
= 0xEEEA;
215 if(!(top_type
& type_mask
)){
216 h
->topleft_samples_available
= 0xB3FF;
217 h
->top_samples_available
= 0x33FF;
218 h
->topright_samples_available
= 0x26EA;
220 if(IS_INTERLACED(mb_type
) != IS_INTERLACED(left_type
[0])){
221 if(IS_INTERLACED(mb_type
)){
222 if(!(left_type
[0] & type_mask
)){
223 h
->topleft_samples_available
&= 0xDFFF;
224 h
->left_samples_available
&= 0x5FFF;
226 if(!(left_type
[1] & type_mask
)){
227 h
->topleft_samples_available
&= 0xFF5F;
228 h
->left_samples_available
&= 0xFF5F;
231 int left_typei
= h
->slice_table
[left_xy
[0] + s
->mb_stride
] == h
->slice_num
232 ? s
->current_picture
.mb_type
[left_xy
[0] + s
->mb_stride
] : 0;
233 assert(left_xy
[0] == left_xy
[1]);
234 if(!((left_typei
& type_mask
) && (left_type
[0] & type_mask
))){
235 h
->topleft_samples_available
&= 0xDF5F;
236 h
->left_samples_available
&= 0x5F5F;
240 if(!(left_type
[0] & type_mask
)){
241 h
->topleft_samples_available
&= 0xDF5F;
242 h
->left_samples_available
&= 0x5F5F;
246 if(!(topleft_type
& type_mask
))
247 h
->topleft_samples_available
&= 0x7FFF;
249 if(!(topright_type
& type_mask
))
250 h
->topright_samples_available
&= 0xFBFF;
252 if(IS_INTRA4x4(mb_type
)){
253 if(IS_INTRA4x4(top_type
)){
254 h
->intra4x4_pred_mode_cache
[4+8*0]= h
->intra4x4_pred_mode
[top_xy
][4];
255 h
->intra4x4_pred_mode_cache
[5+8*0]= h
->intra4x4_pred_mode
[top_xy
][5];
256 h
->intra4x4_pred_mode_cache
[6+8*0]= h
->intra4x4_pred_mode
[top_xy
][6];
257 h
->intra4x4_pred_mode_cache
[7+8*0]= h
->intra4x4_pred_mode
[top_xy
][3];
260 if(!(top_type
& type_mask
))
265 h
->intra4x4_pred_mode_cache
[4+8*0]=
266 h
->intra4x4_pred_mode_cache
[5+8*0]=
267 h
->intra4x4_pred_mode_cache
[6+8*0]=
268 h
->intra4x4_pred_mode_cache
[7+8*0]= pred
;
271 if(IS_INTRA4x4(left_type
[i
])){
272 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[0+2*i
]];
273 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[1+2*i
]];
276 if(!(left_type
[i
] & type_mask
))
281 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]=
282 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= pred
;
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
300 h
->non_zero_count_cache
[4+8*0]= h
->non_zero_count
[top_xy
][4];
301 h
->non_zero_count_cache
[5+8*0]= h
->non_zero_count
[top_xy
][5];
302 h
->non_zero_count_cache
[6+8*0]= h
->non_zero_count
[top_xy
][6];
303 h
->non_zero_count_cache
[7+8*0]= h
->non_zero_count
[top_xy
][3];
305 h
->non_zero_count_cache
[1+8*0]= h
->non_zero_count
[top_xy
][9];
306 h
->non_zero_count_cache
[2+8*0]= h
->non_zero_count
[top_xy
][8];
308 h
->non_zero_count_cache
[1+8*3]= h
->non_zero_count
[top_xy
][12];
309 h
->non_zero_count_cache
[2+8*3]= h
->non_zero_count
[top_xy
][11];
312 h
->non_zero_count_cache
[4+8*0]=
313 h
->non_zero_count_cache
[5+8*0]=
314 h
->non_zero_count_cache
[6+8*0]=
315 h
->non_zero_count_cache
[7+8*0]=
317 h
->non_zero_count_cache
[1+8*0]=
318 h
->non_zero_count_cache
[2+8*0]=
320 h
->non_zero_count_cache
[1+8*3]=
321 h
->non_zero_count_cache
[2+8*3]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
325 for (i
=0; i
<2; i
++) {
327 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[0+2*i
]];
328 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[1+2*i
]];
329 h
->non_zero_count_cache
[0+8*1 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[4+2*i
]];
330 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[5+2*i
]];
332 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]=
333 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]=
334 h
->non_zero_count_cache
[0+8*1 + 8*i
]=
335 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
342 h
->top_cbp
= h
->cbp_table
[top_xy
];
343 } else if(IS_INTRA(mb_type
)) {
350 h
->left_cbp
= h
->cbp_table
[left_xy
[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type
)) {
357 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[0]]>>((left_block
[0]&(~1))+1))&0x1) << 1;
360 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[1]]>>((left_block
[2]&(~1))+1))&0x1) << 3;
365 if(IS_INTER(mb_type
) || IS_DIRECT(mb_type
)){
367 for(list
=0; list
<h
->list_count
; list
++){
368 if(!USES_LIST(mb_type
, list
) && !IS_DIRECT(mb_type
) && !h
->deblocking_filter
){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
376 h
->mv_cache_clean
[list
]= 0;
378 if(USES_LIST(top_type
, list
)){
379 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
380 const int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
381 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0];
382 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 1];
383 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2];
384 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 3];
385 h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]=
386 h
->ref_cache
[list
][scan8
[0] + 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 0];
387 h
->ref_cache
[list
][scan8
[0] + 2 - 1*8]=
388 h
->ref_cache
[list
][scan8
[0] + 3 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 1];
390 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]=
391 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]=
392 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]=
393 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]= ((top_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
)&0xFF)*0x01010101;
398 int cache_idx
= scan8
[0] - 1 + i
*2*8;
399 if(USES_LIST(left_type
[i
], list
)){
400 const int b_xy
= h
->mb2b_xy
[left_xy
[i
]] + 3;
401 const int b8_xy
= h
->mb2b8_xy
[left_xy
[i
]] + 1;
402 *(uint32_t*)h
->mv_cache
[list
][cache_idx
]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[0+i
*2]];
403 *(uint32_t*)h
->mv_cache
[list
][cache_idx
+8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[1+i
*2]];
404 h
->ref_cache
[list
][cache_idx
]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[0+i
*2]>>1)];
405 h
->ref_cache
[list
][cache_idx
+8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[1+i
*2]>>1)];
407 *(uint32_t*)h
->mv_cache
[list
][cache_idx
]=
408 *(uint32_t*)h
->mv_cache
[list
][cache_idx
+8]= 0;
409 h
->ref_cache
[list
][cache_idx
]=
410 h
->ref_cache
[list
][cache_idx
+8]= left_type
[i
] ? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
414 if(for_deblock
|| ((IS_DIRECT(mb_type
) && !h
->direct_spatial_mv_pred
) && !FRAME_MBAFF
))
417 if(USES_LIST(topleft_type
, list
)){
418 const int b_xy
= h
->mb2b_xy
[topleft_xy
] + 3 + h
->b_stride
+ (topleft_partition
& 2*h
->b_stride
);
419 const int b8_xy
= h
->mb2b8_xy
[topleft_xy
] + 1 + (topleft_partition
& h
->b8_stride
);
420 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
421 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
423 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= 0;
424 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= topleft_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
427 if(USES_LIST(topright_type
, list
)){
428 const int b_xy
= h
->mb2b_xy
[topright_xy
] + 3*h
->b_stride
;
429 const int b8_xy
= h
->mb2b8_xy
[topright_xy
] + h
->b8_stride
;
430 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
431 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
433 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= 0;
434 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= topright_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
437 if((IS_SKIP(mb_type
) || IS_DIRECT(mb_type
)) && !FRAME_MBAFF
)
440 h
->ref_cache
[list
][scan8
[5 ]+1] =
441 h
->ref_cache
[list
][scan8
[7 ]+1] =
442 h
->ref_cache
[list
][scan8
[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h
->ref_cache
[list
][scan8
[4 ]] =
444 h
->ref_cache
[list
][scan8
[12]] = PART_NOT_AVAILABLE
;
445 *(uint32_t*)h
->mv_cache
[list
][scan8
[5 ]+1]=
446 *(uint32_t*)h
->mv_cache
[list
][scan8
[7 ]+1]=
447 *(uint32_t*)h
->mv_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h
->mv_cache
[list
][scan8
[4 ]]=
449 *(uint32_t*)h
->mv_cache
[list
][scan8
[12]]= 0;
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type
, list
)){
454 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
455 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 0];
456 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 1];
457 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 2];
458 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 3];
460 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]=
461 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]=
462 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]=
463 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type
[0], list
)){
466 const int b_xy
= h
->mb2b_xy
[left_xy
[0]] + 3;
467 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[0]];
468 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[1]];
470 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]=
471 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type
[1], list
)){
474 const int b_xy
= h
->mb2b_xy
[left_xy
[1]] + 3;
475 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[2]];
476 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[3]];
478 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]=
479 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h
->mvd_cache
[list
][scan8
[5 ]+1]=
482 *(uint32_t*)h
->mvd_cache
[list
][scan8
[7 ]+1]=
483 *(uint32_t*)h
->mvd_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h
->mvd_cache
[list
][scan8
[4 ]]=
485 *(uint32_t*)h
->mvd_cache
[list
][scan8
[12]]= 0;
487 if(h
->slice_type_nos
== FF_B_TYPE
){
488 fill_rectangle(&h
->direct_cache
[scan8
[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type
)){
491 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type
)){
493 int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
494 h
->direct_cache
[scan8
[0] + 0 - 1*8]= h
->direct_table
[b8_xy
];
495 h
->direct_cache
[scan8
[0] + 2 - 1*8]= h
->direct_table
[b8_xy
+ 1];
497 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type
[0]))
501 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type
[0]))
503 h
->direct_cache
[scan8
[0] - 1 + 0*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[0]] + 1 + h
->b8_stride
*(left_block
[0]>>1)];
505 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type
[1]))
508 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type
[1]))
510 h
->direct_cache
[scan8
[0] - 1 + 2*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[1]] + 1 + h
->b8_stride
*(left_block
[2]>>1)];
512 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 0;
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
552 h
->neighbor_transform_size
= !!IS_8x8DCT(top_type
) + !!IS_8x8DCT(left_type
[0]);
555 static inline void write_back_intra_pred_mode(H264Context
*h
){
556 const int mb_xy
= h
->mb_xy
;
558 h
->intra4x4_pred_mode
[mb_xy
][0]= h
->intra4x4_pred_mode_cache
[7+8*1];
559 h
->intra4x4_pred_mode
[mb_xy
][1]= h
->intra4x4_pred_mode_cache
[7+8*2];
560 h
->intra4x4_pred_mode
[mb_xy
][2]= h
->intra4x4_pred_mode_cache
[7+8*3];
561 h
->intra4x4_pred_mode
[mb_xy
][3]= h
->intra4x4_pred_mode_cache
[7+8*4];
562 h
->intra4x4_pred_mode
[mb_xy
][4]= h
->intra4x4_pred_mode_cache
[4+8*4];
563 h
->intra4x4_pred_mode
[mb_xy
][5]= h
->intra4x4_pred_mode_cache
[5+8*4];
564 h
->intra4x4_pred_mode
[mb_xy
][6]= h
->intra4x4_pred_mode_cache
[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
570 static inline int check_intra4x4_pred_mode(H264Context
*h
){
571 MpegEncContext
* const s
= &h
->s
;
572 static const int8_t top
[12]= {-1, 0,LEFT_DC_PRED
,-1,-1,-1,-1,-1, 0};
573 static const int8_t left
[12]= { 0,-1, TOP_DC_PRED
, 0,-1,-1,-1, 0,-1,DC_128_PRED
};
576 if(!(h
->top_samples_available
&0x8000)){
578 int status
= top
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + i
] ];
580 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
583 h
->intra4x4_pred_mode_cache
[scan8
[0] + i
]= status
;
588 if((h
->left_samples_available
&0x8888)!=0x8888){
589 static const int mask
[4]={0x8000,0x2000,0x80,0x20};
591 if(!(h
->left_samples_available
&mask
[i
])){
592 int status
= left
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
] ];
594 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
597 h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
]= status
;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
609 static inline int check_intra_pred_mode(H264Context
*h
, int mode
){
610 MpegEncContext
* const s
= &h
->s
;
611 static const int8_t top
[7]= {LEFT_DC_PRED8x8
, 1,-1,-1};
612 static const int8_t left
[7]= { TOP_DC_PRED8x8
,-1, 2,-1,DC_128_PRED8x8
};
615 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "out of range intra chroma pred mode at %d %d\n", s
->mb_x
, s
->mb_y
);
619 if(!(h
->top_samples_available
&0x8000)){
622 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
627 if((h
->left_samples_available
&0x8080) != 0x8080){
629 if(h
->left_samples_available
&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode
= ALZHEIMER_DC_L0T_PRED8x8
+ (!(h
->left_samples_available
&0x8000)) + 2*(mode
== DC_128_PRED8x8
);
633 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context
*h
, int n
){
645 const int index8
= scan8
[n
];
646 const int left
= h
->intra4x4_pred_mode_cache
[index8
- 1];
647 const int top
= h
->intra4x4_pred_mode_cache
[index8
- 8];
648 const int min
= FFMIN(left
, top
);
650 tprintf(h
->s
.avctx
, "mode:%d %d min:%d\n", left
,top
, min
);
652 if(min
<0) return DC_PRED
;
656 static inline void write_back_non_zero_count(H264Context
*h
){
657 const int mb_xy
= h
->mb_xy
;
659 h
->non_zero_count
[mb_xy
][0]= h
->non_zero_count_cache
[7+8*1];
660 h
->non_zero_count
[mb_xy
][1]= h
->non_zero_count_cache
[7+8*2];
661 h
->non_zero_count
[mb_xy
][2]= h
->non_zero_count_cache
[7+8*3];
662 h
->non_zero_count
[mb_xy
][3]= h
->non_zero_count_cache
[7+8*4];
663 h
->non_zero_count
[mb_xy
][4]= h
->non_zero_count_cache
[4+8*4];
664 h
->non_zero_count
[mb_xy
][5]= h
->non_zero_count_cache
[5+8*4];
665 h
->non_zero_count
[mb_xy
][6]= h
->non_zero_count_cache
[6+8*4];
667 h
->non_zero_count
[mb_xy
][9]= h
->non_zero_count_cache
[1+8*2];
668 h
->non_zero_count
[mb_xy
][8]= h
->non_zero_count_cache
[2+8*2];
669 h
->non_zero_count
[mb_xy
][7]= h
->non_zero_count_cache
[2+8*1];
671 h
->non_zero_count
[mb_xy
][12]=h
->non_zero_count_cache
[1+8*5];
672 h
->non_zero_count
[mb_xy
][11]=h
->non_zero_count_cache
[2+8*5];
673 h
->non_zero_count
[mb_xy
][10]=h
->non_zero_count_cache
[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context
*h
, int n
){
681 const int index8
= scan8
[n
];
682 const int left
= h
->non_zero_count_cache
[index8
- 1];
683 const int top
= h
->non_zero_count_cache
[index8
- 8];
686 if(i
<64) i
= (i
+1)>>1;
688 tprintf(h
->s
.avctx
, "pred_nnz L%X T%X n%d s%d P%X\n", left
, top
, n
, scan8
[n
], i
&31);
693 static inline int fetch_diagonal_mv(H264Context
*h
, const int16_t **C
, int i
, int list
, int part_width
){
694 const int topright_ref
= h
->ref_cache
[list
][ i
- 8 + part_width
];
695 MpegEncContext
*s
= &h
->s
;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
700 const uint32_t *mb_types
= s
->current_picture_ptr
->mb_type
;
702 *(uint32_t*)h
->mv_cache
[list
][scan8
[0]-2] = 0;
703 *C
= h
->mv_cache
[list
][scan8
[0]-2];
706 && (s
->mb_y
&1) && i
< scan8
[0]+8 && topright_ref
!= PART_NOT_AVAILABLE
){
707 int topright_xy
= s
->mb_x
+ (s
->mb_y
-1)*s
->mb_stride
+ (i
== scan8
[0]+3);
708 if(IS_INTERLACED(mb_types
[topright_xy
])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s
->mb_x
*4+(i
&7)-4+part_width
, s
->mb_y
*4-1);
722 if(topright_ref
== PART_NOT_AVAILABLE
723 && ((s
->mb_y
&1) || i
>= scan8
[0]+8) && (i
&7)==4
724 && h
->ref_cache
[list
][scan8
[0]-1] != PART_NOT_AVAILABLE
){
726 && IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])){
727 SET_DIAG_MV(*2, >>1, s
->mb_x
*4-1, (s
->mb_y
|1)*4+(s
->mb_y
&1)*2+(i
>>4)-1);
730 && !IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s
->mb_x
*4-1, (s
->mb_y
&~1)*4 - 1 + ((i
-scan8
[0])>>3)*2);
739 if(topright_ref
!= PART_NOT_AVAILABLE
){
740 *C
= h
->mv_cache
[list
][ i
- 8 + part_width
];
743 tprintf(s
->avctx
, "topright MV not available\n");
745 *C
= h
->mv_cache
[list
][ i
- 8 - 1 ];
746 return h
->ref_cache
[list
][ i
- 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context
* const h
, int n
, int part_width
, int list
, int ref
, int * const mx
, int * const my
){
758 const int index8
= scan8
[n
];
759 const int top_ref
= h
->ref_cache
[list
][ index8
- 8 ];
760 const int left_ref
= h
->ref_cache
[list
][ index8
- 1 ];
761 const int16_t * const A
= h
->mv_cache
[list
][ index8
- 1 ];
762 const int16_t * const B
= h
->mv_cache
[list
][ index8
- 8 ];
764 int diagonal_ref
, match_count
;
766 assert(part_width
==1 || part_width
==2 || part_width
==4);
776 diagonal_ref
= fetch_diagonal_mv(h
, &C
, index8
, list
, part_width
);
777 match_count
= (diagonal_ref
==ref
) + (top_ref
==ref
) + (left_ref
==ref
);
778 tprintf(h
->s
.avctx
, "pred_motion match_count=%d\n", match_count
);
779 if(match_count
> 1){ //most common
780 *mx
= mid_pred(A
[0], B
[0], C
[0]);
781 *my
= mid_pred(A
[1], B
[1], C
[1]);
782 }else if(match_count
==1){
786 }else if(top_ref
==ref
){
794 if(top_ref
== PART_NOT_AVAILABLE
&& diagonal_ref
== PART_NOT_AVAILABLE
&& left_ref
!= PART_NOT_AVAILABLE
){
798 *mx
= mid_pred(A
[0], B
[0], C
[0]);
799 *my
= mid_pred(A
[1], B
[1], C
[1]);
803 tprintf(h
->s
.avctx
, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], diagonal_ref
, C
[0], C
[1], left_ref
, A
[0], A
[1], ref
, *mx
, *my
, h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
814 const int top_ref
= h
->ref_cache
[list
][ scan8
[0] - 8 ];
815 const int16_t * const B
= h
->mv_cache
[list
][ scan8
[0] - 8 ];
817 tprintf(h
->s
.avctx
, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
825 const int left_ref
= h
->ref_cache
[list
][ scan8
[8] - 1 ];
826 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[8] - 1 ];
828 tprintf(h
->s
.avctx
, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
838 pred_motion(h
, n
, 4, list
, ref
, mx
, my
);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
847 static inline void pred_8x16_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
849 const int left_ref
= h
->ref_cache
[list
][ scan8
[0] - 1 ];
850 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[0] - 1 ];
852 tprintf(h
->s
.avctx
, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
863 diagonal_ref
= fetch_diagonal_mv(h
, &C
, scan8
[4], list
, 2);
865 tprintf(h
->s
.avctx
, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref
, C
[0], C
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
867 if(diagonal_ref
== ref
){
875 pred_motion(h
, n
, 2, list
, ref
, mx
, my
);
878 static inline void pred_pskip_motion(H264Context
* const h
, int * const mx
, int * const my
){
879 const int top_ref
= h
->ref_cache
[0][ scan8
[0] - 8 ];
880 const int left_ref
= h
->ref_cache
[0][ scan8
[0] - 1 ];
882 tprintf(h
->s
.avctx
, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref
, left_ref
, h
->s
.mb_x
, h
->s
.mb_y
);
884 if(top_ref
== PART_NOT_AVAILABLE
|| left_ref
== PART_NOT_AVAILABLE
885 || (top_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 8 ] == 0)
886 || (left_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 1 ] == 0)){
892 pred_motion(h
, 0, 4, 0, 0, mx
, my
);
/**
 * Precomputes the temporal-direct distance scale factors for every
 * list-0 reference (H.264 subclause 8.4.1.2.3):
 *   tx = (16384 + |td|/2) / td
 *   dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023)
 * where td/tb are POC distances clipped to [-128,127].  A zero POC
 * distance or a long-term reference forces the neutral factor 256.
 * The trailing loop mirrors each frame factor into the per-field table
 * (the same value is stored for both field parities).
 * NOTE(review): elided chunk -- the declaration of i, the else around
 * orig line 907, loop braces, and the condition guarding the field
 * loop (presumably FRAME_MBAFF) are not visible here.
 */
897 static inline void direct_dist_scale_factor(H264Context
* const h
){
898 MpegEncContext
* const s
= &h
->s
;
/* POC of the current field (or frame) being decoded */
899 const int poc
= h
->s
.current_picture_ptr
->field_poc
[ s
->picture_structure
== PICT_BOTTOM_FIELD
];
/* POC of the first list-1 reference (the co-located picture) */
900 const int poc1
= h
->ref_list
[1][0].poc
;
902 for(i
=0; i
<h
->ref_count
[0]; i
++){
903 int poc0
= h
->ref_list
[0][i
].poc
;
904 int td
= av_clip(poc1
- poc0
, -128, 127);
/* equal POCs or long-term refs get the neutral scale factor */
905 if(td
== 0 || h
->ref_list
[0][i
].long_ref
){
906 h
->dist_scale_factor
[i
] = 256;
908 int tb
= av_clip(poc
- poc0
, -128, 127);
909 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
910 h
->dist_scale_factor
[i
] = av_clip((tb
*tx
+ 32) >> 6, -1024, 1023);
/* replicate frame factors into the field-pair table */
914 for(i
=0; i
<h
->ref_count
[0]; i
++){
915 h
->dist_scale_factor_field
[2*i
] =
916 h
->dist_scale_factor_field
[2*i
+1] = h
->dist_scale_factor
[i
];
920 static inline void direct_ref_list_init(H264Context
* const h
){
921 MpegEncContext
* const s
= &h
->s
;
922 Picture
* const ref1
= &h
->ref_list
[1][0];
923 Picture
* const cur
= s
->current_picture_ptr
;
925 int sidx
= s
->picture_structure
&1;
926 int ref1sidx
= ref1
->reference
&1;
927 for(list
=0; list
<2; list
++){
928 cur
->ref_count
[sidx
][list
] = h
->ref_count
[list
];
929 for(j
=0; j
<h
->ref_count
[list
]; j
++)
930 cur
->ref_poc
[sidx
][list
][j
] = 4*h
->ref_list
[list
][j
].frame_num
+ (h
->ref_list
[list
][j
].reference
&3);
932 if(s
->picture_structure
== PICT_FRAME
){
933 memcpy(cur
->ref_count
[0], cur
->ref_count
[1], sizeof(cur
->ref_count
[0]));
934 memcpy(cur
->ref_poc
[0], cur
->ref_poc
[1], sizeof(cur
->ref_poc
[0]));
936 if(cur
->pict_type
!= FF_B_TYPE
|| h
->direct_spatial_mv_pred
)
938 for(list
=0; list
<2; list
++){
939 for(i
=0; i
<ref1
->ref_count
[ref1sidx
][list
]; i
++){
940 int poc
= ref1
->ref_poc
[ref1sidx
][list
][i
];
941 if(((poc
&3) == 3) != (s
->picture_structure
== PICT_FRAME
))
942 poc
= (poc
&~3) + s
->picture_structure
;
943 h
->map_col_to_list0
[list
][i
] = 0; /* bogus; fills in for missing frames */
944 for(j
=0; j
<h
->ref_count
[list
]; j
++)
945 if(4*h
->ref_list
[list
][j
].frame_num
+ (h
->ref_list
[list
][j
].reference
&3) == poc
){
946 h
->map_col_to_list0
[list
][i
] = j
;
952 for(list
=0; list
<2; list
++){
953 for(i
=0; i
<ref1
->ref_count
[ref1sidx
][list
]; i
++){
954 j
= h
->map_col_to_list0
[list
][i
];
955 h
->map_col_to_list0_field
[list
][2*i
] = 2*j
;
956 h
->map_col_to_list0_field
[list
][2*i
+1] = 2*j
+1;
962 static inline void pred_direct_motion(H264Context
* const h
, int *mb_type
){
963 MpegEncContext
* const s
= &h
->s
;
964 int b8_stride
= h
->b8_stride
;
965 int b4_stride
= h
->b_stride
;
966 int mb_xy
= h
->mb_xy
;
968 const int16_t (*l1mv0
)[2], (*l1mv1
)[2];
969 const int8_t *l1ref0
, *l1ref1
;
970 const int is_b8x8
= IS_8X8(*mb_type
);
971 unsigned int sub_mb_type
;
974 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
976 if(IS_INTERLACED(h
->ref_list
[1][0].mb_type
[mb_xy
])){ // AFL/AFR/FR/FL -> AFL/FL
977 if(h
->ref_list
[1][0].reference
== PICT_FRAME
){ // AFL/AFR/FR/FL -> AFL
978 if(!IS_INTERLACED(*mb_type
)){ // AFR/FR -> AFL
979 int cur_poc
= s
->current_picture_ptr
->poc
;
980 int *col_poc
= h
->ref_list
[1]->field_poc
;
981 int col_parity
= FFABS(col_poc
[0] - cur_poc
) >= FFABS(col_poc
[1] - cur_poc
);
982 mb_xy
= s
->mb_x
+ ((s
->mb_y
&~1) + col_parity
)*s
->mb_stride
;
985 }else if(!(s
->picture_structure
& h
->ref_list
[1][0].reference
)){// FL -> FL & differ parity
986 int fieldoff
= 2*(h
->ref_list
[1][0].reference
)-3;
987 mb_xy
+= s
->mb_stride
*fieldoff
;
990 }else{ // AFL/AFR/FR/FL -> AFR/FR
991 if(IS_INTERLACED(*mb_type
)){ // AFL /FL -> AFR/FR
992 mb_xy
= s
->mb_x
+ (s
->mb_y
&~1)*s
->mb_stride
;
993 mb_type_col
[0] = h
->ref_list
[1][0].mb_type
[mb_xy
];
994 mb_type_col
[1] = h
->ref_list
[1][0].mb_type
[mb_xy
+ s
->mb_stride
];
997 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
998 if( (mb_type_col
[0] & MB_TYPE_16x16_OR_INTRA
)
999 && (mb_type_col
[1] & MB_TYPE_16x16_OR_INTRA
)
1001 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1002 *mb_type
|= MB_TYPE_16x8
|MB_TYPE_L0L1
|MB_TYPE_DIRECT2
; /* B_16x8 */
1004 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1005 *mb_type
|= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1007 }else{ // AFR/FR -> AFR/FR
1010 mb_type_col
[1] = h
->ref_list
[1][0].mb_type
[mb_xy
];
1011 if(IS_8X8(mb_type_col
[0]) && !h
->sps
.direct_8x8_inference_flag
){
1012 /* FIXME save sub mb types from previous frames (or derive from MVs)
1013 * so we know exactly what block size to use */
1014 sub_mb_type
= MB_TYPE_8x8
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_4x4 */
1015 *mb_type
|= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1016 }else if(!is_b8x8
&& (mb_type_col
[0] & MB_TYPE_16x16_OR_INTRA
)){
1017 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1018 *mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_16x16 */
1020 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1021 *mb_type
|= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1026 l1mv0
= &h
->ref_list
[1][0].motion_val
[0][h
->mb2b_xy
[mb_xy
]];
1027 l1mv1
= &h
->ref_list
[1][0].motion_val
[1][h
->mb2b_xy
[mb_xy
]];
1028 l1ref0
= &h
->ref_list
[1][0].ref_index
[0][h
->mb2b8_xy
[mb_xy
]];
1029 l1ref1
= &h
->ref_list
[1][0].ref_index
[1][h
->mb2b8_xy
[mb_xy
]];
1032 l1ref0
+= h
->b8_stride
;
1033 l1ref1
+= h
->b8_stride
;
1034 l1mv0
+= 2*b4_stride
;
1035 l1mv1
+= 2*b4_stride
;
1039 if(h
->direct_spatial_mv_pred
){
1044 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1046 /* ref = min(neighbors) */
1047 for(list
=0; list
<2; list
++){
1048 int refa
= h
->ref_cache
[list
][scan8
[0] - 1];
1049 int refb
= h
->ref_cache
[list
][scan8
[0] - 8];
1050 int refc
= h
->ref_cache
[list
][scan8
[0] - 8 + 4];
1051 if(refc
== PART_NOT_AVAILABLE
)
1052 refc
= h
->ref_cache
[list
][scan8
[0] - 8 - 1];
1053 ref
[list
] = FFMIN3((unsigned)refa
, (unsigned)refb
, (unsigned)refc
);
1058 if(ref
[0] < 0 && ref
[1] < 0){
1059 ref
[0] = ref
[1] = 0;
1060 mv
[0][0] = mv
[0][1] =
1061 mv
[1][0] = mv
[1][1] = 0;
1063 for(list
=0; list
<2; list
++){
1065 pred_motion(h
, 0, 4, list
, ref
[list
], &mv
[list
][0], &mv
[list
][1]);
1067 mv
[list
][0] = mv
[list
][1] = 0;
1073 *mb_type
&= ~MB_TYPE_L1
;
1074 sub_mb_type
&= ~MB_TYPE_L1
;
1075 }else if(ref
[0] < 0){
1077 *mb_type
&= ~MB_TYPE_L0
;
1078 sub_mb_type
&= ~MB_TYPE_L0
;
1081 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
[0])){
1082 for(i8
=0; i8
<4; i8
++){
1085 int xy8
= x8
+y8
*b8_stride
;
1086 int xy4
= 3*x8
+y8
*b4_stride
;
1089 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1091 h
->sub_mb_type
[i8
] = sub_mb_type
;
1093 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1094 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1095 if(!IS_INTRA(mb_type_col
[y8
])
1096 && ( (l1ref0
[xy8
] == 0 && FFABS(l1mv0
[xy4
][0]) <= 1 && FFABS(l1mv0
[xy4
][1]) <= 1)
1097 || (l1ref0
[xy8
] < 0 && l1ref1
[xy8
] == 0 && FFABS(l1mv1
[xy4
][0]) <= 1 && FFABS(l1mv1
[xy4
][1]) <= 1))){
1099 a
= pack16to32(mv
[0][0],mv
[0][1]);
1101 b
= pack16to32(mv
[1][0],mv
[1][1]);
1103 a
= pack16to32(mv
[0][0],mv
[0][1]);
1104 b
= pack16to32(mv
[1][0],mv
[1][1]);
1106 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, a
, 4);
1107 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, b
, 4);
1109 }else if(IS_16X16(*mb_type
)){
1112 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, (uint8_t)ref
[0], 1);
1113 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, (uint8_t)ref
[1], 1);
1114 if(!IS_INTRA(mb_type_col
[0])
1115 && ( (l1ref0
[0] == 0 && FFABS(l1mv0
[0][0]) <= 1 && FFABS(l1mv0
[0][1]) <= 1)
1116 || (l1ref0
[0] < 0 && l1ref1
[0] == 0 && FFABS(l1mv1
[0][0]) <= 1 && FFABS(l1mv1
[0][1]) <= 1
1117 && (h
->x264_build
>33 || !h
->x264_build
)))){
1119 a
= pack16to32(mv
[0][0],mv
[0][1]);
1121 b
= pack16to32(mv
[1][0],mv
[1][1]);
1123 a
= pack16to32(mv
[0][0],mv
[0][1]);
1124 b
= pack16to32(mv
[1][0],mv
[1][1]);
1126 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, a
, 4);
1127 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, b
, 4);
1129 for(i8
=0; i8
<4; i8
++){
1130 const int x8
= i8
&1;
1131 const int y8
= i8
>>1;
1133 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1135 h
->sub_mb_type
[i8
] = sub_mb_type
;
1137 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1138 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1139 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1140 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1143 if(!IS_INTRA(mb_type_col
[0]) && ( l1ref0
[x8
+ y8
*b8_stride
] == 0
1144 || (l1ref0
[x8
+ y8
*b8_stride
] < 0 && l1ref1
[x8
+ y8
*b8_stride
] == 0
1145 && (h
->x264_build
>33 || !h
->x264_build
)))){
1146 const int16_t (*l1mv
)[2]= l1ref0
[x8
+ y8
*b8_stride
] == 0 ? l1mv0
: l1mv1
;
1147 if(IS_SUB_8X8(sub_mb_type
)){
1148 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*b4_stride
];
1149 if(FFABS(mv_col
[0]) <= 1 && FFABS(mv_col
[1]) <= 1){
1151 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1153 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1156 for(i4
=0; i4
<4; i4
++){
1157 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*b4_stride
];
1158 if(FFABS(mv_col
[0]) <= 1 && FFABS(mv_col
[1]) <= 1){
1160 *(uint32_t*)h
->mv_cache
[0][scan8
[i8
*4+i4
]] = 0;
1162 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] = 0;
1168 }else{ /* direct temporal mv pred */
1169 const int *map_col_to_list0
[2] = {h
->map_col_to_list0
[0], h
->map_col_to_list0
[1]};
1170 const int *dist_scale_factor
= h
->dist_scale_factor
;
1172 if(FRAME_MBAFF
&& IS_INTERLACED(*mb_type
)){
1173 map_col_to_list0
[0] = h
->map_col_to_list0_field
[0];
1174 map_col_to_list0
[1] = h
->map_col_to_list0_field
[1];
1175 dist_scale_factor
= h
->dist_scale_factor_field
;
1177 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
[0])){
1178 /* FIXME assumes direct_8x8_inference == 1 */
1179 int y_shift
= 2*!IS_INTERLACED(*mb_type
);
1180 int ref_shift
= FRAME_MBAFF
? y_shift
: 1;
1182 for(i8
=0; i8
<4; i8
++){
1183 const int x8
= i8
&1;
1184 const int y8
= i8
>>1;
1186 const int16_t (*l1mv
)[2]= l1mv0
;
1188 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1190 h
->sub_mb_type
[i8
] = sub_mb_type
;
1192 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1193 if(IS_INTRA(mb_type_col
[y8
])){
1194 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1195 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1196 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1200 ref0
= l1ref0
[x8
+ y8
*b8_stride
];
1202 ref0
= map_col_to_list0
[0][ref0
*2>>ref_shift
];
1204 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ y8
*b8_stride
]*2>>ref_shift
];
1207 scale
= dist_scale_factor
[ref0
];
1208 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1211 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*b4_stride
];
1212 int my_col
= (mv_col
[1]<<y_shift
)/2;
1213 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1214 int my
= (scale
* my_col
+ 128) >> 8;
1215 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1216 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-my_col
), 4);
1222 /* one-to-one mv scaling */
1224 if(IS_16X16(*mb_type
)){
1227 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, 0, 1);
1228 if(IS_INTRA(mb_type_col
[0])){
1231 const int ref0
= l1ref0
[0] >= 0 ? map_col_to_list0
[0][l1ref0
[0]]
1232 : map_col_to_list0
[1][l1ref1
[0]];
1233 const int scale
= dist_scale_factor
[ref0
];
1234 const int16_t *mv_col
= l1ref0
[0] >= 0 ? l1mv0
[0] : l1mv1
[0];
1236 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1237 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1239 mv0
= pack16to32(mv_l0
[0],mv_l0
[1]);
1240 mv1
= pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1242 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, ref
, 1);
1243 fill_rectangle(&h
-> mv_cache
[0][scan8
[0]], 4, 4, 8, mv0
, 4);
1244 fill_rectangle(&h
-> mv_cache
[1][scan8
[0]], 4, 4, 8, mv1
, 4);
1246 for(i8
=0; i8
<4; i8
++){
1247 const int x8
= i8
&1;
1248 const int y8
= i8
>>1;
1250 const int16_t (*l1mv
)[2]= l1mv0
;
1252 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1254 h
->sub_mb_type
[i8
] = sub_mb_type
;
1255 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1256 if(IS_INTRA(mb_type_col
[0])){
1257 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1258 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1259 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1263 ref0
= l1ref0
[x8
+ y8
*b8_stride
];
1265 ref0
= map_col_to_list0
[0][ref0
];
1267 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ y8
*b8_stride
]];
1270 scale
= dist_scale_factor
[ref0
];
1272 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1273 if(IS_SUB_8X8(sub_mb_type
)){
1274 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*b4_stride
];
1275 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1276 int my
= (scale
* mv_col
[1] + 128) >> 8;
1277 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1278 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-mv_col
[1]), 4);
1280 for(i4
=0; i4
<4; i4
++){
1281 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*b4_stride
];
1282 int16_t *mv_l0
= h
->mv_cache
[0][scan8
[i8
*4+i4
]];
1283 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1284 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1285 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] =
1286 pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1293 static inline void write_back_motion(H264Context
*h
, int mb_type
){
1294 MpegEncContext
* const s
= &h
->s
;
1295 const int b_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
1296 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
1299 if(!USES_LIST(mb_type
, 0))
1300 fill_rectangle(&s
->current_picture
.ref_index
[0][b8_xy
], 2, 2, h
->b8_stride
, (uint8_t)LIST_NOT_USED
, 1);
1302 for(list
=0; list
<h
->list_count
; list
++){
1304 if(!USES_LIST(mb_type
, list
))
1308 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+0 + 8*y
];
1309 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+2 + 8*y
];
1311 if( h
->pps
.cabac
) {
1312 if(IS_SKIP(mb_type
))
1313 fill_rectangle(h
->mvd_table
[list
][b_xy
], 4, 4, h
->b_stride
, 0, 4);
1316 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+0 + 8*y
];
1317 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+2 + 8*y
];
1322 int8_t *ref_index
= &s
->current_picture
.ref_index
[list
][b8_xy
];
1323 ref_index
[0+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[0]];
1324 ref_index
[1+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[4]];
1325 ref_index
[0+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[8]];
1326 ref_index
[1+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[12]];
1330 if(h
->slice_type_nos
== FF_B_TYPE
&& h
->pps
.cabac
){
1331 if(IS_8X8(mb_type
)){
1332 uint8_t *direct_table
= &h
->direct_table
[b8_xy
];
1333 direct_table
[1+0*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[1]) ? 1 : 0;
1334 direct_table
[0+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[2]) ? 1 : 0;
1335 direct_table
[1+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[3]) ? 1 : 0;
1341 * Decodes a network abstraction layer unit.
1342 * @param consumed is the number of bytes used as input
1343 * @param length is the length of the array
1344 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1345 * @returns decoded bytes, might be src+1 if no escapes
1347 static const uint8_t *decode_nal(H264Context
*h
, const uint8_t *src
, int *dst_length
, int *consumed
, int length
){
1352 // src[0]&0x80; //forbidden bit
1353 h
->nal_ref_idc
= src
[0]>>5;
1354 h
->nal_unit_type
= src
[0]&0x1F;
1358 for(i
=0; i
<length
; i
++)
1359 printf("%2X ", src
[i
]);
1361 for(i
=0; i
+1<length
; i
+=2){
1362 if(src
[i
]) continue;
1363 if(i
>0 && src
[i
-1]==0) i
--;
1364 if(i
+2<length
&& src
[i
+1]==0 && src
[i
+2]<=3){
1366 /* startcode, so we must be past the end */
1373 if(i
>=length
-1){ //no escaped 0
1374 *dst_length
= length
;
1375 *consumed
= length
+1; //+1 for the header
1379 bufidx
= h
->nal_unit_type
== NAL_DPC
? 1 : 0; // use second escape buffer for inter data
1380 h
->rbsp_buffer
[bufidx
]= av_fast_realloc(h
->rbsp_buffer
[bufidx
], &h
->rbsp_buffer_size
[bufidx
], length
);
1381 dst
= h
->rbsp_buffer
[bufidx
];
1387 //printf("decoding esc\n");
1390 //remove escapes (very rare 1:2^22)
1391 if(si
+2<length
&& src
[si
]==0 && src
[si
+1]==0 && src
[si
+2]<=3){
1392 if(src
[si
+2]==3){ //escape
1397 }else //next start code
1401 dst
[di
++]= src
[si
++];
1405 *consumed
= si
+ 1;//+1 for the header
1406 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1411 * identifies the exact end of the bitstream
1412 * @return the length of the trailing, or 0 if damaged
/**
 * Identifies the exact end of the bitstream by inspecting the RBSP
 * trailing bits at src.
 * @return the length of the trailing, or 0 if damaged (per the
 *         doc comment preceding this function in the original file)
 * NOTE(review): elided chunk -- only the signature and the trace call
 * are visible; v is computed (and the bit scan performed) in elided
 * lines 1415-1417 and after 1418.
 */
1414 static int decode_rbsp_trailing(H264Context
*h
, const uint8_t *src
){
1418 tprintf(h
->s
.avctx
, "rbsp trailing %X\n", v
);
1428 * IDCT transforms the 16 dc values and dequantizes them.
1429 * @param qp quantization parameter
1431 static void h264_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1434 int temp
[16]; //FIXME check if this is a good idea
1435 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1436 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1438 //memset(block, 64, 2*256);
1441 const int offset
= y_offset
[i
];
1442 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1443 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1444 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1445 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1454 const int offset
= x_offset
[i
];
1455 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1456 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1457 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1458 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1460 block
[stride
*0 +offset
]= ((((z0
+ z3
)*qmul
+ 128 ) >> 8)); //FIXME think about merging this into decode_residual
1461 block
[stride
*2 +offset
]= ((((z1
+ z2
)*qmul
+ 128 ) >> 8));
1462 block
[stride
*8 +offset
]= ((((z1
- z2
)*qmul
+ 128 ) >> 8));
1463 block
[stride
*10+offset
]= ((((z0
- z3
)*qmul
+ 128 ) >> 8));
1469 * DCT transforms the 16 dc values.
1470 * @param qp quantization parameter ??? FIXME
1472 static void h264_luma_dc_dct_c(DCTELEM
*block
/*, int qp*/){
1473 // const int qmul= dequant_coeff[qp][0];
1475 int temp
[16]; //FIXME check if this is a good idea
1476 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1477 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1480 const int offset
= y_offset
[i
];
1481 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1482 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1483 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1484 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1493 const int offset
= x_offset
[i
];
1494 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1495 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1496 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1497 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1499 block
[stride
*0 +offset
]= (z0
+ z3
)>>1;
1500 block
[stride
*2 +offset
]= (z1
+ z2
)>>1;
1501 block
[stride
*8 +offset
]= (z1
- z2
)>>1;
1502 block
[stride
*10+offset
]= (z0
- z3
)>>1;
/**
 * In-place 2x2 Hadamard-style inverse transform + dequantization of
 * the four chroma DC coefficients.  The DC values live at a row stride
 * of 16*2 and a column stride of 16 within the macroblock coefficient
 * layout; each butterfly output is scaled by qmul and normalized >>7.
 * NOTE(review): elided chunk -- the declarations of a..e and the
 * butterfly sums/differences (orig lines 1519-1524, producing e from
 * a,b,c,d) are not visible here.  The qp parameter is unused in the
 * visible code; presumably retained for the dequant-function
 * signature -- confirm against callers.
 */
1510 static void chroma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1511 const int stride
= 16*2;
1512 const int xStride
= 16;
/* gather the four DC coefficients */
1515 a
= block
[stride
*0 + xStride
*0];
1516 b
= block
[stride
*0 + xStride
*1];
1517 c
= block
[stride
*1 + xStride
*0];
1518 d
= block
[stride
*1 + xStride
*1];
/* scatter the scaled butterfly results back in place */
1525 block
[stride
*0 + xStride
*0]= ((a
+c
)*qmul
) >> 7;
1526 block
[stride
*0 + xStride
*1]= ((e
+b
)*qmul
) >> 7;
1527 block
[stride
*1 + xStride
*0]= ((a
-c
)*qmul
) >> 7;
1528 block
[stride
*1 + xStride
*1]= ((e
-b
)*qmul
) >> 7;
/**
 * In-place 2x2 forward transform of the four chroma DC coefficients
 * (encoder-side counterpart of chroma_dc_dequant_idct_c).  Same
 * 16*2 / 16 strided layout; outputs are the raw butterfly results
 * with no quantizer scaling applied here.
 * NOTE(review): elided chunk -- the declarations of a..e and the
 * butterfly computations (orig lines 1541-1546) are not visible.
 */
1532 static void chroma_dc_dct_c(DCTELEM
*block
){
1533 const int stride
= 16*2;
1534 const int xStride
= 16;
/* gather the four DC coefficients */
1537 a
= block
[stride
*0 + xStride
*0];
1538 b
= block
[stride
*0 + xStride
*1];
1539 c
= block
[stride
*1 + xStride
*0];
1540 d
= block
[stride
*1 + xStride
*1];
/* scatter the butterfly results back in place */
1547 block
[stride
*0 + xStride
*0]= (a
+c
);
1548 block
[stride
*0 + xStride
*1]= (e
+b
);
1549 block
[stride
*1 + xStride
*0]= (a
-c
);
1550 block
[stride
*1 + xStride
*1]= (e
-b
);
1555 * gets the chroma qp.
/**
 * Looks up the chroma QP for a given luma qscale.
 * @param t      chroma QP table index (presumably 0 = Cb, 1 = Cr --
 *               NOTE(review): confirm against the PPS table setup)
 * @param qscale luma quantization parameter, used as table index
 * @return the mapped chroma quantization parameter from
 *         h->pps.chroma_qp_table
 */
1557 static inline int get_chroma_qp(H264Context
*h
, int t
, int qscale
){
1558 return h
->pps
.chroma_qp_table
[t
][qscale
];
1561 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1562 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1563 static inline int quantize_c(DCTELEM
*block
, uint8_t *scantable
, int qscale
, int intra
, int separate_dc
){
1565 const int * const quant_table
= quant_coeff
[qscale
];
1566 const int bias
= intra
? (1<<QUANT_SHIFT
)/3 : (1<<QUANT_SHIFT
)/6;
1567 const unsigned int threshold1
= (1<<QUANT_SHIFT
) - bias
- 1;
1568 const unsigned int threshold2
= (threshold1
<<1);
1574 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
-2))/3 : (1<<(QUANT_SHIFT
-2))/6;
1575 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
-2)) - dc_bias
- 1;
1576 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
1578 int level
= block
[0]*quant_coeff
[qscale
+18][0];
1579 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
1581 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
-2);
1584 level
= (dc_bias
- level
)>>(QUANT_SHIFT
-2);
1587 // last_non_zero = i;
1592 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
+1))/3 : (1<<(QUANT_SHIFT
+1))/6;
1593 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
+1)) - dc_bias
- 1;
1594 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
1596 int level
= block
[0]*quant_table
[0];
1597 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
1599 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
+1);
1602 level
= (dc_bias
- level
)>>(QUANT_SHIFT
+1);
1605 // last_non_zero = i;
1618 const int j
= scantable
[i
];
1619 int level
= block
[j
]*quant_table
[j
];
1621 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1622 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1623 if(((unsigned)(level
+threshold1
))>threshold2
){
1625 level
= (bias
+ level
)>>QUANT_SHIFT
;
1628 level
= (bias
- level
)>>QUANT_SHIFT
;
1637 return last_non_zero
;
1640 static inline void mc_dir_part(H264Context
*h
, Picture
*pic
, int n
, int square
, int chroma_height
, int delta
, int list
,
1641 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1642 int src_x_offset
, int src_y_offset
,
1643 qpel_mc_func
*qpix_op
, h264_chroma_mc_func chroma_op
){
1644 MpegEncContext
* const s
= &h
->s
;
1645 const int mx
= h
->mv_cache
[list
][ scan8
[n
] ][0] + src_x_offset
*8;
1646 int my
= h
->mv_cache
[list
][ scan8
[n
] ][1] + src_y_offset
*8;
1647 const int luma_xy
= (mx
&3) + ((my
&3)<<2);
1648 uint8_t * src_y
= pic
->data
[0] + (mx
>>2) + (my
>>2)*h
->mb_linesize
;
1649 uint8_t * src_cb
, * src_cr
;
1650 int extra_width
= h
->emu_edge_width
;
1651 int extra_height
= h
->emu_edge_height
;
1653 const int full_mx
= mx
>>2;
1654 const int full_my
= my
>>2;
1655 const int pic_width
= 16*s
->mb_width
;
1656 const int pic_height
= 16*s
->mb_height
>> MB_FIELD
;
1658 if(!pic
->data
[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
1661 if(mx
&7) extra_width
-= 3;
1662 if(my
&7) extra_height
-= 3;
1664 if( full_mx
< 0-extra_width
1665 || full_my
< 0-extra_height
1666 || full_mx
+ 16/*FIXME*/ > pic_width
+ extra_width
1667 || full_my
+ 16/*FIXME*/ > pic_height
+ extra_height
){
1668 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_y
- 2 - 2*h
->mb_linesize
, h
->mb_linesize
, 16+5, 16+5/*FIXME*/, full_mx
-2, full_my
-2, pic_width
, pic_height
);
1669 src_y
= s
->edge_emu_buffer
+ 2 + 2*h
->mb_linesize
;
1673 qpix_op
[luma_xy
](dest_y
, src_y
, h
->mb_linesize
); //FIXME try variable height perhaps?
1675 qpix_op
[luma_xy
](dest_y
+ delta
, src_y
+ delta
, h
->mb_linesize
);
1678 if(ENABLE_GRAY
&& s
->flags
&CODEC_FLAG_GRAY
) return;
1681 // chroma offset when predicting from a field of opposite parity
1682 my
+= 2 * ((s
->mb_y
& 1) - (pic
->reference
- 1));
1683 emu
|= (my
>>3) < 0 || (my
>>3) + 8 >= (pic_height
>>1);
1685 src_cb
= pic
->data
[1] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
1686 src_cr
= pic
->data
[2] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
1689 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cb
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
1690 src_cb
= s
->edge_emu_buffer
;
1692 chroma_op(dest_cb
, src_cb
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
1695 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cr
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
1696 src_cr
= s
->edge_emu_buffer
;
1698 chroma_op(dest_cr
, src_cr
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
1701 static inline void mc_part_std(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1702 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1703 int x_offset
, int y_offset
,
1704 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1705 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
1706 int list0
, int list1
){
1707 MpegEncContext
* const s
= &h
->s
;
1708 qpel_mc_func
*qpix_op
= qpix_put
;
1709 h264_chroma_mc_func chroma_op
= chroma_put
;
1711 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
1712 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1713 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1714 x_offset
+= 8*s
->mb_x
;
1715 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
1718 Picture
*ref
= &h
->ref_list
[0][ h
->ref_cache
[0][ scan8
[n
] ] ];
1719 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 0,
1720 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1721 qpix_op
, chroma_op
);
1724 chroma_op
= chroma_avg
;
1728 Picture
*ref
= &h
->ref_list
[1][ h
->ref_cache
[1][ scan8
[n
] ] ];
1729 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 1,
1730 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1731 qpix_op
, chroma_op
);
1735 static inline void mc_part_weighted(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1736 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1737 int x_offset
, int y_offset
,
1738 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1739 h264_weight_func luma_weight_op
, h264_weight_func chroma_weight_op
,
1740 h264_biweight_func luma_weight_avg
, h264_biweight_func chroma_weight_avg
,
1741 int list0
, int list1
){
1742 MpegEncContext
* const s
= &h
->s
;
1744 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
1745 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1746 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1747 x_offset
+= 8*s
->mb_x
;
1748 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
1751 /* don't optimize for luma-only case, since B-frames usually
1752 * use implicit weights => chroma too. */
1753 uint8_t *tmp_cb
= s
->obmc_scratchpad
;
1754 uint8_t *tmp_cr
= s
->obmc_scratchpad
+ 8;
1755 uint8_t *tmp_y
= s
->obmc_scratchpad
+ 8*h
->mb_uvlinesize
;
1756 int refn0
= h
->ref_cache
[0][ scan8
[n
] ];
1757 int refn1
= h
->ref_cache
[1][ scan8
[n
] ];
1759 mc_dir_part(h
, &h
->ref_list
[0][refn0
], n
, square
, chroma_height
, delta
, 0,
1760 dest_y
, dest_cb
, dest_cr
,
1761 x_offset
, y_offset
, qpix_put
, chroma_put
);
1762 mc_dir_part(h
, &h
->ref_list
[1][refn1
], n
, square
, chroma_height
, delta
, 1,
1763 tmp_y
, tmp_cb
, tmp_cr
,
1764 x_offset
, y_offset
, qpix_put
, chroma_put
);
1766 if(h
->use_weight
== 2){
1767 int weight0
= h
->implicit_weight
[refn0
][refn1
];
1768 int weight1
= 64 - weight0
;
1769 luma_weight_avg( dest_y
, tmp_y
, h
-> mb_linesize
, 5, weight0
, weight1
, 0);
1770 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
1771 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
1773 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
1774 h
->luma_weight
[0][refn0
], h
->luma_weight
[1][refn1
],
1775 h
->luma_offset
[0][refn0
] + h
->luma_offset
[1][refn1
]);
1776 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1777 h
->chroma_weight
[0][refn0
][0], h
->chroma_weight
[1][refn1
][0],
1778 h
->chroma_offset
[0][refn0
][0] + h
->chroma_offset
[1][refn1
][0]);
1779 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1780 h
->chroma_weight
[0][refn0
][1], h
->chroma_weight
[1][refn1
][1],
1781 h
->chroma_offset
[0][refn0
][1] + h
->chroma_offset
[1][refn1
][1]);
1784 int list
= list1
? 1 : 0;
1785 int refn
= h
->ref_cache
[list
][ scan8
[n
] ];
1786 Picture
*ref
= &h
->ref_list
[list
][refn
];
1787 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, list
,
1788 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1789 qpix_put
, chroma_put
);
1791 luma_weight_op(dest_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
1792 h
->luma_weight
[list
][refn
], h
->luma_offset
[list
][refn
]);
1793 if(h
->use_weight_chroma
){
1794 chroma_weight_op(dest_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1795 h
->chroma_weight
[list
][refn
][0], h
->chroma_offset
[list
][refn
][0]);
1796 chroma_weight_op(dest_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1797 h
->chroma_weight
[list
][refn
][1], h
->chroma_offset
[list
][refn
][1]);
1802 static inline void mc_part(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1803 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1804 int x_offset
, int y_offset
,
1805 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1806 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
1807 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
,
1808 int list0
, int list1
){
1809 if((h
->use_weight
==2 && list0
&& list1
1810 && (h
->implicit_weight
[ h
->ref_cache
[0][scan8
[n
]] ][ h
->ref_cache
[1][scan8
[n
]] ] != 32))
1811 || h
->use_weight
==1)
1812 mc_part_weighted(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
1813 x_offset
, y_offset
, qpix_put
, chroma_put
,
1814 weight_op
[0], weight_op
[3], weight_avg
[0], weight_avg
[3], list0
, list1
);
1816 mc_part_std(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
1817 x_offset
, y_offset
, qpix_put
, chroma_put
, qpix_avg
, chroma_avg
, list0
, list1
);
/**
 * Issues data-cache prefetches for the pixels that the estimated
 * motion vector of upcoming macroblocks will read: first the luma
 * plane, then both chroma planes in a single call that passes the
 * Cb->Cr pointer distance as the stride.
 * NOTE(review): elided chunk -- the guard on refn (presumably
 * `if(refn >= 0){`, orig line 1825) and the closing braces are not
 * visible here.
 */
1820 static inline void prefetch_motion(H264Context
*h
, int list
){
1821 /* fetch pixels for estimated mv 4 macroblocks ahead
1822 * optimized for 64byte cache lines */
1823 MpegEncContext
* const s
= &h
->s
;
1824 const int refn
= h
->ref_cache
[list
][scan8
[0]];
/* full-pel estimated position, offset into the next macroblocks */
1826 const int mx
= (h
->mv_cache
[list
][scan8
[0]][0]>>2) + 16*s
->mb_x
+ 8;
1827 const int my
= (h
->mv_cache
[list
][scan8
[0]][1]>>2) + 16*s
->mb_y
;
1828 uint8_t **src
= h
->ref_list
[list
][refn
].data
;
1829 int off
= mx
+ (my
+ (s
->mb_x
&3)*4)*h
->mb_linesize
+ 64;
1830 s
->dsp
.prefetch(src
[0]+off
, s
->linesize
, 4);
/* chroma: half resolution in both dimensions */
1831 off
= (mx
>>1) + ((my
>>1) + (s
->mb_x
&7))*s
->uvlinesize
+ 64;
1832 s
->dsp
.prefetch(src
[1]+off
, src
[2]-src
[1], 2);
1836 static void hl_motion(H264Context
*h
, uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1837 qpel_mc_func (*qpix_put
)[16], h264_chroma_mc_func (*chroma_put
),
1838 qpel_mc_func (*qpix_avg
)[16], h264_chroma_mc_func (*chroma_avg
),
1839 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
){
1840 MpegEncContext
* const s
= &h
->s
;
1841 const int mb_xy
= h
->mb_xy
;
1842 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
1844 assert(IS_INTER(mb_type
));
1846 prefetch_motion(h
, 0);
1848 if(IS_16X16(mb_type
)){
1849 mc_part(h
, 0, 1, 8, 0, dest_y
, dest_cb
, dest_cr
, 0, 0,
1850 qpix_put
[0], chroma_put
[0], qpix_avg
[0], chroma_avg
[0],
1851 &weight_op
[0], &weight_avg
[0],
1852 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1853 }else if(IS_16X8(mb_type
)){
1854 mc_part(h
, 0, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 0,
1855 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
1856 &weight_op
[1], &weight_avg
[1],
1857 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1858 mc_part(h
, 8, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 4,
1859 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
1860 &weight_op
[1], &weight_avg
[1],
1861 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
1862 }else if(IS_8X16(mb_type
)){
1863 mc_part(h
, 0, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 0, 0,
1864 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1865 &weight_op
[2], &weight_avg
[2],
1866 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1867 mc_part(h
, 4, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 4, 0,
1868 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1869 &weight_op
[2], &weight_avg
[2],
1870 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
1874 assert(IS_8X8(mb_type
));
1877 const int sub_mb_type
= h
->sub_mb_type
[i
];
1879 int x_offset
= (i
&1)<<2;
1880 int y_offset
= (i
&2)<<1;
1882 if(IS_SUB_8X8(sub_mb_type
)){
1883 mc_part(h
, n
, 1, 4, 0, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1884 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1885 &weight_op
[3], &weight_avg
[3],
1886 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1887 }else if(IS_SUB_8X4(sub_mb_type
)){
1888 mc_part(h
, n
, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1889 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
1890 &weight_op
[4], &weight_avg
[4],
1891 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1892 mc_part(h
, n
+2, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
+2,
1893 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
1894 &weight_op
[4], &weight_avg
[4],
1895 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1896 }else if(IS_SUB_4X8(sub_mb_type
)){
1897 mc_part(h
, n
, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1898 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1899 &weight_op
[5], &weight_avg
[5],
1900 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1901 mc_part(h
, n
+1, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
+2, y_offset
,
1902 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1903 &weight_op
[5], &weight_avg
[5],
1904 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1907 assert(IS_SUB_4X4(sub_mb_type
));
1909 int sub_x_offset
= x_offset
+ 2*(j
&1);
1910 int sub_y_offset
= y_offset
+ (j
&2);
1911 mc_part(h
, n
+j
, 1, 2, 0, dest_y
, dest_cb
, dest_cr
, sub_x_offset
, sub_y_offset
,
1912 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1913 &weight_op
[6], &weight_avg
[6],
1914 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1920 prefetch_motion(h
, 1);
/**
 * One-time initialization of all CAVLC VLC tables (coeff_token, total_zeros,
 * run_before, and their chroma-DC variants), pointing each VLC at its
 * statically allocated table and building it with INIT_VLC_USE_NEW_STATIC.
 *
 * NOTE(review): this extract is lossy — the `if(!done){ ... done=1; }` guard,
 * the `int offset, i;` declarations, several `for` headers and closing braces
 * are missing here; only the visible tokens are reproduced below.
 */
static av_cold void decode_init_vlc(void){
    static int done = 0;  /* guards against re-initialization — presumably checked by an elided if(!done) */

        /* chroma DC coeff_token: single table, fixed size */
        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                 &chroma_dc_coeff_token_len [0], 1, 1,
                 &chroma_dc_coeff_token_bits[0], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);

        /* luma coeff_token: four tables packed back-to-back in
         * coeff_token_vlc_tables[], addressed via a running offset
         * (loop header over i elided in this extract) */
            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                     &coeff_token_len [i][0], 1, 1,
                     &coeff_token_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);
            offset += coeff_token_vlc_tables_size[i];
        /*
         * This is a one time safety check to make sure that
         * the packed static coeff_token_vlc table sizes
         * were initialized correctly.
         */
        assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));

        /* chroma DC total_zeros: three tables (loop header elided) */
            chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
            chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
            init_vlc(&chroma_dc_total_zeros_vlc[i],
                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                     &chroma_dc_total_zeros_len [i][0], 1, 1,
                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);

        /* luma total_zeros: one table per TotalCoeff value 1..15 */
        for(i=0; i<15; i++){
            total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
            total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
            init_vlc(&total_zeros_vlc[i],
                     TOTAL_ZEROS_VLC_BITS, 16,
                     &total_zeros_len [i][0], 1, 1,
                     &total_zeros_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);

        /* run_before for zerosLeft 1..6 (loop header and the
         * RUN_VLC_BITS/count arguments of init_vlc elided in this extract) */
            run_vlc[i].table = run_vlc_tables[i];
            run_vlc[i].table_allocated = run_vlc_tables_size;
            init_vlc(&run_vlc[i],
                     &run_len [i][0], 1, 1,
                     &run_bits[i][0], 1, 1,
                     INIT_VLC_USE_NEW_STATIC);

        /* run_before for zerosLeft > 6 uses its own, larger table.
         * NOTE(review): comma operator below looks unintentional — a ';' was
         * likely intended; confirm against upstream before changing. */
        run7_vlc.table = run7_vlc_table,
        run7_vlc.table_allocated = run7_vlc_table_size;
        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                 &run_len [6][0], 1, 1,
                 &run_bits[6][0], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);
/**
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * the parameter-set buffers, and per-thread scratch buffers.
 *
 * NOTE(review): the `int i;` / `H264Context *hx;` declarations, an apparent
 * NULL check on hx, and the closing braces are missing from this extract.
 */
static void free_tables(H264Context *h){
    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    /* slice_table points into slice_table_base (see alloc_tables), so it
     * must be cleared, not freed */
    h->slice_table = NULL;

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2b8_xy);

    for(i = 0; i < MAX_SPS_COUNT; i++)
        av_freep(h->sps_buffers + i);

    for(i = 0; i < MAX_PPS_COUNT; i++)
        av_freep(h->pps_buffers + i);

    /* per-thread context buffers allocated in context_init()/frame_start() */
    for(i = 0; i < h->s.avctx->thread_count; i++) {
        hx = h->thread_context[i];
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->s.obmc_scratchpad);
/**
 * Build the per-QP 8x8 dequantization tables from the PPS scaling matrices.
 * If both 8x8 scaling matrices are identical, table 1 aliases buffer 0 to
 * save the recomputation.
 *
 * NOTE(review): local declarations (i, q, x), the inner x loop header, a
 * probable `break;` after the aliasing assignment, and closing braces are
 * missing from this extract.
 */
static void init_dequant8_coeff_table(H264Context *h){
    /* transposed layout is used when a non-C (SIMD) idct is active */
    const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
    h->dequant8_coeff[0] = h->dequant8_buffer[0];
    h->dequant8_coeff[1] = h->dequant8_buffer[1];

    for(i=0; i<2; i++ ){
        /* share buffer 0 when intra and inter 8x8 matrices match */
        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
            h->dequant8_coeff[1] = h->dequant8_buffer[0];

        for(q=0; q<52; q++){
            int shift = ff_div6[q];
            int idx = ff_rem6[q];
            /* (x loop over the 64 coefficients elided in this extract) */
                h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the per-QP 4x4 dequantization tables from the PPS scaling matrices.
 * Tables with identical scaling matrices alias a single buffer.
 *
 * NOTE(review): local declarations (i, j, q, x), the inner j/x loop headers,
 * and closing braces are missing from this extract.
 */
static void init_dequant4_coeff_table(H264Context *h){
    /* transposed layout is used when a non-C (SIMD) idct is active */
    const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        /* (j loop over previous tables elided) — alias an earlier buffer
         * when the 4x4 matrices compare equal */
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];

        for(q=0; q<52; q++){
            int shift = ff_div6[q] + 2;
            int idx = ff_rem6[q];
            /* (x loop over the 16 coefficients elided in this extract) */
                h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables for the current PPS/SPS. When transform
 * bypass is enabled, QP 0 entries are forced to the identity scale (1<<6).
 *
 * NOTE(review): the i/x loop headers around the bypass assignments and the
 * closing braces are missing from this extract.
 */
static void init_dequant_tables(H264Context *h){
    init_dequant4_coeff_table(h);
    if(h->pps.transform_8x8_mode)
        init_dequant8_coeff_table(h);
    if(h->sps.transform_bypass){
        /* identity dequant at QP 0 so lossless blocks pass through unscaled */
            h->dequant4_coeff[i][0][x] = 1<<6;
        if(h->pps.transform_8x8_mode)
                h->dequant8_coeff[i][0][x] = 1<<6;
/**
 * Allocate the per-picture decoding tables shared by all slice threads.
 * Needs width/height (mb_stride/mb_height) to be known already.
 *
 * @return 0 on success; CHECKED_ALLOCZ jumps to an error path on OOM
 *         (the `fail:` label / return statements are missing from this
 *          extract — NOTE(review)).
 */
static int alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* +1 row of padding below the frame */
    const int big_mb_num = s->mb_stride * (s->mb_height+1);

    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->non_zero_count, big_mb_num * 16 * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->slice_table_base, (big_mb_num+s->mb_stride) * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))

    CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
    CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
    CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));

    /* -1 marks "no slice"; slice_table gets an offset so that the padding
     * row/column above/left of the frame is addressable */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
    h->slice_table = h->slice_table_base + s->mb_stride*2 + 1;

    /* macroblock index -> motion-vector (b) and 8x8 (b8) index maps */
    CHECKED_ALLOCZ(h->mb2b_xy, big_mb_num * sizeof(uint32_t));
    CHECKED_ALLOCZ(h->mb2b8_xy, big_mb_num * sizeof(uint32_t));
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy = x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;
            const int b8_xy = 2*x + 2*y*h->b8_stride;

            h->mb2b_xy[mb_xy]= b_xy;
            h->mb2b8_xy[mb_xy]= b8_xy;

    /* allocated lazily in frame_start() once linesize is known */
    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);
/**
 * Mimic alloc_tables(), but for every context thread: share the big tables
 * owned by the master context instead of re-allocating them.
 * Per-thread buffers (obmc_scratchpad) are reset, not shared.
 */
static void clone_tables(H264Context *dst, H264Context *src){
    dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
    dst->non_zero_count = src->non_zero_count;
    dst->slice_table = src->slice_table;
    dst->cbp_table = src->cbp_table;
    dst->mb2b_xy = src->mb2b_xy;
    dst->mb2b8_xy = src->mb2b8_xy;
    dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
    dst->mvd_table[0] = src->mvd_table[0];
    dst->mvd_table[1] = src->mvd_table[1];
    dst->direct_table = src->direct_table;

    /* per-thread scratch, allocated lazily in frame_start() */
    dst->s.obmc_scratchpad = NULL;
    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
/**
 * Allocate buffers which are not shared amongst multiple threads.
 *
 * @return 0 on success, -1 on allocation failure.
 * NOTE(review): the `return 0;` / `fail:` lines between the allocations and
 * the error return are missing from this extract.
 */
static int context_init(H264Context *h){
    /* 16 luma + 8+8 chroma bytes of saved border per macroblock column */
    CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))

    return -1; // free_tables will clean up for us
/**
 * Initialization shared by the H.264 decoder and the SVQ3 decoder:
 * copies dimensions/codec id from the AVCodecContext, sets up intra
 * prediction, and seeds the scaling matrices with the flat default (16).
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    ff_h264_pred_init(&h->hpc, s->codec_id);

    /* -1 = "no PPS dequant tables built yet" */
    h->dequant_coeff_pps= -1;
    s->unrestricted_mv=1;
    s->decode=1; //FIXME

    /* flat scaling matrices (value 16 == unity) until a PPS/SPS overrides */
    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback for the H.264 decoder.
 *
 * NOTE(review): several lines are missing from this extract (the call to
 * common_init(), the `else` before the YUV420P assignment, the body of the
 * avcc-extradata branch setting h->is_avc / nal length size, and the
 * `return 0;`). Only visible tokens are reproduced.
 */
static av_cold int decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

//    s->decode_mb= ff_h263_decode_mb;
    s->quarter_sample = 1;

    if(avctx->codec_id == CODEC_ID_SVQ3)
        avctx->pix_fmt= PIX_FMT_YUVJ420P;
        avctx->pix_fmt= PIX_FMT_YUV420P;

    /* extradata starting with version byte 1 indicates avcC
     * (length-prefixed NAL units) rather than Annex-B */
    if(avctx->extradata_size > 0 && avctx->extradata &&
       *(char *)avctx->extradata == 1){

    h->thread_context[0] = h;
    /* INT_MIN = "no picture output yet" sentinel */
    h->outputed_poc = INT_MIN;
/**
 * Per-frame setup: starts the MPV frame, initializes error resilience,
 * computes the block_offset tables from the current linesizes, and lazily
 * allocates the per-thread bi-pred scratch buffers.
 *
 * @return 0 on success (the `return -1;` / `return 0;` lines are missing
 *         from this extract — NOTE(review), along with local declarations).
 */
static int frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;

    if(MPV_frame_start(s, s->avctx) < 0)

    ff_er_frame_start(s);

    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;

    assert(s->linesize && s->uvlinesize);

    /* pixel offsets of the 16 luma 4x4 blocks (and their field-mode
     * counterparts at [24+i]), derived from the scan8 ordering */
    for(i=0; i<16; i++){
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    /* chroma block offsets (loop header over the 4 chroma blocks elided) */
        h->block_offset[16+i]=
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+16+i]=
        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < s->avctx->thread_count; i++)
        if(!h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);

    /* some macroblocks will be accessed before they're available */
    if(FRAME_MBAFF || s->avctx->thread_count > 1)
        memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    /* field POCs are filled in later by the POC computation */
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;
    assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the bottom/right edge pixels of the just-decoded macroblock into
 * h->top_borders / h->left_border so the deblocking filter can still read
 * the unfiltered neighbor samples after this MB is filtered.
 *
 * NOTE(review): this extract is lossy — declarations of offset/uvoffset/
 * top_idx/step/skiplast/i, the branch structure of the MBAFF special case,
 * and closing braces are missing; only the visible tokens are kept.
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
    MpegEncContext * const s = &h->s;

    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        /* MBAFF: border layout differs between top/bottom MB of a pair */
        offset = MB_MBAFF ? 1 : 17;
        uvoffset= MB_MBAFF ? 1 : 9;

            *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
            *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
            if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
                *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);

            h->left_border[0]= h->top_borders[0][s->mb_x][15];
            if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
                h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];

        top_idx = MB_MBAFF ? 0 : 1;
        step= MB_MBAFF ? 2 : 1;

    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
    for(i=1; i<17 - skiplast; i++){
        h->left_border[offset+i*step]= src_y[15+i* linesize];

    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);

    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
        h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
        for(i=1; i<9 - skiplast; i++){
            h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
            h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Exchange (xchg!=0) or copy the saved unfiltered border pixels with the
 * current macroblock's edge pixels, so intra prediction sees the unfiltered
 * neighbors while the deblocked picture keeps the filtered ones.
 * Called with xchg=1 before intra prediction and xchg=0 after.
 *
 * NOTE(review): this extract is lossy — the XCHG macro body itself, the
 * declarations (deblock_left/deblock_top, temp8/temp64, offset, top_idx,
 * step, mb_xy, i), several if/else lines and closing braces are missing.
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
    MpegEncContext * const s = &h->s;

    if(!simple && FRAME_MBAFF){
        offset = MB_MBAFF ? 1 : 17;
        uvoffset= MB_MBAFF ? 1 : 9;
        top_idx = MB_MBAFF ? 0 : 1;
        step= MB_MBAFF ? 2 : 1;

    /* deblocking_filter==2: filter only inside the slice, so neighbor
     * availability depends on slice membership, not frame position */
    if(h->deblocking_filter == 2) {
        deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
        deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
        deblock_left = (s->mb_x > 0);
        deblock_top = (s->mb_y > 0);

    /* step back one row/column so the borders themselves are addressed */
    src_y -= linesize + 1;
    src_cb -= uvlinesize + 1;
    src_cr -= uvlinesize + 1;

#define XCHG(a,b,t,xchg)\

    for(i = !deblock_top; i<16; i++){
        XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
        XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);

        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);

    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        for(i = !deblock_top; i<8; i++){
            XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
            XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
            XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
            XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * inverse transform + residual add (H.264 or SVQ3 variants), and the
 * border bookkeeping needed by the deblocking filter.
 *
 * @param simple nonzero for the fast path that is compiled with all
 *        uncommon-case checks (MBAFF, PCM, gray, non-H264) folded away;
 *        `simple` is a compile-time constant at each call site since the
 *        function is av_always_inline.
 *
 * NOTE(review): this extract is lossy — many structural lines (else
 * branches, `continue`s, is_h264 conditionals, loop/brace closers, and
 * declarations such as i, list, nnz, tr, topright) are missing; the token
 * stream below preserves only what is visible.
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);

    dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
    dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
    dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);

    if (!simple && MB_FIELD) {
        /* field macroblock: double the strides and use the field variant of
         * the block_offset table */
        linesize = h->mb_linesize = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[24];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb -= s->uvlinesize*7;
            dest_cr -= s->uvlinesize*7;
        /* remap reference indices so both fields of a frame reference are
         * addressable from a field MB (loop-close braces elided) */
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list))
            if(IS_16X16(mb_type)){
                int8_t *ref = &h->ref_cache[list][scan8[0]];
                fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                for(i=0; i<16; i+=4){
                    //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
                    int ref = h->ref_cache[list][scan8[i]];
                    fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
        /* frame macroblock */
        linesize = h->mb_linesize = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;

    /* select luma inverse-transform functions once for the whole MB */
    if(transform_bypass){
        idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
    }else if(IS_8x8DCT(mb_type)){
        idct_dc_add = s->dsp.h264_idct8_dc_add;
        idct_add = s->dsp.h264_idct8_add;
        idct_dc_add = s->dsp.h264_idct_dc_add;
        idct_add = s->dsp.h264_idct_add;

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* PCM macroblock: raw samples were stored in h->mb, copy them out */
        for (i=0; i<16; i++) {
            memcpy(dest_y + i* linesize, h->mb + i*8, 16);
        for (i=0; i<8; i++) {
            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
        if(IS_INTRA(mb_type)){
            /* give intra prediction the unfiltered borders */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);

            if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);

            if(IS_INTRA4x4(mb_type)){
                if(simple || !s->encoding){
                    if(IS_8x8DCT(mb_type)){
                        /* 8x8 intra: 4 blocks, pred8x8l + 8x8 idct */
                        for(i=0; i<16; i+=4){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                            const int nnz = h->non_zero_count_cache[ scan8[i] ];
                            h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                   (h->topright_samples_available<<i)&0x4000, linesize);
                            if(nnz == 1 && h->mb[i*16])
                                idct_dc_add(ptr, h->mb + i*16, linesize);
                                idct_add(ptr, h->mb + i*16, linesize);
                    /* 4x4 intra: 16 blocks */
                    for(i=0; i<16; i++){
                        uint8_t * const ptr= dest_y + block_offset[i];
                        const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            /* these modes need the top-right neighbor; fall
                             * back to replicating the rightmost top pixel */
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                tr= ptr[3 - linesize]*0x01010101;
                                topright= (uint8_t*) &tr;
                                topright= ptr + 4 - linesize;
                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i] ];
                        if(nnz == 1 && h->mb[i*16])
                            idct_dc_add(ptr, h->mb + i*16, linesize);
                            idct_add(ptr, h->mb + i*16, linesize);
                            svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                /* intra16x16 */
                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
                if(!transform_bypass)
                    h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
                    svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
            /* restore filtered borders */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
            /* inter macroblock: motion compensation */
            hl_motion(h, dest_y, dest_cb, dest_cr,
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);

        /* add luma residual */
        if(!IS_INTRA4x4(mb_type)){
            if(IS_INTRA16x16(mb_type)){
                for(i=0; i<16; i++){
                    if(h->non_zero_count_cache[ scan8[i] ])
                        idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                    else if(h->mb[i*16])
                        idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                for(i=0; i<16; i+=di){
                    int nnz = h->non_zero_count_cache[ scan8[i] ];
                    if(nnz==1 && h->mb[i*16])
                        idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                        idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                /* SVQ3 residual path */
                for(i=0; i<16; i++){
                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                        uint8_t * const ptr= dest_y + block_offset[i];
                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);

        /* add chroma residual */
        if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                idct_add = idct_dc_add = s->dsp.add_pixels4;
                idct_add = s->dsp.h264_idct_add;
                idct_dc_add = s->dsp.h264_idct_dc_add;
                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
                for(i=16; i<16+8; i++){
                    if(h->non_zero_count_cache[ scan8[i] ])
                        idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    else if(h->mb[i*16])
                        idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                /* SVQ3 chroma path */
                for(i=16; i<16+8; i++){
                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                        uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
                        svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);

    if(h->deblocking_filter) {
        backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
        fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
        h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
        h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
        if (!simple && FRAME_MBAFF) {
            filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
            filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/**
 * Process a macroblock; this case avoids checks for expensive uncommon cases.
 * (simple=1 lets the av_always_inline body fold those branches away.)
 */
static void hl_decode_mb_simple(H264Context *h){
    hl_decode_mb_internal(h, 1);
/**
 * Process a macroblock; this handles edge cases, such as interlacing.
 * (simple=0 keeps all uncommon-case branches in the inlined body.)
 */
static void av_noinline hl_decode_mb_complex(H264Context *h){
    hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex path based
 * on whether any uncommon feature (MBAFF, field MB, PCM, gray decoding,
 * encoder use, non-H264 codec) is active.
 *
 * NOTE(review): the `if(is_complex)` line and an early `return;` for the
 * encoder case are missing from this extract.
 */
static void hl_decode_mb(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
                    (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;

    if(ENABLE_H264_ENCODER && !s->decode)

        hl_decode_mb_complex(h);
    else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture into a single-field view in place: offsets the
 * data pointers to the requested field, doubles the linesizes, and sets
 * reference/poc to the field values.
 *
 * @param parity PICT_TOP_FIELD or PICT_BOTTOM_FIELD
 */
static void pic_as_field(Picture *pic, const int parity){
    for (i = 0; i < 4; ++i) {
        /* bottom field starts one line down */
        if (parity == PICT_BOTTOM_FIELD)
            pic->data[i] += pic->linesize[i];
        pic->reference = parity;
        pic->linesize[i] *= 2;
    pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy src into dest if src is a reference of the requested parity,
 * converting to a field picture when parity is not PICT_FRAME.
 *
 * @return 1 if the copy was made, 0 otherwise.
 * NOTE(review): the `if(match){ *dest = *src; ... }` wrapper and the
 * `return match;` are missing from this extract.
 */
static int split_field_copy(Picture *dest, Picture *src,
                            int parity, int id_add){
    int match = !!(src->reference & parity);

        if(parity != PICT_FRAME){
            pic_as_field(dest, parity);
            dest->pic_id += id_add;
/**
 * Build part of a default reference list from `in`, interleaving pictures
 * of parity `sel` and the opposite parity (sel^3), assigning pic_ids
 * (long-term index or frame_num) along the way.
 *
 * @return the number of entries written (the `int i[2]={0}; int index=0;`
 *         declarations, the i[x]++ skip statements, the alternating
 *         if-conditions and `return index;` are missing from this
 *         extract — NOTE(review)).
 */
static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
    while(i[0]<len || i[1]<len){
        /* advance each cursor past entries that are not references of the
         * wanted parity */
        while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
        while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
            in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
            split_field_copy(&def[index++], in[ i[0]++ ], sel, 1);
            in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
            split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append to `sorted` the pictures from `src` whose poc lies beyond `limit`
 * in direction `dir` (dir=0: descending pocs below limit; dir=1: ascending
 * pocs above limit), by repeated best-poc selection.
 *
 * @return number of pictures appended (the outer selection loop header,
 *         `out_i`/`best_poc` declarations and `return out_i;` are missing
 *         from this extract — NOTE(review)).
 */
static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
        best_poc= dir ? INT_MIN : INT_MAX;

        for(i=0; i<len; i++){
            const int poc= src[i]->poc;
            if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
                /* current best candidate in the requested direction */
                sorted[out_i]= src[i];
        if(best_poc == (dir ? INT_MIN : INT_MAX))
        limit= sorted[out_i++]->poc - dir;
/**
 * Fill the default reference lists: for B slices, two POC-sorted lists
 * around the current POC (list1 being list0 with the halves swapped);
 * for P slices, short-term refs by frame_num then long-term refs.
 *
 * NOTE(review): several lines are missing from this extract — local
 * declarations (i, len, lens[2], cur_poc, list), the FIELD_PICTURE
 * conditional around the cur_poc selection, `lens[list]=len;`, the
 * if-condition guarding the FFSWAP, and `return 0;`.
 */
static int fill_default_ref_list(H264Context *h){
    MpegEncContext * const s = &h->s;

    if(h->slice_type_nos==FF_B_TYPE){
        Picture *sorted[32];

        /* current POC: field POC when decoding a field, frame POC otherwise
         * (the FIELD_PICTURE branch is elided here) */
            cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
            cur_poc= s->current_picture_ptr->poc;

        for(list= 0; list<2; list++){
            /* past refs then future refs for list0; reversed for list1 */
            len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
            len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);

            len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
            len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);

            if(len < h->ref_count[list])
                memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));

        /* per spec, if both lists come out identical swap the first two
         * entries of list1 */
        if(lens[0] == lens[1] && lens[1] > 1){
            for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
                FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
        /* P/SP slice: short-term then long-term refs */
        len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
        len += build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);

        if(len < h->ref_count[0])
            memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));

    /* trace output of the constructed lists */
    for (i=0; i<h->ref_count[0]; i++) {
        tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
    if(h->slice_type_nos==FF_B_TYPE){
        for (i=0; i<h->ref_count[1]; i++) {
            tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2787 static void print_short_term(H264Context
*h
);
2788 static void print_long_term(H264Context
*h
);
/**
 * Extract structure information about the picture described by pic_num in
 * the current decoding context (frame or field). Note that pic_num is
 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
 * @param pic_num picture number for which to extract structure information
 * @param structure one of PICT_XXX describing structure of picture
 *                  with pic_num
 * @return frame number (short term) or long term index of picture
 *         described by pic_num
 *
 * NOTE(review): the FIELD_PICTURE branch testing (pic_num & 1) and the
 * final `return pic_num;`/shift are missing from this extract.
 */
static int pic_num_extract(H264Context *h, int pic_num, int *structure){
    MpegEncContext * const s = &h->s;

    *structure = s->picture_structure;

            /* opposite field */
            *structure ^= PICT_FRAME;
/**
 * Parse ref_pic_list_reordering() from the slice header and apply it:
 * starts from the default lists and, per reordering_of_pic_nums_idc,
 * moves the named short-term or long-term reference to the front of the
 * remaining list, shifting the rest down. Afterwards sanity-checks that
 * every entry holds a picture and initializes direct-mode tables.
 *
 * @return 0 on success, -1 on bitstream errors (the `return -1;` lines
 *         after each av_log, several loop/brace closers, `break`s, and
 *         local declarations (i, frame_num, long_idx) are missing from
 *         this extract — NOTE(review)).
 */
static int decode_ref_pic_list_reordering(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list, index, pic_structure;

    print_short_term(h);

    for(list=0; list<h->list_count; list++){
        /* start from the default list */
        memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);

        if(get_bits1(&s->gb)){  /* ref_pic_list_reordering_flag */
            int pred= h->curr_pic_num;

            for(index=0; ; index++){
                unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
                unsigned int pic_id;
                Picture *ref = NULL;

                if(reordering_of_pic_nums_idc==3)  /* end of reordering commands */

                if(index >= h->ref_count[list]){
                    av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");

                if(reordering_of_pic_nums_idc<3){
                    if(reordering_of_pic_nums_idc<2){
                        /* short-term: idc 0 subtracts, idc 1 adds the diff */
                        const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;

                        if(abs_diff_pic_num > h->max_pic_num){
                            av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");

                        if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
                        else pred+= abs_diff_pic_num;
                        pred &= h->max_pic_num - 1;

                        frame_num = pic_num_extract(h, pred, &pic_structure);

                        /* search the short-term list, newest first */
                        for(i= h->short_ref_count-1; i>=0; i--){
                            ref = h->short_ref[i];
                            assert(ref->reference);
                            assert(!ref->long_ref);
                                ref->frame_num == frame_num &&
                                (ref->reference & pic_structure)
                        /* long-term reference */
                        pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx

                        long_idx= pic_num_extract(h, pic_id, &pic_structure);

                            av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
                        ref = h->long_ref[long_idx];
                        assert(!(ref && !ref->reference));
                        if(ref && (ref->reference & pic_structure)){
                            ref->pic_id= pic_id;
                            assert(ref->long_ref);

                        av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
                        memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                        /* find the old position of ref (if present), then
                         * shift entries down and insert it at `index` */
                        for(i=index; i+1<h->ref_count[list]; i++){
                            if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
                        for(; i > index; i--){
                            h->ref_list[list][i]= h->ref_list[list][i-1];
                        h->ref_list[list][index]= *ref;
                            pic_as_field(&h->ref_list[list][index], pic_structure);
                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");

    /* every list slot must reference a real picture */
    for(list=0; list<h->list_count; list++){
        for(index= 0; index < h->ref_count[list]; index++){
            if(!h->ref_list[list][index].data[0]){
                av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
                h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution

    if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
        direct_dist_scale_factor(h);
    direct_ref_list_init(h);
/**
 * For MBAFF decoding, append to each reference list the per-field views of
 * every frame reference (at indices 16+2*i / 16+2*i+1), duplicating the
 * associated luma/chroma/implicit weights for those field entries.
 *
 * NOTE(review): local declarations (list, i, j), the `field[0] = *frame;`
 * copy, the inner j loop headers over data/linesize planes and chroma
 * components, and closing braces are missing from this extract.
 */
static void fill_mbaff_ref_list(H264Context *h){
    for(list=0; list<2; list++){ //FIXME try list_count
        for(i=0; i<h->ref_count[list]; i++){
            Picture *frame = &h->ref_list[list][i];
            Picture *field = &h->ref_list[list][16+2*i];
            /* top field: same origin, doubled stride (plane loop elided) */
                field[0].linesize[j] <<= 1;
            field[0].reference = PICT_TOP_FIELD;
            field[1] = field[0];
            /* bottom field: one original line down */
                field[1].data[j] += frame->linesize[j];
            field[1].reference = PICT_BOTTOM_FIELD;

            h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
            h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
                h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
                h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
    for(j=0; j<h->ref_count[1]; j++){
        for(i=0; i<h->ref_count[0]; i++)
            h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
        memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
        memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2959 static int pred_weight_table(H264Context
*h
){
2960 MpegEncContext
* const s
= &h
->s
;
2962 int luma_def
, chroma_def
;
2965 h
->use_weight_chroma
= 0;
2966 h
->luma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
2967 h
->chroma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
2968 luma_def
= 1<<h
->luma_log2_weight_denom
;
2969 chroma_def
= 1<<h
->chroma_log2_weight_denom
;
2971 for(list
=0; list
<2; list
++){
2972 for(i
=0; i
<h
->ref_count
[list
]; i
++){
2973 int luma_weight_flag
, chroma_weight_flag
;
2975 luma_weight_flag
= get_bits1(&s
->gb
);
2976 if(luma_weight_flag
){
2977 h
->luma_weight
[list
][i
]= get_se_golomb(&s
->gb
);
2978 h
->luma_offset
[list
][i
]= get_se_golomb(&s
->gb
);
2979 if( h
->luma_weight
[list
][i
] != luma_def
2980 || h
->luma_offset
[list
][i
] != 0)
2983 h
->luma_weight
[list
][i
]= luma_def
;
2984 h
->luma_offset
[list
][i
]= 0;
2988 chroma_weight_flag
= get_bits1(&s
->gb
);
2989 if(chroma_weight_flag
){
2992 h
->chroma_weight
[list
][i
][j
]= get_se_golomb(&s
->gb
);
2993 h
->chroma_offset
[list
][i
][j
]= get_se_golomb(&s
->gb
);
2994 if( h
->chroma_weight
[list
][i
][j
] != chroma_def
2995 || h
->chroma_offset
[list
][i
][j
] != 0)
2996 h
->use_weight_chroma
= 1;
3001 h
->chroma_weight
[list
][i
][j
]= chroma_def
;
3002 h
->chroma_offset
[list
][i
][j
]= 0;
3007 if(h
->slice_type_nos
!= FF_B_TYPE
) break;
3009 h
->use_weight
= h
->use_weight
|| h
->use_weight_chroma
;
3013 static void implicit_weight_table(H264Context
*h
){
3014 MpegEncContext
* const s
= &h
->s
;
3016 int cur_poc
= s
->current_picture_ptr
->poc
;
3018 if( h
->ref_count
[0] == 1 && h
->ref_count
[1] == 1
3019 && h
->ref_list
[0][0].poc
+ h
->ref_list
[1][0].poc
== 2*cur_poc
){
3021 h
->use_weight_chroma
= 0;
3026 h
->use_weight_chroma
= 2;
3027 h
->luma_log2_weight_denom
= 5;
3028 h
->chroma_log2_weight_denom
= 5;
3030 for(ref0
=0; ref0
< h
->ref_count
[0]; ref0
++){
3031 int poc0
= h
->ref_list
[0][ref0
].poc
;
3032 for(ref1
=0; ref1
< h
->ref_count
[1]; ref1
++){
3033 int poc1
= h
->ref_list
[1][ref1
].poc
;
3034 int td
= av_clip(poc1
- poc0
, -128, 127);
3036 int tb
= av_clip(cur_poc
- poc0
, -128, 127);
3037 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
3038 int dist_scale_factor
= av_clip((tb
*tx
+ 32) >> 6, -1024, 1023) >> 2;
3039 if(dist_scale_factor
< -64 || dist_scale_factor
> 128)
3040 h
->implicit_weight
[ref0
][ref1
] = 32;
3042 h
->implicit_weight
[ref0
][ref1
] = 64 - dist_scale_factor
;
3044 h
->implicit_weight
[ref0
][ref1
] = 32;
3050 * Mark a picture as no longer needed for reference. The refmask
3051 * argument allows unreferencing of individual fields or the whole frame.
3052 * If the picture becomes entirely unreferenced, but is being held for
3053 * display purposes, it is marked as such.
3054 * @param refmask mask of fields to unreference; the mask is bitwise
3055 * anded with the reference marking of pic
3056 * @return non-zero if pic becomes entirely unreferenced (except possibly
3057 * for display purposes) zero if one of the fields remains in
3060 static inline int unreference_pic(H264Context
*h
, Picture
*pic
, int refmask
){
3062 if (pic
->reference
&= refmask
) {
3065 for(i
= 0; h
->delayed_pic
[i
]; i
++)
3066 if(pic
== h
->delayed_pic
[i
]){
3067 pic
->reference
=DELAYED_PIC_REF
;
3075 * instantaneous decoder refresh.
3077 static void idr(H264Context
*h
){
3080 for(i
=0; i
<16; i
++){
3081 remove_long(h
, i
, 0);
3083 assert(h
->long_ref_count
==0);
3085 for(i
=0; i
<h
->short_ref_count
; i
++){
3086 unreference_pic(h
, h
->short_ref
[i
], 0);
3087 h
->short_ref
[i
]= NULL
;
3089 h
->short_ref_count
=0;
3090 h
->prev_frame_num
= 0;
3091 h
->prev_frame_num_offset
= 0;
3096 /* forget old pics after a seek */
3097 static void flush_dpb(AVCodecContext
*avctx
){
3098 H264Context
*h
= avctx
->priv_data
;
3100 for(i
=0; i
<MAX_DELAYED_PIC_COUNT
; i
++) {
3101 if(h
->delayed_pic
[i
])
3102 h
->delayed_pic
[i
]->reference
= 0;
3103 h
->delayed_pic
[i
]= NULL
;
3105 h
->outputed_poc
= INT_MIN
;
3107 if(h
->s
.current_picture_ptr
)
3108 h
->s
.current_picture_ptr
->reference
= 0;
3109 h
->s
.first_field
= 0;
3110 ff_mpeg_flush(avctx
);
3114 * Find a Picture in the short term reference list by frame number.
3115 * @param frame_num frame number to search for
3116 * @param idx the index into h->short_ref where returned picture is found
3117 * undefined if no picture found.
3118 * @return pointer to the found picture, or NULL if no pic with the provided
3119 * frame number is found
3121 static Picture
* find_short(H264Context
*h
, int frame_num
, int *idx
){
3122 MpegEncContext
* const s
= &h
->s
;
3125 for(i
=0; i
<h
->short_ref_count
; i
++){
3126 Picture
*pic
= h
->short_ref
[i
];
3127 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3128 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d %d %p\n", i
, pic
->frame_num
, pic
);
3129 if(pic
->frame_num
== frame_num
) {
3138 * Remove a picture from the short term reference list by its index in
3139 * that list. This does no checking on the provided index; it is assumed
3140 * to be valid. Other list entries are shifted down.
3141 * @param i index into h->short_ref of picture to remove.
3143 static void remove_short_at_index(H264Context
*h
, int i
){
3144 assert(i
>= 0 && i
< h
->short_ref_count
);
3145 h
->short_ref
[i
]= NULL
;
3146 if (--h
->short_ref_count
)
3147 memmove(&h
->short_ref
[i
], &h
->short_ref
[i
+1], (h
->short_ref_count
- i
)*sizeof(Picture
*));
3152 * @return the removed picture or NULL if an error occurs
3154 static Picture
* remove_short(H264Context
*h
, int frame_num
, int ref_mask
){
3155 MpegEncContext
* const s
= &h
->s
;
3159 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3160 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "remove short %d count %d\n", frame_num
, h
->short_ref_count
);
3162 pic
= find_short(h
, frame_num
, &i
);
3164 if(unreference_pic(h
, pic
, ref_mask
))
3165 remove_short_at_index(h
, i
);
3172 * Remove a picture from the long term reference list by its index in
3174 * @return the removed picture or NULL if an error occurs
3176 static Picture
* remove_long(H264Context
*h
, int i
, int ref_mask
){
3179 pic
= h
->long_ref
[i
];
3181 if(unreference_pic(h
, pic
, ref_mask
)){
3182 assert(h
->long_ref
[i
]->long_ref
== 1);
3183 h
->long_ref
[i
]->long_ref
= 0;
3184 h
->long_ref
[i
]= NULL
;
3185 h
->long_ref_count
--;
3193 * print short term list
3195 static void print_short_term(H264Context
*h
) {
3197 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3198 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "short term list:\n");
3199 for(i
=0; i
<h
->short_ref_count
; i
++){
3200 Picture
*pic
= h
->short_ref
[i
];
3201 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3207 * print long term list
3209 static void print_long_term(H264Context
*h
) {
3211 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3212 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "long term list:\n");
3213 for(i
= 0; i
< 16; i
++){
3214 Picture
*pic
= h
->long_ref
[i
];
3216 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3223 * Executes the reference picture marking (memory management control operations).
3225 static int execute_ref_pic_marking(H264Context
*h
, MMCO
*mmco
, int mmco_count
){
3226 MpegEncContext
* const s
= &h
->s
;
3228 int current_ref_assigned
=0;
3231 if((s
->avctx
->debug
&FF_DEBUG_MMCO
) && mmco_count
==0)
3232 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "no mmco here\n");
3234 for(i
=0; i
<mmco_count
; i
++){
3235 int structure
, frame_num
;
3236 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3237 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco:%d %d %d\n", h
->mmco
[i
].opcode
, h
->mmco
[i
].short_pic_num
, h
->mmco
[i
].long_arg
);
3239 if( mmco
[i
].opcode
== MMCO_SHORT2UNUSED
3240 || mmco
[i
].opcode
== MMCO_SHORT2LONG
){
3241 frame_num
= pic_num_extract(h
, mmco
[i
].short_pic_num
, &structure
);
3242 pic
= find_short(h
, frame_num
, &j
);
3244 if(mmco
[i
].opcode
!= MMCO_SHORT2LONG
|| !h
->long_ref
[mmco
[i
].long_arg
]
3245 || h
->long_ref
[mmco
[i
].long_arg
]->frame_num
!= frame_num
)
3246 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mmco: unref short failure\n");
3251 switch(mmco
[i
].opcode
){
3252 case MMCO_SHORT2UNUSED
:
3253 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3254 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref short %d count %d\n", h
->mmco
[i
].short_pic_num
, h
->short_ref_count
);
3255 remove_short(h
, frame_num
, structure
^ PICT_FRAME
);
3257 case MMCO_SHORT2LONG
:
3258 if (h
->long_ref
[mmco
[i
].long_arg
] != pic
)
3259 remove_long(h
, mmco
[i
].long_arg
, 0);
3261 remove_short_at_index(h
, j
);
3262 h
->long_ref
[ mmco
[i
].long_arg
]= pic
;
3263 if (h
->long_ref
[ mmco
[i
].long_arg
]){
3264 h
->long_ref
[ mmco
[i
].long_arg
]->long_ref
=1;
3265 h
->long_ref_count
++;
3268 case MMCO_LONG2UNUSED
:
3269 j
= pic_num_extract(h
, mmco
[i
].long_arg
, &structure
);
3270 pic
= h
->long_ref
[j
];
3272 remove_long(h
, j
, structure
^ PICT_FRAME
);
3273 } else if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3274 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref long failure\n");
3277 // Comment below left from previous code as it is an interresting note.
3278 /* First field in pair is in short term list or
3279 * at a different long term index.
3280 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3281 * Report the problem and keep the pair where it is,
3282 * and mark this field valid.
3285 if (h
->long_ref
[mmco
[i
].long_arg
] != s
->current_picture_ptr
) {
3286 remove_long(h
, mmco
[i
].long_arg
, 0);
3288 h
->long_ref
[ mmco
[i
].long_arg
]= s
->current_picture_ptr
;
3289 h
->long_ref
[ mmco
[i
].long_arg
]->long_ref
=1;
3290 h
->long_ref_count
++;
3293 s
->current_picture_ptr
->reference
|= s
->picture_structure
;
3294 current_ref_assigned
=1;
3296 case MMCO_SET_MAX_LONG
:
3297 assert(mmco
[i
].long_arg
<= 16);
3298 // just remove the long term which index is greater than new max
3299 for(j
= mmco
[i
].long_arg
; j
<16; j
++){
3300 remove_long(h
, j
, 0);
3304 while(h
->short_ref_count
){
3305 remove_short(h
, h
->short_ref
[0]->frame_num
, 0);
3307 for(j
= 0; j
< 16; j
++) {
3308 remove_long(h
, j
, 0);
3310 s
->current_picture_ptr
->poc
=
3311 s
->current_picture_ptr
->field_poc
[0]=
3312 s
->current_picture_ptr
->field_poc
[1]=
3316 s
->current_picture_ptr
->frame_num
= 0;
3322 if (!current_ref_assigned
) {
3323 /* Second field of complementary field pair; the first field of
3324 * which is already referenced. If short referenced, it
3325 * should be first entry in short_ref. If not, it must exist
3326 * in long_ref; trying to put it on the short list here is an
3327 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3329 if (h
->short_ref_count
&& h
->short_ref
[0] == s
->current_picture_ptr
) {
3330 /* Just mark the second field valid */
3331 s
->current_picture_ptr
->reference
= PICT_FRAME
;
3332 } else if (s
->current_picture_ptr
->long_ref
) {
3333 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term reference "
3334 "assignment for second field "
3335 "in complementary field pair "
3336 "(first field is long term)\n");
3338 pic
= remove_short(h
, s
->current_picture_ptr
->frame_num
, 0);
3340 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term buffer state detected\n");
3343 if(h
->short_ref_count
)
3344 memmove(&h
->short_ref
[1], &h
->short_ref
[0], h
->short_ref_count
*sizeof(Picture
*));
3346 h
->short_ref
[0]= s
->current_picture_ptr
;
3347 h
->short_ref_count
++;
3348 s
->current_picture_ptr
->reference
|= s
->picture_structure
;
3352 if (h
->long_ref_count
+ h
->short_ref_count
> h
->sps
.ref_frame_count
){
3354 /* We have too many reference frames, probably due to corrupted
3355 * stream. Need to discard one frame. Prevents overrun of the
3356 * short_ref and long_ref buffers.
3358 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3359 "number of reference frames exceeds max (probably "
3360 "corrupt input), discarding one\n");
3362 if (h
->long_ref_count
&& !h
->short_ref_count
) {
3363 for (i
= 0; i
< 16; ++i
)
3368 remove_long(h
, i
, 0);
3370 pic
= h
->short_ref
[h
->short_ref_count
- 1];
3371 remove_short(h
, pic
->frame_num
, 0);
3375 print_short_term(h
);
3380 static int decode_ref_pic_marking(H264Context
*h
, GetBitContext
*gb
){
3381 MpegEncContext
* const s
= &h
->s
;
3385 if(h
->nal_unit_type
== NAL_IDR_SLICE
){ //FIXME fields
3386 s
->broken_link
= get_bits1(gb
) -1;
3388 h
->mmco
[0].opcode
= MMCO_LONG
;
3389 h
->mmco
[0].long_arg
= 0;
3393 if(get_bits1(gb
)){ // adaptive_ref_pic_marking_mode_flag
3394 for(i
= 0; i
<MAX_MMCO_COUNT
; i
++) {
3395 MMCOOpcode opcode
= get_ue_golomb(gb
);
3397 h
->mmco
[i
].opcode
= opcode
;
3398 if(opcode
==MMCO_SHORT2UNUSED
|| opcode
==MMCO_SHORT2LONG
){
3399 h
->mmco
[i
].short_pic_num
= (h
->curr_pic_num
- get_ue_golomb(gb
) - 1) & (h
->max_pic_num
- 1);
3400 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3401 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3405 if(opcode
==MMCO_SHORT2LONG
|| opcode
==MMCO_LONG2UNUSED
|| opcode
==MMCO_LONG
|| opcode
==MMCO_SET_MAX_LONG
){
3406 unsigned int long_arg
= get_ue_golomb(gb
);
3407 if(long_arg
>= 32 || (long_arg
>= 16 && !(opcode
== MMCO_LONG2UNUSED
&& FIELD_PICTURE
))){
3408 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal long ref in memory management control operation %d\n", opcode
);
3411 h
->mmco
[i
].long_arg
= long_arg
;
3414 if(opcode
> (unsigned)MMCO_LONG
){
3415 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal memory management control operation %d\n", opcode
);
3418 if(opcode
== MMCO_END
)
3423 assert(h
->long_ref_count
+ h
->short_ref_count
<= h
->sps
.ref_frame_count
);
3425 if(h
->short_ref_count
&& h
->long_ref_count
+ h
->short_ref_count
== h
->sps
.ref_frame_count
&&
3426 !(FIELD_PICTURE
&& !s
->first_field
&& s
->current_picture_ptr
->reference
)) {
3427 h
->mmco
[0].opcode
= MMCO_SHORT2UNUSED
;
3428 h
->mmco
[0].short_pic_num
= h
->short_ref
[ h
->short_ref_count
- 1 ]->frame_num
;
3430 if (FIELD_PICTURE
) {
3431 h
->mmco
[0].short_pic_num
*= 2;
3432 h
->mmco
[1].opcode
= MMCO_SHORT2UNUSED
;
3433 h
->mmco
[1].short_pic_num
= h
->mmco
[0].short_pic_num
+ 1;
3443 static int init_poc(H264Context
*h
){
3444 MpegEncContext
* const s
= &h
->s
;
3445 const int max_frame_num
= 1<<h
->sps
.log2_max_frame_num
;
3447 Picture
*cur
= s
->current_picture_ptr
;
3449 h
->frame_num_offset
= h
->prev_frame_num_offset
;
3450 if(h
->frame_num
< h
->prev_frame_num
)
3451 h
->frame_num_offset
+= max_frame_num
;
3453 if(h
->sps
.poc_type
==0){
3454 const int max_poc_lsb
= 1<<h
->sps
.log2_max_poc_lsb
;
3456 if (h
->poc_lsb
< h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
>= max_poc_lsb
/2)
3457 h
->poc_msb
= h
->prev_poc_msb
+ max_poc_lsb
;
3458 else if(h
->poc_lsb
> h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
< -max_poc_lsb
/2)
3459 h
->poc_msb
= h
->prev_poc_msb
- max_poc_lsb
;
3461 h
->poc_msb
= h
->prev_poc_msb
;
3462 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3464 field_poc
[1] = h
->poc_msb
+ h
->poc_lsb
;
3465 if(s
->picture_structure
== PICT_FRAME
)
3466 field_poc
[1] += h
->delta_poc_bottom
;
3467 }else if(h
->sps
.poc_type
==1){
3468 int abs_frame_num
, expected_delta_per_poc_cycle
, expectedpoc
;
3471 if(h
->sps
.poc_cycle_length
!= 0)
3472 abs_frame_num
= h
->frame_num_offset
+ h
->frame_num
;
3476 if(h
->nal_ref_idc
==0 && abs_frame_num
> 0)
3479 expected_delta_per_poc_cycle
= 0;
3480 for(i
=0; i
< h
->sps
.poc_cycle_length
; i
++)
3481 expected_delta_per_poc_cycle
+= h
->sps
.offset_for_ref_frame
[ i
]; //FIXME integrate during sps parse
3483 if(abs_frame_num
> 0){
3484 int poc_cycle_cnt
= (abs_frame_num
- 1) / h
->sps
.poc_cycle_length
;
3485 int frame_num_in_poc_cycle
= (abs_frame_num
- 1) % h
->sps
.poc_cycle_length
;
3487 expectedpoc
= poc_cycle_cnt
* expected_delta_per_poc_cycle
;
3488 for(i
= 0; i
<= frame_num_in_poc_cycle
; i
++)
3489 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_ref_frame
[ i
];
3493 if(h
->nal_ref_idc
== 0)
3494 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_non_ref_pic
;
3496 field_poc
[0] = expectedpoc
+ h
->delta_poc
[0];
3497 field_poc
[1] = field_poc
[0] + h
->sps
.offset_for_top_to_bottom_field
;
3499 if(s
->picture_structure
== PICT_FRAME
)
3500 field_poc
[1] += h
->delta_poc
[1];
3502 int poc
= 2*(h
->frame_num_offset
+ h
->frame_num
);
3511 if(s
->picture_structure
!= PICT_BOTTOM_FIELD
)
3512 s
->current_picture_ptr
->field_poc
[0]= field_poc
[0];
3513 if(s
->picture_structure
!= PICT_TOP_FIELD
)
3514 s
->current_picture_ptr
->field_poc
[1]= field_poc
[1];
3515 cur
->poc
= FFMIN(cur
->field_poc
[0], cur
->field_poc
[1]);
3522 * initialize scan tables
3524 static void init_scan_tables(H264Context
*h
){
3525 MpegEncContext
* const s
= &h
->s
;
3527 if(s
->dsp
.h264_idct_add
== ff_h264_idct_add_c
){ //FIXME little ugly
3528 memcpy(h
->zigzag_scan
, zigzag_scan
, 16*sizeof(uint8_t));
3529 memcpy(h
-> field_scan
, field_scan
, 16*sizeof(uint8_t));
3531 for(i
=0; i
<16; i
++){
3532 #define T(x) (x>>2) | ((x<<2) & 0xF)
3533 h
->zigzag_scan
[i
] = T(zigzag_scan
[i
]);
3534 h
-> field_scan
[i
] = T( field_scan
[i
]);
3538 if(s
->dsp
.h264_idct8_add
== ff_h264_idct8_add_c
){
3539 memcpy(h
->zigzag_scan8x8
, zigzag_scan8x8
, 64*sizeof(uint8_t));
3540 memcpy(h
->zigzag_scan8x8_cavlc
, zigzag_scan8x8_cavlc
, 64*sizeof(uint8_t));
3541 memcpy(h
->field_scan8x8
, field_scan8x8
, 64*sizeof(uint8_t));
3542 memcpy(h
->field_scan8x8_cavlc
, field_scan8x8_cavlc
, 64*sizeof(uint8_t));
3544 for(i
=0; i
<64; i
++){
3545 #define T(x) (x>>3) | ((x&7)<<3)
3546 h
->zigzag_scan8x8
[i
] = T(zigzag_scan8x8
[i
]);
3547 h
->zigzag_scan8x8_cavlc
[i
] = T(zigzag_scan8x8_cavlc
[i
]);
3548 h
->field_scan8x8
[i
] = T(field_scan8x8
[i
]);
3549 h
->field_scan8x8_cavlc
[i
] = T(field_scan8x8_cavlc
[i
]);
3553 if(h
->sps
.transform_bypass
){ //FIXME same ugly
3554 h
->zigzag_scan_q0
= zigzag_scan
;
3555 h
->zigzag_scan8x8_q0
= zigzag_scan8x8
;
3556 h
->zigzag_scan8x8_cavlc_q0
= zigzag_scan8x8_cavlc
;
3557 h
->field_scan_q0
= field_scan
;
3558 h
->field_scan8x8_q0
= field_scan8x8
;
3559 h
->field_scan8x8_cavlc_q0
= field_scan8x8_cavlc
;
3561 h
->zigzag_scan_q0
= h
->zigzag_scan
;
3562 h
->zigzag_scan8x8_q0
= h
->zigzag_scan8x8
;
3563 h
->zigzag_scan8x8_cavlc_q0
= h
->zigzag_scan8x8_cavlc
;
3564 h
->field_scan_q0
= h
->field_scan
;
3565 h
->field_scan8x8_q0
= h
->field_scan8x8
;
3566 h
->field_scan8x8_cavlc_q0
= h
->field_scan8x8_cavlc
;
3571 * Replicates H264 "master" context to thread contexts.
3573 static void clone_slice(H264Context
*dst
, H264Context
*src
)
3575 memcpy(dst
->block_offset
, src
->block_offset
, sizeof(dst
->block_offset
));
3576 dst
->s
.current_picture_ptr
= src
->s
.current_picture_ptr
;
3577 dst
->s
.current_picture
= src
->s
.current_picture
;
3578 dst
->s
.linesize
= src
->s
.linesize
;
3579 dst
->s
.uvlinesize
= src
->s
.uvlinesize
;
3580 dst
->s
.first_field
= src
->s
.first_field
;
3582 dst
->prev_poc_msb
= src
->prev_poc_msb
;
3583 dst
->prev_poc_lsb
= src
->prev_poc_lsb
;
3584 dst
->prev_frame_num_offset
= src
->prev_frame_num_offset
;
3585 dst
->prev_frame_num
= src
->prev_frame_num
;
3586 dst
->short_ref_count
= src
->short_ref_count
;
3588 memcpy(dst
->short_ref
, src
->short_ref
, sizeof(dst
->short_ref
));
3589 memcpy(dst
->long_ref
, src
->long_ref
, sizeof(dst
->long_ref
));
3590 memcpy(dst
->default_ref_list
, src
->default_ref_list
, sizeof(dst
->default_ref_list
));
3591 memcpy(dst
->ref_list
, src
->ref_list
, sizeof(dst
->ref_list
));
3593 memcpy(dst
->dequant4_coeff
, src
->dequant4_coeff
, sizeof(src
->dequant4_coeff
));
3594 memcpy(dst
->dequant8_coeff
, src
->dequant8_coeff
, sizeof(src
->dequant8_coeff
));
3598 * decodes a slice header.
3599 * This will also call MPV_common_init() and frame_start() as needed.
3601 * @param h h264context
3602 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3604 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3606 static int decode_slice_header(H264Context
*h
, H264Context
*h0
){
3607 MpegEncContext
* const s
= &h
->s
;
3608 MpegEncContext
* const s0
= &h0
->s
;
3609 unsigned int first_mb_in_slice
;
3610 unsigned int pps_id
;
3611 int num_ref_idx_active_override_flag
;
3612 static const uint8_t slice_type_map
[5]= {FF_P_TYPE
, FF_B_TYPE
, FF_I_TYPE
, FF_SP_TYPE
, FF_SI_TYPE
};
3613 unsigned int slice_type
, tmp
, i
, j
;
3614 int default_ref_list_done
= 0;
3615 int last_pic_structure
;
3617 s
->dropable
= h
->nal_ref_idc
== 0;
3619 if((s
->avctx
->flags2
& CODEC_FLAG2_FAST
) && !h
->nal_ref_idc
){
3620 s
->me
.qpel_put
= s
->dsp
.put_2tap_qpel_pixels_tab
;
3621 s
->me
.qpel_avg
= s
->dsp
.avg_2tap_qpel_pixels_tab
;
3623 s
->me
.qpel_put
= s
->dsp
.put_h264_qpel_pixels_tab
;
3624 s
->me
.qpel_avg
= s
->dsp
.avg_h264_qpel_pixels_tab
;
3627 first_mb_in_slice
= get_ue_golomb(&s
->gb
);
3629 if((s
->flags2
& CODEC_FLAG2_CHUNKS
) && first_mb_in_slice
== 0){
3630 h0
->current_slice
= 0;
3631 if (!s0
->first_field
)
3632 s
->current_picture_ptr
= NULL
;
3635 slice_type
= get_ue_golomb(&s
->gb
);
3637 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "slice type too large (%d) at %d %d\n", h
->slice_type
, s
->mb_x
, s
->mb_y
);
3642 h
->slice_type_fixed
=1;
3644 h
->slice_type_fixed
=0;
3646 slice_type
= slice_type_map
[ slice_type
];
3647 if (slice_type
== FF_I_TYPE
3648 || (h0
->current_slice
!= 0 && slice_type
== h0
->last_slice_type
) ) {
3649 default_ref_list_done
= 1;
3651 h
->slice_type
= slice_type
;
3652 h
->slice_type_nos
= slice_type
& 3;
3654 s
->pict_type
= h
->slice_type
; // to make a few old functions happy, it's wrong though
3655 if (s
->pict_type
== FF_B_TYPE
&& s0
->last_picture_ptr
== NULL
) {
3656 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3657 "B picture before any references, skipping\n");
3661 pps_id
= get_ue_golomb(&s
->gb
);
3662 if(pps_id
>=MAX_PPS_COUNT
){
3663 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "pps_id out of range\n");
3666 if(!h0
->pps_buffers
[pps_id
]) {
3667 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non-existing PPS referenced\n");
3670 h
->pps
= *h0
->pps_buffers
[pps_id
];
3672 if(!h0
->sps_buffers
[h
->pps
.sps_id
]) {
3673 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non-existing SPS referenced\n");
3676 h
->sps
= *h0
->sps_buffers
[h
->pps
.sps_id
];
3678 if(h
== h0
&& h
->dequant_coeff_pps
!= pps_id
){
3679 h
->dequant_coeff_pps
= pps_id
;
3680 init_dequant_tables(h
);
3683 s
->mb_width
= h
->sps
.mb_width
;
3684 s
->mb_height
= h
->sps
.mb_height
* (2 - h
->sps
.frame_mbs_only_flag
);
3686 h
->b_stride
= s
->mb_width
*4;
3687 h
->b8_stride
= s
->mb_width
*2;
3689 s
->width
= 16*s
->mb_width
- 2*FFMIN(h
->sps
.crop_right
, 7);
3690 if(h
->sps
.frame_mbs_only_flag
)
3691 s
->height
= 16*s
->mb_height
- 2*FFMIN(h
->sps
.crop_bottom
, 7);
3693 s
->height
= 16*s
->mb_height
- 4*FFMIN(h
->sps
.crop_bottom
, 3);
3695 if (s
->context_initialized
3696 && ( s
->width
!= s
->avctx
->width
|| s
->height
!= s
->avctx
->height
)) {
3698 return -1; // width / height changed during parallelized decoding
3702 if (!s
->context_initialized
) {
3704 return -1; // we cant (re-)initialize context during parallel decoding
3705 if (MPV_common_init(s
) < 0)
3709 init_scan_tables(h
);
3712 for(i
= 1; i
< s
->avctx
->thread_count
; i
++) {
3714 c
= h
->thread_context
[i
] = av_malloc(sizeof(H264Context
));
3715 memcpy(c
, h
->s
.thread_context
[i
], sizeof(MpegEncContext
));
3716 memset(&c
->s
+ 1, 0, sizeof(H264Context
) - sizeof(MpegEncContext
));
3719 init_scan_tables(c
);
3723 for(i
= 0; i
< s
->avctx
->thread_count
; i
++)
3724 if(context_init(h
->thread_context
[i
]) < 0)
3727 s
->avctx
->width
= s
->width
;
3728 s
->avctx
->height
= s
->height
;
3729 s
->avctx
->sample_aspect_ratio
= h
->sps
.sar
;
3730 if(!s
->avctx
->sample_aspect_ratio
.den
)
3731 s
->avctx
->sample_aspect_ratio
.den
= 1;
3733 if(h
->sps
.timing_info_present_flag
){
3734 s
->avctx
->time_base
= (AVRational
){h
->sps
.num_units_in_tick
* 2, h
->sps
.time_scale
};
3735 if(h
->x264_build
> 0 && h
->x264_build
< 44)
3736 s
->avctx
->time_base
.den
*= 2;
3737 av_reduce(&s
->avctx
->time_base
.num
, &s
->avctx
->time_base
.den
,
3738 s
->avctx
->time_base
.num
, s
->avctx
->time_base
.den
, 1<<30);
3742 h
->frame_num
= get_bits(&s
->gb
, h
->sps
.log2_max_frame_num
);
3745 h
->mb_aff_frame
= 0;
3746 last_pic_structure
= s0
->picture_structure
;
3747 if(h
->sps
.frame_mbs_only_flag
){
3748 s
->picture_structure
= PICT_FRAME
;
3750 if(get_bits1(&s
->gb
)) { //field_pic_flag
3751 s
->picture_structure
= PICT_TOP_FIELD
+ get_bits1(&s
->gb
); //bottom_field_flag
3753 s
->picture_structure
= PICT_FRAME
;
3754 h
->mb_aff_frame
= h
->sps
.mb_aff
;
3757 h
->mb_field_decoding_flag
= s
->picture_structure
!= PICT_FRAME
;
3759 if(h0
->current_slice
== 0){
3760 while(h
->frame_num
!= h
->prev_frame_num
&&
3761 h
->frame_num
!= (h
->prev_frame_num
+1)%(1<<h
->sps
.log2_max_frame_num
)){
3762 av_log(NULL
, AV_LOG_DEBUG
, "Frame num gap %d %d\n", h
->frame_num
, h
->prev_frame_num
);
3764 h
->prev_frame_num
++;
3765 h
->prev_frame_num
%= 1<<h
->sps
.log2_max_frame_num
;
3766 s
->current_picture_ptr
->frame_num
= h
->prev_frame_num
;
3767 execute_ref_pic_marking(h
, NULL
, 0);
3770 /* See if we have a decoded first field looking for a pair... */
3771 if (s0
->first_field
) {
3772 assert(s0
->current_picture_ptr
);
3773 assert(s0
->current_picture_ptr
->data
[0]);
3774 assert(s0
->current_picture_ptr
->reference
!= DELAYED_PIC_REF
);
3776 /* figure out if we have a complementary field pair */
3777 if (!FIELD_PICTURE
|| s
->picture_structure
== last_pic_structure
) {
3779 * Previous field is unmatched. Don't display it, but let it
3780 * remain for reference if marked as such.
3782 s0
->current_picture_ptr
= NULL
;
3783 s0
->first_field
= FIELD_PICTURE
;
3786 if (h
->nal_ref_idc
&&
3787 s0
->current_picture_ptr
->reference
&&
3788 s0
->current_picture_ptr
->frame_num
!= h
->frame_num
) {
3790 * This and previous field were reference, but had
3791 * different frame_nums. Consider this field first in
3792 * pair. Throw away previous field except for reference
3795 s0
->first_field
= 1;
3796 s0
->current_picture_ptr
= NULL
;
3799 /* Second field in complementary pair */
3800 s0
->first_field
= 0;
3805 /* Frame or first field in a potentially complementary pair */
3806 assert(!s0
->current_picture_ptr
);
3807 s0
->first_field
= FIELD_PICTURE
;
3810 if((!FIELD_PICTURE
|| s0
->first_field
) && frame_start(h
) < 0) {
3811 s0
->first_field
= 0;
3818 s
->current_picture_ptr
->frame_num
= h
->frame_num
; //FIXME frame_num cleanup
3820 assert(s
->mb_num
== s
->mb_width
* s
->mb_height
);
3821 if(first_mb_in_slice
<< FIELD_OR_MBAFF_PICTURE
>= s
->mb_num
||
3822 first_mb_in_slice
>= s
->mb_num
){
3823 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "first_mb_in_slice overflow\n");
3826 s
->resync_mb_x
= s
->mb_x
= first_mb_in_slice
% s
->mb_width
;
3827 s
->resync_mb_y
= s
->mb_y
= (first_mb_in_slice
/ s
->mb_width
) << FIELD_OR_MBAFF_PICTURE
;
3828 if (s
->picture_structure
== PICT_BOTTOM_FIELD
)
3829 s
->resync_mb_y
= s
->mb_y
= s
->mb_y
+ 1;
3830 assert(s
->mb_y
< s
->mb_height
);
3832 if(s
->picture_structure
==PICT_FRAME
){
3833 h
->curr_pic_num
= h
->frame_num
;
3834 h
->max_pic_num
= 1<< h
->sps
.log2_max_frame_num
;
3836 h
->curr_pic_num
= 2*h
->frame_num
+ 1;
3837 h
->max_pic_num
= 1<<(h
->sps
.log2_max_frame_num
+ 1);
3840 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
3841 get_ue_golomb(&s
->gb
); /* idr_pic_id */
3844 if(h
->sps
.poc_type
==0){
3845 h
->poc_lsb
= get_bits(&s
->gb
, h
->sps
.log2_max_poc_lsb
);
3847 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
){
3848 h
->delta_poc_bottom
= get_se_golomb(&s
->gb
);
3852 if(h
->sps
.poc_type
==1 && !h
->sps
.delta_pic_order_always_zero_flag
){
3853 h
->delta_poc
[0]= get_se_golomb(&s
->gb
);
3855 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
)
3856 h
->delta_poc
[1]= get_se_golomb(&s
->gb
);
3861 if(h
->pps
.redundant_pic_cnt_present
){
3862 h
->redundant_pic_count
= get_ue_golomb(&s
->gb
);
3865 //set defaults, might be overridden a few lines later
3866 h
->ref_count
[0]= h
->pps
.ref_count
[0];
3867 h
->ref_count
[1]= h
->pps
.ref_count
[1];
3869 if(h
->slice_type_nos
!= FF_I_TYPE
){
3870 if(h
->slice_type_nos
== FF_B_TYPE
){
3871 h
->direct_spatial_mv_pred
= get_bits1(&s
->gb
);
3873 num_ref_idx_active_override_flag
= get_bits1(&s
->gb
);
3875 if(num_ref_idx_active_override_flag
){
3876 h
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
3877 if(h
->slice_type_nos
==FF_B_TYPE
)
3878 h
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
3880 if(h
->ref_count
[0]-1 > 32-1 || h
->ref_count
[1]-1 > 32-1){
3881 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow\n");
3882 h
->ref_count
[0]= h
->ref_count
[1]= 1;
3886 if(h
->slice_type_nos
== FF_B_TYPE
)
3893 if(!default_ref_list_done
){
3894 fill_default_ref_list(h
);
3897 if(h
->slice_type_nos
!=FF_I_TYPE
&& decode_ref_pic_list_reordering(h
) < 0)
3900 if( (h
->pps
.weighted_pred
&& h
->slice_type_nos
== FF_P_TYPE
)
3901 || (h
->pps
.weighted_bipred_idc
==1 && h
->slice_type_nos
== FF_B_TYPE
) )
3902 pred_weight_table(h
);
3903 else if(h
->pps
.weighted_bipred_idc
==2 && h
->slice_type_nos
== FF_B_TYPE
)
3904 implicit_weight_table(h
);
3909 decode_ref_pic_marking(h0
, &s
->gb
);
3912 fill_mbaff_ref_list(h
);
3914 if( h
->slice_type_nos
!= FF_I_TYPE
&& h
->pps
.cabac
){
3915 tmp
= get_ue_golomb(&s
->gb
);
3917 av_log(s
->avctx
, AV_LOG_ERROR
, "cabac_init_idc overflow\n");
3920 h
->cabac_init_idc
= tmp
;
3923 h
->last_qscale_diff
= 0;
3924 tmp
= h
->pps
.init_qp
+ get_se_golomb(&s
->gb
);
3926 av_log(s
->avctx
, AV_LOG_ERROR
, "QP %u out of range\n", tmp
);
3930 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
3931 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
3932 //FIXME qscale / qp ... stuff
3933 if(h
->slice_type
== FF_SP_TYPE
){
3934 get_bits1(&s
->gb
); /* sp_for_switch_flag */
3936 if(h
->slice_type
==FF_SP_TYPE
|| h
->slice_type
== FF_SI_TYPE
){
3937 get_se_golomb(&s
->gb
); /* slice_qs_delta */
3940 h
->deblocking_filter
= 1;
3941 h
->slice_alpha_c0_offset
= 0;
3942 h
->slice_beta_offset
= 0;
3943 if( h
->pps
.deblocking_filter_parameters_present
) {
3944 tmp
= get_ue_golomb(&s
->gb
);
3946 av_log(s
->avctx
, AV_LOG_ERROR
, "deblocking_filter_idc %u out of range\n", tmp
);
3949 h
->deblocking_filter
= tmp
;
3950 if(h
->deblocking_filter
< 2)
3951 h
->deblocking_filter
^= 1; // 1<->0
3953 if( h
->deblocking_filter
) {
3954 h
->slice_alpha_c0_offset
= get_se_golomb(&s
->gb
) << 1;
3955 h
->slice_beta_offset
= get_se_golomb(&s
->gb
) << 1;
3959 if( s
->avctx
->skip_loop_filter
>= AVDISCARD_ALL
3960 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONKEY
&& h
->slice_type_nos
!= FF_I_TYPE
)
3961 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_BIDIR
&& h
->slice_type_nos
== FF_B_TYPE
)
3962 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
3963 h
->deblocking_filter
= 0;
3965 if(h
->deblocking_filter
== 1 && h0
->max_contexts
> 1) {
3966 if(s
->avctx
->flags2
& CODEC_FLAG2_FAST
) {
3967 /* Cheat slightly for speed:
3968 Do not bother to deblock across slices. */
3969 h
->deblocking_filter
= 2;
3971 h0
->max_contexts
= 1;
3972 if(!h0
->single_decode_warning
) {
3973 av_log(s
->avctx
, AV_LOG_INFO
, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3974 h0
->single_decode_warning
= 1;
3977 return 1; // deblocking switched inside frame
3982 if( h
->pps
.num_slice_groups
> 1 && h
->pps
.mb_slice_group_map_type
>= 3 && h
->pps
.mb_slice_group_map_type
<= 5)
3983 slice_group_change_cycle
= get_bits(&s
->gb
, ?);
3986 h0
->last_slice_type
= slice_type
;
3987 h
->slice_num
= ++h0
->current_slice
;
3990 int *ref2frm
= h
->ref2frm
[h
->slice_num
&15][j
];
3994 ref2frm
[i
+2]= 4*h
->ref_list
[j
][i
].frame_num
3995 +(h
->ref_list
[j
][i
].reference
&3);
3998 for(i
=16; i
<48; i
++)
3999 ref2frm
[i
+4]= 4*h
->ref_list
[j
][i
].frame_num
4000 +(h
->ref_list
[j
][i
].reference
&3);
4003 h
->emu_edge_width
= (s
->flags
&CODEC_FLAG_EMU_EDGE
) ? 0 : 16;
4004 h
->emu_edge_height
= (FRAME_MBAFF
|| FIELD_PICTURE
) ? 0 : h
->emu_edge_width
;
4006 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
4007 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4009 (s
->picture_structure
==PICT_FRAME
? "F" : s
->picture_structure
==PICT_TOP_FIELD
? "T" : "B"),
4011 av_get_pict_type_char(h
->slice_type
),
4012 pps_id
, h
->frame_num
,
4013 s
->current_picture_ptr
->field_poc
[0], s
->current_picture_ptr
->field_poc
[1],
4014 h
->ref_count
[0], h
->ref_count
[1],
4016 h
->deblocking_filter
, h
->slice_alpha_c0_offset
/2, h
->slice_beta_offset
/2,
4018 h
->use_weight
==1 && h
->use_weight_chroma
? "c" : "",
4019 h
->slice_type
== FF_B_TYPE
? (h
->direct_spatial_mv_pred
? "SPAT" : "TEMP") : ""
4029 static inline int get_level_prefix(GetBitContext
*gb
){
4033 OPEN_READER(re
, gb
);
4034 UPDATE_CACHE(re
, gb
);
4035 buf
=GET_CACHE(re
, gb
);
4037 log
= 32 - av_log2(buf
);
4039 print_bin(buf
>>(32-log
), log
);
4040 av_log(NULL
, AV_LOG_DEBUG
, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf
>>(32-log
), log
, log
-1, get_bits_count(gb
), __FILE__
);
4043 LAST_SKIP_BITS(re
, gb
, log
);
4044 CLOSE_READER(re
, gb
);
4049 static inline int get_dct8x8_allowed(H264Context
*h
){
4052 if(!IS_SUB_8X8(h
->sub_mb_type
[i
])
4053 || (!h
->sps
.direct_8x8_inference_flag
&& IS_DIRECT(h
->sub_mb_type
[i
])))
4060 * decodes a residual block.
4061 * @param n block index
4062 * @param scantable scantable
4063 * @param max_coeff number of coefficients in the block
4064 * @return <0 if an error occurred
4066 static int decode_residual(H264Context
*h
, GetBitContext
*gb
, DCTELEM
*block
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
){
4067 MpegEncContext
* const s
= &h
->s
;
4068 static const int coeff_token_table_index
[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4070 int zeros_left
, coeff_num
, coeff_token
, total_coeff
, i
, j
, trailing_ones
, run_before
;
4072 //FIXME put trailing_onex into the context
4074 if(n
== CHROMA_DC_BLOCK_INDEX
){
4075 coeff_token
= get_vlc2(gb
, chroma_dc_coeff_token_vlc
.table
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 1);
4076 total_coeff
= coeff_token
>>2;
4078 if(n
== LUMA_DC_BLOCK_INDEX
){
4079 total_coeff
= pred_non_zero_count(h
, 0);
4080 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4081 total_coeff
= coeff_token
>>2;
4083 total_coeff
= pred_non_zero_count(h
, n
);
4084 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4085 total_coeff
= coeff_token
>>2;
4086 h
->non_zero_count_cache
[ scan8
[n
] ]= total_coeff
;
4090 //FIXME set last_non_zero?
4094 if(total_coeff
> (unsigned)max_coeff
) {
4095 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "corrupted macroblock %d %d (total_coeff=%d)\n", s
->mb_x
, s
->mb_y
, total_coeff
);
4099 trailing_ones
= coeff_token
&3;
4100 tprintf(h
->s
.avctx
, "trailing:%d, total:%d\n", trailing_ones
, total_coeff
);
4101 assert(total_coeff
<=16);
4103 for(i
=0; i
<trailing_ones
; i
++){
4104 level
[i
]= 1 - 2*get_bits1(gb
);
4108 int level_code
, mask
;
4109 int suffix_length
= total_coeff
> 10 && trailing_ones
< 3;
4110 int prefix
= get_level_prefix(gb
);
4112 //first coefficient has suffix_length equal to 0 or 1
4113 if(prefix
<14){ //FIXME try to build a large unified VLC table for all this
4115 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4117 level_code
= (prefix
<<suffix_length
); //part
4118 }else if(prefix
==14){
4120 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4122 level_code
= prefix
+ get_bits(gb
, 4); //part
4124 level_code
= (15<<suffix_length
) + get_bits(gb
, prefix
-3); //part
4125 if(suffix_length
==0) level_code
+=15; //FIXME doesn't make (much)sense
4127 level_code
+= (1<<(prefix
-3))-4096;
4130 if(trailing_ones
< 3) level_code
+= 2;
4135 mask
= -(level_code
&1);
4136 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
4139 //remaining coefficients have suffix_length > 0
4140 for(;i
<total_coeff
;i
++) {
4141 static const int suffix_limit
[7] = {0,5,11,23,47,95,INT_MAX
};
4142 prefix
= get_level_prefix(gb
);
4144 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
);
4146 level_code
= (15<<suffix_length
) + get_bits(gb
, prefix
-3);
4148 level_code
+= (1<<(prefix
-3))-4096;
4150 mask
= -(level_code
&1);
4151 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
4152 if(level_code
> suffix_limit
[suffix_length
])
4157 if(total_coeff
== max_coeff
)
4160 if(n
== CHROMA_DC_BLOCK_INDEX
)
4161 zeros_left
= get_vlc2(gb
, chroma_dc_total_zeros_vlc
[ total_coeff
-1 ].table
, CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 1);
4163 zeros_left
= get_vlc2(gb
, total_zeros_vlc
[ total_coeff
-1 ].table
, TOTAL_ZEROS_VLC_BITS
, 1);
4166 coeff_num
= zeros_left
+ total_coeff
- 1;
4167 j
= scantable
[coeff_num
];
4169 block
[j
] = level
[0];
4170 for(i
=1;i
<total_coeff
;i
++) {
4173 else if(zeros_left
< 7){
4174 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4176 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4178 zeros_left
-= run_before
;
4179 coeff_num
-= 1 + run_before
;
4180 j
= scantable
[ coeff_num
];
4185 block
[j
] = (level
[0] * qmul
[j
] + 32)>>6;
4186 for(i
=1;i
<total_coeff
;i
++) {
4189 else if(zeros_left
< 7){
4190 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4192 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4194 zeros_left
-= run_before
;
4195 coeff_num
-= 1 + run_before
;
4196 j
= scantable
[ coeff_num
];
4198 block
[j
]= (level
[i
] * qmul
[j
] + 32)>>6;
4203 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "negative number of zero coeffs at %d %d\n", s
->mb_x
, s
->mb_y
);
4210 static void predict_field_decoding_flag(H264Context
*h
){
4211 MpegEncContext
* const s
= &h
->s
;
4212 const int mb_xy
= h
->mb_xy
;
4213 int mb_type
= (h
->slice_table
[mb_xy
-1] == h
->slice_num
)
4214 ? s
->current_picture
.mb_type
[mb_xy
-1]
4215 : (h
->slice_table
[mb_xy
-s
->mb_stride
] == h
->slice_num
)
4216 ? s
->current_picture
.mb_type
[mb_xy
-s
->mb_stride
]
4218 h
->mb_mbaff
= h
->mb_field_decoding_flag
= IS_INTERLACED(mb_type
) ? 1 : 0;
4222 * decodes a P_SKIP or B_SKIP macroblock
4224 static void decode_mb_skip(H264Context
*h
){
4225 MpegEncContext
* const s
= &h
->s
;
4226 const int mb_xy
= h
->mb_xy
;
4229 memset(h
->non_zero_count
[mb_xy
], 0, 16);
4230 memset(h
->non_zero_count_cache
+ 8, 0, 8*5); //FIXME ugly, remove pfui
4233 mb_type
|= MB_TYPE_INTERLACED
;
4235 if( h
->slice_type_nos
== FF_B_TYPE
)
4237 // just for fill_caches. pred_direct_motion will set the real mb_type
4238 mb_type
|= MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
|MB_TYPE_SKIP
;
4240 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4241 pred_direct_motion(h
, &mb_type
);
4242 mb_type
|= MB_TYPE_SKIP
;
4247 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P1L0
|MB_TYPE_SKIP
;
4249 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4250 pred_pskip_motion(h
, &mx
, &my
);
4251 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, 0, 1);
4252 fill_rectangle( h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mx
,my
), 4);
4255 write_back_motion(h
, mb_type
);
4256 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4257 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4258 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4259 h
->prev_mb_skipped
= 1;
4263 * decodes a macroblock
4264 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4266 static int decode_mb_cavlc(H264Context
*h
){
4267 MpegEncContext
* const s
= &h
->s
;
4269 int partition_count
;
4270 unsigned int mb_type
, cbp
;
4271 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
4273 mb_xy
= h
->mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
4275 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?
4277 tprintf(s
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
4278 cbp
= 0; /* avoid warning. FIXME: find a solution without slowing
4280 if(h
->slice_type_nos
!= FF_I_TYPE
){
4281 if(s
->mb_skip_run
==-1)
4282 s
->mb_skip_run
= get_ue_golomb(&s
->gb
);
4284 if (s
->mb_skip_run
--) {
4285 if(FRAME_MBAFF
&& (s
->mb_y
&1) == 0){
4286 if(s
->mb_skip_run
==0)
4287 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4289 predict_field_decoding_flag(h
);
4296 if( (s
->mb_y
&1) == 0 )
4297 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4300 h
->prev_mb_skipped
= 0;
4302 mb_type
= get_ue_golomb(&s
->gb
);
4303 if(h
->slice_type_nos
== FF_B_TYPE
){
4305 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
4306 mb_type
= b_mb_type_info
[mb_type
].type
;
4309 goto decode_intra_mb
;
4311 }else if(h
->slice_type_nos
== FF_P_TYPE
){
4313 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
4314 mb_type
= p_mb_type_info
[mb_type
].type
;
4317 goto decode_intra_mb
;
4320 assert(h
->slice_type_nos
== FF_I_TYPE
);
4321 if(h
->slice_type
== FF_SI_TYPE
&& mb_type
)
4325 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_type %d in %c slice too large at %d %d\n", mb_type
, av_get_pict_type_char(h
->slice_type
), s
->mb_x
, s
->mb_y
);
4329 cbp
= i_mb_type_info
[mb_type
].cbp
;
4330 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
4331 mb_type
= i_mb_type_info
[mb_type
].type
;
4335 mb_type
|= MB_TYPE_INTERLACED
;
4337 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4339 if(IS_INTRA_PCM(mb_type
)){
4342 // We assume these blocks are very rare so we do not optimize it.
4343 align_get_bits(&s
->gb
);
4345 // The pixels are stored in the same order as levels in h->mb array.
4346 for(x
=0; x
< (CHROMA
? 384 : 256); x
++){
4347 ((uint8_t*)h
->mb
)[x
]= get_bits(&s
->gb
, 8);
4350 // In deblocking, the quantizer is 0
4351 s
->current_picture
.qscale_table
[mb_xy
]= 0;
4352 // All coeffs are present
4353 memset(h
->non_zero_count
[mb_xy
], 16, 16);
4355 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4360 h
->ref_count
[0] <<= 1;
4361 h
->ref_count
[1] <<= 1;
4364 fill_caches(h
, mb_type
, 0);
4367 if(IS_INTRA(mb_type
)){
4369 // init_top_left_availability(h);
4370 if(IS_INTRA4x4(mb_type
)){
4373 if(dct8x8_allowed
&& get_bits1(&s
->gb
)){
4374 mb_type
|= MB_TYPE_8x8DCT
;
4378 // fill_intra4x4_pred_table(h);
4379 for(i
=0; i
<16; i
+=di
){
4380 int mode
= pred_intra_mode(h
, i
);
4382 if(!get_bits1(&s
->gb
)){
4383 const int rem_mode
= get_bits(&s
->gb
, 3);
4384 mode
= rem_mode
+ (rem_mode
>= mode
);
4388 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
4390 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = mode
;
4392 write_back_intra_pred_mode(h
);
4393 if( check_intra4x4_pred_mode(h
) < 0)
4396 h
->intra16x16_pred_mode
= check_intra_pred_mode(h
, h
->intra16x16_pred_mode
);
4397 if(h
->intra16x16_pred_mode
< 0)
4401 pred_mode
= check_intra_pred_mode(h
, get_ue_golomb(&s
->gb
));
4404 h
->chroma_pred_mode
= pred_mode
;
4406 }else if(partition_count
==4){
4407 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
4409 if(h
->slice_type_nos
== FF_B_TYPE
){
4411 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
4412 if(h
->sub_mb_type
[i
] >=13){
4413 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "B sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4416 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4417 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4419 if( IS_DIRECT(h
->sub_mb_type
[0]) || IS_DIRECT(h
->sub_mb_type
[1])
4420 || IS_DIRECT(h
->sub_mb_type
[2]) || IS_DIRECT(h
->sub_mb_type
[3])) {
4421 pred_direct_motion(h
, &mb_type
);
4422 h
->ref_cache
[0][scan8
[4]] =
4423 h
->ref_cache
[1][scan8
[4]] =
4424 h
->ref_cache
[0][scan8
[12]] =
4425 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
4428 assert(h
->slice_type_nos
== FF_P_TYPE
); //FIXME SP correct ?
4430 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
4431 if(h
->sub_mb_type
[i
] >=4){
4432 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "P sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4435 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4436 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4440 for(list
=0; list
<h
->list_count
; list
++){
4441 int ref_count
= IS_REF0(mb_type
) ? 1 : h
->ref_count
[list
];
4443 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
4444 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4445 unsigned int tmp
= get_te0_golomb(&s
->gb
, ref_count
); //FIXME init to 0 before and skip?
4447 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", tmp
);
4459 dct8x8_allowed
= get_dct8x8_allowed(h
);
4461 for(list
=0; list
<h
->list_count
; list
++){
4463 if(IS_DIRECT(h
->sub_mb_type
[i
])) {
4464 h
->ref_cache
[list
][ scan8
[4*i
] ] = h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
4467 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
4468 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
4470 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4471 const int sub_mb_type
= h
->sub_mb_type
[i
];
4472 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
4473 for(j
=0; j
<sub_partition_count
[i
]; j
++){
4475 const int index
= 4*i
+ block_width
*j
;
4476 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
4477 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mx
, &my
);
4478 mx
+= get_se_golomb(&s
->gb
);
4479 my
+= get_se_golomb(&s
->gb
);
4480 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4482 if(IS_SUB_8X8(sub_mb_type
)){
4484 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
4486 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
4487 }else if(IS_SUB_8X4(sub_mb_type
)){
4488 mv_cache
[ 1 ][0]= mx
;
4489 mv_cache
[ 1 ][1]= my
;
4490 }else if(IS_SUB_4X8(sub_mb_type
)){
4491 mv_cache
[ 8 ][0]= mx
;
4492 mv_cache
[ 8 ][1]= my
;
4494 mv_cache
[ 0 ][0]= mx
;
4495 mv_cache
[ 0 ][1]= my
;
4498 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
4504 }else if(IS_DIRECT(mb_type
)){
4505 pred_direct_motion(h
, &mb_type
);
4506 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
4508 int list
, mx
, my
, i
;
4509 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4510 if(IS_16X16(mb_type
)){
4511 for(list
=0; list
<h
->list_count
; list
++){
4513 if(IS_DIR(mb_type
, 0, list
)){
4514 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4515 if(val
>= h
->ref_count
[list
]){
4516 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4520 val
= LIST_NOT_USED
&0xFF;
4521 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 1);
4523 for(list
=0; list
<h
->list_count
; list
++){
4525 if(IS_DIR(mb_type
, 0, list
)){
4526 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mx
, &my
);
4527 mx
+= get_se_golomb(&s
->gb
);
4528 my
+= get_se_golomb(&s
->gb
);
4529 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4531 val
= pack16to32(mx
,my
);
4534 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 4);
4537 else if(IS_16X8(mb_type
)){
4538 for(list
=0; list
<h
->list_count
; list
++){
4541 if(IS_DIR(mb_type
, i
, list
)){
4542 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4543 if(val
>= h
->ref_count
[list
]){
4544 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4548 val
= LIST_NOT_USED
&0xFF;
4549 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 1);
4552 for(list
=0; list
<h
->list_count
; list
++){
4555 if(IS_DIR(mb_type
, i
, list
)){
4556 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mx
, &my
);
4557 mx
+= get_se_golomb(&s
->gb
);
4558 my
+= get_se_golomb(&s
->gb
);
4559 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4561 val
= pack16to32(mx
,my
);
4564 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 4);
4568 assert(IS_8X16(mb_type
));
4569 for(list
=0; list
<h
->list_count
; list
++){
4572 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
4573 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4574 if(val
>= h
->ref_count
[list
]){
4575 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4579 val
= LIST_NOT_USED
&0xFF;
4580 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 1);
4583 for(list
=0; list
<h
->list_count
; list
++){
4586 if(IS_DIR(mb_type
, i
, list
)){
4587 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mx
, &my
);
4588 mx
+= get_se_golomb(&s
->gb
);
4589 my
+= get_se_golomb(&s
->gb
);
4590 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4592 val
= pack16to32(mx
,my
);
4595 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 4);
4601 if(IS_INTER(mb_type
))
4602 write_back_motion(h
, mb_type
);
4604 if(!IS_INTRA16x16(mb_type
)){
4605 cbp
= get_ue_golomb(&s
->gb
);
4607 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cbp too large (%u) at %d %d\n", cbp
, s
->mb_x
, s
->mb_y
);
4612 if(IS_INTRA4x4(mb_type
)) cbp
= golomb_to_intra4x4_cbp
[cbp
];
4613 else cbp
= golomb_to_inter_cbp
[cbp
];
4615 if(IS_INTRA4x4(mb_type
)) cbp
= golomb_to_intra4x4_cbp_gray
[cbp
];
4616 else cbp
= golomb_to_inter_cbp_gray
[cbp
];
4621 if(dct8x8_allowed
&& (cbp
&15) && !IS_INTRA(mb_type
)){
4622 if(get_bits1(&s
->gb
)){
4623 mb_type
|= MB_TYPE_8x8DCT
;
4624 h
->cbp_table
[mb_xy
]= cbp
;
4627 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4629 if(cbp
|| IS_INTRA16x16(mb_type
)){
4630 int i8x8
, i4x4
, chroma_idx
;
4632 GetBitContext
*gb
= IS_INTRA(mb_type
) ? h
->intra_gb_ptr
: h
->inter_gb_ptr
;
4633 const uint8_t *scan
, *scan8x8
, *dc_scan
;
4635 // fill_non_zero_count_cache(h);
4637 if(IS_INTERLACED(mb_type
)){
4638 scan8x8
= s
->qscale
? h
->field_scan8x8_cavlc
: h
->field_scan8x8_cavlc_q0
;
4639 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
4640 dc_scan
= luma_dc_field_scan
;
4642 scan8x8
= s
->qscale
? h
->zigzag_scan8x8_cavlc
: h
->zigzag_scan8x8_cavlc_q0
;
4643 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
4644 dc_scan
= luma_dc_zigzag_scan
;
4647 dquant
= get_se_golomb(&s
->gb
);
4649 if( dquant
> 25 || dquant
< -26 ){
4650 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "dquant out of range (%d) at %d %d\n", dquant
, s
->mb_x
, s
->mb_y
);
4654 s
->qscale
+= dquant
;
4655 if(((unsigned)s
->qscale
) > 51){
4656 if(s
->qscale
<0) s
->qscale
+= 52;
4657 else s
->qscale
-= 52;
4660 h
->chroma_qp
[0]= get_chroma_qp(h
, 0, s
->qscale
);
4661 h
->chroma_qp
[1]= get_chroma_qp(h
, 1, s
->qscale
);
4662 if(IS_INTRA16x16(mb_type
)){
4663 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
, LUMA_DC_BLOCK_INDEX
, dc_scan
, h
->dequant4_coeff
[0][s
->qscale
], 16) < 0){
4664 return -1; //FIXME continue if partitioned and other return -1 too
4667 assert((cbp
&15) == 0 || (cbp
&15) == 15);
4670 for(i8x8
=0; i8x8
<4; i8x8
++){
4671 for(i4x4
=0; i4x4
<4; i4x4
++){
4672 const int index
= i4x4
+ 4*i8x8
;
4673 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
+ 16*index
, index
, scan
+ 1, h
->dequant4_coeff
[0][s
->qscale
], 15) < 0 ){
4679 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
4682 for(i8x8
=0; i8x8
<4; i8x8
++){
4683 if(cbp
& (1<<i8x8
)){
4684 if(IS_8x8DCT(mb_type
)){
4685 DCTELEM
*buf
= &h
->mb
[64*i8x8
];
4687 for(i4x4
=0; i4x4
<4; i4x4
++){
4688 if( decode_residual(h
, gb
, buf
, i4x4
+4*i8x8
, scan8x8
+16*i4x4
,
4689 h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 16) <0 )
4692 nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
4693 nnz
[0] += nnz
[1] + nnz
[8] + nnz
[9];
4695 for(i4x4
=0; i4x4
<4; i4x4
++){
4696 const int index
= i4x4
+ 4*i8x8
;
4698 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
, h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
], 16) <0 ){
4704 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
4705 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
4711 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++)
4712 if( decode_residual(h
, gb
, h
->mb
+ 256 + 16*4*chroma_idx
, CHROMA_DC_BLOCK_INDEX
, chroma_dc_scan
, NULL
, 4) < 0){
4718 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++){
4719 const uint32_t *qmul
= h
->dequant4_coeff
[chroma_idx
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[chroma_idx
]];
4720 for(i4x4
=0; i4x4
<4; i4x4
++){
4721 const int index
= 16 + 4*chroma_idx
+ i4x4
;
4722 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
+ 1, qmul
, 15) < 0){
4728 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
4729 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
4730 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
4733 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
4734 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
4735 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
4736 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
4738 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4739 write_back_non_zero_count(h
);
4742 h
->ref_count
[0] >>= 1;
4743 h
->ref_count
[1] >>= 1;
4749 static int decode_cabac_field_decoding_flag(H264Context
*h
) {
4750 MpegEncContext
* const s
= &h
->s
;
4751 const int mb_x
= s
->mb_x
;
4752 const int mb_y
= s
->mb_y
& ~1;
4753 const int mba_xy
= mb_x
- 1 + mb_y
*s
->mb_stride
;
4754 const int mbb_xy
= mb_x
+ (mb_y
-2)*s
->mb_stride
;
4756 unsigned int ctx
= 0;
4758 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) ) {
4761 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) ) {
4765 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[70 + ctx
] );
4768 static int decode_cabac_intra_mb_type(H264Context
*h
, int ctx_base
, int intra_slice
) {
4769 uint8_t *state
= &h
->cabac_state
[ctx_base
];
4773 MpegEncContext
* const s
= &h
->s
;
4774 const int mba_xy
= h
->left_mb_xy
[0];
4775 const int mbb_xy
= h
->top_mb_xy
;
4777 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mba_xy
] ) )
4779 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mbb_xy
] ) )
4781 if( get_cabac_noinline( &h
->cabac
, &state
[ctx
] ) == 0 )
4782 return 0; /* I4x4 */
4785 if( get_cabac_noinline( &h
->cabac
, &state
[0] ) == 0 )
4786 return 0; /* I4x4 */
4789 if( get_cabac_terminate( &h
->cabac
) )
4790 return 25; /* PCM */
4792 mb_type
= 1; /* I16x16 */
4793 mb_type
+= 12 * get_cabac_noinline( &h
->cabac
, &state
[1] ); /* cbp_luma != 0 */
4794 if( get_cabac_noinline( &h
->cabac
, &state
[2] ) ) /* cbp_chroma */
4795 mb_type
+= 4 + 4 * get_cabac_noinline( &h
->cabac
, &state
[2+intra_slice
] );
4796 mb_type
+= 2 * get_cabac_noinline( &h
->cabac
, &state
[3+intra_slice
] );
4797 mb_type
+= 1 * get_cabac_noinline( &h
->cabac
, &state
[3+2*intra_slice
] );
4801 static int decode_cabac_mb_type( H264Context
*h
) {
4802 MpegEncContext
* const s
= &h
->s
;
4804 if( h
->slice_type_nos
== FF_I_TYPE
) {
4805 return decode_cabac_intra_mb_type(h
, 3, 1);
4806 } else if( h
->slice_type_nos
== FF_P_TYPE
) {
4807 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[14] ) == 0 ) {
4809 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[15] ) == 0 ) {
4810 /* P_L0_D16x16, P_8x8 */
4811 return 3 * get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[16] );
4813 /* P_L0_D8x16, P_L0_D16x8 */
4814 return 2 - get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[17] );
4817 return decode_cabac_intra_mb_type(h
, 17, 0) + 5;
4819 } else if( h
->slice_type_nos
== FF_B_TYPE
) {
4820 const int mba_xy
= h
->left_mb_xy
[0];
4821 const int mbb_xy
= h
->top_mb_xy
;
4825 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mba_xy
] ) )
4827 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mbb_xy
] ) )
4830 if( !get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+ctx
] ) )
4831 return 0; /* B_Direct_16x16 */
4833 if( !get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+3] ) ) {
4834 return 1 + get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ); /* B_L[01]_16x16 */
4837 bits
= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+4] ) << 3;
4838 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ) << 2;
4839 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ) << 1;
4840 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] );
4842 return bits
+ 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4843 else if( bits
== 13 ) {
4844 return decode_cabac_intra_mb_type(h
, 32, 0) + 23;
4845 } else if( bits
== 14 )
4846 return 11; /* B_L1_L0_8x16 */
4847 else if( bits
== 15 )
4848 return 22; /* B_8x8 */
4850 bits
= ( bits
<<1 ) | get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] );
4851 return bits
- 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4853 /* TODO SI/SP frames? */
4858 static int decode_cabac_mb_skip( H264Context
*h
, int mb_x
, int mb_y
) {
4859 MpegEncContext
* const s
= &h
->s
;
4863 if(FRAME_MBAFF
){ //FIXME merge with the stuff in fill_caches?
4864 int mb_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
4867 && h
->slice_table
[mba_xy
] == h
->slice_num
4868 && MB_FIELD
== !!IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) )
4869 mba_xy
+= s
->mb_stride
;
4871 mbb_xy
= mb_xy
- s
->mb_stride
;
4873 && h
->slice_table
[mbb_xy
] == h
->slice_num
4874 && IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) )
4875 mbb_xy
-= s
->mb_stride
;
4877 mbb_xy
= mb_x
+ (mb_y
-1)*s
->mb_stride
;
4879 int mb_xy
= h
->mb_xy
;
4881 mbb_xy
= mb_xy
- (s
->mb_stride
<< FIELD_PICTURE
);
4884 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mba_xy
] ))
4886 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mbb_xy
] ))
4889 if( h
->slice_type_nos
== FF_B_TYPE
)
4891 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[11+ctx
] );
4894 static int decode_cabac_mb_intra4x4_pred_mode( H264Context
*h
, int pred_mode
) {
4897 if( get_cabac( &h
->cabac
, &h
->cabac_state
[68] ) )
4900 mode
+= 1 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
4901 mode
+= 2 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
4902 mode
+= 4 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
4904 if( mode
>= pred_mode
)
4910 static int decode_cabac_mb_chroma_pre_mode( H264Context
*h
) {
4911 const int mba_xy
= h
->left_mb_xy
[0];
4912 const int mbb_xy
= h
->top_mb_xy
;
4916 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4917 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mba_xy
] != 0 )
4920 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mbb_xy
] != 0 )
4923 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+ctx
] ) == 0 )
4926 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
4928 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
4934 static int decode_cabac_mb_cbp_luma( H264Context
*h
) {
4935 int cbp_b
, cbp_a
, ctx
, cbp
= 0;
4937 cbp_a
= h
->slice_table
[h
->left_mb_xy
[0]] == h
->slice_num
? h
->left_cbp
: -1;
4938 cbp_b
= h
->slice_table
[h
->top_mb_xy
] == h
->slice_num
? h
->top_cbp
: -1;
4940 ctx
= !(cbp_a
& 0x02) + 2 * !(cbp_b
& 0x04);
4941 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]);
4942 ctx
= !(cbp
& 0x01) + 2 * !(cbp_b
& 0x08);
4943 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 1;
4944 ctx
= !(cbp_a
& 0x08) + 2 * !(cbp
& 0x01);
4945 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 2;
4946 ctx
= !(cbp
& 0x04) + 2 * !(cbp
& 0x02);
4947 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 3;
4950 static int decode_cabac_mb_cbp_chroma( H264Context
*h
) {
4954 cbp_a
= (h
->left_cbp
>>4)&0x03;
4955 cbp_b
= (h
-> top_cbp
>>4)&0x03;
4958 if( cbp_a
> 0 ) ctx
++;
4959 if( cbp_b
> 0 ) ctx
+= 2;
4960 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[77 + ctx
] ) == 0 )
4964 if( cbp_a
== 2 ) ctx
++;
4965 if( cbp_b
== 2 ) ctx
+= 2;
4966 return 1 + get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[77 + ctx
] );
4968 static int decode_cabac_mb_dqp( H264Context
*h
) {
4972 if( h
->last_qscale_diff
!= 0 )
4975 while( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[60 + ctx
] ) ) {
4981 if(val
> 102) //prevent infinite loop
4988 return -(val
+ 1)/2;
4990 static int decode_cabac_p_mb_sub_type( H264Context
*h
) {
4991 if( get_cabac( &h
->cabac
, &h
->cabac_state
[21] ) )
4993 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[22] ) )
4995 if( get_cabac( &h
->cabac
, &h
->cabac_state
[23] ) )
4999 static int decode_cabac_b_mb_sub_type( H264Context
*h
) {
5001 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[36] ) )
5002 return 0; /* B_Direct_8x8 */
5003 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[37] ) )
5004 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L0_8x8, B_L1_8x8 */
5006 if( get_cabac( &h
->cabac
, &h
->cabac_state
[38] ) ) {
5007 if( get_cabac( &h
->cabac
, &h
->cabac_state
[39] ) )
5008 return 11 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L1_4x4, B_Bi_4x4 */
5011 type
+= 2*get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5012 type
+= get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5016 static inline int decode_cabac_mb_transform_size( H264Context
*h
) {
5017 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[399 + h
->neighbor_transform_size
] );
5020 static int decode_cabac_mb_ref( H264Context
*h
, int list
, int n
) {
5021 int refa
= h
->ref_cache
[list
][scan8
[n
] - 1];
5022 int refb
= h
->ref_cache
[list
][scan8
[n
] - 8];
5026 if( h
->slice_type_nos
== FF_B_TYPE
) {
5027 if( refa
> 0 && !h
->direct_cache
[scan8
[n
] - 1] )
5029 if( refb
> 0 && !h
->direct_cache
[scan8
[n
] - 8] )
5038 while( get_cabac( &h
->cabac
, &h
->cabac_state
[54+ctx
] ) ) {
5044 if(ref
>= 32 /*h->ref_list[list]*/){
5045 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "overflow in decode_cabac_mb_ref\n");
5046 return 0; //FIXME we should return -1 and check the return everywhere
5052 static int decode_cabac_mb_mvd( H264Context
*h
, int list
, int n
, int l
) {
5053 int amvd
= abs( h
->mvd_cache
[list
][scan8
[n
] - 1][l
] ) +
5054 abs( h
->mvd_cache
[list
][scan8
[n
] - 8][l
] );
5055 int ctxbase
= (l
== 0) ? 40 : 47;
5060 else if( amvd
> 32 )
5065 if(!get_cabac(&h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
]))
5070 while( mvd
< 9 && get_cabac( &h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
] ) ) {
5078 while( get_cabac_bypass( &h
->cabac
) ) {
5082 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "overflow in decode_cabac_mb_mvd\n");
5087 if( get_cabac_bypass( &h
->cabac
) )
5091 return get_cabac_bypass_sign( &h
->cabac
, -mvd
);
5094 static av_always_inline
int get_cabac_cbf_ctx( H264Context
*h
, int cat
, int idx
, int is_dc
) {
5100 nza
= h
->left_cbp
&0x100;
5101 nzb
= h
-> top_cbp
&0x100;
5103 nza
= (h
->left_cbp
>>(6+idx
))&0x01;
5104 nzb
= (h
-> top_cbp
>>(6+idx
))&0x01;
5108 nza
= h
->non_zero_count_cache
[scan8
[16+idx
] - 1];
5109 nzb
= h
->non_zero_count_cache
[scan8
[16+idx
] - 8];
5111 assert(cat
== 1 || cat
== 2);
5112 nza
= h
->non_zero_count_cache
[scan8
[idx
] - 1];
5113 nzb
= h
->non_zero_count_cache
[scan8
[idx
] - 8];
5123 return ctx
+ 4 * cat
;
5126 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8
[63]) = {
5127 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5128 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5129 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5130 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5133 static av_always_inline
void decode_cabac_residual_internal( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
, int is_dc
) {
5134 static const int significant_coeff_flag_offset
[2][6] = {
5135 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5136 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5138 static const int last_coeff_flag_offset
[2][6] = {
5139 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5140 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5142 static const int coeff_abs_level_m1_offset
[6] = {
5143 227+0, 227+10, 227+20, 227+30, 227+39, 426
5145 static const uint8_t significant_coeff_flag_offset_8x8
[2][63] = {
5146 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5147 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5148 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5149 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5150 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5151 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5152 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5153 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5155 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5156 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5157 * map node ctx => cabac ctx for level=1 */
5158 static const uint8_t coeff_abs_level1_ctx
[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5159 /* map node ctx => cabac ctx for level>1 */
5160 static const uint8_t coeff_abs_levelgt1_ctx
[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5161 static const uint8_t coeff_abs_level_transition
[2][8] = {
5162 /* update node ctx after decoding a level=1 */
5163 { 1, 2, 3, 3, 4, 5, 6, 7 },
5164 /* update node ctx after decoding a level>1 */
5165 { 4, 4, 4, 4, 5, 6, 7, 7 }
5171 int coeff_count
= 0;
5174 uint8_t *significant_coeff_ctx_base
;
5175 uint8_t *last_coeff_ctx_base
;
5176 uint8_t *abs_level_m1_ctx_base
;
5179 #define CABAC_ON_STACK
5181 #ifdef CABAC_ON_STACK
5184 cc
.range
= h
->cabac
.range
;
5185 cc
.low
= h
->cabac
.low
;
5186 cc
.bytestream
= h
->cabac
.bytestream
;
5188 #define CC &h->cabac
5192 /* cat: 0-> DC 16x16 n = 0
5193 * 1-> AC 16x16 n = luma4x4idx
5194 * 2-> Luma4x4 n = luma4x4idx
5195 * 3-> DC Chroma n = iCbCr
5196 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5197 * 5-> Luma8x8 n = 4 * luma8x8idx
5200 /* read coded block flag */
5201 if( is_dc
|| cat
!= 5 ) {
5202 if( get_cabac( CC
, &h
->cabac_state
[85 + get_cabac_cbf_ctx( h
, cat
, n
, is_dc
) ] ) == 0 ) {
5205 h
->non_zero_count_cache
[scan8
[16+n
]] = 0;
5207 h
->non_zero_count_cache
[scan8
[n
]] = 0;
5210 #ifdef CABAC_ON_STACK
5211 h
->cabac
.range
= cc
.range
;
5212 h
->cabac
.low
= cc
.low
;
5213 h
->cabac
.bytestream
= cc
.bytestream
;
5219 significant_coeff_ctx_base
= h
->cabac_state
5220 + significant_coeff_flag_offset
[MB_FIELD
][cat
];
5221 last_coeff_ctx_base
= h
->cabac_state
5222 + last_coeff_flag_offset
[MB_FIELD
][cat
];
5223 abs_level_m1_ctx_base
= h
->cabac_state
5224 + coeff_abs_level_m1_offset
[cat
];
5226 if( !is_dc
&& cat
== 5 ) {
5227 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5228 for(last= 0; last < coefs; last++) { \
5229 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5230 if( get_cabac( CC, sig_ctx )) { \
5231 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5232 index[coeff_count++] = last; \
5233 if( get_cabac( CC, last_ctx ) ) { \
5239 if( last == max_coeff -1 ) {\
5240 index[coeff_count++] = last;\
5242 const uint8_t *sig_off
= significant_coeff_flag_offset_8x8
[MB_FIELD
];
5243 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5244 coeff_count
= decode_significance_8x8_x86(CC
, significant_coeff_ctx_base
, index
, sig_off
);
5246 coeff_count
= decode_significance_x86(CC
, max_coeff
, significant_coeff_ctx_base
, index
);
5248 DECODE_SIGNIFICANCE( 63, sig_off
[last
], last_coeff_flag_offset_8x8
[last
] );
5250 DECODE_SIGNIFICANCE( max_coeff
- 1, last
, last
);
5253 assert(coeff_count
> 0);
5257 h
->cbp_table
[h
->mb_xy
] |= 0x100;
5259 h
->cbp_table
[h
->mb_xy
] |= 0x40 << n
;
5262 fill_rectangle(&h
->non_zero_count_cache
[scan8
[n
]], 2, 2, 8, coeff_count
, 1);
5264 h
->non_zero_count_cache
[scan8
[16+n
]] = coeff_count
;
5266 assert( cat
== 1 || cat
== 2 );
5267 h
->non_zero_count_cache
[scan8
[n
]] = coeff_count
;
5272 uint8_t *ctx
= coeff_abs_level1_ctx
[node_ctx
] + abs_level_m1_ctx_base
;
5274 int j
= scantable
[index
[--coeff_count
]];
5276 if( get_cabac( CC
, ctx
) == 0 ) {
5277 node_ctx
= coeff_abs_level_transition
[0][node_ctx
];
5279 block
[j
] = get_cabac_bypass_sign( CC
, -1);
5281 block
[j
] = (get_cabac_bypass_sign( CC
, -qmul
[j
]) + 32) >> 6;
5285 ctx
= coeff_abs_levelgt1_ctx
[node_ctx
] + abs_level_m1_ctx_base
;
5286 node_ctx
= coeff_abs_level_transition
[1][node_ctx
];
5288 while( coeff_abs
< 15 && get_cabac( CC
, ctx
) ) {
5292 if( coeff_abs
>= 15 ) {
5294 while( get_cabac_bypass( CC
) ) {
5300 coeff_abs
+= coeff_abs
+ get_cabac_bypass( CC
);
5306 block
[j
] = get_cabac_bypass_sign( CC
, -coeff_abs
);
5308 block
[j
] = (get_cabac_bypass_sign( CC
, -coeff_abs
) * qmul
[j
] + 32) >> 6;
5311 } while( coeff_count
);
5312 #ifdef CABAC_ON_STACK
5313 h
->cabac
.range
= cc
.range
;
5314 h
->cabac
.low
= cc
.low
;
5315 h
->cabac
.bytestream
= cc
.bytestream
;
5320 #ifndef CONFIG_SMALL
5321 static void decode_cabac_residual_dc( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
5322 decode_cabac_residual_internal(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
, 1);
5325 static void decode_cabac_residual_nondc( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
5326 decode_cabac_residual_internal(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
, 0);
5330 static void decode_cabac_residual( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
5332 decode_cabac_residual_internal(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
, cat
== 0 || cat
== 3);
5334 if( cat
== 0 || cat
== 3 ) decode_cabac_residual_dc(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
);
5335 else decode_cabac_residual_nondc(h
, block
, cat
, n
, scantable
, qmul
, max_coeff
);
5339 static inline void compute_mb_neighbors(H264Context
*h
)
5341 MpegEncContext
* const s
= &h
->s
;
5342 const int mb_xy
= h
->mb_xy
;
5343 h
->top_mb_xy
= mb_xy
- s
->mb_stride
;
5344 h
->left_mb_xy
[0] = mb_xy
- 1;
5346 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
5347 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
5348 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
5349 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
5350 const int curr_mb_frame_flag
= !MB_FIELD
;
5351 const int bottom
= (s
->mb_y
& 1);
5353 ? !curr_mb_frame_flag
// bottom macroblock
5354 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
5356 h
->top_mb_xy
-= s
->mb_stride
;
5358 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
5359 h
->left_mb_xy
[0] = pair_xy
- 1;
5361 } else if (FIELD_PICTURE
) {
5362 h
->top_mb_xy
-= s
->mb_stride
;
5368 * decodes a macroblock
5369 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5371 static int decode_mb_cabac(H264Context
*h
) {
5372 MpegEncContext
* const s
= &h
->s
;
5374 int mb_type
, partition_count
, cbp
= 0;
5375 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
5377 mb_xy
= h
->mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5379 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?)
5381 tprintf(s
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
5382 if( h
->slice_type_nos
!= FF_I_TYPE
) {
5384 /* a skipped mb needs the aff flag from the following mb */
5385 if( FRAME_MBAFF
&& s
->mb_x
==0 && (s
->mb_y
&1)==0 )
5386 predict_field_decoding_flag(h
);
5387 if( FRAME_MBAFF
&& (s
->mb_y
&1)==1 && h
->prev_mb_skipped
)
5388 skip
= h
->next_mb_skipped
;
5390 skip
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
);
5391 /* read skip flags */
5393 if( FRAME_MBAFF
&& (s
->mb_y
&1)==0 ){
5394 s
->current_picture
.mb_type
[mb_xy
] = MB_TYPE_SKIP
;
5395 h
->next_mb_skipped
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
+1 );
5396 if(h
->next_mb_skipped
)
5397 predict_field_decoding_flag(h
);
5399 h
->mb_mbaff
= h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5404 h
->cbp_table
[mb_xy
] = 0;
5405 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5406 h
->last_qscale_diff
= 0;
5413 if( (s
->mb_y
&1) == 0 )
5415 h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5418 h
->prev_mb_skipped
= 0;
5420 compute_mb_neighbors(h
);
5421 if( ( mb_type
= decode_cabac_mb_type( h
) ) < 0 ) {
5422 av_log( h
->s
.avctx
, AV_LOG_ERROR
, "decode_cabac_mb_type failed\n" );
5426 if( h
->slice_type_nos
== FF_B_TYPE
) {
5428 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
5429 mb_type
= b_mb_type_info
[mb_type
].type
;
5432 goto decode_intra_mb
;
5434 } else if( h
->slice_type_nos
== FF_P_TYPE
) {
5436 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
5437 mb_type
= p_mb_type_info
[mb_type
].type
;
5440 goto decode_intra_mb
;
5443 if(h
->slice_type
== FF_SI_TYPE
&& mb_type
)
5445 assert(h
->slice_type_nos
== FF_I_TYPE
);
5447 partition_count
= 0;
5448 cbp
= i_mb_type_info
[mb_type
].cbp
;
5449 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
5450 mb_type
= i_mb_type_info
[mb_type
].type
;
5453 mb_type
|= MB_TYPE_INTERLACED
;
5455 h
->slice_table
[ mb_xy
]= h
->slice_num
;
5457 if(IS_INTRA_PCM(mb_type
)) {
5460 // We assume these blocks are very rare so we do not optimize it.
5461 // FIXME The two following lines get the bitstream position in the cabac
5462 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5463 ptr
= h
->cabac
.bytestream
;
5464 if(h
->cabac
.low
&0x1) ptr
--;
5466 if(h
->cabac
.low
&0x1FF) ptr
--;
5469 // The pixels are stored in the same order as levels in h->mb array.
5470 memcpy(h
->mb
, ptr
, 256); ptr
+=256;
5472 memcpy(h
->mb
+128, ptr
, 128); ptr
+=128;
5475 ff_init_cabac_decoder(&h
->cabac
, ptr
, h
->cabac
.bytestream_end
- ptr
);
5477 // All blocks are present
5478 h
->cbp_table
[mb_xy
] = 0x1ef;
5479 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5480 // In deblocking, the quantizer is 0
5481 s
->current_picture
.qscale_table
[mb_xy
]= 0;
5482 // All coeffs are present
5483 memset(h
->non_zero_count
[mb_xy
], 16, 16);
5484 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5485 h
->last_qscale_diff
= 0;
5490 h
->ref_count
[0] <<= 1;
5491 h
->ref_count
[1] <<= 1;
5494 fill_caches(h
, mb_type
, 0);
5496 if( IS_INTRA( mb_type
) ) {
5498 if( IS_INTRA4x4( mb_type
) ) {
5499 if( dct8x8_allowed
&& decode_cabac_mb_transform_size( h
) ) {
5500 mb_type
|= MB_TYPE_8x8DCT
;
5501 for( i
= 0; i
< 16; i
+=4 ) {
5502 int pred
= pred_intra_mode( h
, i
);
5503 int mode
= decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5504 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
5507 for( i
= 0; i
< 16; i
++ ) {
5508 int pred
= pred_intra_mode( h
, i
);
5509 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5511 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5514 write_back_intra_pred_mode(h
);
5515 if( check_intra4x4_pred_mode(h
) < 0 ) return -1;
5517 h
->intra16x16_pred_mode
= check_intra_pred_mode( h
, h
->intra16x16_pred_mode
);
5518 if( h
->intra16x16_pred_mode
< 0 ) return -1;
5521 h
->chroma_pred_mode_table
[mb_xy
] =
5522 pred_mode
= decode_cabac_mb_chroma_pre_mode( h
);
5524 pred_mode
= check_intra_pred_mode( h
, pred_mode
);
5525 if( pred_mode
< 0 ) return -1;
5526 h
->chroma_pred_mode
= pred_mode
;
5528 } else if( partition_count
== 4 ) {
5529 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
5531 if( h
->slice_type_nos
== FF_B_TYPE
) {
5532 for( i
= 0; i
< 4; i
++ ) {
5533 h
->sub_mb_type
[i
] = decode_cabac_b_mb_sub_type( h
);
5534 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5535 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5537 if( IS_DIRECT(h
->sub_mb_type
[0] | h
->sub_mb_type
[1] |
5538 h
->sub_mb_type
[2] | h
->sub_mb_type
[3]) ) {
5539 pred_direct_motion(h
, &mb_type
);
5540 h
->ref_cache
[0][scan8
[4]] =
5541 h
->ref_cache
[1][scan8
[4]] =
5542 h
->ref_cache
[0][scan8
[12]] =
5543 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
5544 if( h
->ref_count
[0] > 1 || h
->ref_count
[1] > 1 ) {
5545 for( i
= 0; i
< 4; i
++ )
5546 if( IS_DIRECT(h
->sub_mb_type
[i
]) )
5547 fill_rectangle( &h
->direct_cache
[scan8
[4*i
]], 2, 2, 8, 1, 1 );
5551 for( i
= 0; i
< 4; i
++ ) {
5552 h
->sub_mb_type
[i
] = decode_cabac_p_mb_sub_type( h
);
5553 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5554 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5558 for( list
= 0; list
< h
->list_count
; list
++ ) {
5559 for( i
= 0; i
< 4; i
++ ) {
5560 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
5561 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
5562 if( h
->ref_count
[list
] > 1 )
5563 ref
[list
][i
] = decode_cabac_mb_ref( h
, list
, 4*i
);
5569 h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
5570 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
5575 dct8x8_allowed
= get_dct8x8_allowed(h
);
5577 for(list
=0; list
<h
->list_count
; list
++){
5579 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
5580 if(IS_DIRECT(h
->sub_mb_type
[i
])){
5581 fill_rectangle(h
->mvd_cache
[list
][scan8
[4*i
]], 2, 2, 8, 0, 4);
5585 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
) && !IS_DIRECT(h
->sub_mb_type
[i
])){
5586 const int sub_mb_type
= h
->sub_mb_type
[i
];
5587 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
5588 for(j
=0; j
<sub_partition_count
[i
]; j
++){
5591 const int index
= 4*i
+ block_width
*j
;
5592 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
5593 int16_t (* mvd_cache
)[2]= &h
->mvd_cache
[list
][ scan8
[index
] ];
5594 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mpx
, &mpy
);
5596 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, index
, 0 );
5597 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, index
, 1 );
5598 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5600 if(IS_SUB_8X8(sub_mb_type
)){
5602 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
5604 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
5607 mvd_cache
[ 8 ][0]= mvd_cache
[ 9 ][0]= mx
- mpx
;
5609 mvd_cache
[ 8 ][1]= mvd_cache
[ 9 ][1]= my
- mpy
;
5610 }else if(IS_SUB_8X4(sub_mb_type
)){
5611 mv_cache
[ 1 ][0]= mx
;
5612 mv_cache
[ 1 ][1]= my
;
5614 mvd_cache
[ 1 ][0]= mx
- mpx
;
5615 mvd_cache
[ 1 ][1]= my
- mpy
;
5616 }else if(IS_SUB_4X8(sub_mb_type
)){
5617 mv_cache
[ 8 ][0]= mx
;
5618 mv_cache
[ 8 ][1]= my
;
5620 mvd_cache
[ 8 ][0]= mx
- mpx
;
5621 mvd_cache
[ 8 ][1]= my
- mpy
;
5623 mv_cache
[ 0 ][0]= mx
;
5624 mv_cache
[ 0 ][1]= my
;
5626 mvd_cache
[ 0 ][0]= mx
- mpx
;
5627 mvd_cache
[ 0 ][1]= my
- mpy
;
5630 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
5631 uint32_t *pd
= (uint32_t *)&h
->mvd_cache
[list
][ scan8
[4*i
] ][0];
5632 p
[0] = p
[1] = p
[8] = p
[9] = 0;
5633 pd
[0]= pd
[1]= pd
[8]= pd
[9]= 0;
5637 } else if( IS_DIRECT(mb_type
) ) {
5638 pred_direct_motion(h
, &mb_type
);
5639 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
5640 fill_rectangle(h
->mvd_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
5641 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
5643 int list
, mx
, my
, i
, mpx
, mpy
;
5644 if(IS_16X16(mb_type
)){
5645 for(list
=0; list
<h
->list_count
; list
++){
5646 if(IS_DIR(mb_type
, 0, list
)){
5647 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 0 ) : 0;
5648 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, ref
, 1);
5650 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED
, 1); //FIXME factorize and the other fill_rect below too
5652 for(list
=0; list
<h
->list_count
; list
++){
5653 if(IS_DIR(mb_type
, 0, list
)){
5654 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mpx
, &mpy
);
5656 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 0, 0 );
5657 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 0, 1 );
5658 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5660 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5661 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
,my
), 4);
5663 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, 0, 4);
5666 else if(IS_16X8(mb_type
)){
5667 for(list
=0; list
<h
->list_count
; list
++){
5669 if(IS_DIR(mb_type
, i
, list
)){
5670 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 8*i
) : 0;
5671 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, ref
, 1);
5673 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, (LIST_NOT_USED
&0xFF), 1);
5676 for(list
=0; list
<h
->list_count
; list
++){
5678 if(IS_DIR(mb_type
, i
, list
)){
5679 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mpx
, &mpy
);
5680 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 0 );
5681 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 1 );
5682 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5684 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5685 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
,my
), 4);
5687 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5688 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5693 assert(IS_8X16(mb_type
));
5694 for(list
=0; list
<h
->list_count
; list
++){
5696 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
5697 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 4*i
) : 0;
5698 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, ref
, 1);
5700 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, (LIST_NOT_USED
&0xFF), 1);
5703 for(list
=0; list
<h
->list_count
; list
++){
5705 if(IS_DIR(mb_type
, i
, list
)){
5706 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mpx
, &mpy
);
5707 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 0 );
5708 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 1 );
5710 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5711 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5712 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
,my
), 4);
5714 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5715 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5722 if( IS_INTER( mb_type
) ) {
5723 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5724 write_back_motion( h
, mb_type
);
5727 if( !IS_INTRA16x16( mb_type
) ) {
5728 cbp
= decode_cabac_mb_cbp_luma( h
);
5730 cbp
|= decode_cabac_mb_cbp_chroma( h
) << 4;
5733 h
->cbp_table
[mb_xy
] = h
->cbp
= cbp
;
5735 if( dct8x8_allowed
&& (cbp
&15) && !IS_INTRA( mb_type
) ) {
5736 if( decode_cabac_mb_transform_size( h
) )
5737 mb_type
|= MB_TYPE_8x8DCT
;
5739 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5741 if( cbp
|| IS_INTRA16x16( mb_type
) ) {
5742 const uint8_t *scan
, *scan8x8
, *dc_scan
;
5743 const uint32_t *qmul
;
5746 if(IS_INTERLACED(mb_type
)){
5747 scan8x8
= s
->qscale
? h
->field_scan8x8
: h
->field_scan8x8_q0
;
5748 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
5749 dc_scan
= luma_dc_field_scan
;
5751 scan8x8
= s
->qscale
? h
->zigzag_scan8x8
: h
->zigzag_scan8x8_q0
;
5752 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
5753 dc_scan
= luma_dc_zigzag_scan
;
5756 h
->last_qscale_diff
= dqp
= decode_cabac_mb_dqp( h
);
5757 if( dqp
== INT_MIN
){
5758 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cabac decode of qscale diff failed at %d %d\n", s
->mb_x
, s
->mb_y
);
5762 if(((unsigned)s
->qscale
) > 51){
5763 if(s
->qscale
<0) s
->qscale
+= 52;
5764 else s
->qscale
-= 52;
5766 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
5767 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
5769 if( IS_INTRA16x16( mb_type
) ) {
5771 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5772 decode_cabac_residual( h
, h
->mb
, 0, 0, dc_scan
, NULL
, 16);
5775 qmul
= h
->dequant4_coeff
[0][s
->qscale
];
5776 for( i
= 0; i
< 16; i
++ ) {
5777 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5778 decode_cabac_residual(h
, h
->mb
+ 16*i
, 1, i
, scan
+ 1, qmul
, 15);
5781 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
5785 for( i8x8
= 0; i8x8
< 4; i8x8
++ ) {
5786 if( cbp
& (1<<i8x8
) ) {
5787 if( IS_8x8DCT(mb_type
) ) {
5788 decode_cabac_residual(h
, h
->mb
+ 64*i8x8
, 5, 4*i8x8
,
5789 scan8x8
, h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 64);
5791 qmul
= h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
];
5792 for( i4x4
= 0; i4x4
< 4; i4x4
++ ) {
5793 const int index
= 4*i8x8
+ i4x4
;
5794 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5796 decode_cabac_residual(h
, h
->mb
+ 16*index
, 2, index
, scan
, qmul
, 16);
5797 //STOP_TIMER("decode_residual")
5801 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
5802 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
5809 for( c
= 0; c
< 2; c
++ ) {
5810 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5811 decode_cabac_residual(h
, h
->mb
+ 256 + 16*4*c
, 3, c
, chroma_dc_scan
, NULL
, 4);
5817 for( c
= 0; c
< 2; c
++ ) {
5818 qmul
= h
->dequant4_coeff
[c
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[c
]];
5819 for( i
= 0; i
< 4; i
++ ) {
5820 const int index
= 16 + 4 * c
+ i
;
5821 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5822 decode_cabac_residual(h
, h
->mb
+ 16*index
, 4, index
- 16, scan
+ 1, qmul
, 15);
5826 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
5827 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
5828 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
5831 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
5832 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
5833 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
5834 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
5835 h
->last_qscale_diff
= 0;
5838 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
5839 write_back_non_zero_count(h
);
5842 h
->ref_count
[0] >>= 1;
5843 h
->ref_count
[1] >>= 1;
5850 static void filter_mb_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
5852 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
5853 const int alpha
= (alpha_table
+52)[index_a
];
5854 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
5859 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] : -1;
5860 h
->s
.dsp
.h264_h_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
5862 /* 16px edge length, because bS=4 is triggered by being at
5863 * the edge of an intra MB, so all 4 bS are the same */
5864 for( d
= 0; d
< 16; d
++ ) {
5865 const int p0
= pix
[-1];
5866 const int p1
= pix
[-2];
5867 const int p2
= pix
[-3];
5869 const int q0
= pix
[0];
5870 const int q1
= pix
[1];
5871 const int q2
= pix
[2];
5873 if( FFABS( p0
- q0
) < alpha
&&
5874 FFABS( p1
- p0
) < beta
&&
5875 FFABS( q1
- q0
) < beta
) {
5877 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
5878 if( FFABS( p2
- p0
) < beta
)
5880 const int p3
= pix
[-4];
5882 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
5883 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
5884 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
5887 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
5889 if( FFABS( q2
- q0
) < beta
)
5891 const int q3
= pix
[3];
5893 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
5894 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
5895 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
5898 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
5902 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
5903 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
5905 tprintf(h
->s
.avctx
, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2], pix
[-1], pix
[0], pix
[1]);
5911 static void filter_mb_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
5913 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
5914 const int alpha
= (alpha_table
+52)[index_a
];
5915 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
5920 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] + 1 : 0;
5921 h
->s
.dsp
.h264_h_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
5923 h
->s
.dsp
.h264_h_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
5927 static void filter_mb_mbaff_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[8], int qp
[2] ) {
5929 for( i
= 0; i
< 16; i
++, pix
+= stride
) {
5935 int bS_index
= (i
>> 1);
5938 bS_index
|= (i
& 1);
5941 if( bS
[bS_index
] == 0 ) {
5945 qp_index
= MB_FIELD
? (i
>> 3) : (i
& 1);
5946 index_a
= qp
[qp_index
] + h
->slice_alpha_c0_offset
;
5947 alpha
= (alpha_table
+52)[index_a
];
5948 beta
= (beta_table
+52)[qp
[qp_index
] + h
->slice_beta_offset
];
5950 if( bS
[bS_index
] < 4 ) {
5951 const int tc0
= (tc0_table
+52)[index_a
][bS
[bS_index
] - 1];
5952 const int p0
= pix
[-1];
5953 const int p1
= pix
[-2];
5954 const int p2
= pix
[-3];
5955 const int q0
= pix
[0];
5956 const int q1
= pix
[1];
5957 const int q2
= pix
[2];
5959 if( FFABS( p0
- q0
) < alpha
&&
5960 FFABS( p1
- p0
) < beta
&&
5961 FFABS( q1
- q0
) < beta
) {
5965 if( FFABS( p2
- p0
) < beta
) {
5966 pix
[-2] = p1
+ av_clip( ( p2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( p1
<< 1 ) ) >> 1, -tc0
, tc0
);
5969 if( FFABS( q2
- q0
) < beta
) {
5970 pix
[1] = q1
+ av_clip( ( q2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( q1
<< 1 ) ) >> 1, -tc0
, tc0
);
5974 i_delta
= av_clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
5975 pix
[-1] = av_clip_uint8( p0
+ i_delta
); /* p0' */
5976 pix
[0] = av_clip_uint8( q0
- i_delta
); /* q0' */
5977 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
5980 const int p0
= pix
[-1];
5981 const int p1
= pix
[-2];
5982 const int p2
= pix
[-3];
5984 const int q0
= pix
[0];
5985 const int q1
= pix
[1];
5986 const int q2
= pix
[2];
5988 if( FFABS( p0
- q0
) < alpha
&&
5989 FFABS( p1
- p0
) < beta
&&
5990 FFABS( q1
- q0
) < beta
) {
5992 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
5993 if( FFABS( p2
- p0
) < beta
)
5995 const int p3
= pix
[-4];
5997 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
5998 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
5999 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6002 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6004 if( FFABS( q2
- q0
) < beta
)
6006 const int q3
= pix
[3];
6008 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6009 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6010 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6013 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6017 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6018 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6020 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6025 static void filter_mb_mbaff_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[8], int qp
[2] ) {
6027 for( i
= 0; i
< 8; i
++, pix
+= stride
) {
6035 if( bS
[bS_index
] == 0 ) {
6039 qp_index
= MB_FIELD
? (i
>> 2) : (i
& 1);
6040 index_a
= qp
[qp_index
] + h
->slice_alpha_c0_offset
;
6041 alpha
= (alpha_table
+52)[index_a
];
6042 beta
= (beta_table
+52)[qp
[qp_index
] + h
->slice_beta_offset
];
6044 if( bS
[bS_index
] < 4 ) {
6045 const int tc
= (tc0_table
+52)[index_a
][bS
[bS_index
] - 1] + 1;
6046 const int p0
= pix
[-1];
6047 const int p1
= pix
[-2];
6048 const int q0
= pix
[0];
6049 const int q1
= pix
[1];
6051 if( FFABS( p0
- q0
) < alpha
&&
6052 FFABS( p1
- p0
) < beta
&&
6053 FFABS( q1
- q0
) < beta
) {
6054 const int i_delta
= av_clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6056 pix
[-1] = av_clip_uint8( p0
+ i_delta
); /* p0' */
6057 pix
[0] = av_clip_uint8( q0
- i_delta
); /* q0' */
6058 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6061 const int p0
= pix
[-1];
6062 const int p1
= pix
[-2];
6063 const int q0
= pix
[0];
6064 const int q1
= pix
[1];
6066 if( FFABS( p0
- q0
) < alpha
&&
6067 FFABS( p1
- p0
) < beta
&&
6068 FFABS( q1
- q0
) < beta
) {
6070 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2; /* p0' */
6071 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2; /* q0' */
6072 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, pix
[-3], p1
, p0
, q0
, q1
, pix
[2], pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6078 static void filter_mb_edgeh( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6080 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6081 const int alpha
= (alpha_table
+52)[index_a
];
6082 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6083 const int pix_next
= stride
;
6088 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] : -1;
6089 h
->s
.dsp
.h264_v_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6091 /* 16px edge length, see filter_mb_edgev */
6092 for( d
= 0; d
< 16; d
++ ) {
6093 const int p0
= pix
[-1*pix_next
];
6094 const int p1
= pix
[-2*pix_next
];
6095 const int p2
= pix
[-3*pix_next
];
6096 const int q0
= pix
[0];
6097 const int q1
= pix
[1*pix_next
];
6098 const int q2
= pix
[2*pix_next
];
6100 if( FFABS( p0
- q0
) < alpha
&&
6101 FFABS( p1
- p0
) < beta
&&
6102 FFABS( q1
- q0
) < beta
) {
6104 const int p3
= pix
[-4*pix_next
];
6105 const int q3
= pix
[ 3*pix_next
];
6107 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6108 if( FFABS( p2
- p0
) < beta
) {
6110 pix
[-1*pix_next
] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6111 pix
[-2*pix_next
] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6112 pix
[-3*pix_next
] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6115 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6117 if( FFABS( q2
- q0
) < beta
) {
6119 pix
[0*pix_next
] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6120 pix
[1*pix_next
] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6121 pix
[2*pix_next
] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6124 pix
[0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6128 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6129 pix
[ 0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6131 tprintf(h
->s
.avctx
, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, qp
, index_a
, alpha
, beta
, bS
[i
], p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2*pix_next
], pix
[-pix_next
], pix
[0], pix
[pix_next
]);
6138 static void filter_mb_edgech( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6140 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6141 const int alpha
= (alpha_table
+52)[index_a
];
6142 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6147 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] + 1 : 0;
6148 h
->s
.dsp
.h264_v_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
6150 h
->s
.dsp
.h264_v_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
6154 static void filter_mb_fast( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6155 MpegEncContext
* const s
= &h
->s
;
6156 int mb_y_firstrow
= s
->picture_structure
== PICT_BOTTOM_FIELD
;
6158 int qp
, qp0
, qp1
, qpc
, qpc0
, qpc1
, qp_thresh
;
6162 if(mb_x
==0 || mb_y
==mb_y_firstrow
|| !s
->dsp
.h264_loop_filter_strength
|| h
->pps
.chroma_qp_diff
||
6164 (h
->deblocking_filter
== 2 && (h
->slice_table
[mb_xy
] != h
->slice_table
[h
->top_mb_xy
] ||
6165 h
->slice_table
[mb_xy
] != h
->slice_table
[mb_xy
- 1]))) {
6166 filter_mb(h
, mb_x
, mb_y
, img_y
, img_cb
, img_cr
, linesize
, uvlinesize
);
6169 assert(!FRAME_MBAFF
);
6171 mb_type
= s
->current_picture
.mb_type
[mb_xy
];
6172 qp
= s
->current_picture
.qscale_table
[mb_xy
];
6173 qp0
= s
->current_picture
.qscale_table
[mb_xy
-1];
6174 qp1
= s
->current_picture
.qscale_table
[h
->top_mb_xy
];
6175 qpc
= get_chroma_qp( h
, 0, qp
);
6176 qpc0
= get_chroma_qp( h
, 0, qp0
);
6177 qpc1
= get_chroma_qp( h
, 0, qp1
);
6178 qp0
= (qp
+ qp0
+ 1) >> 1;
6179 qp1
= (qp
+ qp1
+ 1) >> 1;
6180 qpc0
= (qpc
+ qpc0
+ 1) >> 1;
6181 qpc1
= (qpc
+ qpc1
+ 1) >> 1;
6182 qp_thresh
= 15 - h
->slice_alpha_c0_offset
;
6183 if(qp
<= qp_thresh
&& qp0
<= qp_thresh
&& qp1
<= qp_thresh
&&
6184 qpc
<= qp_thresh
&& qpc0
<= qp_thresh
&& qpc1
<= qp_thresh
)
6187 if( IS_INTRA(mb_type
) ) {
6188 int16_t bS4
[4] = {4,4,4,4};
6189 int16_t bS3
[4] = {3,3,3,3};
6190 int16_t *bSH
= FIELD_PICTURE
? bS3
: bS4
;
6191 if( IS_8x8DCT(mb_type
) ) {
6192 filter_mb_edgev( h
, &img_y
[4*0], linesize
, bS4
, qp0
);
6193 filter_mb_edgev( h
, &img_y
[4*2], linesize
, bS3
, qp
);
6194 filter_mb_edgeh( h
, &img_y
[4*0*linesize
], linesize
, bSH
, qp1
);
6195 filter_mb_edgeh( h
, &img_y
[4*2*linesize
], linesize
, bS3
, qp
);
6197 filter_mb_edgev( h
, &img_y
[4*0], linesize
, bS4
, qp0
);
6198 filter_mb_edgev( h
, &img_y
[4*1], linesize
, bS3
, qp
);
6199 filter_mb_edgev( h
, &img_y
[4*2], linesize
, bS3
, qp
);
6200 filter_mb_edgev( h
, &img_y
[4*3], linesize
, bS3
, qp
);
6201 filter_mb_edgeh( h
, &img_y
[4*0*linesize
], linesize
, bSH
, qp1
);
6202 filter_mb_edgeh( h
, &img_y
[4*1*linesize
], linesize
, bS3
, qp
);
6203 filter_mb_edgeh( h
, &img_y
[4*2*linesize
], linesize
, bS3
, qp
);
6204 filter_mb_edgeh( h
, &img_y
[4*3*linesize
], linesize
, bS3
, qp
);
6206 filter_mb_edgecv( h
, &img_cb
[2*0], uvlinesize
, bS4
, qpc0
);
6207 filter_mb_edgecv( h
, &img_cb
[2*2], uvlinesize
, bS3
, qpc
);
6208 filter_mb_edgecv( h
, &img_cr
[2*0], uvlinesize
, bS4
, qpc0
);
6209 filter_mb_edgecv( h
, &img_cr
[2*2], uvlinesize
, bS3
, qpc
);
6210 filter_mb_edgech( h
, &img_cb
[2*0*uvlinesize
], uvlinesize
, bSH
, qpc1
);
6211 filter_mb_edgech( h
, &img_cb
[2*2*uvlinesize
], uvlinesize
, bS3
, qpc
);
6212 filter_mb_edgech( h
, &img_cr
[2*0*uvlinesize
], uvlinesize
, bSH
, qpc1
);
6213 filter_mb_edgech( h
, &img_cr
[2*2*uvlinesize
], uvlinesize
, bS3
, qpc
);
6216 DECLARE_ALIGNED_8(int16_t, bS
[2][4][4]);
6217 uint64_t (*bSv
)[4] = (uint64_t(*)[4])bS
;
6219 if( IS_8x8DCT(mb_type
) && (h
->cbp
&7) == 7 ) {
6221 bSv
[0][0] = bSv
[0][2] = bSv
[1][0] = bSv
[1][2] = 0x0002000200020002ULL
;
6223 int mask_edge1
= (mb_type
& (MB_TYPE_16x16
| MB_TYPE_8x16
)) ? 3 :
6224 (mb_type
& MB_TYPE_16x8
) ? 1 : 0;
6225 int mask_edge0
= (mb_type
& (MB_TYPE_16x16
| MB_TYPE_8x16
))
6226 && (s
->current_picture
.mb_type
[mb_xy
-1] & (MB_TYPE_16x16
| MB_TYPE_8x16
))
6228 int step
= IS_8x8DCT(mb_type
) ? 2 : 1;
6229 edges
= (mb_type
& MB_TYPE_16x16
) && !(h
->cbp
& 15) ? 1 : 4;
6230 s
->dsp
.h264_loop_filter_strength( bS
, h
->non_zero_count_cache
, h
->ref_cache
, h
->mv_cache
,
6231 (h
->slice_type_nos
== FF_B_TYPE
), edges
, step
, mask_edge0
, mask_edge1
, FIELD_PICTURE
);
6233 if( IS_INTRA(s
->current_picture
.mb_type
[mb_xy
-1]) )
6234 bSv
[0][0] = 0x0004000400040004ULL
;
6235 if( IS_INTRA(s
->current_picture
.mb_type
[h
->top_mb_xy
]) )
6236 bSv
[1][0] = FIELD_PICTURE
? 0x0003000300030003ULL
: 0x0004000400040004ULL
;
6238 #define FILTER(hv,dir,edge)\
6239 if(bSv[dir][edge]) {\
6240 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6242 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6243 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6249 } else if( IS_8x8DCT(mb_type
) ) {
6268 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6269 MpegEncContext
* const s
= &h
->s
;
6270 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
6271 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
6272 const int mvy_limit
= IS_INTERLACED(mb_type
) ? 2 : 4;
6273 int first_vertical_edge_done
= 0;
6276 //for sufficiently low qp, filtering wouldn't do anything
6277 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6279 int qp_thresh
= 15 - h
->slice_alpha_c0_offset
- FFMAX3(0, h
->pps
.chroma_qp_index_offset
[0], h
->pps
.chroma_qp_index_offset
[1]);
6280 int qp
= s
->current_picture
.qscale_table
[mb_xy
];
6282 && (mb_x
== 0 || ((qp
+ s
->current_picture
.qscale_table
[mb_xy
-1] + 1)>>1) <= qp_thresh
)
6283 && (mb_y
== 0 || ((qp
+ s
->current_picture
.qscale_table
[h
->top_mb_xy
] + 1)>>1) <= qp_thresh
)){
6288 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6289 if(!h
->pps
.cabac
&& h
->pps
.transform_8x8_mode
){
6290 int top_type
, left_type
[2];
6291 top_type
= s
->current_picture
.mb_type
[h
->top_mb_xy
] ;
6292 left_type
[0] = s
->current_picture
.mb_type
[h
->left_mb_xy
[0]];
6293 left_type
[1] = s
->current_picture
.mb_type
[h
->left_mb_xy
[1]];
6295 if(IS_8x8DCT(top_type
)){
6296 h
->non_zero_count_cache
[4+8*0]=
6297 h
->non_zero_count_cache
[5+8*0]= h
->cbp_table
[h
->top_mb_xy
] & 4;
6298 h
->non_zero_count_cache
[6+8*0]=
6299 h
->non_zero_count_cache
[7+8*0]= h
->cbp_table
[h
->top_mb_xy
] & 8;
6301 if(IS_8x8DCT(left_type
[0])){
6302 h
->non_zero_count_cache
[3+8*1]=
6303 h
->non_zero_count_cache
[3+8*2]= h
->cbp_table
[h
->left_mb_xy
[0]]&2; //FIXME check MBAFF
6305 if(IS_8x8DCT(left_type
[1])){
6306 h
->non_zero_count_cache
[3+8*3]=
6307 h
->non_zero_count_cache
[3+8*4]= h
->cbp_table
[h
->left_mb_xy
[1]]&8; //FIXME check MBAFF
6310 if(IS_8x8DCT(mb_type
)){
6311 h
->non_zero_count_cache
[scan8
[0 ]]= h
->non_zero_count_cache
[scan8
[1 ]]=
6312 h
->non_zero_count_cache
[scan8
[2 ]]= h
->non_zero_count_cache
[scan8
[3 ]]= h
->cbp_table
[mb_xy
] & 1;
6314 h
->non_zero_count_cache
[scan8
[0+ 4]]= h
->non_zero_count_cache
[scan8
[1+ 4]]=
6315 h
->non_zero_count_cache
[scan8
[2+ 4]]= h
->non_zero_count_cache
[scan8
[3+ 4]]= h
->cbp_table
[mb_xy
] & 2;
6317 h
->non_zero_count_cache
[scan8
[0+ 8]]= h
->non_zero_count_cache
[scan8
[1+ 8]]=
6318 h
->non_zero_count_cache
[scan8
[2+ 8]]= h
->non_zero_count_cache
[scan8
[3+ 8]]= h
->cbp_table
[mb_xy
] & 4;
6320 h
->non_zero_count_cache
[scan8
[0+12]]= h
->non_zero_count_cache
[scan8
[1+12]]=
6321 h
->non_zero_count_cache
[scan8
[2+12]]= h
->non_zero_count_cache
[scan8
[3+12]]= h
->cbp_table
[mb_xy
] & 8;
6326 // left mb is in picture
6327 && h
->slice_table
[mb_xy
-1] != 255
6328 // and current and left pair do not have the same interlaced type
6329 && (IS_INTERLACED(mb_type
) != IS_INTERLACED(s
->current_picture
.mb_type
[mb_xy
-1]))
6330 // and left mb is in the same slice if deblocking_filter == 2
6331 && (h
->deblocking_filter
!=2 || h
->slice_table
[mb_xy
-1] == h
->slice_table
[mb_xy
])) {
6332 /* First vertical edge is different in MBAFF frames
6333 * There are 8 different bS to compute and 2 different Qp
6335 const int pair_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
6336 const int left_mb_xy
[2] = { pair_xy
-1, pair_xy
-1+s
->mb_stride
};
6341 int mb_qp
, mbn0_qp
, mbn1_qp
;
6343 first_vertical_edge_done
= 1;
6345 if( IS_INTRA(mb_type
) )
6346 bS
[0] = bS
[1] = bS
[2] = bS
[3] = bS
[4] = bS
[5] = bS
[6] = bS
[7] = 4;
6348 for( i
= 0; i
< 8; i
++ ) {
6349 int mbn_xy
= MB_FIELD
? left_mb_xy
[i
>>2] : left_mb_xy
[i
&1];
6351 if( IS_INTRA( s
->current_picture
.mb_type
[mbn_xy
] ) )
6353 else if( h
->non_zero_count_cache
[12+8*(i
>>1)] != 0 ||
6354 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6355 h
->non_zero_count
[mbn_xy
][MB_FIELD
? i
&3 : (i
>>2)+(mb_y
&1)*2] )
6362 mb_qp
= s
->current_picture
.qscale_table
[mb_xy
];
6363 mbn0_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[0]];
6364 mbn1_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[1]];
6365 qp
[0] = ( mb_qp
+ mbn0_qp
+ 1 ) >> 1;
6366 bqp
[0] = ( get_chroma_qp( h
, 0, mb_qp
) +
6367 get_chroma_qp( h
, 0, mbn0_qp
) + 1 ) >> 1;
6368 rqp
[0] = ( get_chroma_qp( h
, 1, mb_qp
) +
6369 get_chroma_qp( h
, 1, mbn0_qp
) + 1 ) >> 1;
6370 qp
[1] = ( mb_qp
+ mbn1_qp
+ 1 ) >> 1;
6371 bqp
[1] = ( get_chroma_qp( h
, 0, mb_qp
) +
6372 get_chroma_qp( h
, 0, mbn1_qp
) + 1 ) >> 1;
6373 rqp
[1] = ( get_chroma_qp( h
, 1, mb_qp
) +
6374 get_chroma_qp( h
, 1, mbn1_qp
) + 1 ) >> 1;
6377 tprintf(s
->avctx
, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x
, mb_y
, qp
[0], qp
[1], bqp
[0], bqp
[1], rqp
[0], rqp
[1], linesize
, uvlinesize
);
6378 { int i
; for (i
= 0; i
< 8; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6379 filter_mb_mbaff_edgev ( h
, &img_y
[0], linesize
, bS
, qp
);
6380 filter_mb_mbaff_edgecv( h
, &img_cb
[0], uvlinesize
, bS
, bqp
);
6381 filter_mb_mbaff_edgecv( h
, &img_cr
[0], uvlinesize
, bS
, rqp
);
6383 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6384 for( dir
= 0; dir
< 2; dir
++ )
6387 const int mbm_xy
= dir
== 0 ? mb_xy
-1 : h
->top_mb_xy
;
6388 const int mbm_type
= s
->current_picture
.mb_type
[mbm_xy
];
6389 int (*ref2frm
) [64] = h
->ref2frm
[ h
->slice_num
&15 ][0] + (MB_MBAFF
? 20 : 2);
6390 int (*ref2frmm
)[64] = h
->ref2frm
[ h
->slice_table
[mbm_xy
]&15 ][0] + (MB_MBAFF
? 20 : 2);
6391 int start
= h
->slice_table
[mbm_xy
] == 255 ? 1 : 0;
6393 const int edges
= (mb_type
& (MB_TYPE_16x16
|MB_TYPE_SKIP
))
6394 == (MB_TYPE_16x16
|MB_TYPE_SKIP
) ? 1 : 4;
6395 // how often to recheck mv-based bS when iterating between edges
6396 const int mask_edge
= (mb_type
& (MB_TYPE_16x16
| (MB_TYPE_16x8
<< dir
))) ? 3 :
6397 (mb_type
& (MB_TYPE_8x16
>> dir
)) ? 1 : 0;
6398 // how often to recheck mv-based bS when iterating along each edge
6399 const int mask_par0
= mb_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
));
6401 if (first_vertical_edge_done
) {
6403 first_vertical_edge_done
= 0;
6406 if (h
->deblocking_filter
==2 && h
->slice_table
[mbm_xy
] != h
->slice_table
[mb_xy
])
6409 if (FRAME_MBAFF
&& (dir
== 1) && ((mb_y
&1) == 0) && start
== 0
6410 && !IS_INTERLACED(mb_type
)
6411 && IS_INTERLACED(mbm_type
)
6413 // This is a special case in the norm where the filtering must
6414 // be done twice (one each of the field) even if we are in a
6415 // frame macroblock.
6417 static const int nnz_idx
[4] = {4,5,6,3};
6418 unsigned int tmp_linesize
= 2 * linesize
;
6419 unsigned int tmp_uvlinesize
= 2 * uvlinesize
;
6420 int mbn_xy
= mb_xy
- 2 * s
->mb_stride
;
6425 for(j
=0; j
<2; j
++, mbn_xy
+= s
->mb_stride
){
6426 if( IS_INTRA(mb_type
) ||
6427 IS_INTRA(s
->current_picture
.mb_type
[mbn_xy
]) ) {
6428 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 3;
6430 const uint8_t *mbn_nnz
= h
->non_zero_count
[mbn_xy
];
6431 for( i
= 0; i
< 4; i
++ ) {
6432 if( h
->non_zero_count_cache
[scan8
[0]+i
] != 0 ||
6433 mbn_nnz
[nnz_idx
[i
]] != 0 )
6439 // Do not use s->qscale as luma quantizer because it has not the same
6440 // value in IPCM macroblocks.
6441 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6442 tprintf(s
->avctx
, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, tmp_linesize
, tmp_uvlinesize
);
6443 { int i
; for (i
= 0; i
< 4; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6444 filter_mb_edgeh( h
, &img_y
[j
*linesize
], tmp_linesize
, bS
, qp
);
6445 filter_mb_edgech( h
, &img_cb
[j
*uvlinesize
], tmp_uvlinesize
, bS
,
6446 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6447 filter_mb_edgech( h
, &img_cr
[j
*uvlinesize
], tmp_uvlinesize
, bS
,
6448 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6455 for( edge
= start
; edge
< edges
; edge
++ ) {
6456 /* mbn_xy: neighbor macroblock */
6457 const int mbn_xy
= edge
> 0 ? mb_xy
: mbm_xy
;
6458 const int mbn_type
= s
->current_picture
.mb_type
[mbn_xy
];
6459 int (*ref2frmn
)[64] = edge
> 0 ? ref2frm
: ref2frmm
;
6463 if( (edge
&1) && IS_8x8DCT(mb_type
) )
6466 if( IS_INTRA(mb_type
) ||
6467 IS_INTRA(mbn_type
) ) {
6470 if ( (!IS_INTERLACED(mb_type
) && !IS_INTERLACED(mbm_type
))
6471 || ((FRAME_MBAFF
|| (s
->picture_structure
!= PICT_FRAME
)) && (dir
== 0))
6480 bS
[0] = bS
[1] = bS
[2] = bS
[3] = value
;
6485 if( edge
& mask_edge
) {
6486 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 0;
6489 else if( FRAME_MBAFF
&& IS_INTERLACED(mb_type
^ mbn_type
)) {
6490 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 1;
6493 else if( mask_par0
&& (edge
|| (mbn_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
)))) ) {
6494 int b_idx
= 8 + 4 + edge
* (dir
? 8:1);
6495 int bn_idx
= b_idx
- (dir
? 8:1);
6498 for( l
= 0; !v
&& l
< 1 + (h
->slice_type_nos
== FF_B_TYPE
); l
++ ) {
6499 v
|= ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[l
][h
->ref_cache
[l
][bn_idx
]] ||
6500 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6501 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
;
6504 if(h
->slice_type_nos
== FF_B_TYPE
&& v
){
6506 for( l
= 0; !v
&& l
< 2; l
++ ) {
6508 v
|= ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[ln
][h
->ref_cache
[ln
][bn_idx
]] ||
6509 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[ln
][bn_idx
][0] ) >= 4 ||
6510 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[ln
][bn_idx
][1] ) >= mvy_limit
;
6514 bS
[0] = bS
[1] = bS
[2] = bS
[3] = v
;
6520 for( i
= 0; i
< 4; i
++ ) {
6521 int x
= dir
== 0 ? edge
: i
;
6522 int y
= dir
== 0 ? i
: edge
;
6523 int b_idx
= 8 + 4 + x
+ 8*y
;
6524 int bn_idx
= b_idx
- (dir
? 8:1);
6526 if( h
->non_zero_count_cache
[b_idx
] != 0 ||
6527 h
->non_zero_count_cache
[bn_idx
] != 0 ) {
6533 for( l
= 0; l
< 1 + (h
->slice_type_nos
== FF_B_TYPE
); l
++ ) {
6534 if( ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[l
][h
->ref_cache
[l
][bn_idx
]] ||
6535 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6536 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
) {
6542 if(h
->slice_type_nos
== FF_B_TYPE
&& bS
[i
]){
6544 for( l
= 0; l
< 2; l
++ ) {
6546 if( ref2frm
[l
][h
->ref_cache
[l
][b_idx
]] != ref2frmn
[ln
][h
->ref_cache
[ln
][bn_idx
]] ||
6547 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[ln
][bn_idx
][0] ) >= 4 ||
6548 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[ln
][bn_idx
][1] ) >= mvy_limit
) {
6557 if(bS
[0]+bS
[1]+bS
[2]+bS
[3] == 0)
6562 // Do not use s->qscale as luma quantizer because it has not the same
6563 // value in IPCM macroblocks.
6564 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6565 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6566 tprintf(s
->avctx
, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, linesize
, uvlinesize
);
6567 { int i
; for (i
= 0; i
< 4; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6569 filter_mb_edgev( h
, &img_y
[4*edge
], linesize
, bS
, qp
);
6570 if( (edge
&1) == 0 ) {
6571 filter_mb_edgecv( h
, &img_cb
[2*edge
], uvlinesize
, bS
,
6572 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6573 filter_mb_edgecv( h
, &img_cr
[2*edge
], uvlinesize
, bS
,
6574 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6577 filter_mb_edgeh( h
, &img_y
[4*edge
*linesize
], linesize
, bS
, qp
);
6578 if( (edge
&1) == 0 ) {
6579 filter_mb_edgech( h
, &img_cb
[2*edge
*uvlinesize
], uvlinesize
, bS
,
6580 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6581 filter_mb_edgech( h
, &img_cr
[2*edge
*uvlinesize
], uvlinesize
, bS
,
6582 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6589 static int decode_slice(struct AVCodecContext
*avctx
, H264Context
*h
){
6590 MpegEncContext
* const s
= &h
->s
;
6591 const int part_mask
= s
->partitioned_frame
? (AC_END
|AC_ERROR
) : 0x7F;
6595 if( h
->pps
.cabac
) {
6599 align_get_bits( &s
->gb
);
6602 ff_init_cabac_states( &h
->cabac
);
6603 ff_init_cabac_decoder( &h
->cabac
,
6604 s
->gb
.buffer
+ get_bits_count(&s
->gb
)/8,
6605 ( s
->gb
.size_in_bits
- get_bits_count(&s
->gb
) + 7)/8);
6606 /* calculate pre-state */
6607 for( i
= 0; i
< 460; i
++ ) {
6609 if( h
->slice_type_nos
== FF_I_TYPE
)
6610 pre
= av_clip( ((cabac_context_init_I
[i
][0] * s
->qscale
) >>4 ) + cabac_context_init_I
[i
][1], 1, 126 );
6612 pre
= av_clip( ((cabac_context_init_PB
[h
->cabac_init_idc
][i
][0] * s
->qscale
) >>4 ) + cabac_context_init_PB
[h
->cabac_init_idc
][i
][1], 1, 126 );
6615 h
->cabac_state
[i
] = 2 * ( 63 - pre
) + 0;
6617 h
->cabac_state
[i
] = 2 * ( pre
- 64 ) + 1;
6622 int ret
= decode_mb_cabac(h
);
6624 //STOP_TIMER("decode_mb_cabac")
6626 if(ret
>=0) hl_decode_mb(h
);
6628 if( ret
>= 0 && FRAME_MBAFF
) { //FIXME optimal? or let mb_decode decode 16x32 ?
6631 if(ret
>=0) ret
= decode_mb_cabac(h
);
6633 if(ret
>=0) hl_decode_mb(h
);
6636 eos
= get_cabac_terminate( &h
->cabac
);
6638 if( ret
< 0 || h
->cabac
.bytestream
> h
->cabac
.bytestream_end
+ 2) {
6639 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d, bytestream (%td)\n", s
->mb_x
, s
->mb_y
, h
->cabac
.bytestream_end
- h
->cabac
.bytestream
);
6640 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6644 if( ++s
->mb_x
>= s
->mb_width
) {
6646 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6648 if(FIELD_OR_MBAFF_PICTURE
) {
6653 if( eos
|| s
->mb_y
>= s
->mb_height
) {
6654 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6655 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6662 int ret
= decode_mb_cavlc(h
);
6664 if(ret
>=0) hl_decode_mb(h
);
6666 if(ret
>=0 && FRAME_MBAFF
){ //FIXME optimal? or let mb_decode decode 16x32 ?
6668 ret
= decode_mb_cavlc(h
);
6670 if(ret
>=0) hl_decode_mb(h
);
6675 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6676 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6681 if(++s
->mb_x
>= s
->mb_width
){
6683 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6685 if(FIELD_OR_MBAFF_PICTURE
) {
6688 if(s
->mb_y
>= s
->mb_height
){
6689 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6691 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
) {
6692 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6696 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6703 if(get_bits_count(&s
->gb
) >= s
->gb
.size_in_bits
&& s
->mb_skip_run
<=0){
6704 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6705 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
){
6706 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6710 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6719 for(;s
->mb_y
< s
->mb_height
; s
->mb_y
++){
6720 for(;s
->mb_x
< s
->mb_width
; s
->mb_x
++){
6721 int ret
= decode_mb(h
);
6726 av_log(s
->avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6727 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6732 if(++s
->mb_x
>= s
->mb_width
){
6734 if(++s
->mb_y
>= s
->mb_height
){
6735 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6736 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6740 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6747 if(get_bits_count(s
->gb
) >= s
->gb
.size_in_bits
){
6748 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6749 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6753 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6760 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6763 return -1; //not reached
6766 static int decode_unregistered_user_data(H264Context
*h
, int size
){
6767 MpegEncContext
* const s
= &h
->s
;
6768 uint8_t user_data
[16+256];
6774 for(i
=0; i
<sizeof(user_data
)-1 && i
<size
; i
++){
6775 user_data
[i
]= get_bits(&s
->gb
, 8);
6779 e
= sscanf(user_data
+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build
);
6780 if(e
==1 && build
>=0)
6781 h
->x264_build
= build
;
6783 if(s
->avctx
->debug
& FF_DEBUG_BUGS
)
6784 av_log(s
->avctx
, AV_LOG_DEBUG
, "user data:\"%s\"\n", user_data
+16);
6787 skip_bits(&s
->gb
, 8);
6792 static int decode_sei(H264Context
*h
){
6793 MpegEncContext
* const s
= &h
->s
;
6795 while(get_bits_count(&s
->gb
) + 16 < s
->gb
.size_in_bits
){
6800 type
+= show_bits(&s
->gb
, 8);
6801 }while(get_bits(&s
->gb
, 8) == 255);
6805 size
+= show_bits(&s
->gb
, 8);
6806 }while(get_bits(&s
->gb
, 8) == 255);
6810 if(decode_unregistered_user_data(h
, size
) < 0)
6814 skip_bits(&s
->gb
, 8*size
);
6817 //FIXME check bits here
6818 align_get_bits(&s
->gb
);
6824 static inline void decode_hrd_parameters(H264Context
*h
, SPS
*sps
){
6825 MpegEncContext
* const s
= &h
->s
;
6827 cpb_count
= get_ue_golomb(&s
->gb
) + 1;
6828 get_bits(&s
->gb
, 4); /* bit_rate_scale */
6829 get_bits(&s
->gb
, 4); /* cpb_size_scale */
6830 for(i
=0; i
<cpb_count
; i
++){
6831 get_ue_golomb(&s
->gb
); /* bit_rate_value_minus1 */
6832 get_ue_golomb(&s
->gb
); /* cpb_size_value_minus1 */
6833 get_bits1(&s
->gb
); /* cbr_flag */
6835 get_bits(&s
->gb
, 5); /* initial_cpb_removal_delay_length_minus1 */
6836 get_bits(&s
->gb
, 5); /* cpb_removal_delay_length_minus1 */
6837 get_bits(&s
->gb
, 5); /* dpb_output_delay_length_minus1 */
6838 get_bits(&s
->gb
, 5); /* time_offset_length */
6841 static inline int decode_vui_parameters(H264Context
*h
, SPS
*sps
){
6842 MpegEncContext
* const s
= &h
->s
;
6843 int aspect_ratio_info_present_flag
;
6844 unsigned int aspect_ratio_idc
;
6845 int nal_hrd_parameters_present_flag
, vcl_hrd_parameters_present_flag
;
6847 aspect_ratio_info_present_flag
= get_bits1(&s
->gb
);
6849 if( aspect_ratio_info_present_flag
) {
6850 aspect_ratio_idc
= get_bits(&s
->gb
, 8);
6851 if( aspect_ratio_idc
== EXTENDED_SAR
) {
6852 sps
->sar
.num
= get_bits(&s
->gb
, 16);
6853 sps
->sar
.den
= get_bits(&s
->gb
, 16);
6854 }else if(aspect_ratio_idc
< sizeof(pixel_aspect
)/sizeof(*pixel_aspect
)){
6855 sps
->sar
= pixel_aspect
[aspect_ratio_idc
];
6857 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal aspect ratio\n");
6864 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6866 if(get_bits1(&s
->gb
)){ /* overscan_info_present_flag */
6867 get_bits1(&s
->gb
); /* overscan_appropriate_flag */
6870 if(get_bits1(&s
->gb
)){ /* video_signal_type_present_flag */
6871 get_bits(&s
->gb
, 3); /* video_format */
6872 get_bits1(&s
->gb
); /* video_full_range_flag */
6873 if(get_bits1(&s
->gb
)){ /* colour_description_present_flag */
6874 get_bits(&s
->gb
, 8); /* colour_primaries */
6875 get_bits(&s
->gb
, 8); /* transfer_characteristics */
6876 get_bits(&s
->gb
, 8); /* matrix_coefficients */
6880 if(get_bits1(&s
->gb
)){ /* chroma_location_info_present_flag */
6881 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_top_field */
6882 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_bottom_field */
6885 sps
->timing_info_present_flag
= get_bits1(&s
->gb
);
6886 if(sps
->timing_info_present_flag
){
6887 sps
->num_units_in_tick
= get_bits_long(&s
->gb
, 32);
6888 sps
->time_scale
= get_bits_long(&s
->gb
, 32);
6889 sps
->fixed_frame_rate_flag
= get_bits1(&s
->gb
);
6892 nal_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
6893 if(nal_hrd_parameters_present_flag
)
6894 decode_hrd_parameters(h
, sps
);
6895 vcl_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
6896 if(vcl_hrd_parameters_present_flag
)
6897 decode_hrd_parameters(h
, sps
);
6898 if(nal_hrd_parameters_present_flag
|| vcl_hrd_parameters_present_flag
)
6899 get_bits1(&s
->gb
); /* low_delay_hrd_flag */
6900 get_bits1(&s
->gb
); /* pic_struct_present_flag */
6902 sps
->bitstream_restriction_flag
= get_bits1(&s
->gb
);
6903 if(sps
->bitstream_restriction_flag
){
6904 unsigned int num_reorder_frames
;
6905 get_bits1(&s
->gb
); /* motion_vectors_over_pic_boundaries_flag */
6906 get_ue_golomb(&s
->gb
); /* max_bytes_per_pic_denom */
6907 get_ue_golomb(&s
->gb
); /* max_bits_per_mb_denom */
6908 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_horizontal */
6909 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_vertical */
6910 num_reorder_frames
= get_ue_golomb(&s
->gb
);
6911 get_ue_golomb(&s
->gb
); /*max_dec_frame_buffering*/
6913 if(num_reorder_frames
> 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6914 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal num_reorder_frames %d\n", num_reorder_frames
);
6918 sps
->num_reorder_frames
= num_reorder_frames
;
6924 static void decode_scaling_list(H264Context
*h
, uint8_t *factors
, int size
,
6925 const uint8_t *jvt_list
, const uint8_t *fallback_list
){
6926 MpegEncContext
* const s
= &h
->s
;
6927 int i
, last
= 8, next
= 8;
6928 const uint8_t *scan
= size
== 16 ? zigzag_scan
: zigzag_scan8x8
;
6929 if(!get_bits1(&s
->gb
)) /* matrix not written, we use the predicted one */
6930 memcpy(factors
, fallback_list
, size
*sizeof(uint8_t));
6932 for(i
=0;i
<size
;i
++){
6934 next
= (last
+ get_se_golomb(&s
->gb
)) & 0xff;
6935 if(!i
&& !next
){ /* matrix not written, we use the preset one */
6936 memcpy(factors
, jvt_list
, size
*sizeof(uint8_t));
6939 last
= factors
[scan
[i
]] = next
? next
: last
;
6943 static void decode_scaling_matrices(H264Context
*h
, SPS
*sps
, PPS
*pps
, int is_sps
,
6944 uint8_t (*scaling_matrix4
)[16], uint8_t (*scaling_matrix8
)[64]){
6945 MpegEncContext
* const s
= &h
->s
;
6946 int fallback_sps
= !is_sps
&& sps
->scaling_matrix_present
;
6947 const uint8_t *fallback
[4] = {
6948 fallback_sps
? sps
->scaling_matrix4
[0] : default_scaling4
[0],
6949 fallback_sps
? sps
->scaling_matrix4
[3] : default_scaling4
[1],
6950 fallback_sps
? sps
->scaling_matrix8
[0] : default_scaling8
[0],
6951 fallback_sps
? sps
->scaling_matrix8
[1] : default_scaling8
[1]
6953 if(get_bits1(&s
->gb
)){
6954 sps
->scaling_matrix_present
|= is_sps
;
6955 decode_scaling_list(h
,scaling_matrix4
[0],16,default_scaling4
[0],fallback
[0]); // Intra, Y
6956 decode_scaling_list(h
,scaling_matrix4
[1],16,default_scaling4
[0],scaling_matrix4
[0]); // Intra, Cr
6957 decode_scaling_list(h
,scaling_matrix4
[2],16,default_scaling4
[0],scaling_matrix4
[1]); // Intra, Cb
6958 decode_scaling_list(h
,scaling_matrix4
[3],16,default_scaling4
[1],fallback
[1]); // Inter, Y
6959 decode_scaling_list(h
,scaling_matrix4
[4],16,default_scaling4
[1],scaling_matrix4
[3]); // Inter, Cr
6960 decode_scaling_list(h
,scaling_matrix4
[5],16,default_scaling4
[1],scaling_matrix4
[4]); // Inter, Cb
6961 if(is_sps
|| pps
->transform_8x8_mode
){
6962 decode_scaling_list(h
,scaling_matrix8
[0],64,default_scaling8
[0],fallback
[2]); // Intra, Y
6963 decode_scaling_list(h
,scaling_matrix8
[1],64,default_scaling8
[1],fallback
[3]); // Inter, Y
6965 } else if(fallback_sps
) {
6966 memcpy(scaling_matrix4
, sps
->scaling_matrix4
, 6*16*sizeof(uint8_t));
6967 memcpy(scaling_matrix8
, sps
->scaling_matrix8
, 2*64*sizeof(uint8_t));
6972 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
6975 alloc_parameter_set(H264Context
*h
, void **vec
, const unsigned int id
, const unsigned int max
,
6976 const size_t size
, const char *name
)
6979 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "%s_id (%d) out of range\n", name
, id
);
6984 vec
[id
] = av_mallocz(size
);
6986 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cannot allocate memory for %s\n", name
);
6991 static inline int decode_seq_parameter_set(H264Context
*h
){
6992 MpegEncContext
* const s
= &h
->s
;
6993 int profile_idc
, level_idc
;
6994 unsigned int sps_id
, tmp
, mb_width
, mb_height
;
6998 profile_idc
= get_bits(&s
->gb
, 8);
6999 get_bits1(&s
->gb
); //constraint_set0_flag
7000 get_bits1(&s
->gb
); //constraint_set1_flag
7001 get_bits1(&s
->gb
); //constraint_set2_flag
7002 get_bits1(&s
->gb
); //constraint_set3_flag
7003 get_bits(&s
->gb
, 4); // reserved
7004 level_idc
= get_bits(&s
->gb
, 8);
7005 sps_id
= get_ue_golomb(&s
->gb
);
7007 sps
= alloc_parameter_set(h
, (void **)h
->sps_buffers
, sps_id
, MAX_SPS_COUNT
, sizeof(SPS
), "sps");
7011 sps
->profile_idc
= profile_idc
;
7012 sps
->level_idc
= level_idc
;
7014 if(sps
->profile_idc
>= 100){ //high profile
7015 sps
->chroma_format_idc
= get_ue_golomb(&s
->gb
);
7016 if(sps
->chroma_format_idc
== 3)
7017 get_bits1(&s
->gb
); //residual_color_transform_flag
7018 get_ue_golomb(&s
->gb
); //bit_depth_luma_minus8
7019 get_ue_golomb(&s
->gb
); //bit_depth_chroma_minus8
7020 sps
->transform_bypass
= get_bits1(&s
->gb
);
7021 decode_scaling_matrices(h
, sps
, NULL
, 1, sps
->scaling_matrix4
, sps
->scaling_matrix8
);
7023 sps
->scaling_matrix_present
= 0;
7024 sps
->chroma_format_idc
= 1;
7027 sps
->log2_max_frame_num
= get_ue_golomb(&s
->gb
) + 4;
7028 sps
->poc_type
= get_ue_golomb(&s
->gb
);
7030 if(sps
->poc_type
== 0){ //FIXME #define
7031 sps
->log2_max_poc_lsb
= get_ue_golomb(&s
->gb
) + 4;
7032 } else if(sps
->poc_type
== 1){//FIXME #define
7033 sps
->delta_pic_order_always_zero_flag
= get_bits1(&s
->gb
);
7034 sps
->offset_for_non_ref_pic
= get_se_golomb(&s
->gb
);
7035 sps
->offset_for_top_to_bottom_field
= get_se_golomb(&s
->gb
);
7036 tmp
= get_ue_golomb(&s
->gb
);
7038 if(tmp
>= sizeof(sps
->offset_for_ref_frame
) / sizeof(sps
->offset_for_ref_frame
[0])){
7039 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "poc_cycle_length overflow %u\n", tmp
);
7042 sps
->poc_cycle_length
= tmp
;
7044 for(i
=0; i
<sps
->poc_cycle_length
; i
++)
7045 sps
->offset_for_ref_frame
[i
]= get_se_golomb(&s
->gb
);
7046 }else if(sps
->poc_type
!= 2){
7047 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal POC type %d\n", sps
->poc_type
);
7051 tmp
= get_ue_golomb(&s
->gb
);
7052 if(tmp
> MAX_PICTURE_COUNT
-2 || tmp
>= 32){
7053 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "too many reference frames\n");
7056 sps
->ref_frame_count
= tmp
;
7057 sps
->gaps_in_frame_num_allowed_flag
= get_bits1(&s
->gb
);
7058 mb_width
= get_ue_golomb(&s
->gb
) + 1;
7059 mb_height
= get_ue_golomb(&s
->gb
) + 1;
7060 if(mb_width
>= INT_MAX
/16 || mb_height
>= INT_MAX
/16 ||
7061 avcodec_check_dimensions(NULL
, 16*mb_width
, 16*mb_height
)){
7062 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_width/height overflow\n");
7065 sps
->mb_width
= mb_width
;
7066 sps
->mb_height
= mb_height
;
7068 sps
->frame_mbs_only_flag
= get_bits1(&s
->gb
);
7069 if(!sps
->frame_mbs_only_flag
)
7070 sps
->mb_aff
= get_bits1(&s
->gb
);
7074 sps
->direct_8x8_inference_flag
= get_bits1(&s
->gb
);
7076 #ifndef ALLOW_INTERLACE
7078 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "MBAFF support not included; enable it at compile-time.\n");
7080 if(!sps
->direct_8x8_inference_flag
&& sps
->mb_aff
)
7081 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "MBAFF + !direct_8x8_inference is not implemented\n");
7083 sps
->crop
= get_bits1(&s
->gb
);
7085 sps
->crop_left
= get_ue_golomb(&s
->gb
);
7086 sps
->crop_right
= get_ue_golomb(&s
->gb
);
7087 sps
->crop_top
= get_ue_golomb(&s
->gb
);
7088 sps
->crop_bottom
= get_ue_golomb(&s
->gb
);
7089 if(sps
->crop_left
|| sps
->crop_top
){
7090 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "insane cropping not completely supported, this could look slightly wrong ...\n");
7092 if(sps
->crop_right
>= 8 || sps
->crop_bottom
>= (8>> !h
->sps
.frame_mbs_only_flag
)){
7093 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "brainfart cropping not supported, this could look slightly wrong ...\n");
7099 sps
->crop_bottom
= 0;
7102 sps
->vui_parameters_present_flag
= get_bits1(&s
->gb
);
7103 if( sps
->vui_parameters_present_flag
)
7104 decode_vui_parameters(h
, sps
);
7106 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7107 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7108 sps_id
, sps
->profile_idc
, sps
->level_idc
,
7110 sps
->ref_frame_count
,
7111 sps
->mb_width
, sps
->mb_height
,
7112 sps
->frame_mbs_only_flag
? "FRM" : (sps
->mb_aff
? "MB-AFF" : "PIC-AFF"),
7113 sps
->direct_8x8_inference_flag
? "8B8" : "",
7114 sps
->crop_left
, sps
->crop_right
,
7115 sps
->crop_top
, sps
->crop_bottom
,
7116 sps
->vui_parameters_present_flag
? "VUI" : "",
7117 ((const char*[]){"Gray","420","422","444"})[sps
->chroma_format_idc
]
7124 build_qp_table(PPS
*pps
, int t
, int index
)
7127 for(i
= 0; i
< 52; i
++)
7128 pps
->chroma_qp_table
[t
][i
] = chroma_qp
[av_clip(i
+ index
, 0, 51)];
7131 static inline int decode_picture_parameter_set(H264Context
*h
, int bit_length
){
7132 MpegEncContext
* const s
= &h
->s
;
7133 unsigned int tmp
, pps_id
= get_ue_golomb(&s
->gb
);
7136 pps
= alloc_parameter_set(h
, (void **)h
->pps_buffers
, pps_id
, MAX_PPS_COUNT
, sizeof(PPS
), "pps");
7140 tmp
= get_ue_golomb(&s
->gb
);
7141 if(tmp
>=MAX_SPS_COUNT
|| h
->sps_buffers
[tmp
] == NULL
){
7142 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "sps_id out of range\n");
7147 pps
->cabac
= get_bits1(&s
->gb
);
7148 pps
->pic_order_present
= get_bits1(&s
->gb
);
7149 pps
->slice_group_count
= get_ue_golomb(&s
->gb
) + 1;
7150 if(pps
->slice_group_count
> 1 ){
7151 pps
->mb_slice_group_map_type
= get_ue_golomb(&s
->gb
);
7152 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "FMO not supported\n");
7153 switch(pps
->mb_slice_group_map_type
){
7156 | for( i
= 0; i
<= num_slice_groups_minus1
; i
++ ) | | |
7157 | run_length
[ i
] |1 |ue(v
) |
7162 | for( i
= 0; i
< num_slice_groups_minus1
; i
++ ) | | |
7164 | top_left_mb
[ i
] |1 |ue(v
) |
7165 | bottom_right_mb
[ i
] |1 |ue(v
) |
7173 | slice_group_change_direction_flag
|1 |u(1) |
7174 | slice_group_change_rate_minus1
|1 |ue(v
) |
7179 | slice_group_id_cnt_minus1
|1 |ue(v
) |
7180 | for( i
= 0; i
<= slice_group_id_cnt_minus1
; i
++ | | |
7182 | slice_group_id
[ i
] |1 |u(v
) |
7187 pps
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
7188 pps
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
7189 if(pps
->ref_count
[0]-1 > 32-1 || pps
->ref_count
[1]-1 > 32-1){
7190 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow (pps)\n");
7191 pps
->ref_count
[0]= pps
->ref_count
[1]= 1;
7195 pps
->weighted_pred
= get_bits1(&s
->gb
);
7196 pps
->weighted_bipred_idc
= get_bits(&s
->gb
, 2);
7197 pps
->init_qp
= get_se_golomb(&s
->gb
) + 26;
7198 pps
->init_qs
= get_se_golomb(&s
->gb
) + 26;
7199 pps
->chroma_qp_index_offset
[0]= get_se_golomb(&s
->gb
);
7200 pps
->deblocking_filter_parameters_present
= get_bits1(&s
->gb
);
7201 pps
->constrained_intra_pred
= get_bits1(&s
->gb
);
7202 pps
->redundant_pic_cnt_present
= get_bits1(&s
->gb
);
7204 pps
->transform_8x8_mode
= 0;
7205 h
->dequant_coeff_pps
= -1; //contents of sps/pps can change even if id doesn't, so reinit
7206 memset(pps
->scaling_matrix4
, 16, 6*16*sizeof(uint8_t));
7207 memset(pps
->scaling_matrix8
, 16, 2*64*sizeof(uint8_t));
7209 if(get_bits_count(&s
->gb
) < bit_length
){
7210 pps
->transform_8x8_mode
= get_bits1(&s
->gb
);
7211 decode_scaling_matrices(h
, h
->sps_buffers
[pps
->sps_id
], pps
, 0, pps
->scaling_matrix4
, pps
->scaling_matrix8
);
7212 pps
->chroma_qp_index_offset
[1]= get_se_golomb(&s
->gb
); //second_chroma_qp_index_offset
7214 pps
->chroma_qp_index_offset
[1]= pps
->chroma_qp_index_offset
[0];
7217 build_qp_table(pps
, 0, pps
->chroma_qp_index_offset
[0]);
7218 build_qp_table(pps
, 1, pps
->chroma_qp_index_offset
[1]);
7219 if(pps
->chroma_qp_index_offset
[0] != pps
->chroma_qp_index_offset
[1])
7220 h
->pps
.chroma_qp_diff
= 1;
7222 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7223 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7224 pps_id
, pps
->sps_id
,
7225 pps
->cabac
? "CABAC" : "CAVLC",
7226 pps
->slice_group_count
,
7227 pps
->ref_count
[0], pps
->ref_count
[1],
7228 pps
->weighted_pred
? "weighted" : "",
7229 pps
->init_qp
, pps
->init_qs
, pps
->chroma_qp_index_offset
[0], pps
->chroma_qp_index_offset
[1],
7230 pps
->deblocking_filter_parameters_present
? "LPAR" : "",
7231 pps
->constrained_intra_pred
? "CONSTR" : "",
7232 pps
->redundant_pic_cnt_present
? "REDU" : "",
7233 pps
->transform_8x8_mode
? "8x8DCT" : ""
7241 * Call decode_slice() for each context.
7243 * @param h h264 master context
7244 * @param context_count number of contexts to execute
7246 static void execute_decode_slices(H264Context
*h
, int context_count
){
7247 MpegEncContext
* const s
= &h
->s
;
7248 AVCodecContext
* const avctx
= s
->avctx
;
7252 if(context_count
== 1) {
7253 decode_slice(avctx
, h
);
7255 for(i
= 1; i
< context_count
; i
++) {
7256 hx
= h
->thread_context
[i
];
7257 hx
->s
.error_resilience
= avctx
->error_resilience
;
7258 hx
->s
.error_count
= 0;
7261 avctx
->execute(avctx
, (void *)decode_slice
,
7262 (void **)h
->thread_context
, NULL
, context_count
);
7264 /* pull back stuff from slices to master context */
7265 hx
= h
->thread_context
[context_count
- 1];
7266 s
->mb_x
= hx
->s
.mb_x
;
7267 s
->mb_y
= hx
->s
.mb_y
;
7268 s
->dropable
= hx
->s
.dropable
;
7269 s
->picture_structure
= hx
->s
.picture_structure
;
7270 for(i
= 1; i
< context_count
; i
++)
7271 h
->s
.error_count
+= h
->thread_context
[i
]->s
.error_count
;
7276 static int decode_nal_units(H264Context
*h
, const uint8_t *buf
, int buf_size
){
7277 MpegEncContext
* const s
= &h
->s
;
7278 AVCodecContext
* const avctx
= s
->avctx
;
7280 H264Context
*hx
; ///< thread context
7281 int context_count
= 0;
7283 h
->max_contexts
= avctx
->thread_count
;
7286 for(i
=0; i
<50; i
++){
7287 av_log(NULL
, AV_LOG_ERROR
,"%02X ", buf
[i
]);
7290 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
)){
7291 h
->current_slice
= 0;
7292 if (!s
->first_field
)
7293 s
->current_picture_ptr
= NULL
;
7305 if(buf_index
>= buf_size
) break;
7307 for(i
= 0; i
< h
->nal_length_size
; i
++)
7308 nalsize
= (nalsize
<< 8) | buf
[buf_index
++];
7309 if(nalsize
<= 1 || (nalsize
+buf_index
> buf_size
)){
7314 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "AVC: nal size %d\n", nalsize
);
7319 // start code prefix search
7320 for(; buf_index
+ 3 < buf_size
; buf_index
++){
7321 // This should always succeed in the first iteration.
7322 if(buf
[buf_index
] == 0 && buf
[buf_index
+1] == 0 && buf
[buf_index
+2] == 1)
7326 if(buf_index
+3 >= buf_size
) break;
7331 hx
= h
->thread_context
[context_count
];
7333 ptr
= decode_nal(hx
, buf
+ buf_index
, &dst_length
, &consumed
, h
->is_avc
? nalsize
: buf_size
- buf_index
);
7334 if (ptr
==NULL
|| dst_length
< 0){
7337 while(ptr
[dst_length
- 1] == 0 && dst_length
> 0)
7339 bit_length
= !dst_length
? 0 : (8*dst_length
- decode_rbsp_trailing(h
, ptr
+ dst_length
- 1));
7341 if(s
->avctx
->debug
&FF_DEBUG_STARTCODE
){
7342 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "NAL %d at %d/%d length %d\n", hx
->nal_unit_type
, buf_index
, buf_size
, dst_length
);
7345 if (h
->is_avc
&& (nalsize
!= consumed
)){
7346 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "AVC: Consumed only %d bytes instead of %d\n", consumed
, nalsize
);
7350 buf_index
+= consumed
;
7352 if( (s
->hurry_up
== 1 && h
->nal_ref_idc
== 0) //FIXME do not discard SEI id
7353 ||(avctx
->skip_frame
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
7358 switch(hx
->nal_unit_type
){
7360 if (h
->nal_unit_type
!= NAL_IDR_SLICE
) {
7361 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "Invalid mix of idr and non-idr slices");
7364 idr(h
); //FIXME ensure we don't loose some frames if there is reordering
7366 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
7368 hx
->inter_gb_ptr
= &hx
->s
.gb
;
7369 hx
->s
.data_partitioning
= 0;
7371 if((err
= decode_slice_header(hx
, h
)))
7374 s
->current_picture_ptr
->key_frame
|= (hx
->nal_unit_type
== NAL_IDR_SLICE
);
7375 if(hx
->redundant_pic_count
==0 && hx
->s
.hurry_up
< 5
7376 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
)
7377 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| hx
->slice_type_nos
!=FF_B_TYPE
)
7378 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| hx
->slice_type_nos
==FF_I_TYPE
)
7379 && avctx
->skip_frame
< AVDISCARD_ALL
)
7383 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
7385 hx
->inter_gb_ptr
= NULL
;
7386 hx
->s
.data_partitioning
= 1;
7388 err
= decode_slice_header(hx
, h
);
7391 init_get_bits(&hx
->intra_gb
, ptr
, bit_length
);
7392 hx
->intra_gb_ptr
= &hx
->intra_gb
;
7395 init_get_bits(&hx
->inter_gb
, ptr
, bit_length
);
7396 hx
->inter_gb_ptr
= &hx
->inter_gb
;
7398 if(hx
->redundant_pic_count
==0 && hx
->intra_gb_ptr
&& hx
->s
.data_partitioning
7399 && s
->context_initialized
7401 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
)
7402 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| hx
->slice_type_nos
!=FF_B_TYPE
)
7403 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| hx
->slice_type_nos
==FF_I_TYPE
)
7404 && avctx
->skip_frame
< AVDISCARD_ALL
)
7408 init_get_bits(&s
->gb
, ptr
, bit_length
);
7412 init_get_bits(&s
->gb
, ptr
, bit_length
);
7413 decode_seq_parameter_set(h
);
7415 if(s
->flags
& CODEC_FLAG_LOW_DELAY
)
7418 if(avctx
->has_b_frames
< 2)
7419 avctx
->has_b_frames
= !s
->low_delay
;
7422 init_get_bits(&s
->gb
, ptr
, bit_length
);
7424 decode_picture_parameter_set(h
, bit_length
);
7428 case NAL_END_SEQUENCE
:
7429 case NAL_END_STREAM
:
7430 case NAL_FILLER_DATA
:
7432 case NAL_AUXILIARY_SLICE
:
7435 av_log(avctx
, AV_LOG_DEBUG
, "Unknown NAL code: %d (%d bits)\n", h
->nal_unit_type
, bit_length
);
7438 if(context_count
== h
->max_contexts
) {
7439 execute_decode_slices(h
, context_count
);
7444 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "decode_slice_header error\n");
7446 /* Slice could not be decoded in parallel mode, copy down
7447 * NAL unit stuff to context 0 and restart. Note that
7448 * rbsp_buffer is not transferred, but since we no longer
7449 * run in parallel mode this should not be an issue. */
7450 h
->nal_unit_type
= hx
->nal_unit_type
;
7451 h
->nal_ref_idc
= hx
->nal_ref_idc
;
7457 execute_decode_slices(h
, context_count
);
7462 * returns the number of bytes consumed for building the current frame
7464 static int get_consumed_bytes(MpegEncContext
*s
, int pos
, int buf_size
){
7465 if(pos
==0) pos
=1; //avoid infinite loops (i doubt that is needed but ...)
7466 if(pos
+10>buf_size
) pos
=buf_size
; // oops ;)
7471 static int decode_frame(AVCodecContext
*avctx
,
7472 void *data
, int *data_size
,
7473 const uint8_t *buf
, int buf_size
)
7475 H264Context
*h
= avctx
->priv_data
;
7476 MpegEncContext
*s
= &h
->s
;
7477 AVFrame
*pict
= data
;
7480 s
->flags
= avctx
->flags
;
7481 s
->flags2
= avctx
->flags2
;
7483 /* end of stream, output what is still in the buffers */
7484 if (buf_size
== 0) {
7488 //FIXME factorize this with the output code below
7489 out
= h
->delayed_pic
[0];
7491 for(i
=1; h
->delayed_pic
[i
] && (h
->delayed_pic
[i
]->poc
&& !h
->delayed_pic
[i
]->key_frame
); i
++)
7492 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7493 out
= h
->delayed_pic
[i
];
7497 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7498 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7501 *data_size
= sizeof(AVFrame
);
7502 *pict
= *(AVFrame
*)out
;
7508 if(h
->is_avc
&& !h
->got_avcC
) {
7509 int i
, cnt
, nalsize
;
7510 unsigned char *p
= avctx
->extradata
;
7511 if(avctx
->extradata_size
< 7) {
7512 av_log(avctx
, AV_LOG_ERROR
, "avcC too short\n");
7516 av_log(avctx
, AV_LOG_ERROR
, "Unknown avcC version %d\n", *p
);
7519 /* sps and pps in the avcC always have length coded with 2 bytes,
7520 so put a fake nal_length_size = 2 while parsing them */
7521 h
->nal_length_size
= 2;
7522 // Decode sps from avcC
7523 cnt
= *(p
+5) & 0x1f; // Number of sps
7525 for (i
= 0; i
< cnt
; i
++) {
7526 nalsize
= AV_RB16(p
) + 2;
7527 if(decode_nal_units(h
, p
, nalsize
) < 0) {
7528 av_log(avctx
, AV_LOG_ERROR
, "Decoding sps %d from avcC failed\n", i
);
7533 // Decode pps from avcC
7534 cnt
= *(p
++); // Number of pps
7535 for (i
= 0; i
< cnt
; i
++) {
7536 nalsize
= AV_RB16(p
) + 2;
7537 if(decode_nal_units(h
, p
, nalsize
) != nalsize
) {
7538 av_log(avctx
, AV_LOG_ERROR
, "Decoding pps %d from avcC failed\n", i
);
7543 // Now store right nal length size, that will be use to parse all other nals
7544 h
->nal_length_size
= ((*(((char*)(avctx
->extradata
))+4))&0x03)+1;
7545 // Do not reparse avcC
7549 if(avctx
->frame_number
==0 && !h
->is_avc
&& s
->avctx
->extradata_size
){
7550 if(decode_nal_units(h
, s
->avctx
->extradata
, s
->avctx
->extradata_size
) < 0)
7554 buf_index
=decode_nal_units(h
, buf
, buf_size
);
7558 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
) && !s
->current_picture_ptr
){
7559 if (avctx
->skip_frame
>= AVDISCARD_NONREF
|| s
->hurry_up
) return 0;
7560 av_log(avctx
, AV_LOG_ERROR
, "no frame!\n");
7564 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
) || (s
->mb_y
>= s
->mb_height
&& s
->mb_height
)){
7565 Picture
*out
= s
->current_picture_ptr
;
7566 Picture
*cur
= s
->current_picture_ptr
;
7567 int i
, pics
, cross_idr
, out_of_order
, out_idx
;
7571 s
->current_picture_ptr
->qscale_type
= FF_QSCALE_TYPE_H264
;
7572 s
->current_picture_ptr
->pict_type
= s
->pict_type
;
7575 execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
7576 h
->prev_poc_msb
= h
->poc_msb
;
7577 h
->prev_poc_lsb
= h
->poc_lsb
;
7579 h
->prev_frame_num_offset
= h
->frame_num_offset
;
7580 h
->prev_frame_num
= h
->frame_num
;
7583 * FIXME: Error handling code does not seem to support interlaced
7584 * when slices span multiple rows
7585 * The ff_er_add_slice calls don't work right for bottom
7586 * fields; they cause massive erroneous error concealing
7587 * Error marking covers both fields (top and bottom).
7588 * This causes a mismatched s->error_count
7589 * and a bad error table. Further, the error count goes to
7590 * INT_MAX when called for bottom field, because mb_y is
7591 * past end by one (callers fault) and resync_mb_y != 0
7592 * causes problems for the first MB line, too.
7599 if (cur
->field_poc
[0]==INT_MAX
|| cur
->field_poc
[1]==INT_MAX
) {
7600 /* Wait for second field. */
7604 cur
->interlaced_frame
= FIELD_OR_MBAFF_PICTURE
;
7605 /* Derive top_field_first from field pocs. */
7606 cur
->top_field_first
= cur
->field_poc
[0] < cur
->field_poc
[1];
7608 //FIXME do something with unavailable reference frames
7610 /* Sort B-frames into display order */
7612 if(h
->sps
.bitstream_restriction_flag
7613 && s
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
){
7614 s
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
7618 if( s
->avctx
->strict_std_compliance
>= FF_COMPLIANCE_STRICT
7619 && !h
->sps
.bitstream_restriction_flag
){
7620 s
->avctx
->has_b_frames
= MAX_DELAYED_PIC_COUNT
;
7625 while(h
->delayed_pic
[pics
]) pics
++;
7627 assert(pics
<= MAX_DELAYED_PIC_COUNT
);
7629 h
->delayed_pic
[pics
++] = cur
;
7630 if(cur
->reference
== 0)
7631 cur
->reference
= DELAYED_PIC_REF
;
7633 out
= h
->delayed_pic
[0];
7635 for(i
=1; h
->delayed_pic
[i
] && (h
->delayed_pic
[i
]->poc
&& !h
->delayed_pic
[i
]->key_frame
); i
++)
7636 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7637 out
= h
->delayed_pic
[i
];
7640 cross_idr
= !h
->delayed_pic
[0]->poc
|| !!h
->delayed_pic
[i
] || h
->delayed_pic
[0]->key_frame
;
7642 out_of_order
= !cross_idr
&& out
->poc
< h
->outputed_poc
;
7644 if(h
->sps
.bitstream_restriction_flag
&& s
->avctx
->has_b_frames
>= h
->sps
.num_reorder_frames
)
7646 else if((out_of_order
&& pics
-1 == s
->avctx
->has_b_frames
&& s
->avctx
->has_b_frames
< MAX_DELAYED_PIC_COUNT
)
7648 ((!cross_idr
&& out
->poc
> h
->outputed_poc
+ 2)
7649 || cur
->pict_type
== FF_B_TYPE
)))
7652 s
->avctx
->has_b_frames
++;
7655 if(out_of_order
|| pics
> s
->avctx
->has_b_frames
){
7656 out
->reference
&= ~DELAYED_PIC_REF
;
7657 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7658 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7660 if(!out_of_order
&& pics
> s
->avctx
->has_b_frames
){
7661 *data_size
= sizeof(AVFrame
);
7663 h
->outputed_poc
= out
->poc
;
7664 *pict
= *(AVFrame
*)out
;
7666 av_log(avctx
, AV_LOG_DEBUG
, "no picture\n");
7671 assert(pict
->data
[0] || !*data_size
);
7672 ff_print_debug_info(s
, pict
);
7673 //printf("out %d\n", (int)pict->data[0]);
7676 /* Return the Picture timestamp as the frame number */
7677 /* we subtract 1 because it is added on utils.c */
7678 avctx
->frame_number
= s
->picture_number
- 1;
7680 return get_consumed_bytes(s
, buf_index
, buf_size
);
7683 static inline void fill_mb_avail(H264Context
*h
){
7684 MpegEncContext
* const s
= &h
->s
;
7685 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
7688 h
->mb_avail
[0]= s
->mb_x
&& h
->slice_table
[mb_xy
- s
->mb_stride
- 1] == h
->slice_num
;
7689 h
->mb_avail
[1]= h
->slice_table
[mb_xy
- s
->mb_stride
] == h
->slice_num
;
7690 h
->mb_avail
[2]= s
->mb_x
+1 < s
->mb_width
&& h
->slice_table
[mb_xy
- s
->mb_stride
+ 1] == h
->slice_num
;
7696 h
->mb_avail
[3]= s
->mb_x
&& h
->slice_table
[mb_xy
- 1] == h
->slice_num
;
7697 h
->mb_avail
[4]= 1; //FIXME move out
7698 h
->mb_avail
[5]= 0; //FIXME move out
7706 #define SIZE (COUNT*40)
7712 // int int_temp[10000];
7714 AVCodecContext avctx
;
7716 dsputil_init(&dsp
, &avctx
);
7718 init_put_bits(&pb
, temp
, SIZE
);
7719 printf("testing unsigned exp golomb\n");
7720 for(i
=0; i
<COUNT
; i
++){
7722 set_ue_golomb(&pb
, i
);
7723 STOP_TIMER("set_ue_golomb");
7725 flush_put_bits(&pb
);
7727 init_get_bits(&gb
, temp
, 8*SIZE
);
7728 for(i
=0; i
<COUNT
; i
++){
7731 s
= show_bits(&gb
, 24);
7734 j
= get_ue_golomb(&gb
);
7736 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7739 STOP_TIMER("get_ue_golomb");
7743 init_put_bits(&pb
, temp
, SIZE
);
7744 printf("testing signed exp golomb\n");
7745 for(i
=0; i
<COUNT
; i
++){
7747 set_se_golomb(&pb
, i
- COUNT
/2);
7748 STOP_TIMER("set_se_golomb");
7750 flush_put_bits(&pb
);
7752 init_get_bits(&gb
, temp
, 8*SIZE
);
7753 for(i
=0; i
<COUNT
; i
++){
7756 s
= show_bits(&gb
, 24);
7759 j
= get_se_golomb(&gb
);
7760 if(j
!= i
- COUNT
/2){
7761 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7764 STOP_TIMER("get_se_golomb");
7768 printf("testing 4x4 (I)DCT\n");
7771 uint8_t src
[16], ref
[16];
7772 uint64_t error
= 0, max_error
=0;
7774 for(i
=0; i
<COUNT
; i
++){
7776 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7777 for(j
=0; j
<16; j
++){
7778 ref
[j
]= random()%255;
7779 src
[j
]= random()%255;
7782 h264_diff_dct_c(block
, src
, ref
, 4);
7785 for(j
=0; j
<16; j
++){
7786 // printf("%d ", block[j]);
7787 block
[j
]= block
[j
]*4;
7788 if(j
&1) block
[j
]= (block
[j
]*4 + 2)/5;
7789 if(j
&4) block
[j
]= (block
[j
]*4 + 2)/5;
7793 s
->dsp
.h264_idct_add(ref
, block
, 4);
7794 /* for(j=0; j<16; j++){
7795 printf("%d ", ref[j]);
7799 for(j
=0; j
<16; j
++){
7800 int diff
= FFABS(src
[j
] - ref
[j
]);
7803 max_error
= FFMAX(max_error
, diff
);
7806 printf("error=%f max_error=%d\n", ((float)error
)/COUNT
/16, (int)max_error
);
7807 printf("testing quantizer\n");
7808 for(qp
=0; qp
<52; qp
++){
7810 src1_block
[i
]= src2_block
[i
]= random()%255;
7813 printf("Testing NAL layer\n");
7815 uint8_t bitstream
[COUNT
];
7816 uint8_t nal
[COUNT
*2];
7818 memset(&h
, 0, sizeof(H264Context
));
7820 for(i
=0; i
<COUNT
; i
++){
7828 for(j
=0; j
<COUNT
; j
++){
7829 bitstream
[j
]= (random() % 255) + 1;
7832 for(j
=0; j
<zeros
; j
++){
7833 int pos
= random() % COUNT
;
7834 while(bitstream
[pos
] == 0){
7843 nal_length
= encode_nal(&h
, nal
, bitstream
, COUNT
, COUNT
*2);
7845 printf("encoding failed\n");
7849 out
= decode_nal(&h
, nal
, &out_length
, &consumed
, nal_length
);
7853 if(out_length
!= COUNT
){
7854 printf("incorrect length %d %d\n", out_length
, COUNT
);
7858 if(consumed
!= nal_length
){
7859 printf("incorrect consumed length %d %d\n", nal_length
, consumed
);
7863 if(memcmp(bitstream
, out
, COUNT
)){
7864 printf("mismatch\n");
7870 printf("Testing RBSP\n");
7878 static av_cold
int decode_end(AVCodecContext
*avctx
)
7880 H264Context
*h
= avctx
->priv_data
;
7881 MpegEncContext
*s
= &h
->s
;
7883 av_freep(&h
->rbsp_buffer
[0]);
7884 av_freep(&h
->rbsp_buffer
[1]);
7885 free_tables(h
); //FIXME cleanup init stuff perhaps
7888 // memset(h, 0, sizeof(H264Context));
7894 AVCodec h264_decoder
= {
7898 sizeof(H264Context
),
7903 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1
| CODEC_CAP_DELAY
,
7905 .long_name
= NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),