/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
48 static VLC coeff_token_vlc
[4];
49 static VLC chroma_dc_coeff_token_vlc
;
51 static VLC total_zeros_vlc
[15];
52 static VLC chroma_dc_total_zeros_vlc
[3];
54 static VLC run_vlc
[6];
57 static void svq3_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
);
58 static void svq3_add_idct_c(uint8_t *dst
, DCTELEM
*block
, int stride
, int qp
, int dc
);
59 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
60 static void filter_mb_fast( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
62 static av_always_inline
uint32_t pack16to32(int a
, int b
){
63 #ifdef WORDS_BIGENDIAN
64 return (b
&0xFFFF) + (a
<<16);
66 return (a
&0xFFFF) + (b
<<16);
/* qp % 6 lookup table for every legal QP value (0..51); avoids a runtime
 * division in the dequant scale selection. */
const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/* qp / 6 lookup table for every legal QP value (0..51); companion of
 * ff_rem6, avoids a runtime division in the dequant shift selection. */
const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
79 static void fill_caches(H264Context
*h
, int mb_type
, int for_deblock
){
80 MpegEncContext
* const s
= &h
->s
;
81 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
82 int topleft_xy
, top_xy
, topright_xy
, left_xy
[2];
83 int topleft_type
, top_type
, topright_type
, left_type
[2];
85 int topleft_partition
= -1;
88 top_xy
= mb_xy
- (s
->mb_stride
<< FIELD_PICTURE
);
90 //FIXME deblocking could skip the intra and nnz parts.
91 if(for_deblock
&& (h
->slice_num
== 1 || h
->slice_table
[mb_xy
] == h
->slice_table
[top_xy
]) && !FRAME_MBAFF
)
94 /* Wow, what a mess, why didn't they simplify the interlacing & intra
95 * stuff, I can't imagine that these complex rules are worth it. */
97 topleft_xy
= top_xy
- 1;
98 topright_xy
= top_xy
+ 1;
99 left_xy
[1] = left_xy
[0] = mb_xy
-1;
109 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
110 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
111 const int topleft_pair_xy
= top_pair_xy
- 1;
112 const int topright_pair_xy
= top_pair_xy
+ 1;
113 const int topleft_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topleft_pair_xy
]);
114 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
115 const int topright_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topright_pair_xy
]);
116 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
117 const int curr_mb_frame_flag
= !IS_INTERLACED(mb_type
);
118 const int bottom
= (s
->mb_y
& 1);
119 tprintf(s
->avctx
, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag
, left_mb_frame_flag
, topleft_mb_frame_flag
, top_mb_frame_flag
, topright_mb_frame_flag
);
121 ? !curr_mb_frame_flag
// bottom macroblock
122 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
124 top_xy
-= s
->mb_stride
;
127 ? !curr_mb_frame_flag
// bottom macroblock
128 : (!curr_mb_frame_flag
&& !topleft_mb_frame_flag
) // top macroblock
130 topleft_xy
-= s
->mb_stride
;
131 } else if(bottom
&& curr_mb_frame_flag
&& !left_mb_frame_flag
) {
132 topleft_xy
+= s
->mb_stride
;
133 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
134 topleft_partition
= 0;
137 ? !curr_mb_frame_flag
// bottom macroblock
138 : (!curr_mb_frame_flag
&& !topright_mb_frame_flag
) // top macroblock
140 topright_xy
-= s
->mb_stride
;
142 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
143 left_xy
[1] = left_xy
[0] = pair_xy
- 1;
144 if (curr_mb_frame_flag
) {
165 left_xy
[1] += s
->mb_stride
;
178 h
->top_mb_xy
= top_xy
;
179 h
->left_mb_xy
[0] = left_xy
[0];
180 h
->left_mb_xy
[1] = left_xy
[1];
184 top_type
= h
->slice_table
[top_xy
] < 255 ? s
->current_picture
.mb_type
[top_xy
] : 0;
185 left_type
[0] = h
->slice_table
[left_xy
[0] ] < 255 ? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
186 left_type
[1] = h
->slice_table
[left_xy
[1] ] < 255 ? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
188 if(FRAME_MBAFF
&& !IS_INTRA(mb_type
)){
190 int v
= *(uint16_t*)&h
->non_zero_count
[mb_xy
][14];
192 h
->non_zero_count_cache
[scan8
[i
]] = (v
>>i
)&1;
193 for(list
=0; list
<h
->list_count
; list
++){
194 if(USES_LIST(mb_type
,list
)){
195 uint32_t *src
= (uint32_t*)s
->current_picture
.motion_val
[list
][h
->mb2b_xy
[mb_xy
]];
196 uint32_t *dst
= (uint32_t*)h
->mv_cache
[list
][scan8
[0]];
197 int8_t *ref
= &s
->current_picture
.ref_index
[list
][h
->mb2b8_xy
[mb_xy
]];
198 for(i
=0; i
<4; i
++, dst
+=8, src
+=h
->b_stride
){
204 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 0]] =
205 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 2]] = pack16to32(ref
[0],ref
[1])*0x0101;
207 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 8]] =
208 *(uint32_t*)&h
->ref_cache
[list
][scan8
[10]] = pack16to32(ref
[0],ref
[1])*0x0101;
210 fill_rectangle(&h
-> mv_cache
[list
][scan8
[ 0]], 4, 4, 8, 0, 4);
211 fill_rectangle(&h
->ref_cache
[list
][scan8
[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED
, 1);
216 topleft_type
= h
->slice_table
[topleft_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topleft_xy
] : 0;
217 top_type
= h
->slice_table
[top_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[top_xy
] : 0;
218 topright_type
= h
->slice_table
[topright_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topright_xy
]: 0;
219 left_type
[0] = h
->slice_table
[left_xy
[0] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
220 left_type
[1] = h
->slice_table
[left_xy
[1] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
223 if(IS_INTRA(mb_type
)){
224 h
->topleft_samples_available
=
225 h
->top_samples_available
=
226 h
->left_samples_available
= 0xFFFF;
227 h
->topright_samples_available
= 0xEEEA;
229 if(!IS_INTRA(top_type
) && (top_type
==0 || h
->pps
.constrained_intra_pred
)){
230 h
->topleft_samples_available
= 0xB3FF;
231 h
->top_samples_available
= 0x33FF;
232 h
->topright_samples_available
= 0x26EA;
235 if(!IS_INTRA(left_type
[i
]) && (left_type
[i
]==0 || h
->pps
.constrained_intra_pred
)){
236 h
->topleft_samples_available
&= 0xDF5F;
237 h
->left_samples_available
&= 0x5F5F;
241 if(!IS_INTRA(topleft_type
) && (topleft_type
==0 || h
->pps
.constrained_intra_pred
))
242 h
->topleft_samples_available
&= 0x7FFF;
244 if(!IS_INTRA(topright_type
) && (topright_type
==0 || h
->pps
.constrained_intra_pred
))
245 h
->topright_samples_available
&= 0xFBFF;
247 if(IS_INTRA4x4(mb_type
)){
248 if(IS_INTRA4x4(top_type
)){
249 h
->intra4x4_pred_mode_cache
[4+8*0]= h
->intra4x4_pred_mode
[top_xy
][4];
250 h
->intra4x4_pred_mode_cache
[5+8*0]= h
->intra4x4_pred_mode
[top_xy
][5];
251 h
->intra4x4_pred_mode_cache
[6+8*0]= h
->intra4x4_pred_mode
[top_xy
][6];
252 h
->intra4x4_pred_mode_cache
[7+8*0]= h
->intra4x4_pred_mode
[top_xy
][3];
255 if(!top_type
|| (IS_INTER(top_type
) && h
->pps
.constrained_intra_pred
))
260 h
->intra4x4_pred_mode_cache
[4+8*0]=
261 h
->intra4x4_pred_mode_cache
[5+8*0]=
262 h
->intra4x4_pred_mode_cache
[6+8*0]=
263 h
->intra4x4_pred_mode_cache
[7+8*0]= pred
;
266 if(IS_INTRA4x4(left_type
[i
])){
267 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[0+2*i
]];
268 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[1+2*i
]];
271 if(!left_type
[i
] || (IS_INTER(left_type
[i
]) && h
->pps
.constrained_intra_pred
))
276 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]=
277 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= pred
;
292 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
294 h
->non_zero_count_cache
[4+8*0]= h
->non_zero_count
[top_xy
][4];
295 h
->non_zero_count_cache
[5+8*0]= h
->non_zero_count
[top_xy
][5];
296 h
->non_zero_count_cache
[6+8*0]= h
->non_zero_count
[top_xy
][6];
297 h
->non_zero_count_cache
[7+8*0]= h
->non_zero_count
[top_xy
][3];
299 h
->non_zero_count_cache
[1+8*0]= h
->non_zero_count
[top_xy
][9];
300 h
->non_zero_count_cache
[2+8*0]= h
->non_zero_count
[top_xy
][8];
302 h
->non_zero_count_cache
[1+8*3]= h
->non_zero_count
[top_xy
][12];
303 h
->non_zero_count_cache
[2+8*3]= h
->non_zero_count
[top_xy
][11];
306 h
->non_zero_count_cache
[4+8*0]=
307 h
->non_zero_count_cache
[5+8*0]=
308 h
->non_zero_count_cache
[6+8*0]=
309 h
->non_zero_count_cache
[7+8*0]=
311 h
->non_zero_count_cache
[1+8*0]=
312 h
->non_zero_count_cache
[2+8*0]=
314 h
->non_zero_count_cache
[1+8*3]=
315 h
->non_zero_count_cache
[2+8*3]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
319 for (i
=0; i
<2; i
++) {
321 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[0+2*i
]];
322 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[1+2*i
]];
323 h
->non_zero_count_cache
[0+8*1 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[4+2*i
]];
324 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[5+2*i
]];
326 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]=
327 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]=
328 h
->non_zero_count_cache
[0+8*1 + 8*i
]=
329 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
336 h
->top_cbp
= h
->cbp_table
[top_xy
];
337 } else if(IS_INTRA(mb_type
)) {
344 h
->left_cbp
= h
->cbp_table
[left_xy
[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type
)) {
351 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[0]]>>((left_block
[0]&(~1))+1))&0x1) << 1;
354 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[1]]>>((left_block
[2]&(~1))+1))&0x1) << 3;
359 if(IS_INTER(mb_type
) || IS_DIRECT(mb_type
)){
361 for(list
=0; list
<h
->list_count
; list
++){
362 if(!USES_LIST(mb_type
, list
) && !IS_DIRECT(mb_type
) && !h
->deblocking_filter
){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
370 h
->mv_cache_clean
[list
]= 0;
372 if(USES_LIST(top_type
, list
)){
373 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
374 const int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
375 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0];
376 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 1];
377 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2];
378 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 3];
379 h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]=
380 h
->ref_cache
[list
][scan8
[0] + 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 0];
381 h
->ref_cache
[list
][scan8
[0] + 2 - 1*8]=
382 h
->ref_cache
[list
][scan8
[0] + 3 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 1];
384 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]=
385 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]=
386 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]=
387 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]= ((top_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
)&0xFF)*0x01010101;
392 int cache_idx
= scan8
[0] - 1 + i
*2*8;
393 if(USES_LIST(left_type
[i
], list
)){
394 const int b_xy
= h
->mb2b_xy
[left_xy
[i
]] + 3;
395 const int b8_xy
= h
->mb2b8_xy
[left_xy
[i
]] + 1;
396 *(uint32_t*)h
->mv_cache
[list
][cache_idx
]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[0+i
*2]];
397 *(uint32_t*)h
->mv_cache
[list
][cache_idx
+8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[1+i
*2]];
398 h
->ref_cache
[list
][cache_idx
]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[0+i
*2]>>1)];
399 h
->ref_cache
[list
][cache_idx
+8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[1+i
*2]>>1)];
401 *(uint32_t*)h
->mv_cache
[list
][cache_idx
]=
402 *(uint32_t*)h
->mv_cache
[list
][cache_idx
+8]= 0;
403 h
->ref_cache
[list
][cache_idx
]=
404 h
->ref_cache
[list
][cache_idx
+8]= left_type
[i
] ? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
408 if((for_deblock
|| (IS_DIRECT(mb_type
) && !h
->direct_spatial_mv_pred
)) && !FRAME_MBAFF
)
411 if(USES_LIST(topleft_type
, list
)){
412 const int b_xy
= h
->mb2b_xy
[topleft_xy
] + 3 + h
->b_stride
+ (topleft_partition
& 2*h
->b_stride
);
413 const int b8_xy
= h
->mb2b8_xy
[topleft_xy
] + 1 + (topleft_partition
& h
->b8_stride
);
414 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
415 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
417 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= 0;
418 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= topleft_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
421 if(USES_LIST(topright_type
, list
)){
422 const int b_xy
= h
->mb2b_xy
[topright_xy
] + 3*h
->b_stride
;
423 const int b8_xy
= h
->mb2b8_xy
[topright_xy
] + h
->b8_stride
;
424 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
425 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
427 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= 0;
428 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= topright_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
431 if((IS_SKIP(mb_type
) || IS_DIRECT(mb_type
)) && !FRAME_MBAFF
)
434 h
->ref_cache
[list
][scan8
[5 ]+1] =
435 h
->ref_cache
[list
][scan8
[7 ]+1] =
436 h
->ref_cache
[list
][scan8
[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h
->ref_cache
[list
][scan8
[4 ]] =
438 h
->ref_cache
[list
][scan8
[12]] = PART_NOT_AVAILABLE
;
439 *(uint32_t*)h
->mv_cache
[list
][scan8
[5 ]+1]=
440 *(uint32_t*)h
->mv_cache
[list
][scan8
[7 ]+1]=
441 *(uint32_t*)h
->mv_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h
->mv_cache
[list
][scan8
[4 ]]=
443 *(uint32_t*)h
->mv_cache
[list
][scan8
[12]]= 0;
446 /* XXX beurk, Load mvd */
447 if(USES_LIST(top_type
, list
)){
448 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
449 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 0];
450 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 1];
451 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 2];
452 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 3];
454 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]=
455 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]=
456 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]=
457 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type
[0], list
)){
460 const int b_xy
= h
->mb2b_xy
[left_xy
[0]] + 3;
461 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[0]];
462 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[1]];
464 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]=
465 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type
[1], list
)){
468 const int b_xy
= h
->mb2b_xy
[left_xy
[1]] + 3;
469 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[2]];
470 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[3]];
472 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]=
473 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h
->mvd_cache
[list
][scan8
[5 ]+1]=
476 *(uint32_t*)h
->mvd_cache
[list
][scan8
[7 ]+1]=
477 *(uint32_t*)h
->mvd_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h
->mvd_cache
[list
][scan8
[4 ]]=
479 *(uint32_t*)h
->mvd_cache
[list
][scan8
[12]]= 0;
481 if(h
->slice_type
== FF_B_TYPE
){
482 fill_rectangle(&h
->direct_cache
[scan8
[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type
)){
485 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type
)){
487 int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
488 h
->direct_cache
[scan8
[0] + 0 - 1*8]= h
->direct_table
[b8_xy
];
489 h
->direct_cache
[scan8
[0] + 2 - 1*8]= h
->direct_table
[b8_xy
+ 1];
491 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type
[0]))
495 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type
[0]))
497 h
->direct_cache
[scan8
[0] - 1 + 0*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[0]] + 1 + h
->b8_stride
*(left_block
[0]>>1)];
499 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type
[1]))
502 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type
[1]))
504 h
->direct_cache
[scan8
[0] - 1 + 2*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[1]] + 1 + h
->b8_stride
*(left_block
[2]>>1)];
506 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 0;
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
546 h
->neighbor_transform_size
= !!IS_8x8DCT(top_type
) + !!IS_8x8DCT(left_type
[0]);
549 static inline void write_back_intra_pred_mode(H264Context
*h
){
550 MpegEncContext
* const s
= &h
->s
;
551 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
553 h
->intra4x4_pred_mode
[mb_xy
][0]= h
->intra4x4_pred_mode_cache
[7+8*1];
554 h
->intra4x4_pred_mode
[mb_xy
][1]= h
->intra4x4_pred_mode_cache
[7+8*2];
555 h
->intra4x4_pred_mode
[mb_xy
][2]= h
->intra4x4_pred_mode_cache
[7+8*3];
556 h
->intra4x4_pred_mode
[mb_xy
][3]= h
->intra4x4_pred_mode_cache
[7+8*4];
557 h
->intra4x4_pred_mode
[mb_xy
][4]= h
->intra4x4_pred_mode_cache
[4+8*4];
558 h
->intra4x4_pred_mode
[mb_xy
][5]= h
->intra4x4_pred_mode_cache
[5+8*4];
559 h
->intra4x4_pred_mode
[mb_xy
][6]= h
->intra4x4_pred_mode_cache
[6+8*4];
563 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
565 static inline int check_intra4x4_pred_mode(H264Context
*h
){
566 MpegEncContext
* const s
= &h
->s
;
567 static const int8_t top
[12]= {-1, 0,LEFT_DC_PRED
,-1,-1,-1,-1,-1, 0};
568 static const int8_t left
[12]= { 0,-1, TOP_DC_PRED
, 0,-1,-1,-1, 0,-1,DC_128_PRED
};
571 if(!(h
->top_samples_available
&0x8000)){
573 int status
= top
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + i
] ];
575 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
578 h
->intra4x4_pred_mode_cache
[scan8
[0] + i
]= status
;
583 if(!(h
->left_samples_available
&0x8000)){
585 int status
= left
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
] ];
587 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
590 h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
]= status
;
596 } //FIXME cleanup like next
599 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
601 static inline int check_intra_pred_mode(H264Context
*h
, int mode
){
602 MpegEncContext
* const s
= &h
->s
;
603 static const int8_t top
[7]= {LEFT_DC_PRED8x8
, 1,-1,-1};
604 static const int8_t left
[7]= { TOP_DC_PRED8x8
,-1, 2,-1,DC_128_PRED8x8
};
607 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "out of range intra chroma pred mode at %d %d\n", s
->mb_x
, s
->mb_y
);
611 if(!(h
->top_samples_available
&0x8000)){
614 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
619 if(!(h
->left_samples_available
&0x8000)){
622 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
631 * gets the predicted intra4x4 prediction mode.
633 static inline int pred_intra_mode(H264Context
*h
, int n
){
634 const int index8
= scan8
[n
];
635 const int left
= h
->intra4x4_pred_mode_cache
[index8
- 1];
636 const int top
= h
->intra4x4_pred_mode_cache
[index8
- 8];
637 const int min
= FFMIN(left
, top
);
639 tprintf(h
->s
.avctx
, "mode:%d %d min:%d\n", left
,top
, min
);
641 if(min
<0) return DC_PRED
;
645 static inline void write_back_non_zero_count(H264Context
*h
){
646 MpegEncContext
* const s
= &h
->s
;
647 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
649 h
->non_zero_count
[mb_xy
][0]= h
->non_zero_count_cache
[7+8*1];
650 h
->non_zero_count
[mb_xy
][1]= h
->non_zero_count_cache
[7+8*2];
651 h
->non_zero_count
[mb_xy
][2]= h
->non_zero_count_cache
[7+8*3];
652 h
->non_zero_count
[mb_xy
][3]= h
->non_zero_count_cache
[7+8*4];
653 h
->non_zero_count
[mb_xy
][4]= h
->non_zero_count_cache
[4+8*4];
654 h
->non_zero_count
[mb_xy
][5]= h
->non_zero_count_cache
[5+8*4];
655 h
->non_zero_count
[mb_xy
][6]= h
->non_zero_count_cache
[6+8*4];
657 h
->non_zero_count
[mb_xy
][9]= h
->non_zero_count_cache
[1+8*2];
658 h
->non_zero_count
[mb_xy
][8]= h
->non_zero_count_cache
[2+8*2];
659 h
->non_zero_count
[mb_xy
][7]= h
->non_zero_count_cache
[2+8*1];
661 h
->non_zero_count
[mb_xy
][12]=h
->non_zero_count_cache
[1+8*5];
662 h
->non_zero_count
[mb_xy
][11]=h
->non_zero_count_cache
[2+8*5];
663 h
->non_zero_count
[mb_xy
][10]=h
->non_zero_count_cache
[2+8*4];
666 // store all luma nnzs, for deblocking
669 v
+= (!!h
->non_zero_count_cache
[scan8
[i
]]) << i
;
670 *(uint16_t*)&h
->non_zero_count
[mb_xy
][14] = v
;
675 * gets the predicted number of non zero coefficients.
676 * @param n block index
678 static inline int pred_non_zero_count(H264Context
*h
, int n
){
679 const int index8
= scan8
[n
];
680 const int left
= h
->non_zero_count_cache
[index8
- 1];
681 const int top
= h
->non_zero_count_cache
[index8
- 8];
684 if(i
<64) i
= (i
+1)>>1;
686 tprintf(h
->s
.avctx
, "pred_nnz L%X T%X n%d s%d P%X\n", left
, top
, n
, scan8
[n
], i
&31);
691 static inline int fetch_diagonal_mv(H264Context
*h
, const int16_t **C
, int i
, int list
, int part_width
){
692 const int topright_ref
= h
->ref_cache
[list
][ i
- 8 + part_width
];
693 MpegEncContext
*s
= &h
->s
;
695 /* there is no consistent mapping of mvs to neighboring locations that will
696 * make mbaff happy, so we can't move all this logic to fill_caches */
698 const uint32_t *mb_types
= s
->current_picture_ptr
->mb_type
;
700 *(uint32_t*)h
->mv_cache
[list
][scan8
[0]-2] = 0;
701 *C
= h
->mv_cache
[list
][scan8
[0]-2];
704 && (s
->mb_y
&1) && i
< scan8
[0]+8 && topright_ref
!= PART_NOT_AVAILABLE
){
705 int topright_xy
= s
->mb_x
+ (s
->mb_y
-1)*s
->mb_stride
+ (i
== scan8
[0]+3);
706 if(IS_INTERLACED(mb_types
[topright_xy
])){
707 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
708 const int x4 = X4, y4 = Y4;\
709 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
710 if(!USES_LIST(mb_type,list))\
711 return LIST_NOT_USED;\
712 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
713 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
714 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
715 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
717 SET_DIAG_MV(*2, >>1, s
->mb_x
*4+(i
&7)-4+part_width
, s
->mb_y
*4-1);
720 if(topright_ref
== PART_NOT_AVAILABLE
721 && ((s
->mb_y
&1) || i
>= scan8
[0]+8) && (i
&7)==4
722 && h
->ref_cache
[list
][scan8
[0]-1] != PART_NOT_AVAILABLE
){
724 && IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])){
725 SET_DIAG_MV(*2, >>1, s
->mb_x
*4-1, (s
->mb_y
|1)*4+(s
->mb_y
&1)*2+(i
>>4)-1);
728 && !IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])
730 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
731 SET_DIAG_MV(/2, <<1, s
->mb_x
*4-1, (s
->mb_y
&~1)*4 - 1 + ((i
-scan8
[0])>>3)*2);
737 if(topright_ref
!= PART_NOT_AVAILABLE
){
738 *C
= h
->mv_cache
[list
][ i
- 8 + part_width
];
741 tprintf(s
->avctx
, "topright MV not available\n");
743 *C
= h
->mv_cache
[list
][ i
- 8 - 1 ];
744 return h
->ref_cache
[list
][ i
- 8 - 1 ];
749 * gets the predicted MV.
750 * @param n the block index
751 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
752 * @param mx the x component of the predicted motion vector
753 * @param my the y component of the predicted motion vector
755 static inline void pred_motion(H264Context
* const h
, int n
, int part_width
, int list
, int ref
, int * const mx
, int * const my
){
756 const int index8
= scan8
[n
];
757 const int top_ref
= h
->ref_cache
[list
][ index8
- 8 ];
758 const int left_ref
= h
->ref_cache
[list
][ index8
- 1 ];
759 const int16_t * const A
= h
->mv_cache
[list
][ index8
- 1 ];
760 const int16_t * const B
= h
->mv_cache
[list
][ index8
- 8 ];
762 int diagonal_ref
, match_count
;
764 assert(part_width
==1 || part_width
==2 || part_width
==4);
774 diagonal_ref
= fetch_diagonal_mv(h
, &C
, index8
, list
, part_width
);
775 match_count
= (diagonal_ref
==ref
) + (top_ref
==ref
) + (left_ref
==ref
);
776 tprintf(h
->s
.avctx
, "pred_motion match_count=%d\n", match_count
);
777 if(match_count
> 1){ //most common
778 *mx
= mid_pred(A
[0], B
[0], C
[0]);
779 *my
= mid_pred(A
[1], B
[1], C
[1]);
780 }else if(match_count
==1){
784 }else if(top_ref
==ref
){
792 if(top_ref
== PART_NOT_AVAILABLE
&& diagonal_ref
== PART_NOT_AVAILABLE
&& left_ref
!= PART_NOT_AVAILABLE
){
796 *mx
= mid_pred(A
[0], B
[0], C
[0]);
797 *my
= mid_pred(A
[1], B
[1], C
[1]);
801 tprintf(h
->s
.avctx
, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], diagonal_ref
, C
[0], C
[1], left_ref
, A
[0], A
[1], ref
, *mx
, *my
, h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
805 * gets the directionally predicted 16x8 MV.
806 * @param n the block index
807 * @param mx the x component of the predicted motion vector
808 * @param my the y component of the predicted motion vector
810 static inline void pred_16x8_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
812 const int top_ref
= h
->ref_cache
[list
][ scan8
[0] - 8 ];
813 const int16_t * const B
= h
->mv_cache
[list
][ scan8
[0] - 8 ];
815 tprintf(h
->s
.avctx
, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
823 const int left_ref
= h
->ref_cache
[list
][ scan8
[8] - 1 ];
824 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[8] - 1 ];
826 tprintf(h
->s
.avctx
, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
836 pred_motion(h
, n
, 4, list
, ref
, mx
, my
);
840 * gets the directionally predicted 8x16 MV.
841 * @param n the block index
842 * @param mx the x component of the predicted motion vector
843 * @param my the y component of the predicted motion vector
845 static inline void pred_8x16_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
847 const int left_ref
= h
->ref_cache
[list
][ scan8
[0] - 1 ];
848 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[0] - 1 ];
850 tprintf(h
->s
.avctx
, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
861 diagonal_ref
= fetch_diagonal_mv(h
, &C
, scan8
[4], list
, 2);
863 tprintf(h
->s
.avctx
, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref
, C
[0], C
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
865 if(diagonal_ref
== ref
){
873 pred_motion(h
, n
, 2, list
, ref
, mx
, my
);
876 static inline void pred_pskip_motion(H264Context
* const h
, int * const mx
, int * const my
){
877 const int top_ref
= h
->ref_cache
[0][ scan8
[0] - 8 ];
878 const int left_ref
= h
->ref_cache
[0][ scan8
[0] - 1 ];
880 tprintf(h
->s
.avctx
, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref
, left_ref
, h
->s
.mb_x
, h
->s
.mb_y
);
882 if(top_ref
== PART_NOT_AVAILABLE
|| left_ref
== PART_NOT_AVAILABLE
883 || (top_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 8 ] == 0)
884 || (left_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 1 ] == 0)){
890 pred_motion(h
, 0, 4, 0, 0, mx
, my
);
895 static inline void direct_dist_scale_factor(H264Context
* const h
){
896 const int poc
= h
->s
.current_picture_ptr
->poc
;
897 const int poc1
= h
->ref_list
[1][0].poc
;
899 for(i
=0; i
<h
->ref_count
[0]; i
++){
900 int poc0
= h
->ref_list
[0][i
].poc
;
901 int td
= av_clip(poc1
- poc0
, -128, 127);
902 if(td
== 0 /* FIXME || pic0 is a long-term ref */){
903 h
->dist_scale_factor
[i
] = 256;
905 int tb
= av_clip(poc
- poc0
, -128, 127);
906 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
907 h
->dist_scale_factor
[i
] = av_clip((tb
*tx
+ 32) >> 6, -1024, 1023);
911 for(i
=0; i
<h
->ref_count
[0]; i
++){
912 h
->dist_scale_factor_field
[2*i
] =
913 h
->dist_scale_factor_field
[2*i
+1] = h
->dist_scale_factor
[i
];
917 static inline void direct_ref_list_init(H264Context
* const h
){
918 MpegEncContext
* const s
= &h
->s
;
919 Picture
* const ref1
= &h
->ref_list
[1][0];
920 Picture
* const cur
= s
->current_picture_ptr
;
922 if(cur
->pict_type
== FF_I_TYPE
)
923 cur
->ref_count
[0] = 0;
924 if(cur
->pict_type
!= FF_B_TYPE
)
925 cur
->ref_count
[1] = 0;
926 for(list
=0; list
<2; list
++){
927 cur
->ref_count
[list
] = h
->ref_count
[list
];
928 for(j
=0; j
<h
->ref_count
[list
]; j
++)
929 cur
->ref_poc
[list
][j
] = h
->ref_list
[list
][j
].poc
;
931 if(cur
->pict_type
!= FF_B_TYPE
|| h
->direct_spatial_mv_pred
)
933 for(list
=0; list
<2; list
++){
934 for(i
=0; i
<ref1
->ref_count
[list
]; i
++){
935 const int poc
= ref1
->ref_poc
[list
][i
];
936 h
->map_col_to_list0
[list
][i
] = 0; /* bogus; fills in for missing frames */
937 for(j
=0; j
<h
->ref_count
[list
]; j
++)
938 if(h
->ref_list
[list
][j
].poc
== poc
){
939 h
->map_col_to_list0
[list
][i
] = j
;
945 for(list
=0; list
<2; list
++){
946 for(i
=0; i
<ref1
->ref_count
[list
]; i
++){
947 j
= h
->map_col_to_list0
[list
][i
];
948 h
->map_col_to_list0_field
[list
][2*i
] = 2*j
;
949 h
->map_col_to_list0_field
[list
][2*i
+1] = 2*j
+1;
955 static inline void pred_direct_motion(H264Context
* const h
, int *mb_type
){
956 MpegEncContext
* const s
= &h
->s
;
957 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
958 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
959 const int b4_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
960 const int mb_type_col
= h
->ref_list
[1][0].mb_type
[mb_xy
];
961 const int16_t (*l1mv0
)[2] = (const int16_t (*)[2]) &h
->ref_list
[1][0].motion_val
[0][b4_xy
];
962 const int16_t (*l1mv1
)[2] = (const int16_t (*)[2]) &h
->ref_list
[1][0].motion_val
[1][b4_xy
];
963 const int8_t *l1ref0
= &h
->ref_list
[1][0].ref_index
[0][b8_xy
];
964 const int8_t *l1ref1
= &h
->ref_list
[1][0].ref_index
[1][b8_xy
];
965 const int is_b8x8
= IS_8X8(*mb_type
);
966 unsigned int sub_mb_type
;
969 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
970 if(IS_8X8(mb_type_col
) && !h
->sps
.direct_8x8_inference_flag
){
971 /* FIXME save sub mb types from previous frames (or derive from MVs)
972 * so we know exactly what block size to use */
973 sub_mb_type
= MB_TYPE_8x8
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_4x4 */
974 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
;
975 }else if(!is_b8x8
&& (mb_type_col
& MB_TYPE_16x16_OR_INTRA
)){
976 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
977 *mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_16x16 */
979 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
980 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
;
983 *mb_type
|= MB_TYPE_DIRECT2
;
985 *mb_type
|= MB_TYPE_INTERLACED
;
987 tprintf(s
->avctx
, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type
, sub_mb_type
, is_b8x8
, mb_type_col
);
989 if(h
->direct_spatial_mv_pred
){
994 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
996 /* ref = min(neighbors) */
997 for(list
=0; list
<2; list
++){
998 int refa
= h
->ref_cache
[list
][scan8
[0] - 1];
999 int refb
= h
->ref_cache
[list
][scan8
[0] - 8];
1000 int refc
= h
->ref_cache
[list
][scan8
[0] - 8 + 4];
1002 refc
= h
->ref_cache
[list
][scan8
[0] - 8 - 1];
1004 if(ref
[list
] < 0 || (refb
< ref
[list
] && refb
>= 0))
1006 if(ref
[list
] < 0 || (refc
< ref
[list
] && refc
>= 0))
1012 if(ref
[0] < 0 && ref
[1] < 0){
1013 ref
[0] = ref
[1] = 0;
1014 mv
[0][0] = mv
[0][1] =
1015 mv
[1][0] = mv
[1][1] = 0;
1017 for(list
=0; list
<2; list
++){
1019 pred_motion(h
, 0, 4, list
, ref
[list
], &mv
[list
][0], &mv
[list
][1]);
1021 mv
[list
][0] = mv
[list
][1] = 0;
1027 *mb_type
&= ~MB_TYPE_L1
;
1028 sub_mb_type
&= ~MB_TYPE_L1
;
1029 }else if(ref
[0] < 0){
1031 *mb_type
&= ~MB_TYPE_L0
;
1032 sub_mb_type
&= ~MB_TYPE_L0
;
1035 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
)){
1036 int pair_xy
= s
->mb_x
+ (s
->mb_y
&~1)*s
->mb_stride
;
1037 int mb_types_col
[2];
1038 int b8_stride
= h
->b8_stride
;
1039 int b4_stride
= h
->b_stride
;
1041 *mb_type
= (*mb_type
& ~MB_TYPE_16x16
) | MB_TYPE_8x8
;
1043 if(IS_INTERLACED(*mb_type
)){
1044 mb_types_col
[0] = h
->ref_list
[1][0].mb_type
[pair_xy
];
1045 mb_types_col
[1] = h
->ref_list
[1][0].mb_type
[pair_xy
+s
->mb_stride
];
1047 l1ref0
-= 2*b8_stride
;
1048 l1ref1
-= 2*b8_stride
;
1049 l1mv0
-= 4*b4_stride
;
1050 l1mv1
-= 4*b4_stride
;
1055 int cur_poc
= s
->current_picture_ptr
->poc
;
1056 int *col_poc
= h
->ref_list
[1]->field_poc
;
1057 int col_parity
= FFABS(col_poc
[0] - cur_poc
) >= FFABS(col_poc
[1] - cur_poc
);
1058 int dy
= 2*col_parity
- (s
->mb_y
&1);
1060 mb_types_col
[1] = h
->ref_list
[1][0].mb_type
[pair_xy
+ col_parity
*s
->mb_stride
];
1061 l1ref0
+= dy
*b8_stride
;
1062 l1ref1
+= dy
*b8_stride
;
1063 l1mv0
+= 2*dy
*b4_stride
;
1064 l1mv1
+= 2*dy
*b4_stride
;
1068 for(i8
=0; i8
<4; i8
++){
1071 int xy8
= x8
+y8
*b8_stride
;
1072 int xy4
= 3*x8
+y8
*b4_stride
;
1075 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1077 h
->sub_mb_type
[i8
] = sub_mb_type
;
1079 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1080 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1081 if(!IS_INTRA(mb_types_col
[y8
])
1082 && ( (l1ref0
[xy8
] == 0 && FFABS(l1mv0
[xy4
][0]) <= 1 && FFABS(l1mv0
[xy4
][1]) <= 1)
1083 || (l1ref0
[xy8
] < 0 && l1ref1
[xy8
] == 0 && FFABS(l1mv1
[xy4
][0]) <= 1 && FFABS(l1mv1
[xy4
][1]) <= 1))){
1085 a
= pack16to32(mv
[0][0],mv
[0][1]);
1087 b
= pack16to32(mv
[1][0],mv
[1][1]);
1089 a
= pack16to32(mv
[0][0],mv
[0][1]);
1090 b
= pack16to32(mv
[1][0],mv
[1][1]);
1092 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, a
, 4);
1093 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, b
, 4);
1095 }else if(IS_16X16(*mb_type
)){
1098 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, (uint8_t)ref
[0], 1);
1099 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, (uint8_t)ref
[1], 1);
1100 if(!IS_INTRA(mb_type_col
)
1101 && ( (l1ref0
[0] == 0 && FFABS(l1mv0
[0][0]) <= 1 && FFABS(l1mv0
[0][1]) <= 1)
1102 || (l1ref0
[0] < 0 && l1ref1
[0] == 0 && FFABS(l1mv1
[0][0]) <= 1 && FFABS(l1mv1
[0][1]) <= 1
1103 && (h
->x264_build
>33 || !h
->x264_build
)))){
1105 a
= pack16to32(mv
[0][0],mv
[0][1]);
1107 b
= pack16to32(mv
[1][0],mv
[1][1]);
1109 a
= pack16to32(mv
[0][0],mv
[0][1]);
1110 b
= pack16to32(mv
[1][0],mv
[1][1]);
1112 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, a
, 4);
1113 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, b
, 4);
1115 for(i8
=0; i8
<4; i8
++){
1116 const int x8
= i8
&1;
1117 const int y8
= i8
>>1;
1119 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1121 h
->sub_mb_type
[i8
] = sub_mb_type
;
1123 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1124 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1125 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1126 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1129 if(!IS_INTRA(mb_type_col
) && ( l1ref0
[x8
+ y8
*h
->b8_stride
] == 0
1130 || (l1ref0
[x8
+ y8
*h
->b8_stride
] < 0 && l1ref1
[x8
+ y8
*h
->b8_stride
] == 0
1131 && (h
->x264_build
>33 || !h
->x264_build
)))){
1132 const int16_t (*l1mv
)[2]= l1ref0
[x8
+ y8
*h
->b8_stride
] == 0 ? l1mv0
: l1mv1
;
1133 if(IS_SUB_8X8(sub_mb_type
)){
1134 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*h
->b_stride
];
1135 if(FFABS(mv_col
[0]) <= 1 && FFABS(mv_col
[1]) <= 1){
1137 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1139 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1142 for(i4
=0; i4
<4; i4
++){
1143 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*h
->b_stride
];
1144 if(FFABS(mv_col
[0]) <= 1 && FFABS(mv_col
[1]) <= 1){
1146 *(uint32_t*)h
->mv_cache
[0][scan8
[i8
*4+i4
]] = 0;
1148 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] = 0;
1154 }else{ /* direct temporal mv pred */
1155 const int *map_col_to_list0
[2] = {h
->map_col_to_list0
[0], h
->map_col_to_list0
[1]};
1156 const int *dist_scale_factor
= h
->dist_scale_factor
;
1159 if(IS_INTERLACED(*mb_type
)){
1160 map_col_to_list0
[0] = h
->map_col_to_list0_field
[0];
1161 map_col_to_list0
[1] = h
->map_col_to_list0_field
[1];
1162 dist_scale_factor
= h
->dist_scale_factor_field
;
1164 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
)){
1165 /* FIXME assumes direct_8x8_inference == 1 */
1166 const int pair_xy
= s
->mb_x
+ (s
->mb_y
&~1)*s
->mb_stride
;
1167 int mb_types_col
[2];
1170 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
1171 | (is_b8x8
? 0 : MB_TYPE_DIRECT2
)
1172 | (*mb_type
& MB_TYPE_INTERLACED
);
1173 sub_mb_type
= MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
|MB_TYPE_16x16
;
1175 if(IS_INTERLACED(*mb_type
)){
1176 /* frame to field scaling */
1177 mb_types_col
[0] = h
->ref_list
[1][0].mb_type
[pair_xy
];
1178 mb_types_col
[1] = h
->ref_list
[1][0].mb_type
[pair_xy
+s
->mb_stride
];
1180 l1ref0
-= 2*h
->b8_stride
;
1181 l1ref1
-= 2*h
->b8_stride
;
1182 l1mv0
-= 4*h
->b_stride
;
1183 l1mv1
-= 4*h
->b_stride
;
1187 if( (mb_types_col
[0] & MB_TYPE_16x16_OR_INTRA
)
1188 && (mb_types_col
[1] & MB_TYPE_16x16_OR_INTRA
)
1190 *mb_type
|= MB_TYPE_16x8
;
1192 *mb_type
|= MB_TYPE_8x8
;
1194 /* field to frame scaling */
1195 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1196 * but in MBAFF, top and bottom POC are equal */
1197 int dy
= (s
->mb_y
&1) ? 1 : 2;
1199 mb_types_col
[1] = h
->ref_list
[1][0].mb_type
[pair_xy
+s
->mb_stride
];
1200 l1ref0
+= dy
*h
->b8_stride
;
1201 l1ref1
+= dy
*h
->b8_stride
;
1202 l1mv0
+= 2*dy
*h
->b_stride
;
1203 l1mv1
+= 2*dy
*h
->b_stride
;
1206 if((mb_types_col
[0] & (MB_TYPE_16x16_OR_INTRA
|MB_TYPE_16x8
))
1208 *mb_type
|= MB_TYPE_16x16
;
1210 *mb_type
|= MB_TYPE_8x8
;
1213 for(i8
=0; i8
<4; i8
++){
1214 const int x8
= i8
&1;
1215 const int y8
= i8
>>1;
1217 const int16_t (*l1mv
)[2]= l1mv0
;
1219 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1221 h
->sub_mb_type
[i8
] = sub_mb_type
;
1223 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1224 if(IS_INTRA(mb_types_col
[y8
])){
1225 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1226 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1227 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1231 ref0
= l1ref0
[x8
+ (y8
*2>>y_shift
)*h
->b8_stride
];
1233 ref0
= map_col_to_list0
[0][ref0
*2>>y_shift
];
1235 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ (y8
*2>>y_shift
)*h
->b8_stride
]*2>>y_shift
];
1238 scale
= dist_scale_factor
[ref0
];
1239 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1242 const int16_t *mv_col
= l1mv
[x8
*3 + (y8
*6>>y_shift
)*h
->b_stride
];
1243 int my_col
= (mv_col
[1]<<y_shift
)/2;
1244 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1245 int my
= (scale
* my_col
+ 128) >> 8;
1246 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1247 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-my_col
), 4);
1254 /* one-to-one mv scaling */
1256 if(IS_16X16(*mb_type
)){
1259 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, 0, 1);
1260 if(IS_INTRA(mb_type_col
)){
1263 const int ref0
= l1ref0
[0] >= 0 ? map_col_to_list0
[0][l1ref0
[0]]
1264 : map_col_to_list0
[1][l1ref1
[0]];
1265 const int scale
= dist_scale_factor
[ref0
];
1266 const int16_t *mv_col
= l1ref0
[0] >= 0 ? l1mv0
[0] : l1mv1
[0];
1268 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1269 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1271 mv0
= pack16to32(mv_l0
[0],mv_l0
[1]);
1272 mv1
= pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1274 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, ref
, 1);
1275 fill_rectangle(&h
-> mv_cache
[0][scan8
[0]], 4, 4, 8, mv0
, 4);
1276 fill_rectangle(&h
-> mv_cache
[1][scan8
[0]], 4, 4, 8, mv1
, 4);
1278 for(i8
=0; i8
<4; i8
++){
1279 const int x8
= i8
&1;
1280 const int y8
= i8
>>1;
1282 const int16_t (*l1mv
)[2]= l1mv0
;
1284 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1286 h
->sub_mb_type
[i8
] = sub_mb_type
;
1287 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1288 if(IS_INTRA(mb_type_col
)){
1289 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1290 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1291 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1295 ref0
= l1ref0
[x8
+ y8
*h
->b8_stride
];
1297 ref0
= map_col_to_list0
[0][ref0
];
1299 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ y8
*h
->b8_stride
]];
1302 scale
= dist_scale_factor
[ref0
];
1304 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1305 if(IS_SUB_8X8(sub_mb_type
)){
1306 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*h
->b_stride
];
1307 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1308 int my
= (scale
* mv_col
[1] + 128) >> 8;
1309 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1310 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-mv_col
[1]), 4);
1312 for(i4
=0; i4
<4; i4
++){
1313 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*h
->b_stride
];
1314 int16_t *mv_l0
= h
->mv_cache
[0][scan8
[i8
*4+i4
]];
1315 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1316 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1317 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] =
1318 pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1325 static inline void write_back_motion(H264Context
*h
, int mb_type
){
1326 MpegEncContext
* const s
= &h
->s
;
1327 const int b_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
1328 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
1331 if(!USES_LIST(mb_type
, 0))
1332 fill_rectangle(&s
->current_picture
.ref_index
[0][b8_xy
], 2, 2, h
->b8_stride
, (uint8_t)LIST_NOT_USED
, 1);
1334 for(list
=0; list
<h
->list_count
; list
++){
1336 if(!USES_LIST(mb_type
, list
))
1340 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+0 + 8*y
];
1341 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+2 + 8*y
];
1343 if( h
->pps
.cabac
) {
1344 if(IS_SKIP(mb_type
))
1345 fill_rectangle(h
->mvd_table
[list
][b_xy
], 4, 4, h
->b_stride
, 0, 4);
1348 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+0 + 8*y
];
1349 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+2 + 8*y
];
1354 int8_t *ref_index
= &s
->current_picture
.ref_index
[list
][b8_xy
];
1355 ref_index
[0+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[0]];
1356 ref_index
[1+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[4]];
1357 ref_index
[0+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[8]];
1358 ref_index
[1+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[12]];
1362 if(h
->slice_type
== FF_B_TYPE
&& h
->pps
.cabac
){
1363 if(IS_8X8(mb_type
)){
1364 uint8_t *direct_table
= &h
->direct_table
[b8_xy
];
1365 direct_table
[1+0*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[1]) ? 1 : 0;
1366 direct_table
[0+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[2]) ? 1 : 0;
1367 direct_table
[1+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[3]) ? 1 : 0;
1373 * Decodes a network abstraction layer unit.
1374 * @param consumed is the number of bytes used as input
1375 * @param length is the length of the array
1376 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1377 * @returns decoded bytes, might be src+1 if no escapes
1379 static const uint8_t *decode_nal(H264Context
*h
, const uint8_t *src
, int *dst_length
, int *consumed
, int length
){
1384 // src[0]&0x80; //forbidden bit
1385 h
->nal_ref_idc
= src
[0]>>5;
1386 h
->nal_unit_type
= src
[0]&0x1F;
1390 for(i
=0; i
<length
; i
++)
1391 printf("%2X ", src
[i
]);
1393 for(i
=0; i
+1<length
; i
+=2){
1394 if(src
[i
]) continue;
1395 if(i
>0 && src
[i
-1]==0) i
--;
1396 if(i
+2<length
&& src
[i
+1]==0 && src
[i
+2]<=3){
1398 /* startcode, so we must be past the end */
1405 if(i
>=length
-1){ //no escaped 0
1406 *dst_length
= length
;
1407 *consumed
= length
+1; //+1 for the header
1411 bufidx
= h
->nal_unit_type
== NAL_DPC
? 1 : 0; // use second escape buffer for inter data
1412 h
->rbsp_buffer
[bufidx
]= av_fast_realloc(h
->rbsp_buffer
[bufidx
], &h
->rbsp_buffer_size
[bufidx
], length
);
1413 dst
= h
->rbsp_buffer
[bufidx
];
1419 //printf("decoding esc\n");
1422 //remove escapes (very rare 1:2^22)
1423 if(si
+2<length
&& src
[si
]==0 && src
[si
+1]==0 && src
[si
+2]<=3){
1424 if(src
[si
+2]==3){ //escape
1429 }else //next start code
1433 dst
[di
++]= src
[si
++];
1437 *consumed
= si
+ 1;//+1 for the header
1438 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1443 * identifies the exact end of the bitstream
1444 * @return the length of the trailing, or 0 if damaged
1446 static int decode_rbsp_trailing(H264Context
*h
, const uint8_t *src
){
1450 tprintf(h
->s
.avctx
, "rbsp trailing %X\n", v
);
1460 * idct tranforms the 16 dc values and dequantize them.
1461 * @param qp quantization parameter
1463 static void h264_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1466 int temp
[16]; //FIXME check if this is a good idea
1467 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1468 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1470 //memset(block, 64, 2*256);
1473 const int offset
= y_offset
[i
];
1474 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1475 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1476 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1477 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1486 const int offset
= x_offset
[i
];
1487 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1488 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1489 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1490 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1492 block
[stride
*0 +offset
]= ((((z0
+ z3
)*qmul
+ 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1493 block
[stride
*2 +offset
]= ((((z1
+ z2
)*qmul
+ 128 ) >> 8));
1494 block
[stride
*8 +offset
]= ((((z1
- z2
)*qmul
+ 128 ) >> 8));
1495 block
[stride
*10+offset
]= ((((z0
- z3
)*qmul
+ 128 ) >> 8));
1501 * dct tranforms the 16 dc values.
1502 * @param qp quantization parameter ??? FIXME
1504 static void h264_luma_dc_dct_c(DCTELEM
*block
/*, int qp*/){
1505 // const int qmul= dequant_coeff[qp][0];
1507 int temp
[16]; //FIXME check if this is a good idea
1508 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1509 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1512 const int offset
= y_offset
[i
];
1513 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1514 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1515 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1516 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1525 const int offset
= x_offset
[i
];
1526 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1527 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1528 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1529 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1531 block
[stride
*0 +offset
]= (z0
+ z3
)>>1;
1532 block
[stride
*2 +offset
]= (z1
+ z2
)>>1;
1533 block
[stride
*8 +offset
]= (z1
- z2
)>>1;
1534 block
[stride
*10+offset
]= (z0
- z3
)>>1;
1542 static void chroma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1543 const int stride
= 16*2;
1544 const int xStride
= 16;
1547 a
= block
[stride
*0 + xStride
*0];
1548 b
= block
[stride
*0 + xStride
*1];
1549 c
= block
[stride
*1 + xStride
*0];
1550 d
= block
[stride
*1 + xStride
*1];
1557 block
[stride
*0 + xStride
*0]= ((a
+c
)*qmul
) >> 7;
1558 block
[stride
*0 + xStride
*1]= ((e
+b
)*qmul
) >> 7;
1559 block
[stride
*1 + xStride
*0]= ((a
-c
)*qmul
) >> 7;
1560 block
[stride
*1 + xStride
*1]= ((e
-b
)*qmul
) >> 7;
1564 static void chroma_dc_dct_c(DCTELEM
*block
){
1565 const int stride
= 16*2;
1566 const int xStride
= 16;
1569 a
= block
[stride
*0 + xStride
*0];
1570 b
= block
[stride
*0 + xStride
*1];
1571 c
= block
[stride
*1 + xStride
*0];
1572 d
= block
[stride
*1 + xStride
*1];
1579 block
[stride
*0 + xStride
*0]= (a
+c
);
1580 block
[stride
*0 + xStride
*1]= (e
+b
);
1581 block
[stride
*1 + xStride
*0]= (a
-c
);
1582 block
[stride
*1 + xStride
*1]= (e
-b
);
1587 * gets the chroma qp.
1589 static inline int get_chroma_qp(H264Context
*h
, int t
, int qscale
){
1590 return h
->pps
.chroma_qp_table
[t
][qscale
& 0xff];
1593 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1594 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1595 static inline int quantize_c(DCTELEM
*block
, uint8_t *scantable
, int qscale
, int intra
, int separate_dc
){
1597 const int * const quant_table
= quant_coeff
[qscale
];
1598 const int bias
= intra
? (1<<QUANT_SHIFT
)/3 : (1<<QUANT_SHIFT
)/6;
1599 const unsigned int threshold1
= (1<<QUANT_SHIFT
) - bias
- 1;
1600 const unsigned int threshold2
= (threshold1
<<1);
1606 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
-2))/3 : (1<<(QUANT_SHIFT
-2))/6;
1607 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
-2)) - dc_bias
- 1;
1608 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
1610 int level
= block
[0]*quant_coeff
[qscale
+18][0];
1611 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
1613 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
-2);
1616 level
= (dc_bias
- level
)>>(QUANT_SHIFT
-2);
1619 // last_non_zero = i;
1624 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
+1))/3 : (1<<(QUANT_SHIFT
+1))/6;
1625 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
+1)) - dc_bias
- 1;
1626 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
1628 int level
= block
[0]*quant_table
[0];
1629 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
1631 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
+1);
1634 level
= (dc_bias
- level
)>>(QUANT_SHIFT
+1);
1637 // last_non_zero = i;
1650 const int j
= scantable
[i
];
1651 int level
= block
[j
]*quant_table
[j
];
1653 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1654 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1655 if(((unsigned)(level
+threshold1
))>threshold2
){
1657 level
= (bias
+ level
)>>QUANT_SHIFT
;
1660 level
= (bias
- level
)>>QUANT_SHIFT
;
1669 return last_non_zero
;
1672 static inline void mc_dir_part(H264Context
*h
, Picture
*pic
, int n
, int square
, int chroma_height
, int delta
, int list
,
1673 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1674 int src_x_offset
, int src_y_offset
,
1675 qpel_mc_func
*qpix_op
, h264_chroma_mc_func chroma_op
){
1676 MpegEncContext
* const s
= &h
->s
;
1677 const int mx
= h
->mv_cache
[list
][ scan8
[n
] ][0] + src_x_offset
*8;
1678 int my
= h
->mv_cache
[list
][ scan8
[n
] ][1] + src_y_offset
*8;
1679 const int luma_xy
= (mx
&3) + ((my
&3)<<2);
1680 uint8_t * src_y
= pic
->data
[0] + (mx
>>2) + (my
>>2)*h
->mb_linesize
;
1681 uint8_t * src_cb
, * src_cr
;
1682 int extra_width
= h
->emu_edge_width
;
1683 int extra_height
= h
->emu_edge_height
;
1685 const int full_mx
= mx
>>2;
1686 const int full_my
= my
>>2;
1687 const int pic_width
= 16*s
->mb_width
;
1688 const int pic_height
= 16*s
->mb_height
>> MB_FIELD
;
1690 if(!pic
->data
[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
1693 if(mx
&7) extra_width
-= 3;
1694 if(my
&7) extra_height
-= 3;
1696 if( full_mx
< 0-extra_width
1697 || full_my
< 0-extra_height
1698 || full_mx
+ 16/*FIXME*/ > pic_width
+ extra_width
1699 || full_my
+ 16/*FIXME*/ > pic_height
+ extra_height
){
1700 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_y
- 2 - 2*h
->mb_linesize
, h
->mb_linesize
, 16+5, 16+5/*FIXME*/, full_mx
-2, full_my
-2, pic_width
, pic_height
);
1701 src_y
= s
->edge_emu_buffer
+ 2 + 2*h
->mb_linesize
;
1705 qpix_op
[luma_xy
](dest_y
, src_y
, h
->mb_linesize
); //FIXME try variable height perhaps?
1707 qpix_op
[luma_xy
](dest_y
+ delta
, src_y
+ delta
, h
->mb_linesize
);
1710 if(ENABLE_GRAY
&& s
->flags
&CODEC_FLAG_GRAY
) return;
1713 // chroma offset when predicting from a field of opposite parity
1714 my
+= 2 * ((s
->mb_y
& 1) - (pic
->reference
- 1));
1715 emu
|= (my
>>3) < 0 || (my
>>3) + 8 >= (pic_height
>>1);
1717 src_cb
= pic
->data
[1] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
1718 src_cr
= pic
->data
[2] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
1721 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cb
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
1722 src_cb
= s
->edge_emu_buffer
;
1724 chroma_op(dest_cb
, src_cb
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
1727 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cr
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
1728 src_cr
= s
->edge_emu_buffer
;
1730 chroma_op(dest_cr
, src_cr
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
1733 static inline void mc_part_std(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1734 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1735 int x_offset
, int y_offset
,
1736 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1737 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
1738 int list0
, int list1
){
1739 MpegEncContext
* const s
= &h
->s
;
1740 qpel_mc_func
*qpix_op
= qpix_put
;
1741 h264_chroma_mc_func chroma_op
= chroma_put
;
1743 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
1744 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1745 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1746 x_offset
+= 8*s
->mb_x
;
1747 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
1750 Picture
*ref
= &h
->ref_list
[0][ h
->ref_cache
[0][ scan8
[n
] ] ];
1751 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 0,
1752 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1753 qpix_op
, chroma_op
);
1756 chroma_op
= chroma_avg
;
1760 Picture
*ref
= &h
->ref_list
[1][ h
->ref_cache
[1][ scan8
[n
] ] ];
1761 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 1,
1762 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1763 qpix_op
, chroma_op
);
1767 static inline void mc_part_weighted(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1768 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1769 int x_offset
, int y_offset
,
1770 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1771 h264_weight_func luma_weight_op
, h264_weight_func chroma_weight_op
,
1772 h264_biweight_func luma_weight_avg
, h264_biweight_func chroma_weight_avg
,
1773 int list0
, int list1
){
1774 MpegEncContext
* const s
= &h
->s
;
1776 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
1777 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1778 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
1779 x_offset
+= 8*s
->mb_x
;
1780 y_offset
+= 8*(s
->mb_y
>> MB_FIELD
);
1783 /* don't optimize for luma-only case, since B-frames usually
1784 * use implicit weights => chroma too. */
1785 uint8_t *tmp_cb
= s
->obmc_scratchpad
;
1786 uint8_t *tmp_cr
= s
->obmc_scratchpad
+ 8;
1787 uint8_t *tmp_y
= s
->obmc_scratchpad
+ 8*h
->mb_uvlinesize
;
1788 int refn0
= h
->ref_cache
[0][ scan8
[n
] ];
1789 int refn1
= h
->ref_cache
[1][ scan8
[n
] ];
1791 mc_dir_part(h
, &h
->ref_list
[0][refn0
], n
, square
, chroma_height
, delta
, 0,
1792 dest_y
, dest_cb
, dest_cr
,
1793 x_offset
, y_offset
, qpix_put
, chroma_put
);
1794 mc_dir_part(h
, &h
->ref_list
[1][refn1
], n
, square
, chroma_height
, delta
, 1,
1795 tmp_y
, tmp_cb
, tmp_cr
,
1796 x_offset
, y_offset
, qpix_put
, chroma_put
);
1798 if(h
->use_weight
== 2){
1799 int weight0
= h
->implicit_weight
[refn0
][refn1
];
1800 int weight1
= 64 - weight0
;
1801 luma_weight_avg( dest_y
, tmp_y
, h
-> mb_linesize
, 5, weight0
, weight1
, 0);
1802 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
1803 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
1805 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
1806 h
->luma_weight
[0][refn0
], h
->luma_weight
[1][refn1
],
1807 h
->luma_offset
[0][refn0
] + h
->luma_offset
[1][refn1
]);
1808 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1809 h
->chroma_weight
[0][refn0
][0], h
->chroma_weight
[1][refn1
][0],
1810 h
->chroma_offset
[0][refn0
][0] + h
->chroma_offset
[1][refn1
][0]);
1811 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1812 h
->chroma_weight
[0][refn0
][1], h
->chroma_weight
[1][refn1
][1],
1813 h
->chroma_offset
[0][refn0
][1] + h
->chroma_offset
[1][refn1
][1]);
1816 int list
= list1
? 1 : 0;
1817 int refn
= h
->ref_cache
[list
][ scan8
[n
] ];
1818 Picture
*ref
= &h
->ref_list
[list
][refn
];
1819 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, list
,
1820 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1821 qpix_put
, chroma_put
);
1823 luma_weight_op(dest_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
1824 h
->luma_weight
[list
][refn
], h
->luma_offset
[list
][refn
]);
1825 if(h
->use_weight_chroma
){
1826 chroma_weight_op(dest_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1827 h
->chroma_weight
[list
][refn
][0], h
->chroma_offset
[list
][refn
][0]);
1828 chroma_weight_op(dest_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
1829 h
->chroma_weight
[list
][refn
][1], h
->chroma_offset
[list
][refn
][1]);
1834 static inline void mc_part(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
1835 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1836 int x_offset
, int y_offset
,
1837 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
1838 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
1839 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
,
1840 int list0
, int list1
){
1841 if((h
->use_weight
==2 && list0
&& list1
1842 && (h
->implicit_weight
[ h
->ref_cache
[0][scan8
[n
]] ][ h
->ref_cache
[1][scan8
[n
]] ] != 32))
1843 || h
->use_weight
==1)
1844 mc_part_weighted(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
1845 x_offset
, y_offset
, qpix_put
, chroma_put
,
1846 weight_op
[0], weight_op
[3], weight_avg
[0], weight_avg
[3], list0
, list1
);
1848 mc_part_std(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
1849 x_offset
, y_offset
, qpix_put
, chroma_put
, qpix_avg
, chroma_avg
, list0
, list1
);
1852 static inline void prefetch_motion(H264Context
*h
, int list
){
1853 /* fetch pixels for estimated mv 4 macroblocks ahead
1854 * optimized for 64byte cache lines */
1855 MpegEncContext
* const s
= &h
->s
;
1856 const int refn
= h
->ref_cache
[list
][scan8
[0]];
1858 const int mx
= (h
->mv_cache
[list
][scan8
[0]][0]>>2) + 16*s
->mb_x
+ 8;
1859 const int my
= (h
->mv_cache
[list
][scan8
[0]][1]>>2) + 16*s
->mb_y
;
1860 uint8_t **src
= h
->ref_list
[list
][refn
].data
;
1861 int off
= mx
+ (my
+ (s
->mb_x
&3)*4)*h
->mb_linesize
+ 64;
1862 s
->dsp
.prefetch(src
[0]+off
, s
->linesize
, 4);
1863 off
= (mx
>>1) + ((my
>>1) + (s
->mb_x
&7))*s
->uvlinesize
+ 64;
1864 s
->dsp
.prefetch(src
[1]+off
, src
[2]-src
[1], 2);
1868 static void hl_motion(H264Context
*h
, uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
1869 qpel_mc_func (*qpix_put
)[16], h264_chroma_mc_func (*chroma_put
),
1870 qpel_mc_func (*qpix_avg
)[16], h264_chroma_mc_func (*chroma_avg
),
1871 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
){
1872 MpegEncContext
* const s
= &h
->s
;
1873 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
1874 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
1876 assert(IS_INTER(mb_type
));
1878 prefetch_motion(h
, 0);
1880 if(IS_16X16(mb_type
)){
1881 mc_part(h
, 0, 1, 8, 0, dest_y
, dest_cb
, dest_cr
, 0, 0,
1882 qpix_put
[0], chroma_put
[0], qpix_avg
[0], chroma_avg
[0],
1883 &weight_op
[0], &weight_avg
[0],
1884 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1885 }else if(IS_16X8(mb_type
)){
1886 mc_part(h
, 0, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 0,
1887 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
1888 &weight_op
[1], &weight_avg
[1],
1889 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1890 mc_part(h
, 8, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 4,
1891 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
1892 &weight_op
[1], &weight_avg
[1],
1893 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
1894 }else if(IS_8X16(mb_type
)){
1895 mc_part(h
, 0, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 0, 0,
1896 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1897 &weight_op
[2], &weight_avg
[2],
1898 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
1899 mc_part(h
, 4, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 4, 0,
1900 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1901 &weight_op
[2], &weight_avg
[2],
1902 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
1906 assert(IS_8X8(mb_type
));
1909 const int sub_mb_type
= h
->sub_mb_type
[i
];
1911 int x_offset
= (i
&1)<<2;
1912 int y_offset
= (i
&2)<<1;
1914 if(IS_SUB_8X8(sub_mb_type
)){
1915 mc_part(h
, n
, 1, 4, 0, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1916 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
1917 &weight_op
[3], &weight_avg
[3],
1918 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1919 }else if(IS_SUB_8X4(sub_mb_type
)){
1920 mc_part(h
, n
, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1921 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
1922 &weight_op
[4], &weight_avg
[4],
1923 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1924 mc_part(h
, n
+2, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
+2,
1925 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
1926 &weight_op
[4], &weight_avg
[4],
1927 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1928 }else if(IS_SUB_4X8(sub_mb_type
)){
1929 mc_part(h
, n
, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
1930 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1931 &weight_op
[5], &weight_avg
[5],
1932 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1933 mc_part(h
, n
+1, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
+2, y_offset
,
1934 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1935 &weight_op
[5], &weight_avg
[5],
1936 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1939 assert(IS_SUB_4X4(sub_mb_type
));
1941 int sub_x_offset
= x_offset
+ 2*(j
&1);
1942 int sub_y_offset
= y_offset
+ (j
&2);
1943 mc_part(h
, n
+j
, 1, 2, 0, dest_y
, dest_cb
, dest_cr
, sub_x_offset
, sub_y_offset
,
1944 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
1945 &weight_op
[6], &weight_avg
[6],
1946 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
1952 prefetch_motion(h
, 1);
1955 static av_cold
void decode_init_vlc(void){
1956 static int done
= 0;
1962 init_vlc(&chroma_dc_coeff_token_vlc
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 4*5,
1963 &chroma_dc_coeff_token_len
[0], 1, 1,
1964 &chroma_dc_coeff_token_bits
[0], 1, 1, 1);
1967 init_vlc(&coeff_token_vlc
[i
], COEFF_TOKEN_VLC_BITS
, 4*17,
1968 &coeff_token_len
[i
][0], 1, 1,
1969 &coeff_token_bits
[i
][0], 1, 1, 1);
1973 init_vlc(&chroma_dc_total_zeros_vlc
[i
], CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 4,
1974 &chroma_dc_total_zeros_len
[i
][0], 1, 1,
1975 &chroma_dc_total_zeros_bits
[i
][0], 1, 1, 1);
1977 for(i
=0; i
<15; i
++){
1978 init_vlc(&total_zeros_vlc
[i
], TOTAL_ZEROS_VLC_BITS
, 16,
1979 &total_zeros_len
[i
][0], 1, 1,
1980 &total_zeros_bits
[i
][0], 1, 1, 1);
1984 init_vlc(&run_vlc
[i
], RUN_VLC_BITS
, 7,
1985 &run_len
[i
][0], 1, 1,
1986 &run_bits
[i
][0], 1, 1, 1);
1988 init_vlc(&run7_vlc
, RUN7_VLC_BITS
, 16,
1989 &run_len
[6][0], 1, 1,
1990 &run_bits
[6][0], 1, 1, 1);
1994 static void free_tables(H264Context
*h
){
1997 av_freep(&h
->intra4x4_pred_mode
);
1998 av_freep(&h
->chroma_pred_mode_table
);
1999 av_freep(&h
->cbp_table
);
2000 av_freep(&h
->mvd_table
[0]);
2001 av_freep(&h
->mvd_table
[1]);
2002 av_freep(&h
->direct_table
);
2003 av_freep(&h
->non_zero_count
);
2004 av_freep(&h
->slice_table_base
);
2005 h
->slice_table
= NULL
;
2007 av_freep(&h
->mb2b_xy
);
2008 av_freep(&h
->mb2b8_xy
);
2010 for(i
= 0; i
< MAX_SPS_COUNT
; i
++)
2011 av_freep(h
->sps_buffers
+ i
);
2013 for(i
= 0; i
< MAX_PPS_COUNT
; i
++)
2014 av_freep(h
->pps_buffers
+ i
);
2016 for(i
= 0; i
< h
->s
.avctx
->thread_count
; i
++) {
2017 hx
= h
->thread_context
[i
];
2019 av_freep(&hx
->top_borders
[1]);
2020 av_freep(&hx
->top_borders
[0]);
2021 av_freep(&hx
->s
.obmc_scratchpad
);
2025 static void init_dequant8_coeff_table(H264Context
*h
){
2027 const int transpose
= (h
->s
.dsp
.h264_idct8_add
!= ff_h264_idct8_add_c
); //FIXME ugly
2028 h
->dequant8_coeff
[0] = h
->dequant8_buffer
[0];
2029 h
->dequant8_coeff
[1] = h
->dequant8_buffer
[1];
2031 for(i
=0; i
<2; i
++ ){
2032 if(i
&& !memcmp(h
->pps
.scaling_matrix8
[0], h
->pps
.scaling_matrix8
[1], 64*sizeof(uint8_t))){
2033 h
->dequant8_coeff
[1] = h
->dequant8_buffer
[0];
2037 for(q
=0; q
<52; q
++){
2038 int shift
= ff_div6
[q
];
2039 int idx
= ff_rem6
[q
];
2041 h
->dequant8_coeff
[i
][q
][transpose
? (x
>>3)|((x
&7)<<3) : x
] =
2042 ((uint32_t)dequant8_coeff_init
[idx
][ dequant8_coeff_init_scan
[((x
>>1)&12) | (x
&3)] ] *
2043 h
->pps
.scaling_matrix8
[i
][x
]) << shift
;
2048 static void init_dequant4_coeff_table(H264Context
*h
){
2050 const int transpose
= (h
->s
.dsp
.h264_idct_add
!= ff_h264_idct_add_c
); //FIXME ugly
2051 for(i
=0; i
<6; i
++ ){
2052 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[i
];
2054 if(!memcmp(h
->pps
.scaling_matrix4
[j
], h
->pps
.scaling_matrix4
[i
], 16*sizeof(uint8_t))){
2055 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[j
];
2062 for(q
=0; q
<52; q
++){
2063 int shift
= ff_div6
[q
] + 2;
2064 int idx
= ff_rem6
[q
];
2066 h
->dequant4_coeff
[i
][q
][transpose
? (x
>>2)|((x
<<2)&0xF) : x
] =
2067 ((uint32_t)dequant4_coeff_init
[idx
][(x
&1) + ((x
>>2)&1)] *
2068 h
->pps
.scaling_matrix4
[i
][x
]) << shift
;
2073 static void init_dequant_tables(H264Context
*h
){
2075 init_dequant4_coeff_table(h
);
2076 if(h
->pps
.transform_8x8_mode
)
2077 init_dequant8_coeff_table(h
);
2078 if(h
->sps
.transform_bypass
){
2081 h
->dequant4_coeff
[i
][0][x
] = 1<<6;
2082 if(h
->pps
.transform_8x8_mode
)
2085 h
->dequant8_coeff
[i
][0][x
] = 1<<6;
2092 * needs width/height
2094 static int alloc_tables(H264Context
*h
){
2095 MpegEncContext
* const s
= &h
->s
;
2096 const int big_mb_num
= s
->mb_stride
* (s
->mb_height
+1);
2099 CHECKED_ALLOCZ(h
->intra4x4_pred_mode
, big_mb_num
* 8 * sizeof(uint8_t))
2101 CHECKED_ALLOCZ(h
->non_zero_count
, big_mb_num
* 16 * sizeof(uint8_t))
2102 CHECKED_ALLOCZ(h
->slice_table_base
, (big_mb_num
+s
->mb_stride
) * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h
->cbp_table
, big_mb_num
* sizeof(uint16_t))
2105 CHECKED_ALLOCZ(h
->chroma_pred_mode_table
, big_mb_num
* sizeof(uint8_t))
2106 CHECKED_ALLOCZ(h
->mvd_table
[0], 32*big_mb_num
* sizeof(uint16_t));
2107 CHECKED_ALLOCZ(h
->mvd_table
[1], 32*big_mb_num
* sizeof(uint16_t));
2108 CHECKED_ALLOCZ(h
->direct_table
, 32*big_mb_num
* sizeof(uint8_t));
2110 memset(h
->slice_table_base
, -1, (big_mb_num
+s
->mb_stride
) * sizeof(uint8_t));
2111 h
->slice_table
= h
->slice_table_base
+ s
->mb_stride
*2 + 1;
2113 CHECKED_ALLOCZ(h
->mb2b_xy
, big_mb_num
* sizeof(uint32_t));
2114 CHECKED_ALLOCZ(h
->mb2b8_xy
, big_mb_num
* sizeof(uint32_t));
2115 for(y
=0; y
<s
->mb_height
; y
++){
2116 for(x
=0; x
<s
->mb_width
; x
++){
2117 const int mb_xy
= x
+ y
*s
->mb_stride
;
2118 const int b_xy
= 4*x
+ 4*y
*h
->b_stride
;
2119 const int b8_xy
= 2*x
+ 2*y
*h
->b8_stride
;
2121 h
->mb2b_xy
[mb_xy
]= b_xy
;
2122 h
->mb2b8_xy
[mb_xy
]= b8_xy
;
2126 s
->obmc_scratchpad
= NULL
;
2128 if(!h
->dequant4_coeff
[0])
2129 init_dequant_tables(h
);
2138 * Mimic alloc_tables(), but for every context thread.
2140 static void clone_tables(H264Context
*dst
, H264Context
*src
){
2141 dst
->intra4x4_pred_mode
= src
->intra4x4_pred_mode
;
2142 dst
->non_zero_count
= src
->non_zero_count
;
2143 dst
->slice_table
= src
->slice_table
;
2144 dst
->cbp_table
= src
->cbp_table
;
2145 dst
->mb2b_xy
= src
->mb2b_xy
;
2146 dst
->mb2b8_xy
= src
->mb2b8_xy
;
2147 dst
->chroma_pred_mode_table
= src
->chroma_pred_mode_table
;
2148 dst
->mvd_table
[0] = src
->mvd_table
[0];
2149 dst
->mvd_table
[1] = src
->mvd_table
[1];
2150 dst
->direct_table
= src
->direct_table
;
2152 dst
->s
.obmc_scratchpad
= NULL
;
2153 ff_h264_pred_init(&dst
->hpc
, src
->s
.codec_id
);
2158 * Allocate buffers which are not shared amongst multiple threads.
2160 static int context_init(H264Context
*h
){
2161 CHECKED_ALLOCZ(h
->top_borders
[0], h
->s
.mb_width
* (16+8+8) * sizeof(uint8_t))
2162 CHECKED_ALLOCZ(h
->top_borders
[1], h
->s
.mb_width
* (16+8+8) * sizeof(uint8_t))
2166 return -1; // free_tables will clean up for us
2169 static av_cold
void common_init(H264Context
*h
){
2170 MpegEncContext
* const s
= &h
->s
;
2172 s
->width
= s
->avctx
->width
;
2173 s
->height
= s
->avctx
->height
;
2174 s
->codec_id
= s
->avctx
->codec
->id
;
2176 ff_h264_pred_init(&h
->hpc
, s
->codec_id
);
2178 h
->dequant_coeff_pps
= -1;
2179 s
->unrestricted_mv
=1;
2180 s
->decode
=1; //FIXME
2182 memset(h
->pps
.scaling_matrix4
, 16, 6*16*sizeof(uint8_t));
2183 memset(h
->pps
.scaling_matrix8
, 16, 2*64*sizeof(uint8_t));
2186 static av_cold
int decode_init(AVCodecContext
*avctx
){
2187 H264Context
*h
= avctx
->priv_data
;
2188 MpegEncContext
* const s
= &h
->s
;
2190 MPV_decode_defaults(s
);
2195 s
->out_format
= FMT_H264
;
2196 s
->workaround_bugs
= avctx
->workaround_bugs
;
2199 // s->decode_mb= ff_h263_decode_mb;
2200 s
->quarter_sample
= 1;
2202 avctx
->pix_fmt
= PIX_FMT_YUV420P
;
2206 if(avctx
->extradata_size
> 0 && avctx
->extradata
&&
2207 *(char *)avctx
->extradata
== 1){
2214 h
->thread_context
[0] = h
;
2218 static int frame_start(H264Context
*h
){
2219 MpegEncContext
* const s
= &h
->s
;
2222 if(MPV_frame_start(s
, s
->avctx
) < 0)
2224 ff_er_frame_start(s
);
2226 * MPV_frame_start uses pict_type to derive key_frame.
2227 * This is incorrect for H.264; IDR markings must be used.
2228 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2229 * See decode_nal_units().
2231 s
->current_picture_ptr
->key_frame
= 0;
2233 assert(s
->linesize
&& s
->uvlinesize
);
2235 for(i
=0; i
<16; i
++){
2236 h
->block_offset
[i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
2237 h
->block_offset
[24+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
2240 h
->block_offset
[16+i
]=
2241 h
->block_offset
[20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
2242 h
->block_offset
[24+16+i
]=
2243 h
->block_offset
[24+20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
2246 /* can't be in alloc_tables because linesize isn't known there.
2247 * FIXME: redo bipred weight to not require extra buffer? */
2248 for(i
= 0; i
< s
->avctx
->thread_count
; i
++)
2249 if(!h
->thread_context
[i
]->s
.obmc_scratchpad
)
2250 h
->thread_context
[i
]->s
.obmc_scratchpad
= av_malloc(16*2*s
->linesize
+ 8*2*s
->uvlinesize
);
2252 /* some macroblocks will be accessed before they're available */
2253 if(FRAME_MBAFF
|| s
->avctx
->thread_count
> 1)
2254 memset(h
->slice_table
, -1, (s
->mb_height
*s
->mb_stride
-1) * sizeof(uint8_t));
2256 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2260 static inline void backup_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int simple
){
2261 MpegEncContext
* const s
= &h
->s
;
2265 src_cb
-= uvlinesize
;
2266 src_cr
-= uvlinesize
;
2268 // There are two lines saved, the line above the the top macroblock of a pair,
2269 // and the line above the bottom macroblock
2270 h
->left_border
[0]= h
->top_borders
[0][s
->mb_x
][15];
2271 for(i
=1; i
<17; i
++){
2272 h
->left_border
[i
]= src_y
[15+i
* linesize
];
2275 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 16*linesize
);
2276 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+16*linesize
);
2278 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2279 h
->left_border
[17 ]= h
->top_borders
[0][s
->mb_x
][16+7];
2280 h
->left_border
[17+9]= h
->top_borders
[0][s
->mb_x
][24+7];
2282 h
->left_border
[i
+17 ]= src_cb
[7+i
*uvlinesize
];
2283 h
->left_border
[i
+17+9]= src_cr
[7+i
*uvlinesize
];
2285 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+8*uvlinesize
);
2286 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+8*uvlinesize
);
2290 static inline void xchg_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int xchg
, int simple
){
2291 MpegEncContext
* const s
= &h
->s
;
2298 if(h
->deblocking_filter
== 2) {
2299 mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
2300 deblock_left
= h
->slice_table
[mb_xy
] == h
->slice_table
[mb_xy
- 1];
2301 deblock_top
= h
->slice_table
[mb_xy
] == h
->slice_table
[h
->top_mb_xy
];
2303 deblock_left
= (s
->mb_x
> 0);
2304 deblock_top
= (s
->mb_y
> 0);
2307 src_y
-= linesize
+ 1;
2308 src_cb
-= uvlinesize
+ 1;
2309 src_cr
-= uvlinesize
+ 1;
2311 #define XCHG(a,b,t,xchg)\
2318 for(i
= !deblock_top
; i
<17; i
++){
2319 XCHG(h
->left_border
[i
], src_y
[i
* linesize
], temp8
, xchg
);
2324 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0), *(uint64_t*)(src_y
+1), temp64
, xchg
);
2325 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8), *(uint64_t*)(src_y
+9), temp64
, 1);
2326 if(s
->mb_x
+1 < s
->mb_width
){
2327 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
+1]), *(uint64_t*)(src_y
+17), temp64
, 1);
2331 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2333 for(i
= !deblock_top
; i
<9; i
++){
2334 XCHG(h
->left_border
[i
+17 ], src_cb
[i
*uvlinesize
], temp8
, xchg
);
2335 XCHG(h
->left_border
[i
+17+9], src_cr
[i
*uvlinesize
], temp8
, xchg
);
2339 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1), temp64
, 1);
2340 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1), temp64
, 1);
2345 static inline void backup_pair_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
){
2346 MpegEncContext
* const s
= &h
->s
;
2349 src_y
-= 2 * linesize
;
2350 src_cb
-= 2 * uvlinesize
;
2351 src_cr
-= 2 * uvlinesize
;
2353 // There are two lines saved, the line above the the top macroblock of a pair,
2354 // and the line above the bottom macroblock
2355 h
->left_border
[0]= h
->top_borders
[0][s
->mb_x
][15];
2356 h
->left_border
[1]= h
->top_borders
[1][s
->mb_x
][15];
2357 for(i
=2; i
<34; i
++){
2358 h
->left_border
[i
]= src_y
[15+i
* linesize
];
2361 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 32*linesize
);
2362 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+32*linesize
);
2363 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 33*linesize
);
2364 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+33*linesize
);
2366 if(!ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2367 h
->left_border
[34 ]= h
->top_borders
[0][s
->mb_x
][16+7];
2368 h
->left_border
[34+ 1]= h
->top_borders
[1][s
->mb_x
][16+7];
2369 h
->left_border
[34+18 ]= h
->top_borders
[0][s
->mb_x
][24+7];
2370 h
->left_border
[34+18+1]= h
->top_borders
[1][s
->mb_x
][24+7];
2371 for(i
=2; i
<18; i
++){
2372 h
->left_border
[i
+34 ]= src_cb
[7+i
*uvlinesize
];
2373 h
->left_border
[i
+34+18]= src_cr
[7+i
*uvlinesize
];
2375 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+16*uvlinesize
);
2376 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+16*uvlinesize
);
2377 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+17*uvlinesize
);
2378 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+17*uvlinesize
);
2382 static inline void xchg_pair_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int xchg
){
2383 MpegEncContext
* const s
= &h
->s
;
2386 int deblock_left
= (s
->mb_x
> 0);
2387 int deblock_top
= (s
->mb_y
> 1);
2389 tprintf(s
->avctx
, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y
, src_cb
, src_cr
, linesize
, uvlinesize
);
2391 src_y
-= 2 * linesize
+ 1;
2392 src_cb
-= 2 * uvlinesize
+ 1;
2393 src_cr
-= 2 * uvlinesize
+ 1;
2395 #define XCHG(a,b,t,xchg)\
2402 for(i
= (!deblock_top
)<<1; i
<34; i
++){
2403 XCHG(h
->left_border
[i
], src_y
[i
* linesize
], temp8
, xchg
);
2408 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0), *(uint64_t*)(src_y
+1), temp64
, xchg
);
2409 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8), *(uint64_t*)(src_y
+9), temp64
, 1);
2410 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+0), *(uint64_t*)(src_y
+1 +linesize
), temp64
, xchg
);
2411 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+8), *(uint64_t*)(src_y
+9 +linesize
), temp64
, 1);
2412 if(s
->mb_x
+1 < s
->mb_width
){
2413 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
+1]), *(uint64_t*)(src_y
+17), temp64
, 1);
2414 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
+1]), *(uint64_t*)(src_y
+17 +linesize
), temp64
, 1);
2418 if(!ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2420 for(i
= (!deblock_top
) << 1; i
<18; i
++){
2421 XCHG(h
->left_border
[i
+34 ], src_cb
[i
*uvlinesize
], temp8
, xchg
);
2422 XCHG(h
->left_border
[i
+34+18], src_cr
[i
*uvlinesize
], temp8
, xchg
);
2426 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1), temp64
, 1);
2427 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1), temp64
, 1);
2428 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1 +uvlinesize
), temp64
, 1);
2429 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1 +uvlinesize
), temp64
, 1);
2434 static av_always_inline
void hl_decode_mb_internal(H264Context
*h
, int simple
){
2435 MpegEncContext
* const s
= &h
->s
;
2436 const int mb_x
= s
->mb_x
;
2437 const int mb_y
= s
->mb_y
;
2438 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
2439 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
2440 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
2441 int linesize
, uvlinesize
/*dct_offset*/;
2443 int *block_offset
= &h
->block_offset
[0];
2444 const unsigned int bottom
= mb_y
& 1;
2445 const int transform_bypass
= (s
->qscale
== 0 && h
->sps
.transform_bypass
), is_h264
= (simple
|| s
->codec_id
== CODEC_ID_H264
);
2446 void (*idct_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
2447 void (*idct_dc_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
2449 dest_y
= s
->current_picture
.data
[0] + (mb_y
* 16* s
->linesize
) + mb_x
* 16;
2450 dest_cb
= s
->current_picture
.data
[1] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2451 dest_cr
= s
->current_picture
.data
[2] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2453 s
->dsp
.prefetch(dest_y
+ (s
->mb_x
&3)*4*s
->linesize
+ 64, s
->linesize
, 4);
2454 s
->dsp
.prefetch(dest_cb
+ (s
->mb_x
&7)*s
->uvlinesize
+ 64, dest_cr
- dest_cb
, 2);
2456 if (!simple
&& MB_FIELD
) {
2457 linesize
= h
->mb_linesize
= s
->linesize
* 2;
2458 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
* 2;
2459 block_offset
= &h
->block_offset
[24];
2460 if(mb_y
&1){ //FIXME move out of this func?
2461 dest_y
-= s
->linesize
*15;
2462 dest_cb
-= s
->uvlinesize
*7;
2463 dest_cr
-= s
->uvlinesize
*7;
2467 for(list
=0; list
<h
->list_count
; list
++){
2468 if(!USES_LIST(mb_type
, list
))
2470 if(IS_16X16(mb_type
)){
2471 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
2472 fill_rectangle(ref
, 4, 4, 8, (16+*ref
)^(s
->mb_y
&1), 1);
2474 for(i
=0; i
<16; i
+=4){
2475 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2476 int ref
= h
->ref_cache
[list
][scan8
[i
]];
2478 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2, 8, (16+ref
)^(s
->mb_y
&1), 1);
2484 linesize
= h
->mb_linesize
= s
->linesize
;
2485 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
;
2486 // dct_offset = s->linesize * 16;
2489 if(transform_bypass
){
2491 idct_add
= IS_8x8DCT(mb_type
) ? s
->dsp
.add_pixels8
: s
->dsp
.add_pixels4
;
2492 }else if(IS_8x8DCT(mb_type
)){
2493 idct_dc_add
= s
->dsp
.h264_idct8_dc_add
;
2494 idct_add
= s
->dsp
.h264_idct8_add
;
2496 idct_dc_add
= s
->dsp
.h264_idct_dc_add
;
2497 idct_add
= s
->dsp
.h264_idct_add
;
2500 if(!simple
&& FRAME_MBAFF
&& h
->deblocking_filter
&& IS_INTRA(mb_type
)
2501 && (!bottom
|| !IS_INTRA(s
->current_picture
.mb_type
[mb_xy
-s
->mb_stride
]))){
2502 int mbt_y
= mb_y
&~1;
2503 uint8_t *top_y
= s
->current_picture
.data
[0] + (mbt_y
* 16* s
->linesize
) + mb_x
* 16;
2504 uint8_t *top_cb
= s
->current_picture
.data
[1] + (mbt_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2505 uint8_t *top_cr
= s
->current_picture
.data
[2] + (mbt_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2506 xchg_pair_border(h
, top_y
, top_cb
, top_cr
, s
->linesize
, s
->uvlinesize
, 1);
2509 if (!simple
&& IS_INTRA_PCM(mb_type
)) {
2512 // The pixels are stored in h->mb array in the same order as levels,
2513 // copy them in output in the correct order.
2514 for(i
=0; i
<16; i
++) {
2515 for (y
=0; y
<4; y
++) {
2516 for (x
=0; x
<4; x
++) {
2517 *(dest_y
+ block_offset
[i
] + y
*linesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
2521 for(i
=16; i
<16+4; i
++) {
2522 for (y
=0; y
<4; y
++) {
2523 for (x
=0; x
<4; x
++) {
2524 *(dest_cb
+ block_offset
[i
] + y
*uvlinesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
2528 for(i
=20; i
<20+4; i
++) {
2529 for (y
=0; y
<4; y
++) {
2530 for (x
=0; x
<4; x
++) {
2531 *(dest_cr
+ block_offset
[i
] + y
*uvlinesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
2536 if(IS_INTRA(mb_type
)){
2537 if(h
->deblocking_filter
&& (simple
|| !FRAME_MBAFF
))
2538 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 1, simple
);
2540 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2541 h
->hpc
.pred8x8
[ h
->chroma_pred_mode
](dest_cb
, uvlinesize
);
2542 h
->hpc
.pred8x8
[ h
->chroma_pred_mode
](dest_cr
, uvlinesize
);
2545 if(IS_INTRA4x4(mb_type
)){
2546 if(simple
|| !s
->encoding
){
2547 if(IS_8x8DCT(mb_type
)){
2548 for(i
=0; i
<16; i
+=4){
2549 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2550 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
2551 const int nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2552 h
->hpc
.pred8x8l
[ dir
](ptr
, (h
->topleft_samples_available
<<i
)&0x8000,
2553 (h
->topright_samples_available
<<i
)&0x4000, linesize
);
2555 if(nnz
== 1 && h
->mb
[i
*16])
2556 idct_dc_add(ptr
, h
->mb
+ i
*16, linesize
);
2558 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
2562 for(i
=0; i
<16; i
++){
2563 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2565 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
2568 if(dir
== DIAG_DOWN_LEFT_PRED
|| dir
== VERT_LEFT_PRED
){
2569 const int topright_avail
= (h
->topright_samples_available
<<i
)&0x8000;
2570 assert(mb_y
|| linesize
<= block_offset
[i
]);
2571 if(!topright_avail
){
2572 tr
= ptr
[3 - linesize
]*0x01010101;
2573 topright
= (uint8_t*) &tr
;
2575 topright
= ptr
+ 4 - linesize
;
2579 h
->hpc
.pred4x4
[ dir
](ptr
, topright
, linesize
);
2580 nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2583 if(nnz
== 1 && h
->mb
[i
*16])
2584 idct_dc_add(ptr
, h
->mb
+ i
*16, linesize
);
2586 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
2588 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, 0);
2593 h
->hpc
.pred16x16
[ h
->intra16x16_pred_mode
](dest_y
, linesize
);
2595 if(!transform_bypass
)
2596 h264_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
, h
->dequant4_coeff
[0][s
->qscale
][0]);
2598 svq3_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
);
2600 if(h
->deblocking_filter
&& (simple
|| !FRAME_MBAFF
))
2601 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 0, simple
);
2603 hl_motion(h
, dest_y
, dest_cb
, dest_cr
,
2604 s
->me
.qpel_put
, s
->dsp
.put_h264_chroma_pixels_tab
,
2605 s
->me
.qpel_avg
, s
->dsp
.avg_h264_chroma_pixels_tab
,
2606 s
->dsp
.weight_h264_pixels_tab
, s
->dsp
.biweight_h264_pixels_tab
);
2610 if(!IS_INTRA4x4(mb_type
)){
2612 if(IS_INTRA16x16(mb_type
)){
2613 for(i
=0; i
<16; i
++){
2614 if(h
->non_zero_count_cache
[ scan8
[i
] ])
2615 idct_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2616 else if(h
->mb
[i
*16])
2617 idct_dc_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2620 const int di
= IS_8x8DCT(mb_type
) ? 4 : 1;
2621 for(i
=0; i
<16; i
+=di
){
2622 int nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
2624 if(nnz
==1 && h
->mb
[i
*16])
2625 idct_dc_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2627 idct_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
2632 for(i
=0; i
<16; i
++){
2633 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){ //FIXME benchmark weird rule, & below
2634 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
2635 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, IS_INTRA(mb_type
) ? 1 : 0);
2641 if(simple
|| !ENABLE_GRAY
|| !(s
->flags
&CODEC_FLAG_GRAY
)){
2642 uint8_t *dest
[2] = {dest_cb
, dest_cr
};
2643 if(transform_bypass
){
2644 idct_add
= idct_dc_add
= s
->dsp
.add_pixels4
;
2646 idct_add
= s
->dsp
.h264_idct_add
;
2647 idct_dc_add
= s
->dsp
.h264_idct_dc_add
;
2648 chroma_dc_dequant_idct_c(h
->mb
+ 16*16, h
->chroma_qp
[0], h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 1:4][h
->chroma_qp
[0]][0]);
2649 chroma_dc_dequant_idct_c(h
->mb
+ 16*16+4*16, h
->chroma_qp
[1], h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 2:5][h
->chroma_qp
[1]][0]);
2652 for(i
=16; i
<16+8; i
++){
2653 if(h
->non_zero_count_cache
[ scan8
[i
] ])
2654 idct_add(dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
2655 else if(h
->mb
[i
*16])
2656 idct_dc_add(dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
2659 for(i
=16; i
<16+8; i
++){
2660 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){
2661 uint8_t * const ptr
= dest
[(i
&4)>>2] + block_offset
[i
];
2662 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, uvlinesize
, chroma_qp
[s
->qscale
+ 12] - 12, 2);
2668 if(h
->deblocking_filter
) {
2669 if (!simple
&& FRAME_MBAFF
) {
2670 //FIXME try deblocking one mb at a time?
2671 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2672 const int mb_y
= s
->mb_y
- 1;
2673 uint8_t *pair_dest_y
, *pair_dest_cb
, *pair_dest_cr
;
2674 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
2675 const int mb_type_top
= s
->current_picture
.mb_type
[mb_xy
];
2676 const int mb_type_bottom
= s
->current_picture
.mb_type
[mb_xy
+s
->mb_stride
];
2677 if (!bottom
) return;
2678 pair_dest_y
= s
->current_picture
.data
[0] + (mb_y
* 16* s
->linesize
) + mb_x
* 16;
2679 pair_dest_cb
= s
->current_picture
.data
[1] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2680 pair_dest_cr
= s
->current_picture
.data
[2] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
2682 if(IS_INTRA(mb_type_top
| mb_type_bottom
))
2683 xchg_pair_border(h
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, s
->linesize
, s
->uvlinesize
, 0);
2685 backup_pair_border(h
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, s
->linesize
, s
->uvlinesize
);
2689 tprintf(h
->s
.avctx
, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x
, mb_y
, pair_dest_y
, dest_y
);
2690 fill_caches(h
, mb_type_top
, 1); //FIXME don't fill stuff which isn't used by filter_mb
2691 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->current_picture
.qscale_table
[mb_xy
]);
2692 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->current_picture
.qscale_table
[mb_xy
]);
2693 filter_mb(h
, mb_x
, mb_y
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, linesize
, uvlinesize
);
2696 tprintf(h
->s
.avctx
, "call mbaff filter_mb\n");
2697 fill_caches(h
, mb_type_bottom
, 1); //FIXME don't fill stuff which isn't used by filter_mb
2698 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->current_picture
.qscale_table
[mb_xy
+s
->mb_stride
]);
2699 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->current_picture
.qscale_table
[mb_xy
+s
->mb_stride
]);
2700 filter_mb(h
, mb_x
, mb_y
+1, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
2702 tprintf(h
->s
.avctx
, "call filter_mb\n");
2703 backup_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, simple
);
2704 fill_caches(h
, mb_type
, 1); //FIXME don't fill stuff which isn't used by filter_mb
2705 filter_mb_fast(h
, mb_x
, mb_y
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
2711 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2713 static void hl_decode_mb_simple(H264Context
*h
){
2714 hl_decode_mb_internal(h
, 1);
2718 * Process a macroblock; this handles edge cases, such as interlacing.
2720 static void av_noinline
hl_decode_mb_complex(H264Context
*h
){
2721 hl_decode_mb_internal(h
, 0);
2724 static void hl_decode_mb(H264Context
*h
){
2725 MpegEncContext
* const s
= &h
->s
;
2726 const int mb_x
= s
->mb_x
;
2727 const int mb_y
= s
->mb_y
;
2728 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
2729 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
2730 int is_complex
= FRAME_MBAFF
|| MB_FIELD
|| IS_INTRA_PCM(mb_type
) || s
->codec_id
!= CODEC_ID_H264
|| (ENABLE_GRAY
&& (s
->flags
&CODEC_FLAG_GRAY
)) || s
->encoding
;
2736 hl_decode_mb_complex(h
);
2737 else hl_decode_mb_simple(h
);
2740 static void pic_as_field(Picture
*pic
, const int parity
){
2742 for (i
= 0; i
< 4; ++i
) {
2743 if (parity
== PICT_BOTTOM_FIELD
)
2744 pic
->data
[i
] += pic
->linesize
[i
];
2745 pic
->reference
= parity
;
2746 pic
->linesize
[i
] *= 2;
2750 static int split_field_copy(Picture
*dest
, Picture
*src
,
2751 int parity
, int id_add
){
2752 int match
= !!(src
->reference
& parity
);
2756 pic_as_field(dest
, parity
);
2758 dest
->pic_id
+= id_add
;
2765 * Split one reference list into field parts, interleaving by parity
2766 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2767 * set to look at the actual start of data for that field.
2769 * @param dest output list
2770 * @param dest_len maximum number of fields to put in dest
2771 * @param src the source reference list containing fields and/or field pairs
2772 * (aka short_ref/long_ref, or
2773 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2774 * @param src_len number of Picture's in source (pairs and unmatched fields)
2775 * @param parity the parity of the picture being decoded/needing
2776 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2777 * @return number of fields placed in dest
2779 static int split_field_half_ref_list(Picture
*dest
, int dest_len
,
2780 Picture
*src
, int src_len
, int parity
){
2781 int same_parity
= 1;
2787 for (out_i
= 0; out_i
< dest_len
; out_i
+= field_output
) {
2788 if (same_parity
&& same_i
< src_len
) {
2789 field_output
= split_field_copy(dest
+ out_i
, src
+ same_i
,
2791 same_parity
= !field_output
;
2794 } else if (opp_i
< src_len
) {
2795 field_output
= split_field_copy(dest
+ out_i
, src
+ opp_i
,
2796 PICT_FRAME
- parity
, 0);
2797 same_parity
= field_output
;
2809 * Split the reference frame list into a reference field list.
2810 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2811 * The input list contains both reference field pairs and
2812 * unmatched reference fields; it is ordered as spec describes
2813 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2814 * unmatched field pairs are also present. Conceptually this is equivalent
2815 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2817 * @param dest output reference list where ordered fields are to be placed
2818 * @param dest_len max number of fields to place at dest
2819 * @param src source reference list, as described above
2820 * @param src_len number of pictures (pairs and unmatched fields) in src
2821 * @param parity parity of field being currently decoded
2822 * (one of PICT_{TOP,BOTTOM}_FIELD)
2823 * @param long_i index into src array that holds first long reference picture,
2824 * or src_len if no long refs present.
2826 static int split_field_ref_list(Picture
*dest
, int dest_len
,
2827 Picture
*src
, int src_len
,
2828 int parity
, int long_i
){
2830 int i
= split_field_half_ref_list(dest
, dest_len
, src
, long_i
, parity
);
2834 i
+= split_field_half_ref_list(dest
, dest_len
, src
+ long_i
,
2835 src_len
- long_i
, parity
);
2840 * fills the default_ref_list.
2842 static int fill_default_ref_list(H264Context
*h
){
2843 MpegEncContext
* const s
= &h
->s
;
2845 int smallest_poc_greater_than_current
= -1;
2847 Picture sorted_short_ref
[32];
2848 Picture field_entry_list
[2][32];
2849 Picture
*frame_list
[2];
2851 if (FIELD_PICTURE
) {
2852 structure_sel
= PICT_FRAME
;
2853 frame_list
[0] = field_entry_list
[0];
2854 frame_list
[1] = field_entry_list
[1];
2857 frame_list
[0] = h
->default_ref_list
[0];
2858 frame_list
[1] = h
->default_ref_list
[1];
2861 if(h
->slice_type
==FF_B_TYPE
){
2868 /* sort frame according to poc in B slice */
2869 for(out_i
=0; out_i
<h
->short_ref_count
; out_i
++){
2871 int best_poc
=INT_MAX
;
2873 for(i
=0; i
<h
->short_ref_count
; i
++){
2874 const int poc
= h
->short_ref
[i
]->poc
;
2875 if(poc
> limit
&& poc
< best_poc
){
2881 assert(best_i
!= INT_MIN
);
2884 sorted_short_ref
[out_i
]= *h
->short_ref
[best_i
];
2885 tprintf(h
->s
.avctx
, "sorted poc: %d->%d poc:%d fn:%d\n", best_i
, out_i
, sorted_short_ref
[out_i
].poc
, sorted_short_ref
[out_i
].frame_num
);
2886 if (-1 == smallest_poc_greater_than_current
) {
2887 if (h
->short_ref
[best_i
]->poc
>= s
->current_picture_ptr
->poc
) {
2888 smallest_poc_greater_than_current
= out_i
;
2893 tprintf(h
->s
.avctx
, "current poc: %d, smallest_poc_greater_than_current: %d\n", s
->current_picture_ptr
->poc
, smallest_poc_greater_than_current
);
2895 // find the largest poc
2896 for(list
=0; list
<2; list
++){
2899 int step
= list
? -1 : 1;
2901 for(i
=0; i
<h
->short_ref_count
&& index
< h
->ref_count
[list
]; i
++, j
+=step
) {
2903 while(j
<0 || j
>= h
->short_ref_count
){
2904 if(j
!= -99 && step
== (list
? -1 : 1))
2907 j
= smallest_poc_greater_than_current
+ (step
>>1);
2909 sel
= sorted_short_ref
[j
].reference
| structure_sel
;
2910 if(sel
!= PICT_FRAME
) continue;
2911 frame_list
[list
][index
]= sorted_short_ref
[j
];
2912 frame_list
[list
][index
++].pic_id
= sorted_short_ref
[j
].frame_num
;
2914 short_len
[list
] = index
;
2916 for(i
= 0; i
< 16 && index
< h
->ref_count
[ list
]; i
++){
2918 if(h
->long_ref
[i
] == NULL
) continue;
2919 sel
= h
->long_ref
[i
]->reference
| structure_sel
;
2920 if(sel
!= PICT_FRAME
) continue;
2922 frame_list
[ list
][index
]= *h
->long_ref
[i
];
2923 frame_list
[ list
][index
++].pic_id
= i
;
2928 for(list
=0; list
<2; list
++){
2930 len
[list
] = split_field_ref_list(h
->default_ref_list
[list
],
2934 s
->picture_structure
,
2937 // swap the two first elements of L1 when L0 and L1 are identical
2938 if(list
&& len
[0] > 1 && len
[0] == len
[1])
2939 for(i
=0; h
->default_ref_list
[0][i
].data
[0] == h
->default_ref_list
[1][i
].data
[0]; i
++)
2941 FFSWAP(Picture
, h
->default_ref_list
[1][0], h
->default_ref_list
[1][1]);
2945 if(len
[list
] < h
->ref_count
[ list
])
2946 memset(&h
->default_ref_list
[list
][len
[list
]], 0, sizeof(Picture
)*(h
->ref_count
[ list
] - len
[list
]));
2953 for(i
=0; i
<h
->short_ref_count
; i
++){
2955 sel
= h
->short_ref
[i
]->reference
| structure_sel
;
2956 if(sel
!= PICT_FRAME
) continue;
2957 frame_list
[0][index
]= *h
->short_ref
[i
];
2958 frame_list
[0][index
++].pic_id
= h
->short_ref
[i
]->frame_num
;
2961 for(i
= 0; i
< 16; i
++){
2963 if(h
->long_ref
[i
] == NULL
) continue;
2964 sel
= h
->long_ref
[i
]->reference
| structure_sel
;
2965 if(sel
!= PICT_FRAME
) continue;
2966 frame_list
[0][index
]= *h
->long_ref
[i
];
2967 frame_list
[0][index
++].pic_id
= i
;
2971 index
= split_field_ref_list(h
->default_ref_list
[0],
2972 h
->ref_count
[0], frame_list
[0],
2973 index
, s
->picture_structure
,
2976 if(index
< h
->ref_count
[0])
2977 memset(&h
->default_ref_list
[0][index
], 0, sizeof(Picture
)*(h
->ref_count
[0] - index
));
2980 for (i
=0; i
<h
->ref_count
[0]; i
++) {
2981 tprintf(h
->s
.avctx
, "List0: %s fn:%d 0x%p\n", (h
->default_ref_list
[0][i
].long_ref
? "LT" : "ST"), h
->default_ref_list
[0][i
].pic_id
, h
->default_ref_list
[0][i
].data
[0]);
2983 if(h
->slice_type
==FF_B_TYPE
){
2984 for (i
=0; i
<h
->ref_count
[1]; i
++) {
2985 tprintf(h
->s
.avctx
, "List1: %s fn:%d 0x%p\n", (h
->default_ref_list
[1][i
].long_ref
? "LT" : "ST"), h
->default_ref_list
[1][i
].pic_id
, h
->default_ref_list
[1][i
].data
[0]);
2992 static void print_short_term(H264Context
*h
);
2993 static void print_long_term(H264Context
*h
);
2996 * Extract structure information about the picture described by pic_num in
2997 * the current decoding context (frame or field). Note that pic_num is
2998 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2999 * @param pic_num picture number for which to extract structure information
3000 * @param structure one of PICT_XXX describing structure of picture
3002 * @return frame number (short term) or long term index of picture
3003 * described by pic_num
3005 static int pic_num_extract(H264Context
*h
, int pic_num
, int *structure
){
3006 MpegEncContext
* const s
= &h
->s
;
3008 *structure
= s
->picture_structure
;
3011 /* opposite field */
3012 *structure
^= PICT_FRAME
;
3019 static int decode_ref_pic_list_reordering(H264Context
*h
){
3020 MpegEncContext
* const s
= &h
->s
;
3021 int list
, index
, pic_structure
;
3023 print_short_term(h
);
3025 if(h
->slice_type
==FF_I_TYPE
|| h
->slice_type
==FF_SI_TYPE
) return 0; //FIXME move before func
3027 for(list
=0; list
<h
->list_count
; list
++){
3028 memcpy(h
->ref_list
[list
], h
->default_ref_list
[list
], sizeof(Picture
)*h
->ref_count
[list
]);
3030 if(get_bits1(&s
->gb
)){
3031 int pred
= h
->curr_pic_num
;
3033 for(index
=0; ; index
++){
3034 unsigned int reordering_of_pic_nums_idc
= get_ue_golomb(&s
->gb
);
3035 unsigned int pic_id
;
3037 Picture
*ref
= NULL
;
3039 if(reordering_of_pic_nums_idc
==3)
3042 if(index
>= h
->ref_count
[list
]){
3043 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference count overflow\n");
3047 if(reordering_of_pic_nums_idc
<3){
3048 if(reordering_of_pic_nums_idc
<2){
3049 const unsigned int abs_diff_pic_num
= get_ue_golomb(&s
->gb
) + 1;
3052 if(abs_diff_pic_num
> h
->max_pic_num
){
3053 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "abs_diff_pic_num overflow\n");
3057 if(reordering_of_pic_nums_idc
== 0) pred
-= abs_diff_pic_num
;
3058 else pred
+= abs_diff_pic_num
;
3059 pred
&= h
->max_pic_num
- 1;
3061 frame_num
= pic_num_extract(h
, pred
, &pic_structure
);
3063 for(i
= h
->short_ref_count
-1; i
>=0; i
--){
3064 ref
= h
->short_ref
[i
];
3065 assert(ref
->reference
);
3066 assert(!ref
->long_ref
);
3067 if(ref
->data
[0] != NULL
&&
3068 ref
->frame_num
== frame_num
&&
3069 (ref
->reference
& pic_structure
) &&
3070 ref
->long_ref
== 0) // ignore non existing pictures by testing data[0] pointer
3077 pic_id
= get_ue_golomb(&s
->gb
); //long_term_pic_idx
3079 long_idx
= pic_num_extract(h
, pic_id
, &pic_structure
);
3082 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "long_term_pic_idx overflow\n");
3085 ref
= h
->long_ref
[long_idx
];
3086 assert(!(ref
&& !ref
->reference
));
3087 if(ref
&& (ref
->reference
& pic_structure
)){
3088 ref
->pic_id
= pic_id
;
3089 assert(ref
->long_ref
);
3097 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference picture missing during reorder\n");
3098 memset(&h
->ref_list
[list
][index
], 0, sizeof(Picture
)); //FIXME
3100 for(i
=index
; i
+1<h
->ref_count
[list
]; i
++){
3101 if(ref
->long_ref
== h
->ref_list
[list
][i
].long_ref
&& ref
->pic_id
== h
->ref_list
[list
][i
].pic_id
)
3104 for(; i
> index
; i
--){
3105 h
->ref_list
[list
][i
]= h
->ref_list
[list
][i
-1];
3107 h
->ref_list
[list
][index
]= *ref
;
3109 pic_as_field(&h
->ref_list
[list
][index
], pic_structure
);
3113 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal reordering_of_pic_nums_idc\n");
3119 for(list
=0; list
<h
->list_count
; list
++){
3120 for(index
= 0; index
< h
->ref_count
[list
]; index
++){
3121 if(!h
->ref_list
[list
][index
].data
[0])
3122 h
->ref_list
[list
][index
]= s
->current_picture
;
3126 if(h
->slice_type
==FF_B_TYPE
&& !h
->direct_spatial_mv_pred
)
3127 direct_dist_scale_factor(h
);
3128 direct_ref_list_init(h
);
3132 static void fill_mbaff_ref_list(H264Context
*h
){
3134 for(list
=0; list
<2; list
++){ //FIXME try list_count
3135 for(i
=0; i
<h
->ref_count
[list
]; i
++){
3136 Picture
*frame
= &h
->ref_list
[list
][i
];
3137 Picture
*field
= &h
->ref_list
[list
][16+2*i
];
3140 field
[0].linesize
[j
] <<= 1;
3141 field
[0].reference
= PICT_TOP_FIELD
;
3142 field
[1] = field
[0];
3144 field
[1].data
[j
] += frame
->linesize
[j
];
3145 field
[1].reference
= PICT_BOTTOM_FIELD
;
3147 h
->luma_weight
[list
][16+2*i
] = h
->luma_weight
[list
][16+2*i
+1] = h
->luma_weight
[list
][i
];
3148 h
->luma_offset
[list
][16+2*i
] = h
->luma_offset
[list
][16+2*i
+1] = h
->luma_offset
[list
][i
];
3150 h
->chroma_weight
[list
][16+2*i
][j
] = h
->chroma_weight
[list
][16+2*i
+1][j
] = h
->chroma_weight
[list
][i
][j
];
3151 h
->chroma_offset
[list
][16+2*i
][j
] = h
->chroma_offset
[list
][16+2*i
+1][j
] = h
->chroma_offset
[list
][i
][j
];
3155 for(j
=0; j
<h
->ref_count
[1]; j
++){
3156 for(i
=0; i
<h
->ref_count
[0]; i
++)
3157 h
->implicit_weight
[j
][16+2*i
] = h
->implicit_weight
[j
][16+2*i
+1] = h
->implicit_weight
[j
][i
];
3158 memcpy(h
->implicit_weight
[16+2*j
], h
->implicit_weight
[j
], sizeof(*h
->implicit_weight
));
3159 memcpy(h
->implicit_weight
[16+2*j
+1], h
->implicit_weight
[j
], sizeof(*h
->implicit_weight
));
3163 static int pred_weight_table(H264Context
*h
){
3164 MpegEncContext
* const s
= &h
->s
;
3166 int luma_def
, chroma_def
;
3169 h
->use_weight_chroma
= 0;
3170 h
->luma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
3171 h
->chroma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
3172 luma_def
= 1<<h
->luma_log2_weight_denom
;
3173 chroma_def
= 1<<h
->chroma_log2_weight_denom
;
3175 for(list
=0; list
<2; list
++){
3176 for(i
=0; i
<h
->ref_count
[list
]; i
++){
3177 int luma_weight_flag
, chroma_weight_flag
;
3179 luma_weight_flag
= get_bits1(&s
->gb
);
3180 if(luma_weight_flag
){
3181 h
->luma_weight
[list
][i
]= get_se_golomb(&s
->gb
);
3182 h
->luma_offset
[list
][i
]= get_se_golomb(&s
->gb
);
3183 if( h
->luma_weight
[list
][i
] != luma_def
3184 || h
->luma_offset
[list
][i
] != 0)
3187 h
->luma_weight
[list
][i
]= luma_def
;
3188 h
->luma_offset
[list
][i
]= 0;
3191 chroma_weight_flag
= get_bits1(&s
->gb
);
3192 if(chroma_weight_flag
){
3195 h
->chroma_weight
[list
][i
][j
]= get_se_golomb(&s
->gb
);
3196 h
->chroma_offset
[list
][i
][j
]= get_se_golomb(&s
->gb
);
3197 if( h
->chroma_weight
[list
][i
][j
] != chroma_def
3198 || h
->chroma_offset
[list
][i
][j
] != 0)
3199 h
->use_weight_chroma
= 1;
3204 h
->chroma_weight
[list
][i
][j
]= chroma_def
;
3205 h
->chroma_offset
[list
][i
][j
]= 0;
3209 if(h
->slice_type
!= FF_B_TYPE
) break;
3211 h
->use_weight
= h
->use_weight
|| h
->use_weight_chroma
;
3215 static void implicit_weight_table(H264Context
*h
){
3216 MpegEncContext
* const s
= &h
->s
;
3218 int cur_poc
= s
->current_picture_ptr
->poc
;
3220 if( h
->ref_count
[0] == 1 && h
->ref_count
[1] == 1
3221 && h
->ref_list
[0][0].poc
+ h
->ref_list
[1][0].poc
== 2*cur_poc
){
3223 h
->use_weight_chroma
= 0;
3228 h
->use_weight_chroma
= 2;
3229 h
->luma_log2_weight_denom
= 5;
3230 h
->chroma_log2_weight_denom
= 5;
3232 for(ref0
=0; ref0
< h
->ref_count
[0]; ref0
++){
3233 int poc0
= h
->ref_list
[0][ref0
].poc
;
3234 for(ref1
=0; ref1
< h
->ref_count
[1]; ref1
++){
3235 int poc1
= h
->ref_list
[1][ref1
].poc
;
3236 int td
= av_clip(poc1
- poc0
, -128, 127);
3238 int tb
= av_clip(cur_poc
- poc0
, -128, 127);
3239 int tx
= (16384 + (FFABS(td
) >> 1)) / td
;
3240 int dist_scale_factor
= av_clip((tb
*tx
+ 32) >> 6, -1024, 1023) >> 2;
3241 if(dist_scale_factor
< -64 || dist_scale_factor
> 128)
3242 h
->implicit_weight
[ref0
][ref1
] = 32;
3244 h
->implicit_weight
[ref0
][ref1
] = 64 - dist_scale_factor
;
3246 h
->implicit_weight
[ref0
][ref1
] = 32;
3252 * Mark a picture as no longer needed for reference. The refmask
3253 * argument allows unreferencing of individual fields or the whole frame.
3254 * If the picture becomes entirely unreferenced, but is being held for
3255 * display purposes, it is marked as such.
3256 * @param refmask mask of fields to unreference; the mask is bitwise
3257 * anded with the reference marking of pic
3258 * @return non-zero if pic becomes entirely unreferenced (except possibly
3259 * for display purposes) zero if one of the fields remains in
3262 static inline int unreference_pic(H264Context
*h
, Picture
*pic
, int refmask
){
3264 if (pic
->reference
&= refmask
) {
3267 if(pic
== h
->delayed_output_pic
)
3268 pic
->reference
=DELAYED_PIC_REF
;
3270 for(i
= 0; h
->delayed_pic
[i
]; i
++)
3271 if(pic
== h
->delayed_pic
[i
]){
3272 pic
->reference
=DELAYED_PIC_REF
;
3281 * instantaneous decoder refresh.
3283 static void idr(H264Context
*h
){
3286 for(i
=0; i
<16; i
++){
3287 if (h
->long_ref
[i
] != NULL
) {
3288 unreference_pic(h
, h
->long_ref
[i
], 0);
3289 h
->long_ref
[i
]= NULL
;
3292 h
->long_ref_count
=0;
3294 for(i
=0; i
<h
->short_ref_count
; i
++){
3295 unreference_pic(h
, h
->short_ref
[i
], 0);
3296 h
->short_ref
[i
]= NULL
;
3298 h
->short_ref_count
=0;
3301 /* forget old pics after a seek */
3302 static void flush_dpb(AVCodecContext
*avctx
){
3303 H264Context
*h
= avctx
->priv_data
;
3305 for(i
=0; i
<16; i
++) {
3306 if(h
->delayed_pic
[i
])
3307 h
->delayed_pic
[i
]->reference
= 0;
3308 h
->delayed_pic
[i
]= NULL
;
3310 if(h
->delayed_output_pic
)
3311 h
->delayed_output_pic
->reference
= 0;
3312 h
->delayed_output_pic
= NULL
;
3314 if(h
->s
.current_picture_ptr
)
3315 h
->s
.current_picture_ptr
->reference
= 0;
3316 h
->s
.first_field
= 0;
3317 ff_mpeg_flush(avctx
);
3321 * Find a Picture in the short term reference list by frame number.
3322 * @param frame_num frame number to search for
3323 * @param idx the index into h->short_ref where returned picture is found
3324 * undefined if no picture found.
3325 * @return pointer to the found picture, or NULL if no pic with the provided
3326 * frame number is found
3328 static Picture
* find_short(H264Context
*h
, int frame_num
, int *idx
){
3329 MpegEncContext
* const s
= &h
->s
;
3332 for(i
=0; i
<h
->short_ref_count
; i
++){
3333 Picture
*pic
= h
->short_ref
[i
];
3334 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3335 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d %d %p\n", i
, pic
->frame_num
, pic
);
3336 if(pic
->frame_num
== frame_num
) {
3345 * Remove a picture from the short term reference list by its index in
3346 * that list. This does no checking on the provided index; it is assumed
3347 * to be valid. Other list entries are shifted down.
3348 * @param i index into h->short_ref of picture to remove.
3350 static void remove_short_at_index(H264Context
*h
, int i
){
3351 assert(i
> 0 && i
< h
->short_ref_count
);
3352 h
->short_ref
[i
]= NULL
;
3353 if (--h
->short_ref_count
)
3354 memmove(&h
->short_ref
[i
], &h
->short_ref
[i
+1], (h
->short_ref_count
- i
)*sizeof(Picture
*));
3359 * @return the removed picture or NULL if an error occurs
3361 static Picture
* remove_short(H264Context
*h
, int frame_num
){
3362 MpegEncContext
* const s
= &h
->s
;
3366 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3367 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "remove short %d count %d\n", frame_num
, h
->short_ref_count
);
3369 pic
= find_short(h
, frame_num
, &i
);
3371 remove_short_at_index(h
, i
);
3377 * Remove a picture from the long term reference list by its index in
3378 * that list. This does no checking on the provided index; it is assumed
3379 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3380 * @param i index into h->long_ref of picture to remove.
3382 static void remove_long_at_index(H264Context
*h
, int i
){
3383 h
->long_ref
[i
]= NULL
;
3384 h
->long_ref_count
--;
3389 * @return the removed picture or NULL if an error occurs
3391 static Picture
* remove_long(H264Context
*h
, int i
){
3394 pic
= h
->long_ref
[i
];
3396 remove_long_at_index(h
, i
);
3402 * print short term list
3404 static void print_short_term(H264Context
*h
) {
3406 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3407 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "short term list:\n");
3408 for(i
=0; i
<h
->short_ref_count
; i
++){
3409 Picture
*pic
= h
->short_ref
[i
];
3410 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3416 * print long term list
3418 static void print_long_term(H264Context
*h
) {
3420 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3421 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "long term list:\n");
3422 for(i
= 0; i
< 16; i
++){
3423 Picture
*pic
= h
->long_ref
[i
];
3425 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3432 * Executes the reference picture marking (memory management control operations).
3434 static int execute_ref_pic_marking(H264Context
*h
, MMCO
*mmco
, int mmco_count
){
3435 MpegEncContext
* const s
= &h
->s
;
3437 int current_ref_assigned
=0;
3440 if((s
->avctx
->debug
&FF_DEBUG_MMCO
) && mmco_count
==0)
3441 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "no mmco here\n");
3443 for(i
=0; i
<mmco_count
; i
++){
3444 int structure
, frame_num
, unref_pic
;
3445 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3446 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco:%d %d %d\n", h
->mmco
[i
].opcode
, h
->mmco
[i
].short_pic_num
, h
->mmco
[i
].long_arg
);
3448 switch(mmco
[i
].opcode
){
3449 case MMCO_SHORT2UNUSED
:
3450 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3451 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref short %d count %d\n", h
->mmco
[i
].short_pic_num
, h
->short_ref_count
);
3452 frame_num
= pic_num_extract(h
, mmco
[i
].short_pic_num
, &structure
);
3453 pic
= find_short(h
, frame_num
, &j
);
3455 if (unreference_pic(h
, pic
, structure
^ PICT_FRAME
))
3456 remove_short_at_index(h
, j
);
3457 } else if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3458 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref short failure\n");
3460 case MMCO_SHORT2LONG
:
3461 if (FIELD_PICTURE
&& mmco
[i
].long_arg
< h
->long_ref_count
&&
3462 h
->long_ref
[mmco
[i
].long_arg
]->frame_num
==
3463 mmco
[i
].short_pic_num
/ 2) {
3464 /* do nothing, we've already moved this field pair. */
3466 int frame_num
= mmco
[i
].short_pic_num
>> FIELD_PICTURE
;
3468 pic
= remove_long(h
, mmco
[i
].long_arg
);
3469 if(pic
) unreference_pic(h
, pic
, 0);
3471 h
->long_ref
[ mmco
[i
].long_arg
]= remove_short(h
, frame_num
);
3472 if (h
->long_ref
[ mmco
[i
].long_arg
]){
3473 h
->long_ref
[ mmco
[i
].long_arg
]->long_ref
=1;
3474 h
->long_ref_count
++;
3478 case MMCO_LONG2UNUSED
:
3479 j
= pic_num_extract(h
, mmco
[i
].long_arg
, &structure
);
3480 pic
= h
->long_ref
[j
];
3482 if (unreference_pic(h
, pic
, structure
^ PICT_FRAME
))
3483 remove_long_at_index(h
, j
);
3484 } else if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3485 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: unref long failure\n");
3489 if (FIELD_PICTURE
&& !s
->first_field
) {
3490 if (h
->long_ref
[mmco
[i
].long_arg
] == s
->current_picture_ptr
) {
3491 /* Just mark second field as referenced */
3493 } else if (s
->current_picture_ptr
->reference
) {
3494 /* First field in pair is in short term list or
3495 * at a different long term index.
3496 * This is not allowed; see 7.4.3, notes 2 and 3.
3497 * Report the problem and keep the pair where it is,
3498 * and mark this field valid.
3500 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3501 "illegal long term reference assignment for second "
3502 "field in complementary field pair (first field is "
3503 "short term or has non-matching long index)\n");
3509 pic
= remove_long(h
, mmco
[i
].long_arg
);
3510 if(pic
) unreference_pic(h
, pic
, 0);
3512 h
->long_ref
[ mmco
[i
].long_arg
]= s
->current_picture_ptr
;
3513 h
->long_ref
[ mmco
[i
].long_arg
]->long_ref
=1;
3514 h
->long_ref_count
++;
3517 s
->current_picture_ptr
->reference
|= s
->picture_structure
;
3518 current_ref_assigned
=1;
3520 case MMCO_SET_MAX_LONG
:
3521 assert(mmco
[i
].long_arg
<= 16);
3522 // just remove the long term which index is greater than new max
3523 for(j
= mmco
[i
].long_arg
; j
<16; j
++){
3524 pic
= remove_long(h
, j
);
3525 if (pic
) unreference_pic(h
, pic
, 0);
3529 while(h
->short_ref_count
){
3530 pic
= remove_short(h
, h
->short_ref
[0]->frame_num
);
3531 if(pic
) unreference_pic(h
, pic
, 0);
3533 for(j
= 0; j
< 16; j
++) {
3534 pic
= remove_long(h
, j
);
3535 if(pic
) unreference_pic(h
, pic
, 0);
3542 if (!current_ref_assigned
&& FIELD_PICTURE
&&
3543 !s
->first_field
&& s
->current_picture_ptr
->reference
) {
3545 /* Second field of complementary field pair; the first field of
3546 * which is already referenced. If short referenced, it
3547 * should be first entry in short_ref. If not, it must exist
3548 * in long_ref; trying to put it on the short list here is an
3549 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3551 if (h
->short_ref_count
&& h
->short_ref
[0] == s
->current_picture_ptr
) {
3552 /* Just mark the second field valid */
3553 s
->current_picture_ptr
->reference
= PICT_FRAME
;
3554 } else if (s
->current_picture_ptr
->long_ref
) {
3555 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term reference "
3556 "assignment for second field "
3557 "in complementary field pair "
3558 "(first field is long term)\n");
3561 * First field in reference, but not in any sensible place on our
3562 * reference lists. This shouldn't happen unless reference
3563 * handling somewhere else is wrong.
3567 current_ref_assigned
= 1;
3570 if(!current_ref_assigned
){
3571 pic
= remove_short(h
, s
->current_picture_ptr
->frame_num
);
3573 unreference_pic(h
, pic
, 0);
3574 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term buffer state detected\n");
3577 if(h
->short_ref_count
)
3578 memmove(&h
->short_ref
[1], &h
->short_ref
[0], h
->short_ref_count
*sizeof(Picture
*));
3580 h
->short_ref
[0]= s
->current_picture_ptr
;
3581 h
->short_ref
[0]->long_ref
=0;
3582 h
->short_ref_count
++;
3583 s
->current_picture_ptr
->reference
|= s
->picture_structure
;
3586 if (h
->long_ref_count
+ h
->short_ref_count
> h
->sps
.ref_frame_count
){
3588 /* We have too many reference frames, probably due to corrupted
3589 * stream. Need to discard one frame. Prevents overrun of the
3590 * short_ref and long_ref buffers.
3592 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3593 "number of reference frames exceeds max (probably "
3594 "corrupt input), discarding one\n");
3596 if (h
->long_ref_count
) {
3597 for (i
= 0; i
< 16; ++i
)
3602 pic
= h
->long_ref
[i
];
3603 remove_long_at_index(h
, i
);
3605 pic
= h
->short_ref
[h
->short_ref_count
- 1];
3606 remove_short_at_index(h
, h
->short_ref_count
- 1);
3608 unreference_pic(h
, pic
, 0);
3611 print_short_term(h
);
3616 static int decode_ref_pic_marking(H264Context
*h
, GetBitContext
*gb
){
3617 MpegEncContext
* const s
= &h
->s
;
3620 if(h
->nal_unit_type
== NAL_IDR_SLICE
){ //FIXME fields
3621 s
->broken_link
= get_bits1(gb
) -1;
3622 h
->mmco
[0].long_arg
= get_bits1(gb
) - 1; // current_long_term_idx
3623 if(h
->mmco
[0].long_arg
== -1)
3626 h
->mmco
[0].opcode
= MMCO_LONG
;
3630 if(get_bits1(gb
)){ // adaptive_ref_pic_marking_mode_flag
3631 for(i
= 0; i
<MAX_MMCO_COUNT
; i
++) {
3632 MMCOOpcode opcode
= get_ue_golomb(gb
);
3634 h
->mmco
[i
].opcode
= opcode
;
3635 if(opcode
==MMCO_SHORT2UNUSED
|| opcode
==MMCO_SHORT2LONG
){
3636 h
->mmco
[i
].short_pic_num
= (h
->curr_pic_num
- get_ue_golomb(gb
) - 1) & (h
->max_pic_num
- 1);
3637 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3638 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3642 if(opcode
==MMCO_SHORT2LONG
|| opcode
==MMCO_LONG2UNUSED
|| opcode
==MMCO_LONG
|| opcode
==MMCO_SET_MAX_LONG
){
3643 unsigned int long_arg
= get_ue_golomb(gb
);
3644 if(long_arg
>= 32 || (long_arg
>= 16 && !(opcode
== MMCO_LONG2UNUSED
&& FIELD_PICTURE
))){
3645 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal long ref in memory management control operation %d\n", opcode
);
3648 h
->mmco
[i
].long_arg
= long_arg
;
3651 if(opcode
> (unsigned)MMCO_LONG
){
3652 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal memory management control operation %d\n", opcode
);
3655 if(opcode
== MMCO_END
)
3660 assert(h
->long_ref_count
+ h
->short_ref_count
<= h
->sps
.ref_frame_count
);
3662 if(h
->short_ref_count
&& h
->long_ref_count
+ h
->short_ref_count
== h
->sps
.ref_frame_count
&&
3663 !(FIELD_PICTURE
&& !s
->first_field
&& s
->current_picture_ptr
->reference
)) {
3664 h
->mmco
[0].opcode
= MMCO_SHORT2UNUSED
;
3665 h
->mmco
[0].short_pic_num
= h
->short_ref
[ h
->short_ref_count
- 1 ]->frame_num
;
3667 if (FIELD_PICTURE
) {
3668 h
->mmco
[0].short_pic_num
*= 2;
3669 h
->mmco
[1].opcode
= MMCO_SHORT2UNUSED
;
3670 h
->mmco
[1].short_pic_num
= h
->mmco
[0].short_pic_num
+ 1;
3681 static int init_poc(H264Context
*h
){
3682 MpegEncContext
* const s
= &h
->s
;
3683 const int max_frame_num
= 1<<h
->sps
.log2_max_frame_num
;
3686 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
3687 h
->frame_num_offset
= 0;
3689 if(h
->frame_num
< h
->prev_frame_num
)
3690 h
->frame_num_offset
= h
->prev_frame_num_offset
+ max_frame_num
;
3692 h
->frame_num_offset
= h
->prev_frame_num_offset
;
3695 if(h
->sps
.poc_type
==0){
3696 const int max_poc_lsb
= 1<<h
->sps
.log2_max_poc_lsb
;
3698 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
3703 if (h
->poc_lsb
< h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
>= max_poc_lsb
/2)
3704 h
->poc_msb
= h
->prev_poc_msb
+ max_poc_lsb
;
3705 else if(h
->poc_lsb
> h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
< -max_poc_lsb
/2)
3706 h
->poc_msb
= h
->prev_poc_msb
- max_poc_lsb
;
3708 h
->poc_msb
= h
->prev_poc_msb
;
3709 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3711 field_poc
[1] = h
->poc_msb
+ h
->poc_lsb
;
3712 if(s
->picture_structure
== PICT_FRAME
)
3713 field_poc
[1] += h
->delta_poc_bottom
;
3714 }else if(h
->sps
.poc_type
==1){
3715 int abs_frame_num
, expected_delta_per_poc_cycle
, expectedpoc
;
3718 if(h
->sps
.poc_cycle_length
!= 0)
3719 abs_frame_num
= h
->frame_num_offset
+ h
->frame_num
;
3723 if(h
->nal_ref_idc
==0 && abs_frame_num
> 0)
3726 expected_delta_per_poc_cycle
= 0;
3727 for(i
=0; i
< h
->sps
.poc_cycle_length
; i
++)
3728 expected_delta_per_poc_cycle
+= h
->sps
.offset_for_ref_frame
[ i
]; //FIXME integrate during sps parse
3730 if(abs_frame_num
> 0){
3731 int poc_cycle_cnt
= (abs_frame_num
- 1) / h
->sps
.poc_cycle_length
;
3732 int frame_num_in_poc_cycle
= (abs_frame_num
- 1) % h
->sps
.poc_cycle_length
;
3734 expectedpoc
= poc_cycle_cnt
* expected_delta_per_poc_cycle
;
3735 for(i
= 0; i
<= frame_num_in_poc_cycle
; i
++)
3736 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_ref_frame
[ i
];
3740 if(h
->nal_ref_idc
== 0)
3741 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_non_ref_pic
;
3743 field_poc
[0] = expectedpoc
+ h
->delta_poc
[0];
3744 field_poc
[1] = field_poc
[0] + h
->sps
.offset_for_top_to_bottom_field
;
3746 if(s
->picture_structure
== PICT_FRAME
)
3747 field_poc
[1] += h
->delta_poc
[1];
3750 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
3753 if(h
->nal_ref_idc
) poc
= 2*(h
->frame_num_offset
+ h
->frame_num
);
3754 else poc
= 2*(h
->frame_num_offset
+ h
->frame_num
) - 1;
3760 if(s
->picture_structure
!= PICT_BOTTOM_FIELD
) {
3761 s
->current_picture_ptr
->field_poc
[0]= field_poc
[0];
3762 s
->current_picture_ptr
->poc
= field_poc
[0];
3764 if(s
->picture_structure
!= PICT_TOP_FIELD
) {
3765 s
->current_picture_ptr
->field_poc
[1]= field_poc
[1];
3766 s
->current_picture_ptr
->poc
= field_poc
[1];
3768 if(!FIELD_PICTURE
|| !s
->first_field
) {
3769 Picture
*cur
= s
->current_picture_ptr
;
3770 cur
->poc
= FFMIN(cur
->field_poc
[0], cur
->field_poc
[1]);
3778 * initialize scan tables
3780 static void init_scan_tables(H264Context
*h
){
3781 MpegEncContext
* const s
= &h
->s
;
3783 if(s
->dsp
.h264_idct_add
== ff_h264_idct_add_c
){ //FIXME little ugly
3784 memcpy(h
->zigzag_scan
, zigzag_scan
, 16*sizeof(uint8_t));
3785 memcpy(h
-> field_scan
, field_scan
, 16*sizeof(uint8_t));
3787 for(i
=0; i
<16; i
++){
3788 #define T(x) (x>>2) | ((x<<2) & 0xF)
3789 h
->zigzag_scan
[i
] = T(zigzag_scan
[i
]);
3790 h
-> field_scan
[i
] = T( field_scan
[i
]);
3794 if(s
->dsp
.h264_idct8_add
== ff_h264_idct8_add_c
){
3795 memcpy(h
->zigzag_scan8x8
, zigzag_scan8x8
, 64*sizeof(uint8_t));
3796 memcpy(h
->zigzag_scan8x8_cavlc
, zigzag_scan8x8_cavlc
, 64*sizeof(uint8_t));
3797 memcpy(h
->field_scan8x8
, field_scan8x8
, 64*sizeof(uint8_t));
3798 memcpy(h
->field_scan8x8_cavlc
, field_scan8x8_cavlc
, 64*sizeof(uint8_t));
3800 for(i
=0; i
<64; i
++){
3801 #define T(x) (x>>3) | ((x&7)<<3)
3802 h
->zigzag_scan8x8
[i
] = T(zigzag_scan8x8
[i
]);
3803 h
->zigzag_scan8x8_cavlc
[i
] = T(zigzag_scan8x8_cavlc
[i
]);
3804 h
->field_scan8x8
[i
] = T(field_scan8x8
[i
]);
3805 h
->field_scan8x8_cavlc
[i
] = T(field_scan8x8_cavlc
[i
]);
3809 if(h
->sps
.transform_bypass
){ //FIXME same ugly
3810 h
->zigzag_scan_q0
= zigzag_scan
;
3811 h
->zigzag_scan8x8_q0
= zigzag_scan8x8
;
3812 h
->zigzag_scan8x8_cavlc_q0
= zigzag_scan8x8_cavlc
;
3813 h
->field_scan_q0
= field_scan
;
3814 h
->field_scan8x8_q0
= field_scan8x8
;
3815 h
->field_scan8x8_cavlc_q0
= field_scan8x8_cavlc
;
3817 h
->zigzag_scan_q0
= h
->zigzag_scan
;
3818 h
->zigzag_scan8x8_q0
= h
->zigzag_scan8x8
;
3819 h
->zigzag_scan8x8_cavlc_q0
= h
->zigzag_scan8x8_cavlc
;
3820 h
->field_scan_q0
= h
->field_scan
;
3821 h
->field_scan8x8_q0
= h
->field_scan8x8
;
3822 h
->field_scan8x8_cavlc_q0
= h
->field_scan8x8_cavlc
;
3827 * Replicates H264 "master" context to thread contexts.
3829 static void clone_slice(H264Context
*dst
, H264Context
*src
)
3831 memcpy(dst
->block_offset
, src
->block_offset
, sizeof(dst
->block_offset
));
3832 dst
->s
.current_picture_ptr
= src
->s
.current_picture_ptr
;
3833 dst
->s
.current_picture
= src
->s
.current_picture
;
3834 dst
->s
.linesize
= src
->s
.linesize
;
3835 dst
->s
.uvlinesize
= src
->s
.uvlinesize
;
3836 dst
->s
.first_field
= src
->s
.first_field
;
3838 dst
->prev_poc_msb
= src
->prev_poc_msb
;
3839 dst
->prev_poc_lsb
= src
->prev_poc_lsb
;
3840 dst
->prev_frame_num_offset
= src
->prev_frame_num_offset
;
3841 dst
->prev_frame_num
= src
->prev_frame_num
;
3842 dst
->short_ref_count
= src
->short_ref_count
;
3844 memcpy(dst
->short_ref
, src
->short_ref
, sizeof(dst
->short_ref
));
3845 memcpy(dst
->long_ref
, src
->long_ref
, sizeof(dst
->long_ref
));
3846 memcpy(dst
->default_ref_list
, src
->default_ref_list
, sizeof(dst
->default_ref_list
));
3847 memcpy(dst
->ref_list
, src
->ref_list
, sizeof(dst
->ref_list
));
3849 memcpy(dst
->dequant4_coeff
, src
->dequant4_coeff
, sizeof(src
->dequant4_coeff
));
3850 memcpy(dst
->dequant8_coeff
, src
->dequant8_coeff
, sizeof(src
->dequant8_coeff
));
3854 * decodes a slice header.
3855 * this will allso call MPV_common_init() and frame_start() as needed
3857 * @param h h264context
3858 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3860 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3862 static int decode_slice_header(H264Context
*h
, H264Context
*h0
){
3863 MpegEncContext
* const s
= &h
->s
;
3864 MpegEncContext
* const s0
= &h0
->s
;
3865 unsigned int first_mb_in_slice
;
3866 unsigned int pps_id
;
3867 int num_ref_idx_active_override_flag
;
3868 static const uint8_t slice_type_map
[5]= {FF_P_TYPE
, FF_B_TYPE
, FF_I_TYPE
, FF_SP_TYPE
, FF_SI_TYPE
};
3869 unsigned int slice_type
, tmp
, i
;
3870 int default_ref_list_done
= 0;
3871 int last_pic_structure
;
3873 s
->dropable
= h
->nal_ref_idc
== 0;
3875 if((s
->avctx
->flags2
& CODEC_FLAG2_FAST
) && !h
->nal_ref_idc
){
3876 s
->me
.qpel_put
= s
->dsp
.put_2tap_qpel_pixels_tab
;
3877 s
->me
.qpel_avg
= s
->dsp
.avg_2tap_qpel_pixels_tab
;
3879 s
->me
.qpel_put
= s
->dsp
.put_h264_qpel_pixels_tab
;
3880 s
->me
.qpel_avg
= s
->dsp
.avg_h264_qpel_pixels_tab
;
3883 first_mb_in_slice
= get_ue_golomb(&s
->gb
);
3885 if((s
->flags2
& CODEC_FLAG2_CHUNKS
) && first_mb_in_slice
== 0){
3886 h0
->current_slice
= 0;
3887 if (!s0
->first_field
)
3888 s
->current_picture_ptr
= NULL
;
3891 slice_type
= get_ue_golomb(&s
->gb
);
3893 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "slice type too large (%d) at %d %d\n", h
->slice_type
, s
->mb_x
, s
->mb_y
);
3898 h
->slice_type_fixed
=1;
3900 h
->slice_type_fixed
=0;
3902 slice_type
= slice_type_map
[ slice_type
];
3903 if (slice_type
== FF_I_TYPE
3904 || (h0
->current_slice
!= 0 && slice_type
== h0
->last_slice_type
) ) {
3905 default_ref_list_done
= 1;
3907 h
->slice_type
= slice_type
;
3909 s
->pict_type
= h
->slice_type
; // to make a few old func happy, it's wrong though
3910 if (s
->pict_type
== FF_B_TYPE
&& s0
->last_picture_ptr
== NULL
) {
3911 av_log(h
->s
.avctx
, AV_LOG_ERROR
,
3912 "B picture before any references, skipping\n");
3916 pps_id
= get_ue_golomb(&s
->gb
);
3917 if(pps_id
>=MAX_PPS_COUNT
){
3918 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "pps_id out of range\n");
3921 if(!h0
->pps_buffers
[pps_id
]) {
3922 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non existing PPS referenced\n");
3925 h
->pps
= *h0
->pps_buffers
[pps_id
];
3927 if(!h0
->sps_buffers
[h
->pps
.sps_id
]) {
3928 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non existing SPS referenced\n");
3931 h
->sps
= *h0
->sps_buffers
[h
->pps
.sps_id
];
3933 if(h
== h0
&& h
->dequant_coeff_pps
!= pps_id
){
3934 h
->dequant_coeff_pps
= pps_id
;
3935 init_dequant_tables(h
);
3938 s
->mb_width
= h
->sps
.mb_width
;
3939 s
->mb_height
= h
->sps
.mb_height
* (2 - h
->sps
.frame_mbs_only_flag
);
3941 h
->b_stride
= s
->mb_width
*4;
3942 h
->b8_stride
= s
->mb_width
*2;
3944 s
->width
= 16*s
->mb_width
- 2*(h
->sps
.crop_left
+ h
->sps
.crop_right
);
3945 if(h
->sps
.frame_mbs_only_flag
)
3946 s
->height
= 16*s
->mb_height
- 2*(h
->sps
.crop_top
+ h
->sps
.crop_bottom
);
3948 s
->height
= 16*s
->mb_height
- 4*(h
->sps
.crop_top
+ h
->sps
.crop_bottom
); //FIXME recheck
3950 if (s
->context_initialized
3951 && ( s
->width
!= s
->avctx
->width
|| s
->height
!= s
->avctx
->height
)) {
3953 return -1; // width / height changed during parallelized decoding
3957 if (!s
->context_initialized
) {
3959 return -1; // we cant (re-)initialize context during parallel decoding
3960 if (MPV_common_init(s
) < 0)
3964 init_scan_tables(h
);
3967 for(i
= 1; i
< s
->avctx
->thread_count
; i
++) {
3969 c
= h
->thread_context
[i
] = av_malloc(sizeof(H264Context
));
3970 memcpy(c
, h
->s
.thread_context
[i
], sizeof(MpegEncContext
));
3971 memset(&c
->s
+ 1, 0, sizeof(H264Context
) - sizeof(MpegEncContext
));
3974 init_scan_tables(c
);
3978 for(i
= 0; i
< s
->avctx
->thread_count
; i
++)
3979 if(context_init(h
->thread_context
[i
]) < 0)
3982 s
->avctx
->width
= s
->width
;
3983 s
->avctx
->height
= s
->height
;
3984 s
->avctx
->sample_aspect_ratio
= h
->sps
.sar
;
3985 if(!s
->avctx
->sample_aspect_ratio
.den
)
3986 s
->avctx
->sample_aspect_ratio
.den
= 1;
3988 if(h
->sps
.timing_info_present_flag
){
3989 s
->avctx
->time_base
= (AVRational
){h
->sps
.num_units_in_tick
* 2, h
->sps
.time_scale
};
3990 if(h
->x264_build
> 0 && h
->x264_build
< 44)
3991 s
->avctx
->time_base
.den
*= 2;
3992 av_reduce(&s
->avctx
->time_base
.num
, &s
->avctx
->time_base
.den
,
3993 s
->avctx
->time_base
.num
, s
->avctx
->time_base
.den
, 1<<30);
3997 h
->frame_num
= get_bits(&s
->gb
, h
->sps
.log2_max_frame_num
);
4000 h
->mb_aff_frame
= 0;
4001 last_pic_structure
= s0
->picture_structure
;
4002 if(h
->sps
.frame_mbs_only_flag
){
4003 s
->picture_structure
= PICT_FRAME
;
4005 if(get_bits1(&s
->gb
)) { //field_pic_flag
4006 s
->picture_structure
= PICT_TOP_FIELD
+ get_bits1(&s
->gb
); //bottom_field_flag
4008 s
->picture_structure
= PICT_FRAME
;
4009 h
->mb_aff_frame
= h
->sps
.mb_aff
;
4013 if(h0
->current_slice
== 0){
4014 /* See if we have a decoded first field looking for a pair... */
4015 if (s0
->first_field
) {
4016 assert(s0
->current_picture_ptr
);
4017 assert(s0
->current_picture_ptr
->data
[0]);
4018 assert(s0
->current_picture_ptr
->reference
!= DELAYED_PIC_REF
);
4020 /* figure out if we have a complementary field pair */
4021 if (!FIELD_PICTURE
|| s
->picture_structure
== last_pic_structure
) {
4023 * Previous field is unmatched. Don't display it, but let it
4024 * remain for reference if marked as such.
4026 s0
->current_picture_ptr
= NULL
;
4027 s0
->first_field
= FIELD_PICTURE
;
4030 if (h
->nal_ref_idc
&&
4031 s0
->current_picture_ptr
->reference
&&
4032 s0
->current_picture_ptr
->frame_num
!= h
->frame_num
) {
4034 * This and previous field were reference, but had
4035 * different frame_nums. Consider this field first in
4036 * pair. Throw away previous field except for reference
4039 s0
->first_field
= 1;
4040 s0
->current_picture_ptr
= NULL
;
4043 /* Second field in complementary pair */
4044 s0
->first_field
= 0;
4049 /* Frame or first field in a potentially complementary pair */
4050 assert(!s0
->current_picture_ptr
);
4051 s0
->first_field
= FIELD_PICTURE
;
4054 if((!FIELD_PICTURE
|| s0
->first_field
) && frame_start(h
) < 0) {
4055 s0
->first_field
= 0;
4062 s
->current_picture_ptr
->frame_num
= h
->frame_num
; //FIXME frame_num cleanup
4064 assert(s
->mb_num
== s
->mb_width
* s
->mb_height
);
4065 if(first_mb_in_slice
<< FIELD_OR_MBAFF_PICTURE
>= s
->mb_num
||
4066 first_mb_in_slice
>= s
->mb_num
){
4067 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "first_mb_in_slice overflow\n");
4070 s
->resync_mb_x
= s
->mb_x
= first_mb_in_slice
% s
->mb_width
;
4071 s
->resync_mb_y
= s
->mb_y
= (first_mb_in_slice
/ s
->mb_width
) << FIELD_OR_MBAFF_PICTURE
;
4072 if (s
->picture_structure
== PICT_BOTTOM_FIELD
)
4073 s
->resync_mb_y
= s
->mb_y
= s
->mb_y
+ 1;
4074 assert(s
->mb_y
< s
->mb_height
);
4076 if(s
->picture_structure
==PICT_FRAME
){
4077 h
->curr_pic_num
= h
->frame_num
;
4078 h
->max_pic_num
= 1<< h
->sps
.log2_max_frame_num
;
4080 h
->curr_pic_num
= 2*h
->frame_num
+ 1;
4081 h
->max_pic_num
= 1<<(h
->sps
.log2_max_frame_num
+ 1);
4084 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
4085 get_ue_golomb(&s
->gb
); /* idr_pic_id */
4088 if(h
->sps
.poc_type
==0){
4089 h
->poc_lsb
= get_bits(&s
->gb
, h
->sps
.log2_max_poc_lsb
);
4091 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
){
4092 h
->delta_poc_bottom
= get_se_golomb(&s
->gb
);
4096 if(h
->sps
.poc_type
==1 && !h
->sps
.delta_pic_order_always_zero_flag
){
4097 h
->delta_poc
[0]= get_se_golomb(&s
->gb
);
4099 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
)
4100 h
->delta_poc
[1]= get_se_golomb(&s
->gb
);
4105 if(h
->pps
.redundant_pic_cnt_present
){
4106 h
->redundant_pic_count
= get_ue_golomb(&s
->gb
);
4109 //set defaults, might be overriden a few line later
4110 h
->ref_count
[0]= h
->pps
.ref_count
[0];
4111 h
->ref_count
[1]= h
->pps
.ref_count
[1];
4113 if(h
->slice_type
== FF_P_TYPE
|| h
->slice_type
== FF_SP_TYPE
|| h
->slice_type
== FF_B_TYPE
){
4114 if(h
->slice_type
== FF_B_TYPE
){
4115 h
->direct_spatial_mv_pred
= get_bits1(&s
->gb
);
4117 num_ref_idx_active_override_flag
= get_bits1(&s
->gb
);
4119 if(num_ref_idx_active_override_flag
){
4120 h
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
4121 if(h
->slice_type
==FF_B_TYPE
)
4122 h
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
4124 if(h
->ref_count
[0]-1 > 32-1 || h
->ref_count
[1]-1 > 32-1){
4125 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow\n");
4126 h
->ref_count
[0]= h
->ref_count
[1]= 1;
4130 if(h
->slice_type
== FF_B_TYPE
)
4137 if(!default_ref_list_done
){
4138 fill_default_ref_list(h
);
4141 if(decode_ref_pic_list_reordering(h
) < 0)
4144 if( (h
->pps
.weighted_pred
&& (h
->slice_type
== FF_P_TYPE
|| h
->slice_type
== FF_SP_TYPE
))
4145 || (h
->pps
.weighted_bipred_idc
==1 && h
->slice_type
==FF_B_TYPE
) )
4146 pred_weight_table(h
);
4147 else if(h
->pps
.weighted_bipred_idc
==2 && h
->slice_type
==FF_B_TYPE
)
4148 implicit_weight_table(h
);
4153 decode_ref_pic_marking(h0
, &s
->gb
);
4156 fill_mbaff_ref_list(h
);
4158 if( h
->slice_type
!= FF_I_TYPE
&& h
->slice_type
!= FF_SI_TYPE
&& h
->pps
.cabac
){
4159 tmp
= get_ue_golomb(&s
->gb
);
4161 av_log(s
->avctx
, AV_LOG_ERROR
, "cabac_init_idc overflow\n");
4164 h
->cabac_init_idc
= tmp
;
4167 h
->last_qscale_diff
= 0;
4168 tmp
= h
->pps
.init_qp
+ get_se_golomb(&s
->gb
);
4170 av_log(s
->avctx
, AV_LOG_ERROR
, "QP %u out of range\n", tmp
);
4174 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
4175 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
4176 //FIXME qscale / qp ... stuff
4177 if(h
->slice_type
== FF_SP_TYPE
){
4178 get_bits1(&s
->gb
); /* sp_for_switch_flag */
4180 if(h
->slice_type
==FF_SP_TYPE
|| h
->slice_type
== FF_SI_TYPE
){
4181 get_se_golomb(&s
->gb
); /* slice_qs_delta */
4184 h
->deblocking_filter
= 1;
4185 h
->slice_alpha_c0_offset
= 0;
4186 h
->slice_beta_offset
= 0;
4187 if( h
->pps
.deblocking_filter_parameters_present
) {
4188 tmp
= get_ue_golomb(&s
->gb
);
4190 av_log(s
->avctx
, AV_LOG_ERROR
, "deblocking_filter_idc %u out of range\n", tmp
);
4193 h
->deblocking_filter
= tmp
;
4194 if(h
->deblocking_filter
< 2)
4195 h
->deblocking_filter
^= 1; // 1<->0
4197 if( h
->deblocking_filter
) {
4198 h
->slice_alpha_c0_offset
= get_se_golomb(&s
->gb
) << 1;
4199 h
->slice_beta_offset
= get_se_golomb(&s
->gb
) << 1;
4203 if( s
->avctx
->skip_loop_filter
>= AVDISCARD_ALL
4204 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONKEY
&& h
->slice_type
!= FF_I_TYPE
)
4205 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_BIDIR
&& h
->slice_type
== FF_B_TYPE
)
4206 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
4207 h
->deblocking_filter
= 0;
4209 if(h
->deblocking_filter
== 1 && h0
->max_contexts
> 1) {
4210 if(s
->avctx
->flags2
& CODEC_FLAG2_FAST
) {
4211 /* Cheat slightly for speed:
4212 Do not bother to deblock across slices. */
4213 h
->deblocking_filter
= 2;
4215 h0
->max_contexts
= 1;
4216 if(!h0
->single_decode_warning
) {
4217 av_log(s
->avctx
, AV_LOG_INFO
, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4218 h0
->single_decode_warning
= 1;
4221 return 1; // deblocking switched inside frame
4226 if( h
->pps
.num_slice_groups
> 1 && h
->pps
.mb_slice_group_map_type
>= 3 && h
->pps
.mb_slice_group_map_type
<= 5)
4227 slice_group_change_cycle
= get_bits(&s
->gb
, ?);
4230 h0
->last_slice_type
= slice_type
;
4231 h
->slice_num
= ++h0
->current_slice
;
4233 h
->emu_edge_width
= (s
->flags
&CODEC_FLAG_EMU_EDGE
) ? 0 : 16;
4234 h
->emu_edge_height
= (FRAME_MBAFF
|| FIELD_PICTURE
) ? 0 : h
->emu_edge_width
;
4236 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
4237 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4239 (s
->picture_structure
==PICT_FRAME
? "F" : s
->picture_structure
==PICT_TOP_FIELD
? "T" : "B"),
4241 av_get_pict_type_char(h
->slice_type
),
4242 pps_id
, h
->frame_num
,
4243 s
->current_picture_ptr
->field_poc
[0], s
->current_picture_ptr
->field_poc
[1],
4244 h
->ref_count
[0], h
->ref_count
[1],
4246 h
->deblocking_filter
, h
->slice_alpha_c0_offset
/2, h
->slice_beta_offset
/2,
4248 h
->use_weight
==1 && h
->use_weight_chroma
? "c" : ""
4258 static inline int get_level_prefix(GetBitContext
*gb
){
4262 OPEN_READER(re
, gb
);
4263 UPDATE_CACHE(re
, gb
);
4264 buf
=GET_CACHE(re
, gb
);
4266 log
= 32 - av_log2(buf
);
4268 print_bin(buf
>>(32-log
), log
);
4269 av_log(NULL
, AV_LOG_DEBUG
, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf
>>(32-log
), log
, log
-1, get_bits_count(gb
), __FILE__
);
4272 LAST_SKIP_BITS(re
, gb
, log
);
4273 CLOSE_READER(re
, gb
);
4278 static inline int get_dct8x8_allowed(H264Context
*h
){
4281 if(!IS_SUB_8X8(h
->sub_mb_type
[i
])
4282 || (!h
->sps
.direct_8x8_inference_flag
&& IS_DIRECT(h
->sub_mb_type
[i
])))
4289 * decodes a residual block.
4290 * @param n block index
4291 * @param scantable scantable
4292 * @param max_coeff number of coefficients in the block
4293 * @return <0 if an error occurred
4295 static int decode_residual(H264Context
*h
, GetBitContext
*gb
, DCTELEM
*block
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
){
4296 MpegEncContext
* const s
= &h
->s
;
4297 static const int coeff_token_table_index
[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4299 int zeros_left
, coeff_num
, coeff_token
, total_coeff
, i
, j
, trailing_ones
, run_before
;
4301 //FIXME put trailing_onex into the context
4303 if(n
== CHROMA_DC_BLOCK_INDEX
){
4304 coeff_token
= get_vlc2(gb
, chroma_dc_coeff_token_vlc
.table
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 1);
4305 total_coeff
= coeff_token
>>2;
4307 if(n
== LUMA_DC_BLOCK_INDEX
){
4308 total_coeff
= pred_non_zero_count(h
, 0);
4309 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4310 total_coeff
= coeff_token
>>2;
4312 total_coeff
= pred_non_zero_count(h
, n
);
4313 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4314 total_coeff
= coeff_token
>>2;
4315 h
->non_zero_count_cache
[ scan8
[n
] ]= total_coeff
;
4319 //FIXME set last_non_zero?
4323 if(total_coeff
> (unsigned)max_coeff
) {
4324 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "corrupted macroblock %d %d (total_coeff=%d)\n", s
->mb_x
, s
->mb_y
, total_coeff
);
4328 trailing_ones
= coeff_token
&3;
4329 tprintf(h
->s
.avctx
, "trailing:%d, total:%d\n", trailing_ones
, total_coeff
);
4330 assert(total_coeff
<=16);
4332 for(i
=0; i
<trailing_ones
; i
++){
4333 level
[i
]= 1 - 2*get_bits1(gb
);
4337 int level_code
, mask
;
4338 int suffix_length
= total_coeff
> 10 && trailing_ones
< 3;
4339 int prefix
= get_level_prefix(gb
);
4341 //first coefficient has suffix_length equal to 0 or 1
4342 if(prefix
<14){ //FIXME try to build a large unified VLC table for all this
4344 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4346 level_code
= (prefix
<<suffix_length
); //part
4347 }else if(prefix
==14){
4349 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4351 level_code
= prefix
+ get_bits(gb
, 4); //part
4352 }else if(prefix
==15){
4353 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, 12); //part
4354 if(suffix_length
==0) level_code
+=15; //FIXME doesn't make (much)sense
4356 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "prefix too large at %d %d\n", s
->mb_x
, s
->mb_y
);
4360 if(trailing_ones
< 3) level_code
+= 2;
4365 mask
= -(level_code
&1);
4366 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
4369 //remaining coefficients have suffix_length > 0
4370 for(;i
<total_coeff
;i
++) {
4371 static const int suffix_limit
[7] = {0,5,11,23,47,95,INT_MAX
};
4372 prefix
= get_level_prefix(gb
);
4374 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
);
4375 }else if(prefix
==15){
4376 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, 12);
4378 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "prefix too large at %d %d\n", s
->mb_x
, s
->mb_y
);
4381 mask
= -(level_code
&1);
4382 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
4383 if(level_code
> suffix_limit
[suffix_length
])
4388 if(total_coeff
== max_coeff
)
4391 if(n
== CHROMA_DC_BLOCK_INDEX
)
4392 zeros_left
= get_vlc2(gb
, chroma_dc_total_zeros_vlc
[ total_coeff
-1 ].table
, CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 1);
4394 zeros_left
= get_vlc2(gb
, total_zeros_vlc
[ total_coeff
-1 ].table
, TOTAL_ZEROS_VLC_BITS
, 1);
4397 coeff_num
= zeros_left
+ total_coeff
- 1;
4398 j
= scantable
[coeff_num
];
4400 block
[j
] = level
[0];
4401 for(i
=1;i
<total_coeff
;i
++) {
4404 else if(zeros_left
< 7){
4405 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4407 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4409 zeros_left
-= run_before
;
4410 coeff_num
-= 1 + run_before
;
4411 j
= scantable
[ coeff_num
];
4416 block
[j
] = (level
[0] * qmul
[j
] + 32)>>6;
4417 for(i
=1;i
<total_coeff
;i
++) {
4420 else if(zeros_left
< 7){
4421 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4423 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4425 zeros_left
-= run_before
;
4426 coeff_num
-= 1 + run_before
;
4427 j
= scantable
[ coeff_num
];
4429 block
[j
]= (level
[i
] * qmul
[j
] + 32)>>6;
4434 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "negative number of zero coeffs at %d %d\n", s
->mb_x
, s
->mb_y
);
4441 static void predict_field_decoding_flag(H264Context
*h
){
4442 MpegEncContext
* const s
= &h
->s
;
4443 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
4444 int mb_type
= (h
->slice_table
[mb_xy
-1] == h
->slice_num
)
4445 ? s
->current_picture
.mb_type
[mb_xy
-1]
4446 : (h
->slice_table
[mb_xy
-s
->mb_stride
] == h
->slice_num
)
4447 ? s
->current_picture
.mb_type
[mb_xy
-s
->mb_stride
]
4449 h
->mb_mbaff
= h
->mb_field_decoding_flag
= IS_INTERLACED(mb_type
) ? 1 : 0;
4453 * decodes a P_SKIP or B_SKIP macroblock
4455 static void decode_mb_skip(H264Context
*h
){
4456 MpegEncContext
* const s
= &h
->s
;
4457 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
4460 memset(h
->non_zero_count
[mb_xy
], 0, 16);
4461 memset(h
->non_zero_count_cache
+ 8, 0, 8*5); //FIXME ugly, remove pfui
4464 mb_type
|= MB_TYPE_INTERLACED
;
4466 if( h
->slice_type
== FF_B_TYPE
)
4468 // just for fill_caches. pred_direct_motion will set the real mb_type
4469 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
|MB_TYPE_SKIP
;
4471 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4472 pred_direct_motion(h
, &mb_type
);
4473 mb_type
|= MB_TYPE_SKIP
;
4478 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P1L0
|MB_TYPE_SKIP
;
4480 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4481 pred_pskip_motion(h
, &mx
, &my
);
4482 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, 0, 1);
4483 fill_rectangle( h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mx
,my
), 4);
4486 write_back_motion(h
, mb_type
);
4487 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4488 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4489 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4490 h
->prev_mb_skipped
= 1;
4494 * decodes a macroblock
4495 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4497 static int decode_mb_cavlc(H264Context
*h
){
4498 MpegEncContext
* const s
= &h
->s
;
4499 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
4500 int partition_count
;
4501 unsigned int mb_type
, cbp
;
4502 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
4504 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?
4506 tprintf(s
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
4507 cbp
= 0; /* avoid warning. FIXME: find a solution without slowing
4509 if(h
->slice_type
!= FF_I_TYPE
&& h
->slice_type
!= FF_SI_TYPE
){
4510 if(s
->mb_skip_run
==-1)
4511 s
->mb_skip_run
= get_ue_golomb(&s
->gb
);
4513 if (s
->mb_skip_run
--) {
4514 if(FRAME_MBAFF
&& (s
->mb_y
&1) == 0){
4515 if(s
->mb_skip_run
==0)
4516 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4518 predict_field_decoding_flag(h
);
4525 if( (s
->mb_y
&1) == 0 )
4526 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4528 h
->mb_field_decoding_flag
= (s
->picture_structure
!=PICT_FRAME
);
4530 h
->prev_mb_skipped
= 0;
4532 mb_type
= get_ue_golomb(&s
->gb
);
4533 if(h
->slice_type
== FF_B_TYPE
){
4535 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
4536 mb_type
= b_mb_type_info
[mb_type
].type
;
4539 goto decode_intra_mb
;
4541 }else if(h
->slice_type
== FF_P_TYPE
/*|| h->slice_type == FF_SP_TYPE */){
4543 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
4544 mb_type
= p_mb_type_info
[mb_type
].type
;
4547 goto decode_intra_mb
;
4550 assert(h
->slice_type
== FF_I_TYPE
);
4553 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_type %d in %c slice too large at %d %d\n", mb_type
, av_get_pict_type_char(h
->slice_type
), s
->mb_x
, s
->mb_y
);
4557 cbp
= i_mb_type_info
[mb_type
].cbp
;
4558 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
4559 mb_type
= i_mb_type_info
[mb_type
].type
;
4563 mb_type
|= MB_TYPE_INTERLACED
;
4565 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4567 if(IS_INTRA_PCM(mb_type
)){
4570 // We assume these blocks are very rare so we do not optimize it.
4571 align_get_bits(&s
->gb
);
4573 // The pixels are stored in the same order as levels in h->mb array.
4574 for(y
=0; y
<16; y
++){
4575 const int index
= 4*(y
&3) + 32*((y
>>2)&1) + 128*(y
>>3);
4576 for(x
=0; x
<16; x
++){
4577 tprintf(s
->avctx
, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
4578 h
->mb
[index
+ (x
&3) + 16*((x
>>2)&1) + 64*(x
>>3)]= get_bits(&s
->gb
, 8);
4582 const int index
= 256 + 4*(y
&3) + 32*(y
>>2);
4584 tprintf(s
->avctx
, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
4585 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= get_bits(&s
->gb
, 8);
4589 const int index
= 256 + 64 + 4*(y
&3) + 32*(y
>>2);
4591 tprintf(s
->avctx
, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
4592 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= get_bits(&s
->gb
, 8);
4596 // In deblocking, the quantizer is 0
4597 s
->current_picture
.qscale_table
[mb_xy
]= 0;
4598 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, 0);
4599 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, 0);
4600 // All coeffs are present
4601 memset(h
->non_zero_count
[mb_xy
], 16, 16);
4603 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4608 h
->ref_count
[0] <<= 1;
4609 h
->ref_count
[1] <<= 1;
4612 fill_caches(h
, mb_type
, 0);
4615 if(IS_INTRA(mb_type
)){
4617 // init_top_left_availability(h);
4618 if(IS_INTRA4x4(mb_type
)){
4621 if(dct8x8_allowed
&& get_bits1(&s
->gb
)){
4622 mb_type
|= MB_TYPE_8x8DCT
;
4626 // fill_intra4x4_pred_table(h);
4627 for(i
=0; i
<16; i
+=di
){
4628 int mode
= pred_intra_mode(h
, i
);
4630 if(!get_bits1(&s
->gb
)){
4631 const int rem_mode
= get_bits(&s
->gb
, 3);
4632 mode
= rem_mode
+ (rem_mode
>= mode
);
4636 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
4638 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = mode
;
4640 write_back_intra_pred_mode(h
);
4641 if( check_intra4x4_pred_mode(h
) < 0)
4644 h
->intra16x16_pred_mode
= check_intra_pred_mode(h
, h
->intra16x16_pred_mode
);
4645 if(h
->intra16x16_pred_mode
< 0)
4649 pred_mode
= check_intra_pred_mode(h
, get_ue_golomb(&s
->gb
));
4652 h
->chroma_pred_mode
= pred_mode
;
4653 }else if(partition_count
==4){
4654 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
4656 if(h
->slice_type
== FF_B_TYPE
){
4658 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
4659 if(h
->sub_mb_type
[i
] >=13){
4660 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "B sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4663 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4664 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4666 if( IS_DIRECT(h
->sub_mb_type
[0]) || IS_DIRECT(h
->sub_mb_type
[1])
4667 || IS_DIRECT(h
->sub_mb_type
[2]) || IS_DIRECT(h
->sub_mb_type
[3])) {
4668 pred_direct_motion(h
, &mb_type
);
4669 h
->ref_cache
[0][scan8
[4]] =
4670 h
->ref_cache
[1][scan8
[4]] =
4671 h
->ref_cache
[0][scan8
[12]] =
4672 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
4675 assert(h
->slice_type
== FF_P_TYPE
|| h
->slice_type
== FF_SP_TYPE
); //FIXME SP correct ?
4677 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
4678 if(h
->sub_mb_type
[i
] >=4){
4679 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "P sub_mb_type %u out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4682 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4683 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4687 for(list
=0; list
<h
->list_count
; list
++){
4688 int ref_count
= IS_REF0(mb_type
) ? 1 : h
->ref_count
[list
];
4690 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
4691 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4692 unsigned int tmp
= get_te0_golomb(&s
->gb
, ref_count
); //FIXME init to 0 before and skip?
4694 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", tmp
);
4706 dct8x8_allowed
= get_dct8x8_allowed(h
);
4708 for(list
=0; list
<h
->list_count
; list
++){
4710 if(IS_DIRECT(h
->sub_mb_type
[i
])) {
4711 h
->ref_cache
[list
][ scan8
[4*i
] ] = h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
4714 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
4715 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
4717 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4718 const int sub_mb_type
= h
->sub_mb_type
[i
];
4719 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
4720 for(j
=0; j
<sub_partition_count
[i
]; j
++){
4722 const int index
= 4*i
+ block_width
*j
;
4723 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
4724 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mx
, &my
);
4725 mx
+= get_se_golomb(&s
->gb
);
4726 my
+= get_se_golomb(&s
->gb
);
4727 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4729 if(IS_SUB_8X8(sub_mb_type
)){
4731 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
4733 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
4734 }else if(IS_SUB_8X4(sub_mb_type
)){
4735 mv_cache
[ 1 ][0]= mx
;
4736 mv_cache
[ 1 ][1]= my
;
4737 }else if(IS_SUB_4X8(sub_mb_type
)){
4738 mv_cache
[ 8 ][0]= mx
;
4739 mv_cache
[ 8 ][1]= my
;
4741 mv_cache
[ 0 ][0]= mx
;
4742 mv_cache
[ 0 ][1]= my
;
4745 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
4751 }else if(IS_DIRECT(mb_type
)){
4752 pred_direct_motion(h
, &mb_type
);
4753 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
4755 int list
, mx
, my
, i
;
4756 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4757 if(IS_16X16(mb_type
)){
4758 for(list
=0; list
<h
->list_count
; list
++){
4760 if(IS_DIR(mb_type
, 0, list
)){
4761 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4762 if(val
>= h
->ref_count
[list
]){
4763 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4767 val
= LIST_NOT_USED
&0xFF;
4768 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 1);
4770 for(list
=0; list
<h
->list_count
; list
++){
4772 if(IS_DIR(mb_type
, 0, list
)){
4773 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mx
, &my
);
4774 mx
+= get_se_golomb(&s
->gb
);
4775 my
+= get_se_golomb(&s
->gb
);
4776 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4778 val
= pack16to32(mx
,my
);
4781 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 4);
4784 else if(IS_16X8(mb_type
)){
4785 for(list
=0; list
<h
->list_count
; list
++){
4788 if(IS_DIR(mb_type
, i
, list
)){
4789 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4790 if(val
>= h
->ref_count
[list
]){
4791 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4795 val
= LIST_NOT_USED
&0xFF;
4796 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 1);
4799 for(list
=0; list
<h
->list_count
; list
++){
4802 if(IS_DIR(mb_type
, i
, list
)){
4803 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mx
, &my
);
4804 mx
+= get_se_golomb(&s
->gb
);
4805 my
+= get_se_golomb(&s
->gb
);
4806 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4808 val
= pack16to32(mx
,my
);
4811 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 4);
4815 assert(IS_8X16(mb_type
));
4816 for(list
=0; list
<h
->list_count
; list
++){
4819 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
4820 val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4821 if(val
>= h
->ref_count
[list
]){
4822 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "ref %u overflow\n", val
);
4826 val
= LIST_NOT_USED
&0xFF;
4827 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 1);
4830 for(list
=0; list
<h
->list_count
; list
++){
4833 if(IS_DIR(mb_type
, i
, list
)){
4834 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mx
, &my
);
4835 mx
+= get_se_golomb(&s
->gb
);
4836 my
+= get_se_golomb(&s
->gb
);
4837 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
4839 val
= pack16to32(mx
,my
);
4842 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 4);
4848 if(IS_INTER(mb_type
))
4849 write_back_motion(h
, mb_type
);
4851 if(!IS_INTRA16x16(mb_type
)){
4852 cbp
= get_ue_golomb(&s
->gb
);
4854 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cbp too large (%u) at %d %d\n", cbp
, s
->mb_x
, s
->mb_y
);
4858 if(IS_INTRA4x4(mb_type
))
4859 cbp
= golomb_to_intra4x4_cbp
[cbp
];
4861 cbp
= golomb_to_inter_cbp
[cbp
];
4865 if(dct8x8_allowed
&& (cbp
&15) && !IS_INTRA(mb_type
)){
4866 if(get_bits1(&s
->gb
))
4867 mb_type
|= MB_TYPE_8x8DCT
;
4869 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4871 if(cbp
|| IS_INTRA16x16(mb_type
)){
4872 int i8x8
, i4x4
, chroma_idx
;
4874 GetBitContext
*gb
= IS_INTRA(mb_type
) ? h
->intra_gb_ptr
: h
->inter_gb_ptr
;
4875 const uint8_t *scan
, *scan8x8
, *dc_scan
;
4877 // fill_non_zero_count_cache(h);
4879 if(IS_INTERLACED(mb_type
)){
4880 scan8x8
= s
->qscale
? h
->field_scan8x8_cavlc
: h
->field_scan8x8_cavlc_q0
;
4881 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
4882 dc_scan
= luma_dc_field_scan
;
4884 scan8x8
= s
->qscale
? h
->zigzag_scan8x8_cavlc
: h
->zigzag_scan8x8_cavlc_q0
;
4885 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
4886 dc_scan
= luma_dc_zigzag_scan
;
4889 dquant
= get_se_golomb(&s
->gb
);
4891 if( dquant
> 25 || dquant
< -26 ){
4892 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "dquant out of range (%d) at %d %d\n", dquant
, s
->mb_x
, s
->mb_y
);
4896 s
->qscale
+= dquant
;
4897 if(((unsigned)s
->qscale
) > 51){
4898 if(s
->qscale
<0) s
->qscale
+= 52;
4899 else s
->qscale
-= 52;
4902 h
->chroma_qp
[0]= get_chroma_qp(h
, 0, s
->qscale
);
4903 h
->chroma_qp
[1]= get_chroma_qp(h
, 1, s
->qscale
);
4904 if(IS_INTRA16x16(mb_type
)){
4905 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
, LUMA_DC_BLOCK_INDEX
, dc_scan
, h
->dequant4_coeff
[0][s
->qscale
], 16) < 0){
4906 return -1; //FIXME continue if partitioned and other return -1 too
4909 assert((cbp
&15) == 0 || (cbp
&15) == 15);
4912 for(i8x8
=0; i8x8
<4; i8x8
++){
4913 for(i4x4
=0; i4x4
<4; i4x4
++){
4914 const int index
= i4x4
+ 4*i8x8
;
4915 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
+ 16*index
, index
, scan
+ 1, h
->dequant4_coeff
[0][s
->qscale
], 15) < 0 ){
4921 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
4924 for(i8x8
=0; i8x8
<4; i8x8
++){
4925 if(cbp
& (1<<i8x8
)){
4926 if(IS_8x8DCT(mb_type
)){
4927 DCTELEM
*buf
= &h
->mb
[64*i8x8
];
4929 for(i4x4
=0; i4x4
<4; i4x4
++){
4930 if( decode_residual(h
, gb
, buf
, i4x4
+4*i8x8
, scan8x8
+16*i4x4
,
4931 h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 16) <0 )
4934 nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
4935 nnz
[0] += nnz
[1] + nnz
[8] + nnz
[9];
4937 for(i4x4
=0; i4x4
<4; i4x4
++){
4938 const int index
= i4x4
+ 4*i8x8
;
4940 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
, h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
], 16) <0 ){
4946 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
4947 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
4953 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++)
4954 if( decode_residual(h
, gb
, h
->mb
+ 256 + 16*4*chroma_idx
, CHROMA_DC_BLOCK_INDEX
, chroma_dc_scan
, NULL
, 4) < 0){
4960 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++){
4961 const uint32_t *qmul
= h
->dequant4_coeff
[chroma_idx
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[chroma_idx
]];
4962 for(i4x4
=0; i4x4
<4; i4x4
++){
4963 const int index
= 16 + 4*chroma_idx
+ i4x4
;
4964 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
+ 1, qmul
, 15) < 0){
4970 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
4971 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
4972 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
4975 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
4976 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
4977 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
4978 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
4980 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4981 write_back_non_zero_count(h
);
4984 h
->ref_count
[0] >>= 1;
4985 h
->ref_count
[1] >>= 1;
4991 static int decode_cabac_field_decoding_flag(H264Context
*h
) {
4992 MpegEncContext
* const s
= &h
->s
;
4993 const int mb_x
= s
->mb_x
;
4994 const int mb_y
= s
->mb_y
& ~1;
4995 const int mba_xy
= mb_x
- 1 + mb_y
*s
->mb_stride
;
4996 const int mbb_xy
= mb_x
+ (mb_y
-2)*s
->mb_stride
;
4998 unsigned int ctx
= 0;
5000 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) ) {
5003 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) ) {
5007 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[70 + ctx
] );
5010 static int decode_cabac_intra_mb_type(H264Context
*h
, int ctx_base
, int intra_slice
) {
5011 uint8_t *state
= &h
->cabac_state
[ctx_base
];
5015 MpegEncContext
* const s
= &h
->s
;
5016 const int mba_xy
= h
->left_mb_xy
[0];
5017 const int mbb_xy
= h
->top_mb_xy
;
5019 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mba_xy
] ) )
5021 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mbb_xy
] ) )
5023 if( get_cabac_noinline( &h
->cabac
, &state
[ctx
] ) == 0 )
5024 return 0; /* I4x4 */
5027 if( get_cabac_noinline( &h
->cabac
, &state
[0] ) == 0 )
5028 return 0; /* I4x4 */
5031 if( get_cabac_terminate( &h
->cabac
) )
5032 return 25; /* PCM */
5034 mb_type
= 1; /* I16x16 */
5035 mb_type
+= 12 * get_cabac_noinline( &h
->cabac
, &state
[1] ); /* cbp_luma != 0 */
5036 if( get_cabac_noinline( &h
->cabac
, &state
[2] ) ) /* cbp_chroma */
5037 mb_type
+= 4 + 4 * get_cabac_noinline( &h
->cabac
, &state
[2+intra_slice
] );
5038 mb_type
+= 2 * get_cabac_noinline( &h
->cabac
, &state
[3+intra_slice
] );
5039 mb_type
+= 1 * get_cabac_noinline( &h
->cabac
, &state
[3+2*intra_slice
] );
5043 static int decode_cabac_mb_type( H264Context
*h
) {
5044 MpegEncContext
* const s
= &h
->s
;
5046 if( h
->slice_type
== FF_I_TYPE
) {
5047 return decode_cabac_intra_mb_type(h
, 3, 1);
5048 } else if( h
->slice_type
== FF_P_TYPE
) {
5049 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[14] ) == 0 ) {
5051 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[15] ) == 0 ) {
5052 /* P_L0_D16x16, P_8x8 */
5053 return 3 * get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[16] );
5055 /* P_L0_D8x16, P_L0_D16x8 */
5056 return 2 - get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[17] );
5059 return decode_cabac_intra_mb_type(h
, 17, 0) + 5;
5061 } else if( h
->slice_type
== FF_B_TYPE
) {
5062 const int mba_xy
= h
->left_mb_xy
[0];
5063 const int mbb_xy
= h
->top_mb_xy
;
5067 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mba_xy
] ) )
5069 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mbb_xy
] ) )
5072 if( !get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+ctx
] ) )
5073 return 0; /* B_Direct_16x16 */
5075 if( !get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+3] ) ) {
5076 return 1 + get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ); /* B_L[01]_16x16 */
5079 bits
= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+4] ) << 3;
5080 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ) << 2;
5081 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] ) << 1;
5082 bits
|= get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] );
5084 return bits
+ 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5085 else if( bits
== 13 ) {
5086 return decode_cabac_intra_mb_type(h
, 32, 0) + 23;
5087 } else if( bits
== 14 )
5088 return 11; /* B_L1_L0_8x16 */
5089 else if( bits
== 15 )
5090 return 22; /* B_8x8 */
5092 bits
= ( bits
<<1 ) | get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[27+5] );
5093 return bits
- 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5095 /* TODO SI/SP frames? */
5100 static int decode_cabac_mb_skip( H264Context
*h
, int mb_x
, int mb_y
) {
5101 MpegEncContext
* const s
= &h
->s
;
5105 if(FRAME_MBAFF
){ //FIXME merge with the stuff in fill_caches?
5106 int mb_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
5109 && h
->slice_table
[mba_xy
] == h
->slice_num
5110 && MB_FIELD
== !!IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) )
5111 mba_xy
+= s
->mb_stride
;
5113 mbb_xy
= mb_xy
- s
->mb_stride
;
5115 && h
->slice_table
[mbb_xy
] == h
->slice_num
5116 && IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) )
5117 mbb_xy
-= s
->mb_stride
;
5119 mbb_xy
= mb_x
+ (mb_y
-1)*s
->mb_stride
;
5121 int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
5123 mbb_xy
= mb_xy
- (s
->mb_stride
<< FIELD_PICTURE
);
5126 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mba_xy
] ))
5128 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mbb_xy
] ))
5131 if( h
->slice_type
== FF_B_TYPE
)
5133 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[11+ctx
] );
5136 static int decode_cabac_mb_intra4x4_pred_mode( H264Context
*h
, int pred_mode
) {
5139 if( get_cabac( &h
->cabac
, &h
->cabac_state
[68] ) )
5142 mode
+= 1 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5143 mode
+= 2 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5144 mode
+= 4 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5146 if( mode
>= pred_mode
)
5152 static int decode_cabac_mb_chroma_pre_mode( H264Context
*h
) {
5153 const int mba_xy
= h
->left_mb_xy
[0];
5154 const int mbb_xy
= h
->top_mb_xy
;
5158 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5159 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mba_xy
] != 0 )
5162 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mbb_xy
] != 0 )
5165 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+ctx
] ) == 0 )
5168 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
5170 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
5176 static int decode_cabac_mb_cbp_luma( H264Context
*h
) {
5177 int cbp_b
, cbp_a
, ctx
, cbp
= 0;
5179 cbp_a
= h
->slice_table
[h
->left_mb_xy
[0]] == h
->slice_num
? h
->left_cbp
: -1;
5180 cbp_b
= h
->slice_table
[h
->top_mb_xy
] == h
->slice_num
? h
->top_cbp
: -1;
5182 ctx
= !(cbp_a
& 0x02) + 2 * !(cbp_b
& 0x04);
5183 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]);
5184 ctx
= !(cbp
& 0x01) + 2 * !(cbp_b
& 0x08);
5185 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 1;
5186 ctx
= !(cbp_a
& 0x08) + 2 * !(cbp
& 0x01);
5187 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 2;
5188 ctx
= !(cbp
& 0x04) + 2 * !(cbp
& 0x02);
5189 cbp
|= get_cabac_noinline(&h
->cabac
, &h
->cabac_state
[73 + ctx
]) << 3;
5192 static int decode_cabac_mb_cbp_chroma( H264Context
*h
) {
5196 cbp_a
= (h
->left_cbp
>>4)&0x03;
5197 cbp_b
= (h
-> top_cbp
>>4)&0x03;
5200 if( cbp_a
> 0 ) ctx
++;
5201 if( cbp_b
> 0 ) ctx
+= 2;
5202 if( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[77 + ctx
] ) == 0 )
5206 if( cbp_a
== 2 ) ctx
++;
5207 if( cbp_b
== 2 ) ctx
+= 2;
5208 return 1 + get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[77 + ctx
] );
5210 static int decode_cabac_mb_dqp( H264Context
*h
) {
5214 if( h
->last_qscale_diff
!= 0 )
5217 while( get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[60 + ctx
] ) ) {
5223 if(val
> 102) //prevent infinite loop
5230 return -(val
+ 1)/2;
5232 static int decode_cabac_p_mb_sub_type( H264Context
*h
) {
5233 if( get_cabac( &h
->cabac
, &h
->cabac_state
[21] ) )
5235 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[22] ) )
5237 if( get_cabac( &h
->cabac
, &h
->cabac_state
[23] ) )
5241 static int decode_cabac_b_mb_sub_type( H264Context
*h
) {
5243 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[36] ) )
5244 return 0; /* B_Direct_8x8 */
5245 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[37] ) )
5246 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L0_8x8, B_L1_8x8 */
5248 if( get_cabac( &h
->cabac
, &h
->cabac_state
[38] ) ) {
5249 if( get_cabac( &h
->cabac
, &h
->cabac_state
[39] ) )
5250 return 11 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L1_4x4, B_Bi_4x4 */
5253 type
+= 2*get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5254 type
+= get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5258 static inline int decode_cabac_mb_transform_size( H264Context
*h
) {
5259 return get_cabac_noinline( &h
->cabac
, &h
->cabac_state
[399 + h
->neighbor_transform_size
] );
5262 static int decode_cabac_mb_ref( H264Context
*h
, int list
, int n
) {
5263 int refa
= h
->ref_cache
[list
][scan8
[n
] - 1];
5264 int refb
= h
->ref_cache
[list
][scan8
[n
] - 8];
5268 if( h
->slice_type
== FF_B_TYPE
) {
5269 if( refa
> 0 && !h
->direct_cache
[scan8
[n
] - 1] )
5271 if( refb
> 0 && !h
->direct_cache
[scan8
[n
] - 8] )
5280 while( get_cabac( &h
->cabac
, &h
->cabac_state
[54+ctx
] ) ) {
5286 if(ref
>= 32 /*h->ref_list[list]*/){
5287 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "overflow in decode_cabac_mb_ref\n");
5288 return 0; //FIXME we should return -1 and check the return everywhere
5294 static int decode_cabac_mb_mvd( H264Context
*h
, int list
, int n
, int l
) {
5295 int amvd
= abs( h
->mvd_cache
[list
][scan8
[n
] - 1][l
] ) +
5296 abs( h
->mvd_cache
[list
][scan8
[n
] - 8][l
] );
5297 int ctxbase
= (l
== 0) ? 40 : 47;
5302 else if( amvd
> 32 )
5307 if(!get_cabac(&h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
]))
5312 while( mvd
< 9 && get_cabac( &h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
] ) ) {
5320 while( get_cabac_bypass( &h
->cabac
) ) {
5324 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "overflow in decode_cabac_mb_mvd\n");
5329 if( get_cabac_bypass( &h
->cabac
) )
5333 return get_cabac_bypass_sign( &h
->cabac
, -mvd
);
5336 static inline int get_cabac_cbf_ctx( H264Context
*h
, int cat
, int idx
) {
5341 nza
= h
->left_cbp
&0x100;
5342 nzb
= h
-> top_cbp
&0x100;
5343 } else if( cat
== 1 || cat
== 2 ) {
5344 nza
= h
->non_zero_count_cache
[scan8
[idx
] - 1];
5345 nzb
= h
->non_zero_count_cache
[scan8
[idx
] - 8];
5346 } else if( cat
== 3 ) {
5347 nza
= (h
->left_cbp
>>(6+idx
))&0x01;
5348 nzb
= (h
-> top_cbp
>>(6+idx
))&0x01;
5351 nza
= h
->non_zero_count_cache
[scan8
[16+idx
] - 1];
5352 nzb
= h
->non_zero_count_cache
[scan8
[16+idx
] - 8];
5361 return ctx
+ 4 * cat
;
5364 DECLARE_ASM_CONST(1, const uint8_t, last_coeff_flag_offset_8x8
[63]) = {
5365 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5366 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5367 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5368 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5371 static void decode_cabac_residual( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
5372 const int mb_xy
= h
->s
.mb_x
+ h
->s
.mb_y
*h
->s
.mb_stride
;
5373 static const int significant_coeff_flag_offset
[2][6] = {
5374 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5375 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5377 static const int last_coeff_flag_offset
[2][6] = {
5378 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5379 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5381 static const int coeff_abs_level_m1_offset
[6] = {
5382 227+0, 227+10, 227+20, 227+30, 227+39, 426
5384 static const uint8_t significant_coeff_flag_offset_8x8
[2][63] = {
5385 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5386 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5387 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5388 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5389 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5390 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5391 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5392 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5398 int coeff_count
= 0;
5401 int abslevelgt1
= 0;
5403 uint8_t *significant_coeff_ctx_base
;
5404 uint8_t *last_coeff_ctx_base
;
5405 uint8_t *abs_level_m1_ctx_base
;
5408 #define CABAC_ON_STACK
5410 #ifdef CABAC_ON_STACK
5413 cc
.range
= h
->cabac
.range
;
5414 cc
.low
= h
->cabac
.low
;
5415 cc
.bytestream
= h
->cabac
.bytestream
;
5417 #define CC &h->cabac
5421 /* cat: 0-> DC 16x16 n = 0
5422 * 1-> AC 16x16 n = luma4x4idx
5423 * 2-> Luma4x4 n = luma4x4idx
5424 * 3-> DC Chroma n = iCbCr
5425 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5426 * 5-> Luma8x8 n = 4 * luma8x8idx
5429 /* read coded block flag */
5431 if( get_cabac( CC
, &h
->cabac_state
[85 + get_cabac_cbf_ctx( h
, cat
, n
) ] ) == 0 ) {
5432 if( cat
== 1 || cat
== 2 )
5433 h
->non_zero_count_cache
[scan8
[n
]] = 0;
5435 h
->non_zero_count_cache
[scan8
[16+n
]] = 0;
5436 #ifdef CABAC_ON_STACK
5437 h
->cabac
.range
= cc
.range
;
5438 h
->cabac
.low
= cc
.low
;
5439 h
->cabac
.bytestream
= cc
.bytestream
;
5445 significant_coeff_ctx_base
= h
->cabac_state
5446 + significant_coeff_flag_offset
[MB_FIELD
][cat
];
5447 last_coeff_ctx_base
= h
->cabac_state
5448 + last_coeff_flag_offset
[MB_FIELD
][cat
];
5449 abs_level_m1_ctx_base
= h
->cabac_state
5450 + coeff_abs_level_m1_offset
[cat
];
5453 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5454 for(last= 0; last < coefs; last++) { \
5455 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5456 if( get_cabac( CC, sig_ctx )) { \
5457 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5458 index[coeff_count++] = last; \
5459 if( get_cabac( CC, last_ctx ) ) { \
5465 if( last == max_coeff -1 ) {\
5466 index[coeff_count++] = last;\
5468 const uint8_t *sig_off
= significant_coeff_flag_offset_8x8
[MB_FIELD
];
5469 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5470 coeff_count
= decode_significance_8x8_x86(CC
, significant_coeff_ctx_base
, index
, sig_off
);
5472 coeff_count
= decode_significance_x86(CC
, max_coeff
, significant_coeff_ctx_base
, index
);
5474 DECODE_SIGNIFICANCE( 63, sig_off
[last
], last_coeff_flag_offset_8x8
[last
] );
5476 DECODE_SIGNIFICANCE( max_coeff
- 1, last
, last
);
5479 assert(coeff_count
> 0);
5482 h
->cbp_table
[mb_xy
] |= 0x100;
5483 else if( cat
== 1 || cat
== 2 )
5484 h
->non_zero_count_cache
[scan8
[n
]] = coeff_count
;
5486 h
->cbp_table
[mb_xy
] |= 0x40 << n
;
5488 h
->non_zero_count_cache
[scan8
[16+n
]] = coeff_count
;
5491 fill_rectangle(&h
->non_zero_count_cache
[scan8
[n
]], 2, 2, 8, coeff_count
, 1);
5494 for( coeff_count
--; coeff_count
>= 0; coeff_count
-- ) {
5495 uint8_t *ctx
= (abslevelgt1
!= 0 ? 0 : FFMIN( 4, abslevel1
)) + abs_level_m1_ctx_base
;
5496 int j
= scantable
[index
[coeff_count
]];
5498 if( get_cabac( CC
, ctx
) == 0 ) {
5500 block
[j
] = get_cabac_bypass_sign( CC
, -1);
5502 block
[j
] = (get_cabac_bypass_sign( CC
, -qmul
[j
]) + 32) >> 6;
5508 ctx
= 5 + FFMIN( 4, abslevelgt1
) + abs_level_m1_ctx_base
;
5509 while( coeff_abs
< 15 && get_cabac( CC
, ctx
) ) {
5513 if( coeff_abs
>= 15 ) {
5515 while( get_cabac_bypass( CC
) ) {
5521 coeff_abs
+= coeff_abs
+ get_cabac_bypass( CC
);
5527 if( get_cabac_bypass( CC
) ) block
[j
] = -coeff_abs
;
5528 else block
[j
] = coeff_abs
;
5530 if( get_cabac_bypass( CC
) ) block
[j
] = (-coeff_abs
* qmul
[j
] + 32) >> 6;
5531 else block
[j
] = ( coeff_abs
* qmul
[j
] + 32) >> 6;
5537 #ifdef CABAC_ON_STACK
5538 h
->cabac
.range
= cc
.range
;
5539 h
->cabac
.low
= cc
.low
;
5540 h
->cabac
.bytestream
= cc
.bytestream
;
5545 static inline void compute_mb_neighbors(H264Context
*h
)
5547 MpegEncContext
* const s
= &h
->s
;
5548 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5549 h
->top_mb_xy
= mb_xy
- s
->mb_stride
;
5550 h
->left_mb_xy
[0] = mb_xy
- 1;
5552 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
5553 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
5554 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
5555 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
5556 const int curr_mb_frame_flag
= !MB_FIELD
;
5557 const int bottom
= (s
->mb_y
& 1);
5559 ? !curr_mb_frame_flag
// bottom macroblock
5560 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
5562 h
->top_mb_xy
-= s
->mb_stride
;
5564 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
5565 h
->left_mb_xy
[0] = pair_xy
- 1;
5567 } else if (FIELD_PICTURE
) {
5568 h
->top_mb_xy
-= s
->mb_stride
;
5574 * decodes a macroblock
5575 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5577 static int decode_mb_cabac(H264Context
*h
) {
5578 MpegEncContext
* const s
= &h
->s
;
5579 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5580 int mb_type
, partition_count
, cbp
= 0;
5581 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
5583 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?)
5585 tprintf(s
->avctx
, "pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
5586 if( h
->slice_type
!= FF_I_TYPE
&& h
->slice_type
!= FF_SI_TYPE
) {
5588 /* a skipped mb needs the aff flag from the following mb */
5589 if( FRAME_MBAFF
&& s
->mb_x
==0 && (s
->mb_y
&1)==0 )
5590 predict_field_decoding_flag(h
);
5591 if( FRAME_MBAFF
&& (s
->mb_y
&1)==1 && h
->prev_mb_skipped
)
5592 skip
= h
->next_mb_skipped
;
5594 skip
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
);
5595 /* read skip flags */
5597 if( FRAME_MBAFF
&& (s
->mb_y
&1)==0 ){
5598 s
->current_picture
.mb_type
[mb_xy
] = MB_TYPE_SKIP
;
5599 h
->next_mb_skipped
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
+1 );
5600 if(h
->next_mb_skipped
)
5601 predict_field_decoding_flag(h
);
5603 h
->mb_mbaff
= h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5608 h
->cbp_table
[mb_xy
] = 0;
5609 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5610 h
->last_qscale_diff
= 0;
5617 if( (s
->mb_y
&1) == 0 )
5619 h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5621 h
->mb_field_decoding_flag
= (s
->picture_structure
!=PICT_FRAME
);
5623 h
->prev_mb_skipped
= 0;
5625 compute_mb_neighbors(h
);
5626 if( ( mb_type
= decode_cabac_mb_type( h
) ) < 0 ) {
5627 av_log( h
->s
.avctx
, AV_LOG_ERROR
, "decode_cabac_mb_type failed\n" );
5631 if( h
->slice_type
== FF_B_TYPE
) {
5633 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
5634 mb_type
= b_mb_type_info
[mb_type
].type
;
5637 goto decode_intra_mb
;
5639 } else if( h
->slice_type
== FF_P_TYPE
) {
5641 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
5642 mb_type
= p_mb_type_info
[mb_type
].type
;
5645 goto decode_intra_mb
;
5648 assert(h
->slice_type
== FF_I_TYPE
);
5650 partition_count
= 0;
5651 cbp
= i_mb_type_info
[mb_type
].cbp
;
5652 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
5653 mb_type
= i_mb_type_info
[mb_type
].type
;
5656 mb_type
|= MB_TYPE_INTERLACED
;
5658 h
->slice_table
[ mb_xy
]= h
->slice_num
;
5660 if(IS_INTRA_PCM(mb_type
)) {
5664 // We assume these blocks are very rare so we do not optimize it.
5665 // FIXME The two following lines get the bitstream position in the cabac
5666 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5667 ptr
= h
->cabac
.bytestream
;
5668 if(h
->cabac
.low
&0x1) ptr
--;
5670 if(h
->cabac
.low
&0x1FF) ptr
--;
5673 // The pixels are stored in the same order as levels in h->mb array.
5674 for(y
=0; y
<16; y
++){
5675 const int index
= 4*(y
&3) + 32*((y
>>2)&1) + 128*(y
>>3);
5676 for(x
=0; x
<16; x
++){
5677 tprintf(s
->avctx
, "LUMA ICPM LEVEL (%3d)\n", *ptr
);
5678 h
->mb
[index
+ (x
&3) + 16*((x
>>2)&1) + 64*(x
>>3)]= *ptr
++;
5682 const int index
= 256 + 4*(y
&3) + 32*(y
>>2);
5684 tprintf(s
->avctx
, "CHROMA U ICPM LEVEL (%3d)\n", *ptr
);
5685 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= *ptr
++;
5689 const int index
= 256 + 64 + 4*(y
&3) + 32*(y
>>2);
5691 tprintf(s
->avctx
, "CHROMA V ICPM LEVEL (%3d)\n", *ptr
);
5692 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= *ptr
++;
5696 ff_init_cabac_decoder(&h
->cabac
, ptr
, h
->cabac
.bytestream_end
- ptr
);
5698 // All blocks are present
5699 h
->cbp_table
[mb_xy
] = 0x1ef;
5700 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5701 // In deblocking, the quantizer is 0
5702 s
->current_picture
.qscale_table
[mb_xy
]= 0;
5703 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, 0);
5704 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, 0);
5705 // All coeffs are present
5706 memset(h
->non_zero_count
[mb_xy
], 16, 16);
5707 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5712 h
->ref_count
[0] <<= 1;
5713 h
->ref_count
[1] <<= 1;
5716 fill_caches(h
, mb_type
, 0);
5718 if( IS_INTRA( mb_type
) ) {
5720 if( IS_INTRA4x4( mb_type
) ) {
5721 if( dct8x8_allowed
&& decode_cabac_mb_transform_size( h
) ) {
5722 mb_type
|= MB_TYPE_8x8DCT
;
5723 for( i
= 0; i
< 16; i
+=4 ) {
5724 int pred
= pred_intra_mode( h
, i
);
5725 int mode
= decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5726 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
5729 for( i
= 0; i
< 16; i
++ ) {
5730 int pred
= pred_intra_mode( h
, i
);
5731 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5733 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5736 write_back_intra_pred_mode(h
);
5737 if( check_intra4x4_pred_mode(h
) < 0 ) return -1;
5739 h
->intra16x16_pred_mode
= check_intra_pred_mode( h
, h
->intra16x16_pred_mode
);
5740 if( h
->intra16x16_pred_mode
< 0 ) return -1;
5742 h
->chroma_pred_mode_table
[mb_xy
] =
5743 pred_mode
= decode_cabac_mb_chroma_pre_mode( h
);
5745 pred_mode
= check_intra_pred_mode( h
, pred_mode
);
5746 if( pred_mode
< 0 ) return -1;
5747 h
->chroma_pred_mode
= pred_mode
;
5748 } else if( partition_count
== 4 ) {
5749 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
5751 if( h
->slice_type
== FF_B_TYPE
) {
5752 for( i
= 0; i
< 4; i
++ ) {
5753 h
->sub_mb_type
[i
] = decode_cabac_b_mb_sub_type( h
);
5754 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5755 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5757 if( IS_DIRECT(h
->sub_mb_type
[0] | h
->sub_mb_type
[1] |
5758 h
->sub_mb_type
[2] | h
->sub_mb_type
[3]) ) {
5759 pred_direct_motion(h
, &mb_type
);
5760 h
->ref_cache
[0][scan8
[4]] =
5761 h
->ref_cache
[1][scan8
[4]] =
5762 h
->ref_cache
[0][scan8
[12]] =
5763 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
5764 if( h
->ref_count
[0] > 1 || h
->ref_count
[1] > 1 ) {
5765 for( i
= 0; i
< 4; i
++ )
5766 if( IS_DIRECT(h
->sub_mb_type
[i
]) )
5767 fill_rectangle( &h
->direct_cache
[scan8
[4*i
]], 2, 2, 8, 1, 1 );
5771 for( i
= 0; i
< 4; i
++ ) {
5772 h
->sub_mb_type
[i
] = decode_cabac_p_mb_sub_type( h
);
5773 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5774 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5778 for( list
= 0; list
< h
->list_count
; list
++ ) {
5779 for( i
= 0; i
< 4; i
++ ) {
5780 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
5781 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
5782 if( h
->ref_count
[list
] > 1 )
5783 ref
[list
][i
] = decode_cabac_mb_ref( h
, list
, 4*i
);
5789 h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
5790 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
5795 dct8x8_allowed
= get_dct8x8_allowed(h
);
5797 for(list
=0; list
<h
->list_count
; list
++){
5799 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
5800 if(IS_DIRECT(h
->sub_mb_type
[i
])){
5801 fill_rectangle(h
->mvd_cache
[list
][scan8
[4*i
]], 2, 2, 8, 0, 4);
5805 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
) && !IS_DIRECT(h
->sub_mb_type
[i
])){
5806 const int sub_mb_type
= h
->sub_mb_type
[i
];
5807 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
5808 for(j
=0; j
<sub_partition_count
[i
]; j
++){
5811 const int index
= 4*i
+ block_width
*j
;
5812 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
5813 int16_t (* mvd_cache
)[2]= &h
->mvd_cache
[list
][ scan8
[index
] ];
5814 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mpx
, &mpy
);
5816 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, index
, 0 );
5817 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, index
, 1 );
5818 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5820 if(IS_SUB_8X8(sub_mb_type
)){
5822 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
5824 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
5827 mvd_cache
[ 8 ][0]= mvd_cache
[ 9 ][0]= mx
- mpx
;
5829 mvd_cache
[ 8 ][1]= mvd_cache
[ 9 ][1]= my
- mpy
;
5830 }else if(IS_SUB_8X4(sub_mb_type
)){
5831 mv_cache
[ 1 ][0]= mx
;
5832 mv_cache
[ 1 ][1]= my
;
5834 mvd_cache
[ 1 ][0]= mx
- mpx
;
5835 mvd_cache
[ 1 ][1]= my
- mpy
;
5836 }else if(IS_SUB_4X8(sub_mb_type
)){
5837 mv_cache
[ 8 ][0]= mx
;
5838 mv_cache
[ 8 ][1]= my
;
5840 mvd_cache
[ 8 ][0]= mx
- mpx
;
5841 mvd_cache
[ 8 ][1]= my
- mpy
;
5843 mv_cache
[ 0 ][0]= mx
;
5844 mv_cache
[ 0 ][1]= my
;
5846 mvd_cache
[ 0 ][0]= mx
- mpx
;
5847 mvd_cache
[ 0 ][1]= my
- mpy
;
5850 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
5851 uint32_t *pd
= (uint32_t *)&h
->mvd_cache
[list
][ scan8
[4*i
] ][0];
5852 p
[0] = p
[1] = p
[8] = p
[9] = 0;
5853 pd
[0]= pd
[1]= pd
[8]= pd
[9]= 0;
5857 } else if( IS_DIRECT(mb_type
) ) {
5858 pred_direct_motion(h
, &mb_type
);
5859 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
5860 fill_rectangle(h
->mvd_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
5861 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
5863 int list
, mx
, my
, i
, mpx
, mpy
;
5864 if(IS_16X16(mb_type
)){
5865 for(list
=0; list
<h
->list_count
; list
++){
5866 if(IS_DIR(mb_type
, 0, list
)){
5867 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 0 ) : 0;
5868 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, ref
, 1);
5870 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED
, 1); //FIXME factorize and the other fill_rect below too
5872 for(list
=0; list
<h
->list_count
; list
++){
5873 if(IS_DIR(mb_type
, 0, list
)){
5874 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mpx
, &mpy
);
5876 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 0, 0 );
5877 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 0, 1 );
5878 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5880 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5881 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
,my
), 4);
5883 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, 0, 4);
5886 else if(IS_16X8(mb_type
)){
5887 for(list
=0; list
<h
->list_count
; list
++){
5889 if(IS_DIR(mb_type
, i
, list
)){
5890 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 8*i
) : 0;
5891 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, ref
, 1);
5893 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, (LIST_NOT_USED
&0xFF), 1);
5896 for(list
=0; list
<h
->list_count
; list
++){
5898 if(IS_DIR(mb_type
, i
, list
)){
5899 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mpx
, &mpy
);
5900 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 0 );
5901 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 1 );
5902 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5904 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5905 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
,my
), 4);
5907 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5908 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5913 assert(IS_8X16(mb_type
));
5914 for(list
=0; list
<h
->list_count
; list
++){
5916 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
5917 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 4*i
) : 0;
5918 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, ref
, 1);
5920 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, (LIST_NOT_USED
&0xFF), 1);
5923 for(list
=0; list
<h
->list_count
; list
++){
5925 if(IS_DIR(mb_type
, i
, list
)){
5926 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mpx
, &mpy
);
5927 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 0 );
5928 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 1 );
5930 tprintf(s
->avctx
, "final mv:%d %d\n", mx
, my
);
5931 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5932 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
,my
), 4);
5934 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5935 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5942 if( IS_INTER( mb_type
) ) {
5943 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5944 write_back_motion( h
, mb_type
);
5947 if( !IS_INTRA16x16( mb_type
) ) {
5948 cbp
= decode_cabac_mb_cbp_luma( h
);
5949 cbp
|= decode_cabac_mb_cbp_chroma( h
) << 4;
5952 h
->cbp_table
[mb_xy
] = h
->cbp
= cbp
;
5954 if( dct8x8_allowed
&& (cbp
&15) && !IS_INTRA( mb_type
) ) {
5955 if( decode_cabac_mb_transform_size( h
) )
5956 mb_type
|= MB_TYPE_8x8DCT
;
5958 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5960 if( cbp
|| IS_INTRA16x16( mb_type
) ) {
5961 const uint8_t *scan
, *scan8x8
, *dc_scan
;
5962 const uint32_t *qmul
;
5965 if(IS_INTERLACED(mb_type
)){
5966 scan8x8
= s
->qscale
? h
->field_scan8x8
: h
->field_scan8x8_q0
;
5967 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
5968 dc_scan
= luma_dc_field_scan
;
5970 scan8x8
= s
->qscale
? h
->zigzag_scan8x8
: h
->zigzag_scan8x8_q0
;
5971 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
5972 dc_scan
= luma_dc_zigzag_scan
;
5975 h
->last_qscale_diff
= dqp
= decode_cabac_mb_dqp( h
);
5976 if( dqp
== INT_MIN
){
5977 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cabac decode of qscale diff failed at %d %d\n", s
->mb_x
, s
->mb_y
);
5981 if(((unsigned)s
->qscale
) > 51){
5982 if(s
->qscale
<0) s
->qscale
+= 52;
5983 else s
->qscale
-= 52;
5985 h
->chroma_qp
[0] = get_chroma_qp(h
, 0, s
->qscale
);
5986 h
->chroma_qp
[1] = get_chroma_qp(h
, 1, s
->qscale
);
5988 if( IS_INTRA16x16( mb_type
) ) {
5990 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5991 decode_cabac_residual( h
, h
->mb
, 0, 0, dc_scan
, NULL
, 16);
5994 qmul
= h
->dequant4_coeff
[0][s
->qscale
];
5995 for( i
= 0; i
< 16; i
++ ) {
5996 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5997 decode_cabac_residual(h
, h
->mb
+ 16*i
, 1, i
, scan
+ 1, qmul
, 15);
6000 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
6004 for( i8x8
= 0; i8x8
< 4; i8x8
++ ) {
6005 if( cbp
& (1<<i8x8
) ) {
6006 if( IS_8x8DCT(mb_type
) ) {
6007 decode_cabac_residual(h
, h
->mb
+ 64*i8x8
, 5, 4*i8x8
,
6008 scan8x8
, h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 64);
6010 qmul
= h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
];
6011 for( i4x4
= 0; i4x4
< 4; i4x4
++ ) {
6012 const int index
= 4*i8x8
+ i4x4
;
6013 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6015 decode_cabac_residual(h
, h
->mb
+ 16*index
, 2, index
, scan
, qmul
, 16);
6016 //STOP_TIMER("decode_residual")
6020 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
6021 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
6028 for( c
= 0; c
< 2; c
++ ) {
6029 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6030 decode_cabac_residual(h
, h
->mb
+ 256 + 16*4*c
, 3, c
, chroma_dc_scan
, NULL
, 4);
6036 for( c
= 0; c
< 2; c
++ ) {
6037 qmul
= h
->dequant4_coeff
[c
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
[c
]];
6038 for( i
= 0; i
< 4; i
++ ) {
6039 const int index
= 16 + 4 * c
+ i
;
6040 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6041 decode_cabac_residual(h
, h
->mb
+ 16*index
, 4, index
- 16, scan
+ 1, qmul
, 15);
6045 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
6046 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
6047 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
6050 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
6051 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
6052 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
6053 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
6054 h
->last_qscale_diff
= 0;
6057 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
6058 write_back_non_zero_count(h
);
6061 h
->ref_count
[0] >>= 1;
6062 h
->ref_count
[1] >>= 1;
6069 static void filter_mb_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6071 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6072 const int alpha
= (alpha_table
+52)[index_a
];
6073 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6078 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] : -1;
6079 h
->s
.dsp
.h264_h_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6081 /* 16px edge length, because bS=4 is triggered by being at
6082 * the edge of an intra MB, so all 4 bS are the same */
6083 for( d
= 0; d
< 16; d
++ ) {
6084 const int p0
= pix
[-1];
6085 const int p1
= pix
[-2];
6086 const int p2
= pix
[-3];
6088 const int q0
= pix
[0];
6089 const int q1
= pix
[1];
6090 const int q2
= pix
[2];
6092 if( FFABS( p0
- q0
) < alpha
&&
6093 FFABS( p1
- p0
) < beta
&&
6094 FFABS( q1
- q0
) < beta
) {
6096 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6097 if( FFABS( p2
- p0
) < beta
)
6099 const int p3
= pix
[-4];
6101 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6102 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6103 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6106 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6108 if( FFABS( q2
- q0
) < beta
)
6110 const int q3
= pix
[3];
6112 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6113 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6114 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6117 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6121 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6122 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6124 tprintf(h
->s
.avctx
, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2], pix
[-1], pix
[0], pix
[1]);
6130 static void filter_mb_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6132 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6133 const int alpha
= (alpha_table
+52)[index_a
];
6134 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6139 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] + 1 : 0;
6140 h
->s
.dsp
.h264_h_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
6142 h
->s
.dsp
.h264_h_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
6146 static void filter_mb_mbaff_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[8], int qp
[2] ) {
6148 for( i
= 0; i
< 16; i
++, pix
+= stride
) {
6154 int bS_index
= (i
>> 1);
6157 bS_index
|= (i
& 1);
6160 if( bS
[bS_index
] == 0 ) {
6164 qp_index
= MB_FIELD
? (i
>> 3) : (i
& 1);
6165 index_a
= qp
[qp_index
] + h
->slice_alpha_c0_offset
;
6166 alpha
= (alpha_table
+52)[index_a
];
6167 beta
= (beta_table
+52)[qp
[qp_index
] + h
->slice_beta_offset
];
6169 if( bS
[bS_index
] < 4 ) {
6170 const int tc0
= (tc0_table
+52)[index_a
][bS
[bS_index
] - 1];
6171 const int p0
= pix
[-1];
6172 const int p1
= pix
[-2];
6173 const int p2
= pix
[-3];
6174 const int q0
= pix
[0];
6175 const int q1
= pix
[1];
6176 const int q2
= pix
[2];
6178 if( FFABS( p0
- q0
) < alpha
&&
6179 FFABS( p1
- p0
) < beta
&&
6180 FFABS( q1
- q0
) < beta
) {
6184 if( FFABS( p2
- p0
) < beta
) {
6185 pix
[-2] = p1
+ av_clip( ( p2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( p1
<< 1 ) ) >> 1, -tc0
, tc0
);
6188 if( FFABS( q2
- q0
) < beta
) {
6189 pix
[1] = q1
+ av_clip( ( q2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( q1
<< 1 ) ) >> 1, -tc0
, tc0
);
6193 i_delta
= av_clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6194 pix
[-1] = av_clip_uint8( p0
+ i_delta
); /* p0' */
6195 pix
[0] = av_clip_uint8( q0
- i_delta
); /* q0' */
6196 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6199 const int p0
= pix
[-1];
6200 const int p1
= pix
[-2];
6201 const int p2
= pix
[-3];
6203 const int q0
= pix
[0];
6204 const int q1
= pix
[1];
6205 const int q2
= pix
[2];
6207 if( FFABS( p0
- q0
) < alpha
&&
6208 FFABS( p1
- p0
) < beta
&&
6209 FFABS( q1
- q0
) < beta
) {
6211 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6212 if( FFABS( p2
- p0
) < beta
)
6214 const int p3
= pix
[-4];
6216 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6217 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6218 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6221 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6223 if( FFABS( q2
- q0
) < beta
)
6225 const int q3
= pix
[3];
6227 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6228 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6229 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6232 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6236 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6237 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6239 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6244 static void filter_mb_mbaff_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[8], int qp
[2] ) {
6246 for( i
= 0; i
< 8; i
++, pix
+= stride
) {
6254 if( bS
[bS_index
] == 0 ) {
6258 qp_index
= MB_FIELD
? (i
>> 2) : (i
& 1);
6259 index_a
= qp
[qp_index
] + h
->slice_alpha_c0_offset
;
6260 alpha
= (alpha_table
+52)[index_a
];
6261 beta
= (beta_table
+52)[qp
[qp_index
] + h
->slice_beta_offset
];
6263 if( bS
[bS_index
] < 4 ) {
6264 const int tc
= (tc0_table
+52)[index_a
][bS
[bS_index
] - 1] + 1;
6265 const int p0
= pix
[-1];
6266 const int p1
= pix
[-2];
6267 const int q0
= pix
[0];
6268 const int q1
= pix
[1];
6270 if( FFABS( p0
- q0
) < alpha
&&
6271 FFABS( p1
- p0
) < beta
&&
6272 FFABS( q1
- q0
) < beta
) {
6273 const int i_delta
= av_clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6275 pix
[-1] = av_clip_uint8( p0
+ i_delta
); /* p0' */
6276 pix
[0] = av_clip_uint8( q0
- i_delta
); /* q0' */
6277 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6280 const int p0
= pix
[-1];
6281 const int p1
= pix
[-2];
6282 const int q0
= pix
[0];
6283 const int q1
= pix
[1];
6285 if( FFABS( p0
- q0
) < alpha
&&
6286 FFABS( p1
- p0
) < beta
&&
6287 FFABS( q1
- q0
) < beta
) {
6289 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2; /* p0' */
6290 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2; /* q0' */
6291 tprintf(h
->s
.avctx
, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, pix
[-3], p1
, p0
, q0
, q1
, pix
[2], pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6297 static void filter_mb_edgeh( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6299 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6300 const int alpha
= (alpha_table
+52)[index_a
];
6301 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6302 const int pix_next
= stride
;
6307 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] : -1;
6308 h
->s
.dsp
.h264_v_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6310 /* 16px edge length, see filter_mb_edgev */
6311 for( d
= 0; d
< 16; d
++ ) {
6312 const int p0
= pix
[-1*pix_next
];
6313 const int p1
= pix
[-2*pix_next
];
6314 const int p2
= pix
[-3*pix_next
];
6315 const int q0
= pix
[0];
6316 const int q1
= pix
[1*pix_next
];
6317 const int q2
= pix
[2*pix_next
];
6319 if( FFABS( p0
- q0
) < alpha
&&
6320 FFABS( p1
- p0
) < beta
&&
6321 FFABS( q1
- q0
) < beta
) {
6323 const int p3
= pix
[-4*pix_next
];
6324 const int q3
= pix
[ 3*pix_next
];
6326 if(FFABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6327 if( FFABS( p2
- p0
) < beta
) {
6329 pix
[-1*pix_next
] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6330 pix
[-2*pix_next
] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6331 pix
[-3*pix_next
] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6334 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6336 if( FFABS( q2
- q0
) < beta
) {
6338 pix
[0*pix_next
] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6339 pix
[1*pix_next
] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6340 pix
[2*pix_next
] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6343 pix
[0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6347 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6348 pix
[ 0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6350 tprintf(h
->s
.avctx
, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, qp
, index_a
, alpha
, beta
, bS
[i
], p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2*pix_next
], pix
[-pix_next
], pix
[0], pix
[pix_next
]);
6357 static void filter_mb_edgech( H264Context
*h
, uint8_t *pix
, int stride
, int16_t bS
[4], int qp
) {
6359 const int index_a
= qp
+ h
->slice_alpha_c0_offset
;
6360 const int alpha
= (alpha_table
+52)[index_a
];
6361 const int beta
= (beta_table
+52)[qp
+ h
->slice_beta_offset
];
6366 tc
[i
] = bS
[i
] ? (tc0_table
+52)[index_a
][bS
[i
] - 1] + 1 : 0;
6367 h
->s
.dsp
.h264_v_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
6369 h
->s
.dsp
.h264_v_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
6373 static void filter_mb_fast( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6374 MpegEncContext
* const s
= &h
->s
;
6375 int mb_y_firstrow
= s
->picture_structure
== PICT_BOTTOM_FIELD
;
6377 int qp
, qp0
, qp1
, qpc
, qpc0
, qpc1
, qp_thresh
;
6379 mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
6381 if(mb_x
==0 || mb_y
==mb_y_firstrow
|| !s
->dsp
.h264_loop_filter_strength
|| h
->pps
.chroma_qp_diff
||
6382 (h
->deblocking_filter
== 2 && (h
->slice_table
[mb_xy
] != h
->slice_table
[h
->top_mb_xy
] ||
6383 h
->slice_table
[mb_xy
] != h
->slice_table
[mb_xy
- 1]))) {
6384 filter_mb(h
, mb_x
, mb_y
, img_y
, img_cb
, img_cr
, linesize
, uvlinesize
);
6387 assert(!FRAME_MBAFF
);
6389 mb_type
= s
->current_picture
.mb_type
[mb_xy
];
6390 qp
= s
->current_picture
.qscale_table
[mb_xy
];
6391 qp0
= s
->current_picture
.qscale_table
[mb_xy
-1];
6392 qp1
= s
->current_picture
.qscale_table
[h
->top_mb_xy
];
6393 qpc
= get_chroma_qp( h
, 0, qp
);
6394 qpc0
= get_chroma_qp( h
, 0, qp0
);
6395 qpc1
= get_chroma_qp( h
, 0, qp1
);
6396 qp0
= (qp
+ qp0
+ 1) >> 1;
6397 qp1
= (qp
+ qp1
+ 1) >> 1;
6398 qpc0
= (qpc
+ qpc0
+ 1) >> 1;
6399 qpc1
= (qpc
+ qpc1
+ 1) >> 1;
6400 qp_thresh
= 15 - h
->slice_alpha_c0_offset
;
6401 if(qp
<= qp_thresh
&& qp0
<= qp_thresh
&& qp1
<= qp_thresh
&&
6402 qpc
<= qp_thresh
&& qpc0
<= qp_thresh
&& qpc1
<= qp_thresh
)
6405 if( IS_INTRA(mb_type
) ) {
6406 int16_t bS4
[4] = {4,4,4,4};
6407 int16_t bS3
[4] = {3,3,3,3};
6408 int16_t *bSH
= FIELD_PICTURE
? bS3
: bS4
;
6409 if( IS_8x8DCT(mb_type
) ) {
6410 filter_mb_edgev( h
, &img_y
[4*0], linesize
, bS4
, qp0
);
6411 filter_mb_edgev( h
, &img_y
[4*2], linesize
, bS3
, qp
);
6412 filter_mb_edgeh( h
, &img_y
[4*0*linesize
], linesize
, bSH
, qp1
);
6413 filter_mb_edgeh( h
, &img_y
[4*2*linesize
], linesize
, bS3
, qp
);
6415 filter_mb_edgev( h
, &img_y
[4*0], linesize
, bS4
, qp0
);
6416 filter_mb_edgev( h
, &img_y
[4*1], linesize
, bS3
, qp
);
6417 filter_mb_edgev( h
, &img_y
[4*2], linesize
, bS3
, qp
);
6418 filter_mb_edgev( h
, &img_y
[4*3], linesize
, bS3
, qp
);
6419 filter_mb_edgeh( h
, &img_y
[4*0*linesize
], linesize
, bSH
, qp1
);
6420 filter_mb_edgeh( h
, &img_y
[4*1*linesize
], linesize
, bS3
, qp
);
6421 filter_mb_edgeh( h
, &img_y
[4*2*linesize
], linesize
, bS3
, qp
);
6422 filter_mb_edgeh( h
, &img_y
[4*3*linesize
], linesize
, bS3
, qp
);
6424 filter_mb_edgecv( h
, &img_cb
[2*0], uvlinesize
, bS4
, qpc0
);
6425 filter_mb_edgecv( h
, &img_cb
[2*2], uvlinesize
, bS3
, qpc
);
6426 filter_mb_edgecv( h
, &img_cr
[2*0], uvlinesize
, bS4
, qpc0
);
6427 filter_mb_edgecv( h
, &img_cr
[2*2], uvlinesize
, bS3
, qpc
);
6428 filter_mb_edgech( h
, &img_cb
[2*0*uvlinesize
], uvlinesize
, bSH
, qpc1
);
6429 filter_mb_edgech( h
, &img_cb
[2*2*uvlinesize
], uvlinesize
, bS3
, qpc
);
6430 filter_mb_edgech( h
, &img_cr
[2*0*uvlinesize
], uvlinesize
, bSH
, qpc1
);
6431 filter_mb_edgech( h
, &img_cr
[2*2*uvlinesize
], uvlinesize
, bS3
, qpc
);
6434 DECLARE_ALIGNED_8(int16_t, bS
[2][4][4]);
6435 uint64_t (*bSv
)[4] = (uint64_t(*)[4])bS
;
6437 if( IS_8x8DCT(mb_type
) && (h
->cbp
&7) == 7 ) {
6439 bSv
[0][0] = bSv
[0][2] = bSv
[1][0] = bSv
[1][2] = 0x0002000200020002ULL
;
6441 int mask_edge1
= (mb_type
& (MB_TYPE_16x16
| MB_TYPE_8x16
)) ? 3 :
6442 (mb_type
& MB_TYPE_16x8
) ? 1 : 0;
6443 int mask_edge0
= (mb_type
& (MB_TYPE_16x16
| MB_TYPE_8x16
))
6444 && (s
->current_picture
.mb_type
[mb_xy
-1] & (MB_TYPE_16x16
| MB_TYPE_8x16
))
6446 int step
= IS_8x8DCT(mb_type
) ? 2 : 1;
6447 edges
= (mb_type
& MB_TYPE_16x16
) && !(h
->cbp
& 15) ? 1 : 4;
6448 s
->dsp
.h264_loop_filter_strength( bS
, h
->non_zero_count_cache
, h
->ref_cache
, h
->mv_cache
,
6449 (h
->slice_type
== FF_B_TYPE
), edges
, step
, mask_edge0
, mask_edge1
);
6451 if( IS_INTRA(s
->current_picture
.mb_type
[mb_xy
-1]) )
6452 bSv
[0][0] = 0x0004000400040004ULL
;
6453 if( IS_INTRA(s
->current_picture
.mb_type
[h
->top_mb_xy
]) )
6454 bSv
[1][0] = FIELD_PICTURE
? 0x0003000300030003ULL
: 0x0004000400040004ULL
;
6456 #define FILTER(hv,dir,edge)\
6457 if(bSv[dir][edge]) {\
6458 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6460 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6461 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6467 } else if( IS_8x8DCT(mb_type
) ) {
6486 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6487 MpegEncContext
* const s
= &h
->s
;
6488 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
6489 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
6490 const int mvy_limit
= IS_INTERLACED(mb_type
) ? 2 : 4;
6491 int first_vertical_edge_done
= 0;
6493 /* FIXME: A given frame may occupy more than one position in
6494 * the reference list. So ref2frm should be populated with
6495 * frame numbers, not indices. */
6496 static const int ref2frm
[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6497 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6499 //for sufficiently low qp, filtering wouldn't do anything
6500 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6502 int qp_thresh
= 15 - h
->slice_alpha_c0_offset
- FFMAX(0, FFMAX(h
->pps
.chroma_qp_index_offset
[0], h
->pps
.chroma_qp_index_offset
[1]));
6503 int qp
= s
->current_picture
.qscale_table
[mb_xy
];
6505 && (mb_x
== 0 || ((qp
+ s
->current_picture
.qscale_table
[mb_xy
-1] + 1)>>1) <= qp_thresh
)
6506 && (mb_y
== 0 || ((qp
+ s
->current_picture
.qscale_table
[h
->top_mb_xy
] + 1)>>1) <= qp_thresh
)){
6512 // left mb is in picture
6513 && h
->slice_table
[mb_xy
-1] != 255
6514 // and current and left pair do not have the same interlaced type
6515 && (IS_INTERLACED(mb_type
) != IS_INTERLACED(s
->current_picture
.mb_type
[mb_xy
-1]))
6516 // and left mb is in the same slice if deblocking_filter == 2
6517 && (h
->deblocking_filter
!=2 || h
->slice_table
[mb_xy
-1] == h
->slice_table
[mb_xy
])) {
6518 /* First vertical edge is different in MBAFF frames
6519 * There are 8 different bS to compute and 2 different Qp
6521 const int pair_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
6522 const int left_mb_xy
[2] = { pair_xy
-1, pair_xy
-1+s
->mb_stride
};
6527 int mb_qp
, mbn0_qp
, mbn1_qp
;
6529 first_vertical_edge_done
= 1;
6531 if( IS_INTRA(mb_type
) )
6532 bS
[0] = bS
[1] = bS
[2] = bS
[3] = bS
[4] = bS
[5] = bS
[6] = bS
[7] = 4;
6534 for( i
= 0; i
< 8; i
++ ) {
6535 int mbn_xy
= MB_FIELD
? left_mb_xy
[i
>>2] : left_mb_xy
[i
&1];
6537 if( IS_INTRA( s
->current_picture
.mb_type
[mbn_xy
] ) )
6539 else if( h
->non_zero_count_cache
[12+8*(i
>>1)] != 0 ||
6540 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6541 h
->non_zero_count
[mbn_xy
][MB_FIELD
? i
&3 : (i
>>2)+(mb_y
&1)*2] )
6548 mb_qp
= s
->current_picture
.qscale_table
[mb_xy
];
6549 mbn0_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[0]];
6550 mbn1_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[1]];
6551 qp
[0] = ( mb_qp
+ mbn0_qp
+ 1 ) >> 1;
6552 bqp
[0] = ( get_chroma_qp( h
, 0, mb_qp
) +
6553 get_chroma_qp( h
, 0, mbn0_qp
) + 1 ) >> 1;
6554 rqp
[0] = ( get_chroma_qp( h
, 1, mb_qp
) +
6555 get_chroma_qp( h
, 1, mbn0_qp
) + 1 ) >> 1;
6556 qp
[1] = ( mb_qp
+ mbn1_qp
+ 1 ) >> 1;
6557 bqp
[1] = ( get_chroma_qp( h
, 0, mb_qp
) +
6558 get_chroma_qp( h
, 0, mbn1_qp
) + 1 ) >> 1;
6559 rqp
[1] = ( get_chroma_qp( h
, 1, mb_qp
) +
6560 get_chroma_qp( h
, 1, mbn1_qp
) + 1 ) >> 1;
6563 tprintf(s
->avctx
, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x
, mb_y
, qp
[0], qp
[1], bqp
[0], bqp
[1], rqp
[0], rqp
[1], linesize
, uvlinesize
);
6564 { int i
; for (i
= 0; i
< 8; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6565 filter_mb_mbaff_edgev ( h
, &img_y
[0], linesize
, bS
, qp
);
6566 filter_mb_mbaff_edgecv( h
, &img_cb
[0], uvlinesize
, bS
, bqp
);
6567 filter_mb_mbaff_edgecv( h
, &img_cr
[0], uvlinesize
, bS
, rqp
);
6569 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6570 for( dir
= 0; dir
< 2; dir
++ )
6573 const int mbm_xy
= dir
== 0 ? mb_xy
-1 : h
->top_mb_xy
;
6574 const int mbm_type
= s
->current_picture
.mb_type
[mbm_xy
];
6575 int start
= h
->slice_table
[mbm_xy
] == 255 ? 1 : 0;
6577 const int edges
= (mb_type
& (MB_TYPE_16x16
|MB_TYPE_SKIP
))
6578 == (MB_TYPE_16x16
|MB_TYPE_SKIP
) ? 1 : 4;
6579 // how often to recheck mv-based bS when iterating between edges
6580 const int mask_edge
= (mb_type
& (MB_TYPE_16x16
| (MB_TYPE_16x8
<< dir
))) ? 3 :
6581 (mb_type
& (MB_TYPE_8x16
>> dir
)) ? 1 : 0;
6582 // how often to recheck mv-based bS when iterating along each edge
6583 const int mask_par0
= mb_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
));
6585 if (first_vertical_edge_done
) {
6587 first_vertical_edge_done
= 0;
6590 if (h
->deblocking_filter
==2 && h
->slice_table
[mbm_xy
] != h
->slice_table
[mb_xy
])
6593 if (FRAME_MBAFF
&& (dir
== 1) && ((mb_y
&1) == 0) && start
== 0
6594 && !IS_INTERLACED(mb_type
)
6595 && IS_INTERLACED(mbm_type
)
6597 // This is a special case in the norm where the filtering must
6598 // be done twice (one each of the field) even if we are in a
6599 // frame macroblock.
6601 static const int nnz_idx
[4] = {4,5,6,3};
6602 unsigned int tmp_linesize
= 2 * linesize
;
6603 unsigned int tmp_uvlinesize
= 2 * uvlinesize
;
6604 int mbn_xy
= mb_xy
- 2 * s
->mb_stride
;
6609 for(j
=0; j
<2; j
++, mbn_xy
+= s
->mb_stride
){
6610 if( IS_INTRA(mb_type
) ||
6611 IS_INTRA(s
->current_picture
.mb_type
[mbn_xy
]) ) {
6612 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 3;
6614 const uint8_t *mbn_nnz
= h
->non_zero_count
[mbn_xy
];
6615 for( i
= 0; i
< 4; i
++ ) {
6616 if( h
->non_zero_count_cache
[scan8
[0]+i
] != 0 ||
6617 mbn_nnz
[nnz_idx
[i
]] != 0 )
6623 // Do not use s->qscale as luma quantizer because it has not the same
6624 // value in IPCM macroblocks.
6625 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6626 tprintf(s
->avctx
, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, tmp_linesize
, tmp_uvlinesize
);
6627 { int i
; for (i
= 0; i
< 4; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6628 filter_mb_edgeh( h
, &img_y
[j
*linesize
], tmp_linesize
, bS
, qp
);
6629 filter_mb_edgech( h
, &img_cb
[j
*uvlinesize
], tmp_uvlinesize
, bS
,
6630 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6631 filter_mb_edgech( h
, &img_cr
[j
*uvlinesize
], tmp_uvlinesize
, bS
,
6632 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6639 for( edge
= start
; edge
< edges
; edge
++ ) {
6640 /* mbn_xy: neighbor macroblock */
6641 const int mbn_xy
= edge
> 0 ? mb_xy
: mbm_xy
;
6642 const int mbn_type
= s
->current_picture
.mb_type
[mbn_xy
];
6646 if( (edge
&1) && IS_8x8DCT(mb_type
) )
6649 if( IS_INTRA(mb_type
) ||
6650 IS_INTRA(mbn_type
) ) {
6653 if ( (!IS_INTERLACED(mb_type
) && !IS_INTERLACED(mbm_type
))
6654 || ((FRAME_MBAFF
|| (s
->picture_structure
!= PICT_FRAME
)) && (dir
== 0))
6663 bS
[0] = bS
[1] = bS
[2] = bS
[3] = value
;
6668 if( edge
& mask_edge
) {
6669 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 0;
6672 else if( FRAME_MBAFF
&& IS_INTERLACED(mb_type
^ mbn_type
)) {
6673 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 1;
6676 else if( mask_par0
&& (edge
|| (mbn_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
)))) ) {
6677 int b_idx
= 8 + 4 + edge
* (dir
? 8:1);
6678 int bn_idx
= b_idx
- (dir
? 8:1);
6680 for( l
= 0; !v
&& l
< 1 + (h
->slice_type
== FF_B_TYPE
); l
++ ) {
6681 v
|= ref2frm
[h
->ref_cache
[l
][b_idx
]+2] != ref2frm
[h
->ref_cache
[l
][bn_idx
]+2] ||
6682 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6683 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
;
6685 bS
[0] = bS
[1] = bS
[2] = bS
[3] = v
;
6691 for( i
= 0; i
< 4; i
++ ) {
6692 int x
= dir
== 0 ? edge
: i
;
6693 int y
= dir
== 0 ? i
: edge
;
6694 int b_idx
= 8 + 4 + x
+ 8*y
;
6695 int bn_idx
= b_idx
- (dir
? 8:1);
6697 if( h
->non_zero_count_cache
[b_idx
] != 0 ||
6698 h
->non_zero_count_cache
[bn_idx
] != 0 ) {
6704 for( l
= 0; l
< 1 + (h
->slice_type
== FF_B_TYPE
); l
++ ) {
6705 if( ref2frm
[h
->ref_cache
[l
][b_idx
]+2] != ref2frm
[h
->ref_cache
[l
][bn_idx
]+2] ||
6706 FFABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6707 FFABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
) {
6715 if(bS
[0]+bS
[1]+bS
[2]+bS
[3] == 0)
6720 // Do not use s->qscale as luma quantizer because it has not the same
6721 // value in IPCM macroblocks.
6722 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6723 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6724 tprintf(s
->avctx
, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, linesize
, uvlinesize
);
6725 { int i
; for (i
= 0; i
< 4; i
++) tprintf(s
->avctx
, " bS[%d]:%d", i
, bS
[i
]); tprintf(s
->avctx
, "\n"); }
6727 filter_mb_edgev( h
, &img_y
[4*edge
], linesize
, bS
, qp
);
6728 if( (edge
&1) == 0 ) {
6729 filter_mb_edgecv( h
, &img_cb
[2*edge
], uvlinesize
, bS
,
6730 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6731 filter_mb_edgecv( h
, &img_cr
[2*edge
], uvlinesize
, bS
,
6732 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6735 filter_mb_edgeh( h
, &img_y
[4*edge
*linesize
], linesize
, bS
, qp
);
6736 if( (edge
&1) == 0 ) {
6737 filter_mb_edgech( h
, &img_cb
[2*edge
*uvlinesize
], uvlinesize
, bS
,
6738 ( h
->chroma_qp
[0] + get_chroma_qp( h
, 0, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6739 filter_mb_edgech( h
, &img_cr
[2*edge
*uvlinesize
], uvlinesize
, bS
,
6740 ( h
->chroma_qp
[1] + get_chroma_qp( h
, 1, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1);
6747 static int decode_slice(struct AVCodecContext
*avctx
, H264Context
*h
){
6748 MpegEncContext
* const s
= &h
->s
;
6749 const int part_mask
= s
->partitioned_frame
? (AC_END
|AC_ERROR
) : 0x7F;
6753 if( h
->pps
.cabac
) {
6757 align_get_bits( &s
->gb
);
6760 ff_init_cabac_states( &h
->cabac
);
6761 ff_init_cabac_decoder( &h
->cabac
,
6762 s
->gb
.buffer
+ get_bits_count(&s
->gb
)/8,
6763 ( s
->gb
.size_in_bits
- get_bits_count(&s
->gb
) + 7)/8);
6764 /* calculate pre-state */
6765 for( i
= 0; i
< 460; i
++ ) {
6767 if( h
->slice_type
== FF_I_TYPE
)
6768 pre
= av_clip( ((cabac_context_init_I
[i
][0] * s
->qscale
) >>4 ) + cabac_context_init_I
[i
][1], 1, 126 );
6770 pre
= av_clip( ((cabac_context_init_PB
[h
->cabac_init_idc
][i
][0] * s
->qscale
) >>4 ) + cabac_context_init_PB
[h
->cabac_init_idc
][i
][1], 1, 126 );
6773 h
->cabac_state
[i
] = 2 * ( 63 - pre
) + 0;
6775 h
->cabac_state
[i
] = 2 * ( pre
- 64 ) + 1;
6780 int ret
= decode_mb_cabac(h
);
6782 //STOP_TIMER("decode_mb_cabac")
6784 if(ret
>=0) hl_decode_mb(h
);
6786 if( ret
>= 0 && FRAME_MBAFF
) { //FIXME optimal? or let mb_decode decode 16x32 ?
6789 if(ret
>=0) ret
= decode_mb_cabac(h
);
6791 if(ret
>=0) hl_decode_mb(h
);
6794 eos
= get_cabac_terminate( &h
->cabac
);
6796 if( ret
< 0 || h
->cabac
.bytestream
> h
->cabac
.bytestream_end
+ 2) {
6797 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d, bytestream (%td)\n", s
->mb_x
, s
->mb_y
, h
->cabac
.bytestream_end
- h
->cabac
.bytestream
);
6798 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6802 if( ++s
->mb_x
>= s
->mb_width
) {
6804 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6806 if(FIELD_OR_MBAFF_PICTURE
) {
6811 if( eos
|| s
->mb_y
>= s
->mb_height
) {
6812 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6813 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6820 int ret
= decode_mb_cavlc(h
);
6822 if(ret
>=0) hl_decode_mb(h
);
6824 if(ret
>=0 && FRAME_MBAFF
){ //FIXME optimal? or let mb_decode decode 16x32 ?
6826 ret
= decode_mb_cavlc(h
);
6828 if(ret
>=0) hl_decode_mb(h
);
6833 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6834 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6839 if(++s
->mb_x
>= s
->mb_width
){
6841 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6843 if(FIELD_OR_MBAFF_PICTURE
) {
6846 if(s
->mb_y
>= s
->mb_height
){
6847 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6849 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
) {
6850 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6854 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6861 if(get_bits_count(&s
->gb
) >= s
->gb
.size_in_bits
&& s
->mb_skip_run
<=0){
6862 tprintf(s
->avctx
, "slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6863 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
){
6864 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6868 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6877 for(;s
->mb_y
< s
->mb_height
; s
->mb_y
++){
6878 for(;s
->mb_x
< s
->mb_width
; s
->mb_x
++){
6879 int ret
= decode_mb(h
);
6884 av_log(s
->avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6885 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6890 if(++s
->mb_x
>= s
->mb_width
){
6892 if(++s
->mb_y
>= s
->mb_height
){
6893 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6894 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6898 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6905 if(get_bits_count(s
->?gb
) >= s
->gb
?.size_in_bits
){
6906 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6907 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6911 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6918 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6921 return -1; //not reached
6924 static int decode_unregistered_user_data(H264Context
*h
, int size
){
6925 MpegEncContext
* const s
= &h
->s
;
6926 uint8_t user_data
[16+256];
6932 for(i
=0; i
<sizeof(user_data
)-1 && i
<size
; i
++){
6933 user_data
[i
]= get_bits(&s
->gb
, 8);
6937 e
= sscanf(user_data
+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build
);
6938 if(e
==1 && build
>=0)
6939 h
->x264_build
= build
;
6941 if(s
->avctx
->debug
& FF_DEBUG_BUGS
)
6942 av_log(s
->avctx
, AV_LOG_DEBUG
, "user data:\"%s\"\n", user_data
+16);
6945 skip_bits(&s
->gb
, 8);
6950 static int decode_sei(H264Context
*h
){
6951 MpegEncContext
* const s
= &h
->s
;
6953 while(get_bits_count(&s
->gb
) + 16 < s
->gb
.size_in_bits
){
6958 type
+= show_bits(&s
->gb
, 8);
6959 }while(get_bits(&s
->gb
, 8) == 255);
6963 size
+= show_bits(&s
->gb
, 8);
6964 }while(get_bits(&s
->gb
, 8) == 255);
6968 if(decode_unregistered_user_data(h
, size
) < 0)
6972 skip_bits(&s
->gb
, 8*size
);
6975 //FIXME check bits here
6976 align_get_bits(&s
->gb
);
6982 static inline void decode_hrd_parameters(H264Context
*h
, SPS
*sps
){
6983 MpegEncContext
* const s
= &h
->s
;
6985 cpb_count
= get_ue_golomb(&s
->gb
) + 1;
6986 get_bits(&s
->gb
, 4); /* bit_rate_scale */
6987 get_bits(&s
->gb
, 4); /* cpb_size_scale */
6988 for(i
=0; i
<cpb_count
; i
++){
6989 get_ue_golomb(&s
->gb
); /* bit_rate_value_minus1 */
6990 get_ue_golomb(&s
->gb
); /* cpb_size_value_minus1 */
6991 get_bits1(&s
->gb
); /* cbr_flag */
6993 get_bits(&s
->gb
, 5); /* initial_cpb_removal_delay_length_minus1 */
6994 get_bits(&s
->gb
, 5); /* cpb_removal_delay_length_minus1 */
6995 get_bits(&s
->gb
, 5); /* dpb_output_delay_length_minus1 */
6996 get_bits(&s
->gb
, 5); /* time_offset_length */
6999 static inline int decode_vui_parameters(H264Context
*h
, SPS
*sps
){
7000 MpegEncContext
* const s
= &h
->s
;
7001 int aspect_ratio_info_present_flag
;
7002 unsigned int aspect_ratio_idc
;
7003 int nal_hrd_parameters_present_flag
, vcl_hrd_parameters_present_flag
;
7005 aspect_ratio_info_present_flag
= get_bits1(&s
->gb
);
7007 if( aspect_ratio_info_present_flag
) {
7008 aspect_ratio_idc
= get_bits(&s
->gb
, 8);
7009 if( aspect_ratio_idc
== EXTENDED_SAR
) {
7010 sps
->sar
.num
= get_bits(&s
->gb
, 16);
7011 sps
->sar
.den
= get_bits(&s
->gb
, 16);
7012 }else if(aspect_ratio_idc
< sizeof(pixel_aspect
)/sizeof(*pixel_aspect
)){
7013 sps
->sar
= pixel_aspect
[aspect_ratio_idc
];
7015 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal aspect ratio\n");
7022 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7024 if(get_bits1(&s
->gb
)){ /* overscan_info_present_flag */
7025 get_bits1(&s
->gb
); /* overscan_appropriate_flag */
7028 if(get_bits1(&s
->gb
)){ /* video_signal_type_present_flag */
7029 get_bits(&s
->gb
, 3); /* video_format */
7030 get_bits1(&s
->gb
); /* video_full_range_flag */
7031 if(get_bits1(&s
->gb
)){ /* colour_description_present_flag */
7032 get_bits(&s
->gb
, 8); /* colour_primaries */
7033 get_bits(&s
->gb
, 8); /* transfer_characteristics */
7034 get_bits(&s
->gb
, 8); /* matrix_coefficients */
7038 if(get_bits1(&s
->gb
)){ /* chroma_location_info_present_flag */
7039 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_top_field */
7040 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_bottom_field */
7043 sps
->timing_info_present_flag
= get_bits1(&s
->gb
);
7044 if(sps
->timing_info_present_flag
){
7045 sps
->num_units_in_tick
= get_bits_long(&s
->gb
, 32);
7046 sps
->time_scale
= get_bits_long(&s
->gb
, 32);
7047 sps
->fixed_frame_rate_flag
= get_bits1(&s
->gb
);
7050 nal_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
7051 if(nal_hrd_parameters_present_flag
)
7052 decode_hrd_parameters(h
, sps
);
7053 vcl_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
7054 if(vcl_hrd_parameters_present_flag
)
7055 decode_hrd_parameters(h
, sps
);
7056 if(nal_hrd_parameters_present_flag
|| vcl_hrd_parameters_present_flag
)
7057 get_bits1(&s
->gb
); /* low_delay_hrd_flag */
7058 get_bits1(&s
->gb
); /* pic_struct_present_flag */
7060 sps
->bitstream_restriction_flag
= get_bits1(&s
->gb
);
7061 if(sps
->bitstream_restriction_flag
){
7062 unsigned int num_reorder_frames
;
7063 get_bits1(&s
->gb
); /* motion_vectors_over_pic_boundaries_flag */
7064 get_ue_golomb(&s
->gb
); /* max_bytes_per_pic_denom */
7065 get_ue_golomb(&s
->gb
); /* max_bits_per_mb_denom */
7066 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_horizontal */
7067 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_vertical */
7068 num_reorder_frames
= get_ue_golomb(&s
->gb
);
7069 get_ue_golomb(&s
->gb
); /*max_dec_frame_buffering*/
7071 if(num_reorder_frames
> 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7072 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal num_reorder_frames %d\n", num_reorder_frames
);
7076 sps
->num_reorder_frames
= num_reorder_frames
;
7082 static void decode_scaling_list(H264Context
*h
, uint8_t *factors
, int size
,
7083 const uint8_t *jvt_list
, const uint8_t *fallback_list
){
7084 MpegEncContext
* const s
= &h
->s
;
7085 int i
, last
= 8, next
= 8;
7086 const uint8_t *scan
= size
== 16 ? zigzag_scan
: zigzag_scan8x8
;
7087 if(!get_bits1(&s
->gb
)) /* matrix not written, we use the predicted one */
7088 memcpy(factors
, fallback_list
, size
*sizeof(uint8_t));
7090 for(i
=0;i
<size
;i
++){
7092 next
= (last
+ get_se_golomb(&s
->gb
)) & 0xff;
7093 if(!i
&& !next
){ /* matrix not written, we use the preset one */
7094 memcpy(factors
, jvt_list
, size
*sizeof(uint8_t));
7097 last
= factors
[scan
[i
]] = next
? next
: last
;
7101 static void decode_scaling_matrices(H264Context
*h
, SPS
*sps
, PPS
*pps
, int is_sps
,
7102 uint8_t (*scaling_matrix4
)[16], uint8_t (*scaling_matrix8
)[64]){
7103 MpegEncContext
* const s
= &h
->s
;
7104 int fallback_sps
= !is_sps
&& sps
->scaling_matrix_present
;
7105 const uint8_t *fallback
[4] = {
7106 fallback_sps
? sps
->scaling_matrix4
[0] : default_scaling4
[0],
7107 fallback_sps
? sps
->scaling_matrix4
[3] : default_scaling4
[1],
7108 fallback_sps
? sps
->scaling_matrix8
[0] : default_scaling8
[0],
7109 fallback_sps
? sps
->scaling_matrix8
[1] : default_scaling8
[1]
7111 if(get_bits1(&s
->gb
)){
7112 sps
->scaling_matrix_present
|= is_sps
;
7113 decode_scaling_list(h
,scaling_matrix4
[0],16,default_scaling4
[0],fallback
[0]); // Intra, Y
7114 decode_scaling_list(h
,scaling_matrix4
[1],16,default_scaling4
[0],scaling_matrix4
[0]); // Intra, Cr
7115 decode_scaling_list(h
,scaling_matrix4
[2],16,default_scaling4
[0],scaling_matrix4
[1]); // Intra, Cb
7116 decode_scaling_list(h
,scaling_matrix4
[3],16,default_scaling4
[1],fallback
[1]); // Inter, Y
7117 decode_scaling_list(h
,scaling_matrix4
[4],16,default_scaling4
[1],scaling_matrix4
[3]); // Inter, Cr
7118 decode_scaling_list(h
,scaling_matrix4
[5],16,default_scaling4
[1],scaling_matrix4
[4]); // Inter, Cb
7119 if(is_sps
|| pps
->transform_8x8_mode
){
7120 decode_scaling_list(h
,scaling_matrix8
[0],64,default_scaling8
[0],fallback
[2]); // Intra, Y
7121 decode_scaling_list(h
,scaling_matrix8
[1],64,default_scaling8
[1],fallback
[3]); // Inter, Y
7123 } else if(fallback_sps
) {
7124 memcpy(scaling_matrix4
, sps
->scaling_matrix4
, 6*16*sizeof(uint8_t));
7125 memcpy(scaling_matrix8
, sps
->scaling_matrix8
, 2*64*sizeof(uint8_t));
7130 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7133 alloc_parameter_set(H264Context
*h
, void **vec
, const unsigned int id
, const unsigned int max
,
7134 const size_t size
, const char *name
)
7137 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "%s_id (%d) out of range\n", name
, id
);
7142 vec
[id
] = av_mallocz(size
);
7144 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cannot allocate memory for %s\n", name
);
7149 static inline int decode_seq_parameter_set(H264Context
*h
){
7150 MpegEncContext
* const s
= &h
->s
;
7151 int profile_idc
, level_idc
;
7152 unsigned int sps_id
, tmp
, mb_width
, mb_height
;
7156 profile_idc
= get_bits(&s
->gb
, 8);
7157 get_bits1(&s
->gb
); //constraint_set0_flag
7158 get_bits1(&s
->gb
); //constraint_set1_flag
7159 get_bits1(&s
->gb
); //constraint_set2_flag
7160 get_bits1(&s
->gb
); //constraint_set3_flag
7161 get_bits(&s
->gb
, 4); // reserved
7162 level_idc
= get_bits(&s
->gb
, 8);
7163 sps_id
= get_ue_golomb(&s
->gb
);
7165 sps
= alloc_parameter_set(h
, (void **)h
->sps_buffers
, sps_id
, MAX_SPS_COUNT
, sizeof(SPS
), "sps");
7169 sps
->profile_idc
= profile_idc
;
7170 sps
->level_idc
= level_idc
;
7172 if(sps
->profile_idc
>= 100){ //high profile
7173 if(get_ue_golomb(&s
->gb
) == 3) //chroma_format_idc
7174 get_bits1(&s
->gb
); //residual_color_transform_flag
7175 get_ue_golomb(&s
->gb
); //bit_depth_luma_minus8
7176 get_ue_golomb(&s
->gb
); //bit_depth_chroma_minus8
7177 sps
->transform_bypass
= get_bits1(&s
->gb
);
7178 decode_scaling_matrices(h
, sps
, NULL
, 1, sps
->scaling_matrix4
, sps
->scaling_matrix8
);
7180 sps
->scaling_matrix_present
= 0;
7182 sps
->log2_max_frame_num
= get_ue_golomb(&s
->gb
) + 4;
7183 sps
->poc_type
= get_ue_golomb(&s
->gb
);
7185 if(sps
->poc_type
== 0){ //FIXME #define
7186 sps
->log2_max_poc_lsb
= get_ue_golomb(&s
->gb
) + 4;
7187 } else if(sps
->poc_type
== 1){//FIXME #define
7188 sps
->delta_pic_order_always_zero_flag
= get_bits1(&s
->gb
);
7189 sps
->offset_for_non_ref_pic
= get_se_golomb(&s
->gb
);
7190 sps
->offset_for_top_to_bottom_field
= get_se_golomb(&s
->gb
);
7191 tmp
= get_ue_golomb(&s
->gb
);
7193 if(tmp
>= sizeof(sps
->offset_for_ref_frame
) / sizeof(sps
->offset_for_ref_frame
[0])){
7194 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "poc_cycle_length overflow %u\n", tmp
);
7197 sps
->poc_cycle_length
= tmp
;
7199 for(i
=0; i
<sps
->poc_cycle_length
; i
++)
7200 sps
->offset_for_ref_frame
[i
]= get_se_golomb(&s
->gb
);
7201 }else if(sps
->poc_type
!= 2){
7202 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal POC type %d\n", sps
->poc_type
);
7206 tmp
= get_ue_golomb(&s
->gb
);
7207 if(tmp
> MAX_PICTURE_COUNT
-2 || tmp
>= 32){
7208 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "too many reference frames\n");
7211 sps
->ref_frame_count
= tmp
;
7212 sps
->gaps_in_frame_num_allowed_flag
= get_bits1(&s
->gb
);
7213 mb_width
= get_ue_golomb(&s
->gb
) + 1;
7214 mb_height
= get_ue_golomb(&s
->gb
) + 1;
7215 if(mb_width
>= INT_MAX
/16 || mb_height
>= INT_MAX
/16 ||
7216 avcodec_check_dimensions(NULL
, 16*mb_width
, 16*mb_height
)){
7217 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_width/height overflow\n");
7220 sps
->mb_width
= mb_width
;
7221 sps
->mb_height
= mb_height
;
7223 sps
->frame_mbs_only_flag
= get_bits1(&s
->gb
);
7224 if(!sps
->frame_mbs_only_flag
)
7225 sps
->mb_aff
= get_bits1(&s
->gb
);
7229 sps
->direct_8x8_inference_flag
= get_bits1(&s
->gb
);
7231 #ifndef ALLOW_INTERLACE
7233 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "MBAFF support not included; enable it at compile-time.\n");
7235 if(!sps
->direct_8x8_inference_flag
&& sps
->mb_aff
)
7236 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "MBAFF + !direct_8x8_inference is not implemented\n");
7238 sps
->crop
= get_bits1(&s
->gb
);
7240 sps
->crop_left
= get_ue_golomb(&s
->gb
);
7241 sps
->crop_right
= get_ue_golomb(&s
->gb
);
7242 sps
->crop_top
= get_ue_golomb(&s
->gb
);
7243 sps
->crop_bottom
= get_ue_golomb(&s
->gb
);
7244 if(sps
->crop_left
|| sps
->crop_top
){
7245 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "insane cropping not completely supported, this could look slightly wrong ...\n");
7251 sps
->crop_bottom
= 0;
7254 sps
->vui_parameters_present_flag
= get_bits1(&s
->gb
);
7255 if( sps
->vui_parameters_present_flag
)
7256 decode_vui_parameters(h
, sps
);
7258 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7259 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7260 sps_id
, sps
->profile_idc
, sps
->level_idc
,
7262 sps
->ref_frame_count
,
7263 sps
->mb_width
, sps
->mb_height
,
7264 sps
->frame_mbs_only_flag
? "FRM" : (sps
->mb_aff
? "MB-AFF" : "PIC-AFF"),
7265 sps
->direct_8x8_inference_flag
? "8B8" : "",
7266 sps
->crop_left
, sps
->crop_right
,
7267 sps
->crop_top
, sps
->crop_bottom
,
7268 sps
->vui_parameters_present_flag
? "VUI" : ""
7275 build_qp_table(PPS
*pps
, int t
, int index
)
7278 for(i
= 0; i
< 255; i
++)
7279 pps
->chroma_qp_table
[t
][i
& 0xff] = chroma_qp
[av_clip(i
+ index
, 0, 51)];
7282 static inline int decode_picture_parameter_set(H264Context
*h
, int bit_length
){
7283 MpegEncContext
* const s
= &h
->s
;
7284 unsigned int tmp
, pps_id
= get_ue_golomb(&s
->gb
);
7287 pps
= alloc_parameter_set(h
, (void **)h
->pps_buffers
, pps_id
, MAX_PPS_COUNT
, sizeof(PPS
), "pps");
7291 tmp
= get_ue_golomb(&s
->gb
);
7292 if(tmp
>=MAX_SPS_COUNT
|| h
->sps_buffers
[tmp
] == NULL
){
7293 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "sps_id out of range\n");
7298 pps
->cabac
= get_bits1(&s
->gb
);
7299 pps
->pic_order_present
= get_bits1(&s
->gb
);
7300 pps
->slice_group_count
= get_ue_golomb(&s
->gb
) + 1;
7301 if(pps
->slice_group_count
> 1 ){
7302 pps
->mb_slice_group_map_type
= get_ue_golomb(&s
->gb
);
7303 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "FMO not supported\n");
7304 switch(pps
->mb_slice_group_map_type
){
7307 | for( i
= 0; i
<= num_slice_groups_minus1
; i
++ ) | | |
7308 | run_length
[ i
] |1 |ue(v
) |
7313 | for( i
= 0; i
< num_slice_groups_minus1
; i
++ ) | | |
7315 | top_left_mb
[ i
] |1 |ue(v
) |
7316 | bottom_right_mb
[ i
] |1 |ue(v
) |
7324 | slice_group_change_direction_flag
|1 |u(1) |
7325 | slice_group_change_rate_minus1
|1 |ue(v
) |
7330 | slice_group_id_cnt_minus1
|1 |ue(v
) |
7331 | for( i
= 0; i
<= slice_group_id_cnt_minus1
; i
++ | | |
7333 | slice_group_id
[ i
] |1 |u(v
) |
7338 pps
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
7339 pps
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
7340 if(pps
->ref_count
[0]-1 > 32-1 || pps
->ref_count
[1]-1 > 32-1){
7341 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow (pps)\n");
7342 pps
->ref_count
[0]= pps
->ref_count
[1]= 1;
7346 pps
->weighted_pred
= get_bits1(&s
->gb
);
7347 pps
->weighted_bipred_idc
= get_bits(&s
->gb
, 2);
7348 pps
->init_qp
= get_se_golomb(&s
->gb
) + 26;
7349 pps
->init_qs
= get_se_golomb(&s
->gb
) + 26;
7350 pps
->chroma_qp_index_offset
[0]= get_se_golomb(&s
->gb
);
7351 pps
->deblocking_filter_parameters_present
= get_bits1(&s
->gb
);
7352 pps
->constrained_intra_pred
= get_bits1(&s
->gb
);
7353 pps
->redundant_pic_cnt_present
= get_bits1(&s
->gb
);
7355 pps
->transform_8x8_mode
= 0;
7356 h
->dequant_coeff_pps
= -1; //contents of sps/pps can change even if id doesn't, so reinit
7357 memset(pps
->scaling_matrix4
, 16, 6*16*sizeof(uint8_t));
7358 memset(pps
->scaling_matrix8
, 16, 2*64*sizeof(uint8_t));
7360 if(get_bits_count(&s
->gb
) < bit_length
){
7361 pps
->transform_8x8_mode
= get_bits1(&s
->gb
);
7362 decode_scaling_matrices(h
, h
->sps_buffers
[pps
->sps_id
], pps
, 0, pps
->scaling_matrix4
, pps
->scaling_matrix8
);
7363 pps
->chroma_qp_index_offset
[1]= get_se_golomb(&s
->gb
); //second_chroma_qp_index_offset
7365 pps
->chroma_qp_index_offset
[1]= pps
->chroma_qp_index_offset
[0];
7368 build_qp_table(pps
, 0, pps
->chroma_qp_index_offset
[0]);
7369 if(pps
->chroma_qp_index_offset
[0] != pps
->chroma_qp_index_offset
[1]) {
7370 build_qp_table(pps
, 1, pps
->chroma_qp_index_offset
[1]);
7371 h
->pps
.chroma_qp_diff
= 1;
7373 memcpy(pps
->chroma_qp_table
[1], pps
->chroma_qp_table
[0], 256);
7375 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7376 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7377 pps_id
, pps
->sps_id
,
7378 pps
->cabac
? "CABAC" : "CAVLC",
7379 pps
->slice_group_count
,
7380 pps
->ref_count
[0], pps
->ref_count
[1],
7381 pps
->weighted_pred
? "weighted" : "",
7382 pps
->init_qp
, pps
->init_qs
, pps
->chroma_qp_index_offset
[0], pps
->chroma_qp_index_offset
[1],
7383 pps
->deblocking_filter_parameters_present
? "LPAR" : "",
7384 pps
->constrained_intra_pred
? "CONSTR" : "",
7385 pps
->redundant_pic_cnt_present
? "REDU" : "",
7386 pps
->transform_8x8_mode
? "8x8DCT" : ""
7394 * Call decode_slice() for each context.
7396 * @param h h264 master context
7397 * @param context_count number of contexts to execute
7399 static void execute_decode_slices(H264Context
*h
, int context_count
){
7400 MpegEncContext
* const s
= &h
->s
;
7401 AVCodecContext
* const avctx
= s
->avctx
;
7405 if(context_count
== 1) {
7406 decode_slice(avctx
, h
);
7408 for(i
= 1; i
< context_count
; i
++) {
7409 hx
= h
->thread_context
[i
];
7410 hx
->s
.error_resilience
= avctx
->error_resilience
;
7411 hx
->s
.error_count
= 0;
7414 avctx
->execute(avctx
, (void *)decode_slice
,
7415 (void **)h
->thread_context
, NULL
, context_count
);
7417 /* pull back stuff from slices to master context */
7418 hx
= h
->thread_context
[context_count
- 1];
7419 s
->mb_x
= hx
->s
.mb_x
;
7420 s
->mb_y
= hx
->s
.mb_y
;
7421 s
->dropable
= hx
->s
.dropable
;
7422 s
->picture_structure
= hx
->s
.picture_structure
;
7423 for(i
= 1; i
< context_count
; i
++)
7424 h
->s
.error_count
+= h
->thread_context
[i
]->s
.error_count
;
7429 static int decode_nal_units(H264Context
*h
, const uint8_t *buf
, int buf_size
){
7430 MpegEncContext
* const s
= &h
->s
;
7431 AVCodecContext
* const avctx
= s
->avctx
;
7433 H264Context
*hx
; ///< thread context
7434 int context_count
= 0;
7436 h
->max_contexts
= avctx
->thread_count
;
7439 for(i
=0; i
<50; i
++){
7440 av_log(NULL
, AV_LOG_ERROR
,"%02X ", buf
[i
]);
7443 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
)){
7444 h
->current_slice
= 0;
7445 if (!s
->first_field
)
7446 s
->current_picture_ptr
= NULL
;
7458 if(buf_index
>= buf_size
) break;
7460 for(i
= 0; i
< h
->nal_length_size
; i
++)
7461 nalsize
= (nalsize
<< 8) | buf
[buf_index
++];
7462 if(nalsize
<= 1 || (nalsize
+buf_index
> buf_size
)){
7467 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "AVC: nal size %d\n", nalsize
);
7472 // start code prefix search
7473 for(; buf_index
+ 3 < buf_size
; buf_index
++){
7474 // This should always succeed in the first iteration.
7475 if(buf
[buf_index
] == 0 && buf
[buf_index
+1] == 0 && buf
[buf_index
+2] == 1)
7479 if(buf_index
+3 >= buf_size
) break;
7484 hx
= h
->thread_context
[context_count
];
7486 ptr
= decode_nal(hx
, buf
+ buf_index
, &dst_length
, &consumed
, h
->is_avc
? nalsize
: buf_size
- buf_index
);
7487 if (ptr
==NULL
|| dst_length
< 0){
7490 while(ptr
[dst_length
- 1] == 0 && dst_length
> 0)
7492 bit_length
= !dst_length
? 0 : (8*dst_length
- decode_rbsp_trailing(h
, ptr
+ dst_length
- 1));
7494 if(s
->avctx
->debug
&FF_DEBUG_STARTCODE
){
7495 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "NAL %d at %d/%d length %d\n", hx
->nal_unit_type
, buf_index
, buf_size
, dst_length
);
7498 if (h
->is_avc
&& (nalsize
!= consumed
)){
7499 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "AVC: Consumed only %d bytes instead of %d\n", consumed
, nalsize
);
7503 buf_index
+= consumed
;
7505 if( (s
->hurry_up
== 1 && h
->nal_ref_idc
== 0) //FIXME do not discard SEI id
7506 ||(avctx
->skip_frame
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
7511 switch(hx
->nal_unit_type
){
7513 if (h
->nal_unit_type
!= NAL_IDR_SLICE
) {
7514 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "Invalid mix of idr and non-idr slices");
7517 idr(h
); //FIXME ensure we don't loose some frames if there is reordering
7519 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
7521 hx
->inter_gb_ptr
= &hx
->s
.gb
;
7522 hx
->s
.data_partitioning
= 0;
7524 if((err
= decode_slice_header(hx
, h
)))
7527 s
->current_picture_ptr
->key_frame
|= (hx
->nal_unit_type
== NAL_IDR_SLICE
);
7528 if(hx
->redundant_pic_count
==0 && hx
->s
.hurry_up
< 5
7529 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
)
7530 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| hx
->slice_type
!=FF_B_TYPE
)
7531 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| hx
->slice_type
==FF_I_TYPE
)
7532 && avctx
->skip_frame
< AVDISCARD_ALL
)
7536 init_get_bits(&hx
->s
.gb
, ptr
, bit_length
);
7538 hx
->inter_gb_ptr
= NULL
;
7539 hx
->s
.data_partitioning
= 1;
7541 err
= decode_slice_header(hx
, h
);
7544 init_get_bits(&hx
->intra_gb
, ptr
, bit_length
);
7545 hx
->intra_gb_ptr
= &hx
->intra_gb
;
7548 init_get_bits(&hx
->inter_gb
, ptr
, bit_length
);
7549 hx
->inter_gb_ptr
= &hx
->inter_gb
;
7551 if(hx
->redundant_pic_count
==0 && hx
->intra_gb_ptr
&& hx
->s
.data_partitioning
7552 && s
->context_initialized
7554 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| hx
->nal_ref_idc
)
7555 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| hx
->slice_type
!=FF_B_TYPE
)
7556 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| hx
->slice_type
==FF_I_TYPE
)
7557 && avctx
->skip_frame
< AVDISCARD_ALL
)
7561 init_get_bits(&s
->gb
, ptr
, bit_length
);
7565 init_get_bits(&s
->gb
, ptr
, bit_length
);
7566 decode_seq_parameter_set(h
);
7568 if(s
->flags
& CODEC_FLAG_LOW_DELAY
)
7571 if(avctx
->has_b_frames
< 2)
7572 avctx
->has_b_frames
= !s
->low_delay
;
7575 init_get_bits(&s
->gb
, ptr
, bit_length
);
7577 decode_picture_parameter_set(h
, bit_length
);
7581 case NAL_END_SEQUENCE
:
7582 case NAL_END_STREAM
:
7583 case NAL_FILLER_DATA
:
7585 case NAL_AUXILIARY_SLICE
:
7588 av_log(avctx
, AV_LOG_DEBUG
, "Unknown NAL code: %d (%d bits)\n", h
->nal_unit_type
, bit_length
);
7591 if(context_count
== h
->max_contexts
) {
7592 execute_decode_slices(h
, context_count
);
7597 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "decode_slice_header error\n");
7599 /* Slice could not be decoded in parallel mode, copy down
7600 * NAL unit stuff to context 0 and restart. Note that
7601 * rbsp_buffer is not transfered, but since we no longer
7602 * run in parallel mode this should not be an issue. */
7603 h
->nal_unit_type
= hx
->nal_unit_type
;
7604 h
->nal_ref_idc
= hx
->nal_ref_idc
;
7610 execute_decode_slices(h
, context_count
);
7615 * returns the number of bytes consumed for building the current frame
7617 static int get_consumed_bytes(MpegEncContext
*s
, int pos
, int buf_size
){
7618 if(s
->flags
&CODEC_FLAG_TRUNCATED
){
7619 pos
-= s
->parse_context
.last_index
;
7620 if(pos
<0) pos
=0; // FIXME remove (unneeded?)
7624 if(pos
==0) pos
=1; //avoid infinite loops (i doubt that is needed but ...)
7625 if(pos
+10>buf_size
) pos
=buf_size
; // oops ;)
7631 static int decode_frame(AVCodecContext
*avctx
,
7632 void *data
, int *data_size
,
7633 const uint8_t *buf
, int buf_size
)
7635 H264Context
*h
= avctx
->priv_data
;
7636 MpegEncContext
*s
= &h
->s
;
7637 AVFrame
*pict
= data
;
7640 s
->flags
= avctx
->flags
;
7641 s
->flags2
= avctx
->flags2
;
7643 /* no supplementary picture */
7644 if (buf_size
== 0) {
7648 //FIXME factorize this with the output code below
7649 out
= h
->delayed_pic
[0];
7651 for(i
=1; h
->delayed_pic
[i
] && !h
->delayed_pic
[i
]->key_frame
; i
++)
7652 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7653 out
= h
->delayed_pic
[i
];
7657 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7658 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7661 *data_size
= sizeof(AVFrame
);
7662 *pict
= *(AVFrame
*)out
;
7668 if(s
->flags
&CODEC_FLAG_TRUNCATED
){
7669 int next
= ff_h264_find_frame_end(h
, buf
, buf_size
);
7671 if( ff_combine_frame(&s
->parse_context
, next
, (const uint8_t **)&buf
, &buf_size
) < 0 )
7673 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7676 if(h
->is_avc
&& !h
->got_avcC
) {
7677 int i
, cnt
, nalsize
;
7678 unsigned char *p
= avctx
->extradata
;
7679 if(avctx
->extradata_size
< 7) {
7680 av_log(avctx
, AV_LOG_ERROR
, "avcC too short\n");
7684 av_log(avctx
, AV_LOG_ERROR
, "Unknown avcC version %d\n", *p
);
7687 /* sps and pps in the avcC always have length coded with 2 bytes,
7688 so put a fake nal_length_size = 2 while parsing them */
7689 h
->nal_length_size
= 2;
7690 // Decode sps from avcC
7691 cnt
= *(p
+5) & 0x1f; // Number of sps
7693 for (i
= 0; i
< cnt
; i
++) {
7694 nalsize
= AV_RB16(p
) + 2;
7695 if(decode_nal_units(h
, p
, nalsize
) < 0) {
7696 av_log(avctx
, AV_LOG_ERROR
, "Decoding sps %d from avcC failed\n", i
);
7701 // Decode pps from avcC
7702 cnt
= *(p
++); // Number of pps
7703 for (i
= 0; i
< cnt
; i
++) {
7704 nalsize
= AV_RB16(p
) + 2;
7705 if(decode_nal_units(h
, p
, nalsize
) != nalsize
) {
7706 av_log(avctx
, AV_LOG_ERROR
, "Decoding pps %d from avcC failed\n", i
);
7711 // Now store right nal length size, that will be use to parse all other nals
7712 h
->nal_length_size
= ((*(((char*)(avctx
->extradata
))+4))&0x03)+1;
7713 // Do not reparse avcC
7717 if(avctx
->frame_number
==0 && !h
->is_avc
&& s
->avctx
->extradata_size
){
7718 if(decode_nal_units(h
, s
->avctx
->extradata
, s
->avctx
->extradata_size
) < 0)
7722 buf_index
=decode_nal_units(h
, buf
, buf_size
);
7726 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
) && !s
->current_picture_ptr
){
7727 if (avctx
->skip_frame
>= AVDISCARD_NONREF
|| s
->hurry_up
) return 0;
7728 av_log(avctx
, AV_LOG_ERROR
, "no frame!\n");
7732 if(!(s
->flags2
& CODEC_FLAG2_CHUNKS
) || (s
->mb_y
>= s
->mb_height
&& s
->mb_height
)){
7733 Picture
*out
= s
->current_picture_ptr
;
7734 Picture
*cur
= s
->current_picture_ptr
;
7735 Picture
*prev
= h
->delayed_output_pic
;
7736 int i
, pics
, cross_idr
, out_of_order
, out_idx
;
7740 s
->current_picture_ptr
->qscale_type
= FF_QSCALE_TYPE_H264
;
7741 s
->current_picture_ptr
->pict_type
= s
->pict_type
;
7743 h
->prev_frame_num_offset
= h
->frame_num_offset
;
7744 h
->prev_frame_num
= h
->frame_num
;
7746 h
->prev_poc_msb
= h
->poc_msb
;
7747 h
->prev_poc_lsb
= h
->poc_lsb
;
7748 execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
7752 * FIXME: Error handling code does not seem to support interlaced
7753 * when slices span multiple rows
7754 * The ff_er_add_slice calls don't work right for bottom
7755 * fields; they cause massive erroneous error concealing
7756 * Error marking covers both fields (top and bottom).
7757 * This causes a mismatched s->error_count
7758 * and a bad error table. Further, the error count goes to
7759 * INT_MAX when called for bottom field, because mb_y is
7760 * past end by one (callers fault) and resync_mb_y != 0
7761 * causes problems for the first MB line, too.
7768 if (s
->first_field
) {
7769 /* Wait for second field. */
7773 cur
->interlaced_frame
= FIELD_OR_MBAFF_PICTURE
;
7774 /* Derive top_field_first from field pocs. */
7775 cur
->top_field_first
= cur
->field_poc
[0] < cur
->field_poc
[1];
7777 //FIXME do something with unavailable reference frames
7779 #if 0 //decode order
7780 *data_size
= sizeof(AVFrame
);
7782 /* Sort B-frames into display order */
7784 if(h
->sps
.bitstream_restriction_flag
7785 && s
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
){
7786 s
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
7791 while(h
->delayed_pic
[pics
]) pics
++;
7793 assert(pics
+1 < sizeof(h
->delayed_pic
) / sizeof(h
->delayed_pic
[0]));
7795 h
->delayed_pic
[pics
++] = cur
;
7796 if(cur
->reference
== 0)
7797 cur
->reference
= DELAYED_PIC_REF
;
7800 for(i
=0; h
->delayed_pic
[i
]; i
++)
7801 if(h
->delayed_pic
[i
]->key_frame
|| h
->delayed_pic
[i
]->poc
==0)
7804 out
= h
->delayed_pic
[0];
7806 for(i
=1; h
->delayed_pic
[i
] && !h
->delayed_pic
[i
]->key_frame
; i
++)
7807 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7808 out
= h
->delayed_pic
[i
];
7812 out_of_order
= !cross_idr
&& prev
&& out
->poc
< prev
->poc
;
7813 if(h
->sps
.bitstream_restriction_flag
&& s
->avctx
->has_b_frames
>= h
->sps
.num_reorder_frames
)
7815 else if(prev
&& pics
<= s
->avctx
->has_b_frames
)
7817 else if((out_of_order
&& pics
-1 == s
->avctx
->has_b_frames
&& pics
< 15)
7819 ((!cross_idr
&& prev
&& out
->poc
> prev
->poc
+ 2)
7820 || cur
->pict_type
== FF_B_TYPE
)))
7823 s
->avctx
->has_b_frames
++;
7826 else if(out_of_order
)
7829 if(out_of_order
|| pics
> s
->avctx
->has_b_frames
){
7830 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7831 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7837 *data_size
= sizeof(AVFrame
);
7838 if(prev
&& prev
!= out
&& prev
->reference
== DELAYED_PIC_REF
)
7839 prev
->reference
= 0;
7840 h
->delayed_output_pic
= out
;
7844 *pict
= *(AVFrame
*)out
;
7846 av_log(avctx
, AV_LOG_DEBUG
, "no picture\n");
7850 assert(pict
->data
[0] || !*data_size
);
7851 ff_print_debug_info(s
, pict
);
7852 //printf("out %d\n", (int)pict->data[0]);
7855 /* Return the Picture timestamp as the frame number */
7856 /* we subtract 1 because it is added on utils.c */
7857 avctx
->frame_number
= s
->picture_number
- 1;
7859 return get_consumed_bytes(s
, buf_index
, buf_size
);
7862 static inline void fill_mb_avail(H264Context
*h
){
7863 MpegEncContext
* const s
= &h
->s
;
7864 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
7867 h
->mb_avail
[0]= s
->mb_x
&& h
->slice_table
[mb_xy
- s
->mb_stride
- 1] == h
->slice_num
;
7868 h
->mb_avail
[1]= h
->slice_table
[mb_xy
- s
->mb_stride
] == h
->slice_num
;
7869 h
->mb_avail
[2]= s
->mb_x
+1 < s
->mb_width
&& h
->slice_table
[mb_xy
- s
->mb_stride
+ 1] == h
->slice_num
;
7875 h
->mb_avail
[3]= s
->mb_x
&& h
->slice_table
[mb_xy
- 1] == h
->slice_num
;
7876 h
->mb_avail
[4]= 1; //FIXME move out
7877 h
->mb_avail
[5]= 0; //FIXME move out
7885 #define SIZE (COUNT*40)
7891 // int int_temp[10000];
7893 AVCodecContext avctx
;
7895 dsputil_init(&dsp
, &avctx
);
7897 init_put_bits(&pb
, temp
, SIZE
);
7898 printf("testing unsigned exp golomb\n");
7899 for(i
=0; i
<COUNT
; i
++){
7901 set_ue_golomb(&pb
, i
);
7902 STOP_TIMER("set_ue_golomb");
7904 flush_put_bits(&pb
);
7906 init_get_bits(&gb
, temp
, 8*SIZE
);
7907 for(i
=0; i
<COUNT
; i
++){
7910 s
= show_bits(&gb
, 24);
7913 j
= get_ue_golomb(&gb
);
7915 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7918 STOP_TIMER("get_ue_golomb");
7922 init_put_bits(&pb
, temp
, SIZE
);
7923 printf("testing signed exp golomb\n");
7924 for(i
=0; i
<COUNT
; i
++){
7926 set_se_golomb(&pb
, i
- COUNT
/2);
7927 STOP_TIMER("set_se_golomb");
7929 flush_put_bits(&pb
);
7931 init_get_bits(&gb
, temp
, 8*SIZE
);
7932 for(i
=0; i
<COUNT
; i
++){
7935 s
= show_bits(&gb
, 24);
7938 j
= get_se_golomb(&gb
);
7939 if(j
!= i
- COUNT
/2){
7940 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7943 STOP_TIMER("get_se_golomb");
7947 printf("testing 4x4 (I)DCT\n");
7950 uint8_t src
[16], ref
[16];
7951 uint64_t error
= 0, max_error
=0;
7953 for(i
=0; i
<COUNT
; i
++){
7955 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7956 for(j
=0; j
<16; j
++){
7957 ref
[j
]= random()%255;
7958 src
[j
]= random()%255;
7961 h264_diff_dct_c(block
, src
, ref
, 4);
7964 for(j
=0; j
<16; j
++){
7965 // printf("%d ", block[j]);
7966 block
[j
]= block
[j
]*4;
7967 if(j
&1) block
[j
]= (block
[j
]*4 + 2)/5;
7968 if(j
&4) block
[j
]= (block
[j
]*4 + 2)/5;
7972 s
->dsp
.h264_idct_add(ref
, block
, 4);
7973 /* for(j=0; j<16; j++){
7974 printf("%d ", ref[j]);
7978 for(j
=0; j
<16; j
++){
7979 int diff
= FFABS(src
[j
] - ref
[j
]);
7982 max_error
= FFMAX(max_error
, diff
);
7985 printf("error=%f max_error=%d\n", ((float)error
)/COUNT
/16, (int)max_error
);
7986 printf("testing quantizer\n");
7987 for(qp
=0; qp
<52; qp
++){
7989 src1_block
[i
]= src2_block
[i
]= random()%255;
7992 printf("Testing NAL layer\n");
7994 uint8_t bitstream
[COUNT
];
7995 uint8_t nal
[COUNT
*2];
7997 memset(&h
, 0, sizeof(H264Context
));
7999 for(i
=0; i
<COUNT
; i
++){
8007 for(j
=0; j
<COUNT
; j
++){
8008 bitstream
[j
]= (random() % 255) + 1;
8011 for(j
=0; j
<zeros
; j
++){
8012 int pos
= random() % COUNT
;
8013 while(bitstream
[pos
] == 0){
8022 nal_length
= encode_nal(&h
, nal
, bitstream
, COUNT
, COUNT
*2);
8024 printf("encoding failed\n");
8028 out
= decode_nal(&h
, nal
, &out_length
, &consumed
, nal_length
);
8032 if(out_length
!= COUNT
){
8033 printf("incorrect length %d %d\n", out_length
, COUNT
);
8037 if(consumed
!= nal_length
){
8038 printf("incorrect consumed length %d %d\n", nal_length
, consumed
);
8042 if(memcmp(bitstream
, out
, COUNT
)){
8043 printf("mismatch\n");
8049 printf("Testing RBSP\n");
8057 static av_cold
int decode_end(AVCodecContext
*avctx
)
8059 H264Context
*h
= avctx
->priv_data
;
8060 MpegEncContext
*s
= &h
->s
;
8062 av_freep(&h
->rbsp_buffer
[0]);
8063 av_freep(&h
->rbsp_buffer
[1]);
8064 free_tables(h
); //FIXME cleanup init stuff perhaps
8067 // memset(h, 0, sizeof(H264Context));
8073 AVCodec h264_decoder
= {
8077 sizeof(H264Context
),
8082 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1
| CODEC_CAP_TRUNCATED
| CODEC_CAP_DELAY
,