Use get_bits_left() instead of size_in_bits - get_bits_count().
[ffmpeg-lucabe.git] / libavcodec / h264.c
blob cf6771aa79e7a42187aba2f43e498595af51a58c
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "internal.h"
29 #include "dsputil.h"
30 #include "avcodec.h"
31 #include "mpegvideo.h"
32 #include "h264.h"
33 #include "h264data.h"
34 #include "h264_parser.h"
35 #include "golomb.h"
36 #include "mathops.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
40 #include "cabac.h"
41 #if ARCH_X86
42 #include "x86/h264_i386.h"
43 #endif
45 //#undef NDEBUG
46 #include <assert.h>
48 /**
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
52 #define DELAYED_PIC_REF 4
54 static VLC coeff_token_vlc[4];
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
74 static VLC run7_vlc;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
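/* The static tables above hold the prebuilt VLC lookup tables used by CAVLC
 * residual decoding: coeff_token (one VLC per nC range), the chroma DC
 * coeff_token, total_zeros, chroma DC total_zeros and the run_before codes.
 * They are filled once at init time and shared by all decoder instances. */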
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
80 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static Picture * remove_long(H264Context *h, int i, int ref_mask);
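/* pack16to32() packs two 16-bit values (e.g. the x/y components of a motion
 * vector, or a pair of reference indices) into one 32-bit word in memory
 * order, so that rows of the mv/ref caches can be read and written with a
 * single 32-bit access; the HAVE_BIGENDIAN branch keeps the byte layout
 * identical on both endiannesses. */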
84 static av_always_inline uint32_t pack16to32(int a, int b){
85 #if HAVE_BIGENDIAN
86 return (b&0xFFFF) + (a<<16);
87 #else
88 return (a&0xFFFF) + (b<<16);
89 #endif
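/* rem6[q] = q % 6 and div6[q] = q / 6 for the valid QP range 0..51: H.264
 * dequantisation selects a scale with qp % 6 and shifts by qp / 6, and these
 * lookup tables avoid doing the division per block. */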
92 static const uint8_t rem6[52]={
93 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
96 static const uint8_t div6[52]={
97 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
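/* left_block_options[] selects which sub-block indices of the left neighbour
 * macroblock(s) feed the caches: entry 0 is the normal case, the other
 * entries cover the MBAFF cases where the current MB and its left neighbour
 * differ in frame/field coding (see fill_caches() below). */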
100 static const uint8_t left_block_options[4][8]={
101 {0,1,2,3,7,10,8,11},
102 {2,2,3,3,8,11,8,11},
103 {0,0,1,1,7,10,7,10},
104 {0,2,0,2,7,10,7,10}
107 #define LEVEL_TAB_BITS 8
108 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
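/* fill_caches() gathers everything the decoder needs from the neighbouring
 * macroblocks (intra 4x4 prediction modes, non-zero coefficient counts,
 * motion vectors, reference indices, mvd and direct flags) into small per-MB
 * cache arrays indexed through scan8[], resolving the frame/field neighbour
 * pairing when MBAFF is used. With for_deblock set, only the reduced subset
 * needed by the loop filter is filled. */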
110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
111 MpegEncContext * const s = &h->s;
112 const int mb_xy= h->mb_xy;
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
115 const uint8_t * left_block;
116 int topleft_partition= -1;
117 int i;
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
121 //FIXME deblocking could skip the intra and nnz parts.
122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
123 return;
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
131 left_block = left_block_options[0];
132 if(FRAME_MBAFF){
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
142 const int bottom = (s->mb_y & 1);
143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
146 top_xy -= s->mb_stride;
148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
149 topleft_xy -= s->mb_stride;
150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
151 topleft_xy += s->mb_stride;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition = 0;
155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
156 topright_xy -= s->mb_stride;
158 if (left_mb_field_flag != curr_mb_field_flag) {
159 left_xy[1] = left_xy[0] = pair_xy - 1;
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
163 } else {
164 left_block= left_block_options[2 - bottom];
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
172 if(for_deblock){
173 topleft_type = 0;
174 topright_type = 0;
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
179 if(MB_MBAFF && !IS_INTRA(mb_type)){
180 int list;
181 for(list=0; list<h->list_count; list++){
182 //These values were changed for ease of performing MC; we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type,list)){
186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
189 ref += h->b8_stride;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
195 }else{
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
202 if(IS_INTRA(mb_type)){
203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
204 h->topleft_samples_available=
205 h->top_samples_available=
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
209 if(!(top_type & type_mask)){
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
216 if(!(left_type[0] & type_mask)){
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
220 if(!(left_type[1] & type_mask)){
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
224 }else{
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
233 }else{
234 if(!(left_type[0] & type_mask)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(topleft_type & type_mask))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!(topright_type & type_mask))
244 h->topright_samples_available&= 0xFBFF;
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
252 }else{
253 int pred;
254 if(!(top_type & type_mask))
255 pred= -1;
256 else{
257 pred= 2;
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
264 for(i=0; i<2; i++){
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
268 }else{
269 int pred;
270 if(!(left_type[i] & type_mask))
271 pred= -1;
272 else{
273 pred= 2;
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/*
285 0 . T T. T T T T
286 1 L . .L . . . .
287 2 L . .L . . . .
288 3 . T TL . . . .
289 4 L . .L . . . .
290 5 L . .. . . . .
*/
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
293 if(top_type){
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
305 }else{
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
319 for (i=0; i<2; i++) {
320 if(left_type[i]){
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
325 }else{
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
333 if( h->pps.cabac ) {
334 // top_cbp
335 if(top_type) {
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
338 h->top_cbp = 0x1C0;
339 } else {
340 h->top_cbp = 0;
342 // left_cbp
343 if (left_type[0]) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
346 h->left_cbp = 0x1C0;
347 } else {
348 h->left_cbp = 0;
350 if (left_type[0]) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
353 if (left_type[1]) {
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
358 #if 1
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
360 int list;
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
368 continue;
370 h->mv_cache_clean[list]= 0;
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 }else{
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
391 for(i=0; i<2; i++){
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 }else{
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
409 continue;
411 if(USES_LIST(topleft_type, list)){
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 }else{
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 }else{
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
432 continue;
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
445 if( h->pps.cabac ) {
446 /* XXX yuck, load mvd */
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 }else{
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 }else{
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 }else{
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
481 if(h->slice_type_nos == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 }else{
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 else
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 else
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
510 if(FRAME_MBAFF){
511 #define MAP_MVS\
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
522 if(MB_FIELD){
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
529 MAP_MVS
530 #undef MAP_F2F
531 }else{
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
538 MAP_MVS
539 #undef MAP_F2F
544 #endif
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available (if needed) and changes the dc mode so it only uses the available blocks.
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
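/* top[]/left[] are indexed by the 4x4 intra prediction mode: a negative entry
 * means the mode cannot be used when that neighbour is missing (bitstream
 * error), 0 means the mode is usable as is, and a positive entry is the DC
 * variant to substitute (e.g. DC_PRED becomes LEFT_DC_PRED when the top row
 * is unavailable). */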
568 int i;
570 if(!(h->top_samples_available&0x8000)){
571 for(i=0; i<4; i++){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573 if(status<0){
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
575 return -1;
576 } else if(status){
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
584 for(i=0; i<4; i++){
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587 if(status<0){
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
589 return -1;
590 } else if(status){
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
597 return 0;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available (if needed) and changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
608 if(mode > 6U) {
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
610 return -1;
613 if(!(h->top_samples_available&0x8000)){
614 mode= top[ mode ];
615 if(mode<0){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
617 return -1;
621 if((h->left_samples_available&0x8080) != 0x8080){
622 mode= left[ mode ];
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
626 if(mode<0){
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
628 return -1;
632 return mode;
636 * gets the predicted intra4x4 prediction mode.
638 static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
646 if(min<0) return DC_PRED;
647 else return min;
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
674 static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
678 int i= left + top;
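/* With CAVLC, neighbours that are unavailable are stored as 64 in the cache,
 * so left + top >= 64 skips the averaging below and the final &31 strips the
 * sentinel again: the prediction is (nA+nB+1)>>1 when both neighbours exist,
 * the single available count when only one does, and 0 when neither is
 * available. */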
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
684 return i&31;
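/* fetch_diagonal_mv() returns the reference index of the top-right neighbour
 * of block i and points *C at its motion vector, falling back to the top-left
 * neighbour when the top-right block is unavailable. The FRAME_MBAFF path
 * rescales the vertical component when the neighbour has the opposite
 * field/frame structure to the current MB. */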
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
693 if(FRAME_MBAFF){
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
695 const int16_t *mv;
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
699 if(!MB_FIELD
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 if(!MB_FIELD
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
723 if(MB_FIELD
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 && i >= scan8[0]+8){
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
730 #undef SET_DIAG_MV
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
735 return topright_ref;
736 }else{
737 tprintf(s->avctx, "topright MV not available\n");
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757 const int16_t * C;
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
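/* Standard H.264 median MV prediction: if exactly one neighbour uses the same
 * reference, its MV is taken directly; otherwise the component-wise median of
 * left (A), top (B) and top-right/top-left (C) is used, with a special case
 * when only the left neighbour is available. */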
762 /* mv_cache
763 B . . A T T T T
764 U . . L . . , .
765 U . . L . . . .
766 U . . L . . , .
767 . . . L . . . .
*/
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
777 if(left_ref==ref){
778 *mx= A[0];
779 *my= A[1];
780 }else if(top_ref==ref){
781 *mx= B[0];
782 *my= B[1];
783 }else{
784 *mx= C[0];
785 *my= C[1];
787 }else{
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
789 *mx= A[0];
790 *my= A[1];
791 }else{
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807 if(n==0){
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
813 if(top_ref == ref){
814 *mx= B[0];
815 *my= B[1];
816 return;
818 }else{
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
824 if(left_ref == ref){
825 *mx= A[0];
826 *my= A[1];
827 return;
831 //RARE
832 pred_motion(h, n, 4, list, ref, mx, my);
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842 if(n==0){
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
848 if(left_ref == ref){
849 *mx= A[0];
850 *my= A[1];
851 return;
853 }else{
854 const int16_t * C;
855 int diagonal_ref;
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861 if(diagonal_ref == ref){
862 *mx= C[0];
863 *my= C[1];
864 return;
868 //RARE
869 pred_motion(h, n, 2, list, ref, mx, my);
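/* P-skip MV prediction: the skip MV is forced to (0,0) if the top or left
 * neighbour is unavailable, or if either of them has reference 0 with a zero
 * motion vector; otherwise the normal 16x16 median prediction is used. */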
872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
882 *mx = *my = 0;
883 return;
886 pred_motion(h, 0, 4, 0, 0, mx, my);
888 return;
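/* Temporal direct-mode distance scaling: with td the POC distance between the
 * two reference pictures and tb the distance from the current picture to the
 * list-0 reference, the scale factor is
 *   clip(((16384 + |td|/2) / td * tb + 32) >> 6, -1024, 1023)
 * i.e. roughly 256 * tb / td; long-term references and td == 0 use the
 * neutral factor 256. */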
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
895 return 256;
896 }else{
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
903 static inline void direct_dist_scale_factor(H264Context * const h){
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906 const int poc1 = h->ref_list[1][0].poc;
907 int i, field;
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
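/* fill_colmap() builds the table mapping a reference index of the co-located
 * (list 1) picture onto the current slice's list-0 indices, by matching the
 * key 4*frame_num + (reference&3); mbafi selects the extra per-field entries
 * used with MBAFF. Used by temporal direct prediction. */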
920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933 int poc = ref1->ref_poc[colfield][list][old_ref];
935 if (!interl)
936 poc |= 3;
937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isn't needed
938 poc= (poc&~3) + rfield + 1;
940 for(j=start; j<end; j++){
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
944 if(rfield == field)
945 map[list][old_ref] = cur_ref;
946 break;
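/* direct_ref_list_init() stores the current reference lists' counts and
 * (frame_num, parity) keys in the current picture and, for temporal direct
 * B slices, precomputes the co-located -> list0 mapping tables (frame and
 * field variants). */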
953 static inline void direct_ref_list_init(H264Context * const h){
954 MpegEncContext * const s = &h->s;
955 Picture * const ref1 = &h->ref_list[1][0];
956 Picture * const cur = s->current_picture_ptr;
957 int list, j, field;
958 int sidx= (s->picture_structure&1)^1;
959 int ref1sidx= (ref1->reference&1)^1;
961 for(list=0; list<2; list++){
962 cur->ref_count[sidx][list] = h->ref_count[list];
963 for(j=0; j<h->ref_count[list]; j++)
964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
967 if(s->picture_structure == PICT_FRAME){
968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
972 cur->mbaff= FRAME_MBAFF;
974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
975 return;
977 for(list=0; list<2; list++){
978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979 for(field=0; field<2; field++)
980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
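/* pred_direct_motion() derives the motion of B-direct (and B-skip)
 * macroblocks. The first block selects the correct co-located macroblock when
 * the current MB and the list-1 reference differ in frame/field coding.
 * Spatial mode then takes ref = min of the neighbouring references and the
 * median-predicted MV, zeroing partitions whose co-located block is (nearly)
 * static; temporal mode instead scales the co-located list-1 motion vectors
 * by dist_scale_factor. */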
984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985 MpegEncContext * const s = &h->s;
986 int b8_stride = h->b8_stride;
987 int b4_stride = h->b_stride;
988 int mb_xy = h->mb_xy;
989 int mb_type_col[2];
990 const int16_t (*l1mv0)[2], (*l1mv1)[2];
991 const int8_t *l1ref0, *l1ref1;
992 const int is_b8x8 = IS_8X8(*mb_type);
993 unsigned int sub_mb_type;
994 int i8, i4;
996 assert(h->ref_list[1][0].reference&3);
998 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1000 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1001 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1002 int cur_poc = s->current_picture_ptr->poc;
1003 int *col_poc = h->ref_list[1]->field_poc;
1004 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1005 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1006 b8_stride = 0;
1007 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1008 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1009 mb_xy += s->mb_stride*fieldoff;
1011 goto single_col;
1012 }else{ // AFL/AFR/FR/FL -> AFR/FR
1013 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1014 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1015 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1016 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1017 b8_stride *= 3;
1018 b4_stride *= 6;
1019 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1020 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1021 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1022 && !is_b8x8){
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1025 }else{
1026 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1027 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1029 }else{ // AFR/FR -> AFR/FR
1030 single_col:
1031 mb_type_col[0] =
1032 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1033 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1034 /* FIXME save sub mb types from previous frames (or derive from MVs)
1035 * so we know exactly what block size to use */
1036 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1037 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1038 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1041 }else{
1042 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1043 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1048 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1049 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1050 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1051 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1052 if(!b8_stride){
1053 if(s->mb_y&1){
1054 l1ref0 += h->b8_stride;
1055 l1ref1 += h->b8_stride;
1056 l1mv0 += 2*b4_stride;
1057 l1mv1 += 2*b4_stride;
1061 if(h->direct_spatial_mv_pred){
1062 int ref[2];
1063 int mv[2][2];
1064 int list;
1066 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1068 /* ref = min(neighbors) */
1069 for(list=0; list<2; list++){
1070 int refa = h->ref_cache[list][scan8[0] - 1];
1071 int refb = h->ref_cache[list][scan8[0] - 8];
1072 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1073 if(refc == PART_NOT_AVAILABLE)
1074 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1075 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1076 if(ref[list] < 0)
1077 ref[list] = -1;
1080 if(ref[0] < 0 && ref[1] < 0){
1081 ref[0] = ref[1] = 0;
1082 mv[0][0] = mv[0][1] =
1083 mv[1][0] = mv[1][1] = 0;
1084 }else{
1085 for(list=0; list<2; list++){
1086 if(ref[list] >= 0)
1087 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1088 else
1089 mv[list][0] = mv[list][1] = 0;
1093 if(ref[1] < 0){
1094 if(!is_b8x8)
1095 *mb_type &= ~MB_TYPE_L1;
1096 sub_mb_type &= ~MB_TYPE_L1;
1097 }else if(ref[0] < 0){
1098 if(!is_b8x8)
1099 *mb_type &= ~MB_TYPE_L0;
1100 sub_mb_type &= ~MB_TYPE_L0;
1103 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1104 for(i8=0; i8<4; i8++){
1105 int x8 = i8&1;
1106 int y8 = i8>>1;
1107 int xy8 = x8+y8*b8_stride;
1108 int xy4 = 3*x8+y8*b4_stride;
1109 int a=0, b=0;
1111 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1112 continue;
1113 h->sub_mb_type[i8] = sub_mb_type;
1115 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1116 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1117 if(!IS_INTRA(mb_type_col[y8])
1118 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1119 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1120 if(ref[0] > 0)
1121 a= pack16to32(mv[0][0],mv[0][1]);
1122 if(ref[1] > 0)
1123 b= pack16to32(mv[1][0],mv[1][1]);
1124 }else{
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 b= pack16to32(mv[1][0],mv[1][1]);
1128 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1129 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1131 }else if(IS_16X16(*mb_type)){
1132 int a=0, b=0;
1134 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1135 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1136 if(!IS_INTRA(mb_type_col[0])
1137 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1138 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1139 && (h->x264_build>33 || !h->x264_build)))){
1140 if(ref[0] > 0)
1141 a= pack16to32(mv[0][0],mv[0][1]);
1142 if(ref[1] > 0)
1143 b= pack16to32(mv[1][0],mv[1][1]);
1144 }else{
1145 a= pack16to32(mv[0][0],mv[0][1]);
1146 b= pack16to32(mv[1][0],mv[1][1]);
1148 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1149 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1150 }else{
1151 for(i8=0; i8<4; i8++){
1152 const int x8 = i8&1;
1153 const int y8 = i8>>1;
1155 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1156 continue;
1157 h->sub_mb_type[i8] = sub_mb_type;
1159 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1160 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1161 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1162 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1164 /* col_zero_flag */
1165 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1166 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1167 && (h->x264_build>33 || !h->x264_build)))){
1168 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1169 if(IS_SUB_8X8(sub_mb_type)){
1170 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1171 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1172 if(ref[0] == 0)
1173 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1174 if(ref[1] == 0)
1175 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1177 }else
1178 for(i4=0; i4<4; i4++){
1179 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1180 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1181 if(ref[0] == 0)
1182 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1183 if(ref[1] == 0)
1184 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1190 }else{ /* direct temporal mv pred */
1191 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1192 const int *dist_scale_factor = h->dist_scale_factor;
1193 int ref_offset= 0;
1195 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1196 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1197 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1198 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1200 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1201 ref_offset += 16;
1203 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1204 /* FIXME assumes direct_8x8_inference == 1 */
1205 int y_shift = 2*!IS_INTERLACED(*mb_type);
1207 for(i8=0; i8<4; i8++){
1208 const int x8 = i8&1;
1209 const int y8 = i8>>1;
1210 int ref0, scale;
1211 const int16_t (*l1mv)[2]= l1mv0;
1213 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1214 continue;
1215 h->sub_mb_type[i8] = sub_mb_type;
1217 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1218 if(IS_INTRA(mb_type_col[y8])){
1219 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1220 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1221 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1222 continue;
1225 ref0 = l1ref0[x8 + y8*b8_stride];
1226 if(ref0 >= 0)
1227 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1228 else{
1229 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1230 l1mv= l1mv1;
1232 scale = dist_scale_factor[ref0];
1233 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1236 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1237 int my_col = (mv_col[1]<<y_shift)/2;
1238 int mx = (scale * mv_col[0] + 128) >> 8;
1239 int my = (scale * my_col + 128) >> 8;
1240 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1241 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1244 return;
1247 /* one-to-one mv scaling */
1249 if(IS_16X16(*mb_type)){
1250 int ref, mv0, mv1;
1252 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1253 if(IS_INTRA(mb_type_col[0])){
1254 ref=mv0=mv1=0;
1255 }else{
1256 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1257 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1258 const int scale = dist_scale_factor[ref0];
1259 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1260 int mv_l0[2];
1261 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1262 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1263 ref= ref0;
1264 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1265 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1267 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1268 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1269 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1270 }else{
1271 for(i8=0; i8<4; i8++){
1272 const int x8 = i8&1;
1273 const int y8 = i8>>1;
1274 int ref0, scale;
1275 const int16_t (*l1mv)[2]= l1mv0;
1277 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1278 continue;
1279 h->sub_mb_type[i8] = sub_mb_type;
1280 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1281 if(IS_INTRA(mb_type_col[0])){
1282 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1283 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1284 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1285 continue;
1288 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1289 if(ref0 >= 0)
1290 ref0 = map_col_to_list0[0][ref0];
1291 else{
1292 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1293 l1mv= l1mv1;
1295 scale = dist_scale_factor[ref0];
1297 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1298 if(IS_SUB_8X8(sub_mb_type)){
1299 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1300 int mx = (scale * mv_col[0] + 128) >> 8;
1301 int my = (scale * mv_col[1] + 128) >> 8;
1302 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1303 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1304 }else
1305 for(i4=0; i4<4; i4++){
1306 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1307 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1308 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1309 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1310 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1311 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
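/* write_back_motion() copies the per-MB mv/ref caches (and, with CABAC, the
 * mvd and direct flags) back into the frame-wide arrays of current_picture,
 * one 64-bit store per pair of 4x4 blocks. */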
1318 static inline void write_back_motion(H264Context *h, int mb_type){
1319 MpegEncContext * const s = &h->s;
1320 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1321 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1322 int list;
1324 if(!USES_LIST(mb_type, 0))
1325 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1327 for(list=0; list<h->list_count; list++){
1328 int y;
1329 if(!USES_LIST(mb_type, list))
1330 continue;
1332 for(y=0; y<4; y++){
1333 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1334 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1336 if( h->pps.cabac ) {
1337 if(IS_SKIP(mb_type))
1338 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1339 else
1340 for(y=0; y<4; y++){
1341 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1347 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1348 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1349 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1350 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1351 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1355 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1356 if(IS_8X8(mb_type)){
1357 uint8_t *direct_table = &h->direct_table[b8_xy];
1358 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1359 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1360 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
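/* ff_h264_decode_nal() parses nal_ref_idc / nal_unit_type from the first byte
 * and undoes the emulation-prevention escaping of the payload: every
 * 00 00 03 sequence in the raw byte stream becomes 00 00 in the RBSP, while
 * 00 00 0x with x < 3 marks the next start code and thus the end of this NAL
 * unit. Returns the unescaped payload and sets *consumed to the number of
 * input bytes used. */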
1365 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1366 int i, si, di;
1367 uint8_t *dst;
1368 int bufidx;
1370 // src[0]&0x80; //forbidden bit
1371 h->nal_ref_idc= src[0]>>5;
1372 h->nal_unit_type= src[0]&0x1F;
1374 src++; length--;
1375 #if 0
1376 for(i=0; i<length; i++)
1377 printf("%2X ", src[i]);
1378 #endif
1380 #if HAVE_FAST_UNALIGNED
1381 # if HAVE_FAST_64BIT
1382 # define RS 7
1383 for(i=0; i+1<length; i+=9){
1384 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1385 # else
1386 # define RS 3
1387 for(i=0; i+1<length; i+=5){
1388 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1389 # endif
1390 continue;
1391 if(i>0 && !src[i]) i--;
1392 while(src[i]) i++;
1393 #else
1394 # define RS 0
1395 for(i=0; i+1<length; i+=2){
1396 if(src[i]) continue;
1397 if(i>0 && src[i-1]==0) i--;
1398 #endif
1399 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1400 if(src[i+2]!=3){
1401 /* startcode, so we must be past the end */
1402 length=i;
1404 break;
1406 i-= RS;
1409 if(i>=length-1){ //no escaped 0
1410 *dst_length= length;
1411 *consumed= length+1; //+1 for the header
1412 return src;
1415 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1416 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1417 dst= h->rbsp_buffer[bufidx];
1419 if (dst == NULL){
1420 return NULL;
1423 //printf("decoding esc\n");
1424 memcpy(dst, src, i);
1425 si=di=i;
1426 while(si+2<length){
1427 //remove escapes (very rare 1:2^22)
1428 if(src[si+2]>3){
1429 dst[di++]= src[si++];
1430 dst[di++]= src[si++];
1431 }else if(src[si]==0 && src[si+1]==0){
1432 if(src[si+2]==3){ //escape
1433 dst[di++]= 0;
1434 dst[di++]= 0;
1435 si+=3;
1436 continue;
1437 }else //next start code
1438 goto nsc;
1441 dst[di++]= src[si++];
1443 while(si<length)
1444 dst[di++]= src[si++];
1445 nsc:
1447 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1449 *dst_length= di;
1450 *consumed= si + 1;//+1 for the header
1451 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1452 return dst;
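/* ff_h264_decode_rbsp_trailing() returns the number of trailing bits in the
 * last RBSP byte (the rbsp_stop_one_bit plus the zero padding below it), or
 * 0 if no stop bit is present. */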
1455 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1456 int v= *src;
1457 int r;
1459 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1461 for(r=1; r<9; r++){
1462 if(v&1) return r;
1463 v>>=1;
1465 return 0;
1469 * IDCT transforms the 16 dc values and dequantizes them.
1470 * @param qp quantization parameter
1472 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1473 #define stride 16
1474 int i;
1475 int temp[16]; //FIXME check if this is a good idea
1476 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1477 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
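/* The 16 luma DC coefficients sit at the DC position of each 4x4 block inside
 * the shared coefficient buffer (stride 16), hence the x_offset/y_offset
 * tables; a separable 4x4 Hadamard transform is applied and each result is
 * scaled by qmul with rounding (>> 8). */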
1479 //memset(block, 64, 2*256);
1480 //return;
1481 for(i=0; i<4; i++){
1482 const int offset= y_offset[i];
1483 const int z0= block[offset+stride*0] + block[offset+stride*4];
1484 const int z1= block[offset+stride*0] - block[offset+stride*4];
1485 const int z2= block[offset+stride*1] - block[offset+stride*5];
1486 const int z3= block[offset+stride*1] + block[offset+stride*5];
1488 temp[4*i+0]= z0+z3;
1489 temp[4*i+1]= z1+z2;
1490 temp[4*i+2]= z1-z2;
1491 temp[4*i+3]= z0-z3;
1494 for(i=0; i<4; i++){
1495 const int offset= x_offset[i];
1496 const int z0= temp[4*0+i] + temp[4*2+i];
1497 const int z1= temp[4*0+i] - temp[4*2+i];
1498 const int z2= temp[4*1+i] - temp[4*3+i];
1499 const int z3= temp[4*1+i] + temp[4*3+i];
1501 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1502 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1503 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1504 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1508 #if 0
1510 * DCT transforms the 16 dc values.
1511 * @param qp quantization parameter ??? FIXME
1513 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1514 // const int qmul= dequant_coeff[qp][0];
1515 int i;
1516 int temp[16]; //FIXME check if this is a good idea
1517 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1518 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1520 for(i=0; i<4; i++){
1521 const int offset= y_offset[i];
1522 const int z0= block[offset+stride*0] + block[offset+stride*4];
1523 const int z1= block[offset+stride*0] - block[offset+stride*4];
1524 const int z2= block[offset+stride*1] - block[offset+stride*5];
1525 const int z3= block[offset+stride*1] + block[offset+stride*5];
1527 temp[4*i+0]= z0+z3;
1528 temp[4*i+1]= z1+z2;
1529 temp[4*i+2]= z1-z2;
1530 temp[4*i+3]= z0-z3;
1533 for(i=0; i<4; i++){
1534 const int offset= x_offset[i];
1535 const int z0= temp[4*0+i] + temp[4*2+i];
1536 const int z1= temp[4*0+i] - temp[4*2+i];
1537 const int z2= temp[4*1+i] - temp[4*3+i];
1538 const int z3= temp[4*1+i] + temp[4*3+i];
1540 block[stride*0 +offset]= (z0 + z3)>>1;
1541 block[stride*2 +offset]= (z1 + z2)>>1;
1542 block[stride*8 +offset]= (z1 - z2)>>1;
1543 block[stride*10+offset]= (z0 - z3)>>1;
1546 #endif
1548 #undef xStride
1549 #undef stride
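/* 2x2 Hadamard transform and dequantisation of the four chroma DC
 * coefficients, which are stored 32 (stride) and 16 (xStride) apart in the
 * shared block buffer. */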
1551 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1552 const int stride= 16*2;
1553 const int xStride= 16;
1554 int a,b,c,d,e;
1556 a= block[stride*0 + xStride*0];
1557 b= block[stride*0 + xStride*1];
1558 c= block[stride*1 + xStride*0];
1559 d= block[stride*1 + xStride*1];
1561 e= a-b;
1562 a= a+b;
1563 b= c-d;
1564 c= c+d;
1566 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1567 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1568 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1569 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1572 #if 0
1573 static void chroma_dc_dct_c(DCTELEM *block){
1574 const int stride= 16*2;
1575 const int xStride= 16;
1576 int a,b,c,d,e;
1578 a= block[stride*0 + xStride*0];
1579 b= block[stride*0 + xStride*1];
1580 c= block[stride*1 + xStride*0];
1581 d= block[stride*1 + xStride*1];
1583 e= a-b;
1584 a= a+b;
1585 b= c-d;
1586 c= c+d;
1588 block[stride*0 + xStride*0]= (a+c);
1589 block[stride*0 + xStride*1]= (e+b);
1590 block[stride*1 + xStride*0]= (a-c);
1591 block[stride*1 + xStride*1]= (e-b);
1593 #endif
1595 /**
1596 * gets the chroma qp.
1597 */
1598 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1599 return h->pps.chroma_qp_table[t][qscale];
1602 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1603 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1604 int src_x_offset, int src_y_offset,
1605 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1606 MpegEncContext * const s = &h->s;
1607 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1608 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1609 const int luma_xy= (mx&3) + ((my&3)<<2);
1610 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1611 uint8_t * src_cb, * src_cr;
1612 int extra_width= h->emu_edge_width;
1613 int extra_height= h->emu_edge_height;
1614 int emu=0;
1615 const int full_mx= mx>>2;
1616 const int full_my= my>>2;
1617 const int pic_width = 16*s->mb_width;
1618 const int pic_height = 16*s->mb_height >> MB_FIELD;
1620 if(mx&7) extra_width -= 3;
1621 if(my&7) extra_height -= 3;
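/* If the motion vector has a fractional part, the 6-tap luma interpolation
 * filter reads a few extra source pixels on each side, so the region where
 * ff_emulated_edge_mc() can be skipped shrinks accordingly. */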
1623 if( full_mx < 0-extra_width
1624 || full_my < 0-extra_height
1625 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1626 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1627 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1628 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1629 emu=1;
1632 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1633 if(!square){
1634 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1637 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1639 if(MB_FIELD){
1640 // chroma offset when predicting from a field of opposite parity
1641 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1642 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1644 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1645 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1647 if(emu){
1648 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1649 src_cb= s->edge_emu_buffer;
1651 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1653 if(emu){
1654 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1655 src_cr= s->edge_emu_buffer;
1657 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1660 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1661 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1662 int x_offset, int y_offset,
1663 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1664 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1665 int list0, int list1){
1666 MpegEncContext * const s = &h->s;
1667 qpel_mc_func *qpix_op= qpix_put;
1668 h264_chroma_mc_func chroma_op= chroma_put;
1670 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1671 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1672 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1673 x_offset += 8*s->mb_x;
1674 y_offset += 8*(s->mb_y >> MB_FIELD);
1676 if(list0){
1677 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1678 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1679 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1680 qpix_op, chroma_op);
1682 qpix_op= qpix_avg;
1683 chroma_op= chroma_avg;
1686 if(list1){
1687 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1688 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1689 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1690 qpix_op, chroma_op);
1694 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1695 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1696 int x_offset, int y_offset,
1697 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1698 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1699 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1700 int list0, int list1){
1701 MpegEncContext * const s = &h->s;
1703 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1704 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1705 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1706 x_offset += 8*s->mb_x;
1707 y_offset += 8*(s->mb_y >> MB_FIELD);
1709 if(list0 && list1){
1710 /* don't optimize for luma-only case, since B-frames usually
1711 * use implicit weights => chroma too. */
1712 uint8_t *tmp_cb = s->obmc_scratchpad;
1713 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1714 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1715 int refn0 = h->ref_cache[0][ scan8[n] ];
1716 int refn1 = h->ref_cache[1][ scan8[n] ];
1718 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1719 dest_y, dest_cb, dest_cr,
1720 x_offset, y_offset, qpix_put, chroma_put);
1721 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1722 tmp_y, tmp_cb, tmp_cr,
1723 x_offset, y_offset, qpix_put, chroma_put);
1725 if(h->use_weight == 2){
1726 int weight0 = h->implicit_weight[refn0][refn1];
1727 int weight1 = 64 - weight0;
1728 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1729 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1730 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1731 }else{
1732 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1733 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1734 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1735 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1736 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1737 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1739 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1740 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1742 }else{
1743 int list = list1 ? 1 : 0;
1744 int refn = h->ref_cache[list][ scan8[n] ];
1745 Picture *ref= &h->ref_list[list][refn];
1746 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1747 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1748 qpix_put, chroma_put);
1750 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1751 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1752 if(h->use_weight_chroma){
1753 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1755 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1756 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1761 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1762 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1763 int x_offset, int y_offset,
1764 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1765 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1766 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1767 int list0, int list1){
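/* Explicit weighted prediction (use_weight==1) always takes the weighted
 * path. Implicit weighting (use_weight==2) only needs it for bi-predicted
 * partitions whose implicit weight differs from the 32/32 default; equal
 * weights reduce to a plain average, which the standard path already does. */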
1768 if((h->use_weight==2 && list0 && list1
1769 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1770 || h->use_weight==1)
1771 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put,
1773 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1774 else
1775 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1776 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1779 static inline void prefetch_motion(H264Context *h, int list){
1780 /* fetch pixels for estimated mv 4 macroblocks ahead
1781 * optimized for 64byte cache lines */
1782 MpegEncContext * const s = &h->s;
1783 const int refn = h->ref_cache[list][scan8[0]];
1784 if(refn >= 0){
1785 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1786 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1787 uint8_t **src= h->ref_list[list][refn].data;
1788 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1789 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1790 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1791 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1795 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1796 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1797 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1798 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1799 MpegEncContext * const s = &h->s;
1800 const int mb_xy= h->mb_xy;
1801 const int mb_type= s->current_picture.mb_type[mb_xy];
1803 assert(IS_INTER(mb_type));
1805 prefetch_motion(h, 0);
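/* Dispatch on the macroblock partitioning: one 16x16 call, two 16x8 or 8x16
 * calls, or four 8x8 sub-partitions which may be split further into 8x4,
 * 4x8 or 4x4 blocks, each motion compensated with its own list0/list1
 * direction flags. */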
1807 if(IS_16X16(mb_type)){
1808 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1809 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1810 &weight_op[0], &weight_avg[0],
1811 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1812 }else if(IS_16X8(mb_type)){
1813 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1815 &weight_op[1], &weight_avg[1],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1818 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1819 &weight_op[1], &weight_avg[1],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 }else if(IS_8X16(mb_type)){
1822 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1824 &weight_op[2], &weight_avg[2],
1825 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1826 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1827 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1828 &weight_op[2], &weight_avg[2],
1829 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1830 }else{
1831 int i;
1833 assert(IS_8X8(mb_type));
1835 for(i=0; i<4; i++){
1836 const int sub_mb_type= h->sub_mb_type[i];
1837 const int n= 4*i;
1838 int x_offset= (i&1)<<2;
1839 int y_offset= (i&2)<<1;
1841 if(IS_SUB_8X8(sub_mb_type)){
1842 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1843 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1844 &weight_op[3], &weight_avg[3],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_8X4(sub_mb_type)){
1847 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1849 &weight_op[4], &weight_avg[4],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1852 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1853 &weight_op[4], &weight_avg[4],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else if(IS_SUB_4X8(sub_mb_type)){
1856 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1858 &weight_op[5], &weight_avg[5],
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1860 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[5], &weight_avg[5],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1864 }else{
1865 int j;
1866 assert(IS_SUB_4X4(sub_mb_type));
1867 for(j=0; j<4; j++){
1868 int sub_x_offset= x_offset + 2*(j&1);
1869 int sub_y_offset= y_offset + (j&2);
1870 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1871 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1872 &weight_op[6], &weight_avg[6],
1873 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1879 prefetch_motion(h, 1);
1882 static av_cold void init_cavlc_level_tab(void){
1883 int suffix_length, mask;
1884 unsigned int i;
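/* Build a lookup table for CAVLC level decoding: for each suffix_length and
 * each LEVEL_TAB_BITS-bit window of the bitstream, [0] holds the decoded
 * signed level (or prefix+100 as an escape marker when the full code does
 * not fit into the window) and [1] holds the number of bits consumed. */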
1886 for(suffix_length=0; suffix_length<7; suffix_length++){
1887 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1888 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1889 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1891 mask= -(level_code&1);
1892 level_code= (((2+level_code)>>1) ^ mask) - mask;
1893 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1894 cavlc_level_tab[suffix_length][i][0]= level_code;
1895 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1896 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1897 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1898 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1899 }else{
1900 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1901 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1907 static av_cold void decode_init_vlc(void){
1908 static int done = 0;
1910 if (!done) {
1911 int i;
1912 int offset;
1913 done = 1;
1915 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1916 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1917 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1918 &chroma_dc_coeff_token_len [0], 1, 1,
1919 &chroma_dc_coeff_token_bits[0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
1922 offset = 0;
1923 for(i=0; i<4; i++){
1924 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1925 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1926 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1927 &coeff_token_len [i][0], 1, 1,
1928 &coeff_token_bits[i][0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC);
1930 offset += coeff_token_vlc_tables_size[i];
1932 /*
1933 * This is a one time safety check to make sure that
1934 * the packed static coeff_token_vlc table sizes
1935 * were initialized correctly.
1936 */
1937 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1939 for(i=0; i<3; i++){
1940 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1941 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1942 init_vlc(&chroma_dc_total_zeros_vlc[i],
1943 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1944 &chroma_dc_total_zeros_len [i][0], 1, 1,
1945 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1946 INIT_VLC_USE_NEW_STATIC);
1948 for(i=0; i<15; i++){
1949 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1950 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1951 init_vlc(&total_zeros_vlc[i],
1952 TOTAL_ZEROS_VLC_BITS, 16,
1953 &total_zeros_len [i][0], 1, 1,
1954 &total_zeros_bits[i][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC);
1958 for(i=0; i<6; i++){
1959 run_vlc[i].table = run_vlc_tables[i];
1960 run_vlc[i].table_allocated = run_vlc_tables_size;
1961 init_vlc(&run_vlc[i],
1962 RUN_VLC_BITS, 7,
1963 &run_len [i][0], 1, 1,
1964 &run_bits[i][0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
1967 run7_vlc.table = run7_vlc_table,
1968 run7_vlc.table_allocated = run7_vlc_table_size;
1969 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1970 &run_len [6][0], 1, 1,
1971 &run_bits[6][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC);
1974 init_cavlc_level_tab();
1978 static void free_tables(H264Context *h){
1979 int i;
1980 H264Context *hx;
1981 av_freep(&h->intra4x4_pred_mode);
1982 av_freep(&h->chroma_pred_mode_table);
1983 av_freep(&h->cbp_table);
1984 av_freep(&h->mvd_table[0]);
1985 av_freep(&h->mvd_table[1]);
1986 av_freep(&h->direct_table);
1987 av_freep(&h->non_zero_count);
1988 av_freep(&h->slice_table_base);
1989 h->slice_table= NULL;
1991 av_freep(&h->mb2b_xy);
1992 av_freep(&h->mb2b8_xy);
1994 for(i = 0; i < MAX_THREADS; i++) {
1995 hx = h->thread_context[i];
1996 if(!hx) continue;
1997 av_freep(&hx->top_borders[1]);
1998 av_freep(&hx->top_borders[0]);
1999 av_freep(&hx->s.obmc_scratchpad);
2000 av_freep(&hx->rbsp_buffer[1]);
2001 av_freep(&hx->rbsp_buffer[0]);
2002 hx->rbsp_buffer_size[0] = 0;
2003 hx->rbsp_buffer_size[1] = 0;
2004 if (i) av_freep(&h->thread_context[i]);
2008 static void init_dequant8_coeff_table(H264Context *h){
2009 int i,q,x;
2010 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2011 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2012 h->dequant8_coeff[1] = h->dequant8_buffer[1];
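/* Build the 8x8 dequant tables for the intra (i==0) and inter (i==1)
 * scaling lists; if both PPS lists are identical the second table simply
 * aliases the first. Coefficients are stored transposed when the active
 * IDCT implementation expects transposed input. */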
2014 for(i=0; i<2; i++ ){
2015 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2016 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2017 break;
2020 for(q=0; q<52; q++){
2021 int shift = div6[q];
2022 int idx = rem6[q];
2023 for(x=0; x<64; x++)
2024 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2025 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2026 h->pps.scaling_matrix8[i][x]) << shift;
2031 static void init_dequant4_coeff_table(H264Context *h){
2032 int i,j,q,x;
2033 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2034 for(i=0; i<6; i++ ){
2035 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2036 for(j=0; j<i; j++){
2037 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2038 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2039 break;
2042 if(j<i)
2043 continue;
2045 for(q=0; q<52; q++){
2046 int shift = div6[q] + 2;
2047 int idx = rem6[q];
2048 for(x=0; x<16; x++)
2049 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2050 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2051 h->pps.scaling_matrix4[i][x]) << shift;
2056 static void init_dequant_tables(H264Context *h){
2057 int i,x;
2058 init_dequant4_coeff_table(h);
2059 if(h->pps.transform_8x8_mode)
2060 init_dequant8_coeff_table(h);
2061 if(h->sps.transform_bypass){
2062 for(i=0; i<6; i++)
2063 for(x=0; x<16; x++)
2064 h->dequant4_coeff[i][0][x] = 1<<6;
2065 if(h->pps.transform_8x8_mode)
2066 for(i=0; i<2; i++)
2067 for(x=0; x<64; x++)
2068 h->dequant8_coeff[i][0][x] = 1<<6;
2073 /**
2074 * allocates tables.
2075 * needs width/height
2076 */
2077 static int alloc_tables(H264Context *h){
2078 MpegEncContext * const s = &h->s;
2079 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2080 int x,y;
2082 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t), fail)
2084 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t), fail)
2085 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
2086 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
2088 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
2089 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t), fail);
2090 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t), fail);
2091 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
2093 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2094 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
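/* slice_table gets a guard band of -1 ("no slice") entries above and to the
 * left of the first macroblock row, so neighbour lookups at picture borders
 * need no explicit bounds checks. */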
2096 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
2097 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
2098 for(y=0; y<s->mb_height; y++){
2099 for(x=0; x<s->mb_width; x++){
2100 const int mb_xy= x + y*s->mb_stride;
2101 const int b_xy = 4*x + 4*y*h->b_stride;
2102 const int b8_xy= 2*x + 2*y*h->b8_stride;
2104 h->mb2b_xy [mb_xy]= b_xy;
2105 h->mb2b8_xy[mb_xy]= b8_xy;
2109 s->obmc_scratchpad = NULL;
2111 if(!h->dequant4_coeff[0])
2112 init_dequant_tables(h);
2114 return 0;
2115 fail:
2116 free_tables(h);
2117 return -1;
2120 /**
2121 * Mimic alloc_tables(), but for every context thread.
2122 */
2123 static void clone_tables(H264Context *dst, H264Context *src){
2124 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2125 dst->non_zero_count = src->non_zero_count;
2126 dst->slice_table = src->slice_table;
2127 dst->cbp_table = src->cbp_table;
2128 dst->mb2b_xy = src->mb2b_xy;
2129 dst->mb2b8_xy = src->mb2b8_xy;
2130 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2131 dst->mvd_table[0] = src->mvd_table[0];
2132 dst->mvd_table[1] = src->mvd_table[1];
2133 dst->direct_table = src->direct_table;
2135 dst->s.obmc_scratchpad = NULL;
2136 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2139 /**
2140 * Init context
2141 * Allocate buffers which are not shared amongst multiple threads.
2142 */
2143 static int context_init(H264Context *h){
2144 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
2145 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
2147 return 0;
2148 fail:
2149 return -1; // free_tables will clean up for us
2152 static av_cold void common_init(H264Context *h){
2153 MpegEncContext * const s = &h->s;
2155 s->width = s->avctx->width;
2156 s->height = s->avctx->height;
2157 s->codec_id= s->avctx->codec->id;
2159 ff_h264_pred_init(&h->hpc, s->codec_id);
2161 h->dequant_coeff_pps= -1;
2162 s->unrestricted_mv=1;
2163 s->decode=1; //FIXME
2165 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2167 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2168 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2171 /**
2172 * Reset SEI values at the beginning of the frame.
2173 *
2174 * @param h H.264 context.
2175 */
2176 static void reset_sei(H264Context *h) {
2177 h->sei_recovery_frame_cnt = -1;
2178 h->sei_dpb_output_delay = 0;
2179 h->sei_cpb_removal_delay = -1;
2180 h->sei_buffering_period_present = 0;
2183 static av_cold int decode_init(AVCodecContext *avctx){
2184 H264Context *h= avctx->priv_data;
2185 MpegEncContext * const s = &h->s;
2187 MPV_decode_defaults(s);
2189 s->avctx = avctx;
2190 common_init(h);
2192 s->out_format = FMT_H264;
2193 s->workaround_bugs= avctx->workaround_bugs;
2195 // set defaults
2196 // s->decode_mb= ff_h263_decode_mb;
2197 s->quarter_sample = 1;
2198 if(!avctx->has_b_frames)
2199 s->low_delay= 1;
2201 avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
2202 avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
2203 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
2205 decode_init_vlc();
2207 if(avctx->extradata_size > 0 && avctx->extradata &&
2208 *(char *)avctx->extradata == 1){
2209 h->is_avc = 1;
2210 h->got_avcC = 0;
2211 } else {
2212 h->is_avc = 0;
2215 h->thread_context[0] = h;
2216 h->outputed_poc = INT_MIN;
2217 h->prev_poc_msb= 1<<16;
2218 reset_sei(h);
2219 if(avctx->codec_id == CODEC_ID_H264){
2220 if(avctx->ticks_per_frame == 1){
2221 s->avctx->time_base.den *=2;
2222 }
2223 avctx->ticks_per_frame = 2;
2224 }
2225 return 0;
2228 static int frame_start(H264Context *h){
2229 MpegEncContext * const s = &h->s;
2230 int i;
2232 if(MPV_frame_start(s, s->avctx) < 0)
2233 return -1;
2234 ff_er_frame_start(s);
2235 /*
2236 * MPV_frame_start uses pict_type to derive key_frame.
2237 * This is incorrect for H.264; IDR markings must be used.
2238 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2239 * See decode_nal_units().
2240 */
2241 s->current_picture_ptr->key_frame= 0;
2242 s->current_picture_ptr->mmco_reset= 0;
2244 assert(s->linesize && s->uvlinesize);
2246 for(i=0; i<16; i++){
2247 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2248 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2250 for(i=0; i<4; i++){
2251 h->block_offset[16+i]=
2252 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2253 h->block_offset[24+16+i]=
2254 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2257 /* can't be in alloc_tables because linesize isn't known there.
2258 * FIXME: redo bipred weight to not require extra buffer? */
2259 for(i = 0; i < s->avctx->thread_count; i++)
2260 if(!h->thread_context[i]->s.obmc_scratchpad)
2261 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2263 /* some macroblocks will be accessed before they're available */
2264 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2265 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2267 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2269 // We mark the current picture as non-reference after allocating it, so
2270 // that if we break out due to an error it can be released automatically
2271 // in the next MPV_frame_start().
2272 // SVQ3, like most other codecs, only keeps last/next/current pictures, so they
2273 // get released even with the reference flag set; besides, SVQ3 and the others
2274 // do not mark frames as reference later "naturally".
2275 if(s->codec_id != CODEC_ID_SVQ3)
2276 s->current_picture_ptr->reference= 0;
2278 s->current_picture_ptr->field_poc[0]=
2279 s->current_picture_ptr->field_poc[1]= INT_MAX;
2280 assert(s->current_picture_ptr->long_ref==0);
2282 return 0;
2285 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2286 MpegEncContext * const s = &h->s;
2287 int i;
2288 int step = 1;
2289 int offset = 1;
2290 int uvoffset= 1;
2291 int top_idx = 1;
2292 int skiplast= 0;
2294 src_y -= linesize;
2295 src_cb -= uvlinesize;
2296 src_cr -= uvlinesize;
2298 if(!simple && FRAME_MBAFF){
2299 if(s->mb_y&1){
2300 offset = MB_MBAFF ? 1 : 17;
2301 uvoffset= MB_MBAFF ? 1 : 9;
2302 if(!MB_MBAFF){
2303 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2304 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2305 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2306 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2307 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2310 }else{
2311 if(!MB_MBAFF){
2312 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2313 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2314 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2315 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2317 skiplast= 1;
2319 offset =
2320 uvoffset=
2321 top_idx = MB_MBAFF ? 0 : 1;
2323 step= MB_MBAFF ? 2 : 1;
2326 // There are two lines saved, the line above the top macroblock of a pair,
2327 // and the line above the bottom macroblock
2328 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2329 for(i=1; i<17 - skiplast; i++){
2330 h->left_border[offset+i*step]= src_y[15+i* linesize];
2333 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2334 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2336 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2337 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2338 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2339 for(i=1; i<9 - skiplast; i++){
2340 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2341 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2343 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2344 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2348 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2349 MpegEncContext * const s = &h->s;
2350 int temp8, i;
2351 uint64_t temp64;
2352 int deblock_left;
2353 int deblock_top;
2354 int mb_xy;
2355 int step = 1;
2356 int offset = 1;
2357 int uvoffset= 1;
2358 int top_idx = 1;
2360 if(!simple && FRAME_MBAFF){
2361 if(s->mb_y&1){
2362 offset = MB_MBAFF ? 1 : 17;
2363 uvoffset= MB_MBAFF ? 1 : 9;
2364 }else{
2365 offset =
2366 uvoffset=
2367 top_idx = MB_MBAFF ? 0 : 1;
2369 step= MB_MBAFF ? 2 : 1;
2372 if(h->deblocking_filter == 2) {
2373 mb_xy = h->mb_xy;
2374 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2375 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2376 } else {
2377 deblock_left = (s->mb_x > 0);
2378 deblock_top = (s->mb_y > !!MB_FIELD);
2381 src_y -= linesize + 1;
2382 src_cb -= uvlinesize + 1;
2383 src_cr -= uvlinesize + 1;
2385 #define XCHG(a,b,t,xchg)\
2386 t= a;\
2387 if(xchg)\
2388 a= b;\
2389 b= t;
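/* XCHG(a,b,t,xchg): with xchg!=0 the two values are swapped, with xchg==0
 * a is merely copied into b. This temporarily replaces the edge pixels of
 * the current macroblock with the saved, still unfiltered, border samples,
 * so that intra prediction uses the unfiltered neighbours rather than the
 * already deblocked ones. */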
2391 if(deblock_left){
2392 for(i = !deblock_top; i<16; i++){
2393 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2395 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2398 if(deblock_top){
2399 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2400 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2401 if(s->mb_x+1 < s->mb_width){
2402 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2406 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2407 if(deblock_left){
2408 for(i = !deblock_top; i<8; i++){
2409 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2410 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2412 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2413 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2415 if(deblock_top){
2416 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2417 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2422 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2423 MpegEncContext * const s = &h->s;
2424 const int mb_x= s->mb_x;
2425 const int mb_y= s->mb_y;
2426 const int mb_xy= h->mb_xy;
2427 const int mb_type= s->current_picture.mb_type[mb_xy];
2428 uint8_t *dest_y, *dest_cb, *dest_cr;
2429 int linesize, uvlinesize /*dct_offset*/;
2430 int i;
2431 int *block_offset = &h->block_offset[0];
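/* transform_bypass corresponds to the SPS qpprime_y_zero_transform_bypass
 * flag (lossless coding in the High 4:4:4 profiles): with qscale==0 the
 * residual skips the transform and is added to the prediction directly. */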
2432 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2433 /* is_h264 should always be true if SVQ3 is disabled. */
2434 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2435 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2436 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2438 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2439 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2440 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2442 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2443 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2445 if (!simple && MB_FIELD) {
2446 linesize = h->mb_linesize = s->linesize * 2;
2447 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2448 block_offset = &h->block_offset[24];
2449 if(mb_y&1){ //FIXME move out of this function?
2450 dest_y -= s->linesize*15;
2451 dest_cb-= s->uvlinesize*7;
2452 dest_cr-= s->uvlinesize*7;
2454 if(FRAME_MBAFF) {
2455 int list;
2456 for(list=0; list<h->list_count; list++){
2457 if(!USES_LIST(mb_type, list))
2458 continue;
2459 if(IS_16X16(mb_type)){
2460 int8_t *ref = &h->ref_cache[list][scan8[0]];
2461 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2462 }else{
2463 for(i=0; i<16; i+=4){
2464 int ref = h->ref_cache[list][scan8[i]];
2465 if(ref >= 0)
2466 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2471 } else {
2472 linesize = h->mb_linesize = s->linesize;
2473 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2474 // dct_offset = s->linesize * 16;
2477 if (!simple && IS_INTRA_PCM(mb_type)) {
2478 for (i=0; i<16; i++) {
2479 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2481 for (i=0; i<8; i++) {
2482 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2483 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2485 } else {
2486 if(IS_INTRA(mb_type)){
2487 if(h->deblocking_filter)
2488 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2490 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2491 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2492 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2495 if(IS_INTRA4x4(mb_type)){
2496 if(simple || !s->encoding){
2497 if(IS_8x8DCT(mb_type)){
2498 if(transform_bypass){
2499 idct_dc_add =
2500 idct_add = s->dsp.add_pixels8;
2501 }else{
2502 idct_dc_add = s->dsp.h264_idct8_dc_add;
2503 idct_add = s->dsp.h264_idct8_add;
2505 for(i=0; i<16; i+=4){
2506 uint8_t * const ptr= dest_y + block_offset[i];
2507 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2508 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2509 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2510 }else{
2511 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2512 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2513 (h->topright_samples_available<<i)&0x4000, linesize);
2514 if(nnz){
2515 if(nnz == 1 && h->mb[i*16])
2516 idct_dc_add(ptr, h->mb + i*16, linesize);
2517 else
2518 idct_add (ptr, h->mb + i*16, linesize);
2522 }else{
2523 if(transform_bypass){
2524 idct_dc_add =
2525 idct_add = s->dsp.add_pixels4;
2526 }else{
2527 idct_dc_add = s->dsp.h264_idct_dc_add;
2528 idct_add = s->dsp.h264_idct_add;
2530 for(i=0; i<16; i++){
2531 uint8_t * const ptr= dest_y + block_offset[i];
2532 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2534 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2535 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2536 }else{
2537 uint8_t *topright;
2538 int nnz, tr;
2539 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2540 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2541 assert(mb_y || linesize <= block_offset[i]);
2542 if(!topright_avail){
2543 tr= ptr[3 - linesize]*0x01010101;
2544 topright= (uint8_t*) &tr;
2545 }else
2546 topright= ptr + 4 - linesize;
2547 }else
2548 topright= NULL;
2550 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2551 nnz = h->non_zero_count_cache[ scan8[i] ];
2552 if(nnz){
2553 if(is_h264){
2554 if(nnz == 1 && h->mb[i*16])
2555 idct_dc_add(ptr, h->mb + i*16, linesize);
2556 else
2557 idct_add (ptr, h->mb + i*16, linesize);
2558 }else
2559 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2565 }else{
2566 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2567 if(is_h264){
2568 if(!transform_bypass)
2569 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2570 }else
2571 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2573 if(h->deblocking_filter)
2574 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2575 }else if(is_h264){
2576 hl_motion(h, dest_y, dest_cb, dest_cr,
2577 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2578 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2579 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2583 if(!IS_INTRA4x4(mb_type)){
2584 if(is_h264){
2585 if(IS_INTRA16x16(mb_type)){
2586 if(transform_bypass){
2587 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2588 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2589 }else{
2590 for(i=0; i<16; i++){
2591 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2592 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2595 }else{
2596 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2598 }else if(h->cbp&15){
2599 if(transform_bypass){
2600 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2601 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2602 for(i=0; i<16; i+=di){
2603 if(h->non_zero_count_cache[ scan8[i] ]){
2604 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2607 }else{
2608 if(IS_8x8DCT(mb_type)){
2609 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2610 }else{
2611 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2615 }else{
2616 for(i=0; i<16; i++){
2617 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2618 uint8_t * const ptr= dest_y + block_offset[i];
2619 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2625 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2626 uint8_t *dest[2] = {dest_cb, dest_cr};
2627 if(transform_bypass){
2628 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2629 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2630 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2631 }else{
2632 idct_add = s->dsp.add_pixels4;
2633 for(i=16; i<16+8; i++){
2634 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2635 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2638 }else{
2639 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2640 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2641 if(is_h264){
2642 idct_add = s->dsp.h264_idct_add;
2643 idct_dc_add = s->dsp.h264_idct_dc_add;
2644 for(i=16; i<16+8; i++){
2645 if(h->non_zero_count_cache[ scan8[i] ])
2646 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2647 else if(h->mb[i*16])
2648 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2650 }else{
2651 for(i=16; i<16+8; i++){
2652 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2653 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2654 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2661 if(h->cbp || IS_INTRA(mb_type))
2662 s->dsp.clear_blocks(h->mb);
2664 if(h->deblocking_filter) {
2665 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2666 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2667 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2668 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2669 if (!simple && FRAME_MBAFF) {
2670 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2671 } else {
2672 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2677 /**
2678 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2679 */
2680 static void hl_decode_mb_simple(H264Context *h){
2681 hl_decode_mb_internal(h, 1);
2684 /**
2685 * Process a macroblock; this handles edge cases, such as interlacing.
2686 */
2687 static void av_noinline hl_decode_mb_complex(H264Context *h){
2688 hl_decode_mb_internal(h, 0);
2691 static void hl_decode_mb(H264Context *h){
2692 MpegEncContext * const s = &h->s;
2693 const int mb_xy= h->mb_xy;
2694 const int mb_type= s->current_picture.mb_type[mb_xy];
2695 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2697 if (is_complex)
2698 hl_decode_mb_complex(h);
2699 else hl_decode_mb_simple(h);
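/* pic_as_field(): convert a frame Picture into one of its fields in place;
 * the data pointers are advanced by one line for the bottom field and the
 * linesizes are doubled, so the field can be addressed like a half-height
 * picture. */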
2702 static void pic_as_field(Picture *pic, const int parity){
2703 int i;
2704 for (i = 0; i < 4; ++i) {
2705 if (parity == PICT_BOTTOM_FIELD)
2706 pic->data[i] += pic->linesize[i];
2707 pic->reference = parity;
2708 pic->linesize[i] *= 2;
2710 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2713 static int split_field_copy(Picture *dest, Picture *src,
2714 int parity, int id_add){
2715 int match = !!(src->reference & parity);
2717 if (match) {
2718 *dest = *src;
2719 if(parity != PICT_FRAME){
2720 pic_as_field(dest, parity);
2721 dest->pic_id *= 2;
2722 dest->pic_id += id_add;
2726 return match;
2729 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2730 int i[2]={0};
2731 int index=0;
2733 while(i[0]<len || i[1]<len){
2734 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2735 i[0]++;
2736 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2737 i[1]++;
2738 if(i[0] < len){
2739 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2740 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2742 if(i[1] < len){
2743 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2744 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2748 return index;
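/* add_sorted(): pick the short term references whose POC lies on the
 * requested side of the current POC (past or future, depending on dir) and
 * emit them ordered by increasing POC distance, as the default B-slice
 * reference list construction requires. */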
2751 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2752 int i, best_poc;
2753 int out_i= 0;
2755 for(;;){
2756 best_poc= dir ? INT_MIN : INT_MAX;
2758 for(i=0; i<len; i++){
2759 const int poc= src[i]->poc;
2760 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2761 best_poc= poc;
2762 sorted[out_i]= src[i];
2765 if(best_poc == (dir ? INT_MIN : INT_MAX))
2766 break;
2767 limit= sorted[out_i++]->poc - dir;
2769 return out_i;
2772 /**
2773 * fills the default_ref_list.
2774 */
2775 static int fill_default_ref_list(H264Context *h){
2776 MpegEncContext * const s = &h->s;
2777 int i, len;
2779 if(h->slice_type_nos==FF_B_TYPE){
2780 Picture *sorted[32];
2781 int cur_poc, list;
2782 int lens[2];
2784 if(FIELD_PICTURE)
2785 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2786 else
2787 cur_poc= s->current_picture_ptr->poc;
2789 for(list= 0; list<2; list++){
2790 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2791 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2792 assert(len<=32);
2793 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2794 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2795 assert(len<=32);
2797 if(len < h->ref_count[list])
2798 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2799 lens[list]= len;
2802 if(lens[0] == lens[1] && lens[1] > 1){
2803 for(i=0; i<lens[0] && h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++); // check the bound before dereferencing
2804 if(i == lens[0])
2805 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2807 }else{
2808 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2809 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2810 assert(len <= 32);
2811 if(len < h->ref_count[0])
2812 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2814 #ifdef TRACE
2815 for (i=0; i<h->ref_count[0]; i++) {
2816 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2818 if(h->slice_type_nos==FF_B_TYPE){
2819 for (i=0; i<h->ref_count[1]; i++) {
2820 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2823 #endif
2824 return 0;
2827 static void print_short_term(H264Context *h);
2828 static void print_long_term(H264Context *h);
2830 /**
2831 * Extract structure information about the picture described by pic_num in
2832 * the current decoding context (frame or field). Note that pic_num is
2833 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2834 * @param pic_num picture number for which to extract structure information
2835 * @param structure one of PICT_XXX describing structure of picture
2836 * with pic_num
2837 * @return frame number (short term) or long term index of picture
2838 * described by pic_num
2839 */
2840 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2841 MpegEncContext * const s = &h->s;
2843 *structure = s->picture_structure;
2844 if(FIELD_PICTURE){
2845 if (!(pic_num & 1))
2846 /* opposite field */
2847 *structure ^= PICT_FRAME;
2848 pic_num >>= 1;
2851 return pic_num;
2854 static int decode_ref_pic_list_reordering(H264Context *h){
2855 MpegEncContext * const s = &h->s;
2856 int list, index, pic_structure;
2858 print_short_term(h);
2859 print_long_term(h);
2861 for(list=0; list<h->list_count; list++){
2862 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2864 if(get_bits1(&s->gb)){
2865 int pred= h->curr_pic_num;
2867 for(index=0; ; index++){
2868 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2869 unsigned int pic_id;
2870 int i;
2871 Picture *ref = NULL;
2873 if(reordering_of_pic_nums_idc==3)
2874 break;
2876 if(index >= h->ref_count[list]){
2877 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2878 return -1;
2881 if(reordering_of_pic_nums_idc<3){
2882 if(reordering_of_pic_nums_idc<2){
2883 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2884 int frame_num;
2886 if(abs_diff_pic_num > h->max_pic_num){
2887 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2888 return -1;
2891 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2892 else pred+= abs_diff_pic_num;
2893 pred &= h->max_pic_num - 1;
2895 frame_num = pic_num_extract(h, pred, &pic_structure);
2897 for(i= h->short_ref_count-1; i>=0; i--){
2898 ref = h->short_ref[i];
2899 assert(ref->reference);
2900 assert(!ref->long_ref);
2901 if(
2902 ref->frame_num == frame_num &&
2903 (ref->reference & pic_structure)
2904 )
2905 break;
2907 if(i>=0)
2908 ref->pic_id= pred;
2909 }else{
2910 int long_idx;
2911 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2913 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2915 if(long_idx>31){
2916 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2917 return -1;
2919 ref = h->long_ref[long_idx];
2920 assert(!(ref && !ref->reference));
2921 if(ref && (ref->reference & pic_structure)){
2922 ref->pic_id= pic_id;
2923 assert(ref->long_ref);
2924 i=0;
2925 }else{
2926 i=-1;
2930 if (i < 0) {
2931 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2932 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2933 } else {
2934 for(i=index; i+1<h->ref_count[list]; i++){
2935 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2936 break;
2938 for(; i > index; i--){
2939 h->ref_list[list][i]= h->ref_list[list][i-1];
2941 h->ref_list[list][index]= *ref;
2942 if (FIELD_PICTURE){
2943 pic_as_field(&h->ref_list[list][index], pic_structure);
2946 }else{
2947 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2948 return -1;
2953 for(list=0; list<h->list_count; list++){
2954 for(index= 0; index < h->ref_count[list]; index++){
2955 if(!h->ref_list[list][index].data[0]){
2956 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2957 if(h->default_ref_list[list][0].data[0])
2958 h->ref_list[list][index]= h->default_ref_list[list][0];
2959 else
2960 return -1;
2965 return 0;
2968 static void fill_mbaff_ref_list(H264Context *h){
2969 int list, i, j;
2970 for(list=0; list<2; list++){ //FIXME try list_count
2971 for(i=0; i<h->ref_count[list]; i++){
2972 Picture *frame = &h->ref_list[list][i];
2973 Picture *field = &h->ref_list[list][16+2*i];
2974 field[0] = *frame;
2975 for(j=0; j<3; j++)
2976 field[0].linesize[j] <<= 1;
2977 field[0].reference = PICT_TOP_FIELD;
2978 field[0].poc= field[0].field_poc[0];
2979 field[1] = field[0];
2980 for(j=0; j<3; j++)
2981 field[1].data[j] += frame->linesize[j];
2982 field[1].reference = PICT_BOTTOM_FIELD;
2983 field[1].poc= field[1].field_poc[1];
2985 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2986 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2987 for(j=0; j<2; j++){
2988 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2989 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2993 for(j=0; j<h->ref_count[1]; j++){
2994 for(i=0; i<h->ref_count[0]; i++)
2995 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2996 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2997 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
3001 static int pred_weight_table(H264Context *h){
3002 MpegEncContext * const s = &h->s;
3003 int list, i;
3004 int luma_def, chroma_def;
3006 h->use_weight= 0;
3007 h->use_weight_chroma= 0;
3008 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3009 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3010 luma_def = 1<<h->luma_log2_weight_denom;
3011 chroma_def = 1<<h->chroma_log2_weight_denom;
3013 for(list=0; list<2; list++){
3014 h->luma_weight_flag[list] = 0;
3015 h->chroma_weight_flag[list] = 0;
3016 for(i=0; i<h->ref_count[list]; i++){
3017 int luma_weight_flag, chroma_weight_flag;
3019 luma_weight_flag= get_bits1(&s->gb);
3020 if(luma_weight_flag){
3021 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3022 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3023 if( h->luma_weight[list][i] != luma_def
3024 || h->luma_offset[list][i] != 0) {
3025 h->use_weight= 1;
3026 h->luma_weight_flag[list]= 1;
3028 }else{
3029 h->luma_weight[list][i]= luma_def;
3030 h->luma_offset[list][i]= 0;
3033 if(CHROMA){
3034 chroma_weight_flag= get_bits1(&s->gb);
3035 if(chroma_weight_flag){
3036 int j;
3037 for(j=0; j<2; j++){
3038 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3039 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3040 if( h->chroma_weight[list][i][j] != chroma_def
3041 || h->chroma_offset[list][i][j] != 0) {
3042 h->use_weight_chroma= 1;
3043 h->chroma_weight_flag[list]= 1;
3046 }else{
3047 int j;
3048 for(j=0; j<2; j++){
3049 h->chroma_weight[list][i][j]= chroma_def;
3050 h->chroma_offset[list][i][j]= 0;
3055 if(h->slice_type_nos != FF_B_TYPE) break;
3057 h->use_weight= h->use_weight || h->use_weight_chroma;
3058 return 0;
3061 static void implicit_weight_table(H264Context *h){
3062 MpegEncContext * const s = &h->s;
3063 int ref0, ref1, i;
3064 int cur_poc = s->current_picture_ptr->poc;
3066 for (i = 0; i < 2; i++) {
3067 h->luma_weight_flag[i] = 0;
3068 h->chroma_weight_flag[i] = 0;
3071 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3072 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3073 h->use_weight= 0;
3074 h->use_weight_chroma= 0;
3075 return;
3078 h->use_weight= 2;
3079 h->use_weight_chroma= 2;
3080 h->luma_log2_weight_denom= 5;
3081 h->chroma_log2_weight_denom= 5;
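/* Implicit bi-prediction weights are derived from the POC distances, e.g.
 * with cur_poc=4, poc0=0, poc1=8: td=8, tb=4, tx=(16384+4)/8=2048,
 * dist_scale_factor=((4*2048+32)>>6)>>2=32, so implicit_weight=64-32=32,
 * i.e. an equal 32/32 split when the current picture lies halfway between
 * its two references. */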
3083 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3084 int poc0 = h->ref_list[0][ref0].poc;
3085 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3086 int poc1 = h->ref_list[1][ref1].poc;
3087 int td = av_clip(poc1 - poc0, -128, 127);
3088 if(td){
3089 int tb = av_clip(cur_poc - poc0, -128, 127);
3090 int tx = (16384 + (FFABS(td) >> 1)) / td;
3091 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3092 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3093 h->implicit_weight[ref0][ref1] = 32;
3094 else
3095 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3096 }else
3097 h->implicit_weight[ref0][ref1] = 32;
3102 /**
3103 * Mark a picture as no longer needed for reference. The refmask
3104 * argument allows unreferencing of individual fields or the whole frame.
3105 * If the picture becomes entirely unreferenced, but is being held for
3106 * display purposes, it is marked as such.
3107 * @param refmask mask of fields to unreference; the mask is bitwise
3108 * anded with the reference marking of pic
3109 * @return non-zero if pic becomes entirely unreferenced (except possibly
3110 * for display purposes) zero if one of the fields remains in
3111 * reference
3112 */
3113 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3114 int i;
3115 if (pic->reference &= refmask) {
3116 return 0;
3117 } else {
3118 for(i = 0; h->delayed_pic[i]; i++)
3119 if(pic == h->delayed_pic[i]){
3120 pic->reference=DELAYED_PIC_REF;
3121 break;
3123 return 1;
3127 /**
3128 * instantaneous decoder refresh.
3129 */
3130 static void idr(H264Context *h){
3131 int i;
3133 for(i=0; i<16; i++){
3134 remove_long(h, i, 0);
3136 assert(h->long_ref_count==0);
3138 for(i=0; i<h->short_ref_count; i++){
3139 unreference_pic(h, h->short_ref[i], 0);
3140 h->short_ref[i]= NULL;
3142 h->short_ref_count=0;
3143 h->prev_frame_num= 0;
3144 h->prev_frame_num_offset= 0;
3145 h->prev_poc_msb=
3146 h->prev_poc_lsb= 0;
3149 /* forget old pics after a seek */
3150 static void flush_dpb(AVCodecContext *avctx){
3151 H264Context *h= avctx->priv_data;
3152 int i;
3153 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3154 if(h->delayed_pic[i])
3155 h->delayed_pic[i]->reference= 0;
3156 h->delayed_pic[i]= NULL;
3158 h->outputed_poc= INT_MIN;
3159 h->prev_interlaced_frame = 1;
3160 idr(h);
3161 if(h->s.current_picture_ptr)
3162 h->s.current_picture_ptr->reference= 0;
3163 h->s.first_field= 0;
3164 reset_sei(h);
3165 ff_mpeg_flush(avctx);
3168 /**
3169 * Find a Picture in the short term reference list by frame number.
3170 * @param frame_num frame number to search for
3171 * @param idx the index into h->short_ref where returned picture is found
3172 * undefined if no picture found.
3173 * @return pointer to the found picture, or NULL if no pic with the provided
3174 * frame number is found
3175 */
3176 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3177 MpegEncContext * const s = &h->s;
3178 int i;
3180 for(i=0; i<h->short_ref_count; i++){
3181 Picture *pic= h->short_ref[i];
3182 if(s->avctx->debug&FF_DEBUG_MMCO)
3183 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3184 if(pic->frame_num == frame_num) {
3185 *idx = i;
3186 return pic;
3189 return NULL;
3192 /**
3193 * Remove a picture from the short term reference list by its index in
3194 * that list. This does no checking on the provided index; it is assumed
3195 * to be valid. Other list entries are shifted down.
3196 * @param i index into h->short_ref of picture to remove.
3197 */
3198 static void remove_short_at_index(H264Context *h, int i){
3199 assert(i >= 0 && i < h->short_ref_count);
3200 h->short_ref[i]= NULL;
3201 if (--h->short_ref_count)
3202 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3206 /**
3207 * @return the removed picture or NULL if an error occurs
3208 */
3209 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3210 MpegEncContext * const s = &h->s;
3211 Picture *pic;
3212 int i;
3214 if(s->avctx->debug&FF_DEBUG_MMCO)
3215 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3217 pic = find_short(h, frame_num, &i);
3218 if (pic){
3219 if(unreference_pic(h, pic, ref_mask))
3220 remove_short_at_index(h, i);
3223 return pic;
3226 /**
3227 * Remove a picture from the long term reference list by its index in
3228 * that list.
3229 * @return the removed picture or NULL if an error occurs
3230 */
3231 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3232 Picture *pic;
3234 pic= h->long_ref[i];
3235 if (pic){
3236 if(unreference_pic(h, pic, ref_mask)){
3237 assert(h->long_ref[i]->long_ref == 1);
3238 h->long_ref[i]->long_ref= 0;
3239 h->long_ref[i]= NULL;
3240 h->long_ref_count--;
3244 return pic;
3247 /**
3248 * print short term list
3249 */
3250 static void print_short_term(H264Context *h) {
3251 uint32_t i;
3252 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3253 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3254 for(i=0; i<h->short_ref_count; i++){
3255 Picture *pic= h->short_ref[i];
3256 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3261 /**
3262 * print long term list
3263 */
3264 static void print_long_term(H264Context *h) {
3265 uint32_t i;
3266 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3267 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3268 for(i = 0; i < 16; i++){
3269 Picture *pic= h->long_ref[i];
3270 if (pic) {
3271 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3277 /**
3278 * Executes the reference picture marking (memory management control operations).
3279 */
3280 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3281 MpegEncContext * const s = &h->s;
3282 int i, av_uninit(j);
3283 int current_ref_assigned=0;
3284 Picture *av_uninit(pic);
3286 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3287 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3289 for(i=0; i<mmco_count; i++){
3290 int av_uninit(structure), av_uninit(frame_num);
3291 if(s->avctx->debug&FF_DEBUG_MMCO)
3292 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3294 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3295 || mmco[i].opcode == MMCO_SHORT2LONG){
3296 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3297 pic = find_short(h, frame_num, &j);
3298 if(!pic){
3299 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3300 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3301 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3302 continue;
3306 switch(mmco[i].opcode){
3307 case MMCO_SHORT2UNUSED:
3308 if(s->avctx->debug&FF_DEBUG_MMCO)
3309 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3310 remove_short(h, frame_num, structure ^ PICT_FRAME);
3311 break;
3312 case MMCO_SHORT2LONG:
3313 if (h->long_ref[mmco[i].long_arg] != pic)
3314 remove_long(h, mmco[i].long_arg, 0);
3316 remove_short_at_index(h, j);
3317 h->long_ref[ mmco[i].long_arg ]= pic;
3318 if (h->long_ref[ mmco[i].long_arg ]){
3319 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3320 h->long_ref_count++;
3322 break;
3323 case MMCO_LONG2UNUSED:
3324 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3325 pic = h->long_ref[j];
3326 if (pic) {
3327 remove_long(h, j, structure ^ PICT_FRAME);
3328 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3329 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3330 break;
3331 case MMCO_LONG:
3332 // Comment below kept from previous code, as it is an interesting note.
3333 /* First field in pair is in short term list or
3334 * at a different long term index.
3335 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3336 * Report the problem and keep the pair where it is,
3337 * and mark this field valid.
3340 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3341 remove_long(h, mmco[i].long_arg, 0);
3343 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3344 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3345 h->long_ref_count++;
3348 s->current_picture_ptr->reference |= s->picture_structure;
3349 current_ref_assigned=1;
3350 break;
3351 case MMCO_SET_MAX_LONG:
3352 assert(mmco[i].long_arg <= 16);
3353 // just remove the long term refs whose index is greater than the new max
3354 for(j = mmco[i].long_arg; j<16; j++){
3355 remove_long(h, j, 0);
3357 break;
3358 case MMCO_RESET:
3359 while(h->short_ref_count){
3360 remove_short(h, h->short_ref[0]->frame_num, 0);
3362 for(j = 0; j < 16; j++) {
3363 remove_long(h, j, 0);
3365 s->current_picture_ptr->poc=
3366 s->current_picture_ptr->field_poc[0]=
3367 s->current_picture_ptr->field_poc[1]=
3368 h->poc_lsb=
3369 h->poc_msb=
3370 h->frame_num=
3371 s->current_picture_ptr->frame_num= 0;
3372 s->current_picture_ptr->mmco_reset=1;
3373 break;
3374 default: assert(0);
3378 if (!current_ref_assigned) {
3379 /* Second field of a complementary field pair; the first field
3380 * is already referenced. If it is short-term referenced, it
3381 * should be the first entry in short_ref. If not, it must exist
3382 * in long_ref; trying to put it on the short list here is an
3383 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3385 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3386 /* Just mark the second field valid */
3387 s->current_picture_ptr->reference = PICT_FRAME;
3388 } else if (s->current_picture_ptr->long_ref) {
3389 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3390 "assignment for second field "
3391 "in complementary field pair "
3392 "(first field is long term)\n");
3393 } else {
3394 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3395 if(pic){
3396 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3399 if(h->short_ref_count)
3400 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3402 h->short_ref[0]= s->current_picture_ptr;
3403 h->short_ref_count++;
3404 s->current_picture_ptr->reference |= s->picture_structure;
3408 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3410 /* We have too many reference frames, probably due to a corrupted
3411 * stream. We need to discard one frame to prevent overrun of the
3412 * short_ref and long_ref buffers.
3414 av_log(h->s.avctx, AV_LOG_ERROR,
3415 "number of reference frames exceeds max (probably "
3416 "corrupt input), discarding one\n");
3418 if (h->long_ref_count && !h->short_ref_count) {
3419 for (i = 0; i < 16; ++i)
3420 if (h->long_ref[i])
3421 break;
3423 assert(i < 16);
3424 remove_long(h, i, 0);
3425 } else {
3426 pic = h->short_ref[h->short_ref_count - 1];
3427 remove_short(h, pic->frame_num, 0);
3431 print_short_term(h);
3432 print_long_term(h);
3433 return 0;
3436 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3437 MpegEncContext * const s = &h->s;
3438 int i;
3440 h->mmco_index= 0;
3441 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
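/* IDR slices: the two flags read below are no_output_of_prior_pics_flag
 * (stored, offset by -1, in broken_link) and long_term_reference_flag,
 * which marks the current picture as long-term reference with index 0. */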
3442 s->broken_link= get_bits1(gb) -1;
3443 if(get_bits1(gb)){
3444 h->mmco[0].opcode= MMCO_LONG;
3445 h->mmco[0].long_arg= 0;
3446 h->mmco_index= 1;
3448 }else{
3449 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3450 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3451 MMCOOpcode opcode= get_ue_golomb_31(gb);
3453 h->mmco[i].opcode= opcode;
3454 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3455 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3456 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3457 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3458 return -1;
3461 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3462 unsigned int long_arg= get_ue_golomb_31(gb);
3463 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3464 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3465 return -1;
3467 h->mmco[i].long_arg= long_arg;
3470 if(opcode > (unsigned)MMCO_LONG){
3471 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3472 return -1;
3474 if(opcode == MMCO_END)
3475 break;
3477 h->mmco_index= i;
3478 }else{
3479 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
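/* Sliding window marking: no explicit MMCOs were sent, so if the reference
 * buffer is full, synthesize an MMCO that unreferences the oldest short-term
 * picture (both of its fields when decoding field pictures). */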
3481 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3482 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3483 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3484 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3485 h->mmco_index= 1;
3486 if (FIELD_PICTURE) {
3487 h->mmco[0].short_pic_num *= 2;
3488 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3489 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3490 h->mmco_index= 2;
3496 return 0;
3499 static int init_poc(H264Context *h){
3500 MpegEncContext * const s = &h->s;
3501 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3502 int field_poc[2];
3503 Picture *cur = s->current_picture_ptr;
3505 h->frame_num_offset= h->prev_frame_num_offset;
3506 if(h->frame_num < h->prev_frame_num)
3507 h->frame_num_offset += max_frame_num;
3509 if(h->sps.poc_type==0){
3510 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
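/* Derive PicOrderCntMsb as in 8.2.1.1: step the MSB up or down by max_poc_lsb
 * when the LSB wraps relative to the previous reference picture's LSB,
 * otherwise keep the previous MSB. */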
3512 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3513 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3514 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3515 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3516 else
3517 h->poc_msb = h->prev_poc_msb;
3518 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3519 field_poc[0] =
3520 field_poc[1] = h->poc_msb + h->poc_lsb;
3521 if(s->picture_structure == PICT_FRAME)
3522 field_poc[1] += h->delta_poc_bottom;
3523 }else if(h->sps.poc_type==1){
3524 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3525 int i;
3527 if(h->sps.poc_cycle_length != 0)
3528 abs_frame_num = h->frame_num_offset + h->frame_num;
3529 else
3530 abs_frame_num = 0;
3532 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3533 abs_frame_num--;
3535 expected_delta_per_poc_cycle = 0;
3536 for(i=0; i < h->sps.poc_cycle_length; i++)
3537 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3539 if(abs_frame_num > 0){
3540 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3541 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3543 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3544 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3545 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3546 } else
3547 expectedpoc = 0;
3549 if(h->nal_ref_idc == 0)
3550 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3552 field_poc[0] = expectedpoc + h->delta_poc[0];
3553 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3555 if(s->picture_structure == PICT_FRAME)
3556 field_poc[1] += h->delta_poc[1];
3557 }else{
3558 int poc= 2*(h->frame_num_offset + h->frame_num);
3560 if(!h->nal_ref_idc)
3561 poc--;
3563 field_poc[0]= poc;
3564 field_poc[1]= poc;
3567 if(s->picture_structure != PICT_BOTTOM_FIELD)
3568 s->current_picture_ptr->field_poc[0]= field_poc[0];
3569 if(s->picture_structure != PICT_TOP_FIELD)
3570 s->current_picture_ptr->field_poc[1]= field_poc[1];
3571 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3573 return 0;
3578 * initialize scan tables
3580 static void init_scan_tables(H264Context *h){
3581 MpegEncContext * const s = &h->s;
3582 int i;
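/* When the IDCT in use is not the plain C one, the scan tables are permuted:
 * the T() macros below swap the row and column of each scan position (a
 * transpose of the block coordinates), presumably to match the coefficient
 * layout the overriding IDCT implementations expect. */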
3583 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME a little ugly
3584 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3585 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3586 }else{
3587 for(i=0; i<16; i++){
3588 #define T(x) (x>>2) | ((x<<2) & 0xF)
3589 h->zigzag_scan[i] = T(zigzag_scan[i]);
3590 h-> field_scan[i] = T( field_scan[i]);
3591 #undef T
3594 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3595 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3596 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3597 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3598 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3599 }else{
3600 for(i=0; i<64; i++){
3601 #define T(x) (x>>3) | ((x&7)<<3)
3602 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3603 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3604 h->field_scan8x8[i] = T(field_scan8x8[i]);
3605 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3606 #undef T
3609 if(h->sps.transform_bypass){ //FIXME same ugliness as above
3610 h->zigzag_scan_q0 = zigzag_scan;
3611 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3612 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3613 h->field_scan_q0 = field_scan;
3614 h->field_scan8x8_q0 = field_scan8x8;
3615 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3616 }else{
3617 h->zigzag_scan_q0 = h->zigzag_scan;
3618 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3619 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3620 h->field_scan_q0 = h->field_scan;
3621 h->field_scan8x8_q0 = h->field_scan8x8;
3622 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3626 static void field_end(H264Context *h){
3627 MpegEncContext * const s = &h->s;
3628 AVCodecContext * const avctx= s->avctx;
3629 s->mb_y= 0;
3631 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
3632 s->current_picture_ptr->pict_type= s->pict_type;
3634 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3635 ff_vdpau_h264_set_reference_frames(s);
3637 if(!s->dropable) {
3638 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
3639 h->prev_poc_msb= h->poc_msb;
3640 h->prev_poc_lsb= h->poc_lsb;
3642 h->prev_frame_num_offset= h->frame_num_offset;
3643 h->prev_frame_num= h->frame_num;
3645 if (avctx->hwaccel) {
3646 if (avctx->hwaccel->end_frame(avctx) < 0)
3647 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
3650 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3651 ff_vdpau_h264_picture_complete(s);
3654 * FIXME: Error handling code does not seem to support interlaced video
3655 * when slices span multiple rows.
3656 * The ff_er_add_slice calls don't work right for bottom
3657 * fields; they cause massive erroneous error concealing.
3658 * Error marking covers both fields (top and bottom).
3659 * This causes a mismatched s->error_count
3660 * and a bad error table. Further, the error count goes to
3661 * INT_MAX when called for the bottom field, because mb_y is
3662 * past the end by one (the caller's fault) and resync_mb_y != 0
3663 * causes problems for the first MB line, too.
3665 if (!FIELD_PICTURE)
3666 ff_er_frame_end(s);
3668 MPV_frame_end(s);
3670 h->current_slice=0;
3674 * Replicates H264 "master" context to thread contexts.
3676 static void clone_slice(H264Context *dst, H264Context *src)
3678 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3679 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3680 dst->s.current_picture = src->s.current_picture;
3681 dst->s.linesize = src->s.linesize;
3682 dst->s.uvlinesize = src->s.uvlinesize;
3683 dst->s.first_field = src->s.first_field;
3685 dst->prev_poc_msb = src->prev_poc_msb;
3686 dst->prev_poc_lsb = src->prev_poc_lsb;
3687 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3688 dst->prev_frame_num = src->prev_frame_num;
3689 dst->short_ref_count = src->short_ref_count;
3691 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3692 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3693 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3694 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3696 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3697 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3701 * decodes a slice header.
3702 * This will also call MPV_common_init() and frame_start() as needed.
3704 * @param h h264context
3705 * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
3707 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3709 static int decode_slice_header(H264Context *h, H264Context *h0){
3710 MpegEncContext * const s = &h->s;
3711 MpegEncContext * const s0 = &h0->s;
3712 unsigned int first_mb_in_slice;
3713 unsigned int pps_id;
3714 int num_ref_idx_active_override_flag;
3715 unsigned int slice_type, tmp, i, j;
3716 int default_ref_list_done = 0;
3717 int last_pic_structure;
3719 s->dropable= h->nal_ref_idc == 0;
3721 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3722 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3723 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3724 }else{
3725 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3726 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3729 first_mb_in_slice= get_ue_golomb(&s->gb);
3731 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
3732 if(h0->current_slice && FIELD_PICTURE){
3733 field_end(h);
3736 h0->current_slice = 0;
3737 if (!s0->first_field)
3738 s->current_picture_ptr= NULL;
3741 slice_type= get_ue_golomb_31(&s->gb);
3742 if(slice_type > 9){
3743 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3744 return -1;
3746 if(slice_type > 4){
3747 slice_type -= 5;
3748 h->slice_type_fixed=1;
3749 }else
3750 h->slice_type_fixed=0;
3752 slice_type= golomb_to_pict_type[ slice_type ];
3753 if (slice_type == FF_I_TYPE
3754 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3755 default_ref_list_done = 1;
3757 h->slice_type= slice_type;
3758 h->slice_type_nos= slice_type & 3;
3760 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3761 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3762 av_log(h->s.avctx, AV_LOG_ERROR,
3763 "B picture before any references, skipping\n");
3764 return -1;
3767 pps_id= get_ue_golomb(&s->gb);
3768 if(pps_id>=MAX_PPS_COUNT){
3769 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3770 return -1;
3772 if(!h0->pps_buffers[pps_id]) {
3773 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
3774 return -1;
3776 h->pps= *h0->pps_buffers[pps_id];
3778 if(!h0->sps_buffers[h->pps.sps_id]) {
3779 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
3780 return -1;
3782 h->sps = *h0->sps_buffers[h->pps.sps_id];
3784 if(h == h0 && h->dequant_coeff_pps != pps_id){
3785 h->dequant_coeff_pps = pps_id;
3786 init_dequant_tables(h);
3789 s->mb_width= h->sps.mb_width;
3790 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3792 h->b_stride= s->mb_width*4;
3793 h->b8_stride= s->mb_width*2;
3795 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3796 if(h->sps.frame_mbs_only_flag)
3797 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3798 else
3799 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3801 if (s->context_initialized
3802 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3803 if(h != h0)
3804 return -1; // width / height changed during parallelized decoding
3805 free_tables(h);
3806 flush_dpb(s->avctx);
3807 MPV_common_end(s);
3809 if (!s->context_initialized) {
3810 if(h != h0)
3811 return -1; // we can't (re-)initialize the context during parallel decoding
3812 if (MPV_common_init(s) < 0)
3813 return -1;
3814 s->first_field = 0;
3815 h->prev_interlaced_frame = 1;
3817 init_scan_tables(h);
3818 alloc_tables(h);
3820 for(i = 1; i < s->avctx->thread_count; i++) {
3821 H264Context *c;
3822 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3823 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3824 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3825 c->sps = h->sps;
3826 c->pps = h->pps;
3827 init_scan_tables(c);
3828 clone_tables(c, h);
3831 for(i = 0; i < s->avctx->thread_count; i++)
3832 if(context_init(h->thread_context[i]) < 0)
3833 return -1;
3835 s->avctx->width = s->width;
3836 s->avctx->height = s->height;
3837 s->avctx->sample_aspect_ratio= h->sps.sar;
3838 if(!s->avctx->sample_aspect_ratio.den)
3839 s->avctx->sample_aspect_ratio.den = 1;
3841 if(h->sps.timing_info_present_flag){
3842 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
3843 if(h->x264_build > 0 && h->x264_build < 44)
3844 s->avctx->time_base.den *= 2;
3845 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3846 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3850 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3852 h->mb_mbaff = 0;
3853 h->mb_aff_frame = 0;
3854 last_pic_structure = s0->picture_structure;
3855 if(h->sps.frame_mbs_only_flag){
3856 s->picture_structure= PICT_FRAME;
3857 }else{
3858 if(get_bits1(&s->gb)) { //field_pic_flag
3859 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3860 } else {
3861 s->picture_structure= PICT_FRAME;
3862 h->mb_aff_frame = h->sps.mb_aff;
3865 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3867 if(h0->current_slice == 0){
3868 while(h->frame_num != h->prev_frame_num &&
3869 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3870 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3871 if (frame_start(h) < 0)
3872 return -1;
3873 h->prev_frame_num++;
3874 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3875 s->current_picture_ptr->frame_num= h->prev_frame_num;
3876 execute_ref_pic_marking(h, NULL, 0);
3879 /* See if we have a decoded first field looking for a pair... */
3880 if (s0->first_field) {
3881 assert(s0->current_picture_ptr);
3882 assert(s0->current_picture_ptr->data[0]);
3883 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3885 /* figure out if we have a complementary field pair */
3886 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3888 * Previous field is unmatched. Don't display it, but let it
3889 * remain for reference if marked as such.
3891 s0->current_picture_ptr = NULL;
3892 s0->first_field = FIELD_PICTURE;
3894 } else {
3895 if (h->nal_ref_idc &&
3896 s0->current_picture_ptr->reference &&
3897 s0->current_picture_ptr->frame_num != h->frame_num) {
3899 * This and previous field were reference, but had
3900 * different frame_nums. Consider this field first in
3901 * pair. Throw away previous field except for reference
3902 * purposes.
3904 s0->first_field = 1;
3905 s0->current_picture_ptr = NULL;
3907 } else {
3908 /* Second field in complementary pair */
3909 s0->first_field = 0;
3913 } else {
3914 /* Frame or first field in a potentially complementary pair */
3915 assert(!s0->current_picture_ptr);
3916 s0->first_field = FIELD_PICTURE;
3919 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3920 s0->first_field = 0;
3921 return -1;
3924 if(h != h0)
3925 clone_slice(h, h0);
3927 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3929 assert(s->mb_num == s->mb_width * s->mb_height);
3930 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3931 first_mb_in_slice >= s->mb_num){
3932 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3933 return -1;
3935 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3936 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3937 if (s->picture_structure == PICT_BOTTOM_FIELD)
3938 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3939 assert(s->mb_y < s->mb_height);
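/* Picture numbering for reference selection: in field pictures each field is
 * numbered separately, so CurrPicNum = 2*frame_num + 1 and MaxPicNum doubles
 * (one extra bit), as set up below. */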
3941 if(s->picture_structure==PICT_FRAME){
3942 h->curr_pic_num= h->frame_num;
3943 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3944 }else{
3945 h->curr_pic_num= 2*h->frame_num + 1;
3946 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3949 if(h->nal_unit_type == NAL_IDR_SLICE){
3950 get_ue_golomb(&s->gb); /* idr_pic_id */
3953 if(h->sps.poc_type==0){
3954 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3956 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3957 h->delta_poc_bottom= get_se_golomb(&s->gb);
3961 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3962 h->delta_poc[0]= get_se_golomb(&s->gb);
3964 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3965 h->delta_poc[1]= get_se_golomb(&s->gb);
3968 init_poc(h);
3970 if(h->pps.redundant_pic_cnt_present){
3971 h->redundant_pic_count= get_ue_golomb(&s->gb);
3974 //set defaults, might be overridden a few lines later
3975 h->ref_count[0]= h->pps.ref_count[0];
3976 h->ref_count[1]= h->pps.ref_count[1];
3978 if(h->slice_type_nos != FF_I_TYPE){
3979 if(h->slice_type_nos == FF_B_TYPE){
3980 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3982 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3984 if(num_ref_idx_active_override_flag){
3985 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3986 if(h->slice_type_nos==FF_B_TYPE)
3987 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3989 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3990 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3991 h->ref_count[0]= h->ref_count[1]= 1;
3992 return -1;
3995 if(h->slice_type_nos == FF_B_TYPE)
3996 h->list_count= 2;
3997 else
3998 h->list_count= 1;
3999 }else
4000 h->list_count= 0;
4002 if(!default_ref_list_done){
4003 fill_default_ref_list(h);
4006 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
4007 return -1;
4009 if(h->slice_type_nos!=FF_I_TYPE){
4010 s->last_picture_ptr= &h->ref_list[0][0];
4011 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
4013 if(h->slice_type_nos==FF_B_TYPE){
4014 s->next_picture_ptr= &h->ref_list[1][0];
4015 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
4018 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4019 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4020 pred_weight_table(h);
4021 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4022 implicit_weight_table(h);
4023 else {
4024 h->use_weight = 0;
4025 for (i = 0; i < 2; i++) {
4026 h->luma_weight_flag[i] = 0;
4027 h->chroma_weight_flag[i] = 0;
4031 if(h->nal_ref_idc)
4032 decode_ref_pic_marking(h0, &s->gb);
4034 if(FRAME_MBAFF)
4035 fill_mbaff_ref_list(h);
4037 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
4038 direct_dist_scale_factor(h);
4039 direct_ref_list_init(h);
4041 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4042 tmp = get_ue_golomb_31(&s->gb);
4043 if(tmp > 2){
4044 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4045 return -1;
4047 h->cabac_init_idc= tmp;
4050 h->last_qscale_diff = 0;
4051 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4052 if(tmp>51){
4053 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4054 return -1;
4056 s->qscale= tmp;
4057 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4058 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4059 //FIXME qscale / qp ... stuff
4060 if(h->slice_type == FF_SP_TYPE){
4061 get_bits1(&s->gb); /* sp_for_switch_flag */
4063 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4064 get_se_golomb(&s->gb); /* slice_qs_delta */
4067 h->deblocking_filter = 1;
4068 h->slice_alpha_c0_offset = 0;
4069 h->slice_beta_offset = 0;
4070 if( h->pps.deblocking_filter_parameters_present ) {
4071 tmp= get_ue_golomb_31(&s->gb);
4072 if(tmp > 2){
4073 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4074 return -1;
4076 h->deblocking_filter= tmp;
4077 if(h->deblocking_filter < 2)
4078 h->deblocking_filter^= 1; // 1<->0: bitstream value 0 means the filter is enabled; internally 1 means enabled
4080 if( h->deblocking_filter ) {
4081 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4082 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4086 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4087 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4088 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4089 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4090 h->deblocking_filter= 0;
4092 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4093 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4094 /* Cheat slightly for speed:
4095 Do not bother to deblock across slices. */
4096 h->deblocking_filter = 2;
4097 } else {
4098 h0->max_contexts = 1;
4099 if(!h0->single_decode_warning) {
4100 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4101 h0->single_decode_warning = 1;
4103 if(h != h0)
4104 return 1; // deblocking switched inside frame
4108 #if 0 //FMO
4109 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4110 slice_group_change_cycle= get_bits(&s->gb, ?);
4111 #endif
4113 h0->last_slice_type = slice_type;
4114 h->slice_num = ++h0->current_slice;
4115 if(h->slice_num >= MAX_SLICES){
4116 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
4119 for(j=0; j<2; j++){
4120 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4121 ref2frm[0]=
4122 ref2frm[1]= -1;
4123 for(i=0; i<16; i++)
4124 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4125 +(h->ref_list[j][i].reference&3);
4126 ref2frm[18+0]=
4127 ref2frm[18+1]= -1;
4128 for(i=16; i<48; i++)
4129 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4130 +(h->ref_list[j][i].reference&3);
4133 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4134 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4136 s->avctx->refs= h->sps.ref_frame_count;
4138 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4139 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4140 h->slice_num,
4141 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4142 first_mb_in_slice,
4143 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4144 pps_id, h->frame_num,
4145 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4146 h->ref_count[0], h->ref_count[1],
4147 s->qscale,
4148 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4149 h->use_weight,
4150 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4151 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4155 return 0;
4161 static inline int get_level_prefix(GetBitContext *gb){
4162 unsigned int buf;
4163 int log;
4165 OPEN_READER(re, gb);
4166 UPDATE_CACHE(re, gb);
4167 buf=GET_CACHE(re, gb);
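/* level_prefix is a unary code: count the zero bits before the first 1 bit.
 * log is the position of that 1 counted from the MSB (1-based), so the reader
 * skips log bits and returns log-1 as the prefix value. */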
4169 log= 32 - av_log2(buf);
4170 #ifdef TRACE
4171 print_bin(buf>>(32-log), log);
4172 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4173 #endif
4175 LAST_SKIP_BITS(re, gb, log);
4176 CLOSE_READER(re, gb);
4178 return log-1;
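/**
 * The 8x8 transform requires every sub-macroblock partition to be at least
 * 8x8. Direct 8x8 sub-blocks only qualify when direct_8x8_inference_flag is
 * set, since their motion is then inferred at 8x8 granularity.
 */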
4181 static inline int get_dct8x8_allowed(H264Context *h){
4182 if(h->sps.direct_8x8_inference_flag)
4183 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4184 else
4185 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4189 * decodes a residual block.
4190 * @param n block index
4191 * @param scantable scantable
4192 * @param max_coeff number of coefficients in the block
4193 * @return <0 if an error occurred
4195 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4196 MpegEncContext * const s = &h->s;
4197 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4198 int level[16];
4199 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4201 //FIXME put trailing_ones into the context
4203 if(n == CHROMA_DC_BLOCK_INDEX){
4204 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4205 total_coeff= coeff_token>>2;
4206 }else{
4207 if(n == LUMA_DC_BLOCK_INDEX){
4208 total_coeff= pred_non_zero_count(h, 0);
4209 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4210 total_coeff= coeff_token>>2;
4211 }else{
4212 total_coeff= pred_non_zero_count(h, n);
4213 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4214 total_coeff= coeff_token>>2;
4215 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4219 //FIXME set last_non_zero?
4221 if(total_coeff==0)
4222 return 0;
4223 if(total_coeff > (unsigned)max_coeff) {
4224 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4225 return -1;
4228 trailing_ones= coeff_token&3;
4229 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4230 assert(total_coeff<=16);
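/* Read the (up to three) trailing-ones sign bits in one go: a set bit means
 * the coefficient is -1, a clear bit means +1. Only trailing_ones of these
 * bits are consumed; the remaining level[] slots are overwritten or unused. */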
4232 i = show_bits(gb, 3);
4233 skip_bits(gb, trailing_ones);
4234 level[0] = 1-((i&4)>>1);
4235 level[1] = 1-((i&2) );
4236 level[2] = 1-((i&1)<<1);
4238 if(trailing_ones<total_coeff) {
4239 int mask, prefix;
4240 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4241 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4242 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4244 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4245 if(level_code >= 100){
4246 prefix= level_code - 100;
4247 if(prefix == LEVEL_TAB_BITS)
4248 prefix += get_level_prefix(gb);
4250 //first coefficient has suffix_length equal to 0 or 1
4251 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4252 if(suffix_length)
4253 level_code= (prefix<<1) + get_bits1(gb); //part
4254 else
4255 level_code= prefix; //part
4256 }else if(prefix==14){
4257 if(suffix_length)
4258 level_code= (prefix<<1) + get_bits1(gb); //part
4259 else
4260 level_code= prefix + get_bits(gb, 4); //part
4261 }else{
4262 level_code= 30 + get_bits(gb, prefix-3); //part
4263 if(prefix>=16)
4264 level_code += (1<<(prefix-3))-4096;
4267 if(trailing_ones < 3) level_code += 2;
4269 suffix_length = 2;
4270 mask= -(level_code&1);
4271 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4272 }else{
4273 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4275 suffix_length = 1;
4276 if(level_code + 3U > 6U)
4277 suffix_length++;
4278 level[trailing_ones]= level_code;
4281 //remaining coefficients have suffix_length > 0
4282 for(i=trailing_ones+1;i<total_coeff;i++) {
4283 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4284 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4285 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4287 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4288 if(level_code >= 100){
4289 prefix= level_code - 100;
4290 if(prefix == LEVEL_TAB_BITS){
4291 prefix += get_level_prefix(gb);
4293 if(prefix<15){
4294 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4295 }else{
4296 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4297 if(prefix>=16)
4298 level_code += (1<<(prefix-3))-4096;
4300 mask= -(level_code&1);
4301 level_code= (((2+level_code)>>1) ^ mask) - mask;
4303 level[i]= level_code;
4305 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
4306 suffix_length++;
4310 if(total_coeff == max_coeff)
4311 zeros_left=0;
4312 else{
4313 if(n == CHROMA_DC_BLOCK_INDEX)
4314 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4315 else
4316 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
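/* Coefficients are written back to front: start at scan position
 * total_coeff-1 + zeros_left and subtract 1 + run_before after each level,
 * so the zero runs are distributed between the nonzero coefficients. */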
4319 coeff_num = zeros_left + total_coeff - 1;
4320 j = scantable[coeff_num];
4321 if(n > 24){
4322 block[j] = level[0];
4323 for(i=1;i<total_coeff;i++) {
4324 if(zeros_left <= 0)
4325 run_before = 0;
4326 else if(zeros_left < 7){
4327 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4328 }else{
4329 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4331 zeros_left -= run_before;
4332 coeff_num -= 1 + run_before;
4333 j= scantable[ coeff_num ];
4335 block[j]= level[i];
4337 }else{
4338 block[j] = (level[0] * qmul[j] + 32)>>6;
4339 for(i=1;i<total_coeff;i++) {
4340 if(zeros_left <= 0)
4341 run_before = 0;
4342 else if(zeros_left < 7){
4343 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4344 }else{
4345 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4347 zeros_left -= run_before;
4348 coeff_num -= 1 + run_before;
4349 j= scantable[ coeff_num ];
4351 block[j]= (level[i] * qmul[j] + 32)>>6;
4355 if(zeros_left<0){
4356 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4357 return -1;
4360 return 0;
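/**
 * Used when mb_field_decoding_flag is not present in the bitstream (e.g. both
 * macroblocks of an MBAFF pair are skipped): predict it from the left
 * neighbour (preferred) or the top neighbour in the same slice, defaulting to
 * frame (non-interlaced) decoding.
 */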
4363 static void predict_field_decoding_flag(H264Context *h){
4364 MpegEncContext * const s = &h->s;
4365 const int mb_xy= h->mb_xy;
4366 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4367 ? s->current_picture.mb_type[mb_xy-1]
4368 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4369 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4370 : 0;
4371 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4375 * decodes a P_SKIP or B_SKIP macroblock
4377 static void decode_mb_skip(H264Context *h){
4378 MpegEncContext * const s = &h->s;
4379 const int mb_xy= h->mb_xy;
4380 int mb_type=0;
4382 memset(h->non_zero_count[mb_xy], 0, 16);
4383 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove (yuck)
4385 if(MB_FIELD)
4386 mb_type|= MB_TYPE_INTERLACED;
4388 if( h->slice_type_nos == FF_B_TYPE )
4390 // just for fill_caches. pred_direct_motion will set the real mb_type
4391 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4393 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4394 pred_direct_motion(h, &mb_type);
4395 mb_type|= MB_TYPE_SKIP;
4397 else
4399 int mx, my;
4400 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4402 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4403 pred_pskip_motion(h, &mx, &my);
4404 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4405 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4408 write_back_motion(h, mb_type);
4409 s->current_picture.mb_type[mb_xy]= mb_type;
4410 s->current_picture.qscale_table[mb_xy]= s->qscale;
4411 h->slice_table[ mb_xy ]= h->slice_num;
4412 h->prev_mb_skipped= 1;
4416 * decodes a macroblock
4417 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4419 static int decode_mb_cavlc(H264Context *h){
4420 MpegEncContext * const s = &h->s;
4421 int mb_xy;
4422 int partition_count;
4423 unsigned int mb_type, cbp;
4424 int dct8x8_allowed= h->pps.transform_8x8_mode;
4426 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4428 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4429 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4430 down the code */
4431 if(h->slice_type_nos != FF_I_TYPE){
4432 if(s->mb_skip_run==-1)
4433 s->mb_skip_run= get_ue_golomb(&s->gb);
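/* mb_skip_run gives the number of consecutive skipped macroblocks; -1 means it
 * has not yet been read for the current run. It is decremented once per
 * macroblock and read again once exhausted. */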
4435 if (s->mb_skip_run--) {
4436 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4437 if(s->mb_skip_run==0)
4438 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4439 else
4440 predict_field_decoding_flag(h);
4442 decode_mb_skip(h);
4443 return 0;
4446 if(FRAME_MBAFF){
4447 if( (s->mb_y&1) == 0 )
4448 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4451 h->prev_mb_skipped= 0;
4453 mb_type= get_ue_golomb(&s->gb);
4454 if(h->slice_type_nos == FF_B_TYPE){
4455 if(mb_type < 23){
4456 partition_count= b_mb_type_info[mb_type].partition_count;
4457 mb_type= b_mb_type_info[mb_type].type;
4458 }else{
4459 mb_type -= 23;
4460 goto decode_intra_mb;
4462 }else if(h->slice_type_nos == FF_P_TYPE){
4463 if(mb_type < 5){
4464 partition_count= p_mb_type_info[mb_type].partition_count;
4465 mb_type= p_mb_type_info[mb_type].type;
4466 }else{
4467 mb_type -= 5;
4468 goto decode_intra_mb;
4470 }else{
4471 assert(h->slice_type_nos == FF_I_TYPE);
4472 if(h->slice_type == FF_SI_TYPE && mb_type)
4473 mb_type--;
4474 decode_intra_mb:
4475 if(mb_type > 25){
4476 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4477 return -1;
4479 partition_count=0;
4480 cbp= i_mb_type_info[mb_type].cbp;
4481 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4482 mb_type= i_mb_type_info[mb_type].type;
4485 if(MB_FIELD)
4486 mb_type |= MB_TYPE_INTERLACED;
4488 h->slice_table[ mb_xy ]= h->slice_num;
4490 if(IS_INTRA_PCM(mb_type)){
4491 unsigned int x;
4493 // We assume these blocks are very rare, so we do not optimize them.
4494 align_get_bits(&s->gb);
4496 // The pixels are stored in the same order as levels in h->mb array.
4497 for(x=0; x < (CHROMA ? 384 : 256); x++){
4498 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4501 // In deblocking, the quantizer is 0
4502 s->current_picture.qscale_table[mb_xy]= 0;
4503 // All coeffs are present
4504 memset(h->non_zero_count[mb_xy], 16, 16);
4506 s->current_picture.mb_type[mb_xy]= mb_type;
4507 return 0;
4510 if(MB_MBAFF){
4511 h->ref_count[0] <<= 1;
4512 h->ref_count[1] <<= 1;
4515 fill_caches(h, mb_type, 0);
4517 //mb_pred
4518 if(IS_INTRA(mb_type)){
4519 int pred_mode;
4520 // init_top_left_availability(h);
4521 if(IS_INTRA4x4(mb_type)){
4522 int i;
4523 int di = 1;
4524 if(dct8x8_allowed && get_bits1(&s->gb)){
4525 mb_type |= MB_TYPE_8x8DCT;
4526 di = 4;
4529 // fill_intra4x4_pred_table(h);
4530 for(i=0; i<16; i+=di){
4531 int mode= pred_intra_mode(h, i);
4533 if(!get_bits1(&s->gb)){
4534 const int rem_mode= get_bits(&s->gb, 3);
4535 mode = rem_mode + (rem_mode >= mode);
4538 if(di==4)
4539 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4540 else
4541 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4543 write_back_intra_pred_mode(h);
4544 if( check_intra4x4_pred_mode(h) < 0)
4545 return -1;
4546 }else{
4547 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4548 if(h->intra16x16_pred_mode < 0)
4549 return -1;
4551 if(CHROMA){
4552 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4553 if(pred_mode < 0)
4554 return -1;
4555 h->chroma_pred_mode= pred_mode;
4557 }else if(partition_count==4){
4558 int i, j, sub_partition_count[4], list, ref[2][4];
4560 if(h->slice_type_nos == FF_B_TYPE){
4561 for(i=0; i<4; i++){
4562 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4563 if(h->sub_mb_type[i] >=13){
4564 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4565 return -1;
4567 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4568 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4570 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4571 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4572 pred_direct_motion(h, &mb_type);
4573 h->ref_cache[0][scan8[4]] =
4574 h->ref_cache[1][scan8[4]] =
4575 h->ref_cache[0][scan8[12]] =
4576 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4578 }else{
4579 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4580 for(i=0; i<4; i++){
4581 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4582 if(h->sub_mb_type[i] >=4){
4583 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4584 return -1;
4586 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4587 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4591 for(list=0; list<h->list_count; list++){
4592 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4593 for(i=0; i<4; i++){
4594 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4595 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4596 unsigned int tmp;
4597 if(ref_count == 1){
4598 tmp= 0;
4599 }else if(ref_count == 2){
4600 tmp= get_bits1(&s->gb)^1;
4601 }else{
4602 tmp= get_ue_golomb_31(&s->gb);
4603 if(tmp>=ref_count){
4604 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4605 return -1;
4608 ref[list][i]= tmp;
4609 }else{
4610 //FIXME
4611 ref[list][i] = -1;
4616 if(dct8x8_allowed)
4617 dct8x8_allowed = get_dct8x8_allowed(h);
4619 for(list=0; list<h->list_count; list++){
4620 for(i=0; i<4; i++){
4621 if(IS_DIRECT(h->sub_mb_type[i])) {
4622 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4623 continue;
4625 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4626 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4628 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4629 const int sub_mb_type= h->sub_mb_type[i];
4630 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4631 for(j=0; j<sub_partition_count[i]; j++){
4632 int mx, my;
4633 const int index= 4*i + block_width*j;
4634 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4635 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4636 mx += get_se_golomb(&s->gb);
4637 my += get_se_golomb(&s->gb);
4638 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4640 if(IS_SUB_8X8(sub_mb_type)){
4641 mv_cache[ 1 ][0]=
4642 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4643 mv_cache[ 1 ][1]=
4644 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4645 }else if(IS_SUB_8X4(sub_mb_type)){
4646 mv_cache[ 1 ][0]= mx;
4647 mv_cache[ 1 ][1]= my;
4648 }else if(IS_SUB_4X8(sub_mb_type)){
4649 mv_cache[ 8 ][0]= mx;
4650 mv_cache[ 8 ][1]= my;
4652 mv_cache[ 0 ][0]= mx;
4653 mv_cache[ 0 ][1]= my;
4655 }else{
4656 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4657 p[0] = p[1]=
4658 p[8] = p[9]= 0;
4662 }else if(IS_DIRECT(mb_type)){
4663 pred_direct_motion(h, &mb_type);
4664 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4665 }else{
4666 int list, mx, my, i;
4667 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4668 if(IS_16X16(mb_type)){
4669 for(list=0; list<h->list_count; list++){
4670 unsigned int val;
4671 if(IS_DIR(mb_type, 0, list)){
4672 if(h->ref_count[list]==1){
4673 val= 0;
4674 }else if(h->ref_count[list]==2){
4675 val= get_bits1(&s->gb)^1;
4676 }else{
4677 val= get_ue_golomb_31(&s->gb);
4678 if(val >= h->ref_count[list]){
4679 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4680 return -1;
4683 }else
4684 val= LIST_NOT_USED&0xFF;
4685 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4687 for(list=0; list<h->list_count; list++){
4688 unsigned int val;
4689 if(IS_DIR(mb_type, 0, list)){
4690 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4691 mx += get_se_golomb(&s->gb);
4692 my += get_se_golomb(&s->gb);
4693 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4695 val= pack16to32(mx,my);
4696 }else
4697 val=0;
4698 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4701 else if(IS_16X8(mb_type)){
4702 for(list=0; list<h->list_count; list++){
4703 for(i=0; i<2; i++){
4704 unsigned int val;
4705 if(IS_DIR(mb_type, i, list)){
4706 if(h->ref_count[list] == 1){
4707 val= 0;
4708 }else if(h->ref_count[list] == 2){
4709 val= get_bits1(&s->gb)^1;
4710 }else{
4711 val= get_ue_golomb_31(&s->gb);
4712 if(val >= h->ref_count[list]){
4713 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4714 return -1;
4717 }else
4718 val= LIST_NOT_USED&0xFF;
4719 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4722 for(list=0; list<h->list_count; list++){
4723 for(i=0; i<2; i++){
4724 unsigned int val;
4725 if(IS_DIR(mb_type, i, list)){
4726 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4727 mx += get_se_golomb(&s->gb);
4728 my += get_se_golomb(&s->gb);
4729 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4731 val= pack16to32(mx,my);
4732 }else
4733 val=0;
4734 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4737 }else{
4738 assert(IS_8X16(mb_type));
4739 for(list=0; list<h->list_count; list++){
4740 for(i=0; i<2; i++){
4741 unsigned int val;
4742 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4743 if(h->ref_count[list]==1){
4744 val= 0;
4745 }else if(h->ref_count[list]==2){
4746 val= get_bits1(&s->gb)^1;
4747 }else{
4748 val= get_ue_golomb_31(&s->gb);
4749 if(val >= h->ref_count[list]){
4750 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4751 return -1;
4754 }else
4755 val= LIST_NOT_USED&0xFF;
4756 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4759 for(list=0; list<h->list_count; list++){
4760 for(i=0; i<2; i++){
4761 unsigned int val;
4762 if(IS_DIR(mb_type, i, list)){
4763 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4764 mx += get_se_golomb(&s->gb);
4765 my += get_se_golomb(&s->gb);
4766 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4768 val= pack16to32(mx,my);
4769 }else
4770 val=0;
4771 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4777 if(IS_INTER(mb_type))
4778 write_back_motion(h, mb_type);
4780 if(!IS_INTRA16x16(mb_type)){
4781 cbp= get_ue_golomb(&s->gb);
4782 if(cbp > 47){
4783 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4784 return -1;
4787 if(CHROMA){
4788 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4789 else cbp= golomb_to_inter_cbp [cbp];
4790 }else{
4791 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4792 else cbp= golomb_to_inter_cbp_gray[cbp];
4795 h->cbp = cbp;
4797 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4798 if(get_bits1(&s->gb)){
4799 mb_type |= MB_TYPE_8x8DCT;
4800 h->cbp_table[mb_xy]= cbp;
4803 s->current_picture.mb_type[mb_xy]= mb_type;
4805 if(cbp || IS_INTRA16x16(mb_type)){
4806 int i8x8, i4x4, chroma_idx;
4807 int dquant;
4808 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4809 const uint8_t *scan, *scan8x8, *dc_scan;
4811 // fill_non_zero_count_cache(h);
4813 if(IS_INTERLACED(mb_type)){
4814 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4815 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4816 dc_scan= luma_dc_field_scan;
4817 }else{
4818 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4819 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4820 dc_scan= luma_dc_zigzag_scan;
4823 dquant= get_se_golomb(&s->gb);
4825 if( dquant > 25 || dquant < -26 ){
4826 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4827 return -1;
4830 s->qscale += dquant;
4831 if(((unsigned)s->qscale) > 51){
4832 if(s->qscale<0) s->qscale+= 52;
4833 else s->qscale-= 52;
4836 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4837 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4838 if(IS_INTRA16x16(mb_type)){
4839 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4840 return -1; //FIXME continue if partitioned and other return -1 too
4843 assert((cbp&15) == 0 || (cbp&15) == 15);
4845 if(cbp&15){
4846 for(i8x8=0; i8x8<4; i8x8++){
4847 for(i4x4=0; i4x4<4; i4x4++){
4848 const int index= i4x4 + 4*i8x8;
4849 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4850 return -1;
4854 }else{
4855 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4857 }else{
4858 for(i8x8=0; i8x8<4; i8x8++){
4859 if(cbp & (1<<i8x8)){
4860 if(IS_8x8DCT(mb_type)){
4861 DCTELEM *buf = &h->mb[64*i8x8];
4862 uint8_t *nnz;
4863 for(i4x4=0; i4x4<4; i4x4++){
4864 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4865 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4866 return -1;
4868 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4869 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4870 }else{
4871 for(i4x4=0; i4x4<4; i4x4++){
4872 const int index= i4x4 + 4*i8x8;
4874 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4875 return -1;
4879 }else{
4880 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4881 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4886 if(cbp&0x30){
4887 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4888 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4889 return -1;
4893 if(cbp&0x20){
4894 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4895 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4896 for(i4x4=0; i4x4<4; i4x4++){
4897 const int index= 16 + 4*chroma_idx + i4x4;
4898 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4899 return -1;
4903 }else{
4904 uint8_t * const nnz= &h->non_zero_count_cache[0];
4905 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4906 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4908 }else{
4909 uint8_t * const nnz= &h->non_zero_count_cache[0];
4910 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4911 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4912 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4914 s->current_picture.qscale_table[mb_xy]= s->qscale;
4915 write_back_non_zero_count(h);
4917 if(MB_MBAFF){
4918 h->ref_count[0] >>= 1;
4919 h->ref_count[1] >>= 1;
4922 return 0;
4925 static int decode_cabac_field_decoding_flag(H264Context *h) {
4926 MpegEncContext * const s = &h->s;
4927 const int mb_x = s->mb_x;
4928 const int mb_y = s->mb_y & ~1;
4929 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4930 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4932 unsigned int ctx = 0;
4934 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4935 ctx += 1;
4937 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4938 ctx += 1;
4941 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4944 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4945 uint8_t *state= &h->cabac_state[ctx_base];
4946 int mb_type;
4948 if(intra_slice){
4949 MpegEncContext * const s = &h->s;
4950 const int mba_xy = h->left_mb_xy[0];
4951 const int mbb_xy = h->top_mb_xy;
4952 int ctx=0;
4953 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4954 ctx++;
4955 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4956 ctx++;
4957 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4958 return 0; /* I4x4 */
4959 state += 2;
4960 }else{
4961 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4962 return 0; /* I4x4 */
4965 if( get_cabac_terminate( &h->cabac ) )
4966 return 25; /* PCM */
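/* I_16x16 macroblock types are laid out as
 * mb_type = 1 + pred_mode + 4*cbp_chroma + 12*(cbp_luma != 0),
 * which is what the additions below reconstruct step by step. */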
4968 mb_type = 1; /* I16x16 */
4969 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4970 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4971 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4972 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4973 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4974 return mb_type;
4977 static int decode_cabac_mb_type_b( H264Context *h ) {
4978 MpegEncContext * const s = &h->s;
4980 const int mba_xy = h->left_mb_xy[0];
4981 const int mbb_xy = h->top_mb_xy;
4982 int ctx = 0;
4983 int bits;
4984 assert(h->slice_type_nos == FF_B_TYPE);
4986 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4987 ctx++;
4988 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4989 ctx++;
4991 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4992 return 0; /* B_Direct_16x16 */
4994 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4995 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4998 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4999 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5000 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5001 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5002 if( bits < 8 )
5003 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5004 else if( bits == 13 ) {
5005 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5006 } else if( bits == 14 )
5007 return 11; /* B_L1_L0_8x16 */
5008 else if( bits == 15 )
5009 return 22; /* B_8x8 */
5011 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5012 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5015 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5016 MpegEncContext * const s = &h->s;
5017 int mba_xy, mbb_xy;
5018 int ctx = 0;
5020 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5021 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5022 mba_xy = mb_xy - 1;
5023 if( (mb_y&1)
5024 && h->slice_table[mba_xy] == h->slice_num
5025 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5026 mba_xy += s->mb_stride;
5027 if( MB_FIELD ){
5028 mbb_xy = mb_xy - s->mb_stride;
5029 if( !(mb_y&1)
5030 && h->slice_table[mbb_xy] == h->slice_num
5031 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5032 mbb_xy -= s->mb_stride;
5033 }else
5034 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5035 }else{
5036 int mb_xy = h->mb_xy;
5037 mba_xy = mb_xy - 1;
5038 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5041 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5042 ctx++;
5043 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5044 ctx++;
5046 if( h->slice_type_nos == FF_B_TYPE )
5047 ctx += 13;
5048 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5051 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5052 int mode = 0;
5054 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5055 return pred_mode;
5057 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5058 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5059 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5061 if( mode >= pred_mode )
5062 return mode + 1;
5063 else
5064 return mode;
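/**
 * Decode intra_chroma_pred_mode as a truncated unary code (max 3).
 * The context of the first bin depends on whether the neighbouring
 * macroblocks use a non-zero chroma prediction mode.
 */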
5067 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5068 const int mba_xy = h->left_mb_xy[0];
5069 const int mbb_xy = h->top_mb_xy;
5071 int ctx = 0;
5073 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5074 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5075 ctx++;
5077 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5078 ctx++;
5080 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5081 return 0;
5083 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5084 return 1;
5085 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5086 return 2;
5087 else
5088 return 3;
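/**
 * Decode the 4-bit luma coded_block_pattern, one bin per 8x8 block.
 * Each bin's context is derived from the corresponding bits of the
 * left, top and already decoded parts of the CBP.
 */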
5091 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5092 int cbp_b, cbp_a, ctx, cbp = 0;
5094 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5095 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5097 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5098 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5099 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5100 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5101 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5102 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5103 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5104 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5105 return cbp;
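/**
 * Decode the chroma coded_block_pattern:
 * 0 = no chroma coefficients, 1 = DC only, 2 = DC and AC.
 */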
5107 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5108 int ctx;
5109 int cbp_a, cbp_b;
5111 cbp_a = (h->left_cbp>>4)&0x03;
5112 cbp_b = (h-> top_cbp>>4)&0x03;
5114 ctx = 0;
5115 if( cbp_a > 0 ) ctx++;
5116 if( cbp_b > 0 ) ctx += 2;
5117 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5118 return 0;
5120 ctx = 4;
5121 if( cbp_a == 2 ) ctx++;
5122 if( cbp_b == 2 ) ctx += 2;
5123 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
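/**
 * Decode mb_qp_delta as a unary code and map it to a signed value.
 * Returns INT_MIN if the code is implausibly long, to avoid an
 * infinite loop on damaged bitstreams.
 */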
5125 static int decode_cabac_mb_dqp( H264Context *h) {
5126 int ctx= h->last_qscale_diff != 0;
5127 int val = 0;
5129 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5130 ctx= 2+(ctx>>1);
5131 val++;
5132 if(val > 102) //prevent infinite loop
5133 return INT_MIN;
5136 if( val&0x01 )
5137 return (val + 1)>>1 ;
5138 else
5139 return -((val + 1)>>1);
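/**
 * Decode the sub_mb_type of one 8x8 partition in a P macroblock
 * (0 = 8x8, 1 = 8x4, 2 = 4x8, 3 = 4x4).
 */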
5141 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5142 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5143 return 0; /* 8x8 */
5144 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5145 return 1; /* 8x4 */
5146 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5147 return 2; /* 4x8 */
5148 return 3; /* 4x4 */
5150 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5151 int type;
5152 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5153 return 0; /* B_Direct_8x8 */
5154 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5155 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5156 type = 3;
5157 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5158 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5159 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5160 type += 4;
5162 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5163 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5164 return type;
5167 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5168 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
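/**
 * Decode the reference index of block n in the given list as a unary
 * code; the context of the first bin depends on the neighbouring
 * reference indices (and their direct flags in B slices).
 * Returns -1 if the code exceeds the sanity limit.
 */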
5171 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5172 int refa = h->ref_cache[list][scan8[n] - 1];
5173 int refb = h->ref_cache[list][scan8[n] - 8];
5174 int ref = 0;
5175 int ctx = 0;
5177 if( h->slice_type_nos == FF_B_TYPE) {
5178 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5179 ctx++;
5180 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5181 ctx += 2;
5182 } else {
5183 if( refa > 0 )
5184 ctx++;
5185 if( refb > 0 )
5186 ctx += 2;
5189 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5190 ref++;
5191 ctx = (ctx>>2)+4;
5192 if(ref >= 32 /*h->ref_list[list]*/){
5193 return -1;
5196 return ref;
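/**
 * Decode one motion vector difference component (l = 0: x, l = 1: y)
 * for block n: a truncated unary prefix of up to 9 bins followed by
 * an exp-Golomb style bypass suffix and a bypass-coded sign (UEG3
 * binarization). Returns INT_MIN on overflow.
 */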
5199 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5200 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5201 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5202 int ctxbase = (l == 0) ? 40 : 47;
5203 int mvd;
5204 int ctx = (amvd>2) + (amvd>32);
5206 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5207 return 0;
5209 mvd= 1;
5210 ctx= 3;
5211 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5212 mvd++;
5213 if( ctx < 6 )
5214 ctx++;
5217 if( mvd >= 9 ) {
5218 int k = 3;
5219 while( get_cabac_bypass( &h->cabac ) ) {
5220 mvd += 1 << k;
5221 k++;
5222 if(k>24){
5223 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5224 return INT_MIN;
5227 while( k-- ) {
5228 if( get_cabac_bypass( &h->cabac ) )
5229 mvd += 1 << k;
5232 return get_cabac_bypass_sign( &h->cabac, -mvd );
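/**
 * Compute the context index for the coded_block_flag of block
 * (cat, idx) from the non-zero status of the left and top neighbours;
 * the DC categories read the packed flags stored in left_cbp/top_cbp.
 */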
5235 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5236 int nza, nzb;
5237 int ctx = 0;
5239 if( is_dc ) {
5240 if( cat == 0 ) {
5241 nza = h->left_cbp&0x100;
5242 nzb = h-> top_cbp&0x100;
5243 } else {
5244 nza = (h->left_cbp>>(6+idx))&0x01;
5245 nzb = (h-> top_cbp>>(6+idx))&0x01;
5247 } else {
5248 assert(cat == 1 || cat == 2 || cat == 4);
5249 nza = h->non_zero_count_cache[scan8[idx] - 1];
5250 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5253 if( nza > 0 )
5254 ctx++;
5256 if( nzb > 0 )
5257 ctx += 2;
5259 return ctx + 4 * cat;
5262 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5263 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5264 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5265 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5266 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
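/**
 * Decode one CABAC-coded residual block: the coded_block_flag, the
 * significance map and the coefficient levels and signs (with a
 * bypass-coded escape for absolute levels of 15 and above).
 * The meaning of cat/n is documented in the comment below.
 */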
5269 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5270 static const int significant_coeff_flag_offset[2][6] = {
5271 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5272 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5274 static const int last_coeff_flag_offset[2][6] = {
5275 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5276 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5278 static const int coeff_abs_level_m1_offset[6] = {
5279 227+0, 227+10, 227+20, 227+30, 227+39, 426
5281 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5282 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5283 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5284 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5285 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5286 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5287 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5288 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5289 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5291 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5292 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5293 * map node ctx => cabac ctx for level=1 */
5294 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5295 /* map node ctx => cabac ctx for level>1 */
5296 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5297 static const uint8_t coeff_abs_level_transition[2][8] = {
5298 /* update node ctx after decoding a level=1 */
5299 { 1, 2, 3, 3, 4, 5, 6, 7 },
5300 /* update node ctx after decoding a level>1 */
5301 { 4, 4, 4, 4, 5, 6, 7, 7 }
5304 int index[64];
5306 int av_unused last;
5307 int coeff_count = 0;
5308 int node_ctx = 0;
5310 uint8_t *significant_coeff_ctx_base;
5311 uint8_t *last_coeff_ctx_base;
5312 uint8_t *abs_level_m1_ctx_base;
5314 #if !ARCH_X86
5315 #define CABAC_ON_STACK
5316 #endif
5317 #ifdef CABAC_ON_STACK
5318 #define CC &cc
5319 CABACContext cc;
5320 cc.range = h->cabac.range;
5321 cc.low = h->cabac.low;
5322 cc.bytestream= h->cabac.bytestream;
5323 #else
5324 #define CC &h->cabac
5325 #endif
5328 /* cat: 0-> DC 16x16 n = 0
5329 * 1-> AC 16x16 n = luma4x4idx
5330 * 2-> Luma4x4 n = luma4x4idx
5331 * 3-> DC Chroma n = iCbCr
5332 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5333 * 5-> Luma8x8 n = 4 * luma8x8idx
5336 /* read coded block flag */
5337 if( is_dc || cat != 5 ) {
5338 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5339 if( !is_dc )
5340 h->non_zero_count_cache[scan8[n]] = 0;
5342 #ifdef CABAC_ON_STACK
5343 h->cabac.range = cc.range ;
5344 h->cabac.low = cc.low ;
5345 h->cabac.bytestream= cc.bytestream;
5346 #endif
5347 return;
5351 significant_coeff_ctx_base = h->cabac_state
5352 + significant_coeff_flag_offset[MB_FIELD][cat];
5353 last_coeff_ctx_base = h->cabac_state
5354 + last_coeff_flag_offset[MB_FIELD][cat];
5355 abs_level_m1_ctx_base = h->cabac_state
5356 + coeff_abs_level_m1_offset[cat];
5358 if( !is_dc && cat == 5 ) {
5359 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5360 for(last= 0; last < coefs; last++) { \
5361 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5362 if( get_cabac( CC, sig_ctx )) { \
5363 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5364 index[coeff_count++] = last; \
5365 if( get_cabac( CC, last_ctx ) ) { \
5366 last= max_coeff; \
5367 break; \
5371 if( last == max_coeff -1 ) {\
5372 index[coeff_count++] = last;\
5374 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5375 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5376 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5377 } else {
5378 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5379 #else
5380 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5381 } else {
5382 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5383 #endif
5385 assert(coeff_count > 0);
5387 if( is_dc ) {
5388 if( cat == 0 )
5389 h->cbp_table[h->mb_xy] |= 0x100;
5390 else
5391 h->cbp_table[h->mb_xy] |= 0x40 << n;
5392 } else {
5393 if( cat == 5 )
5394 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5395 else {
5396 assert( cat == 1 || cat == 2 || cat == 4 );
5397 h->non_zero_count_cache[scan8[n]] = coeff_count;
5401 do {
5402 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5404 int j= scantable[index[--coeff_count]];
5406 if( get_cabac( CC, ctx ) == 0 ) {
5407 node_ctx = coeff_abs_level_transition[0][node_ctx];
5408 if( is_dc ) {
5409 block[j] = get_cabac_bypass_sign( CC, -1);
5410 }else{
5411 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5413 } else {
5414 int coeff_abs = 2;
5415 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5416 node_ctx = coeff_abs_level_transition[1][node_ctx];
5418 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5419 coeff_abs++;
5422 if( coeff_abs >= 15 ) {
5423 int j = 0;
5424 while( get_cabac_bypass( CC ) ) {
5425 j++;
5428 coeff_abs=1;
5429 while( j-- ) {
5430 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5432 coeff_abs+= 14;
5435 if( is_dc ) {
5436 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5437 }else{
5438 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5441 } while( coeff_count );
5442 #ifdef CABAC_ON_STACK
5443 h->cabac.range = cc.range ;
5444 h->cabac.low = cc.low ;
5445 h->cabac.bytestream= cc.bytestream;
5446 #endif
5450 #if !CONFIG_SMALL
5451 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5452 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5455 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5456 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5458 #endif
5460 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5461 #if CONFIG_SMALL
5462 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5463 #else
5464 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5465 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5466 #endif
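/**
 * Compute h->top_mb_xy and h->left_mb_xy[0] for the current
 * macroblock, adjusting them for MBAFF pairs and field pictures.
 */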
5469 static inline void compute_mb_neighbors(H264Context *h)
5471 MpegEncContext * const s = &h->s;
5472 const int mb_xy = h->mb_xy;
5473 h->top_mb_xy = mb_xy - s->mb_stride;
5474 h->left_mb_xy[0] = mb_xy - 1;
5475 if(FRAME_MBAFF){
5476 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5477 const int top_pair_xy = pair_xy - s->mb_stride;
5478 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5479 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5480 const int curr_mb_field_flag = MB_FIELD;
5481 const int bottom = (s->mb_y & 1);
5483 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5484 h->top_mb_xy -= s->mb_stride;
5486 if (!left_mb_field_flag == curr_mb_field_flag) {
5487 h->left_mb_xy[0] = pair_xy - 1;
5489 } else if (FIELD_PICTURE) {
5490 h->top_mb_xy -= s->mb_stride;
5492 return;
5496 * decodes a macroblock
5497 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is detected
5499 static int decode_mb_cabac(H264Context *h) {
5500 MpegEncContext * const s = &h->s;
5501 int mb_xy;
5502 int mb_type, partition_count, cbp = 0;
5503 int dct8x8_allowed= h->pps.transform_8x8_mode;
5505 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5507 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5508 if( h->slice_type_nos != FF_I_TYPE ) {
5509 int skip;
5510 /* a skipped mb needs the aff flag from the following mb */
5511 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5512 predict_field_decoding_flag(h);
5513 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5514 skip = h->next_mb_skipped;
5515 else
5516 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5517 /* read skip flags */
5518 if( skip ) {
5519 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5520 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5521 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5522 if(!h->next_mb_skipped)
5523 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5526 decode_mb_skip(h);
5528 h->cbp_table[mb_xy] = 0;
5529 h->chroma_pred_mode_table[mb_xy] = 0;
5530 h->last_qscale_diff = 0;
5532 return 0;
5536 if(FRAME_MBAFF){
5537 if( (s->mb_y&1) == 0 )
5538 h->mb_mbaff =
5539 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5542 h->prev_mb_skipped = 0;
5544 compute_mb_neighbors(h);
5546 if( h->slice_type_nos == FF_B_TYPE ) {
5547 mb_type = decode_cabac_mb_type_b( h );
5548 if( mb_type < 23 ){
5549 partition_count= b_mb_type_info[mb_type].partition_count;
5550 mb_type= b_mb_type_info[mb_type].type;
5551 }else{
5552 mb_type -= 23;
5553 goto decode_intra_mb;
5555 } else if( h->slice_type_nos == FF_P_TYPE ) {
5556 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5557 /* P-type */
5558 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5559 /* P_L0_D16x16, P_8x8 */
5560 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5561 } else {
5562 /* P_L0_D8x16, P_L0_D16x8 */
5563 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5565 partition_count= p_mb_type_info[mb_type].partition_count;
5566 mb_type= p_mb_type_info[mb_type].type;
5567 } else {
5568 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5569 goto decode_intra_mb;
5571 } else {
5572 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5573 if(h->slice_type == FF_SI_TYPE && mb_type)
5574 mb_type--;
5575 assert(h->slice_type_nos == FF_I_TYPE);
5576 decode_intra_mb:
5577 partition_count = 0;
5578 cbp= i_mb_type_info[mb_type].cbp;
5579 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5580 mb_type= i_mb_type_info[mb_type].type;
5582 if(MB_FIELD)
5583 mb_type |= MB_TYPE_INTERLACED;
5585 h->slice_table[ mb_xy ]= h->slice_num;
5587 if(IS_INTRA_PCM(mb_type)) {
5588 const uint8_t *ptr;
5590 // We assume these blocks are very rare, so we do not optimize them.
5591 // FIXME The following two lines get the bitstream position in the CABAC
5592 // decoder; this should be done by a function in cabac.h (or cabac.c).
5593 ptr= h->cabac.bytestream;
5594 if(h->cabac.low&0x1) ptr--;
5595 if(CABAC_BITS==16){
5596 if(h->cabac.low&0x1FF) ptr--;
5599 // The pixels are stored in the same order as levels in h->mb array.
5600 memcpy(h->mb, ptr, 256); ptr+=256;
5601 if(CHROMA){
5602 memcpy(h->mb+128, ptr, 128); ptr+=128;
5605 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5607 // All blocks are present
5608 h->cbp_table[mb_xy] = 0x1ef;
5609 h->chroma_pred_mode_table[mb_xy] = 0;
5610 // In deblocking, the quantizer is 0
5611 s->current_picture.qscale_table[mb_xy]= 0;
5612 // All coeffs are present
5613 memset(h->non_zero_count[mb_xy], 16, 16);
5614 s->current_picture.mb_type[mb_xy]= mb_type;
5615 h->last_qscale_diff = 0;
5616 return 0;
5619 if(MB_MBAFF){
5620 h->ref_count[0] <<= 1;
5621 h->ref_count[1] <<= 1;
5624 fill_caches(h, mb_type, 0);
5626 if( IS_INTRA( mb_type ) ) {
5627 int i, pred_mode;
5628 if( IS_INTRA4x4( mb_type ) ) {
5629 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5630 mb_type |= MB_TYPE_8x8DCT;
5631 for( i = 0; i < 16; i+=4 ) {
5632 int pred = pred_intra_mode( h, i );
5633 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5634 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5636 } else {
5637 for( i = 0; i < 16; i++ ) {
5638 int pred = pred_intra_mode( h, i );
5639 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5641 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5644 write_back_intra_pred_mode(h);
5645 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5646 } else {
5647 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5648 if( h->intra16x16_pred_mode < 0 ) return -1;
5650 if(CHROMA){
5651 h->chroma_pred_mode_table[mb_xy] =
5652 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5654 pred_mode= check_intra_pred_mode( h, pred_mode );
5655 if( pred_mode < 0 ) return -1;
5656 h->chroma_pred_mode= pred_mode;
5658 } else if( partition_count == 4 ) {
5659 int i, j, sub_partition_count[4], list, ref[2][4];
5661 if( h->slice_type_nos == FF_B_TYPE ) {
5662 for( i = 0; i < 4; i++ ) {
5663 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5664 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5665 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5667 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5668 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5669 pred_direct_motion(h, &mb_type);
5670 h->ref_cache[0][scan8[4]] =
5671 h->ref_cache[1][scan8[4]] =
5672 h->ref_cache[0][scan8[12]] =
5673 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5674 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5675 for( i = 0; i < 4; i++ )
5676 if( IS_DIRECT(h->sub_mb_type[i]) )
5677 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5680 } else {
5681 for( i = 0; i < 4; i++ ) {
5682 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5683 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5684 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5688 for( list = 0; list < h->list_count; list++ ) {
5689 for( i = 0; i < 4; i++ ) {
5690 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5691 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5692 if( h->ref_count[list] > 1 ){
5693 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5694 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5695 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5696 return -1;
5698 }else
5699 ref[list][i] = 0;
5700 } else {
5701 ref[list][i] = -1;
5703 h->ref_cache[list][ scan8[4*i]+1 ]=
5704 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5708 if(dct8x8_allowed)
5709 dct8x8_allowed = get_dct8x8_allowed(h);
5711 for(list=0; list<h->list_count; list++){
5712 for(i=0; i<4; i++){
5713 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5714 if(IS_DIRECT(h->sub_mb_type[i])){
5715 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5716 continue;
5719 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5720 const int sub_mb_type= h->sub_mb_type[i];
5721 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5722 for(j=0; j<sub_partition_count[i]; j++){
5723 int mpx, mpy;
5724 int mx, my;
5725 const int index= 4*i + block_width*j;
5726 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5727 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5728 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5730 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5731 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5732 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5734 if(IS_SUB_8X8(sub_mb_type)){
5735 mv_cache[ 1 ][0]=
5736 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5737 mv_cache[ 1 ][1]=
5738 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5740 mvd_cache[ 1 ][0]=
5741 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5742 mvd_cache[ 1 ][1]=
5743 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5744 }else if(IS_SUB_8X4(sub_mb_type)){
5745 mv_cache[ 1 ][0]= mx;
5746 mv_cache[ 1 ][1]= my;
5748 mvd_cache[ 1 ][0]= mx - mpx;
5749 mvd_cache[ 1 ][1]= my - mpy;
5750 }else if(IS_SUB_4X8(sub_mb_type)){
5751 mv_cache[ 8 ][0]= mx;
5752 mv_cache[ 8 ][1]= my;
5754 mvd_cache[ 8 ][0]= mx - mpx;
5755 mvd_cache[ 8 ][1]= my - mpy;
5757 mv_cache[ 0 ][0]= mx;
5758 mv_cache[ 0 ][1]= my;
5760 mvd_cache[ 0 ][0]= mx - mpx;
5761 mvd_cache[ 0 ][1]= my - mpy;
5763 }else{
5764 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5765 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5766 p[0] = p[1] = p[8] = p[9] = 0;
5767 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5771 } else if( IS_DIRECT(mb_type) ) {
5772 pred_direct_motion(h, &mb_type);
5773 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5774 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5775 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5776 } else {
5777 int list, mx, my, i, mpx, mpy;
5778 if(IS_16X16(mb_type)){
5779 for(list=0; list<h->list_count; list++){
5780 if(IS_DIR(mb_type, 0, list)){
5781 int ref;
5782 if(h->ref_count[list] > 1){
5783 ref= decode_cabac_mb_ref(h, list, 0);
5784 if(ref >= (unsigned)h->ref_count[list]){
5785 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5786 return -1;
5788 }else
5789 ref=0;
5790 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5791 }else
5792 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize this and the other fill_rectangle calls below
5794 for(list=0; list<h->list_count; list++){
5795 if(IS_DIR(mb_type, 0, list)){
5796 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5798 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5799 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5800 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5802 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5803 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5804 }else
5805 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5808 else if(IS_16X8(mb_type)){
5809 for(list=0; list<h->list_count; list++){
5810 for(i=0; i<2; i++){
5811 if(IS_DIR(mb_type, i, list)){
5812 int ref;
5813 if(h->ref_count[list] > 1){
5814 ref= decode_cabac_mb_ref( h, list, 8*i );
5815 if(ref >= (unsigned)h->ref_count[list]){
5816 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5817 return -1;
5819 }else
5820 ref=0;
5821 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5822 }else
5823 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5826 for(list=0; list<h->list_count; list++){
5827 for(i=0; i<2; i++){
5828 if(IS_DIR(mb_type, i, list)){
5829 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5830 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5831 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5832 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5834 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5835 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5836 }else{
5837 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5838 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5842 }else{
5843 assert(IS_8X16(mb_type));
5844 for(list=0; list<h->list_count; list++){
5845 for(i=0; i<2; i++){
5846 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5847 int ref;
5848 if(h->ref_count[list] > 1){
5849 ref= decode_cabac_mb_ref( h, list, 4*i );
5850 if(ref >= (unsigned)h->ref_count[list]){
5851 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5852 return -1;
5854 }else
5855 ref=0;
5856 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5857 }else
5858 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5861 for(list=0; list<h->list_count; list++){
5862 for(i=0; i<2; i++){
5863 if(IS_DIR(mb_type, i, list)){
5864 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5865 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5866 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5868 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5869 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5870 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5871 }else{
5872 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5873 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5880 if( IS_INTER( mb_type ) ) {
5881 h->chroma_pred_mode_table[mb_xy] = 0;
5882 write_back_motion( h, mb_type );
5885 if( !IS_INTRA16x16( mb_type ) ) {
5886 cbp = decode_cabac_mb_cbp_luma( h );
5887 if(CHROMA)
5888 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5891 h->cbp_table[mb_xy] = h->cbp = cbp;
5893 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5894 if( decode_cabac_mb_transform_size( h ) )
5895 mb_type |= MB_TYPE_8x8DCT;
5897 s->current_picture.mb_type[mb_xy]= mb_type;
5899 if( cbp || IS_INTRA16x16( mb_type ) ) {
5900 const uint8_t *scan, *scan8x8, *dc_scan;
5901 const uint32_t *qmul;
5902 int dqp;
5904 if(IS_INTERLACED(mb_type)){
5905 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5906 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5907 dc_scan= luma_dc_field_scan;
5908 }else{
5909 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5910 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5911 dc_scan= luma_dc_zigzag_scan;
5914 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5915 if( dqp == INT_MIN ){
5916 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5917 return -1;
5919 s->qscale += dqp;
5920 if(((unsigned)s->qscale) > 51){
5921 if(s->qscale<0) s->qscale+= 52;
5922 else s->qscale-= 52;
5924 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5925 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5927 if( IS_INTRA16x16( mb_type ) ) {
5928 int i;
5929 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5930 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5932 if( cbp&15 ) {
5933 qmul = h->dequant4_coeff[0][s->qscale];
5934 for( i = 0; i < 16; i++ ) {
5935 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5936 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5938 } else {
5939 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5941 } else {
5942 int i8x8, i4x4;
5943 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5944 if( cbp & (1<<i8x8) ) {
5945 if( IS_8x8DCT(mb_type) ) {
5946 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5947 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5948 } else {
5949 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5950 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5951 const int index = 4*i8x8 + i4x4;
5952 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5953 //START_TIMER
5954 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5955 //STOP_TIMER("decode_residual")
5958 } else {
5959 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5960 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5965 if( cbp&0x30 ){
5966 int c;
5967 for( c = 0; c < 2; c++ ) {
5968 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5969 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5973 if( cbp&0x20 ) {
5974 int c, i;
5975 for( c = 0; c < 2; c++ ) {
5976 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5977 for( i = 0; i < 4; i++ ) {
5978 const int index = 16 + 4 * c + i;
5979 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5980 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5983 } else {
5984 uint8_t * const nnz= &h->non_zero_count_cache[0];
5985 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5986 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5988 } else {
5989 uint8_t * const nnz= &h->non_zero_count_cache[0];
5990 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5991 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5992 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5993 h->last_qscale_diff = 0;
5996 s->current_picture.qscale_table[mb_xy]= s->qscale;
5997 write_back_non_zero_count(h);
5999 if(MB_MBAFF){
6000 h->ref_count[0] >>= 1;
6001 h->ref_count[1] >>= 1;
6004 return 0;
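/**
 * The filter_mb_edge*() helpers below deblock a single macroblock
 * edge: for bS < 4 the normal filter with per-line tc0 clipping is
 * used, for bS == 4 the strong intra filter. The *v variants handle
 * vertical edges, the *h variants horizontal ones, and the *c*
 * variants the chroma planes.
 */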
6008 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6009 const int index_a = qp + h->slice_alpha_c0_offset;
6010 const int alpha = (alpha_table+52)[index_a];
6011 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6012 if (alpha ==0 || beta == 0) return;
6014 if( bS[0] < 4 ) {
6015 int8_t tc[4];
6016 tc[0] = (tc0_table+52)[index_a][bS[0]];
6017 tc[1] = (tc0_table+52)[index_a][bS[1]];
6018 tc[2] = (tc0_table+52)[index_a][bS[2]];
6019 tc[3] = (tc0_table+52)[index_a][bS[3]];
6020 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6021 } else {
6022 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
6025 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6026 const int index_a = qp + h->slice_alpha_c0_offset;
6027 const int alpha = (alpha_table+52)[index_a];
6028 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6029 if (alpha ==0 || beta == 0) return;
6031 if( bS[0] < 4 ) {
6032 int8_t tc[4];
6033 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6034 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6035 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6036 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6037 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6038 } else {
6039 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6043 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6044 int i;
6045 for( i = 0; i < 16; i++, pix += stride) {
6046 int index_a;
6047 int alpha;
6048 int beta;
6050 int qp_index;
6051 int bS_index = (i >> 1);
6052 if (!MB_FIELD) {
6053 bS_index &= ~1;
6054 bS_index |= (i & 1);
6057 if( bS[bS_index] == 0 ) {
6058 continue;
6061 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6062 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6063 alpha = (alpha_table+52)[index_a];
6064 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6066 if( bS[bS_index] < 4 ) {
6067 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
6068 const int p0 = pix[-1];
6069 const int p1 = pix[-2];
6070 const int p2 = pix[-3];
6071 const int q0 = pix[0];
6072 const int q1 = pix[1];
6073 const int q2 = pix[2];
6075 if( FFABS( p0 - q0 ) < alpha &&
6076 FFABS( p1 - p0 ) < beta &&
6077 FFABS( q1 - q0 ) < beta ) {
6078 int tc = tc0;
6079 int i_delta;
6081 if( FFABS( p2 - p0 ) < beta ) {
6082 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6083 tc++;
6085 if( FFABS( q2 - q0 ) < beta ) {
6086 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6087 tc++;
6090 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6091 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6092 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6093 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6095 }else{
6096 const int p0 = pix[-1];
6097 const int p1 = pix[-2];
6098 const int p2 = pix[-3];
6100 const int q0 = pix[0];
6101 const int q1 = pix[1];
6102 const int q2 = pix[2];
6104 if( FFABS( p0 - q0 ) < alpha &&
6105 FFABS( p1 - p0 ) < beta &&
6106 FFABS( q1 - q0 ) < beta ) {
6108 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6109 if( FFABS( p2 - p0 ) < beta)
6111 const int p3 = pix[-4];
6112 /* p0', p1', p2' */
6113 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6114 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6115 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6116 } else {
6117 /* p0' */
6118 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6120 if( FFABS( q2 - q0 ) < beta)
6122 const int q3 = pix[3];
6123 /* q0', q1', q2' */
6124 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6125 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6126 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6127 } else {
6128 /* q0' */
6129 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6131 }else{
6132 /* p0', q0' */
6133 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6134 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6136 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6141 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6142 int i;
6143 for( i = 0; i < 8; i++, pix += stride) {
6144 int index_a;
6145 int alpha;
6146 int beta;
6148 int qp_index;
6149 int bS_index = i;
6151 if( bS[bS_index] == 0 ) {
6152 continue;
6155 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6156 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6157 alpha = (alpha_table+52)[index_a];
6158 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6160 if( bS[bS_index] < 4 ) {
6161 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6162 const int p0 = pix[-1];
6163 const int p1 = pix[-2];
6164 const int q0 = pix[0];
6165 const int q1 = pix[1];
6167 if( FFABS( p0 - q0 ) < alpha &&
6168 FFABS( p1 - p0 ) < beta &&
6169 FFABS( q1 - q0 ) < beta ) {
6170 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6172 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6173 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6174 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6176 }else{
6177 const int p0 = pix[-1];
6178 const int p1 = pix[-2];
6179 const int q0 = pix[0];
6180 const int q1 = pix[1];
6182 if( FFABS( p0 - q0 ) < alpha &&
6183 FFABS( p1 - p0 ) < beta &&
6184 FFABS( q1 - q0 ) < beta ) {
6186 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6187 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6188 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6194 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6195 const int index_a = qp + h->slice_alpha_c0_offset;
6196 const int alpha = (alpha_table+52)[index_a];
6197 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6198 if (alpha ==0 || beta == 0) return;
6200 if( bS[0] < 4 ) {
6201 int8_t tc[4];
6202 tc[0] = (tc0_table+52)[index_a][bS[0]];
6203 tc[1] = (tc0_table+52)[index_a][bS[1]];
6204 tc[2] = (tc0_table+52)[index_a][bS[2]];
6205 tc[3] = (tc0_table+52)[index_a][bS[3]];
6206 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6207 } else {
6208 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6212 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6213 const int index_a = qp + h->slice_alpha_c0_offset;
6214 const int alpha = (alpha_table+52)[index_a];
6215 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6216 if (alpha ==0 || beta == 0) return;
6218 if( bS[0] < 4 ) {
6219 int8_t tc[4];
6220 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6221 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6222 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6223 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6224 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6225 } else {
6226 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
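/**
 * Simplified deblocking path used with CODEC_FLAG2_FAST: it handles
 * the common non-MBAFF frame case with the dsp loop-filter-strength
 * helper and falls back to filter_mb() at the picture border, when
 * the chroma QP offsets differ, or across slice boundaries when
 * deblocking_filter == 2.
 */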
6230 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6231 MpegEncContext * const s = &h->s;
6232 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6233 int mb_xy, mb_type;
6234 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6236 mb_xy = h->mb_xy;
6238 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6239 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, so it has to be avoided, but it need not be avoided under CODEC_FLAG2_FAST
6240 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6241 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6242 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6243 return;
6245 assert(!FRAME_MBAFF);
6247 mb_type = s->current_picture.mb_type[mb_xy];
6248 qp = s->current_picture.qscale_table[mb_xy];
6249 qp0 = s->current_picture.qscale_table[mb_xy-1];
6250 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6251 qpc = get_chroma_qp( h, 0, qp );
6252 qpc0 = get_chroma_qp( h, 0, qp0 );
6253 qpc1 = get_chroma_qp( h, 0, qp1 );
6254 qp0 = (qp + qp0 + 1) >> 1;
6255 qp1 = (qp + qp1 + 1) >> 1;
6256 qpc0 = (qpc + qpc0 + 1) >> 1;
6257 qpc1 = (qpc + qpc1 + 1) >> 1;
6258 qp_thresh = 15 - h->slice_alpha_c0_offset;
6259 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6260 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6261 return;
6263 if( IS_INTRA(mb_type) ) {
6264 int16_t bS4[4] = {4,4,4,4};
6265 int16_t bS3[4] = {3,3,3,3};
6266 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6267 if( IS_8x8DCT(mb_type) ) {
6268 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6269 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6270 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6271 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6272 } else {
6273 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6274 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6275 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6276 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6277 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6278 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6279 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6280 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6282 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6283 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6284 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6285 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6286 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6287 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6288 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6289 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6290 return;
6291 } else {
6292 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6293 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6294 int edges;
6295 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6296 edges = 4;
6297 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6298 } else {
6299 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6300 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6301 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6302 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6303 ? 3 : 0;
6304 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6305 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6306 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6307 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6309 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6310 bSv[0][0] = 0x0004000400040004ULL;
6311 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6312 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6314 #define FILTER(hv,dir,edge)\
6315 if(bSv[dir][edge]) {\
6316 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6317 if(!(edge&1)) {\
6318 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6319 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6322 if( edges == 1 ) {
6323 FILTER(v,0,0);
6324 FILTER(h,1,0);
6325 } else if( IS_8x8DCT(mb_type) ) {
6326 FILTER(v,0,0);
6327 FILTER(v,0,2);
6328 FILTER(h,1,0);
6329 FILTER(h,1,2);
6330 } else {
6331 FILTER(v,0,0);
6332 FILTER(v,0,1);
6333 FILTER(v,0,2);
6334 FILTER(v,0,3);
6335 FILTER(h,1,0);
6336 FILTER(h,1,1);
6337 FILTER(h,1,2);
6338 FILTER(h,1,3);
6340 #undef FILTER
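/**
 * Compute the boundary strengths bS and filter all edges of one
 * macroblock in a single direction (dir == 0: vertical edges,
 * dir == 1: horizontal edges), including the edge shared with the
 * neighbouring macroblock.
 */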
6345 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6346 MpegEncContext * const s = &h->s;
6347 int edge;
6348 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6349 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6350 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6351 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6352 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6354 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6355 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6356 // how often to recheck mv-based bS when iterating between edges
6357 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6358 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6359 // how often to recheck mv-based bS when iterating along each edge
6360 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6362 if (first_vertical_edge_done) {
6363 start = 1;
6366 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6367 start = 1;
6369 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6370 && !IS_INTERLACED(mb_type)
6371 && IS_INTERLACED(mbm_type)
6373 // This is a special case in the norm where the filtering must
6374 // be done twice (once for each field) even if we are in a
6375 // frame macroblock.
6377 static const int nnz_idx[4] = {4,5,6,3};
6378 unsigned int tmp_linesize = 2 * linesize;
6379 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6380 int mbn_xy = mb_xy - 2 * s->mb_stride;
6381 int qp;
6382 int i, j;
6383 int16_t bS[4];
6385 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6386 if( IS_INTRA(mb_type) ||
6387 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6388 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6389 } else {
6390 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6391 for( i = 0; i < 4; i++ ) {
6392 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6393 mbn_nnz[nnz_idx[i]] != 0 )
6394 bS[i] = 2;
6395 else
6396 bS[i] = 1;
6399 // Do not use s->qscale as the luma quantizer because it does not have the same
6400 // value in IPCM macroblocks.
6401 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6402 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6403 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6404 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6405 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6406 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6407 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6408 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6411 start = 1;
6414 /* Calculate bS */
6415 for( edge = start; edge < edges; edge++ ) {
6416 /* mbn_xy: neighbor macroblock */
6417 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6418 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6419 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6420 int16_t bS[4];
6421 int qp;
6423 if( (edge&1) && IS_8x8DCT(mb_type) )
6424 continue;
6426 if( IS_INTRA(mb_type) ||
6427 IS_INTRA(mbn_type) ) {
6428 int value;
6429 if (edge == 0) {
6430 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6431 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6433 value = 4;
6434 } else {
6435 value = 3;
6437 } else {
6438 value = 3;
6440 bS[0] = bS[1] = bS[2] = bS[3] = value;
6441 } else {
6442 int i, l;
6443 int mv_done;
6445 if( edge & mask_edge ) {
6446 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6447 mv_done = 1;
6449 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6450 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6451 mv_done = 1;
6453 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6454 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6455 int bn_idx= b_idx - (dir ? 8:1);
6456 int v = 0;
6458 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6459 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6460 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6461 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6464 if(h->slice_type_nos == FF_B_TYPE && v){
6465 v=0;
6466 for( l = 0; !v && l < 2; l++ ) {
6467 int ln= 1-l;
6468 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6469 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6470 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6474 bS[0] = bS[1] = bS[2] = bS[3] = v;
6475 mv_done = 1;
6477 else
6478 mv_done = 0;
6480 for( i = 0; i < 4; i++ ) {
6481 int x = dir == 0 ? edge : i;
6482 int y = dir == 0 ? i : edge;
6483 int b_idx= 8 + 4 + x + 8*y;
6484 int bn_idx= b_idx - (dir ? 8:1);
6486 if( h->non_zero_count_cache[b_idx] |
6487 h->non_zero_count_cache[bn_idx] ) {
6488 bS[i] = 2;
6490 else if(!mv_done)
6492 bS[i] = 0;
6493 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6494 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6495 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6496 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6497 bS[i] = 1;
6498 break;
6502 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6503 bS[i] = 0;
6504 for( l = 0; l < 2; l++ ) {
6505 int ln= 1-l;
6506 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6507 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6508 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6509 bS[i] = 1;
6510 break;
6517 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6518 continue;
6521 /* Filter edge */
6522 // Do not use s->qscale as the luma quantizer because it does not have the same
6523 // value in IPCM macroblocks.
6524 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6525 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6526 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6527 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6528 if( dir == 0 ) {
6529 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6530 if( (edge&1) == 0 ) {
6531 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6532 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6533 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6534 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6536 } else {
6537 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6538 if( (edge&1) == 0 ) {
6539 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6540 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6541 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6542 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
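/**
 * Full deblocking of one macroblock: skips filtering when the QP is
 * low enough, rebuilds non_zero_count_cache for the CAVLC 8x8 DCT
 * case, handles the special MBAFF left edge and then filters the
 * vertical and horizontal edges via filter_mb_dir().
 */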
6548 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6549 MpegEncContext * const s = &h->s;
6550 const int mb_xy= mb_x + mb_y*s->mb_stride;
6551 const int mb_type = s->current_picture.mb_type[mb_xy];
6552 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6553 int first_vertical_edge_done = 0;
6554 av_unused int dir;
6556 // For sufficiently low QP, the filter would not change anything.
6557 // This is a conservative estimate: beta_offset and a more accurate chroma QP could also be checked.
6558 if(!FRAME_MBAFF){
6559 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6560 int qp = s->current_picture.qscale_table[mb_xy];
6561 if(qp <= qp_thresh
6562 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6563 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6564 return;
6568 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6569 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6570 int top_type, left_type[2];
6571 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6572 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6573 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6575 if(IS_8x8DCT(top_type)){
6576 h->non_zero_count_cache[4+8*0]=
6577 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6578 h->non_zero_count_cache[6+8*0]=
6579 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6581 if(IS_8x8DCT(left_type[0])){
6582 h->non_zero_count_cache[3+8*1]=
6583 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6585 if(IS_8x8DCT(left_type[1])){
6586 h->non_zero_count_cache[3+8*3]=
6587 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6590 if(IS_8x8DCT(mb_type)){
6591 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6592 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6594 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6595 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6597 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6598 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6600 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6601 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6605 if (FRAME_MBAFF
6606 // left mb is in picture
6607 && h->slice_table[mb_xy-1] != 0xFFFF
6608 // and current and left pair do not have the same interlaced type
6609 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6610 // and left mb is in the same slice if deblocking_filter == 2
6611 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6612 /* First vertical edge is different in MBAFF frames
6613 * There are 8 different bS to compute and 2 different Qp
6615 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6616 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6617 int16_t bS[8];
6618 int qp[2];
6619 int bqp[2];
6620 int rqp[2];
6621 int mb_qp, mbn0_qp, mbn1_qp;
6622 int i;
6623 first_vertical_edge_done = 1;
6625 if( IS_INTRA(mb_type) )
6626 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6627 else {
6628 for( i = 0; i < 8; i++ ) {
6629 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6631 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6632 bS[i] = 4;
6633 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6634 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6635 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6637 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6638 bS[i] = 2;
6639 else
6640 bS[i] = 1;
6644 mb_qp = s->current_picture.qscale_table[mb_xy];
6645 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6646 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6647 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6648 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6649 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6650 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6651 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6652 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6653 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6654 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6655 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6656 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6658 /* Filter edge */
6659 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6660 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6661 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6662 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6663 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6666 #if CONFIG_SMALL
6667 for( dir = 0; dir < 2; dir++ )
6668 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6669 #else
6670 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6671 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
6672 #endif
6675 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6676 H264Context *h = *(void**)arg;
6677 MpegEncContext * const s = &h->s;
6678 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6680 s->mb_skip_run= -1;
6682 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6683 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6685 if( h->pps.cabac ) {
6686 int i;
6688 /* realign */
6689 align_get_bits( &s->gb );
6691 /* init cabac */
6692 ff_init_cabac_states( &h->cabac);
6693 ff_init_cabac_decoder( &h->cabac,
6694 s->gb.buffer + get_bits_count(&s->gb)/8,
6695 (get_bits_left(&s->gb) + 7)/8);
6696 /* calculate pre-state */
6697 for( i= 0; i < 460; i++ ) {
6698 int pre;
6699 if( h->slice_type_nos == FF_I_TYPE )
6700 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6701 else
6702 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6704 if( pre <= 63 )
6705 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6706 else
6707 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
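/* The loop above implements the CABAC context initialisation of the H.264
 * spec: preCtxState = clip(((m*QP)>>4)+n, 1, 126) per context, stored here
 * packed as cabac_state[i] = 2*pStateIdx + valMPS (pre <= 63 gives an MPS of
 * 0, pre >= 64 gives an MPS of 1). */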
6710 for(;;){
6711 //START_TIMER
6712 int ret = decode_mb_cabac(h);
6713 int eos;
6714 //STOP_TIMER("decode_mb_cabac")
6716 if(ret>=0) hl_decode_mb(h);
6718 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6719 s->mb_y++;
6721 ret = decode_mb_cabac(h);
6723 if(ret>=0) hl_decode_mb(h);
6724 s->mb_y--;
6726 eos = get_cabac_terminate( &h->cabac );
6728 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6729 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6730 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6731 return -1;
6734 if( ++s->mb_x >= s->mb_width ) {
6735 s->mb_x = 0;
6736 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6737 ++s->mb_y;
6738 if(FIELD_OR_MBAFF_PICTURE) {
6739 ++s->mb_y;
6743 if( eos || s->mb_y >= s->mb_height ) {
6744 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6745 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6746 return 0;
6750 } else {
6751 for(;;){
6752 int ret = decode_mb_cavlc(h);
6754 if(ret>=0) hl_decode_mb(h);
6756 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6757 s->mb_y++;
6758 ret = decode_mb_cavlc(h);
6760 if(ret>=0) hl_decode_mb(h);
6761 s->mb_y--;
6764 if(ret<0){
6765 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6766 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6768 return -1;
6771 if(++s->mb_x >= s->mb_width){
6772 s->mb_x=0;
6773 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6774 ++s->mb_y;
6775 if(FIELD_OR_MBAFF_PICTURE) {
6776 ++s->mb_y;
6778 if(s->mb_y >= s->mb_height){
6779 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6781 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6782 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6784 return 0;
6785 }else{
6786 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6788 return -1;
6793 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6794 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6795 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6796 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6798 return 0;
6799 }else{
6800 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6802 return -1;
6808 #if 0
6809 for(;s->mb_y < s->mb_height; s->mb_y++){
6810 for(;s->mb_x < s->mb_width; s->mb_x++){
6811 int ret= decode_mb(h);
6813 hl_decode_mb(h);
6815 if(ret<0){
6816 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6817 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6819 return -1;
6822 if(++s->mb_x >= s->mb_width){
6823 s->mb_x=0;
6824 if(++s->mb_y >= s->mb_height){
6825 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6826 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6828 return 0;
6829 }else{
6830 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6832 return -1;
6837 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6838 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6839 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6841 return 0;
6842 }else{
6843 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6845 return -1;
6849 s->mb_x=0;
6850 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6852 #endif
6853 return -1; //not reached
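/*
 * Parse a picture timing SEI message: the CPB removal and DPB output delays
 * (only when the SPS carries HRD parameters) and, if pic_struct_present_flag
 * is set, pic_struct plus the optional clock timestamps, of which only
 * ct_type is retained. See H.264 Annex D.
 */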
6856 static int decode_picture_timing(H264Context *h){
6857 MpegEncContext * const s = &h->s;
6858 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6859 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6860 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6862 if(h->sps.pic_struct_present_flag){
6863 unsigned int i, num_clock_ts;
6864 h->sei_pic_struct = get_bits(&s->gb, 4);
6865 h->sei_ct_type = 0;
6867 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6868 return -1;
6870 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6872 for (i = 0 ; i < num_clock_ts ; i++){
6873 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6874 unsigned int full_timestamp_flag;
6875 h->sei_ct_type |= 1<<get_bits(&s->gb, 2);
6876 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6877 skip_bits(&s->gb, 5); /* counting_type */
6878 full_timestamp_flag = get_bits(&s->gb, 1);
6879 skip_bits(&s->gb, 1); /* discontinuity_flag */
6880 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6881 skip_bits(&s->gb, 8); /* n_frames */
6882 if(full_timestamp_flag){
6883 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6884 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6885 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6886 }else{
6887 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6888 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6889 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6890 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6891 if(get_bits(&s->gb, 1)) /* hours_flag */
6892 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6896 if(h->sps.time_offset_length > 0)
6897 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6901 if(s->avctx->debug & FF_DEBUG_PICT_INFO)
6902 av_log(s->avctx, AV_LOG_DEBUG, "ct_type:%X pic_struct:%d\n", h->sei_ct_type, h->sei_pic_struct);
6904 return 0;
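/*
 * Parse an unregistered user data SEI message. The only use made of it is to
 * detect the x264 version string and store the build number, which is used
 * elsewhere in the decoder for bug workarounds.
 */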
6907 static int decode_unregistered_user_data(H264Context *h, int size){
6908 MpegEncContext * const s = &h->s;
6909 uint8_t user_data[16+256];
6910 int e, build, i;
6912 if(size<16)
6913 return -1;
6915 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6916 user_data[i]= get_bits(&s->gb, 8);
6919 user_data[i]= 0;
6920 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6921 if(e==1 && build>=0)
6922 h->x264_build= build;
6924 if(s->avctx->debug & FF_DEBUG_BUGS)
6925 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6927 for(; i<size; i++)
6928 skip_bits(&s->gb, 8);
6930 return 0;
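/*
 * Parse a recovery point SEI message: recovery_frame_cnt gives the number of
 * frames after which output is expected to be correct again, and is also used
 * to mark the corresponding picture as a key frame. The remaining flags are
 * skipped.
 */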
6933 static int decode_recovery_point(H264Context *h){
6934 MpegEncContext * const s = &h->s;
6936 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6937 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
6939 return 0;
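/*
 * Parse a buffering period SEI message: for each scheduler selection index
 * the initial CPB removal delay is read (its offset is skipped), for both the
 * NAL and the VCL HRD when present. See H.264, D.1.1.
 */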
6942 static int decode_buffering_period(H264Context *h){
6943 MpegEncContext * const s = &h->s;
6944 unsigned int sps_id;
6945 int sched_sel_idx;
6946 SPS *sps;
6948 sps_id = get_ue_golomb_31(&s->gb);
6949 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6950 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6951 return -1;
6953 sps = h->sps_buffers[sps_id];
6955 // NOTE: this loop is duplicated for the NAL and the VCL HRD in the standard; see H.264, D.1.1
6956 if (sps->nal_hrd_parameters_present_flag) {
6957 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6958 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6959 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6962 if (sps->vcl_hrd_parameters_present_flag) {
6963 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6964 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6965 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6969 h->sei_buffering_period_present = 1;
6970 return 0;
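/*
 * Parse all SEI messages in the current NAL unit. Each message starts with
 * its payload type and payload size, both coded as a run of 0xFF bytes
 * followed by a final byte that is added to the total; unknown payload types
 * are skipped.
 */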
6973 int ff_h264_decode_sei(H264Context *h){
6974 MpegEncContext * const s = &h->s;
6976 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6977 int size, type;
6979 type=0;
6981 type+= show_bits(&s->gb, 8);
6982 }while(get_bits(&s->gb, 8) == 255);
6984 size=0;
6986 size+= show_bits(&s->gb, 8);
6987 }while(get_bits(&s->gb, 8) == 255);
6989 switch(type){
6990 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6991 if(decode_picture_timing(h) < 0)
6992 return -1;
6993 break;
6994 case SEI_TYPE_USER_DATA_UNREGISTERED:
6995 if(decode_unregistered_user_data(h, size) < 0)
6996 return -1;
6997 break;
6998 case SEI_TYPE_RECOVERY_POINT:
6999 if(decode_recovery_point(h) < 0)
7000 return -1;
7001 break;
7002 case SEI_BUFFERING_PERIOD:
7003 if(decode_buffering_period(h) < 0)
7004 return -1;
7005 break;
7006 default:
7007 skip_bits(&s->gb, 8*size);
7010 //FIXME check bits here
7011 align_get_bits(&s->gb);
7014 return 0;
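/*
 * Parse hrd_parameters() from the VUI (H.264 Annex E). The per-CPB bit rate
 * and buffer size values are read and discarded; only cpb_cnt and the delay
 * field lengths needed later for SEI parsing are stored in the SPS.
 */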
7017 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
7018 MpegEncContext * const s = &h->s;
7019 int cpb_count, i;
7020 cpb_count = get_ue_golomb_31(&s->gb) + 1;
7022 if(cpb_count > 32U){
7023 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
7024 return -1;
7027 get_bits(&s->gb, 4); /* bit_rate_scale */
7028 get_bits(&s->gb, 4); /* cpb_size_scale */
7029 for(i=0; i<cpb_count; i++){
7030 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7031 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7032 get_bits1(&s->gb); /* cbr_flag */
7034 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7035 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7036 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
7037 sps->time_offset_length = get_bits(&s->gb, 5);
7038 sps->cpb_cnt = cpb_count;
7039 return 0;
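/*
 * Parse vui_parameters() (H.264 Annex E). Only the fields the decoder acts on
 * are kept: sample aspect ratio, chroma sample location, timing information,
 * the HRD presence flags, pic_struct_present_flag and num_reorder_frames from
 * the bitstream restrictions; everything else is read and discarded.
 */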
7042 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7043 MpegEncContext * const s = &h->s;
7044 int aspect_ratio_info_present_flag;
7045 unsigned int aspect_ratio_idc;
7047 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7049 if( aspect_ratio_info_present_flag ) {
7050 aspect_ratio_idc= get_bits(&s->gb, 8);
7051 if( aspect_ratio_idc == EXTENDED_SAR ) {
7052 sps->sar.num= get_bits(&s->gb, 16);
7053 sps->sar.den= get_bits(&s->gb, 16);
7054 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
7055 sps->sar= pixel_aspect[aspect_ratio_idc];
7056 }else{
7057 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7058 return -1;
7060 }else{
7061 sps->sar.num=
7062 sps->sar.den= 0;
7064 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7066 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7067 get_bits1(&s->gb); /* overscan_appropriate_flag */
7070 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7071 get_bits(&s->gb, 3); /* video_format */
7072 get_bits1(&s->gb); /* video_full_range_flag */
7073 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7074 get_bits(&s->gb, 8); /* colour_primaries */
7075 get_bits(&s->gb, 8); /* transfer_characteristics */
7076 get_bits(&s->gb, 8); /* matrix_coefficients */
7080 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7081 s->avctx->chroma_sample_location = get_ue_golomb(&s->gb)+1; /* chroma_sample_location_type_top_field */
7082 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7085 sps->timing_info_present_flag = get_bits1(&s->gb);
7086 if(sps->timing_info_present_flag){
7087 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7088 sps->time_scale = get_bits_long(&s->gb, 32);
7089 if(sps->num_units_in_tick-1 > 0x7FFFFFFEU || sps->time_scale-1 > 0x7FFFFFFEU){
7090 av_log(h->s.avctx, AV_LOG_ERROR, "time_scale/num_units_in_tick invalid or unsupported (%d/%d)\n", sps->time_scale, sps->num_units_in_tick);
7091 return -1;
7093 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7096 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7097 if(sps->nal_hrd_parameters_present_flag)
7098 if(decode_hrd_parameters(h, sps) < 0)
7099 return -1;
7100 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7101 if(sps->vcl_hrd_parameters_present_flag)
7102 if(decode_hrd_parameters(h, sps) < 0)
7103 return -1;
7104 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7105 get_bits1(&s->gb); /* low_delay_hrd_flag */
7106 sps->pic_struct_present_flag = get_bits1(&s->gb);
7108 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7109 if(sps->bitstream_restriction_flag){
7110 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7111 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7112 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7113 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7114 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7115 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7116 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7118 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7119 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
7120 return -1;
7124 return 0;
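/*
 * Parse one scaling list. Deltas are signed Exp-Golomb coded relative to the
 * previous value; if the list is not present, the caller-supplied fallback is
 * copied, and a first delta that makes the value 0 selects the JVT default
 * list. Once the running value hits 0, no further deltas are read and the
 * remaining entries repeat the last value.
 */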
7127 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7128 const uint8_t *jvt_list, const uint8_t *fallback_list){
7129 MpegEncContext * const s = &h->s;
7130 int i, last = 8, next = 8;
7131 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7132 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7133 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7134 else
7135 for(i=0;i<size;i++){
7136 if(next)
7137 next = (last + get_se_golomb(&s->gb)) & 0xff;
7138 if(!i && !next){ /* matrix not written, we use the preset one */
7139 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7140 break;
7142 last = factors[scan[i]] = next ? next : last;
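/*
 * Parse the scaling matrices of an SPS or PPS. Each list falls back either to
 * the corresponding SPS list (for a PPS whose SPS carried matrices), to the
 * spec default list, or to the previously decoded list of the same prediction
 * type; the two 8x8 lists are only coded for an SPS or when the PPS enables
 * the 8x8 transform.
 */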
7146 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7147 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7148 MpegEncContext * const s = &h->s;
7149 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7150 const uint8_t *fallback[4] = {
7151 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7152 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7153 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7154 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7156 if(get_bits1(&s->gb)){
7157 sps->scaling_matrix_present |= is_sps;
7158 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7159 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7160 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7161 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7162 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7163 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7164 if(is_sps || pps->transform_8x8_mode){
7165 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7166 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
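/*
 * Decode a sequence parameter set NAL unit and store it in h->sps_buffers
 * under its sps_id, replacing any previous SPS with the same id (it is also
 * copied into the active h->sps). High profile additionally carries
 * chroma_format_idc, the bit depths, transform bypass and scaling matrices
 * before the common fields.
 */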
7171 int ff_h264_decode_seq_parameter_set(H264Context *h){
7172 MpegEncContext * const s = &h->s;
7173 int profile_idc, level_idc;
7174 unsigned int sps_id;
7175 int i;
7176 SPS *sps;
7178 profile_idc= get_bits(&s->gb, 8);
7179 get_bits1(&s->gb); //constraint_set0_flag
7180 get_bits1(&s->gb); //constraint_set1_flag
7181 get_bits1(&s->gb); //constraint_set2_flag
7182 get_bits1(&s->gb); //constraint_set3_flag
7183 get_bits(&s->gb, 4); // reserved
7184 level_idc= get_bits(&s->gb, 8);
7185 sps_id= get_ue_golomb_31(&s->gb);
7187 if(sps_id >= MAX_SPS_COUNT) {
7188 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7189 return -1;
7191 sps= av_mallocz(sizeof(SPS));
7192 if(sps == NULL)
7193 return -1;
7195 sps->profile_idc= profile_idc;
7196 sps->level_idc= level_idc;
7198 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7199 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7200 sps->scaling_matrix_present = 0;
7202 if(sps->profile_idc >= 100){ //high profile
7203 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7204 if(sps->chroma_format_idc == 3)
7205 sps->residual_color_transform_flag = get_bits1(&s->gb);
7206 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7207 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7208 sps->transform_bypass = get_bits1(&s->gb);
7209 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7210 }else{
7211 sps->chroma_format_idc= 1;
7214 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7215 sps->poc_type= get_ue_golomb_31(&s->gb);
7217 if(sps->poc_type == 0){ //FIXME #define
7218 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7219 } else if(sps->poc_type == 1){//FIXME #define
7220 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7221 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7222 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7223 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7225 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7226 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7227 goto fail;
7230 for(i=0; i<sps->poc_cycle_length; i++)
7231 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7232 }else if(sps->poc_type != 2){
7233 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7234 goto fail;
7237 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7238 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7239 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7240 goto fail;
7242 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7243 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7244 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7245 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7246 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7247 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7248 goto fail;
7251 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7252 if(!sps->frame_mbs_only_flag)
7253 sps->mb_aff= get_bits1(&s->gb);
7254 else
7255 sps->mb_aff= 0;
7257 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7259 #ifndef ALLOW_INTERLACE
7260 if(sps->mb_aff)
7261 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7262 #endif
7263 sps->crop= get_bits1(&s->gb);
7264 if(sps->crop){
7265 sps->crop_left = get_ue_golomb(&s->gb);
7266 sps->crop_right = get_ue_golomb(&s->gb);
7267 sps->crop_top = get_ue_golomb(&s->gb);
7268 sps->crop_bottom= get_ue_golomb(&s->gb);
7269 if(sps->crop_left || sps->crop_top){
7270 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7272 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7273 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7275 }else{
7276 sps->crop_left =
7277 sps->crop_right =
7278 sps->crop_top =
7279 sps->crop_bottom= 0;
7282 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7283 if( sps->vui_parameters_present_flag )
7284 if (decode_vui_parameters(h, sps) < 0)
7285 goto fail;
7287 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7288 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s %d/%d\n",
7289 sps_id, sps->profile_idc, sps->level_idc,
7290 sps->poc_type,
7291 sps->ref_frame_count,
7292 sps->mb_width, sps->mb_height,
7293 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7294 sps->direct_8x8_inference_flag ? "8B8" : "",
7295 sps->crop_left, sps->crop_right,
7296 sps->crop_top, sps->crop_bottom,
7297 sps->vui_parameters_present_flag ? "VUI" : "",
7298 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc],
7299 sps->timing_info_present_flag ? sps->num_units_in_tick : 0,
7300 sps->timing_info_present_flag ? sps->time_scale : 0
7304 av_free(h->sps_buffers[sps_id]);
7305 h->sps_buffers[sps_id]= sps;
7306 h->sps = *sps;
7307 return 0;
7308 fail:
7309 av_free(sps);
7310 return -1;
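/*
 * Precompute the luma-to-chroma QP mapping for one chroma_qp_index_offset:
 * every luma QP 0..51 maps to chroma_qp[clip(qp + offset, 0, 51)], so a
 * positive offset shifts the whole mapping towards higher chroma QPs,
 * saturating at 51.
 */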
7313 static void
7314 build_qp_table(PPS *pps, int t, int index)
7316 int i;
7317 for(i = 0; i < 52; i++)
7318 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
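/*
 * Decode a picture parameter set NAL unit and store it in h->pps_buffers
 * under its pps_id. The slice group count and map type are read, but FMO
 * itself is not supported; the optional trailing fields (8x8 transform flag,
 * PPS scaling matrices, second chroma QP offset) are only read if bits remain
 * before bit_length.
 */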
7321 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7322 MpegEncContext * const s = &h->s;
7323 unsigned int pps_id= get_ue_golomb(&s->gb);
7324 PPS *pps;
7326 if(pps_id >= MAX_PPS_COUNT) {
7327 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7328 return -1;
7331 pps= av_mallocz(sizeof(PPS));
7332 if(pps == NULL)
7333 return -1;
7334 pps->sps_id= get_ue_golomb_31(&s->gb);
7335 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7336 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7337 goto fail;
7340 pps->cabac= get_bits1(&s->gb);
7341 pps->pic_order_present= get_bits1(&s->gb);
7342 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7343 if(pps->slice_group_count > 1 ){
7344 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7345 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7346 switch(pps->mb_slice_group_map_type){
7347 case 0:
7348 #if 0
7349 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7350 | run_length[ i ] |1 |ue(v) |
7351 #endif
7352 break;
7353 case 2:
7354 #if 0
7355 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7356 |{ | | |
7357 | top_left_mb[ i ] |1 |ue(v) |
7358 | bottom_right_mb[ i ] |1 |ue(v) |
7359 | } | | |
7360 #endif
7361 break;
7362 case 3:
7363 case 4:
7364 case 5:
7365 #if 0
7366 | slice_group_change_direction_flag |1 |u(1) |
7367 | slice_group_change_rate_minus1 |1 |ue(v) |
7368 #endif
7369 break;
7370 case 6:
7371 #if 0
7372 | slice_group_id_cnt_minus1 |1 |ue(v) |
7373 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7374 |) | | |
7375 | slice_group_id[ i ] |1 |u(v) |
7376 #endif
7377 break;
7380 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7381 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7382 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7383 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7384 goto fail;
7387 pps->weighted_pred= get_bits1(&s->gb);
7388 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7389 pps->init_qp= get_se_golomb(&s->gb) + 26;
7390 pps->init_qs= get_se_golomb(&s->gb) + 26;
7391 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7392 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7393 pps->constrained_intra_pred= get_bits1(&s->gb);
7394 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7396 pps->transform_8x8_mode= 0;
7397 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7398 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7399 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7401 if(get_bits_count(&s->gb) < bit_length){
7402 pps->transform_8x8_mode= get_bits1(&s->gb);
7403 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7404 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7405 } else {
7406 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7409 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7410 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7411 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7412 h->pps.chroma_qp_diff= 1;
7414 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7415 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7416 pps_id, pps->sps_id,
7417 pps->cabac ? "CABAC" : "CAVLC",
7418 pps->slice_group_count,
7419 pps->ref_count[0], pps->ref_count[1],
7420 pps->weighted_pred ? "weighted" : "",
7421 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7422 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7423 pps->constrained_intra_pred ? "CONSTR" : "",
7424 pps->redundant_pic_cnt_present ? "REDU" : "",
7425 pps->transform_8x8_mode ? "8x8DCT" : ""
7429 av_free(h->pps_buffers[pps_id]);
7430 h->pps_buffers[pps_id]= pps;
7431 return 0;
7432 fail:
7433 av_free(pps);
7434 return -1;
7438 * Call decode_slice() for each context.
7440 * @param h h264 master context
7441 * @param context_count number of contexts to execute
7443 static void execute_decode_slices(H264Context *h, int context_count){
7444 MpegEncContext * const s = &h->s;
7445 AVCodecContext * const avctx= s->avctx;
7446 H264Context *hx;
7447 int i;
7449 if (s->avctx->hwaccel)
7450 return;
7451 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7452 return;
7453 if(context_count == 1) {
7454 decode_slice(avctx, &h);
7455 } else {
7456 for(i = 1; i < context_count; i++) {
7457 hx = h->thread_context[i];
7458 hx->s.error_recognition = avctx->error_recognition;
7459 hx->s.error_count = 0;
7462 avctx->execute(avctx, (void *)decode_slice,
7463 h->thread_context, NULL, context_count, sizeof(void*));
7465 /* pull back stuff from slices to master context */
7466 hx = h->thread_context[context_count - 1];
7467 s->mb_x = hx->s.mb_x;
7468 s->mb_y = hx->s.mb_y;
7469 s->dropable = hx->s.dropable;
7470 s->picture_structure = hx->s.picture_structure;
7471 for(i = 1; i < context_count; i++)
7472 h->s.error_count += h->thread_context[i]->s.error_count;
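/*
 * Split the input buffer into NAL units, either length-prefixed (AVC/avcC
 * mode, h->nal_length_size bytes per prefix) or separated by 00 00 01 start
 * codes (Annex B), unescape each one and dispatch on its type. Slices are
 * queued per thread context and handed to execute_decode_slices() once
 * h->max_contexts of them have been collected.
 */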
7477 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7478 MpegEncContext * const s = &h->s;
7479 AVCodecContext * const avctx= s->avctx;
7480 int buf_index=0;
7481 H264Context *hx; ///< thread context
7482 int context_count = 0;
7483 int next_avc= h->is_avc ? 0 : buf_size;
7485 h->max_contexts = avctx->thread_count;
7486 #if 0
7487 int i;
7488 for(i=0; i<50; i++){
7489 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7491 #endif
7492 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7493 h->current_slice = 0;
7494 if (!s->first_field)
7495 s->current_picture_ptr= NULL;
7496 reset_sei(h);
7499 for(;;){
7500 int consumed;
7501 int dst_length;
7502 int bit_length;
7503 const uint8_t *ptr;
7504 int i, nalsize = 0;
7505 int err;
7507 if(buf_index >= next_avc) {
7508 if(buf_index >= buf_size) break;
7509 nalsize = 0;
7510 for(i = 0; i < h->nal_length_size; i++)
7511 nalsize = (nalsize << 8) | buf[buf_index++];
7512 if(nalsize <= 1 || nalsize > buf_size - buf_index){
7513 if(nalsize == 1){
7514 buf_index++;
7515 continue;
7516 }else{
7517 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7518 break;
7521 next_avc= buf_index + nalsize;
7522 } else {
7523 // start code prefix search
7524 for(; buf_index + 3 < buf_size; buf_index++){
7525 // This should always succeed in the first iteration.
7526 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7527 break;
7530 if(buf_index+3 >= buf_size) break;
7532 buf_index+=3;
7535 hx = h->thread_context[context_count];
7537 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
7538 if (ptr==NULL || dst_length < 0){
7539 return -1;
7541 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7542 dst_length--;
7543 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7545 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7546 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7549 if (h->is_avc && (nalsize != consumed) && nalsize){
7550 int i, debug_level = AV_LOG_DEBUG;
7551 for (i = consumed; i < nalsize; i++)
7552 if (buf[buf_index+i])
7553 debug_level = AV_LOG_ERROR;
7554 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7557 buf_index += consumed;
7559 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7560 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7561 continue;
7563 again:
7564 err = 0;
7565 switch(hx->nal_unit_type){
7566 case NAL_IDR_SLICE:
7567 if (h->nal_unit_type != NAL_IDR_SLICE) {
7568 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7569 return -1;
7571 idr(h); //FIXME ensure we don't lose some frames if there is reordering
7572 case NAL_SLICE:
7573 init_get_bits(&hx->s.gb, ptr, bit_length);
7574 hx->intra_gb_ptr=
7575 hx->inter_gb_ptr= &hx->s.gb;
7576 hx->s.data_partitioning = 0;
7578 if((err = decode_slice_header(hx, h)))
7579 break;
7581 if (s->avctx->hwaccel && h->current_slice == 1) {
7582 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
7583 return -1;
7586 s->current_picture_ptr->key_frame |=
7587 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7588 (h->sei_recovery_frame_cnt >= 0);
7589 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7590 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7591 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7592 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7593 && avctx->skip_frame < AVDISCARD_ALL){
7594 if(avctx->hwaccel) {
7595 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
7596 return -1;
7597 }else
7598 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7599 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7600 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7601 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
7602 }else
7603 context_count++;
7605 break;
7606 case NAL_DPA:
7607 init_get_bits(&hx->s.gb, ptr, bit_length);
7608 hx->intra_gb_ptr=
7609 hx->inter_gb_ptr= NULL;
7611 if ((err = decode_slice_header(hx, h)) < 0)
7612 break;
7614 hx->s.data_partitioning = 1;
7616 break;
7617 case NAL_DPB:
7618 init_get_bits(&hx->intra_gb, ptr, bit_length);
7619 hx->intra_gb_ptr= &hx->intra_gb;
7620 break;
7621 case NAL_DPC:
7622 init_get_bits(&hx->inter_gb, ptr, bit_length);
7623 hx->inter_gb_ptr= &hx->inter_gb;
7625 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7626 && s->context_initialized
7627 && s->hurry_up < 5
7628 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7629 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7630 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7631 && avctx->skip_frame < AVDISCARD_ALL)
7632 context_count++;
7633 break;
7634 case NAL_SEI:
7635 init_get_bits(&s->gb, ptr, bit_length);
7636 ff_h264_decode_sei(h);
7637 break;
7638 case NAL_SPS:
7639 init_get_bits(&s->gb, ptr, bit_length);
7640 ff_h264_decode_seq_parameter_set(h);
7642 if(s->flags& CODEC_FLAG_LOW_DELAY)
7643 s->low_delay=1;
7645 if(avctx->has_b_frames < 2)
7646 avctx->has_b_frames= !s->low_delay;
7647 break;
7648 case NAL_PPS:
7649 init_get_bits(&s->gb, ptr, bit_length);
7651 ff_h264_decode_picture_parameter_set(h, bit_length);
7653 break;
7654 case NAL_AUD:
7655 case NAL_END_SEQUENCE:
7656 case NAL_END_STREAM:
7657 case NAL_FILLER_DATA:
7658 case NAL_SPS_EXT:
7659 case NAL_AUXILIARY_SLICE:
7660 break;
7661 default:
7662 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7665 if(context_count == h->max_contexts) {
7666 execute_decode_slices(h, context_count);
7667 context_count = 0;
7670 if (err < 0)
7671 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7672 else if(err == 1) {
7673 /* Slice could not be decoded in parallel mode, copy down
7674 * NAL unit stuff to context 0 and restart. Note that
7675 * rbsp_buffer is not transferred, but since we no longer
7676 * run in parallel mode this should not be an issue. */
7677 h->nal_unit_type = hx->nal_unit_type;
7678 h->nal_ref_idc = hx->nal_ref_idc;
7679 hx = h;
7680 goto again;
7683 if(context_count)
7684 execute_decode_slices(h, context_count);
7685 return buf_index;
7689 * returns the number of bytes consumed for building the current frame
7691 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7692 if(pos==0) pos=1; //avoid infinite loops (I doubt that is needed, but ...)
7693 if(pos+10>buf_size) pos=buf_size; // oops ;)
7695 return pos;
7698 static int decode_frame(AVCodecContext *avctx,
7699 void *data, int *data_size,
7700 AVPacket *avpkt)
7702 const uint8_t *buf = avpkt->data;
7703 int buf_size = avpkt->size;
7704 H264Context *h = avctx->priv_data;
7705 MpegEncContext *s = &h->s;
7706 AVFrame *pict = data;
7707 int buf_index;
7709 s->flags= avctx->flags;
7710 s->flags2= avctx->flags2;
7712 /* end of stream, output what is still in the buffers */
7713 if (buf_size == 0) {
7714 Picture *out;
7715 int i, out_idx;
7717 //FIXME factorize this with the output code below
7718 out = h->delayed_pic[0];
7719 out_idx = 0;
7720 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
7721 if(h->delayed_pic[i]->poc < out->poc){
7722 out = h->delayed_pic[i];
7723 out_idx = i;
7726 for(i=out_idx; h->delayed_pic[i]; i++)
7727 h->delayed_pic[i] = h->delayed_pic[i+1];
7729 if(out){
7730 *data_size = sizeof(AVFrame);
7731 *pict= *(AVFrame*)out;
7734 return 0;
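/* On the first call for an AVC (avcC extradata) stream, the SPS and PPS
 * embedded in the extradata are decoded below. In avcC the parameter sets are
 * always prefixed with 2-byte lengths, while the low bits of byte 4 carry the
 * NAL length size (lengthSizeMinusOne) used by the actual packets. */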
7737 if(h->is_avc && !h->got_avcC) {
7738 int i, cnt, nalsize;
7739 unsigned char *p = avctx->extradata;
7740 if(avctx->extradata_size < 7) {
7741 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7742 return -1;
7744 if(*p != 1) {
7745 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7746 return -1;
7748 /* sps and pps in the avcC always have length coded with 2 bytes,
7749 so put a fake nal_length_size = 2 while parsing them */
7750 h->nal_length_size = 2;
7751 // Decode sps from avcC
7752 cnt = *(p+5) & 0x1f; // Number of sps
7753 p += 6;
7754 for (i = 0; i < cnt; i++) {
7755 nalsize = AV_RB16(p) + 2;
7756 if(decode_nal_units(h, p, nalsize) < 0) {
7757 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7758 return -1;
7760 p += nalsize;
7762 // Decode pps from avcC
7763 cnt = *(p++); // Number of pps
7764 for (i = 0; i < cnt; i++) {
7765 nalsize = AV_RB16(p) + 2;
7766 if(decode_nal_units(h, p, nalsize) != nalsize) {
7767 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7768 return -1;
7770 p += nalsize;
7772 // Now store the right NAL length size, which will be used to parse all other NALs
7773 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7774 // Do not reparse avcC
7775 h->got_avcC = 1;
7778 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7779 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7780 return -1;
7781 h->got_avcC = 1;
7784 buf_index=decode_nal_units(h, buf, buf_size);
7785 if(buf_index < 0)
7786 return -1;
7788 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7789 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7790 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7791 return -1;
7794 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7795 Picture *out = s->current_picture_ptr;
7796 Picture *cur = s->current_picture_ptr;
7797 int i, pics, out_of_order, out_idx;
7799 field_end(h);
7801 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7802 /* Wait for second field. */
7803 *data_size = 0;
7805 } else {
7806 cur->interlaced_frame = 0;
7807 cur->repeat_pict = 0;
7809 /* Signal interlacing information externally. */
7810 /* Prefer picture timing SEI information over the decoding process, if present. */
7812 if(h->sps.pic_struct_present_flag){
7813 switch (h->sei_pic_struct)
7815 case SEI_PIC_STRUCT_FRAME:
7816 break;
7817 case SEI_PIC_STRUCT_TOP_FIELD:
7818 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7819 cur->interlaced_frame = 1;
7820 break;
7821 case SEI_PIC_STRUCT_TOP_BOTTOM:
7822 case SEI_PIC_STRUCT_BOTTOM_TOP:
7823 if (FIELD_OR_MBAFF_PICTURE)
7824 cur->interlaced_frame = 1;
7825 else
7826 // try to flag soft telecine progressive
7827 cur->interlaced_frame = h->prev_interlaced_frame;
7828 break;
7829 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7830 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7831 // Signal the possibility of telecined film externally (pic_struct 5,6)
7832 // From these hints, let applications decide whether to apply deinterlacing.
7833 cur->repeat_pict = 1;
7834 break;
7835 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7836 // Force progressive here, as doubling an interlaced frame is a bad idea.
7837 cur->repeat_pict = 2;
7838 break;
7839 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7840 cur->repeat_pict = 4;
7841 break;
7844 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
7845 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
7846 }else{
7847 /* Derive interlacing flag from used decoding process. */
7848 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7850 h->prev_interlaced_frame = cur->interlaced_frame;
7852 if (cur->field_poc[0] != cur->field_poc[1]){
7853 /* Derive top_field_first from field pocs. */
7854 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7855 }else{
7856 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7857 /* Use picture timing SEI information. Even if it describes a past frame, it is better than nothing. */
7858 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7859 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7860 cur->top_field_first = 1;
7861 else
7862 cur->top_field_first = 0;
7863 }else{
7864 /* Most likely progressive */
7865 cur->top_field_first = 0;
7869 //FIXME do something with unavailable reference frames
7871 /* Sort B-frames into display order */
7873 if(h->sps.bitstream_restriction_flag
7874 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7875 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7876 s->low_delay = 0;
7879 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7880 && !h->sps.bitstream_restriction_flag){
7881 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7882 s->low_delay= 0;
7885 pics = 0;
7886 while(h->delayed_pic[pics]) pics++;
7888 assert(pics <= MAX_DELAYED_PIC_COUNT);
7890 h->delayed_pic[pics++] = cur;
7891 if(cur->reference == 0)
7892 cur->reference = DELAYED_PIC_REF;
7894 out = h->delayed_pic[0];
7895 out_idx = 0;
7896 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
7897 if(h->delayed_pic[i]->poc < out->poc){
7898 out = h->delayed_pic[i];
7899 out_idx = i;
7901 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
7902 h->outputed_poc= INT_MIN;
7903 out_of_order = out->poc < h->outputed_poc;
7905 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7907 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7908 || (s->low_delay &&
7909 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
7910 || cur->pict_type == FF_B_TYPE)))
7912 s->low_delay = 0;
7913 s->avctx->has_b_frames++;
7916 if(out_of_order || pics > s->avctx->has_b_frames){
7917 out->reference &= ~DELAYED_PIC_REF;
7918 for(i=out_idx; h->delayed_pic[i]; i++)
7919 h->delayed_pic[i] = h->delayed_pic[i+1];
7921 if(!out_of_order && pics > s->avctx->has_b_frames){
7922 *data_size = sizeof(AVFrame);
7924 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
7925 h->outputed_poc = INT_MIN;
7926 } else
7927 h->outputed_poc = out->poc;
7928 *pict= *(AVFrame*)out;
7929 }else{
7930 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7935 assert(pict->data[0] || !*data_size);
7936 ff_print_debug_info(s, pict);
7937 //printf("out %d\n", (int)pict->data[0]);
7939 return get_consumed_bytes(s, buf_index, buf_size);
7941 #if 0
7942 static inline void fill_mb_avail(H264Context *h){
7943 MpegEncContext * const s = &h->s;
7944 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7946 if(s->mb_y){
7947 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7948 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7949 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7950 }else{
7951 h->mb_avail[0]=
7952 h->mb_avail[1]=
7953 h->mb_avail[2]= 0;
7955 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7956 h->mb_avail[4]= 1; //FIXME move out
7957 h->mb_avail[5]= 0; //FIXME move out
7959 #endif
7961 #ifdef TEST
7962 #undef printf
7963 #undef random
7964 #define COUNT 8000
7965 #define SIZE (COUNT*40)
7966 int main(void){
7967 int i;
7968 uint8_t temp[SIZE];
7969 PutBitContext pb;
7970 GetBitContext gb;
7971 // int int_temp[10000];
7972 DSPContext dsp;
7973 AVCodecContext avctx;
7975 dsputil_init(&dsp, &avctx);
7977 init_put_bits(&pb, temp, SIZE);
7978 printf("testing unsigned exp golomb\n");
7979 for(i=0; i<COUNT; i++){
7980 START_TIMER
7981 set_ue_golomb(&pb, i);
7982 STOP_TIMER("set_ue_golomb");
7984 flush_put_bits(&pb);
7986 init_get_bits(&gb, temp, 8*SIZE);
7987 for(i=0; i<COUNT; i++){
7988 int j, s;
7990 s= show_bits(&gb, 24);
7992 START_TIMER
7993 j= get_ue_golomb(&gb);
7994 if(j != i){
7995 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7996 // return -1;
7998 STOP_TIMER("get_ue_golomb");
8002 init_put_bits(&pb, temp, SIZE);
8003 printf("testing signed exp golomb\n");
8004 for(i=0; i<COUNT; i++){
8005 START_TIMER
8006 set_se_golomb(&pb, i - COUNT/2);
8007 STOP_TIMER("set_se_golomb");
8009 flush_put_bits(&pb);
8011 init_get_bits(&gb, temp, 8*SIZE);
8012 for(i=0; i<COUNT; i++){
8013 int j, s;
8015 s= show_bits(&gb, 24);
8017 START_TIMER
8018 j= get_se_golomb(&gb);
8019 if(j != i - COUNT/2){
8020 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8021 // return -1;
8023 STOP_TIMER("get_se_golomb");
8026 #if 0
8027 printf("testing 4x4 (I)DCT\n");
8029 DCTELEM block[16];
8030 uint8_t src[16], ref[16];
8031 uint64_t error= 0, max_error=0;
8033 for(i=0; i<COUNT; i++){
8034 int j;
8035 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8036 for(j=0; j<16; j++){
8037 ref[j]= random()%255;
8038 src[j]= random()%255;
8041 h264_diff_dct_c(block, src, ref, 4);
8043 //normalize
8044 for(j=0; j<16; j++){
8045 // printf("%d ", block[j]);
8046 block[j]= block[j]*4;
8047 if(j&1) block[j]= (block[j]*4 + 2)/5;
8048 if(j&4) block[j]= (block[j]*4 + 2)/5;
8050 // printf("\n");
8052 s->dsp.h264_idct_add(ref, block, 4);
8053 /* for(j=0; j<16; j++){
8054 printf("%d ", ref[j]);
8056 printf("\n");*/
8058 for(j=0; j<16; j++){
8059 int diff= FFABS(src[j] - ref[j]);
8061 error+= diff*diff;
8062 max_error= FFMAX(max_error, diff);
8065 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8066 printf("testing quantizer\n");
8067 for(qp=0; qp<52; qp++){
8068 for(i=0; i<16; i++)
8069 src1_block[i]= src2_block[i]= random()%255;
8072 printf("Testing NAL layer\n");
8074 uint8_t bitstream[COUNT];
8075 uint8_t nal[COUNT*2];
8076 H264Context h;
8077 memset(&h, 0, sizeof(H264Context));
8079 for(i=0; i<COUNT; i++){
8080 int zeros= i;
8081 int nal_length;
8082 int consumed;
8083 int out_length;
8084 uint8_t *out;
8085 int j;
8087 for(j=0; j<COUNT; j++){
8088 bitstream[j]= (random() % 255) + 1;
8091 for(j=0; j<zeros; j++){
8092 int pos= random() % COUNT;
8093 while(bitstream[pos] == 0){
8094 pos++;
8095 pos %= COUNT;
8097 bitstream[pos]=0;
8100 START_TIMER
8102 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8103 if(nal_length<0){
8104 printf("encoding failed\n");
8105 return -1;
8108 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8110 STOP_TIMER("NAL")
8112 if(out_length != COUNT){
8113 printf("incorrect length %d %d\n", out_length, COUNT);
8114 return -1;
8117 if(consumed != nal_length){
8118 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8119 return -1;
8122 if(memcmp(bitstream, out, COUNT)){
8123 printf("mismatch\n");
8124 return -1;
8127 #endif
8129 printf("Testing RBSP\n");
8132 return 0;
8134 #endif /* TEST */
8137 av_cold void ff_h264_free_context(H264Context *h)
8139 int i;
8141 free_tables(h); //FIXME cleanup init stuff perhaps
8143 for(i = 0; i < MAX_SPS_COUNT; i++)
8144 av_freep(h->sps_buffers + i);
8146 for(i = 0; i < MAX_PPS_COUNT; i++)
8147 av_freep(h->pps_buffers + i);
8150 static av_cold int decode_end(AVCodecContext *avctx)
8152 H264Context *h = avctx->priv_data;
8153 MpegEncContext *s = &h->s;
8155 ff_h264_free_context(h);
8157 MPV_common_end(s);
8159 // memset(h, 0, sizeof(H264Context));
8161 return 0;
8165 AVCodec h264_decoder = {
8166 "h264",
8167 CODEC_TYPE_VIDEO,
8168 CODEC_ID_H264,
8169 sizeof(H264Context),
8170 decode_init,
8171 NULL,
8172 decode_end,
8173 decode_frame,
8174 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8175 .flush= flush_dpb,
8176 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8177 .pix_fmts= ff_hwaccel_pixfmt_list_420,
8180 #if CONFIG_H264_VDPAU_DECODER
8181 AVCodec h264_vdpau_decoder = {
8182 "h264_vdpau",
8183 CODEC_TYPE_VIDEO,
8184 CODEC_ID_H264,
8185 sizeof(H264Context),
8186 decode_init,
8187 NULL,
8188 decode_end,
8189 decode_frame,
8190 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8191 .flush= flush_dpb,
8192 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8193 .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
8195 #endif
8197 #if CONFIG_SVQ3_DECODER
8198 #include "svq3.c"
8199 #endif