Check if there are enough bytes before reading the buffer in the EA ADPCM
[ffmpeg-lucabe.git] / libavcodec / h264.c
blobb45a249f596e3ec6710f667faee89f328a75d62f
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "internal.h"
29 #include "dsputil.h"
30 #include "avcodec.h"
31 #include "mpegvideo.h"
32 #include "h264.h"
33 #include "h264data.h"
34 #include "h264_parser.h"
35 #include "golomb.h"
36 #include "mathops.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
40 #include "cabac.h"
41 #if ARCH_X86
42 #include "x86/h264_i386.h"
43 #endif
45 //#undef NDEBUG
46 #include <assert.h>
48 /**
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
52 #define DELAYED_PIC_REF 4
/* Static VLC tables. Each VLC (or VLC array) below is declared together with
 * a fixed-size VLC_TYPE backing array and a constant holding that array's
 * entry count. Nothing in this part of the file fills them in. */
54 static VLC coeff_token_vlc[4];
/* one flat array shared by the four coeff_token tables; its length is the sum
 * of the four per-table sizes listed in coeff_token_vlc_tables_size */
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* 15 tables of 512 entries each */
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
/* 3 tables of 8 entries each */
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* 6 tables of 8 entries each */
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
74 static VLC run7_vlc;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
80 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static Picture * remove_long(H264Context *h, int i, int ref_mask);
84 static av_always_inline uint32_t pack16to32(int a, int b){
85 #ifdef WORDS_BIGENDIAN
86 return (b&0xFFFF) + (a<<16);
87 #else
88 return (a&0xFFFF) + (b<<16);
89 #endif
/* rem6[i] == i % 6 for i = 0..51 */
92 static const uint8_t rem6[52]={
93 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* div6[i] == i / 6 for i = 0..51 */
96 static const uint8_t div6[52]={
97 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Four alternative 8-entry index maps for reading data of the left neighbour.
 * fill_caches() points left_block at row 0 by default and, when the field flag
 * of the left macroblock pair differs from the current one, at row 3 (current
 * is field) or row 2 - bottom (current is frame). Entries 0..3 index the
 * neighbour's intra4x4_pred_mode / non_zero_count / motion rows, entries 4..7
 * index further non_zero_count entries. */
100 static const uint8_t left_block_options[4][8]={
101 {0,1,2,3,7,10,8,11},
102 {2,2,3,3,8,11,8,11},
103 {0,0,1,1,7,10,7,10},
104 {0,2,0,2,7,10,7,10}
/* cavlc_level_tab holds 7 tables of (1<<LEVEL_TAB_BITS) entries, two int8_t
 * values per entry; it is not initialized in this part of the file. */
107 #define LEVEL_TAB_BITS 8
108 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/**
 * Fills the per-macroblock caches of h (sample availability masks, intra4x4
 * prediction modes, non-zero counts, cbp, motion vectors, reference indices,
 * mvd and direct flags) from the top, left, topleft and topright neighbours of
 * the current macroblock h->mb_xy, and records the selected top/left neighbour
 * addresses in h->top_mb_xy and h->left_mb_xy[].
 * @param mb_type type of the current macroblock
 * @param for_deblock nonzero when called for deblocking: the function then
 *        returns immediately if FRAME_MBAFF is off and either slice_num is 1
 *        or the top neighbour has the same slice_table entry; otherwise top
 *        and left neighbours are accepted whenever their slice_table entry is
 *        below 0xFFFF, and topleft/topright are treated as absent.
 */
110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
111 MpegEncContext * const s = &h->s;
112 const int mb_xy= h->mb_xy;
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
115 const uint8_t * left_block;
116 int topleft_partition= -1;
117 int i;
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
121 //FIXME deblocking could skip the intra and nnz parts.
122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
123 return;
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
131 left_block = left_block_options[0];
132 if(FRAME_MBAFF){
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
142 const int bottom = (s->mb_y & 1);
143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
146 top_xy -= s->mb_stride;
148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
149 topleft_xy -= s->mb_stride;
150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
151 topleft_xy += s->mb_stride;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition = 0;
155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
156 topright_xy -= s->mb_stride;
158 if (left_mb_field_flag != curr_mb_field_flag) {
159 left_xy[1] = left_xy[0] = pair_xy - 1;
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
163 } else {
164 left_block= left_block_options[2 - bottom];
/* publish the selected top/left neighbour addresses in the context */
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
/* deblocking: a neighbour counts when its slice_table entry is below 0xFFFF;
 * topleft/topright are forced to 0 */
172 if(for_deblock){
173 topleft_type = 0;
174 topright_type = 0;
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
179 if(MB_MBAFF && !IS_INTRA(mb_type)){
180 int list;
181 for(list=0; list<h->list_count; list++){
182 //These values where changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type,list)){
186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
189 ref += h->b8_stride;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
195 }else{
/* not deblocking: a neighbour counts only when its slice_table entry equals
 * the current slice_num; otherwise its type is 0 */
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra macroblocks: start from "everything available" masks and clear bits
 * for each neighbour whose type fails type_mask */
202 if(IS_INTRA(mb_type)){
203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
204 h->topleft_samples_available=
205 h->top_samples_available=
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
209 if(!(top_type & type_mask)){
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
216 if(!(left_type[0] & type_mask)){
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
220 if(!(left_type[1] & type_mask)){
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
224 }else{
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
233 }else{
234 if(!(left_type[0] & type_mask)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(topleft_type & type_mask))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!(topright_type & type_mask))
244 h->topright_samples_available&= 0xFBFF;
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
252 }else{
253 int pred;
254 if(!(top_type & type_mask))
255 pred= -1;
256 else{
257 pred= 2;
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
264 for(i=0; i<2; i++){
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
268 }else{
269 int pred;
270 if(!(left_type[i] & type_mask))
271 pred= -1;
272 else{
273 pred= 2;
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
285 0 . T T. T T T T
286 1 L . .L . . . .
287 2 L . .L . . . .
288 3 . T TL . . . .
289 4 L . .L . . . .
290 5 L . .. . . . .
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
293 if(top_type){
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
305 }else{
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
319 for (i=0; i<2; i++) {
320 if(left_type[i]){
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
325 }else{
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
333 if( h->pps.cabac ) {
334 // top_cbp
335 if(top_type) {
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
338 h->top_cbp = 0x1C0;
339 } else {
340 h->top_cbp = 0;
342 // left_cbp
343 if (left_type[0]) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
346 h->left_cbp = 0x1C0;
347 } else {
348 h->left_cbp = 0;
350 if (left_type[0]) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
353 if (left_type[1]) {
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
358 #if 1
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
360 int list;
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
368 continue;
370 h->mv_cache_clean[list]= 0;
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 }else{
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
391 for(i=0; i<2; i++){
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 }else{
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
409 continue;
411 if(USES_LIST(topleft_type, list)){
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 }else{
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 }else{
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
432 continue;
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
445 if( h->pps.cabac ) {
446 /* XXX beurk, Load mvd */
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 }else{
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 }else{
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 }else{
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
481 if(h->slice_type_nos == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 }else{
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 else
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 else
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
510 if(FRAME_MBAFF){
511 #define MAP_MVS\
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
522 if(MB_FIELD){
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
529 MAP_MVS
530 #undef MAP_F2F
531 }else{
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
538 MAP_MVS
539 #undef MAP_F2F
544 #endif
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
568 int i;
570 if(!(h->top_samples_available&0x8000)){
571 for(i=0; i<4; i++){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573 if(status<0){
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
575 return -1;
576 } else if(status){
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
584 for(i=0; i<4; i++){
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587 if(status<0){
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
589 return -1;
590 } else if(status){
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
597 return 0;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
608 if(mode > 6U) {
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
610 return -1;
613 if(!(h->top_samples_available&0x8000)){
614 mode= top[ mode ];
615 if(mode<0){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
617 return -1;
621 if((h->left_samples_available&0x8080) != 0x8080){
622 mode= left[ mode ];
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
626 if(mode<0){
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
628 return -1;
632 return mode;
636 * gets the predicted intra4x4 prediction mode.
638 static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
646 if(min<0) return DC_PRED;
647 else return min;
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
674 static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
678 int i= left + top;
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
684 return i&31;
/**
 * Selects the diagonal neighbour motion vector for the partition at cache
 * index i.
 * Without the FRAME_MBAFF special cases this is the top-right cache entry
 * (i - 8 + part_width) when its reference is not PART_NOT_AVAILABLE, and the
 * top-left entry (i - 8 - 1) otherwise.
 * Under FRAME_MBAFF, the SET_DIAG_MV branches instead read the vector straight
 * from current_picture_ptr, scale its vertical component and the reference
 * index, store the vector in the scratch slot mv_cache[list][scan8[0]-2]
 * (to which *C has been pointed) and return from inside the macro.
 * @param C receives a pointer to the selected motion vector
 * @param i cache index of the partition
 * @param list reference list
 * @param part_width partition width in cache columns
 * @return the reference index belonging to *C (may be LIST_NOT_USED or
 *         PART_NOT_AVAILABLE)
 */
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
693 if(FRAME_MBAFF){
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
695 const int16_t *mv;
/* clear the scratch slot and make it the default result for the macro paths */
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
699 if(!MB_FIELD
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV declares locals and always returns, so it ends the function */
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 if(!MB_FIELD
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
723 if(MB_FIELD
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 && i >= scan8[0]+8){
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
730 #undef SET_DIAG_MV
/* common path: top-right if available, else top-left */
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
735 return topright_ref;
736 }else{
737 tprintf(s->avctx, "topright MV not available\n");
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757 const int16_t * C;
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
762 /* mv_cache
763 B . . A T T T T
764 U . . L . . , .
765 U . . L . . . .
766 U . . L . . , .
767 . . . L . . . .
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
777 if(left_ref==ref){
778 *mx= A[0];
779 *my= A[1];
780 }else if(top_ref==ref){
781 *mx= B[0];
782 *my= B[1];
783 }else{
784 *mx= C[0];
785 *my= C[1];
787 }else{
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
789 *mx= A[0];
790 *my= A[1];
791 }else{
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807 if(n==0){
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
813 if(top_ref == ref){
814 *mx= B[0];
815 *my= B[1];
816 return;
818 }else{
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
824 if(left_ref == ref){
825 *mx= A[0];
826 *my= A[1];
827 return;
831 //RARE
832 pred_motion(h, n, 4, list, ref, mx, my);
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842 if(n==0){
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
848 if(left_ref == ref){
849 *mx= A[0];
850 *my= A[1];
851 return;
853 }else{
854 const int16_t * C;
855 int diagonal_ref;
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861 if(diagonal_ref == ref){
862 *mx= C[0];
863 *my= C[1];
864 return;
868 //RARE
869 pred_motion(h, n, 2, list, ref, mx, my);
872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
882 *mx = *my = 0;
883 return;
886 pred_motion(h, 0, 4, 0, 0, mx, my);
888 return;
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
895 return 256;
896 }else{
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
903 static inline void direct_dist_scale_factor(H264Context * const h){
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906 const int poc1 = h->ref_list[1][0].poc;
907 int i, field;
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
920 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 int j, old_ref, rfield;
924 int start= mbafi ? 16 : 0;
925 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
926 int interl= mbafi || s->picture_structure != PICT_FRAME;
928 /* bogus; fills in for missing frames */
929 memset(map[list], 0, sizeof(map[list]));
931 for(rfield=0; rfield<2; rfield++){
932 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
933 int poc = ref1->ref_poc[colfield][list][old_ref];
935 if (!interl)
936 poc |= 3;
937 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
938 poc= (poc&~3) + rfield + 1;
940 for(j=start; j<end; j++){
941 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
942 int cur_ref= mbafi ? (j-16)^field : j;
943 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
944 if(rfield == field)
945 map[list][old_ref] = cur_ref;
946 break;
953 static inline void direct_ref_list_init(H264Context * const h){
954 MpegEncContext * const s = &h->s;
955 Picture * const ref1 = &h->ref_list[1][0];
956 Picture * const cur = s->current_picture_ptr;
957 int list, j, field;
958 int sidx= (s->picture_structure&1)^1;
959 int ref1sidx= (ref1->reference&1)^1;
961 for(list=0; list<2; list++){
962 cur->ref_count[sidx][list] = h->ref_count[list];
963 for(j=0; j<h->ref_count[list]; j++)
964 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
967 if(s->picture_structure == PICT_FRAME){
968 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
969 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
972 cur->mbaff= FRAME_MBAFF;
974 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
975 return;
977 for(list=0; list<2; list++){
978 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
979 for(field=0; field<2; field++)
980 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
984 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
985 MpegEncContext * const s = &h->s;
986 int b8_stride = h->b8_stride;
987 int b4_stride = h->b_stride;
988 int mb_xy = h->mb_xy;
989 int mb_type_col[2];
990 const int16_t (*l1mv0)[2], (*l1mv1)[2];
991 const int8_t *l1ref0, *l1ref1;
992 const int is_b8x8 = IS_8X8(*mb_type);
993 unsigned int sub_mb_type;
994 int i8, i4;
996 assert(h->ref_list[1][0].reference&3);
998 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1000 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1001 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1002 int cur_poc = s->current_picture_ptr->poc;
1003 int *col_poc = h->ref_list[1]->field_poc;
1004 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1005 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1006 b8_stride = 0;
1007 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1008 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1009 mb_xy += s->mb_stride*fieldoff;
1011 goto single_col;
1012 }else{ // AFL/AFR/FR/FL -> AFR/FR
1013 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1014 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1015 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1016 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1017 b8_stride *= 3;
1018 b4_stride *= 6;
1019 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1020 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1021 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1022 && !is_b8x8){
1023 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1024 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1025 }else{
1026 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1027 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1029 }else{ // AFR/FR -> AFR/FR
1030 single_col:
1031 mb_type_col[0] =
1032 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1033 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1034 /* FIXME save sub mb types from previous frames (or derive from MVs)
1035 * so we know exactly what block size to use */
1036 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1037 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1038 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1039 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1040 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1041 }else{
1042 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1043 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1048 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1049 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1050 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1051 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1052 if(!b8_stride){
1053 if(s->mb_y&1){
1054 l1ref0 += h->b8_stride;
1055 l1ref1 += h->b8_stride;
1056 l1mv0 += 2*b4_stride;
1057 l1mv1 += 2*b4_stride;
1061 if(h->direct_spatial_mv_pred){
1062 int ref[2];
1063 int mv[2][2];
1064 int list;
1066 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1068 /* ref = min(neighbors) */
1069 for(list=0; list<2; list++){
1070 int refa = h->ref_cache[list][scan8[0] - 1];
1071 int refb = h->ref_cache[list][scan8[0] - 8];
1072 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1073 if(refc == PART_NOT_AVAILABLE)
1074 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1075 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1076 if(ref[list] < 0)
1077 ref[list] = -1;
1080 if(ref[0] < 0 && ref[1] < 0){
1081 ref[0] = ref[1] = 0;
1082 mv[0][0] = mv[0][1] =
1083 mv[1][0] = mv[1][1] = 0;
1084 }else{
1085 for(list=0; list<2; list++){
1086 if(ref[list] >= 0)
1087 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1088 else
1089 mv[list][0] = mv[list][1] = 0;
1093 if(ref[1] < 0){
1094 if(!is_b8x8)
1095 *mb_type &= ~MB_TYPE_L1;
1096 sub_mb_type &= ~MB_TYPE_L1;
1097 }else if(ref[0] < 0){
1098 if(!is_b8x8)
1099 *mb_type &= ~MB_TYPE_L0;
1100 sub_mb_type &= ~MB_TYPE_L0;
1103 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1104 for(i8=0; i8<4; i8++){
1105 int x8 = i8&1;
1106 int y8 = i8>>1;
1107 int xy8 = x8+y8*b8_stride;
1108 int xy4 = 3*x8+y8*b4_stride;
1109 int a=0, b=0;
1111 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1112 continue;
1113 h->sub_mb_type[i8] = sub_mb_type;
1115 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1116 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1117 if(!IS_INTRA(mb_type_col[y8])
1118 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1119 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1120 if(ref[0] > 0)
1121 a= pack16to32(mv[0][0],mv[0][1]);
1122 if(ref[1] > 0)
1123 b= pack16to32(mv[1][0],mv[1][1]);
1124 }else{
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 b= pack16to32(mv[1][0],mv[1][1]);
1128 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1129 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1131 }else if(IS_16X16(*mb_type)){
1132 int a=0, b=0;
1134 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1135 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1136 if(!IS_INTRA(mb_type_col[0])
1137 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1138 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1139 && (h->x264_build>33 || !h->x264_build)))){
1140 if(ref[0] > 0)
1141 a= pack16to32(mv[0][0],mv[0][1]);
1142 if(ref[1] > 0)
1143 b= pack16to32(mv[1][0],mv[1][1]);
1144 }else{
1145 a= pack16to32(mv[0][0],mv[0][1]);
1146 b= pack16to32(mv[1][0],mv[1][1]);
1148 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1149 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1150 }else{
1151 for(i8=0; i8<4; i8++){
1152 const int x8 = i8&1;
1153 const int y8 = i8>>1;
1155 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1156 continue;
1157 h->sub_mb_type[i8] = sub_mb_type;
1159 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1160 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1161 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1162 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1164 /* col_zero_flag */
1165 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1166 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1167 && (h->x264_build>33 || !h->x264_build)))){
1168 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1169 if(IS_SUB_8X8(sub_mb_type)){
1170 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1171 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1172 if(ref[0] == 0)
1173 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1174 if(ref[1] == 0)
1175 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1177 }else
1178 for(i4=0; i4<4; i4++){
1179 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1180 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1181 if(ref[0] == 0)
1182 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1183 if(ref[1] == 0)
1184 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1190 }else{ /* direct temporal mv pred */
1191 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1192 const int *dist_scale_factor = h->dist_scale_factor;
1193 int ref_offset= 0;
1195 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1196 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1197 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1198 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1200 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1201 ref_offset += 16;
1203 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1204 /* FIXME assumes direct_8x8_inference == 1 */
1205 int y_shift = 2*!IS_INTERLACED(*mb_type);
1207 for(i8=0; i8<4; i8++){
1208 const int x8 = i8&1;
1209 const int y8 = i8>>1;
1210 int ref0, scale;
1211 const int16_t (*l1mv)[2]= l1mv0;
1213 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1214 continue;
1215 h->sub_mb_type[i8] = sub_mb_type;
1217 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1218 if(IS_INTRA(mb_type_col[y8])){
1219 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1220 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1221 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1222 continue;
1225 ref0 = l1ref0[x8 + y8*b8_stride];
1226 if(ref0 >= 0)
1227 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1228 else{
1229 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1230 l1mv= l1mv1;
1232 scale = dist_scale_factor[ref0];
1233 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1236 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1237 int my_col = (mv_col[1]<<y_shift)/2;
1238 int mx = (scale * mv_col[0] + 128) >> 8;
1239 int my = (scale * my_col + 128) >> 8;
1240 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1241 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1244 return;
1247 /* one-to-one mv scaling */
1249 if(IS_16X16(*mb_type)){
1250 int ref, mv0, mv1;
1252 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1253 if(IS_INTRA(mb_type_col[0])){
1254 ref=mv0=mv1=0;
1255 }else{
1256 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1257 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1258 const int scale = dist_scale_factor[ref0];
1259 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1260 int mv_l0[2];
1261 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1262 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1263 ref= ref0;
1264 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1265 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1267 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1268 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1269 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1270 }else{
1271 for(i8=0; i8<4; i8++){
1272 const int x8 = i8&1;
1273 const int y8 = i8>>1;
1274 int ref0, scale;
1275 const int16_t (*l1mv)[2]= l1mv0;
1277 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1278 continue;
1279 h->sub_mb_type[i8] = sub_mb_type;
1280 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1281 if(IS_INTRA(mb_type_col[0])){
1282 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1283 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1284 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1285 continue;
1288 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1289 if(ref0 >= 0)
1290 ref0 = map_col_to_list0[0][ref0];
1291 else{
1292 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1293 l1mv= l1mv1;
1295 scale = dist_scale_factor[ref0];
1297 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1298 if(IS_SUB_8X8(sub_mb_type)){
1299 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1300 int mx = (scale * mv_col[0] + 128) >> 8;
1301 int my = (scale * mv_col[1] + 128) >> 8;
1302 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1303 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1304 }else
1305 for(i4=0; i4<4; i4++){
1306 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1307 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1308 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1309 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1310 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1311 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1318 static inline void write_back_motion(H264Context *h, int mb_type){
1319 MpegEncContext * const s = &h->s;
1320 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1321 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1322 int list;
1324 if(!USES_LIST(mb_type, 0))
1325 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1327 for(list=0; list<h->list_count; list++){
1328 int y;
1329 if(!USES_LIST(mb_type, list))
1330 continue;
1332 for(y=0; y<4; y++){
1333 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1334 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1336 if( h->pps.cabac ) {
1337 if(IS_SKIP(mb_type))
1338 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1339 else
1340 for(y=0; y<4; y++){
1341 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1347 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1348 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1349 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1350 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1351 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1355 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1356 if(IS_8X8(mb_type)){
1357 uint8_t *direct_table = &h->direct_table[b8_xy];
1358 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1359 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1360 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1365 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1366 int i, si, di;
1367 uint8_t *dst;
1368 int bufidx;
1370 // src[0]&0x80; //forbidden bit
1371 h->nal_ref_idc= src[0]>>5;
1372 h->nal_unit_type= src[0]&0x1F;
1374 src++; length--;
1375 #if 0
1376 for(i=0; i<length; i++)
1377 printf("%2X ", src[i]);
1378 #endif
1380 #if HAVE_FAST_UNALIGNED
1381 # if HAVE_FAST_64BIT
1382 # define RS 7
1383 for(i=0; i+1<length; i+=9){
1384 if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1385 # else
1386 # define RS 3
1387 for(i=0; i+1<length; i+=5){
1388 if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1389 # endif
1390 continue;
1391 if(i>0 && !src[i]) i--;
1392 while(src[i]) i++;
1393 #else
1394 # define RS 0
1395 for(i=0; i+1<length; i+=2){
1396 if(src[i]) continue;
1397 if(i>0 && src[i-1]==0) i--;
1398 #endif
1399 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1400 if(src[i+2]!=3){
1401 /* startcode, so we must be past the end */
1402 length=i;
1404 break;
1406 i-= RS;
1409 if(i>=length-1){ //no escaped 0
1410 *dst_length= length;
1411 *consumed= length+1; //+1 for the header
1412 return src;
1415 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1416 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1417 dst= h->rbsp_buffer[bufidx];
1419 if (dst == NULL){
1420 return NULL;
1423 //printf("decoding esc\n");
1424 memcpy(dst, src, i);
1425 si=di=i;
1426 while(si+2<length){
1427 //remove escapes (very rare 1:2^22)
1428 if(src[si+2]>3){
1429 dst[di++]= src[si++];
1430 dst[di++]= src[si++];
1431 }else if(src[si]==0 && src[si+1]==0){
1432 if(src[si+2]==3){ //escape
1433 dst[di++]= 0;
1434 dst[di++]= 0;
1435 si+=3;
1436 continue;
1437 }else //next start code
1438 goto nsc;
1441 dst[di++]= src[si++];
1443 while(si<length)
1444 dst[di++]= src[si++];
1445 nsc:
1447 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1449 *dst_length= di;
1450 *consumed= si + 1;//+1 for the header
1451 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1452 return dst;
1455 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1456 int v= *src;
1457 int r;
1459 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1461 for(r=1; r<9; r++){
1462 if(v&1) return r;
1463 v>>=1;
1465 return 0;
1469 * IDCT transforms the 16 dc values and dequantizes them.
1470 * @param qp quantization parameter
1472 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1473 #define stride 16
1474 int i;
1475 int temp[16]; //FIXME check if this is a good idea
1476 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1477 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1479 //memset(block, 64, 2*256);
1480 //return;
1481 for(i=0; i<4; i++){
1482 const int offset= y_offset[i];
1483 const int z0= block[offset+stride*0] + block[offset+stride*4];
1484 const int z1= block[offset+stride*0] - block[offset+stride*4];
1485 const int z2= block[offset+stride*1] - block[offset+stride*5];
1486 const int z3= block[offset+stride*1] + block[offset+stride*5];
1488 temp[4*i+0]= z0+z3;
1489 temp[4*i+1]= z1+z2;
1490 temp[4*i+2]= z1-z2;
1491 temp[4*i+3]= z0-z3;
1494 for(i=0; i<4; i++){
1495 const int offset= x_offset[i];
1496 const int z0= temp[4*0+i] + temp[4*2+i];
1497 const int z1= temp[4*0+i] - temp[4*2+i];
1498 const int z2= temp[4*1+i] - temp[4*3+i];
1499 const int z3= temp[4*1+i] + temp[4*3+i];
1501 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1502 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1503 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1504 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled code; relies on the stride macro defined before it.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    for(i=0; i<4; i++){
        const DCTELEM * const in = block + y_offset[i];
        const int z0= in[stride*0] + in[stride*4];
        const int z1= in[stride*0] - in[stride*4];
        const int z2= in[stride*1] - in[stride*5];
        const int z3= in[stride*1] + in[stride*5];
        int * const out = temp + 4*i;

        out[0]= z0+z3;
        out[1]= z1+z2;
        out[2]= z1-z2;
        out[3]= z0-z3;
    }

    for(i=0; i<4; i++){
        DCTELEM * const out = block + x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (z0 + z3)>>1;
        out[stride*2 ]= (z1 + z2)>>1;
        out[stride*8 ]= (z1 - z2)>>1;
        out[stride*10]= (z0 - z3)>>1;
    }
}
#endif
1548 #undef xStride
1549 #undef stride
1551 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1552 const int stride= 16*2;
1553 const int xStride= 16;
1554 int a,b,c,d,e;
1556 a= block[stride*0 + xStride*0];
1557 b= block[stride*0 + xStride*1];
1558 c= block[stride*1 + xStride*0];
1559 d= block[stride*1 + xStride*1];
1561 e= a-b;
1562 a= a+b;
1563 b= c-d;
1564 c= c+d;
1566 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1567 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1568 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1569 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
#if 0
/**
 * Transforms the four chroma dc values (2x2 butterfly) in place.
 * Disabled code.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    const int v00= block[stride*0 + xStride*0];
    const int v01= block[stride*0 + xStride*1];
    const int v10= block[stride*1 + xStride*0];
    const int v11= block[stride*1 + xStride*1];
    const int top_dif= v00-v01;
    const int top_sum= v00+v01;
    const int bot_dif= v10-v11;
    const int bot_sum= v10+v11;

    block[stride*0 + xStride*0]= (top_sum+bot_sum);
    block[stride*0 + xStride*1]= (top_dif+bot_dif);
    block[stride*1 + xStride*0]= (top_sum-bot_sum);
    block[stride*1 + xStride*1]= (top_dif-bot_dif);
}
#endif
1596 * gets the chroma qp.
1598 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1599 return h->pps.chroma_qp_table[t][qscale];
1602 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1603 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1604 int src_x_offset, int src_y_offset,
1605 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1606 MpegEncContext * const s = &h->s;
1607 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1608 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1609 const int luma_xy= (mx&3) + ((my&3)<<2);
1610 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1611 uint8_t * src_cb, * src_cr;
1612 int extra_width= h->emu_edge_width;
1613 int extra_height= h->emu_edge_height;
1614 int emu=0;
1615 const int full_mx= mx>>2;
1616 const int full_my= my>>2;
1617 const int pic_width = 16*s->mb_width;
1618 const int pic_height = 16*s->mb_height >> MB_FIELD;
1620 if(mx&7) extra_width -= 3;
1621 if(my&7) extra_height -= 3;
1623 if( full_mx < 0-extra_width
1624 || full_my < 0-extra_height
1625 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1626 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1627 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1628 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1629 emu=1;
1632 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1633 if(!square){
1634 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1637 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1639 if(MB_FIELD){
1640 // chroma offset when predicting from a field of opposite parity
1641 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1642 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1644 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1645 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1647 if(emu){
1648 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1649 src_cb= s->edge_emu_buffer;
1651 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1653 if(emu){
1654 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1655 src_cr= s->edge_emu_buffer;
1657 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1660 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1661 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1662 int x_offset, int y_offset,
1663 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1664 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1665 int list0, int list1){
1666 MpegEncContext * const s = &h->s;
1667 qpel_mc_func *qpix_op= qpix_put;
1668 h264_chroma_mc_func chroma_op= chroma_put;
1670 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1671 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1672 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1673 x_offset += 8*s->mb_x;
1674 y_offset += 8*(s->mb_y >> MB_FIELD);
1676 if(list0){
1677 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1678 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1679 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1680 qpix_op, chroma_op);
1682 qpix_op= qpix_avg;
1683 chroma_op= chroma_avg;
1686 if(list1){
1687 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1688 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1689 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1690 qpix_op, chroma_op);
1694 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1695 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1696 int x_offset, int y_offset,
1697 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1698 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1699 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1700 int list0, int list1){
1701 MpegEncContext * const s = &h->s;
1703 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1704 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1705 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1706 x_offset += 8*s->mb_x;
1707 y_offset += 8*(s->mb_y >> MB_FIELD);
1709 if(list0 && list1){
1710 /* don't optimize for luma-only case, since B-frames usually
1711 * use implicit weights => chroma too. */
1712 uint8_t *tmp_cb = s->obmc_scratchpad;
1713 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1714 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1715 int refn0 = h->ref_cache[0][ scan8[n] ];
1716 int refn1 = h->ref_cache[1][ scan8[n] ];
1718 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1719 dest_y, dest_cb, dest_cr,
1720 x_offset, y_offset, qpix_put, chroma_put);
1721 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1722 tmp_y, tmp_cb, tmp_cr,
1723 x_offset, y_offset, qpix_put, chroma_put);
1725 if(h->use_weight == 2){
1726 int weight0 = h->implicit_weight[refn0][refn1];
1727 int weight1 = 64 - weight0;
1728 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1729 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1730 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1731 }else{
1732 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1733 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1734 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1735 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1736 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1737 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1738 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1739 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1740 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1742 }else{
1743 int list = list1 ? 1 : 0;
1744 int refn = h->ref_cache[list][ scan8[n] ];
1745 Picture *ref= &h->ref_list[list][refn];
1746 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1747 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1748 qpix_put, chroma_put);
1750 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1751 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1752 if(h->use_weight_chroma){
1753 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1754 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1755 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1756 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1761 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1762 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1763 int x_offset, int y_offset,
1764 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1765 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1766 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1767 int list0, int list1){
1768 if((h->use_weight==2 && list0 && list1
1769 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1770 || h->use_weight==1)
1771 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put,
1773 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1774 else
1775 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1776 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1779 static inline void prefetch_motion(H264Context *h, int list){
1780 /* fetch pixels for estimated mv 4 macroblocks ahead
1781 * optimized for 64byte cache lines */
1782 MpegEncContext * const s = &h->s;
1783 const int refn = h->ref_cache[list][scan8[0]];
1784 if(refn >= 0){
1785 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1786 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1787 uint8_t **src= h->ref_list[list][refn].data;
1788 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1789 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1790 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1791 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/*
 * Inter prediction for the current macroblock.
 *
 * Reads the macroblock type of h->mb_xy and calls mc_part() once per
 * partition: one call for 16x16, two for 16x8 and 8x16, and for 8x8 one to
 * four calls per 8x8 block depending on h->sub_mb_type[i].
 * The qpix, chroma and weight function tables are indexed per partition shape
 * before being handed to mc_part(). prefetch_motion() is called for list 0
 * before the prediction and for list 1 after it.
 */
1795 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1796 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1797 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1798 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1799 MpegEncContext * const s = &h->s;
1800 const int mb_xy= h->mb_xy;
1801 const int mb_type= s->current_picture.mb_type[mb_xy];
1803 assert(IS_INTER(mb_type));
1805 prefetch_motion(h, 0);
/* whole macroblock as a single partition */
1807 if(IS_16X16(mb_type)){
1808 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1809 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1810 &weight_op[0], &weight_avg[0],
1811 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
/* two partitions stacked vertically: second call uses y_offset 4 and block 8 */
1812 }else if(IS_16X8(mb_type)){
1813 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1815 &weight_op[1], &weight_avg[1],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1818 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1819 &weight_op[1], &weight_avg[1],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* two partitions side by side: second call uses x_offset 4 and block 4 */
1821 }else if(IS_8X16(mb_type)){
1822 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1824 &weight_op[2], &weight_avg[2],
1825 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1826 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1827 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1828 &weight_op[2], &weight_avg[2],
1829 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1830 }else{
1831 int i;
1833 assert(IS_8X8(mb_type));
/* four 8x8 blocks, each with its own sub-partitioning; the prediction
 * direction flags are taken from the sub macroblock type */
1835 for(i=0; i<4; i++){
1836 const int sub_mb_type= h->sub_mb_type[i];
1837 const int n= 4*i;
1838 int x_offset= (i&1)<<2;
1839 int y_offset= (i&2)<<1;
1841 if(IS_SUB_8X8(sub_mb_type)){
1842 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1843 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1844 &weight_op[3], &weight_avg[3],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_8X4(sub_mb_type)){
1847 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1849 &weight_op[4], &weight_avg[4],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1852 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1853 &weight_op[4], &weight_avg[4],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else if(IS_SUB_4X8(sub_mb_type)){
1856 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1858 &weight_op[5], &weight_avg[5],
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1860 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[5], &weight_avg[5],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1864 }else{
1865 int j;
1866 assert(IS_SUB_4X4(sub_mb_type));
/* four 4x4 blocks inside the 8x8 block */
1867 for(j=0; j<4; j++){
1868 int sub_x_offset= x_offset + 2*(j&1);
1869 int sub_y_offset= y_offset + (j&2);
1870 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1871 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1872 &weight_op[6], &weight_avg[6],
1873 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1879 prefetch_motion(h, 1);
1882 static av_cold void init_cavlc_level_tab(void){
1883 int suffix_length, mask;
1884 unsigned int i;
1886 for(suffix_length=0; suffix_length<7; suffix_length++){
1887 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1888 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1889 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1891 mask= -(level_code&1);
1892 level_code= (((2+level_code)>>1) ^ mask) - mask;
1893 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1894 cavlc_level_tab[suffix_length][i][0]= level_code;
1895 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1896 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1897 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1898 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1899 }else{
1900 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1901 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1907 static av_cold void decode_init_vlc(void){
1908 static int done = 0;
1910 if (!done) {
1911 int i;
1912 int offset;
1913 done = 1;
1915 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1916 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1917 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1918 &chroma_dc_coeff_token_len [0], 1, 1,
1919 &chroma_dc_coeff_token_bits[0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
1922 offset = 0;
1923 for(i=0; i<4; i++){
1924 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1925 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1926 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1927 &coeff_token_len [i][0], 1, 1,
1928 &coeff_token_bits[i][0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC);
1930 offset += coeff_token_vlc_tables_size[i];
1933 * This is a one time safety check to make sure that
1934 * the packed static coeff_token_vlc table sizes
1935 * were initialized correctly.
1937 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1939 for(i=0; i<3; i++){
1940 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1941 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1942 init_vlc(&chroma_dc_total_zeros_vlc[i],
1943 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1944 &chroma_dc_total_zeros_len [i][0], 1, 1,
1945 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1946 INIT_VLC_USE_NEW_STATIC);
1948 for(i=0; i<15; i++){
1949 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1950 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1951 init_vlc(&total_zeros_vlc[i],
1952 TOTAL_ZEROS_VLC_BITS, 16,
1953 &total_zeros_len [i][0], 1, 1,
1954 &total_zeros_bits[i][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC);
1958 for(i=0; i<6; i++){
1959 run_vlc[i].table = run_vlc_tables[i];
1960 run_vlc[i].table_allocated = run_vlc_tables_size;
1961 init_vlc(&run_vlc[i],
1962 RUN_VLC_BITS, 7,
1963 &run_len [i][0], 1, 1,
1964 &run_bits[i][0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
1967 run7_vlc.table = run7_vlc_table,
1968 run7_vlc.table_allocated = run7_vlc_table_size;
1969 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1970 &run_len [6][0], 1, 1,
1971 &run_bits[6][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC);
1974 init_cavlc_level_tab();
1978 static void free_tables(H264Context *h){
1979 int i;
1980 H264Context *hx;
1981 av_freep(&h->intra4x4_pred_mode);
1982 av_freep(&h->chroma_pred_mode_table);
1983 av_freep(&h->cbp_table);
1984 av_freep(&h->mvd_table[0]);
1985 av_freep(&h->mvd_table[1]);
1986 av_freep(&h->direct_table);
1987 av_freep(&h->non_zero_count);
1988 av_freep(&h->slice_table_base);
1989 h->slice_table= NULL;
1991 av_freep(&h->mb2b_xy);
1992 av_freep(&h->mb2b8_xy);
1994 for(i = 0; i < MAX_THREADS; i++) {
1995 hx = h->thread_context[i];
1996 if(!hx) continue;
1997 av_freep(&hx->top_borders[1]);
1998 av_freep(&hx->top_borders[0]);
1999 av_freep(&hx->s.obmc_scratchpad);
2003 static void init_dequant8_coeff_table(H264Context *h){
2004 int i,q,x;
2005 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2006 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2007 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2009 for(i=0; i<2; i++ ){
2010 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2011 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2012 break;
2015 for(q=0; q<52; q++){
2016 int shift = div6[q];
2017 int idx = rem6[q];
2018 for(x=0; x<64; x++)
2019 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2020 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2021 h->pps.scaling_matrix8[i][x]) << shift;
2026 static void init_dequant4_coeff_table(H264Context *h){
2027 int i,j,q,x;
2028 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2029 for(i=0; i<6; i++ ){
2030 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2031 for(j=0; j<i; j++){
2032 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2033 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2034 break;
2037 if(j<i)
2038 continue;
2040 for(q=0; q<52; q++){
2041 int shift = div6[q] + 2;
2042 int idx = rem6[q];
2043 for(x=0; x<16; x++)
2044 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2045 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2046 h->pps.scaling_matrix4[i][x]) << shift;
2051 static void init_dequant_tables(H264Context *h){
2052 int i,x;
2053 init_dequant4_coeff_table(h);
2054 if(h->pps.transform_8x8_mode)
2055 init_dequant8_coeff_table(h);
2056 if(h->sps.transform_bypass){
2057 for(i=0; i<6; i++)
2058 for(x=0; x<16; x++)
2059 h->dequant4_coeff[i][0][x] = 1<<6;
2060 if(h->pps.transform_8x8_mode)
2061 for(i=0; i<2; i++)
2062 for(x=0; x<64; x++)
2063 h->dequant8_coeff[i][0][x] = 1<<6;
2069 * allocates tables.
2070 * needs width/height
2072 static int alloc_tables(H264Context *h){
2073 MpegEncContext * const s = &h->s;
2074 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2075 int x,y;
2077 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2079 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2080 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2081 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2083 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2084 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2085 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2086 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2088 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2089 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2091 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2092 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2093 for(y=0; y<s->mb_height; y++){
2094 for(x=0; x<s->mb_width; x++){
2095 const int mb_xy= x + y*s->mb_stride;
2096 const int b_xy = 4*x + 4*y*h->b_stride;
2097 const int b8_xy= 2*x + 2*y*h->b8_stride;
2099 h->mb2b_xy [mb_xy]= b_xy;
2100 h->mb2b8_xy[mb_xy]= b8_xy;
2104 s->obmc_scratchpad = NULL;
2106 if(!h->dequant4_coeff[0])
2107 init_dequant_tables(h);
2109 return 0;
2110 fail:
2111 free_tables(h);
2112 return -1;
2116 * Mimic alloc_tables(), but for every context thread.
2118 static void clone_tables(H264Context *dst, H264Context *src){
2119 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2120 dst->non_zero_count = src->non_zero_count;
2121 dst->slice_table = src->slice_table;
2122 dst->cbp_table = src->cbp_table;
2123 dst->mb2b_xy = src->mb2b_xy;
2124 dst->mb2b8_xy = src->mb2b8_xy;
2125 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2126 dst->mvd_table[0] = src->mvd_table[0];
2127 dst->mvd_table[1] = src->mvd_table[1];
2128 dst->direct_table = src->direct_table;
2130 dst->s.obmc_scratchpad = NULL;
2131 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2135 * Init context
2136 * Allocate buffers which are not shared amongst multiple threads.
2138 static int context_init(H264Context *h){
2139 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2140 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2142 return 0;
2143 fail:
2144 return -1; // free_tables will clean up for us
2147 static av_cold void common_init(H264Context *h){
2148 MpegEncContext * const s = &h->s;
2150 s->width = s->avctx->width;
2151 s->height = s->avctx->height;
2152 s->codec_id= s->avctx->codec->id;
2154 ff_h264_pred_init(&h->hpc, s->codec_id);
2156 h->dequant_coeff_pps= -1;
2157 s->unrestricted_mv=1;
2158 s->decode=1; //FIXME
2160 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2162 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2163 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2167 * Reset SEI values at the beginning of the frame.
2169 * @param h H.264 context.
2171 static void reset_sei(H264Context *h) {
2172 h->sei_recovery_frame_cnt = -1;
2173 h->sei_dpb_output_delay = 0;
2174 h->sei_cpb_removal_delay = -1;
2175 h->sei_buffering_period_present = 0;
2178 static av_cold int decode_init(AVCodecContext *avctx){
2179 H264Context *h= avctx->priv_data;
2180 MpegEncContext * const s = &h->s;
2182 MPV_decode_defaults(s);
2184 s->avctx = avctx;
2185 common_init(h);
2187 s->out_format = FMT_H264;
2188 s->workaround_bugs= avctx->workaround_bugs;
2190 // set defaults
2191 // s->decode_mb= ff_h263_decode_mb;
2192 s->quarter_sample = 1;
2193 if(!avctx->has_b_frames)
2194 s->low_delay= 1;
2196 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2197 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2198 else
2199 avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
2200 avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
2202 decode_init_vlc();
2204 if(avctx->extradata_size > 0 && avctx->extradata &&
2205 *(char *)avctx->extradata == 1){
2206 h->is_avc = 1;
2207 h->got_avcC = 0;
2208 } else {
2209 h->is_avc = 0;
2212 h->thread_context[0] = h;
2213 h->outputed_poc = INT_MIN;
2214 h->prev_poc_msb= 1<<16;
2215 reset_sei(h);
2216 if(avctx->codec_id == CODEC_ID_H264){
2217 if(avctx->ticks_per_frame == 1){
2218 s->avctx->time_base.den *=2;
2220 avctx->ticks_per_frame = 2;
2222 return 0;
2225 static int frame_start(H264Context *h){
2226 MpegEncContext * const s = &h->s;
2227 int i;
2229 if(MPV_frame_start(s, s->avctx) < 0)
2230 return -1;
2231 ff_er_frame_start(s);
2233 * MPV_frame_start uses pict_type to derive key_frame.
2234 * This is incorrect for H.264; IDR markings must be used.
2235 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2236 * See decode_nal_units().
2238 s->current_picture_ptr->key_frame= 0;
2240 assert(s->linesize && s->uvlinesize);
2242 for(i=0; i<16; i++){
2243 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2244 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2246 for(i=0; i<4; i++){
2247 h->block_offset[16+i]=
2248 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2249 h->block_offset[24+16+i]=
2250 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2253 /* can't be in alloc_tables because linesize isn't known there.
2254 * FIXME: redo bipred weight to not require extra buffer? */
2255 for(i = 0; i < s->avctx->thread_count; i++)
2256 if(!h->thread_context[i]->s.obmc_scratchpad)
2257 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2259 /* some macroblocks will be accessed before they're available */
2260 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2261 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2263 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2265 // We mark the current picture as non-reference after allocating it, so
2266 // that if we break out due to an error it can be released automatically
2267 // in the next MPV_frame_start().
2268 // SVQ3 as well as most other codecs have only last/next/current and thus
2269 // get released even with set reference, besides SVQ3 and others do not
2270 // mark frames as reference later "naturally".
2271 if(s->codec_id != CODEC_ID_SVQ3)
2272 s->current_picture_ptr->reference= 0;
2274 s->current_picture_ptr->field_poc[0]=
2275 s->current_picture_ptr->field_poc[1]= INT_MAX;
2276 assert(s->current_picture_ptr->long_ref==0);
2278 return 0;
/*
 * Save the border pixels of the current macroblock.
 *
 * After the source pointers are moved up by one line, the rightmost luma
 * column (src_y[15 + i*linesize], i = 1..16) and the rightmost chroma
 * columns are copied into h->left_border, and the last luma and chroma rows
 * into h->top_borders[top_idx][s->mb_x] (16 luma bytes, then 8 Cb, then 8 Cr).
 * It is called from hl_decode_mb_internal() before the deblocking filter
 * runs; xchg_mb_border() reads the saved values back.
 *
 * For MBAFF frames (only when simple is 0) the offsets into left_border,
 * the step between entries and the top border slot depend on the parity of
 * s->mb_y and on MB_MBAFF.
 * Chroma is skipped when gray decoding is compiled in and requested.
 */
2281 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2282 MpegEncContext * const s = &h->s;
2283 int i;
2284 int step = 1;
2285 int offset = 1;
2286 int uvoffset= 1;
2287 int top_idx = 1;
2288 int skiplast= 0;
/* point at the line above the macroblock so that index i*linesize with
 * i = 1..16 addresses the macroblock's own rows */
2290 src_y -= linesize;
2291 src_cb -= uvlinesize;
2292 src_cr -= uvlinesize;
2294 if(!simple && FRAME_MBAFF){
2295 if(s->mb_y&1){
2296 offset = MB_MBAFF ? 1 : 17;
2297 uvoffset= MB_MBAFF ? 1 : 9;
2298 if(!MB_MBAFF){
/* odd row without MB_MBAFF: also store the second to last luma row and
 * the last but one chroma rows into top border slot 0 */
2299 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2300 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2301 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2302 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2303 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2306 }else{
2307 if(!MB_MBAFF){
2308 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2309 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2310 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2311 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
/* the copy loops below then stop one row early */
2313 skiplast= 1;
2315 offset =
2316 uvoffset=
2317 top_idx = MB_MBAFF ? 0 : 1;
2319 step= MB_MBAFF ? 2 : 1;
2322 // There are two lines saved, the line above the top macroblock of a pair,
2323 // and the line above the bottom macroblock
2324 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2325 for(i=1; i<17 - skiplast; i++){
2326 h->left_border[offset+i*step]= src_y[15+i* linesize];
2329 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2330 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2332 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* Cb entries start at left_border[34], Cr entries 18 further on */
2333 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2334 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2335 for(i=1; i<9 - skiplast; i++){
2336 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2337 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2339 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2340 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/*
 * Swap or copy the saved border pixels (h->left_border, h->top_borders)
 * with the pixels left of and above the current macroblock in the picture.
 *
 * hl_decode_mb_internal() calls it with xchg=1 before intra prediction and
 * with xchg=0 afterwards, in both cases only when the deblocking filter is
 * enabled.
 *
 * Which borders are touched depends on deblock_left / deblock_top: with
 * h->deblocking_filter == 2 they are set when the neighbour has the same
 * slice_table entry as this macroblock, otherwise from the macroblock
 * position. MBAFF offsets are derived the same way as in backup_mb_border().
 */
2344 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2345 MpegEncContext * const s = &h->s;
2346 int temp8, i;
2347 uint64_t temp64;
2348 int deblock_left;
2349 int deblock_top;
2350 int mb_xy;
2351 int step = 1;
2352 int offset = 1;
2353 int uvoffset= 1;
2354 int top_idx = 1;
2356 if(!simple && FRAME_MBAFF){
2357 if(s->mb_y&1){
2358 offset = MB_MBAFF ? 1 : 17;
2359 uvoffset= MB_MBAFF ? 1 : 9;
2360 }else{
2361 offset =
2362 uvoffset=
2363 top_idx = MB_MBAFF ? 0 : 1;
2365 step= MB_MBAFF ? 2 : 1;
2368 if(h->deblocking_filter == 2) {
2369 mb_xy = h->mb_xy;
2370 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2371 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2372 } else {
2373 deblock_left = (s->mb_x > 0);
2374 deblock_top = (s->mb_y > !!MB_FIELD);
/* point at the pixel diagonally above-left of the macroblock */
2377 src_y -= linesize + 1;
2378 src_cb -= uvlinesize + 1;
2379 src_cr -= uvlinesize + 1;
/* XCHG(a,b,t,xchg): b always receives the old value of a; a receives the
 * old value of b only when xchg is nonzero (true swap), otherwise a is
 * left untouched (plain copy a -> b). */
2381 #define XCHG(a,b,t,xchg)\
2382 t= a;\
2383 if(xchg)\
2384 a= b;\
2385 b= t;
2387 if(deblock_left){
/* left luma column; starts at row 1 when the top border is not handled */
2388 for(i = !deblock_top; i<16; i++){
2389 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2391 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2394 if(deblock_top){
/* top luma row: the first 8 bytes follow xchg, the next 8 bytes and the
 * first 8 bytes of the right neighbour's row are always swapped */
2395 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2396 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2397 if(s->mb_x+1 < s->mb_width){
2398 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2402 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2403 if(deblock_left){
2404 for(i = !deblock_top; i<8; i++){
2405 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2406 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2408 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2409 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2411 if(deblock_top){
2412 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2413 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/*
 * Reconstruct the current macroblock into the current picture.
 *
 * Steps, in order:
 *  - compute the destination pointers and line sizes (doubled for field
 *    macroblocks);
 *  - intra PCM: copy the samples straight from h->mb;
 *  - intra: run chroma and luma prediction, adding the 4x4 / 8x8 residual
 *    per block for intra4x4 macroblocks;
 *  - inter (H.264 only): motion compensation through hl_motion();
 *  - add the remaining luma residual, then the chroma residual;
 *  - clear the coefficient buffer;
 *  - when the deblocking filter is enabled, back up the borders and filter.
 *
 * simple: when nonzero, every branch guarded by "!simple" (field/MBAFF
 * handling, intra PCM, transform bypass) is skipped, the gray-flag and
 * encoding checks are short-circuited, and the codec is treated as H.264.
 */
2418 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2419 MpegEncContext * const s = &h->s;
2420 const int mb_x= s->mb_x;
2421 const int mb_y= s->mb_y;
2422 const int mb_xy= h->mb_xy;
2423 const int mb_type= s->current_picture.mb_type[mb_xy];
2424 uint8_t *dest_y, *dest_cb, *dest_cr;
2425 int linesize, uvlinesize /*dct_offset*/;
2426 int i;
2427 int *block_offset = &h->block_offset[0];
2428 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2429 /* is_h264 should always be true if SVQ3 is disabled. */
2430 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2431 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2432 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* top-left sample of this macroblock in each plane */
2434 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2435 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2436 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2438 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2439 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double the line sizes and use the field half of
 * block_offset (entries from 24 on) */
2441 if (!simple && MB_FIELD) {
2442 linesize = h->mb_linesize = s->linesize * 2;
2443 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2444 block_offset = &h->block_offset[24];
2445 if(mb_y&1){ //FIXME move out of this function?
2446 dest_y -= s->linesize*15;
2447 dest_cb-= s->uvlinesize*7;
2448 dest_cr-= s->uvlinesize*7;
2450 if(FRAME_MBAFF) {
2451 int list;
/* rewrite the cached reference indices of every used list as
 * (16+ref)^(mb_y&1) */
2452 for(list=0; list<h->list_count; list++){
2453 if(!USES_LIST(mb_type, list))
2454 continue;
2455 if(IS_16X16(mb_type)){
2456 int8_t *ref = &h->ref_cache[list][scan8[0]];
2457 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2458 }else{
2459 for(i=0; i<16; i+=4){
2460 int ref = h->ref_cache[list][scan8[i]];
2461 if(ref >= 0)
2462 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2467 } else {
2468 linesize = h->mb_linesize = s->linesize;
2469 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2470 // dct_offset = s->linesize * 16;
/* intra PCM: raw samples are copied from h->mb, 16 luma rows followed by
 * 8 rows each of Cb and Cr */
2473 if (!simple && IS_INTRA_PCM(mb_type)) {
2474 for (i=0; i<16; i++) {
2475 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2477 for (i=0; i<8; i++) {
2478 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2479 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2481 } else {
2482 if(IS_INTRA(mb_type)){
/* swap the saved border pixels into the picture for the duration of the
 * intra prediction; they are restored by the xchg=0 call further down */
2483 if(h->deblocking_filter)
2484 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2486 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2487 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2488 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2491 if(IS_INTRA4x4(mb_type)){
2492 if(simple || !s->encoding){
2493 if(IS_8x8DCT(mb_type)){
2494 if(transform_bypass){
2495 idct_dc_add =
2496 idct_add = s->dsp.add_pixels8;
2497 }else{
2498 idct_dc_add = s->dsp.h264_idct8_dc_add;
2499 idct_add = s->dsp.h264_idct8_add;
/* four 8x8 blocks: predict, then add the residual when there is one */
2501 for(i=0; i<16; i+=4){
2502 uint8_t * const ptr= dest_y + block_offset[i];
2503 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2504 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2505 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2506 }else{
2507 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2508 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2509 (h->topright_samples_available<<i)&0x4000, linesize);
2510 if(nnz){
2511 if(nnz == 1 && h->mb[i*16])
2512 idct_dc_add(ptr, h->mb + i*16, linesize);
2513 else
2514 idct_add (ptr, h->mb + i*16, linesize);
2518 }else{
2519 if(transform_bypass){
2520 idct_dc_add =
2521 idct_add = s->dsp.add_pixels4;
2522 }else{
2523 idct_dc_add = s->dsp.h264_idct_dc_add;
2524 idct_add = s->dsp.h264_idct_add;
/* sixteen 4x4 blocks: predict, then add the residual when there is one */
2526 for(i=0; i<16; i++){
2527 uint8_t * const ptr= dest_y + block_offset[i];
2528 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2530 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2531 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2532 }else{
2533 uint8_t *topright;
2534 int nnz, tr;
2535 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2536 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2537 assert(mb_y || linesize <= block_offset[i]);
2538 if(!topright_avail){
/* top-right samples unavailable: replicate the last top sample four times */
2539 tr= ptr[3 - linesize]*0x01010101;
2540 topright= (uint8_t*) &tr;
2541 }else
2542 topright= ptr + 4 - linesize;
2543 }else
2544 topright= NULL;
2546 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2547 nnz = h->non_zero_count_cache[ scan8[i] ];
2548 if(nnz){
2549 if(is_h264){
2550 if(nnz == 1 && h->mb[i*16])
2551 idct_dc_add(ptr, h->mb + i*16, linesize);
2552 else
2553 idct_add (ptr, h->mb + i*16, linesize);
2554 }else
2555 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2561 }else{
/* intra 16x16: predict the whole luma block, then dequantise the luma DC
 * coefficients; the residual itself is added further down */
2562 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2563 if(is_h264){
2564 if(!transform_bypass)
2565 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2566 }else
2567 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2569 if(h->deblocking_filter)
2570 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2571 }else if(is_h264){
2572 hl_motion(h, dest_y, dest_cb, dest_cr,
2573 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2574 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2575 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for everything except intra4x4, whose residual was
 * already added block by block above */
2579 if(!IS_INTRA4x4(mb_type)){
2580 if(is_h264){
2581 if(IS_INTRA16x16(mb_type)){
2582 if(transform_bypass){
2583 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2584 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2585 }else{
2586 for(i=0; i<16; i++){
2587 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2588 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2591 }else{
2592 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2594 }else if(h->cbp&15){
2595 if(transform_bypass){
2596 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2597 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2598 for(i=0; i<16; i+=di){
2599 if(h->non_zero_count_cache[ scan8[i] ]){
2600 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2603 }else{
2604 if(IS_8x8DCT(mb_type)){
2605 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2606 }else{
2607 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2611 }else{
2612 for(i=0; i<16; i++){
2613 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2614 uint8_t * const ptr= dest_y + block_offset[i];
2615 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: blocks 16..19 go to Cb, 20..23 to Cr; the plane is
 * selected with (i&4)>>2 */
2621 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2622 uint8_t *dest[2] = {dest_cb, dest_cr};
2623 if(transform_bypass){
2624 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2625 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2626 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2627 }else{
2628 idct_add = s->dsp.add_pixels4;
2629 for(i=16; i<16+8; i++){
2630 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2631 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2634 }else{
2635 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2636 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2637 if(is_h264){
2638 idct_add = s->dsp.h264_idct_add;
2639 idct_dc_add = s->dsp.h264_idct_dc_add;
2640 for(i=16; i<16+8; i++){
2641 if(h->non_zero_count_cache[ scan8[i] ])
2642 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2643 else if(h->mb[i*16])
2644 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2646 }else{
2647 for(i=16; i<16+8; i++){
2648 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2649 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2650 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2657 if(h->cbp || IS_INTRA(mb_type))
2658 s->dsp.clear_blocks(h->mb);
/* deblocking: save the borders first, then refill the caches and the
 * chroma qp values and filter this macroblock */
2660 if(h->deblocking_filter) {
2661 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2662 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2663 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2664 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2665 if (!simple && FRAME_MBAFF) {
2666 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2667 } else {
2668 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2674 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2676 static void hl_decode_mb_simple(H264Context *h){
2677 hl_decode_mb_internal(h, 1);
2681 * Process a macroblock; this handles edge cases, such as interlacing.
2683 static void av_noinline hl_decode_mb_complex(H264Context *h){
2684 hl_decode_mb_internal(h, 0);
2687 static void hl_decode_mb(H264Context *h){
2688 MpegEncContext * const s = &h->s;
2689 const int mb_xy= h->mb_xy;
2690 const int mb_type= s->current_picture.mb_type[mb_xy];
2691 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2693 if (is_complex)
2694 hl_decode_mb_complex(h);
2695 else hl_decode_mb_simple(h);
2698 static void pic_as_field(Picture *pic, const int parity){
2699 int i;
2700 for (i = 0; i < 4; ++i) {
2701 if (parity == PICT_BOTTOM_FIELD)
2702 pic->data[i] += pic->linesize[i];
2703 pic->reference = parity;
2704 pic->linesize[i] *= 2;
2706 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2709 static int split_field_copy(Picture *dest, Picture *src,
2710 int parity, int id_add){
2711 int match = !!(src->reference & parity);
2713 if (match) {
2714 *dest = *src;
2715 if(parity != PICT_FRAME){
2716 pic_as_field(dest, parity);
2717 dest->pic_id *= 2;
2718 dest->pic_id += id_add;
2722 return match;
2725 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2726 int i[2]={0};
2727 int index=0;
2729 while(i[0]<len || i[1]<len){
2730 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2731 i[0]++;
2732 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2733 i[1]++;
2734 if(i[0] < len){
2735 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2736 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2738 if(i[1] < len){
2739 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2740 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2744 return index;
2747 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2748 int i, best_poc;
2749 int out_i= 0;
2751 for(;;){
2752 best_poc= dir ? INT_MIN : INT_MAX;
2754 for(i=0; i<len; i++){
2755 const int poc= src[i]->poc;
2756 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2757 best_poc= poc;
2758 sorted[out_i]= src[i];
2761 if(best_poc == (dir ? INT_MIN : INT_MAX))
2762 break;
2763 limit= sorted[out_i++]->poc - dir;
2765 return out_i;
2769 * fills the default_ref_list.
2771 static int fill_default_ref_list(H264Context *h){
2772 MpegEncContext * const s = &h->s;
2773 int i, len;
2775 if(h->slice_type_nos==FF_B_TYPE){
2776 Picture *sorted[32];
2777 int cur_poc, list;
2778 int lens[2];
2780 if(FIELD_PICTURE)
2781 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2782 else
2783 cur_poc= s->current_picture_ptr->poc;
2785 for(list= 0; list<2; list++){
2786 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2787 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2788 assert(len<=32);
2789 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2790 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2791 assert(len<=32);
2793 if(len < h->ref_count[list])
2794 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2795 lens[list]= len;
2798 if(lens[0] == lens[1] && lens[1] > 1){
2799 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2800 if(i == lens[0])
2801 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2803 }else{
2804 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2805 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2806 assert(len <= 32);
2807 if(len < h->ref_count[0])
2808 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2810 #ifdef TRACE
2811 for (i=0; i<h->ref_count[0]; i++) {
2812 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2814 if(h->slice_type_nos==FF_B_TYPE){
2815 for (i=0; i<h->ref_count[1]; i++) {
2816 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2819 #endif
2820 return 0;
2823 static void print_short_term(H264Context *h);
2824 static void print_long_term(H264Context *h);
2827 * Extract structure information about the picture described by pic_num in
2828 * the current decoding context (frame or field). Note that pic_num is
2829 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2830 * @param pic_num picture number for which to extract structure information
2831 * @param structure one of PICT_XXX describing structure of picture
2832 * with pic_num
2833 * @return frame number (short term) or long term index of picture
2834 * described by pic_num
2836 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2837 MpegEncContext * const s = &h->s;
2839 *structure = s->picture_structure;
2840 if(FIELD_PICTURE){
2841 if (!(pic_num & 1))
2842 /* opposite field */
2843 *structure ^= PICT_FRAME;
2844 pic_num >>= 1;
2847 return pic_num;
2850 static int decode_ref_pic_list_reordering(H264Context *h){
2851 MpegEncContext * const s = &h->s;
2852 int list, index, pic_structure;
2854 print_short_term(h);
2855 print_long_term(h);
2857 for(list=0; list<h->list_count; list++){
2858 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2860 if(get_bits1(&s->gb)){
2861 int pred= h->curr_pic_num;
2863 for(index=0; ; index++){
2864 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2865 unsigned int pic_id;
2866 int i;
2867 Picture *ref = NULL;
2869 if(reordering_of_pic_nums_idc==3)
2870 break;
2872 if(index >= h->ref_count[list]){
2873 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2874 return -1;
2877 if(reordering_of_pic_nums_idc<3){
2878 if(reordering_of_pic_nums_idc<2){
2879 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2880 int frame_num;
2882 if(abs_diff_pic_num > h->max_pic_num){
2883 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2884 return -1;
2887 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2888 else pred+= abs_diff_pic_num;
2889 pred &= h->max_pic_num - 1;
2891 frame_num = pic_num_extract(h, pred, &pic_structure);
2893 for(i= h->short_ref_count-1; i>=0; i--){
2894 ref = h->short_ref[i];
2895 assert(ref->reference);
2896 assert(!ref->long_ref);
2898 ref->frame_num == frame_num &&
2899 (ref->reference & pic_structure)
2901 break;
2903 if(i>=0)
2904 ref->pic_id= pred;
2905 }else{
2906 int long_idx;
2907 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2909 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2911 if(long_idx>31){
2912 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2913 return -1;
2915 ref = h->long_ref[long_idx];
2916 assert(!(ref && !ref->reference));
2917 if(ref && (ref->reference & pic_structure)){
2918 ref->pic_id= pic_id;
2919 assert(ref->long_ref);
2920 i=0;
2921 }else{
2922 i=-1;
2926 if (i < 0) {
2927 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2928 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2929 } else {
2930 for(i=index; i+1<h->ref_count[list]; i++){
2931 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2932 break;
2934 for(; i > index; i--){
2935 h->ref_list[list][i]= h->ref_list[list][i-1];
2937 h->ref_list[list][index]= *ref;
2938 if (FIELD_PICTURE){
2939 pic_as_field(&h->ref_list[list][index], pic_structure);
2942 }else{
2943 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2944 return -1;
2949 for(list=0; list<h->list_count; list++){
2950 for(index= 0; index < h->ref_count[list]; index++){
2951 if(!h->ref_list[list][index].data[0]){
2952 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2953 if(h->default_ref_list[list][0].data[0])
2954 h->ref_list[list][index]= h->default_ref_list[list][0];
2955 else
2956 return -1;
2961 return 0;
2964 static void fill_mbaff_ref_list(H264Context *h){
2965 int list, i, j;
2966 for(list=0; list<2; list++){ //FIXME try list_count
2967 for(i=0; i<h->ref_count[list]; i++){
2968 Picture *frame = &h->ref_list[list][i];
2969 Picture *field = &h->ref_list[list][16+2*i];
2970 field[0] = *frame;
2971 for(j=0; j<3; j++)
2972 field[0].linesize[j] <<= 1;
2973 field[0].reference = PICT_TOP_FIELD;
2974 field[0].poc= field[0].field_poc[0];
2975 field[1] = field[0];
2976 for(j=0; j<3; j++)
2977 field[1].data[j] += frame->linesize[j];
2978 field[1].reference = PICT_BOTTOM_FIELD;
2979 field[1].poc= field[1].field_poc[1];
2981 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2982 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2983 for(j=0; j<2; j++){
2984 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2985 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2989 for(j=0; j<h->ref_count[1]; j++){
2990 for(i=0; i<h->ref_count[0]; i++)
2991 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2992 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2993 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2997 static int pred_weight_table(H264Context *h){
2998 MpegEncContext * const s = &h->s;
2999 int list, i;
3000 int luma_def, chroma_def;
3002 h->use_weight= 0;
3003 h->use_weight_chroma= 0;
3004 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3005 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3006 luma_def = 1<<h->luma_log2_weight_denom;
3007 chroma_def = 1<<h->chroma_log2_weight_denom;
3009 for(list=0; list<2; list++){
3010 h->luma_weight_flag[list] = 0;
3011 h->chroma_weight_flag[list] = 0;
3012 for(i=0; i<h->ref_count[list]; i++){
3013 int luma_weight_flag, chroma_weight_flag;
3015 luma_weight_flag= get_bits1(&s->gb);
3016 if(luma_weight_flag){
3017 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3018 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3019 if( h->luma_weight[list][i] != luma_def
3020 || h->luma_offset[list][i] != 0) {
3021 h->use_weight= 1;
3022 h->luma_weight_flag[list]= 1;
3024 }else{
3025 h->luma_weight[list][i]= luma_def;
3026 h->luma_offset[list][i]= 0;
3029 if(CHROMA){
3030 chroma_weight_flag= get_bits1(&s->gb);
3031 if(chroma_weight_flag){
3032 int j;
3033 for(j=0; j<2; j++){
3034 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3035 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3036 if( h->chroma_weight[list][i][j] != chroma_def
3037 || h->chroma_offset[list][i][j] != 0) {
3038 h->use_weight_chroma= 1;
3039 h->chroma_weight_flag[list]= 1;
3042 }else{
3043 int j;
3044 for(j=0; j<2; j++){
3045 h->chroma_weight[list][i][j]= chroma_def;
3046 h->chroma_offset[list][i][j]= 0;
3051 if(h->slice_type_nos != FF_B_TYPE) break;
3053 h->use_weight= h->use_weight || h->use_weight_chroma;
3054 return 0;
3057 static void implicit_weight_table(H264Context *h){
3058 MpegEncContext * const s = &h->s;
3059 int ref0, ref1, i;
3060 int cur_poc = s->current_picture_ptr->poc;
3062 for (i = 0; i < 2; i++) {
3063 h->luma_weight_flag[i] = 0;
3064 h->chroma_weight_flag[i] = 0;
3067 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3068 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3069 h->use_weight= 0;
3070 h->use_weight_chroma= 0;
3071 return;
3074 h->use_weight= 2;
3075 h->use_weight_chroma= 2;
3076 h->luma_log2_weight_denom= 5;
3077 h->chroma_log2_weight_denom= 5;
3079 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3080 int poc0 = h->ref_list[0][ref0].poc;
3081 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3082 int poc1 = h->ref_list[1][ref1].poc;
3083 int td = av_clip(poc1 - poc0, -128, 127);
3084 if(td){
3085 int tb = av_clip(cur_poc - poc0, -128, 127);
3086 int tx = (16384 + (FFABS(td) >> 1)) / td;
3087 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3088 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3089 h->implicit_weight[ref0][ref1] = 32;
3090 else
3091 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3092 }else
3093 h->implicit_weight[ref0][ref1] = 32;
3099 * Mark a picture as no longer needed for reference. The refmask
3100 * argument allows unreferencing of individual fields or the whole frame.
3101 * If the picture becomes entirely unreferenced, but is being held for
3102 * display purposes, it is marked as such.
3103 * @param refmask mask of fields to unreference; the mask is bitwise
3104 * anded with the reference marking of pic
3105 * @return non-zero if pic becomes entirely unreferenced (except possibly
3106 * for display purposes) zero if one of the fields remains in
3107 * reference
3109 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3110 int i;
3111 if (pic->reference &= refmask) {
3112 return 0;
3113 } else {
3114 for(i = 0; h->delayed_pic[i]; i++)
3115 if(pic == h->delayed_pic[i]){
3116 pic->reference=DELAYED_PIC_REF;
3117 break;
3119 return 1;
3124 * instantaneous decoder refresh.
3126 static void idr(H264Context *h){
3127 int i;
3129 for(i=0; i<16; i++){
3130 remove_long(h, i, 0);
3132 assert(h->long_ref_count==0);
3134 for(i=0; i<h->short_ref_count; i++){
3135 unreference_pic(h, h->short_ref[i], 0);
3136 h->short_ref[i]= NULL;
3138 h->short_ref_count=0;
3139 h->prev_frame_num= 0;
3140 h->prev_frame_num_offset= 0;
3141 h->prev_poc_msb=
3142 h->prev_poc_lsb= 0;
3145 /* forget old pics after a seek */
3146 static void flush_dpb(AVCodecContext *avctx){
3147 H264Context *h= avctx->priv_data;
3148 int i;
3149 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3150 if(h->delayed_pic[i])
3151 h->delayed_pic[i]->reference= 0;
3152 h->delayed_pic[i]= NULL;
3154 h->outputed_poc= INT_MIN;
3155 idr(h);
3156 if(h->s.current_picture_ptr)
3157 h->s.current_picture_ptr->reference= 0;
3158 h->s.first_field= 0;
3159 reset_sei(h);
3160 ff_mpeg_flush(avctx);
3164 * Find a Picture in the short term reference list by frame number.
3165 * @param frame_num frame number to search for
3166 * @param idx the index into h->short_ref where returned picture is found
3167 * undefined if no picture found.
3168 * @return pointer to the found picture, or NULL if no pic with the provided
3169 * frame number is found
3171 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3172 MpegEncContext * const s = &h->s;
3173 int i;
3175 for(i=0; i<h->short_ref_count; i++){
3176 Picture *pic= h->short_ref[i];
3177 if(s->avctx->debug&FF_DEBUG_MMCO)
3178 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3179 if(pic->frame_num == frame_num) {
3180 *idx = i;
3181 return pic;
3184 return NULL;
3188 * Remove a picture from the short term reference list by its index in
3189 * that list. This does no checking on the provided index; it is assumed
3190 * to be valid. Other list entries are shifted down.
3191 * @param i index into h->short_ref of picture to remove.
3193 static void remove_short_at_index(H264Context *h, int i){
3194 assert(i >= 0 && i < h->short_ref_count);
3195 h->short_ref[i]= NULL;
3196 if (--h->short_ref_count)
3197 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3202 * @return the removed picture or NULL if an error occurs
3204 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3205 MpegEncContext * const s = &h->s;
3206 Picture *pic;
3207 int i;
3209 if(s->avctx->debug&FF_DEBUG_MMCO)
3210 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3212 pic = find_short(h, frame_num, &i);
3213 if (pic){
3214 if(unreference_pic(h, pic, ref_mask))
3215 remove_short_at_index(h, i);
3218 return pic;
3222 * Remove a picture from the long term reference list by its index in
3223 * that list.
3224 * @return the removed picture or NULL if an error occurs
3226 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3227 Picture *pic;
3229 pic= h->long_ref[i];
3230 if (pic){
3231 if(unreference_pic(h, pic, ref_mask)){
3232 assert(h->long_ref[i]->long_ref == 1);
3233 h->long_ref[i]->long_ref= 0;
3234 h->long_ref[i]= NULL;
3235 h->long_ref_count--;
3239 return pic;
3243 * print short term list
3245 static void print_short_term(H264Context *h) {
3246 uint32_t i;
3247 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3248 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3249 for(i=0; i<h->short_ref_count; i++){
3250 Picture *pic= h->short_ref[i];
3251 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3257 * print long term list
3259 static void print_long_term(H264Context *h) {
3260 uint32_t i;
3261 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3262 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3263 for(i = 0; i < 16; i++){
3264 Picture *pic= h->long_ref[i];
3265 if (pic) {
3266 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3273 * Executes the reference picture marking (memory management control operations).
3275 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3276 MpegEncContext * const s = &h->s;
3277 int i, av_uninit(j);
3278 int current_ref_assigned=0;
3279 Picture *av_uninit(pic);
3281 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3282 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3284 for(i=0; i<mmco_count; i++){
3285 int av_uninit(structure), av_uninit(frame_num);
3286 if(s->avctx->debug&FF_DEBUG_MMCO)
3287 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3289 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3290 || mmco[i].opcode == MMCO_SHORT2LONG){
3291 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3292 pic = find_short(h, frame_num, &j);
3293 if(!pic){
3294 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3295 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3296 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3297 continue;
3301 switch(mmco[i].opcode){
3302 case MMCO_SHORT2UNUSED:
3303 if(s->avctx->debug&FF_DEBUG_MMCO)
3304 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3305 remove_short(h, frame_num, structure ^ PICT_FRAME);
3306 break;
3307 case MMCO_SHORT2LONG:
3308 if (h->long_ref[mmco[i].long_arg] != pic)
3309 remove_long(h, mmco[i].long_arg, 0);
3311 remove_short_at_index(h, j);
3312 h->long_ref[ mmco[i].long_arg ]= pic;
3313 if (h->long_ref[ mmco[i].long_arg ]){
3314 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3315 h->long_ref_count++;
3317 break;
3318 case MMCO_LONG2UNUSED:
3319 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3320 pic = h->long_ref[j];
3321 if (pic) {
3322 remove_long(h, j, structure ^ PICT_FRAME);
3323 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3324 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3325 break;
3326 case MMCO_LONG:
3327 // Comment below left from previous code as it is an interresting note.
3328 /* First field in pair is in short term list or
3329 * at a different long term index.
3330 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3331 * Report the problem and keep the pair where it is,
3332 * and mark this field valid.
3335 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3336 remove_long(h, mmco[i].long_arg, 0);
3338 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3339 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3340 h->long_ref_count++;
3343 s->current_picture_ptr->reference |= s->picture_structure;
3344 current_ref_assigned=1;
3345 break;
3346 case MMCO_SET_MAX_LONG:
3347 assert(mmco[i].long_arg <= 16);
3348 // just remove the long term which index is greater than new max
3349 for(j = mmco[i].long_arg; j<16; j++){
3350 remove_long(h, j, 0);
3352 break;
3353 case MMCO_RESET:
3354 while(h->short_ref_count){
3355 remove_short(h, h->short_ref[0]->frame_num, 0);
3357 for(j = 0; j < 16; j++) {
3358 remove_long(h, j, 0);
3360 s->current_picture_ptr->poc=
3361 s->current_picture_ptr->field_poc[0]=
3362 s->current_picture_ptr->field_poc[1]=
3363 h->poc_lsb=
3364 h->poc_msb=
3365 h->frame_num=
3366 s->current_picture_ptr->frame_num= 0;
3367 break;
3368 default: assert(0);
3372 if (!current_ref_assigned) {
3373 /* Second field of complementary field pair; the first field of
3374 * which is already referenced. If short referenced, it
3375 * should be first entry in short_ref. If not, it must exist
3376 * in long_ref; trying to put it on the short list here is an
3377 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3379 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3380 /* Just mark the second field valid */
3381 s->current_picture_ptr->reference = PICT_FRAME;
3382 } else if (s->current_picture_ptr->long_ref) {
3383 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3384 "assignment for second field "
3385 "in complementary field pair "
3386 "(first field is long term)\n");
3387 } else {
3388 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3389 if(pic){
3390 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3393 if(h->short_ref_count)
3394 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3396 h->short_ref[0]= s->current_picture_ptr;
3397 h->short_ref_count++;
3398 s->current_picture_ptr->reference |= s->picture_structure;
3402 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3404 /* We have too many reference frames, probably due to corrupted
3405 * stream. Need to discard one frame. Prevents overrun of the
3406 * short_ref and long_ref buffers.
3408 av_log(h->s.avctx, AV_LOG_ERROR,
3409 "number of reference frames exceeds max (probably "
3410 "corrupt input), discarding one\n");
3412 if (h->long_ref_count && !h->short_ref_count) {
3413 for (i = 0; i < 16; ++i)
3414 if (h->long_ref[i])
3415 break;
3417 assert(i < 16);
3418 remove_long(h, i, 0);
3419 } else {
3420 pic = h->short_ref[h->short_ref_count - 1];
3421 remove_short(h, pic->frame_num, 0);
3425 print_short_term(h);
3426 print_long_term(h);
3427 return 0;
3430 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3431 MpegEncContext * const s = &h->s;
3432 int i;
3434 h->mmco_index= 0;
3435 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3436 s->broken_link= get_bits1(gb) -1;
3437 if(get_bits1(gb)){
3438 h->mmco[0].opcode= MMCO_LONG;
3439 h->mmco[0].long_arg= 0;
3440 h->mmco_index= 1;
3442 }else{
3443 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3444 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3445 MMCOOpcode opcode= get_ue_golomb_31(gb);
3447 h->mmco[i].opcode= opcode;
3448 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3449 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3450 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3451 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3452 return -1;
3455 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3456 unsigned int long_arg= get_ue_golomb_31(gb);
3457 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3458 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3459 return -1;
3461 h->mmco[i].long_arg= long_arg;
3464 if(opcode > (unsigned)MMCO_LONG){
3465 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3466 return -1;
3468 if(opcode == MMCO_END)
3469 break;
3471 h->mmco_index= i;
3472 }else{
3473 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3475 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3476 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3477 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3478 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3479 h->mmco_index= 1;
3480 if (FIELD_PICTURE) {
3481 h->mmco[0].short_pic_num *= 2;
3482 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3483 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3484 h->mmco_index= 2;
3490 return 0;
3493 static int init_poc(H264Context *h){
3494 MpegEncContext * const s = &h->s;
3495 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3496 int field_poc[2];
3497 Picture *cur = s->current_picture_ptr;
3499 h->frame_num_offset= h->prev_frame_num_offset;
3500 if(h->frame_num < h->prev_frame_num)
3501 h->frame_num_offset += max_frame_num;
3503 if(h->sps.poc_type==0){
3504 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3506 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3507 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3508 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3509 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3510 else
3511 h->poc_msb = h->prev_poc_msb;
3512 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3513 field_poc[0] =
3514 field_poc[1] = h->poc_msb + h->poc_lsb;
3515 if(s->picture_structure == PICT_FRAME)
3516 field_poc[1] += h->delta_poc_bottom;
3517 }else if(h->sps.poc_type==1){
3518 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3519 int i;
3521 if(h->sps.poc_cycle_length != 0)
3522 abs_frame_num = h->frame_num_offset + h->frame_num;
3523 else
3524 abs_frame_num = 0;
3526 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3527 abs_frame_num--;
3529 expected_delta_per_poc_cycle = 0;
3530 for(i=0; i < h->sps.poc_cycle_length; i++)
3531 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3533 if(abs_frame_num > 0){
3534 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3535 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3537 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3538 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3539 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3540 } else
3541 expectedpoc = 0;
3543 if(h->nal_ref_idc == 0)
3544 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3546 field_poc[0] = expectedpoc + h->delta_poc[0];
3547 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3549 if(s->picture_structure == PICT_FRAME)
3550 field_poc[1] += h->delta_poc[1];
3551 }else{
3552 int poc= 2*(h->frame_num_offset + h->frame_num);
3554 if(!h->nal_ref_idc)
3555 poc--;
3557 field_poc[0]= poc;
3558 field_poc[1]= poc;
3561 if(s->picture_structure != PICT_BOTTOM_FIELD)
3562 s->current_picture_ptr->field_poc[0]= field_poc[0];
3563 if(s->picture_structure != PICT_TOP_FIELD)
3564 s->current_picture_ptr->field_poc[1]= field_poc[1];
3565 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3567 return 0;
3572 * initialize scan tables
3574 static void init_scan_tables(H264Context *h){
3575 MpegEncContext * const s = &h->s;
3576 int i;
3577 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3578 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3579 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3580 }else{
3581 for(i=0; i<16; i++){
3582 #define T(x) (x>>2) | ((x<<2) & 0xF)
3583 h->zigzag_scan[i] = T(zigzag_scan[i]);
3584 h-> field_scan[i] = T( field_scan[i]);
3585 #undef T
3588 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3589 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3590 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3591 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3592 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3593 }else{
3594 for(i=0; i<64; i++){
3595 #define T(x) (x>>3) | ((x&7)<<3)
3596 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3597 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3598 h->field_scan8x8[i] = T(field_scan8x8[i]);
3599 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3600 #undef T
3603 if(h->sps.transform_bypass){ //FIXME same ugly
3604 h->zigzag_scan_q0 = zigzag_scan;
3605 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3606 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3607 h->field_scan_q0 = field_scan;
3608 h->field_scan8x8_q0 = field_scan8x8;
3609 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3610 }else{
3611 h->zigzag_scan_q0 = h->zigzag_scan;
3612 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3613 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3614 h->field_scan_q0 = h->field_scan;
3615 h->field_scan8x8_q0 = h->field_scan8x8;
3616 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3620 static void field_end(H264Context *h){
3621 MpegEncContext * const s = &h->s;
3622 AVCodecContext * const avctx= s->avctx;
3623 s->mb_y= 0;
3625 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
3626 s->current_picture_ptr->pict_type= s->pict_type;
3628 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3629 ff_vdpau_h264_set_reference_frames(s);
3631 if(!s->dropable) {
3632 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
3633 h->prev_poc_msb= h->poc_msb;
3634 h->prev_poc_lsb= h->poc_lsb;
3636 h->prev_frame_num_offset= h->frame_num_offset;
3637 h->prev_frame_num= h->frame_num;
3639 if (avctx->hwaccel) {
3640 if (avctx->hwaccel->end_frame(avctx) < 0)
3641 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
3644 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3645 ff_vdpau_h264_picture_complete(s);
3648 * FIXME: Error handling code does not seem to support interlaced
3649 * when slices span multiple rows
3650 * The ff_er_add_slice calls don't work right for bottom
3651 * fields; they cause massive erroneous error concealing
3652 * Error marking covers both fields (top and bottom).
3653 * This causes a mismatched s->error_count
3654 * and a bad error table. Further, the error count goes to
3655 * INT_MAX when called for bottom field, because mb_y is
3656 * past end by one (callers fault) and resync_mb_y != 0
3657 * causes problems for the first MB line, too.
3659 if (!FIELD_PICTURE)
3660 ff_er_frame_end(s);
3662 MPV_frame_end(s);
3664 h->current_slice=0;
3668 * Replicates H264 "master" context to thread contexts.
3670 static void clone_slice(H264Context *dst, H264Context *src)
3672 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3673 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3674 dst->s.current_picture = src->s.current_picture;
3675 dst->s.linesize = src->s.linesize;
3676 dst->s.uvlinesize = src->s.uvlinesize;
3677 dst->s.first_field = src->s.first_field;
3679 dst->prev_poc_msb = src->prev_poc_msb;
3680 dst->prev_poc_lsb = src->prev_poc_lsb;
3681 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3682 dst->prev_frame_num = src->prev_frame_num;
3683 dst->short_ref_count = src->short_ref_count;
3685 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3686 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3687 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3688 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3690 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3691 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3695 * decodes a slice header.
3696 * This will also call MPV_common_init() and frame_start() as needed.
3698 * @param h h264context
3699 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3701 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3703 static int decode_slice_header(H264Context *h, H264Context *h0){
3704 MpegEncContext * const s = &h->s;
3705 MpegEncContext * const s0 = &h0->s;
3706 unsigned int first_mb_in_slice;
3707 unsigned int pps_id;
3708 int num_ref_idx_active_override_flag;
3709 unsigned int slice_type, tmp, i, j;
3710 int default_ref_list_done = 0;
3711 int last_pic_structure;
3713 s->dropable= h->nal_ref_idc == 0;
3715 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3716 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3717 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3718 }else{
3719 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3720 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3723 first_mb_in_slice= get_ue_golomb(&s->gb);
3725 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
3726 if(h0->current_slice && FIELD_PICTURE){
3727 field_end(h);
3730 h0->current_slice = 0;
3731 if (!s0->first_field)
3732 s->current_picture_ptr= NULL;
3735 slice_type= get_ue_golomb_31(&s->gb);
3736 if(slice_type > 9){
3737 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3738 return -1;
3740 if(slice_type > 4){
3741 slice_type -= 5;
3742 h->slice_type_fixed=1;
3743 }else
3744 h->slice_type_fixed=0;
3746 slice_type= golomb_to_pict_type[ slice_type ];
3747 if (slice_type == FF_I_TYPE
3748 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3749 default_ref_list_done = 1;
3751 h->slice_type= slice_type;
3752 h->slice_type_nos= slice_type & 3;
3754 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3755 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3756 av_log(h->s.avctx, AV_LOG_ERROR,
3757 "B picture before any references, skipping\n");
3758 return -1;
3761 pps_id= get_ue_golomb(&s->gb);
3762 if(pps_id>=MAX_PPS_COUNT){
3763 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3764 return -1;
3766 if(!h0->pps_buffers[pps_id]) {
3767 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
3768 return -1;
3770 h->pps= *h0->pps_buffers[pps_id];
3772 if(!h0->sps_buffers[h->pps.sps_id]) {
3773 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
3774 return -1;
3776 h->sps = *h0->sps_buffers[h->pps.sps_id];
3778 if(h == h0 && h->dequant_coeff_pps != pps_id){
3779 h->dequant_coeff_pps = pps_id;
3780 init_dequant_tables(h);
3783 s->mb_width= h->sps.mb_width;
3784 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3786 h->b_stride= s->mb_width*4;
3787 h->b8_stride= s->mb_width*2;
3789 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3790 if(h->sps.frame_mbs_only_flag)
3791 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3792 else
3793 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3795 if (s->context_initialized
3796 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3797 if(h != h0)
3798 return -1; // width / height changed during parallelized decoding
3799 free_tables(h);
3800 flush_dpb(s->avctx);
3801 MPV_common_end(s);
3803 if (!s->context_initialized) {
3804 if(h != h0)
3805 return -1; // we cant (re-)initialize context during parallel decoding
3806 if (MPV_common_init(s) < 0)
3807 return -1;
3808 s->first_field = 0;
3810 init_scan_tables(h);
3811 alloc_tables(h);
3813 for(i = 1; i < s->avctx->thread_count; i++) {
3814 H264Context *c;
3815 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3816 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3817 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3818 c->sps = h->sps;
3819 c->pps = h->pps;
3820 init_scan_tables(c);
3821 clone_tables(c, h);
3824 for(i = 0; i < s->avctx->thread_count; i++)
3825 if(context_init(h->thread_context[i]) < 0)
3826 return -1;
3828 s->avctx->width = s->width;
3829 s->avctx->height = s->height;
3830 s->avctx->sample_aspect_ratio= h->sps.sar;
3831 if(!s->avctx->sample_aspect_ratio.den)
3832 s->avctx->sample_aspect_ratio.den = 1;
3834 if(h->sps.timing_info_present_flag){
3835 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
3836 if(h->x264_build > 0 && h->x264_build < 44)
3837 s->avctx->time_base.den *= 2;
3838 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3839 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3843 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3845 h->mb_mbaff = 0;
3846 h->mb_aff_frame = 0;
3847 last_pic_structure = s0->picture_structure;
3848 if(h->sps.frame_mbs_only_flag){
3849 s->picture_structure= PICT_FRAME;
3850 }else{
3851 if(get_bits1(&s->gb)) { //field_pic_flag
3852 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3853 } else {
3854 s->picture_structure= PICT_FRAME;
3855 h->mb_aff_frame = h->sps.mb_aff;
3858 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3860 if(h0->current_slice == 0){
3861 while(h->frame_num != h->prev_frame_num &&
3862 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3863 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3864 if (frame_start(h) < 0)
3865 return -1;
3866 h->prev_frame_num++;
3867 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3868 s->current_picture_ptr->frame_num= h->prev_frame_num;
3869 execute_ref_pic_marking(h, NULL, 0);
3872 /* See if we have a decoded first field looking for a pair... */
3873 if (s0->first_field) {
3874 assert(s0->current_picture_ptr);
3875 assert(s0->current_picture_ptr->data[0]);
3876 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3878 /* figure out if we have a complementary field pair */
3879 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3881 * Previous field is unmatched. Don't display it, but let it
3882 * remain for reference if marked as such.
3884 s0->current_picture_ptr = NULL;
3885 s0->first_field = FIELD_PICTURE;
3887 } else {
3888 if (h->nal_ref_idc &&
3889 s0->current_picture_ptr->reference &&
3890 s0->current_picture_ptr->frame_num != h->frame_num) {
3892 * This and previous field were reference, but had
3893 * different frame_nums. Consider this field first in
3894 * pair. Throw away previous field except for reference
3895 * purposes.
3897 s0->first_field = 1;
3898 s0->current_picture_ptr = NULL;
3900 } else {
3901 /* Second field in complementary pair */
3902 s0->first_field = 0;
3906 } else {
3907 /* Frame or first field in a potentially complementary pair */
3908 assert(!s0->current_picture_ptr);
3909 s0->first_field = FIELD_PICTURE;
3912 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3913 s0->first_field = 0;
3914 return -1;
3917 if(h != h0)
3918 clone_slice(h, h0);
3920 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3922 assert(s->mb_num == s->mb_width * s->mb_height);
3923 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3924 first_mb_in_slice >= s->mb_num){
3925 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3926 return -1;
3928 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3929 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3930 if (s->picture_structure == PICT_BOTTOM_FIELD)
3931 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3932 assert(s->mb_y < s->mb_height);
3934 if(s->picture_structure==PICT_FRAME){
3935 h->curr_pic_num= h->frame_num;
3936 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3937 }else{
3938 h->curr_pic_num= 2*h->frame_num + 1;
3939 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3942 if(h->nal_unit_type == NAL_IDR_SLICE){
3943 get_ue_golomb(&s->gb); /* idr_pic_id */
3946 if(h->sps.poc_type==0){
3947 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3949 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3950 h->delta_poc_bottom= get_se_golomb(&s->gb);
3954 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3955 h->delta_poc[0]= get_se_golomb(&s->gb);
3957 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3958 h->delta_poc[1]= get_se_golomb(&s->gb);
3961 init_poc(h);
3963 if(h->pps.redundant_pic_cnt_present){
3964 h->redundant_pic_count= get_ue_golomb(&s->gb);
3967 //set defaults, might be overridden a few lines later
3968 h->ref_count[0]= h->pps.ref_count[0];
3969 h->ref_count[1]= h->pps.ref_count[1];
3971 if(h->slice_type_nos != FF_I_TYPE){
3972 if(h->slice_type_nos == FF_B_TYPE){
3973 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3975 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3977 if(num_ref_idx_active_override_flag){
3978 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3979 if(h->slice_type_nos==FF_B_TYPE)
3980 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3982 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3983 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3984 h->ref_count[0]= h->ref_count[1]= 1;
3985 return -1;
3988 if(h->slice_type_nos == FF_B_TYPE)
3989 h->list_count= 2;
3990 else
3991 h->list_count= 1;
3992 }else
3993 h->list_count= 0;
3995 if(!default_ref_list_done){
3996 fill_default_ref_list(h);
3999 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
4000 return -1;
4002 if(h->slice_type_nos!=FF_I_TYPE){
4003 s->last_picture_ptr= &h->ref_list[0][0];
4004 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
4006 if(h->slice_type_nos==FF_B_TYPE){
4007 s->next_picture_ptr= &h->ref_list[1][0];
4008 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
4011 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4012 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4013 pred_weight_table(h);
4014 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4015 implicit_weight_table(h);
4016 else {
4017 h->use_weight = 0;
4018 for (i = 0; i < 2; i++) {
4019 h->luma_weight_flag[i] = 0;
4020 h->chroma_weight_flag[i] = 0;
4024 if(h->nal_ref_idc)
4025 decode_ref_pic_marking(h0, &s->gb);
4027 if(FRAME_MBAFF)
4028 fill_mbaff_ref_list(h);
4030 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
4031 direct_dist_scale_factor(h);
4032 direct_ref_list_init(h);
4034 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4035 tmp = get_ue_golomb_31(&s->gb);
4036 if(tmp > 2){
4037 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4038 return -1;
4040 h->cabac_init_idc= tmp;
4043 h->last_qscale_diff = 0;
4044 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4045 if(tmp>51){
4046 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4047 return -1;
4049 s->qscale= tmp;
4050 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4051 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4052 //FIXME qscale / qp ... stuff
4053 if(h->slice_type == FF_SP_TYPE){
4054 get_bits1(&s->gb); /* sp_for_switch_flag */
4056 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4057 get_se_golomb(&s->gb); /* slice_qs_delta */
4060 h->deblocking_filter = 1;
4061 h->slice_alpha_c0_offset = 0;
4062 h->slice_beta_offset = 0;
4063 if( h->pps.deblocking_filter_parameters_present ) {
4064 tmp= get_ue_golomb_31(&s->gb);
4065 if(tmp > 2){
4066 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4067 return -1;
4069 h->deblocking_filter= tmp;
4070 if(h->deblocking_filter < 2)
4071 h->deblocking_filter^= 1; // 1<->0
4073 if( h->deblocking_filter ) {
4074 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4075 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4079 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4080 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4081 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4082 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4083 h->deblocking_filter= 0;
4085 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4086 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4087 /* Cheat slightly for speed:
4088 Do not bother to deblock across slices. */
4089 h->deblocking_filter = 2;
4090 } else {
4091 h0->max_contexts = 1;
4092 if(!h0->single_decode_warning) {
4093 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4094 h0->single_decode_warning = 1;
4096 if(h != h0)
4097 return 1; // deblocking switched inside frame
4101 #if 0 //FMO
4102 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4103 slice_group_change_cycle= get_bits(&s->gb, ?);
4104 #endif
4106 h0->last_slice_type = slice_type;
4107 h->slice_num = ++h0->current_slice;
4108 if(h->slice_num >= MAX_SLICES){
4109 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
4112 for(j=0; j<2; j++){
4113 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4114 ref2frm[0]=
4115 ref2frm[1]= -1;
4116 for(i=0; i<16; i++)
4117 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4118 +(h->ref_list[j][i].reference&3);
4119 ref2frm[18+0]=
4120 ref2frm[18+1]= -1;
4121 for(i=16; i<48; i++)
4122 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4123 +(h->ref_list[j][i].reference&3);
4126 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4127 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4129 s->avctx->refs= h->sps.ref_frame_count;
4131 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4132 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4133 h->slice_num,
4134 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4135 first_mb_in_slice,
4136 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4137 pps_id, h->frame_num,
4138 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4139 h->ref_count[0], h->ref_count[1],
4140 s->qscale,
4141 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4142 h->use_weight,
4143 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4144 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4148 return 0;
4154 static inline int get_level_prefix(GetBitContext *gb){
4155 unsigned int buf;
4156 int log;
4158 OPEN_READER(re, gb);
4159 UPDATE_CACHE(re, gb);
4160 buf=GET_CACHE(re, gb);
4162 log= 32 - av_log2(buf);
4163 #ifdef TRACE
4164 print_bin(buf>>(32-log), log);
4165 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4166 #endif
4168 LAST_SKIP_BITS(re, gb, log);
4169 CLOSE_READER(re, gb);
4171 return log-1;
4174 static inline int get_dct8x8_allowed(H264Context *h){
4175 if(h->sps.direct_8x8_inference_flag)
4176 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4177 else
4178 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4182 * decodes a residual block.
4183 * @param n block index
4184 * @param scantable scantable
4185 * @param max_coeff number of coefficients in the block
4186 * @return <0 if an error occurred
4188 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4189 MpegEncContext * const s = &h->s;
4190 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4191 int level[16];
4192 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4194 //FIXME put trailing_onex into the context
4196 if(n == CHROMA_DC_BLOCK_INDEX){
4197 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4198 total_coeff= coeff_token>>2;
4199 }else{
4200 if(n == LUMA_DC_BLOCK_INDEX){
4201 total_coeff= pred_non_zero_count(h, 0);
4202 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4203 total_coeff= coeff_token>>2;
4204 }else{
4205 total_coeff= pred_non_zero_count(h, n);
4206 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4207 total_coeff= coeff_token>>2;
4208 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4212 //FIXME set last_non_zero?
4214 if(total_coeff==0)
4215 return 0;
4216 if(total_coeff > (unsigned)max_coeff) {
4217 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4218 return -1;
4221 trailing_ones= coeff_token&3;
4222 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4223 assert(total_coeff<=16);
4225 i = show_bits(gb, 3);
4226 skip_bits(gb, trailing_ones);
4227 level[0] = 1-((i&4)>>1);
4228 level[1] = 1-((i&2) );
4229 level[2] = 1-((i&1)<<1);
4231 if(trailing_ones<total_coeff) {
4232 int mask, prefix;
4233 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4234 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4235 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4237 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4238 if(level_code >= 100){
4239 prefix= level_code - 100;
4240 if(prefix == LEVEL_TAB_BITS)
4241 prefix += get_level_prefix(gb);
4243 //first coefficient has suffix_length equal to 0 or 1
4244 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4245 if(suffix_length)
4246 level_code= (prefix<<1) + get_bits1(gb); //part
4247 else
4248 level_code= prefix; //part
4249 }else if(prefix==14){
4250 if(suffix_length)
4251 level_code= (prefix<<1) + get_bits1(gb); //part
4252 else
4253 level_code= prefix + get_bits(gb, 4); //part
4254 }else{
4255 level_code= 30 + get_bits(gb, prefix-3); //part
4256 if(prefix>=16)
4257 level_code += (1<<(prefix-3))-4096;
4260 if(trailing_ones < 3) level_code += 2;
4262 suffix_length = 2;
4263 mask= -(level_code&1);
4264 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4265 }else{
4266 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4268 suffix_length = 1;
4269 if(level_code + 3U > 6U)
4270 suffix_length++;
4271 level[trailing_ones]= level_code;
4274 //remaining coefficients have suffix_length > 0
4275 for(i=trailing_ones+1;i<total_coeff;i++) {
4276 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4277 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4278 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4280 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4281 if(level_code >= 100){
4282 prefix= level_code - 100;
4283 if(prefix == LEVEL_TAB_BITS){
4284 prefix += get_level_prefix(gb);
4286 if(prefix<15){
4287 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4288 }else{
4289 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4290 if(prefix>=16)
4291 level_code += (1<<(prefix-3))-4096;
4293 mask= -(level_code&1);
4294 level_code= (((2+level_code)>>1) ^ mask) - mask;
4296 level[i]= level_code;
4298 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
4299 suffix_length++;
4303 if(total_coeff == max_coeff)
4304 zeros_left=0;
4305 else{
4306 if(n == CHROMA_DC_BLOCK_INDEX)
4307 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4308 else
4309 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4312 coeff_num = zeros_left + total_coeff - 1;
4313 j = scantable[coeff_num];
4314 if(n > 24){
4315 block[j] = level[0];
4316 for(i=1;i<total_coeff;i++) {
4317 if(zeros_left <= 0)
4318 run_before = 0;
4319 else if(zeros_left < 7){
4320 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4321 }else{
4322 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4324 zeros_left -= run_before;
4325 coeff_num -= 1 + run_before;
4326 j= scantable[ coeff_num ];
4328 block[j]= level[i];
4330 }else{
4331 block[j] = (level[0] * qmul[j] + 32)>>6;
4332 for(i=1;i<total_coeff;i++) {
4333 if(zeros_left <= 0)
4334 run_before = 0;
4335 else if(zeros_left < 7){
4336 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4337 }else{
4338 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4340 zeros_left -= run_before;
4341 coeff_num -= 1 + run_before;
4342 j= scantable[ coeff_num ];
4344 block[j]= (level[i] * qmul[j] + 32)>>6;
4348 if(zeros_left<0){
4349 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4350 return -1;
4353 return 0;
4356 static void predict_field_decoding_flag(H264Context *h){
4357 MpegEncContext * const s = &h->s;
4358 const int mb_xy= h->mb_xy;
4359 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4360 ? s->current_picture.mb_type[mb_xy-1]
4361 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4362 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4363 : 0;
4364 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4368 * decodes a P_SKIP or B_SKIP macroblock
4370 static void decode_mb_skip(H264Context *h){
4371 MpegEncContext * const s = &h->s;
4372 const int mb_xy= h->mb_xy;
4373 int mb_type=0;
4375 memset(h->non_zero_count[mb_xy], 0, 16);
4376 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4378 if(MB_FIELD)
4379 mb_type|= MB_TYPE_INTERLACED;
4381 if( h->slice_type_nos == FF_B_TYPE )
4383 // just for fill_caches. pred_direct_motion will set the real mb_type
4384 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4386 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4387 pred_direct_motion(h, &mb_type);
4388 mb_type|= MB_TYPE_SKIP;
4390 else
4392 int mx, my;
4393 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4395 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4396 pred_pskip_motion(h, &mx, &my);
4397 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4398 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4401 write_back_motion(h, mb_type);
4402 s->current_picture.mb_type[mb_xy]= mb_type;
4403 s->current_picture.qscale_table[mb_xy]= s->qscale;
4404 h->slice_table[ mb_xy ]= h->slice_num;
4405 h->prev_mb_skipped= 1;
4409 * decodes a macroblock
4410 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4412 static int decode_mb_cavlc(H264Context *h){
4413 MpegEncContext * const s = &h->s;
4414 int mb_xy;
4415 int partition_count;
4416 unsigned int mb_type, cbp;
4417 int dct8x8_allowed= h->pps.transform_8x8_mode;
4419 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4421 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4422 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4423 down the code */
4424 if(h->slice_type_nos != FF_I_TYPE){
4425 if(s->mb_skip_run==-1)
4426 s->mb_skip_run= get_ue_golomb(&s->gb);
4428 if (s->mb_skip_run--) {
4429 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4430 if(s->mb_skip_run==0)
4431 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4432 else
4433 predict_field_decoding_flag(h);
4435 decode_mb_skip(h);
4436 return 0;
4439 if(FRAME_MBAFF){
4440 if( (s->mb_y&1) == 0 )
4441 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4444 h->prev_mb_skipped= 0;
4446 mb_type= get_ue_golomb(&s->gb);
4447 if(h->slice_type_nos == FF_B_TYPE){
4448 if(mb_type < 23){
4449 partition_count= b_mb_type_info[mb_type].partition_count;
4450 mb_type= b_mb_type_info[mb_type].type;
4451 }else{
4452 mb_type -= 23;
4453 goto decode_intra_mb;
4455 }else if(h->slice_type_nos == FF_P_TYPE){
4456 if(mb_type < 5){
4457 partition_count= p_mb_type_info[mb_type].partition_count;
4458 mb_type= p_mb_type_info[mb_type].type;
4459 }else{
4460 mb_type -= 5;
4461 goto decode_intra_mb;
4463 }else{
4464 assert(h->slice_type_nos == FF_I_TYPE);
4465 if(h->slice_type == FF_SI_TYPE && mb_type)
4466 mb_type--;
4467 decode_intra_mb:
4468 if(mb_type > 25){
4469 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4470 return -1;
4472 partition_count=0;
4473 cbp= i_mb_type_info[mb_type].cbp;
4474 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4475 mb_type= i_mb_type_info[mb_type].type;
4478 if(MB_FIELD)
4479 mb_type |= MB_TYPE_INTERLACED;
4481 h->slice_table[ mb_xy ]= h->slice_num;
4483 if(IS_INTRA_PCM(mb_type)){
4484 unsigned int x;
4486 // We assume these blocks are very rare so we do not optimize it.
4487 align_get_bits(&s->gb);
4489 // The pixels are stored in the same order as levels in h->mb array.
4490 for(x=0; x < (CHROMA ? 384 : 256); x++){
4491 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4494 // In deblocking, the quantizer is 0
4495 s->current_picture.qscale_table[mb_xy]= 0;
4496 // All coeffs are present
4497 memset(h->non_zero_count[mb_xy], 16, 16);
4499 s->current_picture.mb_type[mb_xy]= mb_type;
4500 return 0;
4503 if(MB_MBAFF){
4504 h->ref_count[0] <<= 1;
4505 h->ref_count[1] <<= 1;
4508 fill_caches(h, mb_type, 0);
4510 //mb_pred
4511 if(IS_INTRA(mb_type)){
4512 int pred_mode;
4513 // init_top_left_availability(h);
4514 if(IS_INTRA4x4(mb_type)){
4515 int i;
4516 int di = 1;
4517 if(dct8x8_allowed && get_bits1(&s->gb)){
4518 mb_type |= MB_TYPE_8x8DCT;
4519 di = 4;
4522 // fill_intra4x4_pred_table(h);
4523 for(i=0; i<16; i+=di){
4524 int mode= pred_intra_mode(h, i);
4526 if(!get_bits1(&s->gb)){
4527 const int rem_mode= get_bits(&s->gb, 3);
4528 mode = rem_mode + (rem_mode >= mode);
4531 if(di==4)
4532 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4533 else
4534 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4536 write_back_intra_pred_mode(h);
4537 if( check_intra4x4_pred_mode(h) < 0)
4538 return -1;
4539 }else{
4540 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4541 if(h->intra16x16_pred_mode < 0)
4542 return -1;
4544 if(CHROMA){
4545 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4546 if(pred_mode < 0)
4547 return -1;
4548 h->chroma_pred_mode= pred_mode;
4550 }else if(partition_count==4){
4551 int i, j, sub_partition_count[4], list, ref[2][4];
4553 if(h->slice_type_nos == FF_B_TYPE){
4554 for(i=0; i<4; i++){
4555 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4556 if(h->sub_mb_type[i] >=13){
4557 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4558 return -1;
4560 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4561 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4563 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4564 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4565 pred_direct_motion(h, &mb_type);
4566 h->ref_cache[0][scan8[4]] =
4567 h->ref_cache[1][scan8[4]] =
4568 h->ref_cache[0][scan8[12]] =
4569 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4571 }else{
4572 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4573 for(i=0; i<4; i++){
4574 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4575 if(h->sub_mb_type[i] >=4){
4576 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4577 return -1;
4579 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4580 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4584 for(list=0; list<h->list_count; list++){
4585 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4586 for(i=0; i<4; i++){
4587 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4588 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4589 unsigned int tmp;
4590 if(ref_count == 1){
4591 tmp= 0;
4592 }else if(ref_count == 2){
4593 tmp= get_bits1(&s->gb)^1;
4594 }else{
4595 tmp= get_ue_golomb_31(&s->gb);
4596 if(tmp>=ref_count){
4597 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4598 return -1;
4601 ref[list][i]= tmp;
4602 }else{
4603 //FIXME
4604 ref[list][i] = -1;
4609 if(dct8x8_allowed)
4610 dct8x8_allowed = get_dct8x8_allowed(h);
4612 for(list=0; list<h->list_count; list++){
4613 for(i=0; i<4; i++){
4614 if(IS_DIRECT(h->sub_mb_type[i])) {
4615 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4616 continue;
4618 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4619 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4621 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4622 const int sub_mb_type= h->sub_mb_type[i];
4623 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4624 for(j=0; j<sub_partition_count[i]; j++){
4625 int mx, my;
4626 const int index= 4*i + block_width*j;
4627 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4628 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4629 mx += get_se_golomb(&s->gb);
4630 my += get_se_golomb(&s->gb);
4631 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4633 if(IS_SUB_8X8(sub_mb_type)){
4634 mv_cache[ 1 ][0]=
4635 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4636 mv_cache[ 1 ][1]=
4637 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4638 }else if(IS_SUB_8X4(sub_mb_type)){
4639 mv_cache[ 1 ][0]= mx;
4640 mv_cache[ 1 ][1]= my;
4641 }else if(IS_SUB_4X8(sub_mb_type)){
4642 mv_cache[ 8 ][0]= mx;
4643 mv_cache[ 8 ][1]= my;
4645 mv_cache[ 0 ][0]= mx;
4646 mv_cache[ 0 ][1]= my;
4648 }else{
4649 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4650 p[0] = p[1]=
4651 p[8] = p[9]= 0;
4655 }else if(IS_DIRECT(mb_type)){
4656 pred_direct_motion(h, &mb_type);
4657 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4658 }else{
4659 int list, mx, my, i;
4660 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4661 if(IS_16X16(mb_type)){
4662 for(list=0; list<h->list_count; list++){
4663 unsigned int val;
4664 if(IS_DIR(mb_type, 0, list)){
4665 if(h->ref_count[list]==1){
4666 val= 0;
4667 }else if(h->ref_count[list]==2){
4668 val= get_bits1(&s->gb)^1;
4669 }else{
4670 val= get_ue_golomb_31(&s->gb);
4671 if(val >= h->ref_count[list]){
4672 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4673 return -1;
4676 }else
4677 val= LIST_NOT_USED&0xFF;
4678 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4680 for(list=0; list<h->list_count; list++){
4681 unsigned int val;
4682 if(IS_DIR(mb_type, 0, list)){
4683 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4684 mx += get_se_golomb(&s->gb);
4685 my += get_se_golomb(&s->gb);
4686 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4688 val= pack16to32(mx,my);
4689 }else
4690 val=0;
4691 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4694 else if(IS_16X8(mb_type)){
4695 for(list=0; list<h->list_count; list++){
4696 for(i=0; i<2; i++){
4697 unsigned int val;
4698 if(IS_DIR(mb_type, i, list)){
4699 if(h->ref_count[list] == 1){
4700 val= 0;
4701 }else if(h->ref_count[list] == 2){
4702 val= get_bits1(&s->gb)^1;
4703 }else{
4704 val= get_ue_golomb_31(&s->gb);
4705 if(val >= h->ref_count[list]){
4706 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4707 return -1;
4710 }else
4711 val= LIST_NOT_USED&0xFF;
4712 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4715 for(list=0; list<h->list_count; list++){
4716 for(i=0; i<2; i++){
4717 unsigned int val;
4718 if(IS_DIR(mb_type, i, list)){
4719 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4720 mx += get_se_golomb(&s->gb);
4721 my += get_se_golomb(&s->gb);
4722 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4724 val= pack16to32(mx,my);
4725 }else
4726 val=0;
4727 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4730 }else{
4731 assert(IS_8X16(mb_type));
4732 for(list=0; list<h->list_count; list++){
4733 for(i=0; i<2; i++){
4734 unsigned int val;
4735 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4736 if(h->ref_count[list]==1){
4737 val= 0;
4738 }else if(h->ref_count[list]==2){
4739 val= get_bits1(&s->gb)^1;
4740 }else{
4741 val= get_ue_golomb_31(&s->gb);
4742 if(val >= h->ref_count[list]){
4743 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4744 return -1;
4747 }else
4748 val= LIST_NOT_USED&0xFF;
4749 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4752 for(list=0; list<h->list_count; list++){
4753 for(i=0; i<2; i++){
4754 unsigned int val;
4755 if(IS_DIR(mb_type, i, list)){
4756 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4757 mx += get_se_golomb(&s->gb);
4758 my += get_se_golomb(&s->gb);
4759 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4761 val= pack16to32(mx,my);
4762 }else
4763 val=0;
4764 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4770 if(IS_INTER(mb_type))
4771 write_back_motion(h, mb_type);
4773 if(!IS_INTRA16x16(mb_type)){
4774 cbp= get_ue_golomb(&s->gb);
4775 if(cbp > 47){
4776 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4777 return -1;
4780 if(CHROMA){
4781 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4782 else cbp= golomb_to_inter_cbp [cbp];
4783 }else{
4784 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4785 else cbp= golomb_to_inter_cbp_gray[cbp];
4788 h->cbp = cbp;
4790 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4791 if(get_bits1(&s->gb)){
4792 mb_type |= MB_TYPE_8x8DCT;
4793 h->cbp_table[mb_xy]= cbp;
4796 s->current_picture.mb_type[mb_xy]= mb_type;
4798 if(cbp || IS_INTRA16x16(mb_type)){
4799 int i8x8, i4x4, chroma_idx;
4800 int dquant;
4801 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4802 const uint8_t *scan, *scan8x8, *dc_scan;
4804 // fill_non_zero_count_cache(h);
4806 if(IS_INTERLACED(mb_type)){
4807 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4808 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4809 dc_scan= luma_dc_field_scan;
4810 }else{
4811 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4812 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4813 dc_scan= luma_dc_zigzag_scan;
4816 dquant= get_se_golomb(&s->gb);
4818 if( dquant > 25 || dquant < -26 ){
4819 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4820 return -1;
4823 s->qscale += dquant;
4824 if(((unsigned)s->qscale) > 51){
4825 if(s->qscale<0) s->qscale+= 52;
4826 else s->qscale-= 52;
4829 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4830 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4831 if(IS_INTRA16x16(mb_type)){
4832 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4833 return -1; //FIXME continue if partitioned and other return -1 too
4836 assert((cbp&15) == 0 || (cbp&15) == 15);
4838 if(cbp&15){
4839 for(i8x8=0; i8x8<4; i8x8++){
4840 for(i4x4=0; i4x4<4; i4x4++){
4841 const int index= i4x4 + 4*i8x8;
4842 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4843 return -1;
4847 }else{
4848 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4850 }else{
4851 for(i8x8=0; i8x8<4; i8x8++){
4852 if(cbp & (1<<i8x8)){
4853 if(IS_8x8DCT(mb_type)){
4854 DCTELEM *buf = &h->mb[64*i8x8];
4855 uint8_t *nnz;
4856 for(i4x4=0; i4x4<4; i4x4++){
4857 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4858 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4859 return -1;
4861 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4862 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4863 }else{
4864 for(i4x4=0; i4x4<4; i4x4++){
4865 const int index= i4x4 + 4*i8x8;
4867 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4868 return -1;
4872 }else{
4873 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4874 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4879 if(cbp&0x30){
4880 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4881 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4882 return -1;
4886 if(cbp&0x20){
4887 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4888 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4889 for(i4x4=0; i4x4<4; i4x4++){
4890 const int index= 16 + 4*chroma_idx + i4x4;
4891 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4892 return -1;
4896 }else{
4897 uint8_t * const nnz= &h->non_zero_count_cache[0];
4898 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4899 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4901 }else{
4902 uint8_t * const nnz= &h->non_zero_count_cache[0];
4903 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4904 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4905 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4907 s->current_picture.qscale_table[mb_xy]= s->qscale;
4908 write_back_non_zero_count(h);
4910 if(MB_MBAFF){
4911 h->ref_count[0] >>= 1;
4912 h->ref_count[1] >>= 1;
4915 return 0;
4918 static int decode_cabac_field_decoding_flag(H264Context *h) {
4919 MpegEncContext * const s = &h->s;
4920 const int mb_x = s->mb_x;
4921 const int mb_y = s->mb_y & ~1;
4922 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4923 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4925 unsigned int ctx = 0;
4927 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4928 ctx += 1;
4930 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4931 ctx += 1;
4934 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4937 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4938 uint8_t *state= &h->cabac_state[ctx_base];
4939 int mb_type;
4941 if(intra_slice){
4942 MpegEncContext * const s = &h->s;
4943 const int mba_xy = h->left_mb_xy[0];
4944 const int mbb_xy = h->top_mb_xy;
4945 int ctx=0;
4946 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4947 ctx++;
4948 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4949 ctx++;
4950 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4951 return 0; /* I4x4 */
4952 state += 2;
4953 }else{
4954 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4955 return 0; /* I4x4 */
4958 if( get_cabac_terminate( &h->cabac ) )
4959 return 25; /* PCM */
4961 mb_type = 1; /* I16x16 */
4962 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4963 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4964 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4965 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4966 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4967 return mb_type;
4970 static int decode_cabac_mb_type_b( H264Context *h ) {
4971 MpegEncContext * const s = &h->s;
4973 const int mba_xy = h->left_mb_xy[0];
4974 const int mbb_xy = h->top_mb_xy;
4975 int ctx = 0;
4976 int bits;
4977 assert(h->slice_type_nos == FF_B_TYPE);
4979 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4980 ctx++;
4981 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4982 ctx++;
4984 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4985 return 0; /* B_Direct_16x16 */
4987 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4988 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4991 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4992 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4993 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4994 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4995 if( bits < 8 )
4996 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4997 else if( bits == 13 ) {
4998 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4999 } else if( bits == 14 )
5000 return 11; /* B_L1_L0_8x16 */
5001 else if( bits == 15 )
5002 return 22; /* B_8x8 */
5004 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5005 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5008 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5009 MpegEncContext * const s = &h->s;
5010 int mba_xy, mbb_xy;
5011 int ctx = 0;
5013 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5014 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5015 mba_xy = mb_xy - 1;
5016 if( (mb_y&1)
5017 && h->slice_table[mba_xy] == h->slice_num
5018 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5019 mba_xy += s->mb_stride;
5020 if( MB_FIELD ){
5021 mbb_xy = mb_xy - s->mb_stride;
5022 if( !(mb_y&1)
5023 && h->slice_table[mbb_xy] == h->slice_num
5024 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5025 mbb_xy -= s->mb_stride;
5026 }else
5027 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5028 }else{
5029 int mb_xy = h->mb_xy;
5030 mba_xy = mb_xy - 1;
5031 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5034 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5035 ctx++;
5036 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5037 ctx++;
5039 if( h->slice_type_nos == FF_B_TYPE )
5040 ctx += 13;
5041 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5044 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5045 int mode = 0;
5047 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5048 return pred_mode;
5050 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5051 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5052 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5054 if( mode >= pred_mode )
5055 return mode + 1;
5056 else
5057 return mode;
5060 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5061 const int mba_xy = h->left_mb_xy[0];
5062 const int mbb_xy = h->top_mb_xy;
5064 int ctx = 0;
5066 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5067 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5068 ctx++;
5070 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5071 ctx++;
5073 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5074 return 0;
5076 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5077 return 1;
5078 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5079 return 2;
5080 else
5081 return 3;
5084 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5085 int cbp_b, cbp_a, ctx, cbp = 0;
5087 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5088 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5090 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5091 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5092 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5093 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5094 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5095 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5096 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5097 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5098 return cbp;
5100 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5101 int ctx;
5102 int cbp_a, cbp_b;
5104 cbp_a = (h->left_cbp>>4)&0x03;
5105 cbp_b = (h-> top_cbp>>4)&0x03;
5107 ctx = 0;
5108 if( cbp_a > 0 ) ctx++;
5109 if( cbp_b > 0 ) ctx += 2;
5110 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5111 return 0;
5113 ctx = 4;
5114 if( cbp_a == 2 ) ctx++;
5115 if( cbp_b == 2 ) ctx += 2;
5116 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5118 static int decode_cabac_mb_dqp( H264Context *h) {
5119 int ctx= h->last_qscale_diff != 0;
5120 int val = 0;
5122 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5123 ctx= 2+(ctx>>1);
5124 val++;
5125 if(val > 102) //prevent infinite loop
5126 return INT_MIN;
5129 if( val&0x01 )
5130 return (val + 1)>>1 ;
5131 else
5132 return -((val + 1)>>1);
5134 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5135 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5136 return 0; /* 8x8 */
5137 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5138 return 1; /* 8x4 */
5139 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5140 return 2; /* 4x8 */
5141 return 3; /* 4x4 */
5143 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5144 int type;
5145 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5146 return 0; /* B_Direct_8x8 */
5147 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5148 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5149 type = 3;
5150 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5151 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5152 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5153 type += 4;
5155 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5156 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5157 return type;
5160 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5161 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5164 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5165 int refa = h->ref_cache[list][scan8[n] - 1];
5166 int refb = h->ref_cache[list][scan8[n] - 8];
5167 int ref = 0;
5168 int ctx = 0;
5170 if( h->slice_type_nos == FF_B_TYPE) {
5171 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5172 ctx++;
5173 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5174 ctx += 2;
5175 } else {
5176 if( refa > 0 )
5177 ctx++;
5178 if( refb > 0 )
5179 ctx += 2;
5182 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5183 ref++;
5184 ctx = (ctx>>2)+4;
5185 if(ref >= 32 /*h->ref_list[list]*/){
5186 return -1;
5189 return ref;
5192 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5193 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5194 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5195 int ctxbase = (l == 0) ? 40 : 47;
5196 int mvd;
5197 int ctx = (amvd>2) + (amvd>32);
5199 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5200 return 0;
5202 mvd= 1;
5203 ctx= 3;
5204 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5205 mvd++;
5206 if( ctx < 6 )
5207 ctx++;
5210 if( mvd >= 9 ) {
5211 int k = 3;
5212 while( get_cabac_bypass( &h->cabac ) ) {
5213 mvd += 1 << k;
5214 k++;
5215 if(k>24){
5216 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5217 return INT_MIN;
5220 while( k-- ) {
5221 if( get_cabac_bypass( &h->cabac ) )
5222 mvd += 1 << k;
5225 return get_cabac_bypass_sign( &h->cabac, -mvd );
5228 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5229 int nza, nzb;
5230 int ctx = 0;
5232 if( is_dc ) {
5233 if( cat == 0 ) {
5234 nza = h->left_cbp&0x100;
5235 nzb = h-> top_cbp&0x100;
5236 } else {
5237 nza = (h->left_cbp>>(6+idx))&0x01;
5238 nzb = (h-> top_cbp>>(6+idx))&0x01;
5240 } else {
5241 assert(cat == 1 || cat == 2 || cat == 4);
5242 nza = h->non_zero_count_cache[scan8[idx] - 1];
5243 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5246 if( nza > 0 )
5247 ctx++;
5249 if( nzb > 0 )
5250 ctx += 2;
5252 return ctx + 4 * cat;
5255 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5256 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5257 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5258 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5259 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5262 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5263 static const int significant_coeff_flag_offset[2][6] = {
5264 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5265 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5267 static const int last_coeff_flag_offset[2][6] = {
5268 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5269 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5271 static const int coeff_abs_level_m1_offset[6] = {
5272 227+0, 227+10, 227+20, 227+30, 227+39, 426
5274 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5275 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5276 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5277 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5278 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5279 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5280 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5281 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5282 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5284 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5285 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5286 * map node ctx => cabac ctx for level=1 */
5287 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5288 /* map node ctx => cabac ctx for level>1 */
5289 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5290 static const uint8_t coeff_abs_level_transition[2][8] = {
5291 /* update node ctx after decoding a level=1 */
5292 { 1, 2, 3, 3, 4, 5, 6, 7 },
5293 /* update node ctx after decoding a level>1 */
5294 { 4, 4, 4, 4, 5, 6, 7, 7 }
5297 int index[64];
5299 int av_unused last;
5300 int coeff_count = 0;
5301 int node_ctx = 0;
5303 uint8_t *significant_coeff_ctx_base;
5304 uint8_t *last_coeff_ctx_base;
5305 uint8_t *abs_level_m1_ctx_base;
5307 #if !ARCH_X86
5308 #define CABAC_ON_STACK
5309 #endif
5310 #ifdef CABAC_ON_STACK
5311 #define CC &cc
5312 CABACContext cc;
5313 cc.range = h->cabac.range;
5314 cc.low = h->cabac.low;
5315 cc.bytestream= h->cabac.bytestream;
5316 #else
5317 #define CC &h->cabac
5318 #endif
5321 /* cat: 0-> DC 16x16 n = 0
5322 * 1-> AC 16x16 n = luma4x4idx
5323 * 2-> Luma4x4 n = luma4x4idx
5324 * 3-> DC Chroma n = iCbCr
5325 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5326 * 5-> Luma8x8 n = 4 * luma8x8idx
5329 /* read coded block flag */
5330 if( is_dc || cat != 5 ) {
5331 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5332 if( !is_dc )
5333 h->non_zero_count_cache[scan8[n]] = 0;
5335 #ifdef CABAC_ON_STACK
5336 h->cabac.range = cc.range ;
5337 h->cabac.low = cc.low ;
5338 h->cabac.bytestream= cc.bytestream;
5339 #endif
5340 return;
5344 significant_coeff_ctx_base = h->cabac_state
5345 + significant_coeff_flag_offset[MB_FIELD][cat];
5346 last_coeff_ctx_base = h->cabac_state
5347 + last_coeff_flag_offset[MB_FIELD][cat];
5348 abs_level_m1_ctx_base = h->cabac_state
5349 + coeff_abs_level_m1_offset[cat];
5351 if( !is_dc && cat == 5 ) {
5352 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5353 for(last= 0; last < coefs; last++) { \
5354 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5355 if( get_cabac( CC, sig_ctx )) { \
5356 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5357 index[coeff_count++] = last; \
5358 if( get_cabac( CC, last_ctx ) ) { \
5359 last= max_coeff; \
5360 break; \
5364 if( last == max_coeff -1 ) {\
5365 index[coeff_count++] = last;\
5367 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5368 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5369 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5370 } else {
5371 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5372 #else
5373 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5374 } else {
5375 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5376 #endif
5378 assert(coeff_count > 0);
5380 if( is_dc ) {
5381 if( cat == 0 )
5382 h->cbp_table[h->mb_xy] |= 0x100;
5383 else
5384 h->cbp_table[h->mb_xy] |= 0x40 << n;
5385 } else {
5386 if( cat == 5 )
5387 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5388 else {
5389 assert( cat == 1 || cat == 2 || cat == 4 );
5390 h->non_zero_count_cache[scan8[n]] = coeff_count;
5394 do {
5395 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5397 int j= scantable[index[--coeff_count]];
5399 if( get_cabac( CC, ctx ) == 0 ) {
5400 node_ctx = coeff_abs_level_transition[0][node_ctx];
5401 if( is_dc ) {
5402 block[j] = get_cabac_bypass_sign( CC, -1);
5403 }else{
5404 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5406 } else {
5407 int coeff_abs = 2;
5408 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5409 node_ctx = coeff_abs_level_transition[1][node_ctx];
5411 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5412 coeff_abs++;
5415 if( coeff_abs >= 15 ) {
5416 int j = 0;
5417 while( get_cabac_bypass( CC ) ) {
5418 j++;
5421 coeff_abs=1;
5422 while( j-- ) {
5423 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5425 coeff_abs+= 14;
5428 if( is_dc ) {
5429 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5430 }else{
5431 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5434 } while( coeff_count );
5435 #ifdef CABAC_ON_STACK
5436 h->cabac.range = cc.range ;
5437 h->cabac.low = cc.low ;
5438 h->cabac.bytestream= cc.bytestream;
5439 #endif
5443 #if !CONFIG_SMALL
5444 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5445 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5448 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5449 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5451 #endif
5453 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5454 #if CONFIG_SMALL
5455 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5456 #else
5457 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5458 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5459 #endif
5462 static inline void compute_mb_neighbors(H264Context *h)
5464 MpegEncContext * const s = &h->s;
5465 const int mb_xy = h->mb_xy;
5466 h->top_mb_xy = mb_xy - s->mb_stride;
5467 h->left_mb_xy[0] = mb_xy - 1;
5468 if(FRAME_MBAFF){
5469 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5470 const int top_pair_xy = pair_xy - s->mb_stride;
5471 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5472 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5473 const int curr_mb_field_flag = MB_FIELD;
5474 const int bottom = (s->mb_y & 1);
5476 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5477 h->top_mb_xy -= s->mb_stride;
5479 if (!left_mb_field_flag == curr_mb_field_flag) {
5480 h->left_mb_xy[0] = pair_xy - 1;
5482 } else if (FIELD_PICTURE) {
5483 h->top_mb_xy -= s->mb_stride;
5485 return;
5489 * decodes a macroblock
5490 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5492 static int decode_mb_cabac(H264Context *h) {
5493 MpegEncContext * const s = &h->s;
5494 int mb_xy;
5495 int mb_type, partition_count, cbp = 0;
5496 int dct8x8_allowed= h->pps.transform_8x8_mode;
5498 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5500 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5501 if( h->slice_type_nos != FF_I_TYPE ) {
5502 int skip;
5503 /* a skipped mb needs the aff flag from the following mb */
5504 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5505 predict_field_decoding_flag(h);
5506 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5507 skip = h->next_mb_skipped;
5508 else
5509 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5510 /* read skip flags */
5511 if( skip ) {
5512 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5513 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5514 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5515 if(!h->next_mb_skipped)
5516 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5519 decode_mb_skip(h);
5521 h->cbp_table[mb_xy] = 0;
5522 h->chroma_pred_mode_table[mb_xy] = 0;
5523 h->last_qscale_diff = 0;
5525 return 0;
5529 if(FRAME_MBAFF){
5530 if( (s->mb_y&1) == 0 )
5531 h->mb_mbaff =
5532 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5535 h->prev_mb_skipped = 0;
5537 compute_mb_neighbors(h);
5539 if( h->slice_type_nos == FF_B_TYPE ) {
5540 mb_type = decode_cabac_mb_type_b( h );
5541 if( mb_type < 23 ){
5542 partition_count= b_mb_type_info[mb_type].partition_count;
5543 mb_type= b_mb_type_info[mb_type].type;
5544 }else{
5545 mb_type -= 23;
5546 goto decode_intra_mb;
5548 } else if( h->slice_type_nos == FF_P_TYPE ) {
5549 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5550 /* P-type */
5551 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5552 /* P_L0_D16x16, P_8x8 */
5553 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5554 } else {
5555 /* P_L0_D8x16, P_L0_D16x8 */
5556 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5558 partition_count= p_mb_type_info[mb_type].partition_count;
5559 mb_type= p_mb_type_info[mb_type].type;
5560 } else {
5561 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5562 goto decode_intra_mb;
5564 } else {
5565 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5566 if(h->slice_type == FF_SI_TYPE && mb_type)
5567 mb_type--;
5568 assert(h->slice_type_nos == FF_I_TYPE);
5569 decode_intra_mb:
5570 partition_count = 0;
5571 cbp= i_mb_type_info[mb_type].cbp;
5572 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5573 mb_type= i_mb_type_info[mb_type].type;
5575 if(MB_FIELD)
5576 mb_type |= MB_TYPE_INTERLACED;
5578 h->slice_table[ mb_xy ]= h->slice_num;
5580 if(IS_INTRA_PCM(mb_type)) {
5581 const uint8_t *ptr;
5583 // We assume these blocks are very rare so we do not optimize it.
5584 // FIXME The two following lines get the bitstream position in the cabac
5585 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5586 ptr= h->cabac.bytestream;
5587 if(h->cabac.low&0x1) ptr--;
5588 if(CABAC_BITS==16){
5589 if(h->cabac.low&0x1FF) ptr--;
5592 // The pixels are stored in the same order as levels in h->mb array.
5593 memcpy(h->mb, ptr, 256); ptr+=256;
5594 if(CHROMA){
5595 memcpy(h->mb+128, ptr, 128); ptr+=128;
5598 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5600 // All blocks are present
5601 h->cbp_table[mb_xy] = 0x1ef;
5602 h->chroma_pred_mode_table[mb_xy] = 0;
5603 // In deblocking, the quantizer is 0
5604 s->current_picture.qscale_table[mb_xy]= 0;
5605 // All coeffs are present
5606 memset(h->non_zero_count[mb_xy], 16, 16);
5607 s->current_picture.mb_type[mb_xy]= mb_type;
5608 h->last_qscale_diff = 0;
5609 return 0;
5612 if(MB_MBAFF){
5613 h->ref_count[0] <<= 1;
5614 h->ref_count[1] <<= 1;
5617 fill_caches(h, mb_type, 0);
5619 if( IS_INTRA( mb_type ) ) {
5620 int i, pred_mode;
5621 if( IS_INTRA4x4( mb_type ) ) {
5622 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5623 mb_type |= MB_TYPE_8x8DCT;
5624 for( i = 0; i < 16; i+=4 ) {
5625 int pred = pred_intra_mode( h, i );
5626 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5627 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5629 } else {
5630 for( i = 0; i < 16; i++ ) {
5631 int pred = pred_intra_mode( h, i );
5632 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5634 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5637 write_back_intra_pred_mode(h);
5638 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5639 } else {
5640 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5641 if( h->intra16x16_pred_mode < 0 ) return -1;
5643 if(CHROMA){
5644 h->chroma_pred_mode_table[mb_xy] =
5645 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5647 pred_mode= check_intra_pred_mode( h, pred_mode );
5648 if( pred_mode < 0 ) return -1;
5649 h->chroma_pred_mode= pred_mode;
5651 } else if( partition_count == 4 ) {
5652 int i, j, sub_partition_count[4], list, ref[2][4];
5654 if( h->slice_type_nos == FF_B_TYPE ) {
5655 for( i = 0; i < 4; i++ ) {
5656 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5657 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5658 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5660 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5661 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5662 pred_direct_motion(h, &mb_type);
5663 h->ref_cache[0][scan8[4]] =
5664 h->ref_cache[1][scan8[4]] =
5665 h->ref_cache[0][scan8[12]] =
5666 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5667 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5668 for( i = 0; i < 4; i++ )
5669 if( IS_DIRECT(h->sub_mb_type[i]) )
5670 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5673 } else {
5674 for( i = 0; i < 4; i++ ) {
5675 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5676 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5677 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5681 for( list = 0; list < h->list_count; list++ ) {
5682 for( i = 0; i < 4; i++ ) {
5683 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5684 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5685 if( h->ref_count[list] > 1 ){
5686 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5687 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5688 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5689 return -1;
5691 }else
5692 ref[list][i] = 0;
5693 } else {
5694 ref[list][i] = -1;
5696 h->ref_cache[list][ scan8[4*i]+1 ]=
5697 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5701 if(dct8x8_allowed)
5702 dct8x8_allowed = get_dct8x8_allowed(h);
5704 for(list=0; list<h->list_count; list++){
5705 for(i=0; i<4; i++){
5706 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5707 if(IS_DIRECT(h->sub_mb_type[i])){
5708 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5709 continue;
5712 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5713 const int sub_mb_type= h->sub_mb_type[i];
5714 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5715 for(j=0; j<sub_partition_count[i]; j++){
5716 int mpx, mpy;
5717 int mx, my;
5718 const int index= 4*i + block_width*j;
5719 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5720 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5721 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5723 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5724 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5725 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5727 if(IS_SUB_8X8(sub_mb_type)){
5728 mv_cache[ 1 ][0]=
5729 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5730 mv_cache[ 1 ][1]=
5731 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5733 mvd_cache[ 1 ][0]=
5734 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5735 mvd_cache[ 1 ][1]=
5736 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5737 }else if(IS_SUB_8X4(sub_mb_type)){
5738 mv_cache[ 1 ][0]= mx;
5739 mv_cache[ 1 ][1]= my;
5741 mvd_cache[ 1 ][0]= mx - mpx;
5742 mvd_cache[ 1 ][1]= my - mpy;
5743 }else if(IS_SUB_4X8(sub_mb_type)){
5744 mv_cache[ 8 ][0]= mx;
5745 mv_cache[ 8 ][1]= my;
5747 mvd_cache[ 8 ][0]= mx - mpx;
5748 mvd_cache[ 8 ][1]= my - mpy;
5750 mv_cache[ 0 ][0]= mx;
5751 mv_cache[ 0 ][1]= my;
5753 mvd_cache[ 0 ][0]= mx - mpx;
5754 mvd_cache[ 0 ][1]= my - mpy;
5756 }else{
5757 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5758 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5759 p[0] = p[1] = p[8] = p[9] = 0;
5760 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5764 } else if( IS_DIRECT(mb_type) ) {
5765 pred_direct_motion(h, &mb_type);
5766 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5767 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5768 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5769 } else {
5770 int list, mx, my, i, mpx, mpy;
5771 if(IS_16X16(mb_type)){
5772 for(list=0; list<h->list_count; list++){
5773 if(IS_DIR(mb_type, 0, list)){
5774 int ref;
5775 if(h->ref_count[list] > 1){
5776 ref= decode_cabac_mb_ref(h, list, 0);
5777 if(ref >= (unsigned)h->ref_count[list]){
5778 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5779 return -1;
5781 }else
5782 ref=0;
5783 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5784 }else
5785 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5787 for(list=0; list<h->list_count; list++){
5788 if(IS_DIR(mb_type, 0, list)){
5789 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5791 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5792 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5793 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5795 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5796 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5797 }else
5798 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5801 else if(IS_16X8(mb_type)){
5802 for(list=0; list<h->list_count; list++){
5803 for(i=0; i<2; i++){
5804 if(IS_DIR(mb_type, i, list)){
5805 int ref;
5806 if(h->ref_count[list] > 1){
5807 ref= decode_cabac_mb_ref( h, list, 8*i );
5808 if(ref >= (unsigned)h->ref_count[list]){
5809 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5810 return -1;
5812 }else
5813 ref=0;
5814 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5815 }else
5816 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5819 for(list=0; list<h->list_count; list++){
5820 for(i=0; i<2; i++){
5821 if(IS_DIR(mb_type, i, list)){
5822 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5823 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5824 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5825 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5827 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5828 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5829 }else{
5830 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5831 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5835 }else{
5836 assert(IS_8X16(mb_type));
5837 for(list=0; list<h->list_count; list++){
5838 for(i=0; i<2; i++){
5839 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5840 int ref;
5841 if(h->ref_count[list] > 1){
5842 ref= decode_cabac_mb_ref( h, list, 4*i );
5843 if(ref >= (unsigned)h->ref_count[list]){
5844 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5845 return -1;
5847 }else
5848 ref=0;
5849 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5850 }else
5851 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5854 for(list=0; list<h->list_count; list++){
5855 for(i=0; i<2; i++){
5856 if(IS_DIR(mb_type, i, list)){
5857 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5858 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5859 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5861 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5862 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5863 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5864 }else{
5865 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5866 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5873 if( IS_INTER( mb_type ) ) {
5874 h->chroma_pred_mode_table[mb_xy] = 0;
5875 write_back_motion( h, mb_type );
5878 if( !IS_INTRA16x16( mb_type ) ) {
5879 cbp = decode_cabac_mb_cbp_luma( h );
5880 if(CHROMA)
5881 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5884 h->cbp_table[mb_xy] = h->cbp = cbp;
5886 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5887 if( decode_cabac_mb_transform_size( h ) )
5888 mb_type |= MB_TYPE_8x8DCT;
5890 s->current_picture.mb_type[mb_xy]= mb_type;
5892 if( cbp || IS_INTRA16x16( mb_type ) ) {
5893 const uint8_t *scan, *scan8x8, *dc_scan;
5894 const uint32_t *qmul;
5895 int dqp;
5897 if(IS_INTERLACED(mb_type)){
5898 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5899 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5900 dc_scan= luma_dc_field_scan;
5901 }else{
5902 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5903 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5904 dc_scan= luma_dc_zigzag_scan;
5907 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5908 if( dqp == INT_MIN ){
5909 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5910 return -1;
5912 s->qscale += dqp;
5913 if(((unsigned)s->qscale) > 51){
5914 if(s->qscale<0) s->qscale+= 52;
5915 else s->qscale-= 52;
5917 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5918 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5920 if( IS_INTRA16x16( mb_type ) ) {
5921 int i;
5922 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5923 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5925 if( cbp&15 ) {
5926 qmul = h->dequant4_coeff[0][s->qscale];
5927 for( i = 0; i < 16; i++ ) {
5928 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5929 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5931 } else {
5932 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5934 } else {
5935 int i8x8, i4x4;
5936 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5937 if( cbp & (1<<i8x8) ) {
5938 if( IS_8x8DCT(mb_type) ) {
5939 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5940 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5941 } else {
5942 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5943 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5944 const int index = 4*i8x8 + i4x4;
5945 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5946 //START_TIMER
5947 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5948 //STOP_TIMER("decode_residual")
5951 } else {
5952 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5953 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5958 if( cbp&0x30 ){
5959 int c;
5960 for( c = 0; c < 2; c++ ) {
5961 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5962 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5966 if( cbp&0x20 ) {
5967 int c, i;
5968 for( c = 0; c < 2; c++ ) {
5969 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5970 for( i = 0; i < 4; i++ ) {
5971 const int index = 16 + 4 * c + i;
5972 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5973 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5976 } else {
5977 uint8_t * const nnz= &h->non_zero_count_cache[0];
5978 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5979 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5981 } else {
5982 uint8_t * const nnz= &h->non_zero_count_cache[0];
5983 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5984 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5985 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5986 h->last_qscale_diff = 0;
5989 s->current_picture.qscale_table[mb_xy]= s->qscale;
5990 write_back_non_zero_count(h);
5992 if(MB_MBAFF){
5993 h->ref_count[0] >>= 1;
5994 h->ref_count[1] >>= 1;
5997 return 0;
6001 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6002 const int index_a = qp + h->slice_alpha_c0_offset;
6003 const int alpha = (alpha_table+52)[index_a];
6004 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6006 if( bS[0] < 4 ) {
6007 int8_t tc[4];
6008 tc[0] = (tc0_table+52)[index_a][bS[0]];
6009 tc[1] = (tc0_table+52)[index_a][bS[1]];
6010 tc[2] = (tc0_table+52)[index_a][bS[2]];
6011 tc[3] = (tc0_table+52)[index_a][bS[3]];
6012 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6013 } else {
6014 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
6017 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6018 const int index_a = qp + h->slice_alpha_c0_offset;
6019 const int alpha = (alpha_table+52)[index_a];
6020 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6022 if( bS[0] < 4 ) {
6023 int8_t tc[4];
6024 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6025 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6026 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6027 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6028 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6029 } else {
6030 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6034 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6035 int i;
6036 for( i = 0; i < 16; i++, pix += stride) {
6037 int index_a;
6038 int alpha;
6039 int beta;
6041 int qp_index;
6042 int bS_index = (i >> 1);
6043 if (!MB_FIELD) {
6044 bS_index &= ~1;
6045 bS_index |= (i & 1);
6048 if( bS[bS_index] == 0 ) {
6049 continue;
6052 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6053 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6054 alpha = (alpha_table+52)[index_a];
6055 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6057 if( bS[bS_index] < 4 ) {
6058 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
6059 const int p0 = pix[-1];
6060 const int p1 = pix[-2];
6061 const int p2 = pix[-3];
6062 const int q0 = pix[0];
6063 const int q1 = pix[1];
6064 const int q2 = pix[2];
6066 if( FFABS( p0 - q0 ) < alpha &&
6067 FFABS( p1 - p0 ) < beta &&
6068 FFABS( q1 - q0 ) < beta ) {
6069 int tc = tc0;
6070 int i_delta;
6072 if( FFABS( p2 - p0 ) < beta ) {
6073 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6074 tc++;
6076 if( FFABS( q2 - q0 ) < beta ) {
6077 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6078 tc++;
6081 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6082 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6083 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6084 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6086 }else{
6087 const int p0 = pix[-1];
6088 const int p1 = pix[-2];
6089 const int p2 = pix[-3];
6091 const int q0 = pix[0];
6092 const int q1 = pix[1];
6093 const int q2 = pix[2];
6095 if( FFABS( p0 - q0 ) < alpha &&
6096 FFABS( p1 - p0 ) < beta &&
6097 FFABS( q1 - q0 ) < beta ) {
6099 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6100 if( FFABS( p2 - p0 ) < beta)
6102 const int p3 = pix[-4];
6103 /* p0', p1', p2' */
6104 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6105 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6106 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6107 } else {
6108 /* p0' */
6109 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6111 if( FFABS( q2 - q0 ) < beta)
6113 const int q3 = pix[3];
6114 /* q0', q1', q2' */
6115 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6116 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6117 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6118 } else {
6119 /* q0' */
6120 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6122 }else{
6123 /* p0', q0' */
6124 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6125 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6127 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6132 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6133 int i;
6134 for( i = 0; i < 8; i++, pix += stride) {
6135 int index_a;
6136 int alpha;
6137 int beta;
6139 int qp_index;
6140 int bS_index = i;
6142 if( bS[bS_index] == 0 ) {
6143 continue;
6146 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6147 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6148 alpha = (alpha_table+52)[index_a];
6149 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6151 if( bS[bS_index] < 4 ) {
6152 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6153 const int p0 = pix[-1];
6154 const int p1 = pix[-2];
6155 const int q0 = pix[0];
6156 const int q1 = pix[1];
6158 if( FFABS( p0 - q0 ) < alpha &&
6159 FFABS( p1 - p0 ) < beta &&
6160 FFABS( q1 - q0 ) < beta ) {
6161 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6163 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6164 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6165 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6167 }else{
6168 const int p0 = pix[-1];
6169 const int p1 = pix[-2];
6170 const int q0 = pix[0];
6171 const int q1 = pix[1];
6173 if( FFABS( p0 - q0 ) < alpha &&
6174 FFABS( p1 - p0 ) < beta &&
6175 FFABS( q1 - q0 ) < beta ) {
6177 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6178 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6179 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6185 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6186 const int index_a = qp + h->slice_alpha_c0_offset;
6187 const int alpha = (alpha_table+52)[index_a];
6188 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6190 if( bS[0] < 4 ) {
6191 int8_t tc[4];
6192 tc[0] = (tc0_table+52)[index_a][bS[0]];
6193 tc[1] = (tc0_table+52)[index_a][bS[1]];
6194 tc[2] = (tc0_table+52)[index_a][bS[2]];
6195 tc[3] = (tc0_table+52)[index_a][bS[3]];
6196 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6197 } else {
6198 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6202 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6203 const int index_a = qp + h->slice_alpha_c0_offset;
6204 const int alpha = (alpha_table+52)[index_a];
6205 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6207 if( bS[0] < 4 ) {
6208 int8_t tc[4];
6209 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6210 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6211 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6212 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6213 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6214 } else {
6215 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6219 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6220 MpegEncContext * const s = &h->s;
6221 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6222 int mb_xy, mb_type;
6223 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6225 mb_xy = h->mb_xy;
6227 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6228 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6229 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6230 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6231 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6232 return;
6234 assert(!FRAME_MBAFF);
6236 mb_type = s->current_picture.mb_type[mb_xy];
6237 qp = s->current_picture.qscale_table[mb_xy];
6238 qp0 = s->current_picture.qscale_table[mb_xy-1];
6239 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6240 qpc = get_chroma_qp( h, 0, qp );
6241 qpc0 = get_chroma_qp( h, 0, qp0 );
6242 qpc1 = get_chroma_qp( h, 0, qp1 );
6243 qp0 = (qp + qp0 + 1) >> 1;
6244 qp1 = (qp + qp1 + 1) >> 1;
6245 qpc0 = (qpc + qpc0 + 1) >> 1;
6246 qpc1 = (qpc + qpc1 + 1) >> 1;
6247 qp_thresh = 15 - h->slice_alpha_c0_offset;
6248 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6249 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6250 return;
6252 if( IS_INTRA(mb_type) ) {
6253 int16_t bS4[4] = {4,4,4,4};
6254 int16_t bS3[4] = {3,3,3,3};
6255 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6256 if( IS_8x8DCT(mb_type) ) {
6257 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6258 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6259 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6260 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6261 } else {
6262 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6263 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6264 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6265 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6266 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6267 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6268 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6269 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6271 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6272 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6273 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6274 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6275 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6276 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6277 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6278 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6279 return;
6280 } else {
6281 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6282 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6283 int edges;
6284 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6285 edges = 4;
6286 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6287 } else {
6288 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6289 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6290 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6291 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6292 ? 3 : 0;
6293 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6294 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6295 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6296 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6298 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6299 bSv[0][0] = 0x0004000400040004ULL;
6300 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6301 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6303 #define FILTER(hv,dir,edge)\
6304 if(bSv[dir][edge]) {\
6305 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6306 if(!(edge&1)) {\
6307 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6308 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6311 if( edges == 1 ) {
6312 FILTER(v,0,0);
6313 FILTER(h,1,0);
6314 } else if( IS_8x8DCT(mb_type) ) {
6315 FILTER(v,0,0);
6316 FILTER(v,0,2);
6317 FILTER(h,1,0);
6318 FILTER(h,1,2);
6319 } else {
6320 FILTER(v,0,0);
6321 FILTER(v,0,1);
6322 FILTER(v,0,2);
6323 FILTER(v,0,3);
6324 FILTER(h,1,0);
6325 FILTER(h,1,1);
6326 FILTER(h,1,2);
6327 FILTER(h,1,3);
6329 #undef FILTER
6334 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6335 MpegEncContext * const s = &h->s;
6336 int edge;
6337 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6338 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6339 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6340 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6341 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6343 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6344 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6345 // how often to recheck mv-based bS when iterating between edges
6346 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6347 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6348 // how often to recheck mv-based bS when iterating along each edge
6349 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6351 if (first_vertical_edge_done) {
6352 start = 1;
6355 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6356 start = 1;
6358 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6359 && !IS_INTERLACED(mb_type)
6360 && IS_INTERLACED(mbm_type)
6362 // This is a special case in the norm where the filtering must
6363 // be done twice (one each of the field) even if we are in a
6364 // frame macroblock.
6366 static const int nnz_idx[4] = {4,5,6,3};
6367 unsigned int tmp_linesize = 2 * linesize;
6368 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6369 int mbn_xy = mb_xy - 2 * s->mb_stride;
6370 int qp;
6371 int i, j;
6372 int16_t bS[4];
6374 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6375 if( IS_INTRA(mb_type) ||
6376 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6377 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6378 } else {
6379 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6380 for( i = 0; i < 4; i++ ) {
6381 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6382 mbn_nnz[nnz_idx[i]] != 0 )
6383 bS[i] = 2;
6384 else
6385 bS[i] = 1;
6388 // Do not use s->qscale as luma quantizer because it has not the same
6389 // value in IPCM macroblocks.
6390 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6391 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6392 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6393 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6394 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6395 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6396 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6397 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6400 start = 1;
6403 /* Calculate bS */
6404 for( edge = start; edge < edges; edge++ ) {
6405 /* mbn_xy: neighbor macroblock */
6406 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6407 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6408 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6409 int16_t bS[4];
6410 int qp;
6412 if( (edge&1) && IS_8x8DCT(mb_type) )
6413 continue;
6415 if( IS_INTRA(mb_type) ||
6416 IS_INTRA(mbn_type) ) {
6417 int value;
6418 if (edge == 0) {
6419 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6420 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6422 value = 4;
6423 } else {
6424 value = 3;
6426 } else {
6427 value = 3;
6429 bS[0] = bS[1] = bS[2] = bS[3] = value;
6430 } else {
6431 int i, l;
6432 int mv_done;
6434 if( edge & mask_edge ) {
6435 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6436 mv_done = 1;
6438 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6439 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6440 mv_done = 1;
6442 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6443 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6444 int bn_idx= b_idx - (dir ? 8:1);
6445 int v = 0;
6447 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6448 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6449 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6450 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6453 if(h->slice_type_nos == FF_B_TYPE && v){
6454 v=0;
6455 for( l = 0; !v && l < 2; l++ ) {
6456 int ln= 1-l;
6457 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6458 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6459 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6463 bS[0] = bS[1] = bS[2] = bS[3] = v;
6464 mv_done = 1;
6466 else
6467 mv_done = 0;
6469 for( i = 0; i < 4; i++ ) {
6470 int x = dir == 0 ? edge : i;
6471 int y = dir == 0 ? i : edge;
6472 int b_idx= 8 + 4 + x + 8*y;
6473 int bn_idx= b_idx - (dir ? 8:1);
6475 if( h->non_zero_count_cache[b_idx] |
6476 h->non_zero_count_cache[bn_idx] ) {
6477 bS[i] = 2;
6479 else if(!mv_done)
6481 bS[i] = 0;
6482 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6483 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6484 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6485 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6486 bS[i] = 1;
6487 break;
6491 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6492 bS[i] = 0;
6493 for( l = 0; l < 2; l++ ) {
6494 int ln= 1-l;
6495 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6496 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6497 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6498 bS[i] = 1;
6499 break;
6506 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6507 continue;
6510 /* Filter edge */
6511 // Do not use s->qscale as luma quantizer because it has not the same
6512 // value in IPCM macroblocks.
6513 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6514 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6515 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6516 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6517 if( dir == 0 ) {
6518 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6519 if( (edge&1) == 0 ) {
6520 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6521 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6522 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6523 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6525 } else {
6526 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6527 if( (edge&1) == 0 ) {
6528 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6529 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6530 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6531 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6537 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6538 MpegEncContext * const s = &h->s;
6539 const int mb_xy= mb_x + mb_y*s->mb_stride;
6540 const int mb_type = s->current_picture.mb_type[mb_xy];
6541 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6542 int first_vertical_edge_done = 0;
6543 av_unused int dir;
6545 //for sufficiently low qp, filtering wouldn't do anything
6546 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6547 if(!FRAME_MBAFF){
6548 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6549 int qp = s->current_picture.qscale_table[mb_xy];
6550 if(qp <= qp_thresh
6551 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6552 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6553 return;
6557 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6558 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6559 int top_type, left_type[2];
6560 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6561 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6562 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6564 if(IS_8x8DCT(top_type)){
6565 h->non_zero_count_cache[4+8*0]=
6566 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6567 h->non_zero_count_cache[6+8*0]=
6568 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6570 if(IS_8x8DCT(left_type[0])){
6571 h->non_zero_count_cache[3+8*1]=
6572 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6574 if(IS_8x8DCT(left_type[1])){
6575 h->non_zero_count_cache[3+8*3]=
6576 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6579 if(IS_8x8DCT(mb_type)){
6580 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6581 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6583 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6584 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6586 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6587 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6589 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6590 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6594 if (FRAME_MBAFF
6595 // left mb is in picture
6596 && h->slice_table[mb_xy-1] != 0xFFFF
6597 // and current and left pair do not have the same interlaced type
6598 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6599 // and left mb is in the same slice if deblocking_filter == 2
6600 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6601 /* First vertical edge is different in MBAFF frames
6602 * There are 8 different bS to compute and 2 different Qp
6604 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6605 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6606 int16_t bS[8];
6607 int qp[2];
6608 int bqp[2];
6609 int rqp[2];
6610 int mb_qp, mbn0_qp, mbn1_qp;
6611 int i;
6612 first_vertical_edge_done = 1;
6614 if( IS_INTRA(mb_type) )
6615 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6616 else {
6617 for( i = 0; i < 8; i++ ) {
6618 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6620 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6621 bS[i] = 4;
6622 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6623 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6624 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6626 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6627 bS[i] = 2;
6628 else
6629 bS[i] = 1;
6633 mb_qp = s->current_picture.qscale_table[mb_xy];
6634 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6635 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6636 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6637 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6638 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6639 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6640 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6641 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6642 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6643 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6644 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6645 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6647 /* Filter edge */
6648 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6649 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6650 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6651 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6652 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6655 #if CONFIG_SMALL
6656 for( dir = 0; dir < 2; dir++ )
6657 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6658 #else
6659 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6660 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
6661 #endif
/**
 * Decode the macroblocks of one slice.
 *
 * Depending on h->pps.cabac the slice is decoded with decode_mb_cabac() or
 * decode_mb_cavlc(); each successfully parsed macroblock is passed to
 * hl_decode_mb(). When FRAME_MBAFF is set a second macroblock at mb_y+1 is
 * decoded for every position. The decoded (or damaged) macroblock range is
 * reported through ff_er_add_slice().
 *
 * @param avctx codec context (not referenced directly in the body)
 * @param arg   pointer to a H264Context pointer
 * @return 0 when the end of the slice is reached cleanly, -1 on error
 */
6664 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6665 H264Context *h = *(void**)arg;
6666 MpegEncContext * const s = &h->s;
/* status bits passed to ff_er_add_slice() are masked with this; reduced set when the frame is partitioned */
6667 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6669 s->mb_skip_run= -1;
6671 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6672 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
/* CABAC-coded slice */
6674 if( h->pps.cabac ) {
6675 int i;
6677 /* realign */
6678 align_get_bits( &s->gb );
6680 /* init cabac */
6681 ff_init_cabac_states( &h->cabac);
6682 ff_init_cabac_decoder( &h->cabac,
6683 s->gb.buffer + get_bits_count(&s->gb)/8,
6684 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6685 /* calculate pre-state */
6686 for( i= 0; i < 460; i++ ) {
6687 int pre;
6688 if( h->slice_type_nos == FF_I_TYPE )
6689 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6690 else
6691 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6693 if( pre <= 63 )
6694 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6695 else
6696 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
/* decode macroblocks until the terminate bin is set, the last row is passed, or an error occurs */
6699 for(;;){
6700 //START_TIMER
6701 int ret = decode_mb_cabac(h);
6702 int eos;
6703 //STOP_TIMER("decode_mb_cabac")
6705 if(ret>=0) hl_decode_mb(h);
6707 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6708 s->mb_y++;
6710 ret = decode_mb_cabac(h);
6712 if(ret>=0) hl_decode_mb(h);
6713 s->mb_y--;
6715 eos = get_cabac_terminate( &h->cabac );
/* besides a macroblock error, fail when the CABAC reader is more than 2 bytes past the end of its buffer */
6717 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6718 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6719 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6720 return -1;
6723 if( ++s->mb_x >= s->mb_width ) {
6724 s->mb_x = 0;
6725 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6726 ++s->mb_y;
6727 if(FIELD_OR_MBAFF_PICTURE) {
6728 ++s->mb_y;
6732 if( eos || s->mb_y >= s->mb_height ) {
6733 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6734 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6735 return 0;
6739 } else {
/* CAVLC-coded slice */
6740 for(;;){
6741 int ret = decode_mb_cavlc(h);
6743 if(ret>=0) hl_decode_mb(h);
6745 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6746 s->mb_y++;
6747 ret = decode_mb_cavlc(h);
6749 if(ret>=0) hl_decode_mb(h);
6750 s->mb_y--;
6753 if(ret<0){
6754 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6755 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6757 return -1;
6760 if(++s->mb_x >= s->mb_width){
6761 s->mb_x=0;
6762 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6763 ++s->mb_y;
6764 if(FIELD_OR_MBAFF_PICTURE) {
6765 ++s->mb_y;
6767 if(s->mb_y >= s->mb_height){
6768 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6770 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6771 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6773 return 0;
6774 }else{
6775 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6777 return -1;
/* all bits consumed and no skip run pending: the slice ends here; reading past the end is reported as an error */
6782 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6783 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6784 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6785 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6787 return 0;
6788 }else{
6789 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6791 return -1;
6797 #if 0
6798 for(;s->mb_y < s->mb_height; s->mb_y++){
6799 for(;s->mb_x < s->mb_width; s->mb_x++){
6800 int ret= decode_mb(h);
6802 hl_decode_mb(h);
6804 if(ret<0){
6805 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6806 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6808 return -1;
6811 if(++s->mb_x >= s->mb_width){
6812 s->mb_x=0;
6813 if(++s->mb_y >= s->mb_height){
6814 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6815 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6817 return 0;
6818 }else{
6819 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6821 return -1;
6826 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6827 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6828 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6830 return 0;
6831 }else{
6832 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6834 return -1;
6838 s->mb_x=0;
6839 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6841 #endif
6842 return -1; //not reached
6845 static int decode_picture_timing(H264Context *h){
6846 MpegEncContext * const s = &h->s;
6847 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6848 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6849 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6851 if(h->sps.pic_struct_present_flag){
6852 unsigned int i, num_clock_ts;
6853 h->sei_pic_struct = get_bits(&s->gb, 4);
6854 h->sei_ct_type = 0;
6856 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6857 return -1;
6859 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6861 for (i = 0 ; i < num_clock_ts ; i++){
6862 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6863 unsigned int full_timestamp_flag;
6864 h->sei_ct_type |= 1<<get_bits(&s->gb, 2);
6865 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6866 skip_bits(&s->gb, 5); /* counting_type */
6867 full_timestamp_flag = get_bits(&s->gb, 1);
6868 skip_bits(&s->gb, 1); /* discontinuity_flag */
6869 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6870 skip_bits(&s->gb, 8); /* n_frames */
6871 if(full_timestamp_flag){
6872 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6873 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6874 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6875 }else{
6876 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6877 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6878 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6879 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6880 if(get_bits(&s->gb, 1)) /* hours_flag */
6881 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6885 if(h->sps.time_offset_length > 0)
6886 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6890 return 0;
6893 static int decode_unregistered_user_data(H264Context *h, int size){
6894 MpegEncContext * const s = &h->s;
6895 uint8_t user_data[16+256];
6896 int e, build, i;
6898 if(size<16)
6899 return -1;
6901 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6902 user_data[i]= get_bits(&s->gb, 8);
6905 user_data[i]= 0;
6906 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6907 if(e==1 && build>=0)
6908 h->x264_build= build;
6910 if(s->avctx->debug & FF_DEBUG_BUGS)
6911 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6913 for(; i<size; i++)
6914 skip_bits(&s->gb, 8);
6916 return 0;
6919 static int decode_recovery_point(H264Context *h){
6920 MpegEncContext * const s = &h->s;
6922 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6923 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
6925 return 0;
6928 static int decode_buffering_period(H264Context *h){
6929 MpegEncContext * const s = &h->s;
6930 unsigned int sps_id;
6931 int sched_sel_idx;
6932 SPS *sps;
6934 sps_id = get_ue_golomb_31(&s->gb);
6935 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6936 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6937 return -1;
6939 sps = h->sps_buffers[sps_id];
6941 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6942 if (sps->nal_hrd_parameters_present_flag) {
6943 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6944 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6945 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6948 if (sps->vcl_hrd_parameters_present_flag) {
6949 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6950 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6951 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6955 h->sei_buffering_period_present = 1;
6956 return 0;
6959 int ff_h264_decode_sei(H264Context *h){
6960 MpegEncContext * const s = &h->s;
6962 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6963 int size, type;
6965 type=0;
6967 type+= show_bits(&s->gb, 8);
6968 }while(get_bits(&s->gb, 8) == 255);
6970 size=0;
6972 size+= show_bits(&s->gb, 8);
6973 }while(get_bits(&s->gb, 8) == 255);
6975 switch(type){
6976 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6977 if(decode_picture_timing(h) < 0)
6978 return -1;
6979 break;
6980 case SEI_TYPE_USER_DATA_UNREGISTERED:
6981 if(decode_unregistered_user_data(h, size) < 0)
6982 return -1;
6983 break;
6984 case SEI_TYPE_RECOVERY_POINT:
6985 if(decode_recovery_point(h) < 0)
6986 return -1;
6987 break;
6988 case SEI_BUFFERING_PERIOD:
6989 if(decode_buffering_period(h) < 0)
6990 return -1;
6991 break;
6992 default:
6993 skip_bits(&s->gb, 8*size);
6996 //FIXME check bits here
6997 align_get_bits(&s->gb);
7000 return 0;
7003 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
7004 MpegEncContext * const s = &h->s;
7005 int cpb_count, i;
7006 cpb_count = get_ue_golomb_31(&s->gb) + 1;
7008 if(cpb_count > 32U){
7009 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
7010 return -1;
7013 get_bits(&s->gb, 4); /* bit_rate_scale */
7014 get_bits(&s->gb, 4); /* cpb_size_scale */
7015 for(i=0; i<cpb_count; i++){
7016 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7017 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7018 get_bits1(&s->gb); /* cbr_flag */
7020 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7021 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7022 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
7023 sps->time_offset_length = get_bits(&s->gb, 5);
7024 sps->cpb_cnt = cpb_count;
7025 return 0;
7028 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7029 MpegEncContext * const s = &h->s;
7030 int aspect_ratio_info_present_flag;
7031 unsigned int aspect_ratio_idc;
7033 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7035 if( aspect_ratio_info_present_flag ) {
7036 aspect_ratio_idc= get_bits(&s->gb, 8);
7037 if( aspect_ratio_idc == EXTENDED_SAR ) {
7038 sps->sar.num= get_bits(&s->gb, 16);
7039 sps->sar.den= get_bits(&s->gb, 16);
7040 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
7041 sps->sar= pixel_aspect[aspect_ratio_idc];
7042 }else{
7043 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7044 return -1;
7046 }else{
7047 sps->sar.num=
7048 sps->sar.den= 0;
7050 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7052 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7053 get_bits1(&s->gb); /* overscan_appropriate_flag */
7056 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7057 get_bits(&s->gb, 3); /* video_format */
7058 get_bits1(&s->gb); /* video_full_range_flag */
7059 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7060 get_bits(&s->gb, 8); /* colour_primaries */
7061 get_bits(&s->gb, 8); /* transfer_characteristics */
7062 get_bits(&s->gb, 8); /* matrix_coefficients */
7066 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7067 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7068 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7071 sps->timing_info_present_flag = get_bits1(&s->gb);
7072 if(sps->timing_info_present_flag){
7073 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7074 sps->time_scale = get_bits_long(&s->gb, 32);
7075 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7078 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7079 if(sps->nal_hrd_parameters_present_flag)
7080 if(decode_hrd_parameters(h, sps) < 0)
7081 return -1;
7082 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7083 if(sps->vcl_hrd_parameters_present_flag)
7084 if(decode_hrd_parameters(h, sps) < 0)
7085 return -1;
7086 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7087 get_bits1(&s->gb); /* low_delay_hrd_flag */
7088 sps->pic_struct_present_flag = get_bits1(&s->gb);
7090 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7091 if(sps->bitstream_restriction_flag){
7092 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7093 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7094 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7095 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7096 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7097 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7098 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7100 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7101 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
7102 return -1;
7106 return 0;
7109 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7110 const uint8_t *jvt_list, const uint8_t *fallback_list){
7111 MpegEncContext * const s = &h->s;
7112 int i, last = 8, next = 8;
7113 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7114 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7115 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7116 else
7117 for(i=0;i<size;i++){
7118 if(next)
7119 next = (last + get_se_golomb(&s->gb)) & 0xff;
7120 if(!i && !next){ /* matrix not written, we use the preset one */
7121 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7122 break;
7124 last = factors[scan[i]] = next ? next : last;
7128 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7129 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7130 MpegEncContext * const s = &h->s;
7131 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7132 const uint8_t *fallback[4] = {
7133 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7134 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7135 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7136 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7138 if(get_bits1(&s->gb)){
7139 sps->scaling_matrix_present |= is_sps;
7140 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7141 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7142 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7143 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7144 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7145 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7146 if(is_sps || pps->transform_8x8_mode){
7147 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7148 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7153 int ff_h264_decode_seq_parameter_set(H264Context *h){
7154 MpegEncContext * const s = &h->s;
7155 int profile_idc, level_idc;
7156 unsigned int sps_id;
7157 int i;
7158 SPS *sps;
7160 profile_idc= get_bits(&s->gb, 8);
7161 get_bits1(&s->gb); //constraint_set0_flag
7162 get_bits1(&s->gb); //constraint_set1_flag
7163 get_bits1(&s->gb); //constraint_set2_flag
7164 get_bits1(&s->gb); //constraint_set3_flag
7165 get_bits(&s->gb, 4); // reserved
7166 level_idc= get_bits(&s->gb, 8);
7167 sps_id= get_ue_golomb_31(&s->gb);
7169 if(sps_id >= MAX_SPS_COUNT) {
7170 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7171 return -1;
7173 sps= av_mallocz(sizeof(SPS));
7174 if(sps == NULL)
7175 return -1;
7177 sps->profile_idc= profile_idc;
7178 sps->level_idc= level_idc;
7180 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7181 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7182 sps->scaling_matrix_present = 0;
7184 if(sps->profile_idc >= 100){ //high profile
7185 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7186 if(sps->chroma_format_idc == 3)
7187 sps->residual_color_transform_flag = get_bits1(&s->gb);
7188 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7189 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7190 sps->transform_bypass = get_bits1(&s->gb);
7191 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7192 }else{
7193 sps->chroma_format_idc= 1;
7196 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7197 sps->poc_type= get_ue_golomb_31(&s->gb);
7199 if(sps->poc_type == 0){ //FIXME #define
7200 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7201 } else if(sps->poc_type == 1){//FIXME #define
7202 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7203 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7204 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7205 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7207 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7208 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7209 goto fail;
7212 for(i=0; i<sps->poc_cycle_length; i++)
7213 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7214 }else if(sps->poc_type != 2){
7215 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7216 goto fail;
7219 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7220 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7221 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7222 goto fail;
7224 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7225 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7226 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7227 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7228 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7229 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7230 goto fail;
7233 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7234 if(!sps->frame_mbs_only_flag)
7235 sps->mb_aff= get_bits1(&s->gb);
7236 else
7237 sps->mb_aff= 0;
7239 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7241 #ifndef ALLOW_INTERLACE
7242 if(sps->mb_aff)
7243 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7244 #endif
7245 sps->crop= get_bits1(&s->gb);
7246 if(sps->crop){
7247 sps->crop_left = get_ue_golomb(&s->gb);
7248 sps->crop_right = get_ue_golomb(&s->gb);
7249 sps->crop_top = get_ue_golomb(&s->gb);
7250 sps->crop_bottom= get_ue_golomb(&s->gb);
7251 if(sps->crop_left || sps->crop_top){
7252 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7254 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7255 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7257 }else{
7258 sps->crop_left =
7259 sps->crop_right =
7260 sps->crop_top =
7261 sps->crop_bottom= 0;
7264 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7265 if( sps->vui_parameters_present_flag )
7266 decode_vui_parameters(h, sps);
7268 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7269 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s %d/%d\n",
7270 sps_id, sps->profile_idc, sps->level_idc,
7271 sps->poc_type,
7272 sps->ref_frame_count,
7273 sps->mb_width, sps->mb_height,
7274 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7275 sps->direct_8x8_inference_flag ? "8B8" : "",
7276 sps->crop_left, sps->crop_right,
7277 sps->crop_top, sps->crop_bottom,
7278 sps->vui_parameters_present_flag ? "VUI" : "",
7279 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc],
7280 sps->timing_info_present_flag ? sps->num_units_in_tick : 0,
7281 sps->timing_info_present_flag ? sps->time_scale : 0
7285 av_free(h->sps_buffers[sps_id]);
7286 h->sps_buffers[sps_id]= sps;
7287 h->sps = *sps;
7288 return 0;
7289 fail:
7290 av_free(sps);
7291 return -1;
7294 static void
7295 build_qp_table(PPS *pps, int t, int index)
7297 int i;
7298 for(i = 0; i < 52; i++)
7299 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
7302 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7303 MpegEncContext * const s = &h->s;
7304 unsigned int pps_id= get_ue_golomb(&s->gb);
7305 PPS *pps;
7307 if(pps_id >= MAX_PPS_COUNT) {
7308 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7309 return -1;
7312 pps= av_mallocz(sizeof(PPS));
7313 if(pps == NULL)
7314 return -1;
7315 pps->sps_id= get_ue_golomb_31(&s->gb);
7316 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7317 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7318 goto fail;
7321 pps->cabac= get_bits1(&s->gb);
7322 pps->pic_order_present= get_bits1(&s->gb);
7323 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7324 if(pps->slice_group_count > 1 ){
7325 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7326 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7327 switch(pps->mb_slice_group_map_type){
7328 case 0:
7329 #if 0
7330 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7331 | run_length[ i ] |1 |ue(v) |
7332 #endif
7333 break;
7334 case 2:
7335 #if 0
7336 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7337 |{ | | |
7338 | top_left_mb[ i ] |1 |ue(v) |
7339 | bottom_right_mb[ i ] |1 |ue(v) |
7340 | } | | |
7341 #endif
7342 break;
7343 case 3:
7344 case 4:
7345 case 5:
7346 #if 0
7347 | slice_group_change_direction_flag |1 |u(1) |
7348 | slice_group_change_rate_minus1 |1 |ue(v) |
7349 #endif
7350 break;
7351 case 6:
7352 #if 0
7353 | slice_group_id_cnt_minus1 |1 |ue(v) |
7354 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7355 |) | | |
7356 | slice_group_id[ i ] |1 |u(v) |
7357 #endif
7358 break;
7361 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7362 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7363 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7364 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7365 goto fail;
7368 pps->weighted_pred= get_bits1(&s->gb);
7369 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7370 pps->init_qp= get_se_golomb(&s->gb) + 26;
7371 pps->init_qs= get_se_golomb(&s->gb) + 26;
7372 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7373 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7374 pps->constrained_intra_pred= get_bits1(&s->gb);
7375 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7377 pps->transform_8x8_mode= 0;
7378 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7379 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7380 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7382 if(get_bits_count(&s->gb) < bit_length){
7383 pps->transform_8x8_mode= get_bits1(&s->gb);
7384 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7385 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7386 } else {
7387 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7390 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7391 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7392 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7393 h->pps.chroma_qp_diff= 1;
7395 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7396 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7397 pps_id, pps->sps_id,
7398 pps->cabac ? "CABAC" : "CAVLC",
7399 pps->slice_group_count,
7400 pps->ref_count[0], pps->ref_count[1],
7401 pps->weighted_pred ? "weighted" : "",
7402 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7403 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7404 pps->constrained_intra_pred ? "CONSTR" : "",
7405 pps->redundant_pic_cnt_present ? "REDU" : "",
7406 pps->transform_8x8_mode ? "8x8DCT" : ""
7410 av_free(h->pps_buffers[pps_id]);
7411 h->pps_buffers[pps_id]= pps;
7412 return 0;
7413 fail:
7414 av_free(pps);
7415 return -1;
7419 * Call decode_slice() for each context.
7421 * @param h h264 master context
7422 * @param context_count number of contexts to execute
7424 static void execute_decode_slices(H264Context *h, int context_count){
7425 MpegEncContext * const s = &h->s;
7426 AVCodecContext * const avctx= s->avctx;
7427 H264Context *hx;
7428 int i;
7430 if (s->avctx->hwaccel)
7431 return;
7432 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7433 return;
7434 if(context_count == 1) {
7435 decode_slice(avctx, &h);
7436 } else {
7437 for(i = 1; i < context_count; i++) {
7438 hx = h->thread_context[i];
7439 hx->s.error_recognition = avctx->error_recognition;
7440 hx->s.error_count = 0;
7443 avctx->execute(avctx, (void *)decode_slice,
7444 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7446 /* pull back stuff from slices to master context */
7447 hx = h->thread_context[context_count - 1];
7448 s->mb_x = hx->s.mb_x;
7449 s->mb_y = hx->s.mb_y;
7450 s->dropable = hx->s.dropable;
7451 s->picture_structure = hx->s.picture_structure;
7452 for(i = 1; i < context_count; i++)
7453 h->s.error_count += h->thread_context[i]->s.error_count;
7458 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7459 MpegEncContext * const s = &h->s;
7460 AVCodecContext * const avctx= s->avctx;
7461 int buf_index=0;
7462 H264Context *hx; ///< thread context
7463 int context_count = 0;
7464 int next_avc= h->is_avc ? 0 : buf_size;
7466 h->max_contexts = avctx->thread_count;
7467 #if 0
7468 int i;
7469 for(i=0; i<50; i++){
7470 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7472 #endif
7473 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7474 h->current_slice = 0;
7475 if (!s->first_field)
7476 s->current_picture_ptr= NULL;
7477 reset_sei(h);
7480 for(;;){
7481 int consumed;
7482 int dst_length;
7483 int bit_length;
7484 const uint8_t *ptr;
7485 int i, nalsize = 0;
7486 int err;
7488 if(buf_index >= next_avc) {
7489 if(buf_index >= buf_size) break;
7490 nalsize = 0;
7491 for(i = 0; i < h->nal_length_size; i++)
7492 nalsize = (nalsize << 8) | buf[buf_index++];
7493 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7494 if(nalsize == 1){
7495 buf_index++;
7496 continue;
7497 }else{
7498 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7499 break;
7502 next_avc= buf_index + nalsize;
7503 } else {
7504 // start code prefix search
7505 for(; buf_index + 3 < buf_size; buf_index++){
7506 // This should always succeed in the first iteration.
7507 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7508 break;
7511 if(buf_index+3 >= buf_size) break;
7513 buf_index+=3;
7516 hx = h->thread_context[context_count];
7518 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
7519 if (ptr==NULL || dst_length < 0){
7520 return -1;
7522 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7523 dst_length--;
7524 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7526 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7527 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7530 if (h->is_avc && (nalsize != consumed) && nalsize){
7531 int i, debug_level = AV_LOG_DEBUG;
7532 for (i = consumed; i < nalsize; i++)
7533 if (buf[buf_index+i])
7534 debug_level = AV_LOG_ERROR;
7535 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7538 buf_index += consumed;
7540 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7541 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7542 continue;
7544 again:
7545 err = 0;
7546 switch(hx->nal_unit_type){
7547 case NAL_IDR_SLICE:
7548 if (h->nal_unit_type != NAL_IDR_SLICE) {
7549 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7550 return -1;
7552 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7553 case NAL_SLICE:
7554 init_get_bits(&hx->s.gb, ptr, bit_length);
7555 hx->intra_gb_ptr=
7556 hx->inter_gb_ptr= &hx->s.gb;
7557 hx->s.data_partitioning = 0;
7559 if((err = decode_slice_header(hx, h)))
7560 break;
7562 if (s->avctx->hwaccel && h->current_slice == 1) {
7563 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
7564 return -1;
7567 s->current_picture_ptr->key_frame |=
7568 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7569 (h->sei_recovery_frame_cnt >= 0);
7570 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7571 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7572 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7573 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7574 && avctx->skip_frame < AVDISCARD_ALL){
7575 if(avctx->hwaccel) {
7576 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
7577 return -1;
7578 }else
7579 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7580 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7581 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7582 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
7583 }else
7584 context_count++;
7586 break;
7587 case NAL_DPA:
7588 init_get_bits(&hx->s.gb, ptr, bit_length);
7589 hx->intra_gb_ptr=
7590 hx->inter_gb_ptr= NULL;
7591 hx->s.data_partitioning = 1;
7593 err = decode_slice_header(hx, h);
7594 break;
7595 case NAL_DPB:
7596 init_get_bits(&hx->intra_gb, ptr, bit_length);
7597 hx->intra_gb_ptr= &hx->intra_gb;
7598 break;
7599 case NAL_DPC:
7600 init_get_bits(&hx->inter_gb, ptr, bit_length);
7601 hx->inter_gb_ptr= &hx->inter_gb;
7603 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7604 && s->context_initialized
7605 && s->hurry_up < 5
7606 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7607 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7608 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7609 && avctx->skip_frame < AVDISCARD_ALL)
7610 context_count++;
7611 break;
7612 case NAL_SEI:
7613 init_get_bits(&s->gb, ptr, bit_length);
7614 ff_h264_decode_sei(h);
7615 break;
7616 case NAL_SPS:
7617 init_get_bits(&s->gb, ptr, bit_length);
7618 ff_h264_decode_seq_parameter_set(h);
7620 if(s->flags& CODEC_FLAG_LOW_DELAY)
7621 s->low_delay=1;
7623 if(avctx->has_b_frames < 2)
7624 avctx->has_b_frames= !s->low_delay;
7625 break;
7626 case NAL_PPS:
7627 init_get_bits(&s->gb, ptr, bit_length);
7629 ff_h264_decode_picture_parameter_set(h, bit_length);
7631 break;
7632 case NAL_AUD:
7633 case NAL_END_SEQUENCE:
7634 case NAL_END_STREAM:
7635 case NAL_FILLER_DATA:
7636 case NAL_SPS_EXT:
7637 case NAL_AUXILIARY_SLICE:
7638 break;
7639 default:
7640 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7643 if(context_count == h->max_contexts) {
7644 execute_decode_slices(h, context_count);
7645 context_count = 0;
7648 if (err < 0)
7649 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7650 else if(err == 1) {
7651 /* Slice could not be decoded in parallel mode, copy down
7652 * NAL unit stuff to context 0 and restart. Note that
7653 * rbsp_buffer is not transferred, but since we no longer
7654 * run in parallel mode this should not be an issue. */
7655 h->nal_unit_type = hx->nal_unit_type;
7656 h->nal_ref_idc = hx->nal_ref_idc;
7657 hx = h;
7658 goto again;
7661 if(context_count)
7662 execute_decode_slices(h, context_count);
7663 return buf_index;
7667 * returns the number of bytes consumed for building the current frame
7669 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7670 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7671 if(pos+10>buf_size) pos=buf_size; // oops ;)
7673 return pos;
7676 static int decode_frame(AVCodecContext *avctx,
7677 void *data, int *data_size,
7678 AVPacket *avpkt)
7680 const uint8_t *buf = avpkt->data;
7681 int buf_size = avpkt->size;
7682 H264Context *h = avctx->priv_data;
7683 MpegEncContext *s = &h->s;
7684 AVFrame *pict = data;
7685 int buf_index;
7687 s->flags= avctx->flags;
7688 s->flags2= avctx->flags2;
7690 /* end of stream, output what is still in the buffers */
7691 if (buf_size == 0) {
7692 Picture *out;
7693 int i, out_idx;
7695 //FIXME factorize this with the output code below
7696 out = h->delayed_pic[0];
7697 out_idx = 0;
7698 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7699 if(h->delayed_pic[i]->poc < out->poc){
7700 out = h->delayed_pic[i];
7701 out_idx = i;
7704 for(i=out_idx; h->delayed_pic[i]; i++)
7705 h->delayed_pic[i] = h->delayed_pic[i+1];
7707 if(out){
7708 *data_size = sizeof(AVFrame);
7709 *pict= *(AVFrame*)out;
7712 return 0;
7715 if(h->is_avc && !h->got_avcC) {
7716 int i, cnt, nalsize;
7717 unsigned char *p = avctx->extradata;
7718 if(avctx->extradata_size < 7) {
7719 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7720 return -1;
7722 if(*p != 1) {
7723 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7724 return -1;
7726 /* sps and pps in the avcC always have length coded with 2 bytes,
7727 so put a fake nal_length_size = 2 while parsing them */
7728 h->nal_length_size = 2;
7729 // Decode sps from avcC
7730 cnt = *(p+5) & 0x1f; // Number of sps
7731 p += 6;
7732 for (i = 0; i < cnt; i++) {
7733 nalsize = AV_RB16(p) + 2;
7734 if(decode_nal_units(h, p, nalsize) < 0) {
7735 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7736 return -1;
7738 p += nalsize;
7740 // Decode pps from avcC
7741 cnt = *(p++); // Number of pps
7742 for (i = 0; i < cnt; i++) {
7743 nalsize = AV_RB16(p) + 2;
7744 if(decode_nal_units(h, p, nalsize) != nalsize) {
7745 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7746 return -1;
7748 p += nalsize;
7750 // Now store right nal length size, that will be use to parse all other nals
7751 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7752 // Do not reparse avcC
7753 h->got_avcC = 1;
7756 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7757 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7758 return -1;
7759 h->got_avcC = 1;
7762 buf_index=decode_nal_units(h, buf, buf_size);
7763 if(buf_index < 0)
7764 return -1;
7766 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7767 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7768 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7769 return -1;
7772 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7773 Picture *out = s->current_picture_ptr;
7774 Picture *cur = s->current_picture_ptr;
7775 int i, pics, cross_idr, out_of_order, out_idx;
7777 field_end(h);
7779 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7780 /* Wait for second field. */
7781 *data_size = 0;
7783 } else {
7784 cur->repeat_pict = 0;
7786 /* Signal interlacing information externally. */
7787 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7788 if (h->sei_ct_type)
7789 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
7790 else
7791 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7793 if(h->sps.pic_struct_present_flag){
7794 switch (h->sei_pic_struct)
7796 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7797 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7798 // Signal the possibility of telecined film externally (pic_struct 5,6)
7799 // From these hints, let the applications decide if they apply deinterlacing.
7800 cur->repeat_pict = 1;
7801 break;
7802 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7803 // Force progressive here, as doubling interlaced frame is a bad idea.
7804 cur->interlaced_frame = 0;
7805 cur->repeat_pict = 2;
7806 break;
7807 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7808 cur->interlaced_frame = 0;
7809 cur->repeat_pict = 4;
7810 break;
7812 }else{
7813 /* Derive interlacing flag from used decoding process. */
7814 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7817 if (cur->field_poc[0] != cur->field_poc[1]){
7818 /* Derive top_field_first from field pocs. */
7819 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7820 }else{
7821 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7822 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7823 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7824 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7825 cur->top_field_first = 1;
7826 else
7827 cur->top_field_first = 0;
7828 }else{
7829 /* Most likely progressive */
7830 cur->top_field_first = 0;
7834 //FIXME do something with unavailable reference frames
7836 /* Sort B-frames into display order */
7838 if(h->sps.bitstream_restriction_flag
7839 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7840 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7841 s->low_delay = 0;
7844 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7845 && !h->sps.bitstream_restriction_flag){
7846 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7847 s->low_delay= 0;
7850 pics = 0;
7851 while(h->delayed_pic[pics]) pics++;
7853 assert(pics <= MAX_DELAYED_PIC_COUNT);
7855 h->delayed_pic[pics++] = cur;
7856 if(cur->reference == 0)
7857 cur->reference = DELAYED_PIC_REF;
7859 out = h->delayed_pic[0];
7860 out_idx = 0;
7861 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7862 if(h->delayed_pic[i]->poc < out->poc){
7863 out = h->delayed_pic[i];
7864 out_idx = i;
7866 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7868 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7870 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7872 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7873 || (s->low_delay &&
7874 ((!cross_idr && out->poc > h->outputed_poc + 2)
7875 || cur->pict_type == FF_B_TYPE)))
7877 s->low_delay = 0;
7878 s->avctx->has_b_frames++;
7881 if(out_of_order || pics > s->avctx->has_b_frames){
7882 out->reference &= ~DELAYED_PIC_REF;
7883 for(i=out_idx; h->delayed_pic[i]; i++)
7884 h->delayed_pic[i] = h->delayed_pic[i+1];
7886 if(!out_of_order && pics > s->avctx->has_b_frames){
7887 *data_size = sizeof(AVFrame);
7889 h->outputed_poc = out->poc;
7890 *pict= *(AVFrame*)out;
7891 }else{
7892 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7897 assert(pict->data[0] || !*data_size);
7898 ff_print_debug_info(s, pict);
7899 //printf("out %d\n", (int)pict->data[0]);
7900 #if 0 //?
7902 /* Return the Picture timestamp as the frame number */
7903 /* we subtract 1 because it is added on utils.c */
7904 avctx->frame_number = s->picture_number - 1;
7905 #endif
7906 return get_consumed_bytes(s, buf_index, buf_size);
#if 0
/**
 * Fills h->mb_avail[] for the macroblock at (s->mb_x, s->mb_y).
 *
 * Entries 0..2 cover the top-left, top and top-right neighbours and entry 3
 * the left neighbour; a neighbour counts as available when its slice_table
 * entry equals h->slice_num. Entries 4 and 5 are constants.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int cur_xy = s->mb_x + s->mb_y*s->mb_stride;
    const int top_xy = cur_xy - s->mb_stride;

    if(!s->mb_y){
        /* First macroblock row: nothing above. */
        h->mb_avail[2]= 0;
        h->mb_avail[1]= 0;
        h->mb_avail[0]= 0;
    }else{
        h->mb_avail[0]= s->mb_x                 && h->slice_table[top_xy - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[top_xy    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[top_xy + 1] == h->slice_num;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[cur_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self test, built only when TEST is defined.
 *
 * Writes COUNT unsigned and then COUNT signed Exp-Golomb codes into a
 * buffer, reads them back and prints every value that does not round-trip.
 * The DCT, quantizer and NAL layer checks are disabled with #if 0.
 *
 * @return 0; Exp-Golomb mismatches are only printed
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        /* Peek at the upcoming bits so a mismatch can be reported with them. */
        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* The value written was i - COUNT/2, so report that as expected. */
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        s->dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
8104 av_cold void ff_h264_free_context(H264Context *h)
8106 int i;
8108 av_freep(&h->rbsp_buffer[0]);
8109 av_freep(&h->rbsp_buffer[1]);
8110 free_tables(h); //FIXME cleanup init stuff perhaps
8112 for(i = 0; i < MAX_SPS_COUNT; i++)
8113 av_freep(h->sps_buffers + i);
8115 for(i = 0; i < MAX_PPS_COUNT; i++)
8116 av_freep(h->pps_buffers + i);
8119 static av_cold int decode_end(AVCodecContext *avctx)
8121 H264Context *h = avctx->priv_data;
8122 MpegEncContext *s = &h->s;
8124 ff_h264_free_context(h);
8126 MPV_common_end(s);
8128 // memset(h, 0, sizeof(H264Context));
8130 return 0;
8134 AVCodec h264_decoder = {
8135 "h264",
8136 CODEC_TYPE_VIDEO,
8137 CODEC_ID_H264,
8138 sizeof(H264Context),
8139 decode_init,
8140 NULL,
8141 decode_end,
8142 decode_frame,
8143 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8144 .flush= flush_dpb,
8145 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8146 .pix_fmts= ff_hwaccel_pixfmt_list_420,
#if CONFIG_H264_VDPAU_DECODER
/**
 * Codec table entry for the "h264_vdpau" decoder. It shares decode_init,
 * decode_end, decode_frame and flush_dpb with the "h264" entry and adds
 * CODEC_CAP_HWACCEL_VDPAU to the capability flags.
 */
AVCodec h264_vdpau_decoder = {
    "h264_vdpau",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H264,
    sizeof(H264Context),
    decode_init,
    NULL,
    decode_end,
    decode_frame,
    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush     = flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
};
#endif
8165 #if CONFIG_SVQ3_DECODER
8166 #include "svq3.c"
8167 #endif