Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
[FFMpeg-mirror/lagarith.git] / libavcodec / h264.c
blob8b30eee1eef72f7dbc6703c5541761012b3bc331
/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
28 #include "internal.h"
29 #include "dsputil.h"
30 #include "avcodec.h"
31 #include "mpegvideo.h"
32 #include "h264.h"
33 #include "h264data.h"
34 #include "h264_parser.h"
35 #include "golomb.h"
36 #include "mathops.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
40 #include "cabac.h"
41 #if ARCH_X86
42 #include "x86/h264_i386.h"
43 #endif
45 //#undef NDEBUG
46 #include <assert.h>
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
54 static VLC coeff_token_vlc[4];
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
74 static VLC run7_vlc;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
80 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static Picture * remove_long(H264Context *h, int i, int ref_mask);
84 static av_always_inline uint32_t pack16to32(int a, int b){
85 #if HAVE_BIGENDIAN
86 return (b&0xFFFF) + (a<<16);
87 #else
88 return (a&0xFFFF) + (b<<16);
89 #endif
/* qp%6 and qp/6 lookup tables for all legal luma QP values (0..51),
 * used by the dequantization code to avoid runtime division. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};

/* Per-neighbour-configuration index remappings for the "left" neighbour
 * blocks; row selection depends on the MBAFF field/frame combination of the
 * current and left macroblock pair. */
static const uint8_t left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};

#define LEVEL_TAB_BITS 8
/* CAVLC level decoding lookup table: [suffix_length][bitstream byte] ->
 * (level, consumed bits). Filled at init time. */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
111 MpegEncContext * const s = &h->s;
112 const int mb_xy= h->mb_xy;
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
115 const uint8_t * left_block;
116 int topleft_partition= -1;
117 int i;
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
121 //FIXME deblocking could skip the intra and nnz parts.
122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
123 return;
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
131 left_block = left_block_options[0];
132 if(FRAME_MBAFF){
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
142 const int bottom = (s->mb_y & 1);
143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
146 top_xy -= s->mb_stride;
148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
149 topleft_xy -= s->mb_stride;
150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
151 topleft_xy += s->mb_stride;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition = 0;
155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
156 topright_xy -= s->mb_stride;
158 if (left_mb_field_flag != curr_mb_field_flag) {
159 left_xy[1] = left_xy[0] = pair_xy - 1;
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
163 } else {
164 left_block= left_block_options[2 - bottom];
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
172 if(for_deblock){
173 topleft_type = 0;
174 topright_type = 0;
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
179 if(MB_MBAFF && !IS_INTRA(mb_type)){
180 int list;
181 for(list=0; list<h->list_count; list++){
182 //These values where changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type,list)){
186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
189 ref += h->b8_stride;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
195 }else{
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
202 if(IS_INTRA(mb_type)){
203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
204 h->topleft_samples_available=
205 h->top_samples_available=
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
209 if(!(top_type & type_mask)){
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
216 if(!(left_type[0] & type_mask)){
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
220 if(!(left_type[1] & type_mask)){
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
224 }else{
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
233 }else{
234 if(!(left_type[0] & type_mask)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(topleft_type & type_mask))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!(topright_type & type_mask))
244 h->topright_samples_available&= 0xFBFF;
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
252 }else{
253 int pred;
254 if(!(top_type & type_mask))
255 pred= -1;
256 else{
257 pred= 2;
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
264 for(i=0; i<2; i++){
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
268 }else{
269 int pred;
270 if(!(left_type[i] & type_mask))
271 pred= -1;
272 else{
273 pred= 2;
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
285 0 . T T. T T T T
286 1 L . .L . . . .
287 2 L . .L . . . .
288 3 . T TL . . . .
289 4 L . .L . . . .
290 5 L . .. . . . .
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
293 if(top_type){
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
305 }else{
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
319 for (i=0; i<2; i++) {
320 if(left_type[i]){
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
325 }else{
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
333 if( h->pps.cabac ) {
334 // top_cbp
335 if(top_type) {
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
338 h->top_cbp = 0x1C0;
339 } else {
340 h->top_cbp = 0;
342 // left_cbp
343 if (left_type[0]) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
346 h->left_cbp = 0x1C0;
347 } else {
348 h->left_cbp = 0;
350 if (left_type[0]) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
353 if (left_type[1]) {
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
358 #if 1
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
360 int list;
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
368 continue;
370 h->mv_cache_clean[list]= 0;
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 }else{
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
391 for(i=0; i<2; i++){
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 }else{
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
409 continue;
411 if(USES_LIST(topleft_type, list)){
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 }else{
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 }else{
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
432 continue;
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
445 if( h->pps.cabac ) {
446 /* XXX beurk, Load mvd */
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 }else{
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 }else{
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 }else{
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
481 if(h->slice_type_nos == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 }else{
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 else
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 else
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
510 if(FRAME_MBAFF){
511 #define MAP_MVS\
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
522 if(MB_FIELD){
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
529 MAP_MVS
530 #undef MAP_F2F
531 }else{
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
538 MAP_MVS
539 #undef MAP_F2F
544 #endif
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
568 int i;
570 if(!(h->top_samples_available&0x8000)){
571 for(i=0; i<4; i++){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573 if(status<0){
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
575 return -1;
576 } else if(status){
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
584 for(i=0; i<4; i++){
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587 if(status<0){
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
589 return -1;
590 } else if(status){
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
597 return 0;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
608 if(mode > 6U) {
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
610 return -1;
613 if(!(h->top_samples_available&0x8000)){
614 mode= top[ mode ];
615 if(mode<0){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
617 return -1;
621 if((h->left_samples_available&0x8080) != 0x8080){
622 mode= left[ mode ];
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
626 if(mode<0){
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
628 return -1;
632 return mode;
636 * gets the predicted intra4x4 prediction mode.
638 static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
646 if(min<0) return DC_PRED;
647 else return min;
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
674 static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
678 int i= left + top;
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
684 return i&31;
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
693 if(FRAME_MBAFF){
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
695 const int16_t *mv;
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
699 if(!MB_FIELD
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 if(!MB_FIELD
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
723 if(MB_FIELD
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 && i >= scan8[0]+8){
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
730 #undef SET_DIAG_MV
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
735 return topright_ref;
736 }else{
737 tprintf(s->avctx, "topright MV not available\n");
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757 const int16_t * C;
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
762 /* mv_cache
763 B . . A T T T T
764 U . . L . . , .
765 U . . L . . . .
766 U . . L . . , .
767 . . . L . . . .
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
777 if(left_ref==ref){
778 *mx= A[0];
779 *my= A[1];
780 }else if(top_ref==ref){
781 *mx= B[0];
782 *my= B[1];
783 }else{
784 *mx= C[0];
785 *my= C[1];
787 }else{
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
789 *mx= A[0];
790 *my= A[1];
791 }else{
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807 if(n==0){
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
813 if(top_ref == ref){
814 *mx= B[0];
815 *my= B[1];
816 return;
818 }else{
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
824 if(left_ref == ref){
825 *mx= A[0];
826 *my= A[1];
827 return;
831 //RARE
832 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
 * @param n the block index (0 = left partition, 4 = right partition)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    if(n==0){
        /* left 8x16 partition: try the block to the left first */
        const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
        const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

        if(left_ref == ref){
            *mx= A[0];
            *my= A[1];
            return;
        }
    }else{
        /* right 8x16 partition: try the top-right (diagonal) neighbour */
        const int16_t * C;
        int diagonal_ref;

        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);

        if(diagonal_ref == ref){
            *mx= C[0];
            *my= C[1];
            return;
        }
    }

    //RARE: directional neighbour has a different reference -> fall back to median prediction
    pred_motion(h, n, 2, list, ref, mx, my);
}
/**
 * Motion vector prediction for P_Skip macroblocks: the MV is forced to
 * (0,0) when a neighbour is unavailable or when a neighbour uses
 * reference 0 with a zero MV; otherwise the normal 16x16 median
 * prediction with reference 0 is used.
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    /* (ref | packed mv) == 0 tests "reference 0 AND both mv components zero"
     * in one operation on the packed 2x16bit mv cache entry */
    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
       || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
       || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){

        *mx = *my = 0;
        return;
    }

    pred_motion(h, 0, 4, 0, 0, mx, my);

    return;
}
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
895 return 256;
896 }else{
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fills the temporal-direct distance scale factor tables for the current
 * slice: the per-field table (indices 16.. in the ref list, used for
 * field/MBAFF references) and the frame-level table.
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    MpegEncContext * const s = &h->s;
    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
    const int poc1 = h->ref_list[1][0].poc;
    int i, field;
    for(field=0; field<2; field++){
        /* intentional shadowing: per-field POCs replace the frame-level ones
         * inside this loop */
        const int poc = h->s.current_picture_ptr->field_poc[field];
        const int poc1 = h->ref_list[1][0].field_poc[field];
        for(i=0; i < 2*h->ref_count[0]; i++)
            h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
    }

    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
    }
}
/**
 * Builds the map from the co-located (list1) picture's reference indices
 * to the current slice's list0 indices, used by temporal direct mode.
 * Matching is done on "poc" values encoded as 4*frame_num + reference
 * parity bits. map[list][old_ref] holds the frame-level mapping,
 * map[list][16..] the field-level one.
 */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    int start= mbafi ? 16 : 0;
    int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];

            if (!interl)
                poc |= 3;   /* progressive: mark both field parity bits */
            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
                poc= (poc&~3) + rfield + 1;

            for(j=start; j<end; j++){
                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                    int cur_ref= mbafi ? (j-16)^field : j;
                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                    if(rfield == field)
                        map[list][old_ref] = cur_ref;
                    break;
                }
            }
        }
    }
}
/**
 * Initializes the per-picture reference bookkeeping needed by direct-mode
 * prediction: stores the current ref counts/POCs in the current picture
 * and, for temporal direct B slices, builds the col->list0 maps.
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    int list, j, field;
    int sidx= (s->picture_structure&1)^1;       /* side index of the current field */
    int ref1sidx= (ref1->reference&1)^1;        /* side index of the co-located picture */

    for(list=0; list<2; list++){
        cur->ref_count[sidx][list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
    }

    if(s->picture_structure == PICT_FRAME){
        /* frame coding: both field sides share the same lists */
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
    }

    cur->mbaff= FRAME_MBAFF;

    /* the colmaps are only needed for temporal direct B slices */
    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
        return;

    for(list=0; list<2; list++){
        fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
        for(field=0; field<2; field++)
            fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
    }
}
/**
 * Derives motion vectors and reference indices for a B_Direct macroblock
 * (or its direct 8x8 sub-blocks when *mb_type is 8x8), filling
 * h->mv_cache / h->ref_cache and updating *mb_type / h->sub_mb_type.
 * Uses spatial or temporal derivation depending on
 * h->direct_spatial_mv_pred, handling all frame/field/MBAFF combinations
 * between the current MB and the co-located MB in ref_list[1][0].
 */
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    int b8_stride = h->b8_stride;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;
    int i8, i4;

    assert(h->ref_list[1][0].reference&3);

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)

    /* First locate the co-located macroblock and pick the sub_mb_type /
     * mb_type partitioning, depending on the frame/field relationship
     * between the current MB and the co-located picture. */
    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
        if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
            /* frame MB, field co-located: pick the closer field by POC distance */
            int cur_poc = s->current_picture_ptr->poc;
            int *col_poc = h->ref_list[1]->field_poc;
            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
            b8_stride = 0;
        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
            mb_xy += s->mb_stride*fieldoff;
        }
        goto single_col;
    }else{ // AFL/AFR/FR/FL -> AFR/FR
        if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
            /* field MB, frame co-located: one field MB covers an MB pair */
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
            b8_stride *= 3;
            b4_stride *= 6;
            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
                && !is_b8x8){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }
        }else{                                           //     AFR/FR    -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
                /* FIXME save sub mb types from previous frames (or derive from MVs)
                 * so we know exactly what block size to use */
                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }
        }
    }

    /* pointers into the co-located picture's motion/reference tables */
    l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
    l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
    if(!b8_stride){
        if(s->mb_y&1){
            /* bottom MB of a frame pair reading a field MB: skip to the lower half */
            l1ref0 += h->b8_stride;
            l1ref1 += h->b8_stride;
            l1mv0 += 2*b4_stride;
            l1mv1 += 2*b4_stride;
        }
    }

    if(h->direct_spatial_mv_pred){
        int ref[2];
        int mv[2][2];
        int list;

        /* FIXME interlacing + spatial direct uses wrong colocated block positions */

        /* ref = min(neighbors) */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
            if(refc == PART_NOT_AVAILABLE)
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
            /* unsigned compare makes negative (unavailable) refs largest */
            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
            if(ref[list] < 0)
                ref[list] = -1;
        }

        if(ref[0] < 0 && ref[1] < 0){
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
        }else{
            for(list=0; list<2; list++){
                if(ref[list] >= 0)
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                else
                    mv[list][0] = mv[list][1] = 0;
            }
        }

        if(ref[1] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L1;
            sub_mb_type &= ~MB_TYPE_L1;
        }else if(ref[0] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L0;
            sub_mb_type &= ~MB_TYPE_L0;
        }

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* frame/field mismatch with the co-located MB: per-8x8 handling */
            for(i8=0; i8<4; i8++){
                int x8 = i8&1;
                int y8 = i8>>1;
                int xy8 = x8+y8*b8_stride;
                int xy4 = 3*x8+y8*b4_stride;
                int a=0, b=0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                /* col_zero_flag test: co-located block is near-stationary */
                if(!IS_INTRA(mb_type_col[y8])
                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
                    if(ref[0] > 0)
                        a= pack16to32(mv[0][0],mv[0][1]);
                    if(ref[1] > 0)
                        b= pack16to32(mv[1][0],mv[1][1]);
                }else{
                    a= pack16to32(mv[0][0],mv[0][1]);
                    b= pack16to32(mv[1][0],mv[1][1]);
                }
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
            }
        }else if(IS_16X16(*mb_type)){
            int a=0, b=0;

            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
            if(!IS_INTRA(mb_type_col[0])
               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       && (h->x264_build>33 || !h->x264_build)))){
                if(ref[0] > 0)
                    a= pack16to32(mv[0][0],mv[0][1]);
                if(ref[1] > 0)
                    b= pack16to32(mv[1][0],mv[1][1]);
            }else{
                a= pack16to32(mv[0][0],mv[0][1]);
                b= pack16to32(mv[1][0],mv[1][1]);
            }
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);

                /* col_zero_flag */
                if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
                                                 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
                                                     && (h->x264_build>33 || !h->x264_build)))){
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
                    if(IS_SUB_8X8(sub_mb_type)){
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                            if(ref[1] == 0)
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                        }
                    }else
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                            if(ref[1] == 0)
                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
                        }
                    }
                }
            }
        }
    }else{ /* direct temporal mv pred */
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
        const int *dist_scale_factor = h->dist_scale_factor;
        int ref_offset= 0;

        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
            map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
            map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
            dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
        }
        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
            ref_offset += 16;   /* use the field half of the colmap */

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* FIXME assumes direct_8x8_inference == 1 */
            int y_shift = 2*!IS_INTERLACED(*mb_type);

            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[y8])){
                    /* intra co-located block: zero refs and MVs */
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8*b8_stride];
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                }
                scale = dist_scale_factor[ref0];
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);

                {
                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
                    int my_col = (mv_col[1]<<y_shift)/2;   /* vertical MV rescaled across frame/field */
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * my_col + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
                }
            }
            return;
        }

        /* one-to-one mv scaling */

        if(IS_16X16(*mb_type)){
            int ref, mv0, mv1;

            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if(IS_INTRA(mb_type_col[0])){
                ref=mv0=mv1=0;
            }else{
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                int mv_l0[2];
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                ref= ref0;
                mv0= pack16to32(mv_l0[0],mv_l0[1]);
                /* L1 MV is the L0 MV minus the co-located MV */
                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
            }
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[0])){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                }
                scale = dist_scale_factor[ref0];

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                if(IS_SUB_8X8(sub_mb_type)){
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
                }else
                for(i4=0; i4<4; i4++){
                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
                }
            }
        }
    }
}
/**
 * Copies the per-macroblock motion data from the decode caches
 * (mv_cache / ref_cache / mvd_cache) back into the picture-wide tables
 * (motion_val / ref_index / mvd_table / direct_table).
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
    int list;

    if(!USES_LIST(mb_type, 0))
        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);

    for(list=0; list<h->list_count; list++){
        int y;
        if(!USES_LIST(mb_type, list))
            continue;

        /* copy the 4x4 MV grid row by row; each 64-bit store moves two MVs */
        for(y=0; y<4; y++){
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        }
        if( h->pps.cabac ) {
            /* CABAC needs the MV differences of the neighbours for context modelling */
            if(IS_SKIP(mb_type))
                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
            else
                for(y=0; y<4; y++){
                    *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
                    *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
                }
        }

        {
            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
        }
    }

    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
        if(IS_8X8(mb_type)){
            uint8_t *direct_table = &h->direct_table[b8_xy];
            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
        }
    }
}
/**
 * Parses a NAL unit header and removes the emulation-prevention escape
 * bytes (0x000003 -> 0x0000) from the payload.
 * Sets h->nal_ref_idc and h->nal_unit_type from the first byte.
 * @param dst_length returns the unescaped payload length
 * @param consumed returns the number of input bytes consumed (incl. header)
 * @return pointer to the unescaped payload (src itself when no escapes
 *         were found, otherwise an internal rbsp buffer), or NULL on OOM
 */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;                //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;
#if 0
    for(i=0; i<length; i++)
        printf("%2X ", src[i]);
#endif

    /* fast scan for the first 0x00 0x00 pattern, word-at-a-time when
     * unaligned loads are cheap */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}
1455 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1456 int v= *src;
1457 int r;
1459 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1461 for(r=1; r<9; r++){
1462 if(v&1) return r;
1463 v>>=1;
1465 return 0;
/**
 * IDCT transforms the 16 dc values and dequantizes them.
 * The DC coefficients sit at positions x_offset/y_offset inside the
 * 16x16 block array (one per 4x4 luma block).
 * @param qp quantization parameter
 */
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
#define stride 16
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    //memset(block, 64, 2*256);
    //return;
    /* first pass: 4-point Hadamard butterflies over the rows of the 4x4 DC grid */
    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    /* second pass: butterflies over the columns, then dequantize */
    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
    }
}
#if 0
/**
 * DCT transforms the 16 dc values.
 * NOTE(review): dead code — forward counterpart of
 * h264_luma_dc_dequant_idct_c, kept for reference only.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//  const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
1548 #undef xStride
1549 #undef stride
1551 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1552 const int stride= 16*2;
1553 const int xStride= 16;
1554 int a,b,c,d,e;
1556 a= block[stride*0 + xStride*0];
1557 b= block[stride*0 + xStride*1];
1558 c= block[stride*1 + xStride*0];
1559 d= block[stride*1 + xStride*1];
1561 e= a-b;
1562 a= a+b;
1563 b= c-d;
1564 c= c+d;
1566 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1567 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1568 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1569 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
#if 0
/* NOTE(review): dead code — forward 2x2 chroma DC transform, counterpart
 * of chroma_dc_dequant_idct_c, kept for reference only. */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
/**
 * gets the chroma qp.
 * t selects one of the two chroma QP tables in the PPS
 * (presumably Cb vs Cr — confirm against callers).
 */
static inline int get_chroma_qp(H264Context *h, int t, int qscale){
    return h->pps.chroma_qp_table[t][qscale];
}
/**
 * Motion compensation for one partition in one prediction direction:
 * interpolates luma (qpix_op) and both chroma planes (chroma_op) from
 * pic into dest_*, using the cached MV for block n. Falls back to
 * ff_emulated_edge_mc when the MV points (partly) outside the picture.
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);   /* quarter-pel phase selects the qpel function */
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    /* sub-pel interpolation reads 3 extra pixels around the block */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
        src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
/**
 * Unweighted motion compensation for one partition: the first used list
 * is rendered with the "put" functions, the second (if any) is blended
 * in with the "avg" functions.
 */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op= qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    dest_y  += 2*x_offset + 2*y_offset*h-> mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op);

        /* a second direction averages on top of the first */
        qpix_op= qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_op, chroma_op);
    }
}
/**
 * Weighted motion compensation for one partition: bi-prediction blends
 * both references with implicit (use_weight==2) or explicit weights;
 * uni-prediction applies the explicit weight/offset of the single list.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    dest_y  += 2*x_offset + 2*y_offset*h-> mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* render list0 into the destination, list1 into scratch, then blend */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weighting: weights sum to 64, denom 5, no offset */
            int weight0 = h->implicit_weight[refn0][refn1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                              h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
                              h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                              h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
                              h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
        }
    }else{
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
        }
    }
}
/**
 * Dispatches motion compensation for one partition to either the
 * weighted or the standard path. The weighted path is taken for explicit
 * weighting (use_weight==1) and for implicit bi-prediction whose weight
 * pair is not the trivial 32/32 average.
 */
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                           int list0, int list1){
    if((h->use_weight==2 && list0 && list1
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
       || h->use_weight==1)
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                         x_offset, y_offset, qpix_put, chroma_put,
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
    else
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
}
/* Prefetch reference-picture pixels for the given list using the motion vector
 * cached for this macroblock; a no-op when the cached ref index is negative
 * (no reference available for this list). */
1779 static inline void prefetch_motion(H264Context *h, int list){
1780 /* fetch pixels for estimated mv 4 macroblocks ahead
1781 * optimized for 64byte cache lines */
1782 MpegEncContext * const s = &h->s;
1783 const int refn = h->ref_cache[list][scan8[0]];
1784 if(refn >= 0){
/* mv_cache is in quarter-pel units; >>2 converts to full pixels */
1785 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1786 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1787 uint8_t **src= h->ref_list[list][refn].data;
1788 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1789 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma planes are half resolution (>>1); prefetch both via plane stride */
1790 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1791 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Perform inter-prediction (motion compensation) for a whole macroblock.
 * Dispatches on the macroblock partition type (16x16, 16x8, 8x16, 8x8 with
 * sub-partitions) and calls mc_part() per partition with the matching
 * qpel/chroma put/avg functions and the weight function table entry for
 * that partition size. Also prefetches list-0 pixels before and list-1
 * pixels after the work. */
1795 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1796 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1797 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1798 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1799 MpegEncContext * const s = &h->s;
1800 const int mb_xy= h->mb_xy;
1801 const int mb_type= s->current_picture.mb_type[mb_xy];
1803 assert(IS_INTER(mb_type));
1805 prefetch_motion(h, 0);
1807 if(IS_16X16(mb_type)){
1808 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1809 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1810 &weight_op[0], &weight_avg[0],
1811 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1812 }else if(IS_16X8(mb_type)){
/* two 16x8 halves: second partition starts at y_offset 4 (8 luma rows) */
1813 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1815 &weight_op[1], &weight_avg[1],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1818 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1819 &weight_op[1], &weight_avg[1],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 }else if(IS_8X16(mb_type)){
/* two 8x16 halves, side by side; delta is 8 rows worth of luma stride */
1822 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1824 &weight_op[2], &weight_avg[2],
1825 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1826 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1827 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1828 &weight_op[2], &weight_avg[2],
1829 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1830 }else{
1831 int i;
1833 assert(IS_8X8(mb_type));
1835 for(i=0; i<4; i++){
/* each 8x8 block may be further split per its sub_mb_type */
1836 const int sub_mb_type= h->sub_mb_type[i];
1837 const int n= 4*i;
1838 int x_offset= (i&1)<<2;
1839 int y_offset= (i&2)<<1;
1841 if(IS_SUB_8X8(sub_mb_type)){
1842 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1843 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1844 &weight_op[3], &weight_avg[3],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_8X4(sub_mb_type)){
1847 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1849 &weight_op[4], &weight_avg[4],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1852 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1853 &weight_op[4], &weight_avg[4],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else if(IS_SUB_4X8(sub_mb_type)){
1856 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1858 &weight_op[5], &weight_avg[5],
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1860 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[5], &weight_avg[5],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1864 }else{
1865 int j;
1866 assert(IS_SUB_4X4(sub_mb_type));
1867 for(j=0; j<4; j++){
1868 int sub_x_offset= x_offset + 2*(j&1);
1869 int sub_y_offset= y_offset + (j&2);
1870 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1871 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1872 &weight_op[6], &weight_avg[6],
1873 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1879 prefetch_motion(h, 1);
/* Build the CAVLC level lookup table: for each suffix length (0..6) and each
 * LEVEL_TAB_BITS-bit bitstream prefix, store the decoded level and the number
 * of bits consumed. Entries whose codeword does not fit in LEVEL_TAB_BITS get
 * escape markers (value offset by +100) so the slow path can take over. */
1882 static av_cold void init_cavlc_level_tab(void){
1883 int suffix_length, mask;
1884 unsigned int i;
1886 for(suffix_length=0; suffix_length<7; suffix_length++){
1887 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
/* count of leading zeros in the window gives the unary prefix length */
1888 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1889 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
/* map unsigned level_code to signed level: even -> positive, odd -> negative */
1891 mask= -(level_code&1);
1892 level_code= (((2+level_code)>>1) ^ mask) - mask;
1893 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1894 cavlc_level_tab[suffix_length][i][0]= level_code;
1895 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1896 }else if(prefix + 1 <= LEVEL_TAB_BITS){
/* suffix did not fit: record prefix as escape (offset by 100) */
1897 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1898 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1899 }else{
/* even the prefix did not fit in the table window */
1900 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1901 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
/* One-time initialization of all static CAVLC VLC tables (coeff_token,
 * total_zeros, run_before, and their chroma-DC variants), placing each VLC
 * into its preallocated static table via INIT_VLC_USE_NEW_STATIC.
 * Guarded by a 'done' flag so repeated decoder instances reuse the tables.
 * NOTE(review): not thread-safe by itself — presumably callers serialize
 * decoder init; verify against the caller. */
1907 static av_cold void decode_init_vlc(void){
1908 static int done = 0;
1910 if (!done) {
1911 int i;
1912 int offset;
1913 done = 1;
1915 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1916 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1917 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1918 &chroma_dc_coeff_token_len [0], 1, 1,
1919 &chroma_dc_coeff_token_bits[0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
/* the four coeff_token VLCs are packed back to back in one static array */
1922 offset = 0;
1923 for(i=0; i<4; i++){
1924 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1925 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1926 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1927 &coeff_token_len [i][0], 1, 1,
1928 &coeff_token_bits[i][0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC);
1930 offset += coeff_token_vlc_tables_size[i];
1933 * This is a one time safety check to make sure that
1934 * the packed static coeff_token_vlc table sizes
1935 * were initialized correctly.
1937 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1939 for(i=0; i<3; i++){
1940 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1941 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1942 init_vlc(&chroma_dc_total_zeros_vlc[i],
1943 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1944 &chroma_dc_total_zeros_len [i][0], 1, 1,
1945 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1946 INIT_VLC_USE_NEW_STATIC);
1948 for(i=0; i<15; i++){
1949 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1950 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1951 init_vlc(&total_zeros_vlc[i],
1952 TOTAL_ZEROS_VLC_BITS, 16,
1953 &total_zeros_len [i][0], 1, 1,
1954 &total_zeros_bits[i][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC);
1958 for(i=0; i<6; i++){
1959 run_vlc[i].table = run_vlc_tables[i];
1960 run_vlc[i].table_allocated = run_vlc_tables_size;
1961 init_vlc(&run_vlc[i],
1962 RUN_VLC_BITS, 7,
1963 &run_len [i][0], 1, 1,
1964 &run_bits[i][0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
/* run7 covers zeros_left > 6 and uses the 7th row of the run tables */
1967 run7_vlc.table = run7_vlc_table,
1968 run7_vlc.table_allocated = run7_vlc_table_size;
1969 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1970 &run_len [6][0], 1, 1,
1971 &run_bits[6][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC);
1974 init_cavlc_level_tab();
/* Free all per-decoder tables allocated by alloc_tables()/context_init(),
 * including the per-thread buffers of every slice-thread context. Safe to
 * call on partially-allocated state (av_freep handles NULL). */
1978 static void free_tables(H264Context *h){
1979 int i;
1980 H264Context *hx;
1981 av_freep(&h->intra4x4_pred_mode);
1982 av_freep(&h->chroma_pred_mode_table);
1983 av_freep(&h->cbp_table);
1984 av_freep(&h->mvd_table[0]);
1985 av_freep(&h->mvd_table[1]);
1986 av_freep(&h->direct_table);
1987 av_freep(&h->non_zero_count);
1988 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base; clear the alias too */
1989 h->slice_table= NULL;
1991 av_freep(&h->mb2b_xy);
1992 av_freep(&h->mb2b8_xy);
1994 for(i = 0; i < MAX_THREADS; i++) {
1995 hx = h->thread_context[i];
1996 if(!hx) continue;
1997 av_freep(&hx->top_borders[1]);
1998 av_freep(&hx->top_borders[0]);
1999 av_freep(&hx->s.obmc_scratchpad);
2000 av_freep(&hx->rbsp_buffer[1]);
2001 av_freep(&hx->rbsp_buffer[0]);
/* context 0 is the main context itself; only free the clones */
2002 if (i) av_freep(&h->thread_context[i]);
/* Precompute 8x8 dequantization coefficients for all 52 QP values from the
 * PPS scaling matrices. If both intra/inter 8x8 matrices are identical, the
 * second table aliases the first. Coefficients are stored transposed when the
 * DSP uses a non-reference IDCT permutation. */
2006 static void init_dequant8_coeff_table(H264Context *h){
2007 int i,q,x;
2008 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2009 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2010 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2012 for(i=0; i<2; i++ ){
/* identical matrices -> share one table instead of recomputing */
2013 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2014 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2015 break;
2018 for(q=0; q<52; q++){
/* QP is split as q = 6*shift + idx; dequant scales by 2^shift */
2019 int shift = div6[q];
2020 int idx = rem6[q];
2021 for(x=0; x<64; x++)
2022 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2023 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2024 h->pps.scaling_matrix8[i][x]) << shift;
/* Precompute 4x4 dequantization coefficients for all 52 QP values and all six
 * scaling matrices (Y/Cb/Cr intra and inter). Matrices that compare equal to
 * an earlier one share its table. Stored transposed if the DSP IDCT is not
 * the C reference implementation. */
2029 static void init_dequant4_coeff_table(H264Context *h){
2030 int i,j,q,x;
2031 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2032 for(i=0; i<6; i++ ){
2033 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier buffer when the scaling matrix is identical */
2034 for(j=0; j<i; j++){
2035 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2036 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2037 break;
2040 if(j<i)
2041 continue;
2043 for(q=0; q<52; q++){
/* 4x4 path carries an extra +2 shift relative to the 8x8 path */
2044 int shift = div6[q] + 2;
2045 int idx = rem6[q];
2046 for(x=0; x<16; x++)
2047 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2048 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2049 h->pps.scaling_matrix4[i][x]) << shift;
/* Initialize all dequant tables: 4x4 always, 8x8 only when the PPS enables
 * transform_8x8_mode. With lossless transform bypass, QP 0 entries are forced
 * to the flat value 1<<6 so dequantization becomes an identity scale. */
2054 static void init_dequant_tables(H264Context *h){
2055 int i,x;
2056 init_dequant4_coeff_table(h);
2057 if(h->pps.transform_8x8_mode)
2058 init_dequant8_coeff_table(h);
2059 if(h->sps.transform_bypass){
2060 for(i=0; i<6; i++)
2061 for(x=0; x<16; x++)
2062 h->dequant4_coeff[i][0][x] = 1<<6;
2063 if(h->pps.transform_8x8_mode)
2064 for(i=0; i<2; i++)
2065 for(x=0; x<64; x++)
2066 h->dequant8_coeff[i][0][x] = 1<<6;
2072 * allocates tables.
2073 * needs width/height
/* Allocate all per-picture-size decoder tables (prediction-mode, nnz, slice,
 * cbp, mvd, direct, and the mb->b/b8 index maps) and fill the index maps.
 * Returns 0 on success; on any allocation failure frees everything already
 * allocated via free_tables() and returns -1. */
2075 static int alloc_tables(H264Context *h){
2076 MpegEncContext * const s = &h->s;
/* one extra row so edge macroblocks can read a valid neighbour row */
2077 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2078 int x,y;
2080 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t), fail)
2082 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t), fail)
2083 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
2084 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
2086 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
2087 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t), fail);
2088 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t), fail);
2089 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
/* -1 marks "no slice"; slice_table is offset so index 0 is the first real MB */
2091 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2092 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2094 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
2095 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
2096 for(y=0; y<s->mb_height; y++){
2097 for(x=0; x<s->mb_width; x++){
2098 const int mb_xy= x + y*s->mb_stride;
2099 const int b_xy = 4*x + 4*y*h->b_stride;
2100 const int b8_xy= 2*x + 2*y*h->b8_stride;
2102 h->mb2b_xy [mb_xy]= b_xy;
2103 h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad depends on linesize and is allocated later in frame_start() */
2107 s->obmc_scratchpad = NULL;
2109 if(!h->dequant4_coeff[0])
2110 init_dequant_tables(h);
2112 return 0;
2113 fail:
2114 free_tables(h);
2115 return -1;
2119 * Mimic alloc_tables(), but for every context thread.
/* Share the read-mostly tables of the source context with a clone used by a
 * slice thread (pointer copies, no new allocations) and give the clone its
 * own prediction-function table and a NULL scratchpad. */
2121 static void clone_tables(H264Context *dst, H264Context *src){
2122 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2123 dst->non_zero_count = src->non_zero_count;
2124 dst->slice_table = src->slice_table;
2125 dst->cbp_table = src->cbp_table;
2126 dst->mb2b_xy = src->mb2b_xy;
2127 dst->mb2b8_xy = src->mb2b8_xy;
2128 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2129 dst->mvd_table[0] = src->mvd_table[0];
2130 dst->mvd_table[1] = src->mvd_table[1];
2131 dst->direct_table = src->direct_table;
/* per-thread scratchpad is allocated lazily in frame_start() */
2133 dst->s.obmc_scratchpad = NULL;
2134 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2138 * Init context
2139 * Allocate buffers which are not shared amongst multiple threads.
/* Allocates the per-thread top-border buffers (16 luma + 8+8 chroma bytes per
 * macroblock column). Returns 0 on success, -1 on allocation failure; the
 * caller's free_tables() releases anything partially allocated. */
2141 static int context_init(H264Context *h){
2142 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
2143 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
2145 return 0;
2146 fail:
2147 return -1; // free_tables will clean up for us
/* Common one-time initialization shared by the decoder entry points: copies
 * dimensions/codec id from the AVCodecContext, initializes prediction and DSP
 * function tables, and seeds flat (all-16) scaling matrices as defaults. */
2150 static av_cold void common_init(H264Context *h){
2151 MpegEncContext * const s = &h->s;
2153 s->width = s->avctx->width;
2154 s->height = s->avctx->height;
2155 s->codec_id= s->avctx->codec->id;
2157 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 means "dequant tables not yet built for any PPS" */
2159 h->dequant_coeff_pps= -1;
2160 s->unrestricted_mv=1;
2161 s->decode=1; //FIXME
2163 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
/* 16 is the spec's flat default scaling-list value */
2165 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2166 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2170 * Reset SEI values at the beginning of the frame.
2172 * @param h H.264 context.
/* Clears per-frame SEI state: recovery point and CPB removal delay go to the
 * -1 "absent" sentinel; DPB output delay and buffering-period flag to 0. */
2174 static void reset_sei(H264Context *h) {
2175 h->sei_recovery_frame_cnt = -1;
2176 h->sei_dpb_output_delay = 0;
2177 h->sei_cpb_removal_delay = -1;
2178 h->sei_buffering_period_present = 0;
/* AVCodec init callback: sets MPV defaults, common H.264 state, pixel format
 * (VDPAU or negotiated), hwaccel, VLC tables, avcC detection from extradata,
 * and the field-rate time base for raw H.264. Always returns 0. */
2181 static av_cold int decode_init(AVCodecContext *avctx){
2182 H264Context *h= avctx->priv_data;
2183 MpegEncContext * const s = &h->s;
2185 MPV_decode_defaults(s);
2187 s->avctx = avctx;
2188 common_init(h);
2190 s->out_format = FMT_H264;
2191 s->workaround_bugs= avctx->workaround_bugs;
2193 // set defaults
2194 // s->decode_mb= ff_h263_decode_mb;
2195 s->quarter_sample = 1;
2196 if(!avctx->has_b_frames)
2197 s->low_delay= 1;
2199 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2200 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2201 else
2202 avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
2203 avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
2204 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
2206 decode_init_vlc();
/* extradata starting with byte 1 marks the ISO/MP4 avcC format rather than
 * Annex B start codes */
2208 if(avctx->extradata_size > 0 && avctx->extradata &&
2209 *(char *)avctx->extradata == 1){
2210 h->is_avc = 1;
2211 h->got_avcC = 0;
2212 } else {
2213 h->is_avc = 0;
2216 h->thread_context[0] = h;
2217 h->outputed_poc = INT_MIN;
2218 h->prev_poc_msb= 1<<16;
2219 reset_sei(h);
2220 if(avctx->codec_id == CODEC_ID_H264){
/* time base counts fields: double den once and mark 2 ticks per frame */
2221 if(avctx->ticks_per_frame == 1){
2222 s->avctx->time_base.den *=2;
2224 avctx->ticks_per_frame = 2;
2226 return 0;
/* Begin decoding a new frame: starts MPV/error-resilience bookkeeping,
 * resets key_frame/mmco state, precomputes block offsets for both frame and
 * field (24+) layouts, lazily allocates per-thread scratchpads (needs
 * linesize), and pre-fills slice_table for MBAFF/threaded access patterns.
 * Returns 0 on success or -1 if MPV_frame_start() fails. */
2229 static int frame_start(H264Context *h){
2230 MpegEncContext * const s = &h->s;
2231 int i;
2233 if(MPV_frame_start(s, s->avctx) < 0)
2234 return -1;
2235 ff_er_frame_start(s);
2237 * MPV_frame_start uses pict_type to derive key_frame.
2238 * This is incorrect for H.264; IDR markings must be used.
2239 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2240 * See decode_nal_units().
2242 s->current_picture_ptr->key_frame= 0;
2243 s->current_picture_ptr->mmco_reset= 0;
2245 assert(s->linesize && s->uvlinesize);
/* offsets 0..23 use frame strides; 24..47 use doubled (field) strides */
2247 for(i=0; i<16; i++){
2248 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2249 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2251 for(i=0; i<4; i++){
2252 h->block_offset[16+i]=
2253 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2254 h->block_offset[24+16+i]=
2255 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2258 /* can't be in alloc_tables because linesize isn't known there.
2259 * FIXME: redo bipred weight to not require extra buffer? */
2260 for(i = 0; i < s->avctx->thread_count; i++)
2261 if(!h->thread_context[i]->s.obmc_scratchpad)
2262 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2264 /* some macroblocks will be accessed before they're available */
2265 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2266 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2268 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2270 // We mark the current picture as non-reference after allocating it, so
2271 // that if we break out due to an error it can be released automatically
2272 // in the next MPV_frame_start().
2273 // SVQ3 as well as most other codecs have only last/next/current and thus
2274 // get released even with set reference, besides SVQ3 and others do not
2275 // mark frames as reference later "naturally".
2276 if(s->codec_id != CODEC_ID_SVQ3)
2277 s->current_picture_ptr->reference= 0;
2279 s->current_picture_ptr->field_poc[0]=
2280 s->current_picture_ptr->field_poc[1]= INT_MAX;
2281 assert(s->current_picture_ptr->long_ref==0);
2283 return 0;
/* Save the right/bottom edges of the just-decoded macroblock into
 * left_border/top_borders so the deblocking filter (and intra prediction of
 * neighbours) can use pre-filter samples. Handles MBAFF pairs via step/offset
 * adjustment; skips chroma when CODEC_FLAG_GRAY is active. */
2286 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2287 MpegEncContext * const s = &h->s;
2288 int i;
2289 int step = 1;
2290 int offset = 1;
2291 int uvoffset= 1;
2292 int top_idx = 1;
2293 int skiplast= 0;
/* step back one row so index 0 is the row above the macroblock */
2295 src_y -= linesize;
2296 src_cb -= uvlinesize;
2297 src_cr -= uvlinesize;
2299 if(!simple && FRAME_MBAFF){
2300 if(s->mb_y&1){
/* bottom MB of a pair: offsets differ for field vs. frame coded pairs */
2301 offset = MB_MBAFF ? 1 : 17;
2302 uvoffset= MB_MBAFF ? 1 : 9;
2303 if(!MB_MBAFF){
2304 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2305 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2306 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2307 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2308 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2311 }else{
2312 if(!MB_MBAFF){
2313 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2314 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2315 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2316 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2318 skiplast= 1;
2320 offset =
2321 uvoffset=
2322 top_idx = MB_MBAFF ? 0 : 1;
/* field-coded pairs interleave lines, hence step 2 */
2324 step= MB_MBAFF ? 2 : 1;
2327 // There are two lines saved, the line above the the top macroblock of a pair,
2328 // and the line above the bottom macroblock
2329 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2330 for(i=1; i<17 - skiplast; i++){
2331 h->left_border[offset+i*step]= src_y[15+i* linesize];
2334 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2335 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2337 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* chroma borders live at left_border[34] (Cb) and [34+18] (Cr) */
2338 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2339 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2340 for(i=1; i<9 - skiplast; i++){
2341 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2342 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2344 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2345 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/* Swap (xchg!=0) or copy (xchg==0) the saved left/top border samples with the
 * current macroblock's edge pixels so intra prediction sees unfiltered
 * neighbour samples; called once before and once after intra prediction.
 * Border availability depends on deblocking mode 2 (slice-boundary aware)
 * vs. plain position checks. */
2349 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2350 MpegEncContext * const s = &h->s;
2351 int temp8, i;
2352 uint64_t temp64;
2353 int deblock_left;
2354 int deblock_top;
2355 int mb_xy;
2356 int step = 1;
2357 int offset = 1;
2358 int uvoffset= 1;
2359 int top_idx = 1;
2361 if(!simple && FRAME_MBAFF){
2362 if(s->mb_y&1){
2363 offset = MB_MBAFF ? 1 : 17;
2364 uvoffset= MB_MBAFF ? 1 : 9;
2365 }else{
2366 offset =
2367 uvoffset=
2368 top_idx = MB_MBAFF ? 0 : 1;
2370 step= MB_MBAFF ? 2 : 1;
2373 if(h->deblocking_filter == 2) {
/* mode 2: only treat same-slice neighbours as available */
2374 mb_xy = h->mb_xy;
2375 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2376 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2377 } else {
2378 deblock_left = (s->mb_x > 0);
2379 deblock_top = (s->mb_y > !!MB_FIELD);
/* step back to the top-left corner sample (one row and one column) */
2382 src_y -= linesize + 1;
2383 src_cb -= uvlinesize + 1;
2384 src_cr -= uvlinesize + 1;
2386 #define XCHG(a,b,t,xchg)\
2387 t= a;\
2388 if(xchg)\
2389 a= b;\
2390 b= t;
2392 if(deblock_left){
2393 for(i = !deblock_top; i<16; i++){
2394 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2396 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2399 if(deblock_top){
2400 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2401 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2402 if(s->mb_x+1 < s->mb_width){
2403 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2407 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2408 if(deblock_left){
2409 for(i = !deblock_top; i<8; i++){
2410 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2411 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2413 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2414 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2416 if(deblock_top){
2417 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2418 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/* Reconstruct one macroblock: computes destination pointers, handles MBAFF
 * field addressing, then performs either IPCM copy, intra prediction + IDCT,
 * or motion compensation, followed by luma/chroma residual addition and
 * finally the deblocking filter. 'simple' selects the fast path with
 * MBAFF/gray/SVQ3 checks compiled out (av_always_inline + constant folding). */
2423 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2424 MpegEncContext * const s = &h->s;
2425 const int mb_x= s->mb_x;
2426 const int mb_y= s->mb_y;
2427 const int mb_xy= h->mb_xy;
2428 const int mb_type= s->current_picture.mb_type[mb_xy];
2429 uint8_t *dest_y, *dest_cb, *dest_cr;
2430 int linesize, uvlinesize /*dct_offset*/;
2431 int i;
2432 int *block_offset = &h->block_offset[0];
2433 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2434 /* is_h264 should always be true if SVQ3 is disabled. */
2435 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2436 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2437 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2439 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2440 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2441 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2443 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2444 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double strides and use the field block_offset set */
2446 if (!simple && MB_FIELD) {
2447 linesize = h->mb_linesize = s->linesize * 2;
2448 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2449 block_offset = &h->block_offset[24];
2450 if(mb_y&1){ //FIXME move out of this function?
2451 dest_y -= s->linesize*15;
2452 dest_cb-= s->uvlinesize*7;
2453 dest_cr-= s->uvlinesize*7;
2455 if(FRAME_MBAFF) {
2456 int list;
/* re-map ref indices to parity-adjusted values for MBAFF deblocking */
2457 for(list=0; list<h->list_count; list++){
2458 if(!USES_LIST(mb_type, list))
2459 continue;
2460 if(IS_16X16(mb_type)){
2461 int8_t *ref = &h->ref_cache[list][scan8[0]];
2462 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2463 }else{
2464 for(i=0; i<16; i+=4){
2465 int ref = h->ref_cache[list][scan8[i]];
2466 if(ref >= 0)
2467 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2472 } else {
2473 linesize = h->mb_linesize = s->linesize;
2474 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2475 // dct_offset = s->linesize * 16;
/* IPCM: raw samples stored in h->mb are copied straight to the picture */
2478 if (!simple && IS_INTRA_PCM(mb_type)) {
2479 for (i=0; i<16; i++) {
2480 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2482 for (i=0; i<8; i++) {
2483 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2484 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2486 } else {
2487 if(IS_INTRA(mb_type)){
/* borrow unfiltered neighbour samples for intra prediction */
2488 if(h->deblocking_filter)
2489 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2491 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2492 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2493 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2496 if(IS_INTRA4x4(mb_type)){
2497 if(simple || !s->encoding){
2498 if(IS_8x8DCT(mb_type)){
2499 if(transform_bypass){
2500 idct_dc_add =
2501 idct_add = s->dsp.add_pixels8;
2502 }else{
2503 idct_dc_add = s->dsp.h264_idct8_dc_add;
2504 idct_add = s->dsp.h264_idct8_add;
2506 for(i=0; i<16; i+=4){
2507 uint8_t * const ptr= dest_y + block_offset[i];
2508 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* profile 244 (High 4:4:4) lossless: prediction fused with residual add */
2509 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2510 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2511 }else{
2512 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2513 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2514 (h->topright_samples_available<<i)&0x4000, linesize);
2515 if(nnz){
/* single DC coefficient: cheaper dc-only idct */
2516 if(nnz == 1 && h->mb[i*16])
2517 idct_dc_add(ptr, h->mb + i*16, linesize);
2518 else
2519 idct_add (ptr, h->mb + i*16, linesize);
2523 }else{
2524 if(transform_bypass){
2525 idct_dc_add =
2526 idct_add = s->dsp.add_pixels4;
2527 }else{
2528 idct_dc_add = s->dsp.h264_idct_dc_add;
2529 idct_add = s->dsp.h264_idct_add;
2531 for(i=0; i<16; i++){
2532 uint8_t * const ptr= dest_y + block_offset[i];
2533 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2535 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2536 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2537 }else{
2538 uint8_t *topright;
2539 int nnz, tr;
/* these modes need top-right samples; synthesize them if unavailable */
2540 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2541 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2542 assert(mb_y || linesize <= block_offset[i]);
2543 if(!topright_avail){
2544 tr= ptr[3 - linesize]*0x01010101;
2545 topright= (uint8_t*) &tr;
2546 }else
2547 topright= ptr + 4 - linesize;
2548 }else
2549 topright= NULL;
2551 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2552 nnz = h->non_zero_count_cache[ scan8[i] ];
2553 if(nnz){
2554 if(is_h264){
2555 if(nnz == 1 && h->mb[i*16])
2556 idct_dc_add(ptr, h->mb + i*16, linesize);
2557 else
2558 idct_add (ptr, h->mb + i*16, linesize);
2559 }else
2560 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2566 }else{
/* intra 16x16: whole-MB prediction, then DC-transform dequant */
2567 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2568 if(is_h264){
2569 if(!transform_bypass)
2570 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2571 }else
2572 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2574 if(h->deblocking_filter)
2575 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2576 }else if(is_h264){
2577 hl_motion(h, dest_y, dest_cb, dest_cr,
2578 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2579 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2580 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residuals (intra16x16 and inter paths) */
2584 if(!IS_INTRA4x4(mb_type)){
2585 if(is_h264){
2586 if(IS_INTRA16x16(mb_type)){
2587 if(transform_bypass){
2588 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2589 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2590 }else{
2591 for(i=0; i<16; i++){
2592 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2593 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2596 }else{
2597 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2599 }else if(h->cbp&15){
2600 if(transform_bypass){
2601 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2602 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2603 for(i=0; i<16; i+=di){
2604 if(h->non_zero_count_cache[ scan8[i] ]){
2605 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2608 }else{
2609 if(IS_8x8DCT(mb_type)){
2610 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2611 }else{
2612 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2616 }else{
2617 for(i=0; i<16; i++){
2618 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2619 uint8_t * const ptr= dest_y + block_offset[i];
2620 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residuals: cbp bits 4-5 signal coded chroma */
2626 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2627 uint8_t *dest[2] = {dest_cb, dest_cr};
2628 if(transform_bypass){
2629 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2630 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2631 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2632 }else{
2633 idct_add = s->dsp.add_pixels4;
2634 for(i=16; i<16+8; i++){
2635 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2636 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2639 }else{
2640 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2641 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2642 if(is_h264){
2643 idct_add = s->dsp.h264_idct_add;
2644 idct_dc_add = s->dsp.h264_idct_dc_add;
2645 for(i=16; i<16+8; i++){
2646 if(h->non_zero_count_cache[ scan8[i] ])
2647 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2648 else if(h->mb[i*16])
2649 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2651 }else{
2652 for(i=16; i<16+8; i++){
2653 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2654 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2655 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2662 if(h->cbp || IS_INTRA(mb_type))
2663 s->dsp.clear_blocks(h->mb);
2665 if(h->deblocking_filter) {
2666 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2667 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2668 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2669 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2670 if (!simple && FRAME_MBAFF) {
2671 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2672 } else {
2673 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2679 * Process a macroblock; this case avoids checks for expensive uncommon cases.
/**
 * Decode one macroblock via the fast path: thin wrapper that instantiates
 * hl_decode_mb_internal() with simple=1 (avoids checks for expensive
 * uncommon cases such as interlacing).
 */
2681 static void hl_decode_mb_simple(H264Context *h){
2682 hl_decode_mb_internal(h, 1);
2686 * Process a macroblock; this handles edge cases, such as interlacing.
/**
 * Decode one macroblock via the slow path: wrapper that instantiates
 * hl_decode_mb_internal() with simple=0, handling edge cases such as
 * interlacing. av_noinline keeps this large instantiation out of callers.
 */
2688 static void av_noinline hl_decode_mb_complex(H264Context *h){
2689 hl_decode_mb_internal(h, 0);
/**
 * Decode the current macroblock, dispatching between the simple and complex
 * code paths. The complex path is taken for small builds (CONFIG_SMALL),
 * streams flagged complex, intra-PCM macroblocks, or lossless (qscale==0).
 */
2692 static void hl_decode_mb(H264Context *h){
2693 MpegEncContext * const s = &h->s;
2694 const int mb_xy= h->mb_xy;
2695 const int mb_type= s->current_picture.mb_type[mb_xy];
2696 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2698 if (is_complex)
2699 hl_decode_mb_complex(h);
2700 else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture into a single-field view of the given parity:
 * the bottom field starts one line down in each plane, both fields use
 * doubled linesizes, and reference/poc are set for that field only.
 * @param parity one of PICT_TOP_FIELD or PICT_BOTTOM_FIELD
 */
2703 static void pic_as_field(Picture *pic, const int parity){
2704 int i;
2705 for (i = 0; i < 4; ++i) {
2706 if (parity == PICT_BOTTOM_FIELD)
2707 pic->data[i] += pic->linesize[i];
2708 pic->reference = parity;
2709 pic->linesize[i] *= 2;
2711 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy *src into *dest if src is referenced with the requested parity.
 * For field pictures the copy is converted to a field view and its pic_id
 * is renumbered as 2*pic_id + id_add (field picture numbering).
 * @return 1 if a copy was made, 0 otherwise
 */
2714 static int split_field_copy(Picture *dest, Picture *src,
2715 int parity, int id_add){
2716 int match = !!(src->reference & parity);
2718 if (match) {
2719 *dest = *src;
2720 if(parity != PICT_FRAME){
2721 pic_as_field(dest, parity);
2722 dest->pic_id *= 2;
2723 dest->pic_id += id_add;
2727 return match;
/**
 * Build part of a default reference list by alternately taking pictures
 * that reference the selected field parity (sel) and the opposite parity
 * (sel^3) from 'in'. pic_id is set to the long-term index (is_long) or to
 * frame_num (short-term) before each copy.
 * @return number of entries written to 'def'
 */
2730 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2731 int i[2]={0};
2732 int index=0;
2734 while(i[0]<len || i[1]<len){
2735 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2736 i[0]++;
2737 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2738 i[1]++;
2739 if(i[0] < len){
2740 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2741 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2743 if(i[1] < len){
2744 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2745 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2749 return index;
/**
 * Append the pictures from 'src' into 'sorted' ordered by POC relative to
 * 'limit': dir==0 selects POCs above the limit in ascending order,
 * dir!=0 selects POCs below it in descending order (repeated selection
 * of the best remaining POC each pass).
 * @return number of entries appended
 */
2752 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2753 int i, best_poc;
2754 int out_i= 0;
2756 for(;;){
2757 best_poc= dir ? INT_MIN : INT_MAX;
2759 for(i=0; i<len; i++){
2760 const int poc= src[i]->poc;
2761 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2762 best_poc= poc;
2763 sorted[out_i]= src[i];
2766 if(best_poc == (dir ? INT_MIN : INT_MAX))
2767 break;
2768 limit= sorted[out_i++]->poc - dir;
2774 * fills the default_ref_list.
/**
 * Fill h->default_ref_list for the current slice.
 * For B slices both lists are built from short-term refs sorted around the
 * current POC (opposite directions per list) followed by long-term refs;
 * if the two lists come out identical, the first two entries of list 1 are
 * swapped, as required for B-slice defaults. For P slices a single list is
 * built from short-term then long-term refs.
 * @return 0 (always succeeds)
 */
2776 static int fill_default_ref_list(H264Context *h){
2777 MpegEncContext * const s = &h->s;
2778 int i, len;
2780 if(h->slice_type_nos==FF_B_TYPE){
2781 Picture *sorted[32];
2782 int cur_poc, list;
2783 int lens[2];
2785 if(FIELD_PICTURE)
2786 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2787 else
2788 cur_poc= s->current_picture_ptr->poc;
2790 for(list= 0; list<2; list++){
2791 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2792 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2793 assert(len<=32);
2794 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2795 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2796 assert(len<=32);
2798 if(len < h->ref_count[list])
2799 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2800 lens[list]= len;
2803 if(lens[0] == lens[1] && lens[1] > 1){
2804 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2805 if(i == lens[0])
2806 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2808 }else{
2809 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2810 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2811 assert(len <= 32);
2812 if(len < h->ref_count[0])
2813 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2815 #ifdef TRACE
2816 for (i=0; i<h->ref_count[0]; i++) {
2817 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2819 if(h->slice_type_nos==FF_B_TYPE){
2820 for (i=0; i<h->ref_count[1]; i++) {
2821 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2824 #endif
2825 return 0;
2828 static void print_short_term(H264Context *h);
2829 static void print_long_term(H264Context *h);
2832 * Extract structure information about the picture described by pic_num in
2833 * the current decoding context (frame or field). Note that pic_num is
2834 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2835 * @param pic_num picture number for which to extract structure information
2836 * @param structure one of PICT_XXX describing structure of picture
2837 * with pic_num
2838 * @return frame number (short term) or long term index of picture
2839 * described by pic_num
/**
 * Extract structure information from a picture number.
 * In field pictures even pic_nums denote the opposite field parity and
 * the actual frame number / long-term index is pic_num >> 1; in frame
 * pictures pic_num passes through unchanged.
 * @param structure receives one of the PICT_XXX values
 * @return frame number (short term) or long term index
 */
2841 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2842 MpegEncContext * const s = &h->s;
2844 *structure = s->picture_structure;
2845 if(FIELD_PICTURE){
2846 if (!(pic_num & 1))
2847 /* opposite field */
2848 *structure ^= PICT_FRAME;
2849 pic_num >>= 1;
2852 return pic_num;
/**
 * Parse ref_pic_list_modification from the slice header and reorder
 * h->ref_list accordingly (starts from the default lists). Handles both
 * short-term (abs_diff_pic_num) and long-term (long_term_pic_idx)
 * reordering operations, then validates that every list entry refers to
 * an actual picture, patching holes with default_ref_list[list][0].
 * @return 0 on success, -1 on bitstream error
 */
2855 static int decode_ref_pic_list_reordering(H264Context *h){
2856 MpegEncContext * const s = &h->s;
2857 int list, index, pic_structure;
2859 print_short_term(h);
2860 print_long_term(h);
2862 for(list=0; list<h->list_count; list++){
2863 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2865 if(get_bits1(&s->gb)){
2866 int pred= h->curr_pic_num;
2868 for(index=0; ; index++){
2869 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2870 unsigned int pic_id;
2871 int i;
2872 Picture *ref = NULL;
2874 if(reordering_of_pic_nums_idc==3)
2875 break;
2877 if(index >= h->ref_count[list]){
2878 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2879 return -1;
2882 if(reordering_of_pic_nums_idc<3){
2883 if(reordering_of_pic_nums_idc<2){
2884 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2885 int frame_num;
2887 if(abs_diff_pic_num > h->max_pic_num){
2888 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2889 return -1;
/* idc 0 subtracts, idc 1 adds; pred wraps modulo max_pic_num */
2892 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2893 else pred+= abs_diff_pic_num;
2894 pred &= h->max_pic_num - 1;
2896 frame_num = pic_num_extract(h, pred, &pic_structure);
2898 for(i= h->short_ref_count-1; i>=0; i--){
2899 ref = h->short_ref[i];
2900 assert(ref->reference);
2901 assert(!ref->long_ref);
2903 ref->frame_num == frame_num &&
2904 (ref->reference & pic_structure)
2906 break;
2908 if(i>=0)
2909 ref->pic_id= pred;
2910 }else{
2911 int long_idx;
2912 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2914 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2916 if(long_idx>31){
2917 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2918 return -1;
2920 ref = h->long_ref[long_idx];
2921 assert(!(ref && !ref->reference));
2922 if(ref && (ref->reference & pic_structure)){
2923 ref->pic_id= pic_id;
2924 assert(ref->long_ref);
2925 i=0;
2926 }else{
2927 i=-1;
2931 if (i < 0) {
2932 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2933 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2934 } else {
/* shift existing entries down to insert ref at 'index' */
2935 for(i=index; i+1<h->ref_count[list]; i++){
2936 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2937 break;
2939 for(; i > index; i--){
2940 h->ref_list[list][i]= h->ref_list[list][i-1];
2942 h->ref_list[list][index]= *ref;
2943 if (FIELD_PICTURE){
2944 pic_as_field(&h->ref_list[list][index], pic_structure);
2947 }else{
2948 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2949 return -1;
/* final sanity pass: no list entry may be an empty picture */
2954 for(list=0; list<h->list_count; list++){
2955 for(index= 0; index < h->ref_count[list]; index++){
2956 if(!h->ref_list[list][index].data[0]){
2957 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2958 if(h->default_ref_list[list][0].data[0])
2959 h->ref_list[list][index]= h->default_ref_list[list][0];
2960 else
2961 return -1;
2966 return 0;
/**
 * For MBAFF decoding, duplicate each frame reference into a pair of
 * per-field Picture entries at ref_list[list][16+2*i] (top) and
 * [16+2*i+1] (bottom), with doubled linesizes and per-field POCs,
 * and replicate the corresponding weighted-prediction tables.
 */
2969 static void fill_mbaff_ref_list(H264Context *h){
2970 int list, i, j;
2971 for(list=0; list<2; list++){ //FIXME try list_count
2972 for(i=0; i<h->ref_count[list]; i++){
2973 Picture *frame = &h->ref_list[list][i];
2974 Picture *field = &h->ref_list[list][16+2*i];
2975 field[0] = *frame;
2976 for(j=0; j<3; j++)
2977 field[0].linesize[j] <<= 1;
2978 field[0].reference = PICT_TOP_FIELD;
2979 field[0].poc= field[0].field_poc[0];
2980 field[1] = field[0];
2981 for(j=0; j<3; j++)
2982 field[1].data[j] += frame->linesize[j];
2983 field[1].reference = PICT_BOTTOM_FIELD;
2984 field[1].poc= field[1].field_poc[1];
2986 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2987 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2988 for(j=0; j<2; j++){
2989 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2990 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2994 for(j=0; j<h->ref_count[1]; j++){
2995 for(i=0; i<h->ref_count[0]; i++)
2996 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2997 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2998 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse the explicit pred_weight_table() from the slice header
 * (luma/chroma log2 weight denominators plus per-reference weights
 * and offsets). Absent entries get the default weight (1<<denom) and
 * offset 0. Sets h->use_weight / h->use_weight_chroma when any
 * non-default weight is present. Only parses list 1 for B slices.
 * @return 0 (always succeeds)
 */
3002 static int pred_weight_table(H264Context *h){
3003 MpegEncContext * const s = &h->s;
3004 int list, i;
3005 int luma_def, chroma_def;
3007 h->use_weight= 0;
3008 h->use_weight_chroma= 0;
3009 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3010 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3011 luma_def = 1<<h->luma_log2_weight_denom;
3012 chroma_def = 1<<h->chroma_log2_weight_denom;
3014 for(list=0; list<2; list++){
3015 h->luma_weight_flag[list] = 0;
3016 h->chroma_weight_flag[list] = 0;
3017 for(i=0; i<h->ref_count[list]; i++){
3018 int luma_weight_flag, chroma_weight_flag;
3020 luma_weight_flag= get_bits1(&s->gb);
3021 if(luma_weight_flag){
3022 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3023 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3024 if( h->luma_weight[list][i] != luma_def
3025 || h->luma_offset[list][i] != 0) {
3026 h->use_weight= 1;
3027 h->luma_weight_flag[list]= 1;
3029 }else{
3030 h->luma_weight[list][i]= luma_def;
3031 h->luma_offset[list][i]= 0;
3034 if(CHROMA){
3035 chroma_weight_flag= get_bits1(&s->gb);
3036 if(chroma_weight_flag){
3037 int j;
3038 for(j=0; j<2; j++){
3039 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3040 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3041 if( h->chroma_weight[list][i][j] != chroma_def
3042 || h->chroma_offset[list][i][j] != 0) {
3043 h->use_weight_chroma= 1;
3044 h->chroma_weight_flag[list]= 1;
3047 }else{
3048 int j;
3049 for(j=0; j<2; j++){
3050 h->chroma_weight[list][i][j]= chroma_def;
3051 h->chroma_offset[list][i][j]= 0;
3056 if(h->slice_type_nos != FF_B_TYPE) break;
3058 h->use_weight= h->use_weight || h->use_weight_chroma;
3059 return 0;
/**
 * Initialize the implicit weighted-prediction table for B slices.
 * Weights are derived from POC distances (td/tb, clipped to [-128,127])
 * per the H.264 implicit weighting formula; out-of-range distance scale
 * factors fall back to the neutral weight 32. The single-reference case
 * with symmetric POCs disables weighting entirely.
 */
3062 static void implicit_weight_table(H264Context *h){
3063 MpegEncContext * const s = &h->s;
3064 int ref0, ref1, i;
3065 int cur_poc = s->current_picture_ptr->poc;
3067 for (i = 0; i < 2; i++) {
3068 h->luma_weight_flag[i] = 0;
3069 h->chroma_weight_flag[i] = 0;
3072 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3073 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3074 h->use_weight= 0;
3075 h->use_weight_chroma= 0;
3076 return;
3079 h->use_weight= 2;
3080 h->use_weight_chroma= 2;
3081 h->luma_log2_weight_denom= 5;
3082 h->chroma_log2_weight_denom= 5;
3084 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3085 int poc0 = h->ref_list[0][ref0].poc;
3086 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3087 int poc1 = h->ref_list[1][ref1].poc;
3088 int td = av_clip(poc1 - poc0, -128, 127);
3089 if(td){
3090 int tb = av_clip(cur_poc - poc0, -128, 127);
3091 int tx = (16384 + (FFABS(td) >> 1)) / td;
3092 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3093 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3094 h->implicit_weight[ref0][ref1] = 32;
3095 else
3096 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3097 }else
3098 h->implicit_weight[ref0][ref1] = 32;
3104 * Mark a picture as no longer needed for reference. The refmask
3105 * argument allows unreferencing of individual fields or the whole frame.
3106 * If the picture becomes entirely unreferenced, but is being held for
3107 * display purposes, it is marked as such.
3108 * @param refmask mask of fields to unreference; the mask is bitwise
3109 * anded with the reference marking of pic
3110 * @return non-zero if pic becomes entirely unreferenced (except possibly
3111 * for display purposes) zero if one of the fields remains in
3112 * reference
/**
 * Mark a picture as no longer needed for reference; refmask is ANDed with
 * the current reference marking so individual fields or the whole frame
 * can be unreferenced. If the picture becomes entirely unreferenced but is
 * still queued in delayed_pic, it is tagged DELAYED_PIC_REF so its buffer
 * survives for display.
 * @return non-zero if pic became entirely unreferenced, 0 otherwise
 */
3114 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3115 int i;
3116 if (pic->reference &= refmask) {
3117 return 0;
3118 } else {
3119 for(i = 0; h->delayed_pic[i]; i++)
3120 if(pic == h->delayed_pic[i]){
3121 pic->reference=DELAYED_PIC_REF;
3122 break;
3124 return 1;
3129 * instantaneous decoder refresh.
/**
 * Instantaneous decoder refresh: drop all long-term and short-term
 * references and reset frame_num/POC prediction state, as required when
 * decoding an IDR picture.
 */
3131 static void idr(H264Context *h){
3132 int i;
3134 for(i=0; i<16; i++){
3135 remove_long(h, i, 0);
3137 assert(h->long_ref_count==0);
3139 for(i=0; i<h->short_ref_count; i++){
3140 unreference_pic(h, h->short_ref[i], 0);
3141 h->short_ref[i]= NULL;
3143 h->short_ref_count=0;
3144 h->prev_frame_num= 0;
3145 h->prev_frame_num_offset= 0;
3146 h->prev_poc_msb=
3147 h->prev_poc_lsb= 0;
3150 /* forget old pics after a seek */
/* forget old pics after a seek */
/**
 * AVCodec.flush callback: clear the delayed-picture queue, drop all
 * references (via idr()), reset SEI and field state, and flush the
 * underlying MPV decoder so decoding can restart cleanly after a seek.
 */
3151 static void flush_dpb(AVCodecContext *avctx){
3152 H264Context *h= avctx->priv_data;
3153 int i;
3154 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3155 if(h->delayed_pic[i])
3156 h->delayed_pic[i]->reference= 0;
3157 h->delayed_pic[i]= NULL;
3159 h->outputed_poc= INT_MIN;
3160 h->prev_interlaced_frame = 1;
3161 idr(h);
3162 if(h->s.current_picture_ptr)
3163 h->s.current_picture_ptr->reference= 0;
3164 h->s.first_field= 0;
3165 reset_sei(h);
3166 ff_mpeg_flush(avctx);
3170 * Find a Picture in the short term reference list by frame number.
3171 * @param frame_num frame number to search for
3172 * @param idx the index into h->short_ref where returned picture is found
3173 * undefined if no picture found.
3174 * @return pointer to the found picture, or NULL if no pic with the provided
3175 * frame number is found
/**
 * Find a Picture in the short term reference list by frame number.
 * @param frame_num frame number to search for
 * @param idx receives the index into h->short_ref where the picture was
 *            found; undefined if no picture is found
 * @return pointer to the found picture, or NULL if no picture with the
 *         provided frame number exists in the short-term list
 */
3177 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3178 MpegEncContext * const s = &h->s;
3179 int i;
3181 for(i=0; i<h->short_ref_count; i++){
3182 Picture *pic= h->short_ref[i];
3183 if(s->avctx->debug&FF_DEBUG_MMCO)
3184 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3185 if(pic->frame_num == frame_num) {
3186 *idx = i;
3187 return pic;
3190 return NULL;
3194 * Remove a picture from the short term reference list by its index in
3195 * that list. This does no checking on the provided index; it is assumed
3196 * to be valid. Other list entries are shifted down.
3197 * @param i index into h->short_ref of picture to remove.
/**
 * Remove a picture from the short term reference list by its index in
 * that list, shifting the following entries down. No bounds checking
 * beyond the assert; the index is assumed valid.
 * @param i index into h->short_ref of the picture to remove
 */
3199 static void remove_short_at_index(H264Context *h, int i){
3200 assert(i >= 0 && i < h->short_ref_count);
3201 h->short_ref[i]= NULL;
3202 if (--h->short_ref_count)
3203 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3208 * @return the removed picture or NULL if an error occurs
/**
 * Unreference (per ref_mask) a short-term picture found by frame number,
 * and drop it from the short-term list if it became entirely unreferenced.
 * @return the matched picture or NULL if no picture with frame_num exists
 */
3210 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3211 MpegEncContext * const s = &h->s;
3212 Picture *pic;
3213 int i;
3215 if(s->avctx->debug&FF_DEBUG_MMCO)
3216 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3218 pic = find_short(h, frame_num, &i);
3219 if (pic){
3220 if(unreference_pic(h, pic, ref_mask))
3221 remove_short_at_index(h, i);
3224 return pic;
3228 * Remove a picture from the long term reference list by its index in
3229 * that list.
3230 * @return the removed picture or NULL if an error occurs
/**
 * Unreference (per ref_mask) the long-term picture at index i, and clear
 * its long_ref slot if it became entirely unreferenced.
 * @return the picture at that index (possibly still referenced) or NULL
 */
3232 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3233 Picture *pic;
3235 pic= h->long_ref[i];
3236 if (pic){
3237 if(unreference_pic(h, pic, ref_mask)){
3238 assert(h->long_ref[i]->long_ref == 1);
3239 h->long_ref[i]->long_ref= 0;
3240 h->long_ref[i]= NULL;
3241 h->long_ref_count--;
3245 return pic;
3249 * print short term list
/**
 * Dump the short term reference list to the log, but only when
 * FF_DEBUG_MMCO debugging is enabled.
 */
3251 static void print_short_term(H264Context *h) {
3252 uint32_t i;
3253 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3254 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3255 for(i=0; i<h->short_ref_count; i++){
3256 Picture *pic= h->short_ref[i];
3257 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3263 * print long term list
/**
 * Dump the long term reference list (non-NULL slots of the fixed 16-entry
 * table) to the log, but only when FF_DEBUG_MMCO debugging is enabled.
 */
3265 static void print_long_term(H264Context *h) {
3266 uint32_t i;
3267 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3268 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3269 for(i = 0; i < 16; i++){
3270 Picture *pic= h->long_ref[i];
3271 if (pic) {
3272 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3279 * Executes the reference picture marking (memory management control operations).
/**
 * Execute the reference picture marking (memory management control
 * operations, MMCO) for the just-decoded picture: apply each parsed MMCO
 * opcode, then — if no MMCO_LONG assigned the current picture — insert it
 * into the short-term list (or just mark the second field of a
 * complementary pair), and finally evict one reference if the DPB exceeds
 * sps.ref_frame_count (corrupt-stream protection).
 * @return 0 (always)
 */
3281 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3282 MpegEncContext * const s = &h->s;
3283 int i, av_uninit(j);
3284 int current_ref_assigned=0;
3285 Picture *av_uninit(pic);
3287 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3288 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3290 for(i=0; i<mmco_count; i++){
3291 int av_uninit(structure), av_uninit(frame_num);
3292 if(s->avctx->debug&FF_DEBUG_MMCO)
3293 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3295 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3296 || mmco[i].opcode == MMCO_SHORT2LONG){
3297 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3298 pic = find_short(h, frame_num, &j);
3299 if(!pic){
3300 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3301 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3302 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3303 continue;
3307 switch(mmco[i].opcode){
3308 case MMCO_SHORT2UNUSED:
3309 if(s->avctx->debug&FF_DEBUG_MMCO)
3310 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3311 remove_short(h, frame_num, structure ^ PICT_FRAME);
3312 break;
3313 case MMCO_SHORT2LONG:
/* move the short-term picture to long-term index long_arg */
3314 if (h->long_ref[mmco[i].long_arg] != pic)
3315 remove_long(h, mmco[i].long_arg, 0);
3317 remove_short_at_index(h, j);
3318 h->long_ref[ mmco[i].long_arg ]= pic;
3319 if (h->long_ref[ mmco[i].long_arg ]){
3320 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3321 h->long_ref_count++;
3323 break;
3324 case MMCO_LONG2UNUSED:
3325 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3326 pic = h->long_ref[j];
3327 if (pic) {
3328 remove_long(h, j, structure ^ PICT_FRAME);
3329 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3330 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3331 break;
3332 case MMCO_LONG:
3333 // Comment below left from previous code as it is an interresting note.
3334 /* First field in pair is in short term list or
3335 * at a different long term index.
3336 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3337 * Report the problem and keep the pair where it is,
3338 * and mark this field valid.
3341 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3342 remove_long(h, mmco[i].long_arg, 0);
3344 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3345 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3346 h->long_ref_count++;
3349 s->current_picture_ptr->reference |= s->picture_structure;
3350 current_ref_assigned=1;
3351 break;
3352 case MMCO_SET_MAX_LONG:
3353 assert(mmco[i].long_arg <= 16);
3354 // just remove the long term which index is greater than new max
3355 for(j = mmco[i].long_arg; j<16; j++){
3356 remove_long(h, j, 0);
3358 break;
3359 case MMCO_RESET:
/* drop every reference and restart frame_num/POC state */
3360 while(h->short_ref_count){
3361 remove_short(h, h->short_ref[0]->frame_num, 0);
3363 for(j = 0; j < 16; j++) {
3364 remove_long(h, j, 0);
3366 s->current_picture_ptr->poc=
3367 s->current_picture_ptr->field_poc[0]=
3368 s->current_picture_ptr->field_poc[1]=
3369 h->poc_lsb=
3370 h->poc_msb=
3371 h->frame_num=
3372 s->current_picture_ptr->frame_num= 0;
3373 s->current_picture_ptr->mmco_reset=1;
3374 break;
3375 default: assert(0);
3379 if (!current_ref_assigned) {
3380 /* Second field of complementary field pair; the first field of
3381 * which is already referenced. If short referenced, it
3382 * should be first entry in short_ref. If not, it must exist
3383 * in long_ref; trying to put it on the short list here is an
3384 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3386 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3387 /* Just mark the second field valid */
3388 s->current_picture_ptr->reference = PICT_FRAME;
3389 } else if (s->current_picture_ptr->long_ref) {
3390 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3391 "assignment for second field "
3392 "in complementary field pair "
3393 "(first field is long term)\n");
3394 } else {
3395 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3396 if(pic){
3397 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* insert current picture at the head of the short-term list */
3400 if(h->short_ref_count)
3401 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3403 h->short_ref[0]= s->current_picture_ptr;
3404 h->short_ref_count++;
3405 s->current_picture_ptr->reference |= s->picture_structure;
3409 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3411 /* We have too many reference frames, probably due to corrupted
3412 * stream. Need to discard one frame. Prevents overrun of the
3413 * short_ref and long_ref buffers.
3415 av_log(h->s.avctx, AV_LOG_ERROR,
3416 "number of reference frames exceeds max (probably "
3417 "corrupt input), discarding one\n");
3419 if (h->long_ref_count && !h->short_ref_count) {
3420 for (i = 0; i < 16; ++i)
3421 if (h->long_ref[i])
3422 break;
3424 assert(i < 16);
3425 remove_long(h, i, 0);
3426 } else {
3427 pic = h->short_ref[h->short_ref_count - 1];
3428 remove_short(h, pic->frame_num, 0);
3432 print_short_term(h);
3433 print_long_term(h);
3434 return 0;
/**
 * Parse dec_ref_pic_marking() from the slice header into h->mmco[].
 * IDR slices read no_output_of_prior_pics/long_term_reference flags;
 * other slices either parse an explicit adaptive MMCO list (validating
 * opcode and long-term index ranges) or synthesize a sliding-window
 * MMCO_SHORT2UNUSED when the DPB is full.
 * @return 0 on success, -1 on bitstream error
 */
3437 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3438 MpegEncContext * const s = &h->s;
3439 int i;
3441 h->mmco_index= 0;
3442 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3443 s->broken_link= get_bits1(gb) -1;
3444 if(get_bits1(gb)){
3445 h->mmco[0].opcode= MMCO_LONG;
3446 h->mmco[0].long_arg= 0;
3447 h->mmco_index= 1;
3449 }else{
3450 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3451 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3452 MMCOOpcode opcode= get_ue_golomb_31(gb);
3454 h->mmco[i].opcode= opcode;
3455 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3456 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3457 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3458 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3459 return -1;
3462 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3463 unsigned int long_arg= get_ue_golomb_31(gb);
3464 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3465 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3466 return -1;
3468 h->mmco[i].long_arg= long_arg;
3471 if(opcode > (unsigned)MMCO_LONG){
3472 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3473 return -1;
3475 if(opcode == MMCO_END)
3476 break;
3478 h->mmco_index= i;
3479 }else{
/* sliding-window marking: drop the oldest short-term ref when full */
3480 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3482 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3483 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3484 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3485 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3486 h->mmco_index= 1;
3487 if (FIELD_PICTURE) {
3488 h->mmco[0].short_pic_num *= 2;
3489 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3490 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3491 h->mmco_index= 2;
3497 return 0;
/**
 * Compute the picture order count (POC) of the current picture for all
 * three POC types of the H.264 spec: type 0 (poc_lsb/msb wraparound),
 * type 1 (expected POC from the per-cycle offset table), and type 2
 * (derived directly from frame_num). Stores per-field POCs and the
 * frame POC (minimum of the two fields) in the current picture.
 * @return 0 (always)
 */
3500 static int init_poc(H264Context *h){
3501 MpegEncContext * const s = &h->s;
3502 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3503 int field_poc[2];
3504 Picture *cur = s->current_picture_ptr;
3506 h->frame_num_offset= h->prev_frame_num_offset;
3507 if(h->frame_num < h->prev_frame_num)
3508 h->frame_num_offset += max_frame_num;
3510 if(h->sps.poc_type==0){
3511 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect poc_lsb wraparound relative to the previous picture */
3513 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3514 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3515 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3516 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3517 else
3518 h->poc_msb = h->prev_poc_msb;
3519 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3520 field_poc[0] =
3521 field_poc[1] = h->poc_msb + h->poc_lsb;
3522 if(s->picture_structure == PICT_FRAME)
3523 field_poc[1] += h->delta_poc_bottom;
3524 }else if(h->sps.poc_type==1){
3525 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3526 int i;
3528 if(h->sps.poc_cycle_length != 0)
3529 abs_frame_num = h->frame_num_offset + h->frame_num;
3530 else
3531 abs_frame_num = 0;
3533 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3534 abs_frame_num--;
3536 expected_delta_per_poc_cycle = 0;
3537 for(i=0; i < h->sps.poc_cycle_length; i++)
3538 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3540 if(abs_frame_num > 0){
3541 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3542 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3544 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3545 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3546 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3547 } else
3548 expectedpoc = 0;
3550 if(h->nal_ref_idc == 0)
3551 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3553 field_poc[0] = expectedpoc + h->delta_poc[0];
3554 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3556 if(s->picture_structure == PICT_FRAME)
3557 field_poc[1] += h->delta_poc[1];
3558 }else{
/* poc_type 2: POC follows decoding order (2*frame number, -1 for non-refs) */
3559 int poc= 2*(h->frame_num_offset + h->frame_num);
3561 if(!h->nal_ref_idc)
3562 poc--;
3564 field_poc[0]= poc;
3565 field_poc[1]= poc;
3568 if(s->picture_structure != PICT_BOTTOM_FIELD)
3569 s->current_picture_ptr->field_poc[0]= field_poc[0];
3570 if(s->picture_structure != PICT_TOP_FIELD)
3571 s->current_picture_ptr->field_poc[1]= field_poc[1];
3572 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3574 return 0;
3579 * initialize scan tables
/**
 * Initialize the zigzag/field scan tables for the current context.
 * When the DSP context uses the C IDCT, the reference scan orders are
 * copied as-is; otherwise they are permuted (T macro) to match the
 * coefficient layout of the optimized IDCT. Also selects the _q0 tables
 * used for lossless (transform-bypass) macroblocks.
 */
3581 static void init_scan_tables(H264Context *h){
3582 MpegEncContext * const s = &h->s;
3583 int i;
3584 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3585 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3586 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3587 }else{
3588 for(i=0; i<16; i++){
3589 #define T(x) (x>>2) | ((x<<2) & 0xF)
3590 h->zigzag_scan[i] = T(zigzag_scan[i]);
3591 h-> field_scan[i] = T( field_scan[i]);
3592 #undef T
3595 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3596 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3597 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3598 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3599 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3600 }else{
3601 for(i=0; i<64; i++){
3602 #define T(x) (x>>3) | ((x&7)<<3)
3603 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3604 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3605 h->field_scan8x8[i] = T(field_scan8x8[i]);
3606 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3607 #undef T
3610 if(h->sps.transform_bypass){ //FIXME same ugly
3611 h->zigzag_scan_q0 = zigzag_scan;
3612 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3613 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3614 h->field_scan_q0 = field_scan;
3615 h->field_scan8x8_q0 = field_scan8x8;
3616 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3617 }else{
3618 h->zigzag_scan_q0 = h->zigzag_scan;
3619 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3620 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3621 h->field_scan_q0 = h->field_scan;
3622 h->field_scan8x8_q0 = h->field_scan8x8;
3623 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
/**
 * Finish decoding the current field (or frame): run reference picture
 * marking for non-dropped pictures, latch prev_* POC/frame_num state,
 * notify any hwaccel/VDPAU backend, run error concealment (frames only,
 * see the FIXME below) and close the MPV frame.
 */
3627 static void field_end(H264Context *h){
3628 MpegEncContext * const s = &h->s;
3629 AVCodecContext * const avctx= s->avctx;
3630 s->mb_y= 0;
3632 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
3633 s->current_picture_ptr->pict_type= s->pict_type;
3635 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3636 ff_vdpau_h264_set_reference_frames(s);
3638 if(!s->dropable) {
3639 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
3640 h->prev_poc_msb= h->poc_msb;
3641 h->prev_poc_lsb= h->poc_lsb;
3643 h->prev_frame_num_offset= h->frame_num_offset;
3644 h->prev_frame_num= h->frame_num;
3646 if (avctx->hwaccel) {
3647 if (avctx->hwaccel->end_frame(avctx) < 0)
3648 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
3651 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3652 ff_vdpau_h264_picture_complete(s);
3655 * FIXME: Error handling code does not seem to support interlaced
3656 * when slices span multiple rows
3657 * The ff_er_add_slice calls don't work right for bottom
3658 * fields; they cause massive erroneous error concealing
3659 * Error marking covers both fields (top and bottom).
3660 * This causes a mismatched s->error_count
3661 * and a bad error table. Further, the error count goes to
3662 * INT_MAX when called for bottom field, because mb_y is
3663 * past end by one (callers fault) and resync_mb_y != 0
3664 * causes problems for the first MB line, too.
3666 if (!FIELD_PICTURE)
3667 ff_er_frame_end(s);
3669 MPV_frame_end(s);
3671 h->current_slice=0;
3675 * Replicates H264 "master" context to thread contexts.
/**
 * Replicate the H264 "master" context state needed by a slice-decoding
 * thread: current picture pointers/strides, POC and frame_num prediction
 * state, reference lists and dequant tables. Shallow copies only; the
 * source context retains ownership of the underlying buffers.
 */
3677 static void clone_slice(H264Context *dst, H264Context *src)
3679 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3680 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3681 dst->s.current_picture = src->s.current_picture;
3682 dst->s.linesize = src->s.linesize;
3683 dst->s.uvlinesize = src->s.uvlinesize;
3684 dst->s.first_field = src->s.first_field;
3686 dst->prev_poc_msb = src->prev_poc_msb;
3687 dst->prev_poc_lsb = src->prev_poc_lsb;
3688 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3689 dst->prev_frame_num = src->prev_frame_num;
3690 dst->short_ref_count = src->short_ref_count;
3692 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3693 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3694 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3695 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3697 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3698 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3702 * decodes a slice header.
3703 * This will also call MPV_common_init() and frame_start() as needed.
3705 * @param h h264context
3706 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3708 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3710 static int decode_slice_header(H264Context *h, H264Context *h0){
3711 MpegEncContext * const s = &h->s;
3712 MpegEncContext * const s0 = &h0->s;
3713 unsigned int first_mb_in_slice;
3714 unsigned int pps_id;
3715 int num_ref_idx_active_override_flag;
3716 unsigned int slice_type, tmp, i, j;
3717 int default_ref_list_done = 0;
3718 int last_pic_structure;
/* a picture is droppable when it is not referenced by any other picture */
3720 s->dropable= h->nal_ref_idc == 0;
/* for droppable frames with CODEC_FLAG2_FAST, trade qpel accuracy for speed */
3722 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3723 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3724 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3725 }else{
3726 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3727 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3730 first_mb_in_slice= get_ue_golomb(&s->gb);
/* first_mb_in_slice == 0 marks the start of a new picture (or field) */
3732 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
3733 if(h0->current_slice && FIELD_PICTURE){
3734 field_end(h);
3737 h0->current_slice = 0;
3738 if (!s0->first_field)
3739 s->current_picture_ptr= NULL;
/* slice_type: 0..4 = P/B/I/SP/SI, 5..9 = same but fixed for the whole picture */
3742 slice_type= get_ue_golomb_31(&s->gb);
3743 if(slice_type > 9){
3744 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3745 return -1;
3747 if(slice_type > 4){
3748 slice_type -= 5;
3749 h->slice_type_fixed=1;
3750 }else
3751 h->slice_type_fixed=0;
3753 slice_type= golomb_to_pict_type[ slice_type ];
/* the default reference list can be reused between slices of the same type */
3754 if (slice_type == FF_I_TYPE
3755 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3756 default_ref_list_done = 1;
3758 h->slice_type= slice_type;
3759 h->slice_type_nos= slice_type & 3;
3761 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3762 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3763 av_log(h->s.avctx, AV_LOG_ERROR,
3764 "B picture before any references, skipping\n");
3765 return -1;
/* resolve the PPS and its SPS that this slice references */
3768 pps_id= get_ue_golomb(&s->gb);
3769 if(pps_id>=MAX_PPS_COUNT){
3770 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3771 return -1;
3773 if(!h0->pps_buffers[pps_id]) {
3774 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
3775 return -1;
3777 h->pps= *h0->pps_buffers[pps_id];
3779 if(!h0->sps_buffers[h->pps.sps_id]) {
3780 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
3781 return -1;
3783 h->sps = *h0->sps_buffers[h->pps.sps_id];
/* dequant tables depend on the PPS; rebuild only in the master context */
3785 if(h == h0 && h->dequant_coeff_pps != pps_id){
3786 h->dequant_coeff_pps = pps_id;
3787 init_dequant_tables(h);
/* derive picture dimensions from the SPS (cropping applied below) */
3790 s->mb_width= h->sps.mb_width;
3791 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3793 h->b_stride= s->mb_width*4;
3794 h->b8_stride= s->mb_width*2;
3796 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3797 if(h->sps.frame_mbs_only_flag)
3798 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3799 else
3800 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
/* tear down and reinitialize on resolution change (master context only) */
3802 if (s->context_initialized
3803 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3804 if(h != h0)
3805 return -1; // width / height changed during parallelized decoding
3806 free_tables(h);
3807 flush_dpb(s->avctx);
3808 MPV_common_end(s);
3810 if (!s->context_initialized) {
3811 if(h != h0)
3812 return -1; // we cant (re-)initialize context during parallel decoding
3813 if (MPV_common_init(s) < 0)
3814 return -1;
3815 s->first_field = 0;
3816 h->prev_interlaced_frame = 1;
3818 init_scan_tables(h);
3819 alloc_tables(h);
/* set up one H264Context per decoding thread, cloned from the master */
3821 for(i = 1; i < s->avctx->thread_count; i++) {
3822 H264Context *c;
3823 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3824 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3825 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3826 c->sps = h->sps;
3827 c->pps = h->pps;
3828 init_scan_tables(c);
3829 clone_tables(c, h);
3832 for(i = 0; i < s->avctx->thread_count; i++)
3833 if(context_init(h->thread_context[i]) < 0)
3834 return -1;
3836 s->avctx->width = s->width;
3837 s->avctx->height = s->height;
3838 s->avctx->sample_aspect_ratio= h->sps.sar;
3839 if(!s->avctx->sample_aspect_ratio.den)
3840 s->avctx->sample_aspect_ratio.den = 1;
3842 if(h->sps.timing_info_present_flag){
3843 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
/* work around a timebase bug in old x264 encoder versions */
3844 if(h->x264_build > 0 && h->x264_build < 44)
3845 s->avctx->time_base.den *= 2;
3846 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3847 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3851 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
/* determine frame/field/MBAFF coding of this picture */
3853 h->mb_mbaff = 0;
3854 h->mb_aff_frame = 0;
3855 last_pic_structure = s0->picture_structure;
3856 if(h->sps.frame_mbs_only_flag){
3857 s->picture_structure= PICT_FRAME;
3858 }else{
3859 if(get_bits1(&s->gb)) { //field_pic_flag
3860 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3861 } else {
3862 s->picture_structure= PICT_FRAME;
3863 h->mb_aff_frame = h->sps.mb_aff;
3866 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3868 if(h0->current_slice == 0){
/* conceal frame_num gaps by generating dummy reference frames */
3869 while(h->frame_num != h->prev_frame_num &&
3870 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3871 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3872 if (frame_start(h) < 0)
3873 return -1;
3874 h->prev_frame_num++;
3875 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3876 s->current_picture_ptr->frame_num= h->prev_frame_num;
3877 execute_ref_pic_marking(h, NULL, 0);
3880 /* See if we have a decoded first field looking for a pair... */
3881 if (s0->first_field) {
3882 assert(s0->current_picture_ptr);
3883 assert(s0->current_picture_ptr->data[0]);
3884 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3886 /* figure out if we have a complementary field pair */
3887 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3889 * Previous field is unmatched. Don't display it, but let it
3890 * remain for reference if marked as such.
3892 s0->current_picture_ptr = NULL;
3893 s0->first_field = FIELD_PICTURE;
3895 } else {
3896 if (h->nal_ref_idc &&
3897 s0->current_picture_ptr->reference &&
3898 s0->current_picture_ptr->frame_num != h->frame_num) {
3900 * This and previous field were reference, but had
3901 * different frame_nums. Consider this field first in
3902 * pair. Throw away previous field except for reference
3903 * purposes.
3905 s0->first_field = 1;
3906 s0->current_picture_ptr = NULL;
3908 } else {
3909 /* Second field in complementary pair */
3910 s0->first_field = 0;
3914 } else {
3915 /* Frame or first field in a potentially complementary pair */
3916 assert(!s0->current_picture_ptr);
3917 s0->first_field = FIELD_PICTURE;
3920 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3921 s0->first_field = 0;
3922 return -1;
/* slave slice contexts inherit state from the master */
3925 if(h != h0)
3926 clone_slice(h, h0);
3928 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3930 assert(s->mb_num == s->mb_width * s->mb_height);
3931 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3932 first_mb_in_slice >= s->mb_num){
3933 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3934 return -1;
3936 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3937 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3938 if (s->picture_structure == PICT_BOTTOM_FIELD)
3939 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3940 assert(s->mb_y < s->mb_height);
/* derive the picture numbering used for reference picture handling */
3942 if(s->picture_structure==PICT_FRAME){
3943 h->curr_pic_num= h->frame_num;
3944 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3945 }else{
3946 h->curr_pic_num= 2*h->frame_num + 1;
3947 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3950 if(h->nal_unit_type == NAL_IDR_SLICE){
3951 get_ue_golomb(&s->gb); /* idr_pic_id */
/* picture order count syntax, dependent on sps.poc_type */
3954 if(h->sps.poc_type==0){
3955 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3957 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3958 h->delta_poc_bottom= get_se_golomb(&s->gb);
3962 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3963 h->delta_poc[0]= get_se_golomb(&s->gb);
3965 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3966 h->delta_poc[1]= get_se_golomb(&s->gb);
3969 init_poc(h);
3971 if(h->pps.redundant_pic_cnt_present){
3972 h->redundant_pic_count= get_ue_golomb(&s->gb);
3975 //set defaults, might be overridden a few lines later
3976 h->ref_count[0]= h->pps.ref_count[0];
3977 h->ref_count[1]= h->pps.ref_count[1];
3979 if(h->slice_type_nos != FF_I_TYPE){
3980 if(h->slice_type_nos == FF_B_TYPE){
3981 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3983 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3985 if(num_ref_idx_active_override_flag){
3986 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3987 if(h->slice_type_nos==FF_B_TYPE)
3988 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3990 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3991 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3992 h->ref_count[0]= h->ref_count[1]= 1;
3993 return -1;
3996 if(h->slice_type_nos == FF_B_TYPE)
3997 h->list_count= 2;
3998 else
3999 h->list_count= 1;
4000 }else
4001 h->list_count= 0;
4003 if(!default_ref_list_done){
4004 fill_default_ref_list(h);
4007 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
4008 return -1;
/* expose the first reference of each list to legacy MpegEncContext code */
4010 if(h->slice_type_nos!=FF_I_TYPE){
4011 s->last_picture_ptr= &h->ref_list[0][0];
4012 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
4014 if(h->slice_type_nos==FF_B_TYPE){
4015 s->next_picture_ptr= &h->ref_list[1][0];
4016 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
/* weighted prediction: explicit table, implicit (bipred_idc==2), or none */
4019 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
4020 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
4021 pred_weight_table(h);
4022 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
4023 implicit_weight_table(h);
4024 else {
4025 h->use_weight = 0;
4026 for (i = 0; i < 2; i++) {
4027 h->luma_weight_flag[i] = 0;
4028 h->chroma_weight_flag[i] = 0;
4032 if(h->nal_ref_idc)
4033 decode_ref_pic_marking(h0, &s->gb);
4035 if(FRAME_MBAFF)
4036 fill_mbaff_ref_list(h);
4038 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
4039 direct_dist_scale_factor(h);
4040 direct_ref_list_init(h);
4042 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
4043 tmp = get_ue_golomb_31(&s->gb);
4044 if(tmp > 2){
4045 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4046 return -1;
4048 h->cabac_init_idc= tmp;
4051 h->last_qscale_diff = 0;
4052 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4053 if(tmp>51){
4054 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4055 return -1;
4057 s->qscale= tmp;
4058 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4059 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4060 //FIXME qscale / qp ... stuff
4061 if(h->slice_type == FF_SP_TYPE){
4062 get_bits1(&s->gb); /* sp_for_switch_flag */
4064 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4065 get_se_golomb(&s->gb); /* slice_qs_delta */
/* deblocking filter parameters (note: stream idc 1<->0 is swapped internally) */
4068 h->deblocking_filter = 1;
4069 h->slice_alpha_c0_offset = 0;
4070 h->slice_beta_offset = 0;
4071 if( h->pps.deblocking_filter_parameters_present ) {
4072 tmp= get_ue_golomb_31(&s->gb);
4073 if(tmp > 2){
4074 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4075 return -1;
4077 h->deblocking_filter= tmp;
4078 if(h->deblocking_filter < 2)
4079 h->deblocking_filter^= 1; // 1<->0
4081 if( h->deblocking_filter ) {
4082 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4083 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4087 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4088 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4089 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4090 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4091 h->deblocking_filter= 0;
/* cross-slice deblocking cannot be parallelized; degrade or go sequential */
4093 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4094 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4095 /* Cheat slightly for speed:
4096 Do not bother to deblock across slices. */
4097 h->deblocking_filter = 2;
4098 } else {
4099 h0->max_contexts = 1;
4100 if(!h0->single_decode_warning) {
4101 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4102 h0->single_decode_warning = 1;
4104 if(h != h0)
4105 return 1; // deblocking switched inside frame
4109 #if 0 //FMO
4110 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4111 slice_group_change_cycle= get_bits(&s->gb, ?);
4112 #endif
4114 h0->last_slice_type = slice_type;
4115 h->slice_num = ++h0->current_slice;
4116 if(h->slice_num >= MAX_SLICES){
4117 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
/* precompute ref-index -> frame-number map used by the loop filter */
4120 for(j=0; j<2; j++){
4121 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4122 ref2frm[0]=
4123 ref2frm[1]= -1;
4124 for(i=0; i<16; i++)
4125 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4126 +(h->ref_list[j][i].reference&3);
4127 ref2frm[18+0]=
4128 ref2frm[18+1]= -1;
4129 for(i=16; i<48; i++)
4130 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4131 +(h->ref_list[j][i].reference&3);
4134 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4135 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4137 s->avctx->refs= h->sps.ref_frame_count;
4139 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4140 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4141 h->slice_num,
4142 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4143 first_mb_in_slice,
4144 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4145 pps_id, h->frame_num,
4146 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4147 h->ref_count[0], h->ref_count[1],
4148 s->qscale,
4149 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4150 h->use_weight,
4151 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4152 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4156 return 0;
4162 static inline int get_level_prefix(GetBitContext *gb){
4163 unsigned int buf;
4164 int log;
4166 OPEN_READER(re, gb);
4167 UPDATE_CACHE(re, gb);
4168 buf=GET_CACHE(re, gb);
4170 log= 32 - av_log2(buf);
4171 #ifdef TRACE
4172 print_bin(buf>>(32-log), log);
4173 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4174 #endif
4176 LAST_SKIP_BITS(re, gb, log);
4177 CLOSE_READER(re, gb);
4179 return log-1;
4182 static inline int get_dct8x8_allowed(H264Context *h){
4183 if(h->sps.direct_8x8_inference_flag)
4184 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4185 else
4186 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4190 * decodes a residual block.
4191 * @param n block index
4192 * @param scantable scantable
4193 * @param max_coeff number of coefficients in the block
4194 * @return <0 if an error occurred
4196 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4197 MpegEncContext * const s = &h->s;
4198 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4199 int level[16];
4200 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4202 //FIXME put trailing_onex into the context
/* coeff_token encodes total_coeff (bits 2+) and trailing_ones (low 2 bits);
   the VLC table choice depends on the predicted non-zero count of neighbours */
4204 if(n == CHROMA_DC_BLOCK_INDEX){
4205 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4206 total_coeff= coeff_token>>2;
4207 }else{
4208 if(n == LUMA_DC_BLOCK_INDEX){
4209 total_coeff= pred_non_zero_count(h, 0);
4210 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4211 total_coeff= coeff_token>>2;
4212 }else{
4213 total_coeff= pred_non_zero_count(h, n);
4214 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4215 total_coeff= coeff_token>>2;
4216 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4220 //FIXME set last_non_zero?
4222 if(total_coeff==0)
4223 return 0;
4224 if(total_coeff > (unsigned)max_coeff) {
4225 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4226 return -1;
4229 trailing_ones= coeff_token&3;
4230 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4231 assert(total_coeff<=16);
/* trailing ones are coded as sign bits only; peek 3 bits and map to +-1 */
4233 i = show_bits(gb, 3);
4234 skip_bits(gb, trailing_ones);
4235 level[0] = 1-((i&4)>>1);
4236 level[1] = 1-((i&2) );
4237 level[2] = 1-((i&1)<<1);
/* decode the remaining levels; cavlc_level_tab is a fast path, values
   >= 100 signal that the prefix must be decoded the slow way */
4239 if(trailing_ones<total_coeff) {
4240 int mask, prefix;
4241 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4242 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4243 int level_code= cavlc_level_tab[suffix_length][bitsi][0];
4245 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4246 if(level_code >= 100){
4247 prefix= level_code - 100;
4248 if(prefix == LEVEL_TAB_BITS)
4249 prefix += get_level_prefix(gb);
4251 //first coefficient has suffix_length equal to 0 or 1
4252 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4253 if(suffix_length)
4254 level_code= (prefix<<1) + get_bits1(gb); //part
4255 else
4256 level_code= prefix; //part
4257 }else if(prefix==14){
4258 if(suffix_length)
4259 level_code= (prefix<<1) + get_bits1(gb); //part
4260 else
4261 level_code= prefix + get_bits(gb, 4); //part
4262 }else{
4263 level_code= 30 + get_bits(gb, prefix-3); //part
4264 if(prefix>=16)
4265 level_code += (1<<(prefix-3))-4096;
4268 if(trailing_ones < 3) level_code += 2;
4270 suffix_length = 2;
4271 mask= -(level_code&1);
4272 level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
4273 }else{
4274 if(trailing_ones < 3) level_code += (level_code>>31)|1;
4276 suffix_length = 1;
4277 if(level_code + 3U > 6U)
4278 suffix_length++;
4279 level[trailing_ones]= level_code;
4282 //remaining coefficients have suffix_length > 0
4283 for(i=trailing_ones+1;i<total_coeff;i++) {
4284 static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
4285 int bitsi= show_bits(gb, LEVEL_TAB_BITS);
4286 level_code= cavlc_level_tab[suffix_length][bitsi][0];
4288 skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
4289 if(level_code >= 100){
4290 prefix= level_code - 100;
4291 if(prefix == LEVEL_TAB_BITS){
4292 prefix += get_level_prefix(gb);
4294 if(prefix<15){
4295 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4296 }else{
4297 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4298 if(prefix>=16)
4299 level_code += (1<<(prefix-3))-4096;
4301 mask= -(level_code&1);
4302 level_code= (((2+level_code)>>1) ^ mask) - mask;
4304 level[i]= level_code;
/* suffix_length grows as decoded magnitudes exceed the table thresholds */
4306 if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
4307 suffix_length++;
/* total_zeros: number of zero coefficients interleaved before the last one */
4311 if(total_coeff == max_coeff)
4312 zeros_left=0;
4313 else{
4314 if(n == CHROMA_DC_BLOCK_INDEX)
4315 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4316 else
4317 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* place levels at scan positions, walking backwards with run_before gaps;
   n > 24 means DC blocks which skip the dequant (qmul) step */
4320 coeff_num = zeros_left + total_coeff - 1;
4321 j = scantable[coeff_num];
4322 if(n > 24){
4323 block[j] = level[0];
4324 for(i=1;i<total_coeff;i++) {
4325 if(zeros_left <= 0)
4326 run_before = 0;
4327 else if(zeros_left < 7){
4328 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4329 }else{
4330 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4332 zeros_left -= run_before;
4333 coeff_num -= 1 + run_before;
4334 j= scantable[ coeff_num ];
4336 block[j]= level[i];
4338 }else{
4339 block[j] = (level[0] * qmul[j] + 32)>>6;
4340 for(i=1;i<total_coeff;i++) {
4341 if(zeros_left <= 0)
4342 run_before = 0;
4343 else if(zeros_left < 7){
4344 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4345 }else{
4346 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4348 zeros_left -= run_before;
4349 coeff_num -= 1 + run_before;
4350 j= scantable[ coeff_num ];
4352 block[j]= (level[i] * qmul[j] + 32)>>6;
4356 if(zeros_left<0){
4357 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4358 return -1;
4361 return 0;
4364 static void predict_field_decoding_flag(H264Context *h){
4365 MpegEncContext * const s = &h->s;
4366 const int mb_xy= h->mb_xy;
4367 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4368 ? s->current_picture.mb_type[mb_xy-1]
4369 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4370 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4371 : 0;
4372 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4376 * decodes a P_SKIP or B_SKIP macroblock
4378 static void decode_mb_skip(H264Context *h){
4379 MpegEncContext * const s = &h->s;
4380 const int mb_xy= h->mb_xy;
4381 int mb_type=0;
4383 memset(h->non_zero_count[mb_xy], 0, 16);
4384 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4386 if(MB_FIELD)
4387 mb_type|= MB_TYPE_INTERLACED;
4389 if( h->slice_type_nos == FF_B_TYPE )
4391 // just for fill_caches. pred_direct_motion will set the real mb_type
4392 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4394 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4395 pred_direct_motion(h, &mb_type);
4396 mb_type|= MB_TYPE_SKIP;
4398 else
4400 int mx, my;
4401 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4403 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4404 pred_pskip_motion(h, &mx, &my);
4405 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4406 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4409 write_back_motion(h, mb_type);
4410 s->current_picture.mb_type[mb_xy]= mb_type;
4411 s->current_picture.qscale_table[mb_xy]= s->qscale;
4412 h->slice_table[ mb_xy ]= h->slice_num;
4413 h->prev_mb_skipped= 1;
4417 * decodes a macroblock
4418 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4420 static int decode_mb_cavlc(H264Context *h){
4421 MpegEncContext * const s = &h->s;
4422 int mb_xy;
4423 int partition_count;
4424 unsigned int mb_type, cbp;
4425 int dct8x8_allowed= h->pps.transform_8x8_mode;
4427 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4429 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4430 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4431 down the code */
4432 if(h->slice_type_nos != FF_I_TYPE){
4433 if(s->mb_skip_run==-1)
4434 s->mb_skip_run= get_ue_golomb(&s->gb);
4436 if (s->mb_skip_run--) {
4437 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4438 if(s->mb_skip_run==0)
4439 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4440 else
4441 predict_field_decoding_flag(h);
4443 decode_mb_skip(h);
4444 return 0;
4447 if(FRAME_MBAFF){
4448 if( (s->mb_y&1) == 0 )
4449 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4452 h->prev_mb_skipped= 0;
4454 mb_type= get_ue_golomb(&s->gb);
4455 if(h->slice_type_nos == FF_B_TYPE){
4456 if(mb_type < 23){
4457 partition_count= b_mb_type_info[mb_type].partition_count;
4458 mb_type= b_mb_type_info[mb_type].type;
4459 }else{
4460 mb_type -= 23;
4461 goto decode_intra_mb;
4463 }else if(h->slice_type_nos == FF_P_TYPE){
4464 if(mb_type < 5){
4465 partition_count= p_mb_type_info[mb_type].partition_count;
4466 mb_type= p_mb_type_info[mb_type].type;
4467 }else{
4468 mb_type -= 5;
4469 goto decode_intra_mb;
4471 }else{
4472 assert(h->slice_type_nos == FF_I_TYPE);
4473 if(h->slice_type == FF_SI_TYPE && mb_type)
4474 mb_type--;
4475 decode_intra_mb:
4476 if(mb_type > 25){
4477 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4478 return -1;
4480 partition_count=0;
4481 cbp= i_mb_type_info[mb_type].cbp;
4482 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4483 mb_type= i_mb_type_info[mb_type].type;
4486 if(MB_FIELD)
4487 mb_type |= MB_TYPE_INTERLACED;
4489 h->slice_table[ mb_xy ]= h->slice_num;
4491 if(IS_INTRA_PCM(mb_type)){
4492 unsigned int x;
4494 // We assume these blocks are very rare so we do not optimize it.
4495 align_get_bits(&s->gb);
4497 // The pixels are stored in the same order as levels in h->mb array.
4498 for(x=0; x < (CHROMA ? 384 : 256); x++){
4499 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4502 // In deblocking, the quantizer is 0
4503 s->current_picture.qscale_table[mb_xy]= 0;
4504 // All coeffs are present
4505 memset(h->non_zero_count[mb_xy], 16, 16);
4507 s->current_picture.mb_type[mb_xy]= mb_type;
4508 return 0;
4511 if(MB_MBAFF){
4512 h->ref_count[0] <<= 1;
4513 h->ref_count[1] <<= 1;
4516 fill_caches(h, mb_type, 0);
4518 //mb_pred
4519 if(IS_INTRA(mb_type)){
4520 int pred_mode;
4521 // init_top_left_availability(h);
4522 if(IS_INTRA4x4(mb_type)){
4523 int i;
4524 int di = 1;
4525 if(dct8x8_allowed && get_bits1(&s->gb)){
4526 mb_type |= MB_TYPE_8x8DCT;
4527 di = 4;
4530 // fill_intra4x4_pred_table(h);
4531 for(i=0; i<16; i+=di){
4532 int mode= pred_intra_mode(h, i);
4534 if(!get_bits1(&s->gb)){
4535 const int rem_mode= get_bits(&s->gb, 3);
4536 mode = rem_mode + (rem_mode >= mode);
4539 if(di==4)
4540 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4541 else
4542 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4544 write_back_intra_pred_mode(h);
4545 if( check_intra4x4_pred_mode(h) < 0)
4546 return -1;
4547 }else{
4548 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4549 if(h->intra16x16_pred_mode < 0)
4550 return -1;
4552 if(CHROMA){
4553 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4554 if(pred_mode < 0)
4555 return -1;
4556 h->chroma_pred_mode= pred_mode;
4558 }else if(partition_count==4){
4559 int i, j, sub_partition_count[4], list, ref[2][4];
4561 if(h->slice_type_nos == FF_B_TYPE){
4562 for(i=0; i<4; i++){
4563 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4564 if(h->sub_mb_type[i] >=13){
4565 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4566 return -1;
4568 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4569 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4571 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4572 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4573 pred_direct_motion(h, &mb_type);
4574 h->ref_cache[0][scan8[4]] =
4575 h->ref_cache[1][scan8[4]] =
4576 h->ref_cache[0][scan8[12]] =
4577 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4579 }else{
4580 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4581 for(i=0; i<4; i++){
4582 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4583 if(h->sub_mb_type[i] >=4){
4584 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4585 return -1;
4587 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4588 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4592 for(list=0; list<h->list_count; list++){
4593 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4594 for(i=0; i<4; i++){
4595 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4596 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4597 unsigned int tmp;
4598 if(ref_count == 1){
4599 tmp= 0;
4600 }else if(ref_count == 2){
4601 tmp= get_bits1(&s->gb)^1;
4602 }else{
4603 tmp= get_ue_golomb_31(&s->gb);
4604 if(tmp>=ref_count){
4605 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4606 return -1;
4609 ref[list][i]= tmp;
4610 }else{
4611 //FIXME
4612 ref[list][i] = -1;
4617 if(dct8x8_allowed)
4618 dct8x8_allowed = get_dct8x8_allowed(h);
4620 for(list=0; list<h->list_count; list++){
4621 for(i=0; i<4; i++){
4622 if(IS_DIRECT(h->sub_mb_type[i])) {
4623 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4624 continue;
4626 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4627 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4629 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4630 const int sub_mb_type= h->sub_mb_type[i];
4631 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4632 for(j=0; j<sub_partition_count[i]; j++){
4633 int mx, my;
4634 const int index= 4*i + block_width*j;
4635 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4636 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4637 mx += get_se_golomb(&s->gb);
4638 my += get_se_golomb(&s->gb);
4639 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4641 if(IS_SUB_8X8(sub_mb_type)){
4642 mv_cache[ 1 ][0]=
4643 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4644 mv_cache[ 1 ][1]=
4645 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4646 }else if(IS_SUB_8X4(sub_mb_type)){
4647 mv_cache[ 1 ][0]= mx;
4648 mv_cache[ 1 ][1]= my;
4649 }else if(IS_SUB_4X8(sub_mb_type)){
4650 mv_cache[ 8 ][0]= mx;
4651 mv_cache[ 8 ][1]= my;
4653 mv_cache[ 0 ][0]= mx;
4654 mv_cache[ 0 ][1]= my;
4656 }else{
4657 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4658 p[0] = p[1]=
4659 p[8] = p[9]= 0;
4663 }else if(IS_DIRECT(mb_type)){
4664 pred_direct_motion(h, &mb_type);
4665 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4666 }else{
4667 int list, mx, my, i;
4668 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4669 if(IS_16X16(mb_type)){
4670 for(list=0; list<h->list_count; list++){
4671 unsigned int val;
4672 if(IS_DIR(mb_type, 0, list)){
4673 if(h->ref_count[list]==1){
4674 val= 0;
4675 }else if(h->ref_count[list]==2){
4676 val= get_bits1(&s->gb)^1;
4677 }else{
4678 val= get_ue_golomb_31(&s->gb);
4679 if(val >= h->ref_count[list]){
4680 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4681 return -1;
4684 }else
4685 val= LIST_NOT_USED&0xFF;
4686 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4688 for(list=0; list<h->list_count; list++){
4689 unsigned int val;
4690 if(IS_DIR(mb_type, 0, list)){
4691 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4692 mx += get_se_golomb(&s->gb);
4693 my += get_se_golomb(&s->gb);
4694 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4696 val= pack16to32(mx,my);
4697 }else
4698 val=0;
4699 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4702 else if(IS_16X8(mb_type)){
4703 for(list=0; list<h->list_count; list++){
4704 for(i=0; i<2; i++){
4705 unsigned int val;
4706 if(IS_DIR(mb_type, i, list)){
4707 if(h->ref_count[list] == 1){
4708 val= 0;
4709 }else if(h->ref_count[list] == 2){
4710 val= get_bits1(&s->gb)^1;
4711 }else{
4712 val= get_ue_golomb_31(&s->gb);
4713 if(val >= h->ref_count[list]){
4714 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4715 return -1;
4718 }else
4719 val= LIST_NOT_USED&0xFF;
4720 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4723 for(list=0; list<h->list_count; list++){
4724 for(i=0; i<2; i++){
4725 unsigned int val;
4726 if(IS_DIR(mb_type, i, list)){
4727 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4728 mx += get_se_golomb(&s->gb);
4729 my += get_se_golomb(&s->gb);
4730 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4732 val= pack16to32(mx,my);
4733 }else
4734 val=0;
4735 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4738 }else{
4739 assert(IS_8X16(mb_type));
4740 for(list=0; list<h->list_count; list++){
4741 for(i=0; i<2; i++){
4742 unsigned int val;
4743 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4744 if(h->ref_count[list]==1){
4745 val= 0;
4746 }else if(h->ref_count[list]==2){
4747 val= get_bits1(&s->gb)^1;
4748 }else{
4749 val= get_ue_golomb_31(&s->gb);
4750 if(val >= h->ref_count[list]){
4751 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4752 return -1;
4755 }else
4756 val= LIST_NOT_USED&0xFF;
4757 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4760 for(list=0; list<h->list_count; list++){
4761 for(i=0; i<2; i++){
4762 unsigned int val;
4763 if(IS_DIR(mb_type, i, list)){
4764 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4765 mx += get_se_golomb(&s->gb);
4766 my += get_se_golomb(&s->gb);
4767 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4769 val= pack16to32(mx,my);
4770 }else
4771 val=0;
4772 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4778 if(IS_INTER(mb_type))
4779 write_back_motion(h, mb_type);
4781 if(!IS_INTRA16x16(mb_type)){
4782 cbp= get_ue_golomb(&s->gb);
4783 if(cbp > 47){
4784 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4785 return -1;
4788 if(CHROMA){
4789 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4790 else cbp= golomb_to_inter_cbp [cbp];
4791 }else{
4792 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4793 else cbp= golomb_to_inter_cbp_gray[cbp];
4796 h->cbp = cbp;
4798 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4799 if(get_bits1(&s->gb)){
4800 mb_type |= MB_TYPE_8x8DCT;
4801 h->cbp_table[mb_xy]= cbp;
4804 s->current_picture.mb_type[mb_xy]= mb_type;
4806 if(cbp || IS_INTRA16x16(mb_type)){
4807 int i8x8, i4x4, chroma_idx;
4808 int dquant;
4809 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4810 const uint8_t *scan, *scan8x8, *dc_scan;
4812 // fill_non_zero_count_cache(h);
4814 if(IS_INTERLACED(mb_type)){
4815 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4816 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4817 dc_scan= luma_dc_field_scan;
4818 }else{
4819 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4820 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4821 dc_scan= luma_dc_zigzag_scan;
4824 dquant= get_se_golomb(&s->gb);
4826 if( dquant > 25 || dquant < -26 ){
4827 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4828 return -1;
4831 s->qscale += dquant;
4832 if(((unsigned)s->qscale) > 51){
4833 if(s->qscale<0) s->qscale+= 52;
4834 else s->qscale-= 52;
4837 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4838 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4839 if(IS_INTRA16x16(mb_type)){
4840 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4841 return -1; //FIXME continue if partitioned and other return -1 too
4844 assert((cbp&15) == 0 || (cbp&15) == 15);
4846 if(cbp&15){
4847 for(i8x8=0; i8x8<4; i8x8++){
4848 for(i4x4=0; i4x4<4; i4x4++){
4849 const int index= i4x4 + 4*i8x8;
4850 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4851 return -1;
4855 }else{
4856 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4858 }else{
4859 for(i8x8=0; i8x8<4; i8x8++){
4860 if(cbp & (1<<i8x8)){
4861 if(IS_8x8DCT(mb_type)){
4862 DCTELEM *buf = &h->mb[64*i8x8];
4863 uint8_t *nnz;
4864 for(i4x4=0; i4x4<4; i4x4++){
4865 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4866 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4867 return -1;
4869 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4870 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4871 }else{
4872 for(i4x4=0; i4x4<4; i4x4++){
4873 const int index= i4x4 + 4*i8x8;
4875 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4876 return -1;
4880 }else{
4881 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4882 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4887 if(cbp&0x30){
4888 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4889 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4890 return -1;
4894 if(cbp&0x20){
4895 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4896 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4897 for(i4x4=0; i4x4<4; i4x4++){
4898 const int index= 16 + 4*chroma_idx + i4x4;
4899 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4900 return -1;
4904 }else{
4905 uint8_t * const nnz= &h->non_zero_count_cache[0];
4906 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4907 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4909 }else{
4910 uint8_t * const nnz= &h->non_zero_count_cache[0];
4911 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4912 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4913 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4915 s->current_picture.qscale_table[mb_xy]= s->qscale;
4916 write_back_non_zero_count(h);
4918 if(MB_MBAFF){
4919 h->ref_count[0] >>= 1;
4920 h->ref_count[1] >>= 1;
4923 return 0;
4926 static int decode_cabac_field_decoding_flag(H264Context *h) {
4927 MpegEncContext * const s = &h->s;
4928 const int mb_x = s->mb_x;
4929 const int mb_y = s->mb_y & ~1;
4930 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4931 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4933 unsigned int ctx = 0;
4935 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4936 ctx += 1;
4938 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4939 ctx += 1;
4942 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * @param ctx_base    index of the first CABAC context of the mb_type tree
 * @param intra_slice nonzero inside I/SI slices: the first bin then uses a
 *                    neighbour-derived context and the remaining contexts
 *                    are shifted by 2
 * @return 0 for I_4x4, 25 for I_PCM, otherwise 1..24 for I_16x16 with
 *         cbp_luma (+12), cbp_chroma (+4/+8) and the prediction mode
 *         (+2, +1) folded into the value
 */
static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
    uint8_t *state= &h->cabac_state[ctx_base];
    int mb_type;

    if(intra_slice){
        MpegEncContext * const s = &h->s;
        const int mba_xy = h->left_mb_xy[0];
        const int mbb_xy = h->top_mb_xy;
        int ctx=0;
        /* context increment: number of same-slice neighbours that are not I4x4 */
        if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
            ctx++;
        if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
            ctx++;
        if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
            return 0;   /* I4x4 */
        state += 2;
    }else{
        if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
            return 0;   /* I4x4 */
    }

    /* the end-of-slice/terminate bin distinguishes PCM from I16x16 */
    if( get_cabac_terminate( &h->cabac ) )
        return 25; /* PCM */

    mb_type = 1; /* I16x16 */
    mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
    if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
        mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
    mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
    mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
    return mb_type;
}
/**
 * Decodes the mb_type of a macroblock in a B slice with CABAC.
 * Uses contexts 27..32; the first bin's context increment counts the
 * same-slice left/top neighbours that are not B_Direct.
 * @return an index into b_mb_type_info, or >= 23 for an intra type
 *         (the caller subtracts 23 and jumps to the intra path)
 */
static int decode_cabac_mb_type_b( H264Context *h ) {
    MpegEncContext * const s = &h->s;

    const int mba_xy = h->left_mb_xy[0];
    const int mbb_xy = h->top_mb_xy;
    int ctx = 0;
    int bits;
    assert(h->slice_type_nos == FF_B_TYPE);

    if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
        ctx++;
    if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
        ctx++;

    if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
        return 0; /* B_Direct_16x16 */

    if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
        return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
    }

    /* 4-bit suffix selecting among the remaining partition types */
    bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
    bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
    bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
    bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
    if( bits < 8 )
        return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
    else if( bits == 13 ) {
        /* escape to the intra mb_type tree */
        return decode_cabac_intra_mb_type(h, 32, 0) + 23;
    } else if( bits == 14 )
        return 11; /* B_L1_L0_8x16 */
    else if( bits == 15 )
        return 22; /* B_8x8 */

    /* bits in 8..12: one more bin refines the bi-predictive partitions */
    bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
    return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
}
/**
 * Decodes the CABAC mb_skip_flag for the macroblock at (mb_x, mb_y).
 * The context increment (0..2) counts the same-slice left/top neighbours
 * that are themselves not skipped; B slices use a separate context set
 * (+13). In MBAFF frames the neighbour addresses must first be adjusted
 * for the field/frame pairing of the adjacent MB pairs.
 */
static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
    MpegEncContext * const s = &h->s;
    int mba_xy, mbb_xy;
    int ctx = 0;

    if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
        int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
        mba_xy = mb_xy - 1;
        /* bottom MB of a pair: use the bottom-left MB when the left pair
         * has the same field/frame coding as the current MB */
        if( (mb_y&1)
            && h->slice_table[mba_xy] == h->slice_num
            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
            mba_xy += s->mb_stride;
        if( MB_FIELD ){
            mbb_xy = mb_xy - s->mb_stride;
            /* top MB of a field pair above a field pair: step up one more row */
            if( !(mb_y&1)
                && h->slice_table[mbb_xy] == h->slice_num
                && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
                mbb_xy -= s->mb_stride;
        }else
            mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
    }else{
        int mb_xy = h->mb_xy;
        mba_xy = mb_xy - 1;
        /* in field pictures the vertical neighbour is two rows up */
        mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
    }

    if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
        ctx++;
    if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
        ctx++;

    if( h->slice_type_nos == FF_B_TYPE )
        ctx += 13; /* B slices use contexts 24..26 instead of 11..13 */
    return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
}
5052 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5053 int mode = 0;
5055 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5056 return pred_mode;
5058 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5059 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5060 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5062 if( mode >= pred_mode )
5063 return mode + 1;
5064 else
5065 return mode;
5068 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5069 const int mba_xy = h->left_mb_xy[0];
5070 const int mbb_xy = h->top_mb_xy;
5072 int ctx = 0;
5074 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5075 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5076 ctx++;
5078 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5079 ctx++;
5081 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5082 return 0;
5084 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5085 return 1;
5086 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5087 return 2;
5088 else
5089 return 3;
/**
 * Decodes the 4-bit luma coded_block_pattern with CABAC, one bin per
 * 8x8 block in raster order. Each bin's context depends on whether the
 * 8x8 blocks to the left/above it (taken from the neighbour MB's cbp
 * when at the MB edge) are NOT coded. A missing neighbour yields -1,
 * whose bit tests are all nonzero, i.e. it counts as coded.
 */
static int decode_cabac_mb_cbp_luma( H264Context *h) {
    int cbp_b, cbp_a, ctx, cbp = 0;

    cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
    cbp_b = h->slice_table[h->top_mb_xy]     == h->slice_num ? h->top_cbp  : -1;

    /* blocks 1-3 use the already-decoded bits of cbp for the in-MB neighbours */
    ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
    ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);
    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
    ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);
    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
    ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);
    cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
    return cbp;
}
5108 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5109 int ctx;
5110 int cbp_a, cbp_b;
5112 cbp_a = (h->left_cbp>>4)&0x03;
5113 cbp_b = (h-> top_cbp>>4)&0x03;
5115 ctx = 0;
5116 if( cbp_a > 0 ) ctx++;
5117 if( cbp_b > 0 ) ctx += 2;
5118 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5119 return 0;
5121 ctx = 4;
5122 if( cbp_a == 2 ) ctx++;
5123 if( cbp_b == 2 ) ctx += 2;
5124 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5126 static int decode_cabac_mb_dqp( H264Context *h) {
5127 int ctx= h->last_qscale_diff != 0;
5128 int val = 0;
5130 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5131 ctx= 2+(ctx>>1);
5132 val++;
5133 if(val > 102) //prevent infinite loop
5134 return INT_MIN;
5137 if( val&0x01 )
5138 return (val + 1)>>1 ;
5139 else
5140 return -((val + 1)>>1);
5142 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5143 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5144 return 0; /* 8x8 */
5145 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5146 return 1; /* 8x4 */
5147 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5148 return 2; /* 4x8 */
5149 return 3; /* 4x4 */
/**
 * Decodes the sub_mb_type of one 8x8 partition in a B slice with CABAC
 * (contexts 36..39).
 * @return an index into b_sub_mb_type_info (0 = B_Direct_8x8, up to the
 *         4x4 bi-predictive types)
 */
static int decode_cabac_b_mb_sub_type( H264Context *h ) {
    int type;
    if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
        return 0;   /* B_Direct_8x8 */
    if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
        return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
    type = 3;
    if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
        if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
            return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
        type += 4;
    }
    /* two final bins select within the 8x4/4x8 group */
    type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
    type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
    return type;
}
/* Decodes transform_size_8x8_flag; the context (399..401) is offset by
 * h->neighbor_transform_size — presumably the count of neighbouring MBs
 * already using the 8x8 transform, set in fill_caches (confirm there). */
static inline int decode_cabac_mb_transform_size( H264Context *h ) {
    return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
}
5172 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5173 int refa = h->ref_cache[list][scan8[n] - 1];
5174 int refb = h->ref_cache[list][scan8[n] - 8];
5175 int ref = 0;
5176 int ctx = 0;
5178 if( h->slice_type_nos == FF_B_TYPE) {
5179 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5180 ctx++;
5181 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5182 ctx += 2;
5183 } else {
5184 if( refa > 0 )
5185 ctx++;
5186 if( refb > 0 )
5187 ctx += 2;
5190 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5191 ref++;
5192 ctx = (ctx>>2)+4;
5193 if(ref >= 32 /*h->ref_list[list]*/){
5194 return -1;
5197 return ref;
/**
 * Decodes one motion vector difference component with CABAC:
 * a context-coded unary prefix (capped at 9) followed by a bypass-coded
 * exp-golomb suffix starting at k=3, then a bypass sign bit.
 * @param n 4x4 block index, used to address the mvd cache for the context
 * @param l component: 0 = x (contexts 40..46), 1 = y (contexts 47..53)
 * @return the signed mvd, or INT_MIN if the suffix overflows (damaged stream)
 */
static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
    /* context increment from the magnitude of the neighbouring mvds */
    int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
               abs( h->mvd_cache[list][scan8[n] - 8][l] );
    int ctxbase = (l == 0) ? 40 : 47;
    int mvd;
    int ctx = (amvd>2) + (amvd>32);

    if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
        return 0;

    /* unary prefix, context advances up to ctxbase+6 */
    mvd= 1;
    ctx= 3;
    while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
        mvd++;
        if( ctx < 6 )
            ctx++;
    }

    if( mvd >= 9 ) {
        /* exp-golomb suffix in bypass mode: first the exponent... */
        int k = 3;
        while( get_cabac_bypass( &h->cabac ) ) {
            mvd += 1 << k;
            k++;
            if(k>24){
                av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
                return INT_MIN;
            }
        }
        /* ...then k mantissa bits */
        while( k-- ) {
            if( get_cabac_bypass( &h->cabac ) )
                mvd += 1 << k;
        }
    }
    return get_cabac_bypass_sign( &h->cabac, -mvd );
}
/**
 * Computes the context index offset for the coded_block_flag of block
 * (cat, idx). The increment (0..3) counts the left/top neighbour blocks
 * with nonzero coefficients: DC blocks read this from the neighbour MBs'
 * extended cbp (bit 8 = luma DC, bits 6-7 = chroma DC), AC/luma blocks
 * from the non_zero_count cache.
 */
static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
    int nza, nzb;
    int ctx = 0;

    if( is_dc ) {
        if( cat == 0 ) {
            /* luma DC flag lives in bit 8 of the neighbour cbp */
            nza = h->left_cbp&0x100;
            nzb = h-> top_cbp&0x100;
        } else {
            /* chroma DC flags live at bits 6+idx (set via 0x40 << n) */
            nza = (h->left_cbp>>(6+idx))&0x01;
            nzb = (h-> top_cbp>>(6+idx))&0x01;
        }
    } else {
        assert(cat == 1 || cat == 2 || cat == 4);
        nza = h->non_zero_count_cache[scan8[idx] - 1];
        nzb = h->non_zero_count_cache[scan8[idx] - 8];
    }

    if( nza > 0 )
        ctx++;

    if( nzb > 0 )
        ctx += 2;

    /* four contexts per block category */
    return ctx + 4 * cat;
}
/* Maps each of the 63 coefficient positions of an 8x8 block to the
 * context offset used for its last_significant_coeff_flag.
 * Declared with DECLARE_ASM_CONST — presumably so the x86 inline-asm
 * significance decoder can address it directly; confirm in x86/h264_i386.h. */
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
/**
 * Decodes the residual coefficients of one block with CABAC and stores
 * the dequantized levels into block[] in scan order.
 *
 * Decoding proceeds in three stages: coded_block_flag (skipped for 8x8
 * luma blocks, cat 5), the significance map (positions collected into
 * index[]), then the levels in reverse scan order with the node-context
 * state machine below.
 *
 * @param cat       block category, see the table further down
 * @param n         block index within the MB (meaning depends on cat)
 * @param scantable scan used to map coefficient order to block positions
 * @param qmul      dequant table row; unused on DC paths (may be NULL there)
 * @param max_coeff number of coefficients in the block
 * @param is_dc     compile-time constant selecting the DC-only code paths
 */
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
    /* context base offsets, indexed by [MB_FIELD][cat] */
    static const int significant_coeff_flag_offset[2][6] = {
      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
    };
    static const int last_coeff_flag_offset[2][6] = {
      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
    };
    static const int coeff_abs_level_m1_offset[6] = {
        227+0, 227+10, 227+20, 227+30, 227+39, 426
    };
    /* per-position context offsets for the 8x8 significance map,
     * [frame/field][position] */
    static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
      { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
        4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
        7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
       12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
      { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
        6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
        9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
        9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
    };
    /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
     * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
     * map node ctx => cabac ctx for level=1 */
    static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
    /* map node ctx => cabac ctx for level>1 */
    static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
    static const uint8_t coeff_abs_level_transition[2][8] = {
        /* update node ctx after decoding a level=1 */
        { 1, 2, 3, 3, 4, 5, 6, 7 },
        /* update node ctx after decoding a level>1 */
        { 4, 4, 4, 4, 5, 6, 7, 7 }
    };

    int index[64];      /* scan positions of the significant coefficients */

    int av_unused last;
    int coeff_count = 0;
    int node_ctx = 0;

    uint8_t *significant_coeff_ctx_base;
    uint8_t *last_coeff_ctx_base;
    uint8_t *abs_level_m1_ctx_base;

    /* keep a local copy of the CABAC state on non-x86: the compiler can
     * then hold it in registers across the whole function */
#if !ARCH_X86
#define CABAC_ON_STACK
#endif
#ifdef CABAC_ON_STACK
#define CC &cc
    CABACContext cc;
    cc.range     = h->cabac.range;
    cc.low       = h->cabac.low;
    cc.bytestream= h->cabac.bytestream;
#else
#define CC &h->cabac
#endif

    /* cat: 0-> DC 16x16  n = 0
     *      1-> AC 16x16  n = luma4x4idx
     *      2-> Luma4x4   n = luma4x4idx
     *      3-> DC Chroma n = iCbCr
     *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
     *      5-> Luma8x8   n = 4 * luma8x8idx
     */

    /* read coded block flag */
    if( is_dc || cat != 5 ) {
        if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
            if( !is_dc )
                h->non_zero_count_cache[scan8[n]] = 0;

#ifdef CABAC_ON_STACK
            h->cabac.range     = cc.range     ;
            h->cabac.low       = cc.low       ;
            h->cabac.bytestream= cc.bytestream;
#endif
            return;
        }
    }

    significant_coeff_ctx_base = h->cabac_state
        + significant_coeff_flag_offset[MB_FIELD][cat];
    last_coeff_ctx_base = h->cabac_state
        + last_coeff_flag_offset[MB_FIELD][cat];
    abs_level_m1_ctx_base = h->cabac_state
        + coeff_abs_level_m1_offset[cat];

    if( !is_dc && cat == 5 ) {
/* significance-map decoding: for each position read significant_coeff_flag
 * and, when set, last_significant_coeff_flag (which terminates the map) */
#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
        for(last= 0; last < coefs; last++) { \
            uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
            if( get_cabac( CC, sig_ctx )) { \
                uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
                index[coeff_count++] = last; \
                if( get_cabac( CC, last_ctx ) ) { \
                    last= max_coeff; \
                    break; \
                } \
            } \
        } \
        if( last == max_coeff -1 ) {\
            index[coeff_count++] = last;\
        }
        const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
        /* NOTE: the } else { below pairs across the preprocessor branches;
         * exactly one else matches the cat==5 if in each configuration */
#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
    } else {
        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
#else
        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
    } else {
        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
#endif
    }
    assert(coeff_count > 0);

    /* record that this block has coefficients */
    if( is_dc ) {
        if( cat == 0 )
            h->cbp_table[h->mb_xy] |= 0x100;
        else
            h->cbp_table[h->mb_xy] |= 0x40 << n;
    } else {
        if( cat == 5 )
            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
        else {
            assert( cat == 1 || cat == 2 || cat == 4 );
            h->non_zero_count_cache[scan8[n]] = coeff_count;
        }
    }

    /* decode the levels in reverse scan order */
    do {
        uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;

        int j= scantable[index[--coeff_count]];

        if( get_cabac( CC, ctx ) == 0 ) {
            /* |level| == 1: only the bypass-coded sign follows */
            node_ctx = coeff_abs_level_transition[0][node_ctx];
            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( CC, -1);
            }else{
                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
            }
        } else {
            int coeff_abs = 2;
            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
            node_ctx = coeff_abs_level_transition[1][node_ctx];

            /* unary part of the magnitude, capped at 15 */
            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
                coeff_abs++;
            }

            if( coeff_abs >= 15 ) {
                /* bypass-coded exp-golomb escape for large magnitudes */
                int j = 0;
                while( get_cabac_bypass( CC ) ) {
                    j++;
                }

                coeff_abs=1;
                while( j-- ) {
                    coeff_abs += coeff_abs + get_cabac_bypass( CC );
                }
                coeff_abs+= 14;
            }

            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
            }else{
                block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
            }
        }
    } while( coeff_count );
#ifdef CABAC_ON_STACK
            h->cabac.range     = cc.range     ;
            h->cabac.low       = cc.low       ;
            h->cabac.bytestream= cc.bytestream;
#endif
}
#if !CONFIG_SMALL
/* Non-CONFIG_SMALL builds instantiate two specialized copies of
 * decode_cabac_residual_internal so the is_dc branches fold away. */
static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
}

static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
}
#endif
/* Entry point for CABAC residual decoding: categories 0 (luma DC) and
 * 3 (chroma DC) take the DC path. CONFIG_SMALL builds keep a single
 * shared function; others dispatch to the specialized copies above. */
static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
#if CONFIG_SMALL
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
#else
    if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
    else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
#endif
}
/**
 * Computes top_mb_xy and left_mb_xy[0] for the current macroblock.
 * The plain case is simply mb_xy - mb_stride / mb_xy - 1; in MBAFF
 * frames the addresses are corrected for the field/frame coding of the
 * neighbouring MB pairs, and in field pictures the vertical neighbour
 * is one additional row up.
 */
static inline void compute_mb_neighbors(H264Context *h)
{
    MpegEncContext * const s = &h->s;
    const int mb_xy = h->mb_xy;
    h->top_mb_xy     = mb_xy - s->mb_stride;
    h->left_mb_xy[0] = mb_xy - 1;
    if(FRAME_MBAFF){
        const int pair_xy            = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy        = pair_xy - s->mb_stride;
        const int top_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_field_flag = MB_FIELD;
        const int bottom = (s->mb_y & 1);

        /* field MB: the vertical neighbour is the top MB of the pair above
         * (or of this pair, already handled by the default) */
        if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
            h->top_mb_xy -= s->mb_stride;
        }
        /* when the left pair's coding differs from ours, both of our MBs
         * neighbour the top MB of the left pair */
        if (!left_mb_field_flag == curr_mb_field_flag) {
            h->left_mb_xy[0] = pair_xy - 1;
        }
    } else if (FIELD_PICTURE) {
        h->top_mb_xy -= s->mb_stride;
    }
    return;
}
5497 * decodes a macroblock
5498 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5500 static int decode_mb_cabac(H264Context *h) {
5501 MpegEncContext * const s = &h->s;
5502 int mb_xy;
5503 int mb_type, partition_count, cbp = 0;
5504 int dct8x8_allowed= h->pps.transform_8x8_mode;
5506 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5508 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5509 if( h->slice_type_nos != FF_I_TYPE ) {
5510 int skip;
5511 /* a skipped mb needs the aff flag from the following mb */
5512 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5513 predict_field_decoding_flag(h);
5514 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5515 skip = h->next_mb_skipped;
5516 else
5517 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5518 /* read skip flags */
5519 if( skip ) {
5520 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5521 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5522 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5523 if(!h->next_mb_skipped)
5524 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5527 decode_mb_skip(h);
5529 h->cbp_table[mb_xy] = 0;
5530 h->chroma_pred_mode_table[mb_xy] = 0;
5531 h->last_qscale_diff = 0;
5533 return 0;
5537 if(FRAME_MBAFF){
5538 if( (s->mb_y&1) == 0 )
5539 h->mb_mbaff =
5540 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5543 h->prev_mb_skipped = 0;
5545 compute_mb_neighbors(h);
5547 if( h->slice_type_nos == FF_B_TYPE ) {
5548 mb_type = decode_cabac_mb_type_b( h );
5549 if( mb_type < 23 ){
5550 partition_count= b_mb_type_info[mb_type].partition_count;
5551 mb_type= b_mb_type_info[mb_type].type;
5552 }else{
5553 mb_type -= 23;
5554 goto decode_intra_mb;
5556 } else if( h->slice_type_nos == FF_P_TYPE ) {
5557 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5558 /* P-type */
5559 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5560 /* P_L0_D16x16, P_8x8 */
5561 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5562 } else {
5563 /* P_L0_D8x16, P_L0_D16x8 */
5564 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5566 partition_count= p_mb_type_info[mb_type].partition_count;
5567 mb_type= p_mb_type_info[mb_type].type;
5568 } else {
5569 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5570 goto decode_intra_mb;
5572 } else {
5573 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5574 if(h->slice_type == FF_SI_TYPE && mb_type)
5575 mb_type--;
5576 assert(h->slice_type_nos == FF_I_TYPE);
5577 decode_intra_mb:
5578 partition_count = 0;
5579 cbp= i_mb_type_info[mb_type].cbp;
5580 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5581 mb_type= i_mb_type_info[mb_type].type;
5583 if(MB_FIELD)
5584 mb_type |= MB_TYPE_INTERLACED;
5586 h->slice_table[ mb_xy ]= h->slice_num;
5588 if(IS_INTRA_PCM(mb_type)) {
5589 const uint8_t *ptr;
5591 // We assume these blocks are very rare so we do not optimize it.
5592 // FIXME The two following lines get the bitstream position in the cabac
5593 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5594 ptr= h->cabac.bytestream;
5595 if(h->cabac.low&0x1) ptr--;
5596 if(CABAC_BITS==16){
5597 if(h->cabac.low&0x1FF) ptr--;
5600 // The pixels are stored in the same order as levels in h->mb array.
5601 memcpy(h->mb, ptr, 256); ptr+=256;
5602 if(CHROMA){
5603 memcpy(h->mb+128, ptr, 128); ptr+=128;
5606 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5608 // All blocks are present
5609 h->cbp_table[mb_xy] = 0x1ef;
5610 h->chroma_pred_mode_table[mb_xy] = 0;
5611 // In deblocking, the quantizer is 0
5612 s->current_picture.qscale_table[mb_xy]= 0;
5613 // All coeffs are present
5614 memset(h->non_zero_count[mb_xy], 16, 16);
5615 s->current_picture.mb_type[mb_xy]= mb_type;
5616 h->last_qscale_diff = 0;
5617 return 0;
5620 if(MB_MBAFF){
5621 h->ref_count[0] <<= 1;
5622 h->ref_count[1] <<= 1;
5625 fill_caches(h, mb_type, 0);
5627 if( IS_INTRA( mb_type ) ) {
5628 int i, pred_mode;
5629 if( IS_INTRA4x4( mb_type ) ) {
5630 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5631 mb_type |= MB_TYPE_8x8DCT;
5632 for( i = 0; i < 16; i+=4 ) {
5633 int pred = pred_intra_mode( h, i );
5634 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5635 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5637 } else {
5638 for( i = 0; i < 16; i++ ) {
5639 int pred = pred_intra_mode( h, i );
5640 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5642 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5645 write_back_intra_pred_mode(h);
5646 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5647 } else {
5648 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5649 if( h->intra16x16_pred_mode < 0 ) return -1;
5651 if(CHROMA){
5652 h->chroma_pred_mode_table[mb_xy] =
5653 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5655 pred_mode= check_intra_pred_mode( h, pred_mode );
5656 if( pred_mode < 0 ) return -1;
5657 h->chroma_pred_mode= pred_mode;
5659 } else if( partition_count == 4 ) {
5660 int i, j, sub_partition_count[4], list, ref[2][4];
5662 if( h->slice_type_nos == FF_B_TYPE ) {
5663 for( i = 0; i < 4; i++ ) {
5664 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5665 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5666 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5668 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5669 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5670 pred_direct_motion(h, &mb_type);
5671 h->ref_cache[0][scan8[4]] =
5672 h->ref_cache[1][scan8[4]] =
5673 h->ref_cache[0][scan8[12]] =
5674 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5675 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5676 for( i = 0; i < 4; i++ )
5677 if( IS_DIRECT(h->sub_mb_type[i]) )
5678 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5681 } else {
5682 for( i = 0; i < 4; i++ ) {
5683 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5684 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5685 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5689 for( list = 0; list < h->list_count; list++ ) {
5690 for( i = 0; i < 4; i++ ) {
5691 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5692 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5693 if( h->ref_count[list] > 1 ){
5694 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5695 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5696 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5697 return -1;
5699 }else
5700 ref[list][i] = 0;
5701 } else {
5702 ref[list][i] = -1;
5704 h->ref_cache[list][ scan8[4*i]+1 ]=
5705 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5709 if(dct8x8_allowed)
5710 dct8x8_allowed = get_dct8x8_allowed(h);
5712 for(list=0; list<h->list_count; list++){
5713 for(i=0; i<4; i++){
5714 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5715 if(IS_DIRECT(h->sub_mb_type[i])){
5716 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5717 continue;
5720 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5721 const int sub_mb_type= h->sub_mb_type[i];
5722 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5723 for(j=0; j<sub_partition_count[i]; j++){
5724 int mpx, mpy;
5725 int mx, my;
5726 const int index= 4*i + block_width*j;
5727 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5728 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5729 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5731 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5732 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5733 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5735 if(IS_SUB_8X8(sub_mb_type)){
5736 mv_cache[ 1 ][0]=
5737 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5738 mv_cache[ 1 ][1]=
5739 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5741 mvd_cache[ 1 ][0]=
5742 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5743 mvd_cache[ 1 ][1]=
5744 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5745 }else if(IS_SUB_8X4(sub_mb_type)){
5746 mv_cache[ 1 ][0]= mx;
5747 mv_cache[ 1 ][1]= my;
5749 mvd_cache[ 1 ][0]= mx - mpx;
5750 mvd_cache[ 1 ][1]= my - mpy;
5751 }else if(IS_SUB_4X8(sub_mb_type)){
5752 mv_cache[ 8 ][0]= mx;
5753 mv_cache[ 8 ][1]= my;
5755 mvd_cache[ 8 ][0]= mx - mpx;
5756 mvd_cache[ 8 ][1]= my - mpy;
5758 mv_cache[ 0 ][0]= mx;
5759 mv_cache[ 0 ][1]= my;
5761 mvd_cache[ 0 ][0]= mx - mpx;
5762 mvd_cache[ 0 ][1]= my - mpy;
5764 }else{
5765 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5766 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5767 p[0] = p[1] = p[8] = p[9] = 0;
5768 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5772 } else if( IS_DIRECT(mb_type) ) {
5773 pred_direct_motion(h, &mb_type);
5774 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5775 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5776 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5777 } else {
5778 int list, mx, my, i, mpx, mpy;
5779 if(IS_16X16(mb_type)){
5780 for(list=0; list<h->list_count; list++){
5781 if(IS_DIR(mb_type, 0, list)){
5782 int ref;
5783 if(h->ref_count[list] > 1){
5784 ref= decode_cabac_mb_ref(h, list, 0);
5785 if(ref >= (unsigned)h->ref_count[list]){
5786 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5787 return -1;
5789 }else
5790 ref=0;
5791 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5792 }else
5793 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5795 for(list=0; list<h->list_count; list++){
5796 if(IS_DIR(mb_type, 0, list)){
5797 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5799 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5800 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5801 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5803 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5804 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5805 }else
5806 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5809 else if(IS_16X8(mb_type)){
5810 for(list=0; list<h->list_count; list++){
5811 for(i=0; i<2; i++){
5812 if(IS_DIR(mb_type, i, list)){
5813 int ref;
5814 if(h->ref_count[list] > 1){
5815 ref= decode_cabac_mb_ref( h, list, 8*i );
5816 if(ref >= (unsigned)h->ref_count[list]){
5817 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5818 return -1;
5820 }else
5821 ref=0;
5822 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5823 }else
5824 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5827 for(list=0; list<h->list_count; list++){
5828 for(i=0; i<2; i++){
5829 if(IS_DIR(mb_type, i, list)){
5830 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5831 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5832 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5833 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5835 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5836 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5837 }else{
5838 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5839 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5843 }else{
5844 assert(IS_8X16(mb_type));
5845 for(list=0; list<h->list_count; list++){
5846 for(i=0; i<2; i++){
5847 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5848 int ref;
5849 if(h->ref_count[list] > 1){
5850 ref= decode_cabac_mb_ref( h, list, 4*i );
5851 if(ref >= (unsigned)h->ref_count[list]){
5852 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5853 return -1;
5855 }else
5856 ref=0;
5857 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5858 }else
5859 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5862 for(list=0; list<h->list_count; list++){
5863 for(i=0; i<2; i++){
5864 if(IS_DIR(mb_type, i, list)){
5865 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5866 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5867 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5869 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5870 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5871 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5872 }else{
5873 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5874 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5881 if( IS_INTER( mb_type ) ) {
5882 h->chroma_pred_mode_table[mb_xy] = 0;
5883 write_back_motion( h, mb_type );
5886 if( !IS_INTRA16x16( mb_type ) ) {
5887 cbp = decode_cabac_mb_cbp_luma( h );
5888 if(CHROMA)
5889 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5892 h->cbp_table[mb_xy] = h->cbp = cbp;
5894 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5895 if( decode_cabac_mb_transform_size( h ) )
5896 mb_type |= MB_TYPE_8x8DCT;
5898 s->current_picture.mb_type[mb_xy]= mb_type;
5900 if( cbp || IS_INTRA16x16( mb_type ) ) {
5901 const uint8_t *scan, *scan8x8, *dc_scan;
5902 const uint32_t *qmul;
5903 int dqp;
5905 if(IS_INTERLACED(mb_type)){
5906 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5907 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5908 dc_scan= luma_dc_field_scan;
5909 }else{
5910 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5911 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5912 dc_scan= luma_dc_zigzag_scan;
5915 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5916 if( dqp == INT_MIN ){
5917 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5918 return -1;
5920 s->qscale += dqp;
5921 if(((unsigned)s->qscale) > 51){
5922 if(s->qscale<0) s->qscale+= 52;
5923 else s->qscale-= 52;
5925 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5926 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5928 if( IS_INTRA16x16( mb_type ) ) {
5929 int i;
5930 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5931 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5933 if( cbp&15 ) {
5934 qmul = h->dequant4_coeff[0][s->qscale];
5935 for( i = 0; i < 16; i++ ) {
5936 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5937 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5939 } else {
5940 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5942 } else {
5943 int i8x8, i4x4;
5944 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5945 if( cbp & (1<<i8x8) ) {
5946 if( IS_8x8DCT(mb_type) ) {
5947 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5948 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5949 } else {
5950 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5951 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5952 const int index = 4*i8x8 + i4x4;
5953 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5954 //START_TIMER
5955 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5956 //STOP_TIMER("decode_residual")
5959 } else {
5960 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5961 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5966 if( cbp&0x30 ){
5967 int c;
5968 for( c = 0; c < 2; c++ ) {
5969 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5970 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5974 if( cbp&0x20 ) {
5975 int c, i;
5976 for( c = 0; c < 2; c++ ) {
5977 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5978 for( i = 0; i < 4; i++ ) {
5979 const int index = 16 + 4 * c + i;
5980 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5981 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5984 } else {
5985 uint8_t * const nnz= &h->non_zero_count_cache[0];
5986 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5987 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5989 } else {
5990 uint8_t * const nnz= &h->non_zero_count_cache[0];
5991 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5992 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5993 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5994 h->last_qscale_diff = 0;
5997 s->current_picture.qscale_table[mb_xy]= s->qscale;
5998 write_back_non_zero_count(h);
6000 if(MB_MBAFF){
6001 h->ref_count[0] >>= 1;
6002 h->ref_count[1] >>= 1;
6005 return 0;
6009 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6010 const int index_a = qp + h->slice_alpha_c0_offset;
6011 const int alpha = (alpha_table+52)[index_a];
6012 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6013 if (alpha ==0 || beta == 0) return;
6015 if( bS[0] < 4 ) {
6016 int8_t tc[4];
6017 tc[0] = (tc0_table+52)[index_a][bS[0]];
6018 tc[1] = (tc0_table+52)[index_a][bS[1]];
6019 tc[2] = (tc0_table+52)[index_a][bS[2]];
6020 tc[3] = (tc0_table+52)[index_a][bS[3]];
6021 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6022 } else {
6023 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
6026 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6027 const int index_a = qp + h->slice_alpha_c0_offset;
6028 const int alpha = (alpha_table+52)[index_a];
6029 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6030 if (alpha ==0 || beta == 0) return;
6032 if( bS[0] < 4 ) {
6033 int8_t tc[4];
6034 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6035 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6036 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6037 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6038 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6039 } else {
6040 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6044 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6045 int i;
6046 for( i = 0; i < 16; i++, pix += stride) {
6047 int index_a;
6048 int alpha;
6049 int beta;
6051 int qp_index;
6052 int bS_index = (i >> 1);
6053 if (!MB_FIELD) {
6054 bS_index &= ~1;
6055 bS_index |= (i & 1);
6058 if( bS[bS_index] == 0 ) {
6059 continue;
6062 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6063 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6064 alpha = (alpha_table+52)[index_a];
6065 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6067 if( bS[bS_index] < 4 ) {
6068 const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
6069 const int p0 = pix[-1];
6070 const int p1 = pix[-2];
6071 const int p2 = pix[-3];
6072 const int q0 = pix[0];
6073 const int q1 = pix[1];
6074 const int q2 = pix[2];
6076 if( FFABS( p0 - q0 ) < alpha &&
6077 FFABS( p1 - p0 ) < beta &&
6078 FFABS( q1 - q0 ) < beta ) {
6079 int tc = tc0;
6080 int i_delta;
6082 if( FFABS( p2 - p0 ) < beta ) {
6083 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6084 tc++;
6086 if( FFABS( q2 - q0 ) < beta ) {
6087 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6088 tc++;
6091 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6092 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6093 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6094 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6096 }else{
6097 const int p0 = pix[-1];
6098 const int p1 = pix[-2];
6099 const int p2 = pix[-3];
6101 const int q0 = pix[0];
6102 const int q1 = pix[1];
6103 const int q2 = pix[2];
6105 if( FFABS( p0 - q0 ) < alpha &&
6106 FFABS( p1 - p0 ) < beta &&
6107 FFABS( q1 - q0 ) < beta ) {
6109 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6110 if( FFABS( p2 - p0 ) < beta)
6112 const int p3 = pix[-4];
6113 /* p0', p1', p2' */
6114 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6115 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6116 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6117 } else {
6118 /* p0' */
6119 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6121 if( FFABS( q2 - q0 ) < beta)
6123 const int q3 = pix[3];
6124 /* q0', q1', q2' */
6125 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6126 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6127 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6128 } else {
6129 /* q0' */
6130 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6132 }else{
6133 /* p0', q0' */
6134 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6135 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6137 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6142 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6143 int i;
6144 for( i = 0; i < 8; i++, pix += stride) {
6145 int index_a;
6146 int alpha;
6147 int beta;
6149 int qp_index;
6150 int bS_index = i;
6152 if( bS[bS_index] == 0 ) {
6153 continue;
6156 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6157 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6158 alpha = (alpha_table+52)[index_a];
6159 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6161 if( bS[bS_index] < 4 ) {
6162 const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
6163 const int p0 = pix[-1];
6164 const int p1 = pix[-2];
6165 const int q0 = pix[0];
6166 const int q1 = pix[1];
6168 if( FFABS( p0 - q0 ) < alpha &&
6169 FFABS( p1 - p0 ) < beta &&
6170 FFABS( q1 - q0 ) < beta ) {
6171 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6173 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6174 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6175 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6177 }else{
6178 const int p0 = pix[-1];
6179 const int p1 = pix[-2];
6180 const int q0 = pix[0];
6181 const int q1 = pix[1];
6183 if( FFABS( p0 - q0 ) < alpha &&
6184 FFABS( p1 - p0 ) < beta &&
6185 FFABS( q1 - q0 ) < beta ) {
6187 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6188 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6189 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6195 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6196 const int index_a = qp + h->slice_alpha_c0_offset;
6197 const int alpha = (alpha_table+52)[index_a];
6198 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6199 if (alpha ==0 || beta == 0) return;
6201 if( bS[0] < 4 ) {
6202 int8_t tc[4];
6203 tc[0] = (tc0_table+52)[index_a][bS[0]];
6204 tc[1] = (tc0_table+52)[index_a][bS[1]];
6205 tc[2] = (tc0_table+52)[index_a][bS[2]];
6206 tc[3] = (tc0_table+52)[index_a][bS[3]];
6207 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6208 } else {
6209 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6213 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6214 const int index_a = qp + h->slice_alpha_c0_offset;
6215 const int alpha = (alpha_table+52)[index_a];
6216 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6217 if (alpha ==0 || beta == 0) return;
6219 if( bS[0] < 4 ) {
6220 int8_t tc[4];
6221 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6222 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6223 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6224 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6225 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6226 } else {
6227 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6231 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6232 MpegEncContext * const s = &h->s;
6233 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6234 int mb_xy, mb_type;
6235 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6237 mb_xy = h->mb_xy;
6239 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6240 !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
6241 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6242 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6243 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6244 return;
6246 assert(!FRAME_MBAFF);
6248 mb_type = s->current_picture.mb_type[mb_xy];
6249 qp = s->current_picture.qscale_table[mb_xy];
6250 qp0 = s->current_picture.qscale_table[mb_xy-1];
6251 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6252 qpc = get_chroma_qp( h, 0, qp );
6253 qpc0 = get_chroma_qp( h, 0, qp0 );
6254 qpc1 = get_chroma_qp( h, 0, qp1 );
6255 qp0 = (qp + qp0 + 1) >> 1;
6256 qp1 = (qp + qp1 + 1) >> 1;
6257 qpc0 = (qpc + qpc0 + 1) >> 1;
6258 qpc1 = (qpc + qpc1 + 1) >> 1;
6259 qp_thresh = 15 - h->slice_alpha_c0_offset;
6260 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6261 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6262 return;
6264 if( IS_INTRA(mb_type) ) {
6265 int16_t bS4[4] = {4,4,4,4};
6266 int16_t bS3[4] = {3,3,3,3};
6267 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6268 if( IS_8x8DCT(mb_type) ) {
6269 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6270 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6271 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6272 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6273 } else {
6274 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6275 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6276 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6277 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6278 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6279 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6280 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6281 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6283 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6284 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6285 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6286 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6287 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6288 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6289 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6290 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6291 return;
6292 } else {
6293 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6294 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6295 int edges;
6296 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6297 edges = 4;
6298 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6299 } else {
6300 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6301 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6302 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6303 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6304 ? 3 : 0;
6305 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6306 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6307 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6308 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6310 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6311 bSv[0][0] = 0x0004000400040004ULL;
6312 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6313 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6315 #define FILTER(hv,dir,edge)\
6316 if(bSv[dir][edge]) {\
6317 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6318 if(!(edge&1)) {\
6319 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6320 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6323 if( edges == 1 ) {
6324 FILTER(v,0,0);
6325 FILTER(h,1,0);
6326 } else if( IS_8x8DCT(mb_type) ) {
6327 FILTER(v,0,0);
6328 FILTER(v,0,2);
6329 FILTER(h,1,0);
6330 FILTER(h,1,2);
6331 } else {
6332 FILTER(v,0,0);
6333 FILTER(v,0,1);
6334 FILTER(v,0,2);
6335 FILTER(v,0,3);
6336 FILTER(h,1,0);
6337 FILTER(h,1,1);
6338 FILTER(h,1,2);
6339 FILTER(h,1,3);
6341 #undef FILTER
6346 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
6347 MpegEncContext * const s = &h->s;
6348 int edge;
6349 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6350 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6351 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6352 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6353 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6355 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6356 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6357 // how often to recheck mv-based bS when iterating between edges
6358 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6359 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6360 // how often to recheck mv-based bS when iterating along each edge
6361 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6363 if (first_vertical_edge_done) {
6364 start = 1;
6367 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6368 start = 1;
6370 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6371 && !IS_INTERLACED(mb_type)
6372 && IS_INTERLACED(mbm_type)
6374 // This is a special case in the norm where the filtering must
6375 // be done twice (one each of the field) even if we are in a
6376 // frame macroblock.
6378 static const int nnz_idx[4] = {4,5,6,3};
6379 unsigned int tmp_linesize = 2 * linesize;
6380 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6381 int mbn_xy = mb_xy - 2 * s->mb_stride;
6382 int qp;
6383 int i, j;
6384 int16_t bS[4];
6386 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6387 if( IS_INTRA(mb_type) ||
6388 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6389 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6390 } else {
6391 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6392 for( i = 0; i < 4; i++ ) {
6393 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6394 mbn_nnz[nnz_idx[i]] != 0 )
6395 bS[i] = 2;
6396 else
6397 bS[i] = 1;
6400 // Do not use s->qscale as luma quantizer because it has not the same
6401 // value in IPCM macroblocks.
6402 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6403 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6404 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6405 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6406 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6407 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6408 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6409 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6412 start = 1;
6415 /* Calculate bS */
6416 for( edge = start; edge < edges; edge++ ) {
6417 /* mbn_xy: neighbor macroblock */
6418 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6419 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6420 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6421 int16_t bS[4];
6422 int qp;
6424 if( (edge&1) && IS_8x8DCT(mb_type) )
6425 continue;
6427 if( IS_INTRA(mb_type) ||
6428 IS_INTRA(mbn_type) ) {
6429 int value;
6430 if (edge == 0) {
6431 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6432 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6434 value = 4;
6435 } else {
6436 value = 3;
6438 } else {
6439 value = 3;
6441 bS[0] = bS[1] = bS[2] = bS[3] = value;
6442 } else {
6443 int i, l;
6444 int mv_done;
6446 if( edge & mask_edge ) {
6447 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6448 mv_done = 1;
6450 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6451 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6452 mv_done = 1;
6454 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6455 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6456 int bn_idx= b_idx - (dir ? 8:1);
6457 int v = 0;
6459 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6460 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6461 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6462 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6465 if(h->slice_type_nos == FF_B_TYPE && v){
6466 v=0;
6467 for( l = 0; !v && l < 2; l++ ) {
6468 int ln= 1-l;
6469 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6470 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6471 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6475 bS[0] = bS[1] = bS[2] = bS[3] = v;
6476 mv_done = 1;
6478 else
6479 mv_done = 0;
6481 for( i = 0; i < 4; i++ ) {
6482 int x = dir == 0 ? edge : i;
6483 int y = dir == 0 ? i : edge;
6484 int b_idx= 8 + 4 + x + 8*y;
6485 int bn_idx= b_idx - (dir ? 8:1);
6487 if( h->non_zero_count_cache[b_idx] |
6488 h->non_zero_count_cache[bn_idx] ) {
6489 bS[i] = 2;
6491 else if(!mv_done)
6493 bS[i] = 0;
6494 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6495 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6496 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6497 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6498 bS[i] = 1;
6499 break;
6503 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6504 bS[i] = 0;
6505 for( l = 0; l < 2; l++ ) {
6506 int ln= 1-l;
6507 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6508 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6509 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6510 bS[i] = 1;
6511 break;
6518 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6519 continue;
6522 /* Filter edge */
6523 // Do not use s->qscale as luma quantizer because it has not the same
6524 // value in IPCM macroblocks.
6525 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6526 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6527 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6528 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6529 if( dir == 0 ) {
6530 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6531 if( (edge&1) == 0 ) {
6532 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6533 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6534 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6535 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6537 } else {
6538 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6539 if( (edge&1) == 0 ) {
6540 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6541 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6542 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6543 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Apply the in-loop deblocking filter to one decoded macroblock.
 *
 * Filters the luma plane and both chroma planes in place. Handles the
 * MBAFF special case where the first vertical edge needs 8 boundary
 * strengths and two QP pairs; all remaining edges are delegated to
 * filter_mb_dir() for the vertical (dir=0) and horizontal (dir=1) passes.
 *
 * @param h          decoder context
 * @param mb_x, mb_y macroblock position in MB units
 * @param img_y      pointer to this MB's luma samples
 * @param img_cb     pointer to this MB's Cb samples
 * @param img_cr     pointer to this MB's Cr samples
 * @param linesize   luma stride in bytes
 * @param uvlinesize chroma stride in bytes
 */
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
    MpegEncContext * const s = &h->s;
    const int mb_xy= mb_x + mb_y*s->mb_stride;
    const int mb_type = s->current_picture.mb_type[mb_xy];
    /* interlaced MBs use a 2-unit vertical MV threshold (field scaling), progressive use 4 */
    const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
    int first_vertical_edge_done = 0;
    av_unused int dir;

    //for sufficiently low qp, filtering wouldn't do anything
    //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
    if(!FRAME_MBAFF){
        int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
        int qp = s->current_picture.qscale_table[mb_xy];
        /* edge QP is the average of the two neighbouring MB QPs, so also
         * check the averaged values against the left and top neighbours */
        if(qp <= qp_thresh
           && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
           && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
            return;
        }
    }

    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
    if(!h->pps.cabac && h->pps.transform_8x8_mode){
        int top_type, left_type[2];
        top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
        left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
        left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];

        /* rebuild the non-zero-count cache entries along the top/left
         * borders from the coded-block-pattern of 8x8-transform neighbours */
        if(IS_8x8DCT(top_type)){
            h->non_zero_count_cache[4+8*0]=
            h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
            h->non_zero_count_cache[6+8*0]=
            h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
        }
        if(IS_8x8DCT(left_type[0])){
            h->non_zero_count_cache[3+8*1]=
            h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
        }
        if(IS_8x8DCT(left_type[1])){
            h->non_zero_count_cache[3+8*3]=
            h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
        }

        /* for the current MB, each cbp bit covers one 8x8 (i.e. four 4x4) blocks */
        if(IS_8x8DCT(mb_type)){
            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;

            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;

            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;

            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
        }
    }

    if (FRAME_MBAFF
            // left mb is in picture
            && h->slice_table[mb_xy-1] != 0xFFFF
            // and current and left pair do not have the same interlaced type
            && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
            // and left mb is in the same slice if deblocking_filter == 2
            && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
        /* First vertical edge is different in MBAFF frames
         * There are 8 different bS to compute and 2 different Qp
         */
        const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
        const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
        int16_t bS[8];
        int qp[2];
        int bqp[2];
        int rqp[2];
        int mb_qp, mbn0_qp, mbn1_qp;
        int i;
        first_vertical_edge_done = 1;

        if( IS_INTRA(mb_type) )
            bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
        else {
            for( i = 0; i < 8; i++ ) {
                /* map each of the 8 edge segments to the correct left
                 * neighbour, depending on field vs frame coding of this pair */
                int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];

                if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
                    bS[i] = 4;
                else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
                         /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
                         ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
                            (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
                                                                       :
                            h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
                    bS[i] = 2;
                else
                    bS[i] = 1;
            }
        }

        /* per-edge QP: average of current MB QP and the neighbour's,
         * separately for luma (qp), Cb (bqp) and Cr (rqp) */
        mb_qp = s->current_picture.qscale_table[mb_xy];
        mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
        mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
        qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
        bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
                   get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
        rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
                   get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
        qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
        bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
                   get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
        rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
                   get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;

        /* Filter edge */
        tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
        { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
        filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
        filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
        filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
    }

    /* remaining edges: one call per direction (vertical, then horizontal);
     * CONFIG_SMALL trades a tiny speed loss for smaller code */
#if CONFIG_SMALL
    for( dir = 0; dir < 2; dir++ )
        filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
#else
    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
    filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
#endif
}
/**
 * Decode all macroblocks of one slice.
 *
 * Thread entry point (also called directly for single-context decoding):
 * arg is a pointer to a H264Context pointer. Dispatches to the CABAC or
 * CAVLC macroblock loop depending on the active PPS; in MBAFF frames each
 * iteration decodes a vertical MB pair (FIXME in the code below).
 *
 * @return 0 when the slice ends cleanly, -1 on a decode error
 *         (the error resilience bookkeeping is updated in both cases)
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg){
    H264Context *h = *(void**)arg;
    MpegEncContext * const s = &h->s;
    /* with data partitioning, only AC errors/end markers are recorded here */
    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;

    s->mb_skip_run= -1;

    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
                    (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));

    if( h->pps.cabac ) {
        int i;

        /* realign */
        align_get_bits( &s->gb );

        /* init cabac */
        ff_init_cabac_states( &h->cabac);
        ff_init_cabac_decoder( &h->cabac,
                               s->gb.buffer + get_bits_count(&s->gb)/8,
                               ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
        /* calculate pre-state: initialise all 460 CABAC contexts from the
         * spec's (slope, offset) tables, clipped to [1,126], then packed as
         * (state<<1)|MPS into one byte each */
        for( i= 0; i < 460; i++ ) {
            int pre;
            if( h->slice_type_nos == FF_I_TYPE )
                pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
            else
                pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );

            if( pre <= 63 )
                h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
            else
                h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
        }

        /* CABAC macroblock loop */
        for(;;){
            //START_TIMER
            int ret = decode_mb_cabac(h);
            int eos;
            //STOP_TIMER("decode_mb_cabac")

            if(ret>=0) hl_decode_mb(h);

            if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;

                ret = decode_mb_cabac(h);

                if(ret>=0) hl_decode_mb(h);
                s->mb_y--;
            }
            eos = get_cabac_terminate( &h->cabac );

            /* bytestream_end + 2 tolerates the CABAC decoder's read-ahead */
            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                return -1;
            }

            if( ++s->mb_x >= s->mb_width ) {
                s->mb_x = 0;
                ff_draw_horiz_band(s, 16*s->mb_y, 16);
                ++s->mb_y;
                /* fields / MB pairs advance two MB rows at a time */
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                }
            }

            if( eos || s->mb_y >= s->mb_height ) {
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                return 0;
            }
        }

    } else {
        /* CAVLC macroblock loop */
        for(;;){
            int ret = decode_mb_cavlc(h);

            if(ret>=0) hl_decode_mb(h);

            if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;
                ret = decode_mb_cavlc(h);

                if(ret>=0) hl_decode_mb(h);
                s->mb_y--;
            }

            if(ret<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                s->mb_x=0;
                ff_draw_horiz_band(s, 16*s->mb_y, 16);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                }
                if(s->mb_y >= s->mb_height){
                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                    /* slice end must coincide exactly with the bit count */
                    if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;
                    }
                }
            }

            /* a pending skip_run may legally extend past the last read bit */
            if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;
                }
            }
        }
    }

/* dead legacy MB loop, kept for reference only (never compiled) */
#if 0
    for(;s->mb_y < s->mb_height; s->mb_y++){
        for(;s->mb_x < s->mb_width; s->mb_x++){
            int ret= decode_mb(h);

            hl_decode_mb(h);

            if(ret<0){
                av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                s->mb_x=0;
                if(++s->mb_y >= s->mb_height){
                    if(get_bits_count(s->gb) == s->gb.size_in_bits){
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;
                    }
                }
            }

            if(get_bits_count(s->gb) >= s->gb.size_in_bits){
                if(get_bits_count(s->gb) == s->gb.size_in_bits){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;
                }
            }
        }
        s->mb_x=0;
        ff_draw_horiz_band(s, 16*s->mb_y, 16);
    }
#endif
    return -1; //not reached
}
/**
 * Parse a picture timing SEI message (H.264 spec D.1.2/D.2.2).
 *
 * Stores CPB removal / DPB output delays and pic_struct / ct_type into the
 * context; all clock-timestamp fields are parsed but discarded except
 * ct_type, which is OR-accumulated as a bitmask.
 *
 * @return 0 on success, -1 on an invalid pic_struct
 */
static int decode_picture_timing(H264Context *h){
    MpegEncContext * const s = &h->s;
    if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
        /* field widths come from the active SPS's HRD parameters */
        h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
        h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
    }
    if(h->sps.pic_struct_present_flag){
        unsigned int i, num_clock_ts;
        h->sei_pic_struct = get_bits(&s->gb, 4);
        h->sei_ct_type = 0;

        if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
            return -1;

        num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];

        for (i = 0 ; i < num_clock_ts ; i++){
            if(get_bits(&s->gb, 1)){                      /* clock_timestamp_flag */
                unsigned int full_timestamp_flag;
                h->sei_ct_type |= 1<<get_bits(&s->gb, 2);
                skip_bits(&s->gb, 1);                     /* nuit_field_based_flag */
                skip_bits(&s->gb, 5);                     /* counting_type */
                full_timestamp_flag = get_bits(&s->gb, 1);
                skip_bits(&s->gb, 1);                     /* discontinuity_flag */
                skip_bits(&s->gb, 1);                     /* cnt_dropped_flag */
                skip_bits(&s->gb, 8);                     /* n_frames */
                if(full_timestamp_flag){
                    skip_bits(&s->gb, 6);                 /* seconds_value 0..59 */
                    skip_bits(&s->gb, 6);                 /* minutes_value 0..59 */
                    skip_bits(&s->gb, 5);                 /* hours_value 0..23 */
                }else{
                    /* optional nested sec/min/hour fields */
                    if(get_bits(&s->gb, 1)){              /* seconds_flag */
                        skip_bits(&s->gb, 6);             /* seconds_value range 0..59 */
                        if(get_bits(&s->gb, 1)){          /* minutes_flag */
                            skip_bits(&s->gb, 6);         /* minutes_value 0..59 */
                            if(get_bits(&s->gb, 1))       /* hours_flag */
                                skip_bits(&s->gb, 5);     /* hours_value 0..23 */
                        }
                    }
                }
                if(h->sps.time_offset_length > 0)
                    skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
            }
        }
    }

    if(s->avctx->debug & FF_DEBUG_PICT_INFO)
        av_log(s->avctx, AV_LOG_DEBUG, "ct_type:%X pic_struct:%d\n", h->sei_ct_type, h->sei_pic_struct);

    return 0;
}
6908 static int decode_unregistered_user_data(H264Context *h, int size){
6909 MpegEncContext * const s = &h->s;
6910 uint8_t user_data[16+256];
6911 int e, build, i;
6913 if(size<16)
6914 return -1;
6916 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6917 user_data[i]= get_bits(&s->gb, 8);
6920 user_data[i]= 0;
6921 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6922 if(e==1 && build>=0)
6923 h->x264_build= build;
6925 if(s->avctx->debug & FF_DEBUG_BUGS)
6926 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6928 for(; i<size; i++)
6929 skip_bits(&s->gb, 8);
6931 return 0;
6934 static int decode_recovery_point(H264Context *h){
6935 MpegEncContext * const s = &h->s;
6937 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6938 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
6940 return 0;
6943 static int decode_buffering_period(H264Context *h){
6944 MpegEncContext * const s = &h->s;
6945 unsigned int sps_id;
6946 int sched_sel_idx;
6947 SPS *sps;
6949 sps_id = get_ue_golomb_31(&s->gb);
6950 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6951 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6952 return -1;
6954 sps = h->sps_buffers[sps_id];
6956 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6957 if (sps->nal_hrd_parameters_present_flag) {
6958 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6959 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6960 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6963 if (sps->vcl_hrd_parameters_present_flag) {
6964 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6965 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6966 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6970 h->sei_buffering_period_present = 1;
6971 return 0;
/**
 * Decode all SEI messages in the current NAL unit.
 *
 * Each message starts with type and size fields coded as runs of 0xFF
 * bytes plus a terminator byte (accumulated below); known types are
 * dispatched to their parsers, unknown ones are skipped by size.
 *
 * @return 0 on success, -1 if a message parser fails
 */
int ff_h264_decode_sei(H264Context *h){
    MpegEncContext * const s = &h->s;

    /* +16: need at least type byte, size byte and some payload left */
    while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
        int size, type;

        /* ff-byte escape coding: each 0xFF adds 255, the final byte closes */
        type=0;
        do{
            type+= show_bits(&s->gb, 8);
        }while(get_bits(&s->gb, 8) == 255);

        size=0;
        do{
            size+= show_bits(&s->gb, 8);
        }while(get_bits(&s->gb, 8) == 255);

        switch(type){
        case SEI_TYPE_PIC_TIMING: // Picture timing SEI
            if(decode_picture_timing(h) < 0)
                return -1;
            break;
        case SEI_TYPE_USER_DATA_UNREGISTERED:
            if(decode_unregistered_user_data(h, size) < 0)
                return -1;
            break;
        case SEI_TYPE_RECOVERY_POINT:
            if(decode_recovery_point(h) < 0)
                return -1;
            break;
        case SEI_BUFFERING_PERIOD:
            if(decode_buffering_period(h) < 0)
                return -1;
            break;
        default:
            /* unknown message: skip its whole payload */
            skip_bits(&s->gb, 8*size);
        }

        //FIXME check bits here
        align_get_bits(&s->gb);
    }

    return 0;
}
7018 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
7019 MpegEncContext * const s = &h->s;
7020 int cpb_count, i;
7021 cpb_count = get_ue_golomb_31(&s->gb) + 1;
7023 if(cpb_count > 32U){
7024 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
7025 return -1;
7028 get_bits(&s->gb, 4); /* bit_rate_scale */
7029 get_bits(&s->gb, 4); /* cpb_size_scale */
7030 for(i=0; i<cpb_count; i++){
7031 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7032 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7033 get_bits1(&s->gb); /* cbr_flag */
7035 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7036 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7037 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
7038 sps->time_offset_length = get_bits(&s->gb, 5);
7039 sps->cpb_cnt = cpb_count;
7040 return 0;
/**
 * Parse the vui_parameters() syntax structure (H.264 spec Annex E).
 *
 * Fills sample aspect ratio, timing info, HRD presence flags, pic_struct
 * presence and num_reorder_frames into the SPS; purely informative fields
 * (overscan, video signal type, colour description, ...) are consumed and
 * discarded.
 *
 * @return 0 on success, -1 on invalid aspect ratio, timing or reorder count
 */
static inline int decode_vui_parameters(H264Context *h, SPS *sps){
    MpegEncContext * const s = &h->s;
    int aspect_ratio_info_present_flag;
    unsigned int aspect_ratio_idc;

    aspect_ratio_info_present_flag= get_bits1(&s->gb);

    if( aspect_ratio_info_present_flag ) {
        aspect_ratio_idc= get_bits(&s->gb, 8);
        if( aspect_ratio_idc == EXTENDED_SAR ) {
            /* explicit numerator/denominator follow */
            sps->sar.num= get_bits(&s->gb, 16);
            sps->sar.den= get_bits(&s->gb, 16);
        }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
            /* one of the predefined table entries */
            sps->sar= pixel_aspect[aspect_ratio_idc];
        }else{
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
            return -1;
        }
    }else{
        /* 0/0 = unspecified */
        sps->sar.num=
        sps->sar.den= 0;
    }
//            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);

    if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
        get_bits1(&s->gb);      /* overscan_appropriate_flag */
    }

    if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
        get_bits(&s->gb, 3);    /* video_format */
        get_bits1(&s->gb);      /* video_full_range_flag */
        if(get_bits1(&s->gb)){  /* colour_description_present_flag */
            get_bits(&s->gb, 8); /* colour_primaries */
            get_bits(&s->gb, 8); /* transfer_characteristics */
            get_bits(&s->gb, 8); /* matrix_coefficients */
        }
    }

    if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
        s->avctx->chroma_sample_location = get_ue_golomb(&s->gb)+1;  /* chroma_sample_location_type_top_field */
        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
    }

    sps->timing_info_present_flag = get_bits1(&s->gb);
    if(sps->timing_info_present_flag){
        sps->num_units_in_tick = get_bits_long(&s->gb, 32);
        sps->time_scale = get_bits_long(&s->gb, 32);
        /* the -1 > 0x7FFFFFFEU trick rejects both 0 and values above INT_MAX */
        if(sps->num_units_in_tick-1 > 0x7FFFFFFEU || sps->time_scale-1 > 0x7FFFFFFEU){
            av_log(h->s.avctx, AV_LOG_ERROR, "time_scale/num_units_in_tick invalid or unsupported (%d/%d)\n", sps->time_scale, sps->num_units_in_tick);
            return -1;
        }
        sps->fixed_frame_rate_flag = get_bits1(&s->gb);
    }

    sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
    if(sps->nal_hrd_parameters_present_flag)
        if(decode_hrd_parameters(h, sps) < 0)
            return -1;
    sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
    if(sps->vcl_hrd_parameters_present_flag)
        if(decode_hrd_parameters(h, sps) < 0)
            return -1;
    if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
        get_bits1(&s->gb);     /* low_delay_hrd_flag */
    sps->pic_struct_present_flag = get_bits1(&s->gb);

    sps->bitstream_restriction_flag = get_bits1(&s->gb);
    if(sps->bitstream_restriction_flag){
        get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
        get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
        get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
        get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
        get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
        sps->num_reorder_frames= get_ue_golomb(&s->gb);
        get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/

        if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
            return -1;
        }
    }

    return 0;
}
/**
 * Parse one scaling list (H.264 spec 7.3.2.1.1.1).
 *
 * Three possible outcomes, per the spec's fallback rules:
 *  - list not present in the bitstream: copy fallback_list,
 *  - first delta makes the first coefficient zero: copy jvt_list
 *    (the spec's default list),
 *  - otherwise: decode delta-coded values along the zigzag scan, where a
 *    zero "next" repeats the previous value for the rest of the list.
 *
 * @param factors       output array (size entries)
 * @param size          16 for 4x4 lists, 64 for 8x8 lists
 * @param jvt_list      spec default list used when signalled
 * @param fallback_list list used when this one is absent
 */
static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
                                const uint8_t *jvt_list, const uint8_t *fallback_list){
    MpegEncContext * const s = &h->s;
    int i, last = 8, next = 8;
    const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
    if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
        memcpy(factors, fallback_list, size*sizeof(uint8_t));
    else
    for(i=0;i<size;i++){
        if(next)
            next = (last + get_se_golomb(&s->gb)) & 0xff;
        if(!i && !next){ /* matrix not written, we use the preset one */
            memcpy(factors, jvt_list, size*sizeof(uint8_t));
            break;
        }
        /* next==0 means: repeat 'last' for all remaining positions */
        last = factors[scan[i]] = next ? next : last;
    }
}
/**
 * Parse all scaling matrices of an SPS or PPS (H.264 spec 7.3.2.1/7.3.2.2).
 *
 * Decodes the six 4x4 lists and, for high profile 8x8 transform, the two
 * 8x8 lists. Each list's fallback follows the spec's chaining: the first
 * intra/inter lists fall back to the SPS matrices (when decoding a PPS and
 * the SPS carried matrices) or to the defaults, while subsequent lists fall
 * back to the previously decoded list of the same class.
 *
 * @param is_sps non-zero when called from SPS parsing (pps may be NULL then)
 */
static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
                                    uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
    MpegEncContext * const s = &h->s;
    int fallback_sps = !is_sps && sps->scaling_matrix_present;
    const uint8_t *fallback[4] = {
        fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
        fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
        fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
    };
    if(get_bits1(&s->gb)){
        /* remember that this SPS carried matrices so later PPS can fall back to them */
        sps->scaling_matrix_present |= is_sps;
        decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
        decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
        decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
        decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
        decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
        decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
        if(is_sps || pps->transform_8x8_mode){
            decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
        }
    }
}
/**
 * Decode a sequence parameter set NAL unit.
 *
 * Allocates a new SPS, parses all fields from the bitstream (including high
 * profile extensions, POC parameters, dimensions, cropping and VUI), then
 * stores it in h->sps_buffers[sps_id] (replacing any previous SPS with the
 * same id) and copies it into h->sps as the active SPS.
 *
 * @return 0 on success, -1 on error (the allocation is freed on failure)
 */
int ff_h264_decode_seq_parameter_set(H264Context *h){
    MpegEncContext * const s = &h->s;
    int profile_idc, level_idc;
    unsigned int sps_id;
    int i;
    SPS *sps;

    profile_idc= get_bits(&s->gb, 8);
    get_bits1(&s->gb);   //constraint_set0_flag
    get_bits1(&s->gb);   //constraint_set1_flag
    get_bits1(&s->gb);   //constraint_set2_flag
    get_bits1(&s->gb);   //constraint_set3_flag
    get_bits(&s->gb, 4); // reserved
    level_idc= get_bits(&s->gb, 8);
    sps_id= get_ue_golomb_31(&s->gb);

    if(sps_id >= MAX_SPS_COUNT) {
        av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
        return -1;
    }
    sps= av_mallocz(sizeof(SPS));
    if(sps == NULL)
        return -1;

    sps->profile_idc= profile_idc;
    sps->level_idc= level_idc;

    /* flat default matrices (all 16) in case no scaling lists are coded */
    memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
    memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
    sps->scaling_matrix_present = 0;

    if(sps->profile_idc >= 100){ //high profile
        sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
        if(sps->chroma_format_idc == 3)
            sps->residual_color_transform_flag = get_bits1(&s->gb);
        sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
        sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
        sps->transform_bypass = get_bits1(&s->gb);
        decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
    }else{
        /* non-high profiles are always 4:2:0 */
        sps->chroma_format_idc= 1;
    }

    sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
    sps->poc_type= get_ue_golomb_31(&s->gb);

    if(sps->poc_type == 0){ //FIXME #define
        sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
    } else if(sps->poc_type == 1){//FIXME #define
        sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
        sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
        sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
        sps->poc_cycle_length = get_ue_golomb(&s->gb);

        if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
            av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
            goto fail;
        }

        for(i=0; i<sps->poc_cycle_length; i++)
            sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
    }else if(sps->poc_type != 2){
        av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
        goto fail;
    }

    sps->ref_frame_count= get_ue_golomb_31(&s->gb);
    if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
        av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
        goto fail;
    }
    sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
    sps->mb_width = get_ue_golomb(&s->gb) + 1;
    sps->mb_height= get_ue_golomb(&s->gb) + 1;
    if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
       avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
        av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
        goto fail;
    }

    sps->frame_mbs_only_flag= get_bits1(&s->gb);
    if(!sps->frame_mbs_only_flag)
        sps->mb_aff= get_bits1(&s->gb);
    else
        sps->mb_aff= 0;

    sps->direct_8x8_inference_flag= get_bits1(&s->gb);

#ifndef ALLOW_INTERLACE
    if(sps->mb_aff)
        av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
#endif
    sps->crop= get_bits1(&s->gb);
    if(sps->crop){
        sps->crop_left = get_ue_golomb(&s->gb);
        sps->crop_right = get_ue_golomb(&s->gb);
        sps->crop_top = get_ue_golomb(&s->gb);
        sps->crop_bottom= get_ue_golomb(&s->gb);
        /* only right/bottom cropping within one MB is fully supported */
        if(sps->crop_left || sps->crop_top){
            av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
        }
        if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
            av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
        }
    }else{
        sps->crop_left =
        sps->crop_right =
        sps->crop_top =
        sps->crop_bottom= 0;
    }

    sps->vui_parameters_present_flag= get_bits1(&s->gb);
    if( sps->vui_parameters_present_flag )
        if (decode_vui_parameters(h, sps) < 0)
            goto fail;

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s %d/%d\n",
               sps_id, sps->profile_idc, sps->level_idc,
               sps->poc_type,
               sps->ref_frame_count,
               sps->mb_width, sps->mb_height,
               sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
               sps->direct_8x8_inference_flag ? "8B8" : "",
               sps->crop_left, sps->crop_right,
               sps->crop_top, sps->crop_bottom,
               sps->vui_parameters_present_flag ? "VUI" : "",
               ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc],
               sps->timing_info_present_flag ? sps->num_units_in_tick : 0,
               sps->timing_info_present_flag ? sps->time_scale : 0
               );
    }

    av_free(h->sps_buffers[sps_id]);
    h->sps_buffers[sps_id]= sps;
    h->sps = *sps;
    return 0;
fail:
    av_free(sps);
    return -1;
}
7314 static void
7315 build_qp_table(PPS *pps, int t, int index)
7317 int i;
7318 for(i = 0; i < 52; i++)
7319 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
7322 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7323 MpegEncContext * const s = &h->s;
7324 unsigned int pps_id= get_ue_golomb(&s->gb);
7325 PPS *pps;
7327 if(pps_id >= MAX_PPS_COUNT) {
7328 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7329 return -1;
7332 pps= av_mallocz(sizeof(PPS));
7333 if(pps == NULL)
7334 return -1;
7335 pps->sps_id= get_ue_golomb_31(&s->gb);
7336 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7337 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7338 goto fail;
7341 pps->cabac= get_bits1(&s->gb);
7342 pps->pic_order_present= get_bits1(&s->gb);
7343 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7344 if(pps->slice_group_count > 1 ){
7345 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7346 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7347 switch(pps->mb_slice_group_map_type){
7348 case 0:
7349 #if 0
7350 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7351 | run_length[ i ] |1 |ue(v) |
7352 #endif
7353 break;
7354 case 2:
7355 #if 0
7356 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7357 |{ | | |
7358 | top_left_mb[ i ] |1 |ue(v) |
7359 | bottom_right_mb[ i ] |1 |ue(v) |
7360 | } | | |
7361 #endif
7362 break;
7363 case 3:
7364 case 4:
7365 case 5:
7366 #if 0
7367 | slice_group_change_direction_flag |1 |u(1) |
7368 | slice_group_change_rate_minus1 |1 |ue(v) |
7369 #endif
7370 break;
7371 case 6:
7372 #if 0
7373 | slice_group_id_cnt_minus1 |1 |ue(v) |
7374 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7375 |) | | |
7376 | slice_group_id[ i ] |1 |u(v) |
7377 #endif
7378 break;
7381 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7382 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7383 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7384 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7385 goto fail;
7388 pps->weighted_pred= get_bits1(&s->gb);
7389 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7390 pps->init_qp= get_se_golomb(&s->gb) + 26;
7391 pps->init_qs= get_se_golomb(&s->gb) + 26;
7392 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7393 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7394 pps->constrained_intra_pred= get_bits1(&s->gb);
7395 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7397 pps->transform_8x8_mode= 0;
7398 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7399 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7400 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7402 if(get_bits_count(&s->gb) < bit_length){
7403 pps->transform_8x8_mode= get_bits1(&s->gb);
7404 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7405 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7406 } else {
7407 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7410 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7411 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7412 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7413 h->pps.chroma_qp_diff= 1;
7415 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7416 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7417 pps_id, pps->sps_id,
7418 pps->cabac ? "CABAC" : "CAVLC",
7419 pps->slice_group_count,
7420 pps->ref_count[0], pps->ref_count[1],
7421 pps->weighted_pred ? "weighted" : "",
7422 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7423 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7424 pps->constrained_intra_pred ? "CONSTR" : "",
7425 pps->redundant_pic_cnt_present ? "REDU" : "",
7426 pps->transform_8x8_mode ? "8x8DCT" : ""
7430 av_free(h->pps_buffers[pps_id]);
7431 h->pps_buffers[pps_id]= pps;
7432 return 0;
7433 fail:
7434 av_free(pps);
7435 return -1;
/**
 * Call decode_slice() for each context.
 *
 * For a single context, decode_slice() is called directly; otherwise the
 * slice contexts are run through avctx->execute() and the master context is
 * updated from the last slice context afterwards. No-op when a hardware
 * accelerator (hwaccel or VDPAU) handles decoding.
 *
 * @param h h264 master context
 * @param context_count number of contexts to execute
 */
static void execute_decode_slices(H264Context *h, int context_count){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    H264Context *hx;
    int i;

    if (s->avctx->hwaccel)
        return;
    if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        return;
    if(context_count == 1) {
        decode_slice(avctx, &h);
    } else {
        /* context 0 is the master itself; prime the slave contexts */
        for(i = 1; i < context_count; i++) {
            hx = h->thread_context[i];
            hx->s.error_recognition = avctx->error_recognition;
            hx->s.error_count = 0;
        }

        avctx->execute(avctx, (void *)decode_slice,
                       h->thread_context, NULL, context_count, sizeof(void*));

        /* pull back stuff from slices to master context */
        hx = h->thread_context[context_count - 1];
        s->mb_x = hx->s.mb_x;
        s->mb_y = hx->s.mb_y;
        s->dropable = hx->s.dropable;
        s->picture_structure = hx->s.picture_structure;
        for(i = 1; i < context_count; i++)
            h->s.error_count += h->thread_context[i]->s.error_count;
    }
}
/**
 * Split the input buffer into NAL units and decode each one.
 *
 * Handles both Annex-B streams (00 00 01 start-code delimited) and AVCC
 * ("is_avc") streams where every NAL is prefixed by a nal_length_size-byte
 * big-endian length. Slice NALs are queued into per-thread contexts and
 * flushed through execute_decode_slices() whenever max_contexts slices
 * have accumulated.
 *
 * @param h        master decoder context
 * @param buf      input bitstream (one access unit, or arbitrary chunk when
 *                 CODEC_FLAG2_CHUNKS is set)
 * @param buf_size number of bytes in buf
 * @return number of bytes consumed, or -1 on error
 */
7478 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7479 MpegEncContext * const s = &h->s;
7480 AVCodecContext * const avctx= s->avctx;
7481 int buf_index=0;
7482 H264Context *hx; ///< thread context
7483 int context_count = 0;
7484 int next_avc= h->is_avc ? 0 : buf_size;
7486 h->max_contexts = avctx->thread_count;
7487 #if 0
7488 int i;
7489 for(i=0; i<50; i++){
7490 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7492 #endif
/* Unless decoding in chunk mode, a new call means a new picture: reset the
 * slice counter, the current picture (except mid field-pair) and SEI state. */
7493 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7494 h->current_slice = 0;
7495 if (!s->first_field)
7496 s->current_picture_ptr= NULL;
7497 reset_sei(h);
7500 for(;;){
7501 int consumed;
7502 int dst_length;
7503 int bit_length;
7504 const uint8_t *ptr;
7505 int i, nalsize = 0;
7506 int err;
/* Locate the next NAL unit: read the explicit length prefix in AVCC mode,
 * otherwise scan forward for the 00 00 01 start code. */
7508 if(buf_index >= next_avc) {
7509 if(buf_index >= buf_size) break;
7510 nalsize = 0;
7511 for(i = 0; i < h->nal_length_size; i++)
7512 nalsize = (nalsize << 8) | buf[buf_index++];
7513 if(nalsize <= 1 || nalsize > buf_size - buf_index){
7514 if(nalsize == 1){
7515 buf_index++;
7516 continue;
7517 }else{
7518 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7519 break;
7522 next_avc= buf_index + nalsize;
7523 } else {
7524 // start code prefix search
7525 for(; buf_index + 3 < buf_size; buf_index++){
7526 // This should always succeed in the first iteration.
7527 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7528 break;
7531 if(buf_index+3 >= buf_size) break;
7533 buf_index+=3;
/* Unescape the NAL (remove emulation-prevention bytes) into the thread
 * context that will decode it. */
7536 hx = h->thread_context[context_count];
7538 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
7539 if (ptr==NULL || dst_length < 0){
7540 return -1;
/* NOTE(review): operand order is wrong here — ptr[dst_length - 1] is read
 * BEFORE dst_length > 0 is checked, so a zero-length RBSP reads ptr[-1].
 * The guard should come first: while(dst_length > 0 && ptr[dst_length-1]==0). */
7542 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7543 dst_length--;
7544 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7546 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7547 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
/* In AVCC mode a short parse is only an error if the trailing bytes are
 * non-zero (zero padding is merely logged at debug level). */
7550 if (h->is_avc && (nalsize != consumed) && nalsize){
7551 int i, debug_level = AV_LOG_DEBUG;
7552 for (i = consumed; i < nalsize; i++)
7553 if (buf[buf_index+i])
7554 debug_level = AV_LOG_ERROR;
7555 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7558 buf_index += consumed;
7560 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7561 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7562 continue;
/* Dispatch on NAL unit type; "again" is re-entered when a slice must be
 * retried serially on the master context (err == 1 below). */
7564 again:
7565 err = 0;
7566 switch(hx->nal_unit_type){
7567 case NAL_IDR_SLICE:
7568 if (h->nal_unit_type != NAL_IDR_SLICE) {
7569 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7570 return -1;
7572 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* fallthrough: IDR slices continue with the common slice path */
7573 case NAL_SLICE:
7574 init_get_bits(&hx->s.gb, ptr, bit_length);
7575 hx->intra_gb_ptr=
7576 hx->inter_gb_ptr= &hx->s.gb;
7577 hx->s.data_partitioning = 0;
7579 if((err = decode_slice_header(hx, h)))
7580 break;
7582 if (s->avctx->hwaccel && h->current_slice == 1) {
7583 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
7584 return -1;
7587 s->current_picture_ptr->key_frame |=
7588 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7589 (h->sei_recovery_frame_cnt >= 0);
/* Only count the slice for decoding if it survives all skip filters;
 * hwaccel/VDPAU paths hand the raw bytes to the accelerator instead. */
7590 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7591 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7592 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7593 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7594 && avctx->skip_frame < AVDISCARD_ALL){
7595 if(avctx->hwaccel) {
7596 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
7597 return -1;
7598 }else
7599 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
7600 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7601 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7602 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
7603 }else
7604 context_count++;
7606 break;
/* Data-partitioned slices: DPA carries the header, DPB/DPC carry intra
 * and inter residual partitions respectively. */
7607 case NAL_DPA:
7608 init_get_bits(&hx->s.gb, ptr, bit_length);
7609 hx->intra_gb_ptr=
7610 hx->inter_gb_ptr= NULL;
7612 if ((err = decode_slice_header(hx, h)) < 0)
7613 break;
7615 hx->s.data_partitioning = 1;
7617 break;
7618 case NAL_DPB:
7619 init_get_bits(&hx->intra_gb, ptr, bit_length);
7620 hx->intra_gb_ptr= &hx->intra_gb;
7621 break;
7622 case NAL_DPC:
7623 init_get_bits(&hx->inter_gb, ptr, bit_length);
7624 hx->inter_gb_ptr= &hx->inter_gb;
/* DPC completes a partitioned slice; apply the same skip filters as the
 * normal slice path before counting it for decode. */
7626 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7627 && s->context_initialized
7628 && s->hurry_up < 5
7629 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7630 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7631 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7632 && avctx->skip_frame < AVDISCARD_ALL)
7633 context_count++;
7634 break;
7635 case NAL_SEI:
7636 init_get_bits(&s->gb, ptr, bit_length);
7637 ff_h264_decode_sei(h);
7638 break;
7639 case NAL_SPS:
7640 init_get_bits(&s->gb, ptr, bit_length);
7641 ff_h264_decode_seq_parameter_set(h);
7643 if(s->flags& CODEC_FLAG_LOW_DELAY)
7644 s->low_delay=1;
7646 if(avctx->has_b_frames < 2)
7647 avctx->has_b_frames= !s->low_delay;
7648 break;
7649 case NAL_PPS:
7650 init_get_bits(&s->gb, ptr, bit_length);
7652 ff_h264_decode_picture_parameter_set(h, bit_length);
7654 break;
7655 case NAL_AUD:
7656 case NAL_END_SEQUENCE:
7657 case NAL_END_STREAM:
7658 case NAL_FILLER_DATA:
7659 case NAL_SPS_EXT:
7660 case NAL_AUXILIARY_SLICE:
7661 break;
7662 default:
7663 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
/* Flush accumulated slices once all thread contexts are filled. */
7666 if(context_count == h->max_contexts) {
7667 execute_decode_slices(h, context_count);
7668 context_count = 0;
7671 if (err < 0)
7672 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7673 else if(err == 1) {
7674 /* Slice could not be decoded in parallel mode, copy down
7675 * NAL unit stuff to context 0 and restart. Note that
7676 * rbsp_buffer is not transferred, but since we no longer
7677 * run in parallel mode this should not be an issue. */
7678 h->nal_unit_type = hx->nal_unit_type;
7679 h->nal_ref_idc = hx->nal_ref_idc;
7680 hx = h;
7681 goto again;
/* Decode any slices still pending at end of buffer. */
7684 if(context_count)
7685 execute_decode_slices(h, context_count);
7686 return buf_index;
7690 * returns the number of bytes consumed for building the current frame
7692 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7693 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7694 if(pos+10>buf_size) pos=buf_size; // oops ;)
7696 return pos;
/**
 * Decode one packet of H.264 data.
 *
 * Handles: (1) end-of-stream flush (buf_size == 0) by draining the delayed
 * picture queue; (2) one-time parsing of avcC extradata (SPS/PPS) for AVCC
 * streams; (3) decoding the packet's NAL units; (4) reordering decoded
 * pictures into display order before returning one in *pict.
 *
 * @param avctx     codec context
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is returned, else 0
 * @param avpkt     input packet
 * @return bytes consumed, or negative on error
 */
7699 static int decode_frame(AVCodecContext *avctx,
7700 void *data, int *data_size,
7701 AVPacket *avpkt)
7703 const uint8_t *buf = avpkt->data;
7704 int buf_size = avpkt->size;
7705 H264Context *h = avctx->priv_data;
7706 MpegEncContext *s = &h->s;
7707 AVFrame *pict = data;
7708 int buf_index;
7710 s->flags= avctx->flags;
7711 s->flags2= avctx->flags2;
7713 /* end of stream, output what is still in the buffers */
7714 if (buf_size == 0) {
7715 Picture *out;
7716 int i, out_idx;
7718 //FIXME factorize this with the output code below
/* Pick the lowest-poc delayed picture up to the first key frame / mmco
 * reset, then compact the queue past it. */
7719 out = h->delayed_pic[0];
7720 out_idx = 0;
7721 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
7722 if(h->delayed_pic[i]->poc < out->poc){
7723 out = h->delayed_pic[i];
7724 out_idx = i;
7727 for(i=out_idx; h->delayed_pic[i]; i++)
7728 h->delayed_pic[i] = h->delayed_pic[i+1];
7730 if(out){
7731 *data_size = sizeof(AVFrame);
7732 *pict= *(AVFrame*)out;
7735 return 0;
/* One-time avcC extradata parse for AVCC ("mp4-style") streams. */
7738 if(h->is_avc && !h->got_avcC) {
7739 int i, cnt, nalsize;
7740 unsigned char *p = avctx->extradata;
7741 if(avctx->extradata_size < 7) {
7742 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7743 return -1;
7745 if(*p != 1) {
7746 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7747 return -1;
7749 /* sps and pps in the avcC always have length coded with 2 bytes,
7750 so put a fake nal_length_size = 2 while parsing them */
7751 h->nal_length_size = 2;
7752 // Decode sps from avcC
7753 cnt = *(p+5) & 0x1f; // Number of sps
7754 p += 6;
7755 for (i = 0; i < cnt; i++) {
7756 nalsize = AV_RB16(p) + 2;
7757 if(decode_nal_units(h, p, nalsize) < 0) {
7758 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7759 return -1;
7761 p += nalsize;
7763 // Decode pps from avcC
7764 cnt = *(p++); // Number of pps
7765 for (i = 0; i < cnt; i++) {
7766 nalsize = AV_RB16(p) + 2;
/* NOTE(review): the PPS loop requires an exact consumed count
 * (!= nalsize) while the SPS loop above only rejects < 0 —
 * inconsistent strictness; presumably both should match. TODO confirm. */
7767 if(decode_nal_units(h, p, nalsize) != nalsize) {
7768 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7769 return -1;
7771 p += nalsize;
7773 // Now store right nal length size, that will be use to parse all other nals
7774 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7775 // Do not reparse avcC
7776 h->got_avcC = 1;
/* Annex-B streams may still carry SPS/PPS in plain extradata: parse once. */
7779 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7780 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7781 return -1;
7782 h->got_avcC = 1;
7785 buf_index=decode_nal_units(h, buf, buf_size);
7786 if(buf_index < 0)
7787 return -1;
7789 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7790 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7791 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7792 return -1;
/* A full picture (or the end of a chunked picture) has been decoded:
 * finalize the field/frame and run display-order reordering. */
7795 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7796 Picture *out = s->current_picture_ptr;
7797 Picture *cur = s->current_picture_ptr;
7798 int i, pics, cross_idr, out_of_order, out_idx;
7800 field_end(h);
7802 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7803 /* Wait for second field. */
7804 *data_size = 0;
7806 } else {
7807 cur->interlaced_frame = 0;
7808 cur->repeat_pict = 0;
7810 /* Signal interlacing information externally. */
7811 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7813 if(h->sps.pic_struct_present_flag){
7814 switch (h->sei_pic_struct)
7816 case SEI_PIC_STRUCT_FRAME:
7817 break;
7818 case SEI_PIC_STRUCT_TOP_FIELD:
7819 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7820 cur->interlaced_frame = 1;
7821 break;
7822 case SEI_PIC_STRUCT_TOP_BOTTOM:
7823 case SEI_PIC_STRUCT_BOTTOM_TOP:
7824 if (FIELD_OR_MBAFF_PICTURE)
7825 cur->interlaced_frame = 1;
7826 else
7827 // try to flag soft telecine progressive
7828 cur->interlaced_frame = h->prev_interlaced_frame;
7829 break;
7830 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7831 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7832 // Signal the possibility of telecined film externally (pic_struct 5,6)
7833 // From these hints, let the applications decide if they apply deinterlacing.
7834 cur->repeat_pict = 1;
7835 break;
7836 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7837 // Force progressive here, as doubling interlaced frame is a bad idea.
7838 cur->repeat_pict = 2;
7839 break;
7840 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7841 cur->repeat_pict = 4;
7842 break;
7845 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
7846 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
7847 }else{
7848 /* Derive interlacing flag from used decoding process. */
7849 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7851 h->prev_interlaced_frame = cur->interlaced_frame;
7853 if (cur->field_poc[0] != cur->field_poc[1]){
7854 /* Derive top_field_first from field pocs. */
7855 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7856 }else{
7857 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7858 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7859 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7860 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7861 cur->top_field_first = 1;
7862 else
7863 cur->top_field_first = 0;
7864 }else{
7865 /* Most likely progressive */
7866 cur->top_field_first = 0;
7870 //FIXME do something with unavailable reference frames
7872 /* Sort B-frames into display order */
7874 if(h->sps.bitstream_restriction_flag
7875 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7876 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7877 s->low_delay = 0;
7880 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7881 && !h->sps.bitstream_restriction_flag){
7882 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7883 s->low_delay= 0;
7886 pics = 0;
7887 while(h->delayed_pic[pics]) pics++;
7889 assert(pics <= MAX_DELAYED_PIC_COUNT);
7891 h->delayed_pic[pics++] = cur;
7892 if(cur->reference == 0)
7893 cur->reference = DELAYED_PIC_REF;
/* Select the lowest-poc candidate for output (same scan as the flush
 * path at the top of this function). */
7895 out = h->delayed_pic[0];
7896 out_idx = 0;
7897 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
7898 if(h->delayed_pic[i]->poc < out->poc){
7899 out = h->delayed_pic[i];
7900 out_idx = i;
7902 cross_idr = !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset;
7904 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow has_b_frames (and drop low_delay) when the stream demonstrates
 * more reordering depth than currently assumed. */
7906 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7908 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7909 || (s->low_delay &&
7910 ((!cross_idr && out->poc > h->outputed_poc + 2)
7911 || cur->pict_type == FF_B_TYPE)))
7913 s->low_delay = 0;
7914 s->avctx->has_b_frames++;
7917 if(out_of_order || pics > s->avctx->has_b_frames){
7918 out->reference &= ~DELAYED_PIC_REF;
7919 for(i=out_idx; h->delayed_pic[i]; i++)
7920 h->delayed_pic[i] = h->delayed_pic[i+1];
7922 if(!out_of_order && pics > s->avctx->has_b_frames){
7923 *data_size = sizeof(AVFrame);
7925 h->outputed_poc = out->poc;
7926 *pict= *(AVFrame*)out;
7927 }else{
7928 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7933 assert(pict->data[0] || !*data_size);
7934 ff_print_debug_info(s, pict);
7935 //printf("out %d\n", (int)pict->data[0]);
7937 return get_consumed_bytes(s, buf_index, buf_size);
7939 #if 0
/* Dead code (compiled out): fills h->mb_avail[] with the availability of the
 * six neighbouring macroblocks of the current one — indices 0..2 are the
 * top-left/top/top-right row, 3 is the left neighbour; 4 and 5 are
 * hard-coded placeholders (see FIXMEs). Availability requires the neighbour
 * to exist inside the picture AND to belong to the same slice. */
7940 static inline void fill_mb_avail(H264Context *h){
7941 MpegEncContext * const s = &h->s;
7942 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7944 if(s->mb_y){
7945 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7946 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7947 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7948 }else{
/* Top row: no neighbours above. */
7949 h->mb_avail[0]=
7950 h->mb_avail[1]=
7951 h->mb_avail[2]= 0;
7953 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7954 h->mb_avail[4]= 1; //FIXME move out
7955 h->mb_avail[5]= 0; //FIXME move out
7957 #endif
7959 #ifdef TEST
/* Standalone test harness, built only with -DTEST: benchmarks and
 * round-trips the Exp-Golomb coders, with disabled (#if 0) sections for the
 * 4x4 DCT/IDCT and the NAL escape/unescape layer. */
7960 #undef printf
7961 #undef random
7962 #define COUNT 8000
7963 #define SIZE (COUNT*40)
7964 int main(void){
7965 int i;
7966 uint8_t temp[SIZE];
7967 PutBitContext pb;
7968 GetBitContext gb;
7969 // int int_temp[10000];
7970 DSPContext dsp;
7971 AVCodecContext avctx;
7973 dsputil_init(&dsp, &avctx);
/* Round-trip unsigned Exp-Golomb: write 0..COUNT-1, read back, compare. */
7975 init_put_bits(&pb, temp, SIZE);
7976 printf("testing unsigned exp golomb\n");
7977 for(i=0; i<COUNT; i++){
7978 START_TIMER
7979 set_ue_golomb(&pb, i);
7980 STOP_TIMER("set_ue_golomb");
7982 flush_put_bits(&pb);
7984 init_get_bits(&gb, temp, 8*SIZE);
7985 for(i=0; i<COUNT; i++){
7986 int j, s;
7988 s= show_bits(&gb, 24);
7990 START_TIMER
7991 j= get_ue_golomb(&gb);
7992 if(j != i){
7993 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7994 // return -1;
7996 STOP_TIMER("get_ue_golomb");
/* Round-trip signed Exp-Golomb over -COUNT/2..COUNT/2-1. */
8000 init_put_bits(&pb, temp, SIZE);
8001 printf("testing signed exp golomb\n");
8002 for(i=0; i<COUNT; i++){
8003 START_TIMER
8004 set_se_golomb(&pb, i - COUNT/2);
8005 STOP_TIMER("set_se_golomb");
8007 flush_put_bits(&pb);
8009 init_get_bits(&gb, temp, 8*SIZE);
8010 for(i=0; i<COUNT; i++){
8011 int j, s;
8013 s= show_bits(&gb, 24);
8015 START_TIMER
8016 j= get_se_golomb(&gb);
8017 if(j != i - COUNT/2){
8018 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8019 // return -1;
8021 STOP_TIMER("get_se_golomb");
/* Disabled: DCT/IDCT accuracy measurement and NAL layer round-trip. */
8024 #if 0
8025 printf("testing 4x4 (I)DCT\n");
8027 DCTELEM block[16];
8028 uint8_t src[16], ref[16];
8029 uint64_t error= 0, max_error=0;
8031 for(i=0; i<COUNT; i++){
8032 int j;
8033 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8034 for(j=0; j<16; j++){
8035 ref[j]= random()%255;
8036 src[j]= random()%255;
8039 h264_diff_dct_c(block, src, ref, 4);
8041 //normalize
8042 for(j=0; j<16; j++){
8043 // printf("%d ", block[j]);
8044 block[j]= block[j]*4;
8045 if(j&1) block[j]= (block[j]*4 + 2)/5;
8046 if(j&4) block[j]= (block[j]*4 + 2)/5;
8048 // printf("\n");
8050 s->dsp.h264_idct_add(ref, block, 4);
8051 /* for(j=0; j<16; j++){
8052 printf("%d ", ref[j]);
8054 printf("\n");*/
8056 for(j=0; j<16; j++){
8057 int diff= FFABS(src[j] - ref[j]);
8059 error+= diff*diff;
8060 max_error= FFMAX(max_error, diff);
8063 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8064 printf("testing quantizer\n");
8065 for(qp=0; qp<52; qp++){
8066 for(i=0; i<16; i++)
8067 src1_block[i]= src2_block[i]= random()%255;
8070 printf("Testing NAL layer\n");
8072 uint8_t bitstream[COUNT];
8073 uint8_t nal[COUNT*2];
8074 H264Context h;
8075 memset(&h, 0, sizeof(H264Context));
8077 for(i=0; i<COUNT; i++){
8078 int zeros= i;
8079 int nal_length;
8080 int consumed;
8081 int out_length;
8082 uint8_t *out;
8083 int j;
/* Build a random bitstream with i bytes forced to zero, then check that
 * encode_nal() + ff_h264_decode_nal() reproduce it exactly. */
8085 for(j=0; j<COUNT; j++){
8086 bitstream[j]= (random() % 255) + 1;
8089 for(j=0; j<zeros; j++){
8090 int pos= random() % COUNT;
8091 while(bitstream[pos] == 0){
8092 pos++;
8093 pos %= COUNT;
8095 bitstream[pos]=0;
8098 START_TIMER
8100 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8101 if(nal_length<0){
8102 printf("encoding failed\n");
8103 return -1;
8106 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8108 STOP_TIMER("NAL")
8110 if(out_length != COUNT){
8111 printf("incorrect length %d %d\n", out_length, COUNT);
8112 return -1;
8115 if(consumed != nal_length){
8116 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8117 return -1;
8120 if(memcmp(bitstream, out, COUNT)){
8121 printf("mismatch\n");
8122 return -1;
8125 #endif
8127 printf("Testing RBSP\n");
8130 return 0;
8132 #endif /* TEST */
8135 av_cold void ff_h264_free_context(H264Context *h)
8137 int i;
8139 free_tables(h); //FIXME cleanup init stuff perhaps
8141 for(i = 0; i < MAX_SPS_COUNT; i++)
8142 av_freep(h->sps_buffers + i);
8144 for(i = 0; i < MAX_PPS_COUNT; i++)
8145 av_freep(h->pps_buffers + i);
8148 static av_cold int decode_end(AVCodecContext *avctx)
8150 H264Context *h = avctx->priv_data;
8151 MpegEncContext *s = &h->s;
8153 ff_h264_free_context(h);
8155 MPV_common_end(s);
8157 // memset(h, 0, sizeof(H264Context));
8159 return 0;
/* Software H.264 decoder descriptor; positional fields follow the 2009-era
 * AVCodec layout: name, type, id, priv_data_size, init, encode, close,
 * decode, capabilities. */
8163 AVCodec h264_decoder = {
8164 "h264", /* name */
8165 CODEC_TYPE_VIDEO, /* type */
8166 CODEC_ID_H264, /* id */
8167 sizeof(H264Context), /* priv_data_size */
8168 decode_init, /* init */
8169 NULL, /* encode (decoder only) */
8170 decode_end, /* close */
8171 decode_frame, /* decode */
8172 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8173 .flush= flush_dpb,
8174 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8175 .pix_fmts= ff_hwaccel_pixfmt_list_420,
8178 #if CONFIG_H264_VDPAU_DECODER
/* VDPAU-accelerated variant: same callbacks as the software decoder, but
 * advertises CODEC_CAP_HWACCEL_VDPAU so slices are handed to the hardware
 * (see the VDPAU branch in decode_nal_units). */
8179 AVCodec h264_vdpau_decoder = {
8180 "h264_vdpau", /* name */
8181 CODEC_TYPE_VIDEO, /* type */
8182 CODEC_ID_H264, /* id */
8183 sizeof(H264Context), /* priv_data_size */
8184 decode_init, /* init */
8185 NULL, /* encode (decoder only) */
8186 decode_end, /* close */
8187 decode_frame, /* decode */
8188 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8189 .flush= flush_dpb,
8190 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8192 #endif
8194 #if CONFIG_SVQ3_DECODER
8195 #include "svq3.c"
8196 #endif