eac3dec: get rid of unnecessary left shifts in 16-bit * 24-bit
[FFMpeg-mirror/lagarith.git] / libavcodec / h264.c
blobf4c172b6062a91c249bb2d3a0f5a2110df8dd0bb
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file libavcodec/h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "internal.h"
29 #include "dsputil.h"
30 #include "avcodec.h"
31 #include "mpegvideo.h"
32 #include "h264.h"
33 #include "h264data.h"
34 #include "h264_parser.h"
35 #include "golomb.h"
36 #include "mathops.h"
37 #include "rectangle.h"
38 #include "vdpau_internal.h"
40 #include "cabac.h"
41 #if ARCH_X86
42 #include "x86/h264_i386.h"
43 #endif
45 //#undef NDEBUG
46 #include <assert.h>
48 /**
49 * Value of Picture.reference when Picture is not a reference picture, but
50 * is held for delayed output.
52 #define DELAYED_PIC_REF 4
54 static VLC coeff_token_vlc[4];
55 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
56 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
58 static VLC chroma_dc_coeff_token_vlc;
59 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
60 static const int chroma_dc_coeff_token_vlc_table_size = 256;
62 static VLC total_zeros_vlc[15];
63 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
64 static const int total_zeros_vlc_tables_size = 512;
66 static VLC chroma_dc_total_zeros_vlc[3];
67 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
68 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
70 static VLC run_vlc[6];
71 static VLC_TYPE run_vlc_tables[6][8][2];
72 static const int run_vlc_tables_size = 8;
74 static VLC run7_vlc;
75 static VLC_TYPE run7_vlc_table[96][2];
76 static const int run7_vlc_table_size = 96;
78 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
79 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
80 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
81 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Fallback so the helper also compiles standalone; inert in the real build
 * where attributes.h defines av_always_inline first. */
#ifndef av_always_inline
#define av_always_inline inline
#endif
/**
 * Packs two 16-bit values into one 32-bit word so that a ref-index pair can
 * be stored into the caches with a single 32-bit write.
 * Byte order is chosen per host endianness so that in-memory layout matches
 * two consecutive 16-bit stores.
 * Fixes: the original shifted a possibly negative int left (undefined
 * behavior, C99 6.5.7); the shift is now done on an unsigned value, and the
 * closing brace lost in this dump is restored.
 * @param a first 16-bit value
 * @param b second 16-bit value
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + ((uint32_t)a<<16);
#else
    return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
/**
 * rem6[qp] == qp % 6 for 0 <= qp < 52; table lookup avoids a division in
 * the dequant scale computation. (Restores the `};` terminator that was
 * dropped from this dump.)
 */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
/**
 * div6[qp] == qp / 6 for 0 <= qp < 52; companion of rem6, avoids a division
 * in the dequant scale computation. (Restores the `};` terminator that was
 * dropped from this dump.)
 */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/**
 * Per-row block indices used when reading data from the left neighbor MB.
 * Row 0 is the normal (same field/frame structure) case; the other rows
 * cover the MBAFF field/frame mismatch cases selected in fill_caches().
 * (Restores the `};` terminator that was dropped from this dump.)
 */
static const uint8_t left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};
107 #define LEVEL_TAB_BITS 8
108 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
/*
 * NOTE(review): this dump is corrupted — blank lines and lines containing
 * only closing braces were dropped, and each line still carries its original
 * line number as a text prefix. Code bytes are kept untouched below; only
 * comments were added. Restore the braces from a pristine h264.c.
 */
/**
 * Fills the per-macroblock neighbor caches (intra prediction modes,
 * non_zero_count, cbp, mv/ref/mvd caches, direct-mode flags) from the
 * neighboring macroblocks, including MBAFF field/frame neighbor selection.
 * @param mb_type the current macroblock's type flags
 * @param for_deblock nonzero when filling only what the loop filter needs
 */
110 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
111 MpegEncContext * const s = &h->s;
112 const int mb_xy= h->mb_xy;
113 int topleft_xy, top_xy, topright_xy, left_xy[2];
114 int topleft_type, top_type, topright_type, left_type[2];
115 const uint8_t * left_block;
116 int topleft_partition= -1;
117 int i;
// MB above the current one; the stride is doubled for field pictures
119 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
121 //FIXME deblocking could skip the intra and nnz parts.
122 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
123 return;
125 /* Wow, what a mess, why didn't they simplify the interlacing & intra
126 * stuff, I can't imagine that these complex rules are worth it. */
// default (non-MBAFF) neighbor addresses
128 topleft_xy = top_xy - 1;
129 topright_xy= top_xy + 1;
130 left_xy[1] = left_xy[0] = mb_xy-1;
131 left_block = left_block_options[0];
// MBAFF: re-derive neighbors from the MB-pair grid depending on the
// field/frame coding of the current MB and each neighbor pair
132 if(FRAME_MBAFF){
133 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
134 const int top_pair_xy = pair_xy - s->mb_stride;
135 const int topleft_pair_xy = top_pair_xy - 1;
136 const int topright_pair_xy = top_pair_xy + 1;
137 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
138 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
139 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
140 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
141 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
142 const int bottom = (s->mb_y & 1);
143 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
145 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
146 top_xy -= s->mb_stride;
148 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
149 topleft_xy -= s->mb_stride;
150 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
151 topleft_xy += s->mb_stride;
152 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
153 topleft_partition = 0;
155 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
156 topright_xy -= s->mb_stride;
// left neighbor with mismatched field/frame coding: pick the appropriate
// left_block_options row for remapped sub-block indices
158 if (left_mb_field_flag != curr_mb_field_flag) {
159 left_xy[1] = left_xy[0] = pair_xy - 1;
160 if (curr_mb_field_flag) {
161 left_xy[1] += s->mb_stride;
162 left_block = left_block_options[3];
163 } else {
164 left_block= left_block_options[2 - bottom];
169 h->top_mb_xy = top_xy;
170 h->left_mb_xy[0] = left_xy[0];
171 h->left_mb_xy[1] = left_xy[1];
// deblocking only needs top/left types; presumably slice_table entries
// >= 0xFFFF mark unavailable MBs here — TODO confirm sentinel meaning
172 if(for_deblock){
173 topleft_type = 0;
174 topright_type = 0;
175 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
176 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
177 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
179 if(MB_MBAFF && !IS_INTRA(mb_type)){
180 int list;
181 for(list=0; list<h->list_count; list++){
182 //These values where changed for ease of performing MC, we need to change them back
183 //FIXME maybe we can make MC and loop filter use the same values or prevent
184 //the MC code from changing ref_cache and rather use a temporary array.
185 if(USES_LIST(mb_type,list)){
186 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
187 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
188 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
189 ref += h->b8_stride;
190 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
191 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
// normal (decode) path: neighbor types are valid only within the same slice
195 }else{
196 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
197 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
198 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
199 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
200 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// intra: compute bitmasks of which neighboring sample sets are available;
// with constrained_intra_pred only intra neighbors count (type_mask)
202 if(IS_INTRA(mb_type)){
203 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
204 h->topleft_samples_available=
205 h->top_samples_available=
206 h->left_samples_available= 0xFFFF;
207 h->topright_samples_available= 0xEEEA;
209 if(!(top_type & type_mask)){
210 h->topleft_samples_available= 0xB3FF;
211 h->top_samples_available= 0x33FF;
212 h->topright_samples_available= 0x26EA;
214 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
215 if(IS_INTERLACED(mb_type)){
216 if(!(left_type[0] & type_mask)){
217 h->topleft_samples_available&= 0xDFFF;
218 h->left_samples_available&= 0x5FFF;
220 if(!(left_type[1] & type_mask)){
221 h->topleft_samples_available&= 0xFF5F;
222 h->left_samples_available&= 0xFF5F;
224 }else{
225 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
226 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
227 assert(left_xy[0] == left_xy[1]);
228 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
229 h->topleft_samples_available&= 0xDF5F;
230 h->left_samples_available&= 0x5F5F;
233 }else{
234 if(!(left_type[0] & type_mask)){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
240 if(!(topleft_type & type_mask))
241 h->topleft_samples_available&= 0x7FFF;
243 if(!(topright_type & type_mask))
244 h->topright_samples_available&= 0xFBFF;
// cache the intra4x4 prediction modes of the top and left neighbors;
// -1 = unavailable, 2 (DC) when the neighbor is not intra4x4
246 if(IS_INTRA4x4(mb_type)){
247 if(IS_INTRA4x4(top_type)){
248 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
249 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
250 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
251 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
252 }else{
253 int pred;
254 if(!(top_type & type_mask))
255 pred= -1;
256 else{
257 pred= 2;
259 h->intra4x4_pred_mode_cache[4+8*0]=
260 h->intra4x4_pred_mode_cache[5+8*0]=
261 h->intra4x4_pred_mode_cache[6+8*0]=
262 h->intra4x4_pred_mode_cache[7+8*0]= pred;
264 for(i=0; i<2; i++){
265 if(IS_INTRA4x4(left_type[i])){
266 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
267 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
268 }else{
269 int pred;
270 if(!(left_type[i] & type_mask))
271 pred= -1;
272 else{
273 pred= 2;
275 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
276 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
// The lines below are the remnant of a block-layout diagram; its /* */
// delimiters were presumably lost when this dump dropped lines — verify
// against a pristine h264.c.
285 0 . T T. T T T T
286 1 L . .L . . . .
287 2 L . .L . . . .
288 3 . T TL . . . .
289 4 L . .L . . . .
290 5 L . .. . . . .
// non_zero_count cache from top/left neighbors; 64 marks "unavailable"
// in the CAVLC case, 0 for CABAC inter (see the ternaries below)
292 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
293 if(top_type){
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
305 }else{
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
319 for (i=0; i<2; i++) {
320 if(left_type[i]){
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
325 }else{
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// CABAC: cache neighbor coded-block-pattern bits for context modelling
333 if( h->pps.cabac ) {
334 // top_cbp
335 if(top_type) {
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
338 h->top_cbp = 0x1C0;
339 } else {
340 h->top_cbp = 0;
342 // left_cbp
343 if (left_type[0]) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
346 h->left_cbp = 0x1C0;
347 } else {
348 h->left_cbp = 0;
350 if (left_type[0]) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
353 if (left_type[1]) {
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
// inter: fill motion-vector and reference caches from the top, left,
// topleft and topright neighbors for each active reference list
358 #if 1
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
360 int list;
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
368 continue;
370 h->mv_cache_clean[list]= 0;
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 }else{
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
391 for(i=0; i<2; i++){
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 }else{
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
408 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
409 continue;
411 if(USES_LIST(topleft_type, list)){
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 }else{
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 }else{
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
432 continue;
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
// CABAC: also cache the neighbors' motion vector differences
445 if( h->pps.cabac ) {
446 /* XXX beurk, Load mvd */
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 }else{
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 }else{
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 }else{
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
// B slices: cache the neighbors' direct-mode flags for CABAC contexts
481 if(h->slice_type_nos == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 }else{
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 else
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 else
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF: rescale cached refs/mvs that cross a field/frame boundary
510 if(FRAME_MBAFF){
511 #define MAP_MVS\
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
522 if(MB_FIELD){
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
529 MAP_MVS
530 #undef MAP_F2F
531 }else{
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
538 MAP_MVS
539 #undef MAP_F2F
544 #endif
// count of top/left neighbors coded with the 8x8 DCT (transform-size context)
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 const int mb_xy= h->mb_xy;
552 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
553 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
554 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
555 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
556 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
557 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
558 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
562 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
564 static inline int check_intra4x4_pred_mode(H264Context *h){
565 MpegEncContext * const s = &h->s;
566 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
567 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
568 int i;
570 if(!(h->top_samples_available&0x8000)){
571 for(i=0; i<4; i++){
572 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
573 if(status<0){
574 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
575 return -1;
576 } else if(status){
577 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
582 if((h->left_samples_available&0x8888)!=0x8888){
583 static const int mask[4]={0x8000,0x2000,0x80,0x20};
584 for(i=0; i<4; i++){
585 if(!(h->left_samples_available&mask[i])){
586 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
587 if(status<0){
588 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
589 return -1;
590 } else if(status){
591 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
597 return 0;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
608 if(mode > 6U) {
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
610 return -1;
613 if(!(h->top_samples_available&0x8000)){
614 mode= top[ mode ];
615 if(mode<0){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
617 return -1;
621 if((h->left_samples_available&0x8080) != 0x8080){
622 mode= left[ mode ];
623 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
624 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
626 if(mode<0){
627 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
628 return -1;
632 return mode;
636 * gets the predicted intra4x4 prediction mode.
638 static inline int pred_intra_mode(H264Context *h, int n){
639 const int index8= scan8[n];
640 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
641 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
642 const int min= FFMIN(left, top);
644 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
646 if(min<0) return DC_PRED;
647 else return min;
650 static inline void write_back_non_zero_count(H264Context *h){
651 const int mb_xy= h->mb_xy;
653 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
654 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
655 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
656 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
657 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
658 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
659 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
661 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
662 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
663 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
665 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
666 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
667 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
671 * gets the predicted number of non-zero coefficients.
672 * @param n block index
674 static inline int pred_non_zero_count(H264Context *h, int n){
675 const int index8= scan8[n];
676 const int left= h->non_zero_count_cache[index8 - 1];
677 const int top = h->non_zero_count_cache[index8 - 8];
678 int i= left + top;
680 if(i<64) i= (i+1)>>1;
682 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
684 return i&31;
/*
 * NOTE(review): standalone closing-brace lines were dropped from this dump
 * and each line carries its original number. Code bytes kept untouched;
 * only comments added. Restore braces against a pristine h264.c.
 */
/**
 * Fetches the MV/ref used as the diagonal neighbor "C" for MV prediction:
 * points *C at the chosen MV and returns its reference index (or a
 * LIST_NOT_USED/PART_NOT_AVAILABLE sentinel). The MBAFF paths rescale the
 * vertical component when crossing a field/frame boundary.
 */
687 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
688 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
689 MpegEncContext *s = &h->s;
691 /* there is no consistent mapping of mvs to neighboring locations that will
692 * make mbaff happy, so we can't move all this logic to fill_caches */
693 if(FRAME_MBAFF){
694 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
695 const int16_t *mv;
// default *C: a zeroed scratch cache slot, overwritten by SET_DIAG_MV below
696 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
697 *C = h->mv_cache[list][scan8[0]-2];
699 if(!MB_FIELD
700 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
701 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
702 if(IS_INTERLACED(mb_types[topright_xy])){
// SET_DIAG_MV(MV_OP,REF_OP,X4,Y4): read the MV at (x4,y4) in 4x4-block
// units, scale its vertical part (MV_OP) and ref index (REF_OP) across
// the field/frame boundary, and return from the enclosing function.
703 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
704 const int x4 = X4, y4 = Y4;\
705 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
706 if(!USES_LIST(mb_type,list))\
707 return LIST_NOT_USED;\
708 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
709 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
710 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
711 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
713 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
// top-right unavailable: fall back on the left neighbor column
716 if(topright_ref == PART_NOT_AVAILABLE
717 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
718 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
719 if(!MB_FIELD
720 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
721 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
723 if(MB_FIELD
724 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
725 && i >= scan8[0]+8){
726 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
727 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
730 #undef SET_DIAG_MV
// common (non-MBAFF-special) path: use top-right if available, else top-left
733 if(topright_ref != PART_NOT_AVAILABLE){
734 *C= h->mv_cache[list][ i - 8 + part_width ];
735 return topright_ref;
736 }else{
737 tprintf(s->avctx, "topright MV not available\n");
739 *C= h->mv_cache[list][ i - 8 - 1 ];
740 return h->ref_cache[list][ i - 8 - 1 ];
745 * gets the predicted MV.
746 * @param n the block index
747 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
748 * @param mx the x component of the predicted motion vector
749 * @param my the y component of the predicted motion vector
/*
 * NOTE(review): standalone closing-brace lines were dropped from this dump
 * and each line carries its original number. Code bytes kept untouched;
 * only comments added. Restore braces against a pristine h264.c.
 */
/**
 * Gets the predicted MV for block n: the component-wise median of the
 * left (A), top (B) and diagonal (C) neighbor MVs, with the standard
 * special cases when exactly one neighbor shares the target ref, or when
 * only the left neighbor is available.
 * @param n the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
751 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
752 const int index8= scan8[n];
753 const int top_ref= h->ref_cache[list][ index8 - 8 ];
754 const int left_ref= h->ref_cache[list][ index8 - 1 ];
755 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
756 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
757 const int16_t * C;
758 int diagonal_ref, match_count;
760 assert(part_width==1 || part_width==2 || part_width==4);
// (the diagram below lost its comment delimiters when this dump dropped lines)
762 /* mv_cache
763 B . . A T T T T
764 U . . L . . , .
765 U . . L . . . .
766 U . . L . . , .
767 . . . L . . . .
770 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
771 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
772 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
773 if(match_count > 1){ //most common
774 *mx= mid_pred(A[0], B[0], C[0]);
775 *my= mid_pred(A[1], B[1], C[1]);
776 }else if(match_count==1){
777 if(left_ref==ref){
778 *mx= A[0];
779 *my= A[1];
780 }else if(top_ref==ref){
781 *mx= B[0];
782 *my= B[1];
783 }else{
784 *mx= C[0];
785 *my= C[1];
787 }else{
788 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
789 *mx= A[0];
790 *my= A[1];
791 }else{
792 *mx= mid_pred(A[0], B[0], C[0]);
793 *my= mid_pred(A[1], B[1], C[1]);
797 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
801 * gets the directionally predicted 16x8 MV.
802 * @param n the block index
803 * @param mx the x component of the predicted motion vector
804 * @param my the y component of the predicted motion vector
806 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
807 if(n==0){
808 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
809 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
813 if(top_ref == ref){
814 *mx= B[0];
815 *my= B[1];
816 return;
818 }else{
819 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
820 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
824 if(left_ref == ref){
825 *mx= A[0];
826 *my= A[1];
827 return;
831 //RARE
832 pred_motion(h, n, 4, list, ref, mx, my);
836 * gets the directionally predicted 8x16 MV.
837 * @param n the block index
838 * @param mx the x component of the predicted motion vector
839 * @param my the y component of the predicted motion vector
841 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
842 if(n==0){
843 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
844 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
846 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
848 if(left_ref == ref){
849 *mx= A[0];
850 *my= A[1];
851 return;
853 }else{
854 const int16_t * C;
855 int diagonal_ref;
857 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861 if(diagonal_ref == ref){
862 *mx= C[0];
863 *my= C[1];
864 return;
868 //RARE
869 pred_motion(h, n, 2, list, ref, mx, my);
872 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
873 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
874 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
876 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
878 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
879 || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
880 || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
882 *mx = *my = 0;
883 return;
886 pred_motion(h, 0, 4, 0, 0, mx, my);
888 return;
891 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
892 int poc0 = h->ref_list[0][i].poc;
893 int td = av_clip(poc1 - poc0, -128, 127);
894 if(td == 0 || h->ref_list[0][i].long_ref){
895 return 256;
896 }else{
897 int tb = av_clip(poc - poc0, -128, 127);
898 int tx = (16384 + (FFABS(td) >> 1)) / td;
899 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
903 static inline void direct_dist_scale_factor(H264Context * const h){
904 MpegEncContext * const s = &h->s;
905 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
906 const int poc1 = h->ref_list[1][0].poc;
907 int i, field;
908 for(field=0; field<2; field++){
909 const int poc = h->s.current_picture_ptr->field_poc[field];
910 const int poc1 = h->ref_list[1][0].field_poc[field];
911 for(i=0; i < 2*h->ref_count[0]; i++)
912 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
915 for(i=0; i<h->ref_count[0]; i++){
916 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
/* Builds the colocated-ref -> list0-index remap table used by temporal direct
 * mode: for each reference of the colocated (list1[0]) picture, find the entry
 * in our own reference list that denotes the same frame/field.
 * NOTE(review): extraction of this chunk dropped some closing-brace lines;
 * code tokens below are kept exactly as in the original. */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    /* with MBAFF the field references occupy slots 16..16+2*ref_count */
    int start= mbafi ? 16 : 0;
    int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            /* ref_poc encodes 4*frame_num + reference-parity bits */
            int poc = ref1->ref_poc[colfield][list][old_ref];

            if (!interl)
                poc |= 3;
            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
                poc= (poc&~3) + rfield + 1;

            /* search our list for a reference with the same encoded POC */
            for(j=start; j<end; j++){
                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                    int cur_ref= mbafi ? (j-16)^field : j;
                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                    if(rfield == field)
                        map[list][old_ref] = cur_ref;
                    break;
/* Records this picture's reference lists (counts + encoded POCs) into the
 * current Picture, then builds the col->list0 maps needed by temporal direct
 * prediction for B slices.
 * NOTE(review): extraction dropped some closing-brace lines; code tokens are
 * kept exactly as in the original. */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    int list, j, field;
    /* sidx selects the field slot: 0 for top/frame, 1 for bottom */
    int sidx= (s->picture_structure&1)^1;
    int ref1sidx= (ref1->reference&1)^1;

    for(list=0; list<2; list++){
        cur->ref_count[sidx][list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            /* same 4*frame_num + parity encoding that fill_colmap() matches on */
            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);

    /* a frame picture fills both field slots identically */
    if(s->picture_structure == PICT_FRAME){
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));

    cur->mbaff= FRAME_MBAFF;

    /* the colmaps are only needed for temporal direct B prediction */
    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
        return;

    for(list=0; list<2; list++){
        fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
        for(field=0; field<2; field++)
            fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
/* Derives motion vectors and reference indices for B_Direct macroblocks /
 * sub-blocks, both spatial and temporal flavours (H.264 spec 8.4.1.2).
 * On entry *mb_type describes the current MB; on exit the direct partition
 * types are filled in and h->mv_cache / h->ref_cache hold the derived MVs.
 * NOTE(review): extraction of this chunk dropped a number of closing-brace
 * lines; the code tokens below are kept exactly as in the original. */
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    int b8_stride = h->b8_stride;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy;
    int mb_type_col[2];                  /* colocated MB type(s): [0]=top/only, [1]=bottom */
    const int16_t (*l1mv0)[2], (*l1mv1)[2]; /* colocated list0/list1 MVs */
    const int8_t *l1ref0, *l1ref1;          /* colocated list0/list1 ref indices */
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;
    int i8, i4;

    assert(h->ref_list[1][0].reference&3);

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)

    /* Select the colocated macroblock and strides depending on the
     * frame/field coding of the current MB vs the colocated picture. */
    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
        if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
            /* frame MB, field colocated: pick the temporally closer field */
            int cur_poc = s->current_picture_ptr->poc;
            int *col_poc = h->ref_list[1]->field_poc;
            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
            b8_stride = 0;
        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
            mb_xy += s->mb_stride*fieldoff;
        goto single_col;
    }else{ // AFL/AFR/FR/FL -> AFR/FR
        if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
            /* field MB, frame colocated: one colocated MB per field half */
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
            b8_stride *= 3;
            b4_stride *= 6;
            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
            if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
                && !is_b8x8){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
        }else{ // AFR/FR -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
                /* FIXME save sub mb types from previous frames (or derive from MVs)
                 * so we know exactly what block size to use */
                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
                *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;

    /* pointers into the colocated picture's motion data */
    l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
    l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
    if(!b8_stride){
        if(s->mb_y&1){
            l1ref0 += h->b8_stride;
            l1ref1 += h->b8_stride;
            l1mv0 += 2*b4_stride;
            l1mv1 += 2*b4_stride;

    if(h->direct_spatial_mv_pred){
        int ref[2];
        int mv[2][2];
        int list;

        /* FIXME interlacing + spatial direct uses wrong colocated block positions */

        /* ref = min(neighbors) */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
            if(refc == PART_NOT_AVAILABLE)
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
            /* unsigned compare makes negative (unavailable) refs sort last */
            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
            if(ref[list] < 0)
                ref[list] = -1;

        if(ref[0] < 0 && ref[1] < 0){
            /* no valid neighbour refs: spec says use ref 0 and zero MVs */
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
        }else{
            for(list=0; list<2; list++){
                if(ref[list] >= 0)
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                else
                    mv[list][0] = mv[list][1] = 0;

        /* drop the unused prediction list from the MB/sub-MB types */
        if(ref[1] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L1;
            sub_mb_type &= ~MB_TYPE_L1;
        }else if(ref[0] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L0;
            sub_mb_type &= ~MB_TYPE_L0;

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* frame/field mismatch with the colocated MB: per-8x8 handling */
            for(i8=0; i8<4; i8++){
                int x8 = i8&1;
                int y8 = i8>>1;
                int xy8 = x8+y8*b8_stride;
                int xy4 = 3*x8+y8*b4_stride;
                int a=0, b=0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                /* col_zero: colocated block is near-static -> force zero MV for ref 0 */
                if(!IS_INTRA(mb_type_col[y8])
                   && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                       || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
                    if(ref[0] > 0)
                        a= pack16to32(mv[0][0],mv[0][1]);
                    if(ref[1] > 0)
                        b= pack16to32(mv[1][0],mv[1][1]);
                }else{
                    a= pack16to32(mv[0][0],mv[0][1]);
                    b= pack16to32(mv[1][0],mv[1][1]);
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
        }else if(IS_16X16(*mb_type)){
            int a=0, b=0;

            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
            if(!IS_INTRA(mb_type_col[0])
               && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       && (h->x264_build>33 || !h->x264_build)))){
                if(ref[0] > 0)
                    a= pack16to32(mv[0][0],mv[0][1]);
                if(ref[1] > 0)
                    b= pack16to32(mv[1][0],mv[1][1]);
            }else{
                a= pack16to32(mv[0][0],mv[0][1]);
                b= pack16to32(mv[1][0],mv[1][1]);
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);

                /* col_zero_flag */
                if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
                    || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
                        && (h->x264_build>33 || !h->x264_build)))){
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
                    if(IS_SUB_8X8(sub_mb_type)){
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                            if(ref[1] == 0)
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    }else
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                            if(ref[1] == 0)
                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
    }else{ /* direct temporal mv pred */
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
        const int *dist_scale_factor = h->dist_scale_factor;
        int ref_offset= 0;

        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
            /* field MB inside a MBAFF frame: use the per-field tables */
            map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
            map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
            dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
            ref_offset += 16;

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* FIXME assumes direct_8x8_inference == 1 */
            int y_shift = 2*!IS_INTERLACED(*mb_type);

            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[y8])){
                    /* intra colocated block: zero refs and MVs */
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;

                /* map the colocated reference into our list0 */
                ref0 = l1ref0[x8 + y8*b8_stride];
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                scale = dist_scale_factor[ref0];
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);

                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
                    int my_col = (mv_col[1]<<y_shift)/2;
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * my_col + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
            return;

        /* one-to-one mv scaling */

        if(IS_16X16(*mb_type)){
            int ref, mv0, mv1;

            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if(IS_INTRA(mb_type_col[0])){
                ref=mv0=mv1=0;
            }else{
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                int mv_l0[2];
                /* list0 MV is the scaled colocated MV; list1 MV is their difference */
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                ref= ref0;
                mv0= pack16to32(mv_l0[0],mv_l0[1]);
                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[0])){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;

                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                scale = dist_scale_factor[ref0];

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                if(IS_SUB_8X8(sub_mb_type)){
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
                }else
                for(i4=0; i4<4; i4++){
                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Copies the per-MB motion caches (mv_cache/ref_cache/mvd_cache) back into the
 * frame-wide tables of the current picture, plus the CABAC direct flags.
 * NOTE(review): extraction dropped some closing-brace lines; code tokens are
 * kept exactly as in the original. */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;     /* 4x4 granularity index */
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;    /* 8x8 granularity index */
    int list;

    if(!USES_LIST(mb_type, 0))
        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);

    for(list=0; list<h->list_count; list++){
        int y;
        if(!USES_LIST(mb_type, list))
            continue;

        /* two 4x4 MVs (8 bytes) per row, copied as 64-bit stores */
        for(y=0; y<4; y++){
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        if( h->pps.cabac ) {
            /* MV differences are only needed as CABAC context */
            if(IS_SKIP(mb_type))
                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
            else
                for(y=0; y<4; y++){
                    *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
                    *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];

            /* one reference index per 8x8 block */
            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];

    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
        if(IS_8X8(mb_type)){
            /* record which 8x8 partitions were B_Direct (CABAC context for neighbours) */
            uint8_t *direct_table = &h->direct_table[b8_xy];
            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/* Parses a NAL unit header and removes the 00 00 03 emulation-prevention
 * escapes (H.264 spec 7.3/7.4.1), returning the unescaped RBSP.
 * Returns src itself when no escape is present, otherwise an internal buffer.
 * NOTE(review): extraction dropped some closing-brace lines; code tokens are
 * kept exactly as in the original. */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80; //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;
#if 0
    for(i=0; i<length; i++)
        printf("%2X ", src[i]);
#endif

/* fast scan for a zero byte, word-at-a-time where unaligned loads are cheap;
 * RS is how far to back up after an over-stepping match */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        /* at a 00 00 pair: 00 00 03 is an escape, 00 00 0x (x<3) a start code */
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            break;
        i-= RS;

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;

//printf("decoding esc\n");
    /* copy the clean prefix, then strip escapes byte-by-byte */
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;

        dst[di++]= src[si++];
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
1455 int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1456 int v= *src;
1457 int r;
1459 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1461 for(r=1; r<9; r++){
1462 if(v&1) return r;
1463 v>>=1;
1465 return 0;
/**
 * IDCT transforms the 16 dc values and dequantizes them.
 * Operates in place on the DC positions of the 16 luma 4x4 blocks inside
 * the MB's coefficient array (hence the stride-based offset tables).
 * @param qp quantization parameter
 * @param qmul dequantization multiplier for this qp
 */
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
#define stride 16
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

//memset(block, 64, 2*256);
//return;
    /* vertical 4-point Hadamard pass into temp[] */
    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;

    /* horizontal pass + dequantization (rounded >>8) back into block[] */
    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
#if 0
/* disabled: forward luma DC Hadamard transform (encoder-side counterpart of
 * h264_luma_dc_dequant_idct_c); kept for reference only */
/**
 * DCT transforms the 16 dc values.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
#endif
1548 #undef xStride
1549 #undef stride
1551 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1552 const int stride= 16*2;
1553 const int xStride= 16;
1554 int a,b,c,d,e;
1556 a= block[stride*0 + xStride*0];
1557 b= block[stride*0 + xStride*1];
1558 c= block[stride*1 + xStride*0];
1559 d= block[stride*1 + xStride*1];
1561 e= a-b;
1562 a= a+b;
1563 b= c-d;
1564 c= c+d;
1566 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1567 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1568 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1569 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
#if 0
/* disabled: forward 2x2 chroma DC transform (encoder-side counterpart of
 * chroma_dc_dequant_idct_c); kept for reference only */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
#endif
/**
 * gets the chroma qp.
 * @param t      chroma plane index (selects the Cb/Cr QP table)
 * @param qscale luma quantization parameter
 * @return the mapped chroma QP from the PPS table
 */
static inline int get_chroma_qp(H264Context *h, int t, int qscale){
    return h->pps.chroma_qp_table[t][qscale];
/* Single-direction motion compensation for one partition: quarter-pel luma and
 * eighth-pel chroma interpolation from pic into dest_*, with edge emulation
 * when the MV points (partly) outside the picture.
 * NOTE(review): extraction dropped some closing-brace lines; code tokens are
 * kept exactly as in the original. */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    /* MV in quarter-pel units, relative to the partition position */
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);    /* sub-pel phase selects the qpel function */
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    /* sub-pel interpolation reads extra taps; shrink the usable edge margin */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if( full_mx < 0-extra_width
        || full_my < 0-extra_height
        || full_mx + 16/*FIXME*/ > pic_width + extra_width
        || full_my + 16/*FIXME*/ > pic_height + extra_height){
        /* MV reaches outside the padded picture: interpolate from a padded copy */
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
        src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
        emu=1;

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cb= s->edge_emu_buffer;
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
        src_cr= s->edge_emu_buffer;
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Unweighted motion compensation of a partition: list0 prediction is stored
 * with the "put" functions, a following list1 prediction is blended in with
 * the "avg" functions (bi-prediction averaging).
 * NOTE(review): extraction dropped some closing-brace lines; code tokens are
 * kept exactly as in the original. */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op= qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* advance destinations to the partition; offsets are in chroma (8-pel) units */
    dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
    dest_cb += x_offset + y_offset*h->mb_uvlinesize;
    dest_cr += x_offset + y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);

        /* second direction averages into the first */
        qpix_op= qpix_avg;
        chroma_op= chroma_avg;

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);
/* Weighted motion compensation of a partition: explicit (use_weight==1) or
 * implicit (use_weight==2) weighted prediction, uni- or bi-directional.
 * NOTE(review): extraction dropped some closing-brace lines; code tokens are
 * kept exactly as in the original. */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
    dest_cb += x_offset + y_offset*h->mb_uvlinesize;
    dest_cr += x_offset + y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* list0 goes to the destination, list1 to scratch; then blend weighted */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weights: complementary pair summing to 64, denom 5 */
            int weight0 = h->implicit_weight[refn0][refn1];
            int weight1 = 64 - weight0;
            luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            /* explicit bi-directional weights + offsets from the slice header */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
    }else{
        /* uni-directional: predict, then apply explicit weight in place */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatches motion compensation of one partition to the weighted or the
 * standard path. Weighted prediction is used for explicit weights
 * (use_weight==1) and for implicit bi-prediction whose weight pair is not
 * the trivial 32/32 average. */
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                           int list0, int list1){
    if((h->use_weight==2 && list0 && list1
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
       || h->use_weight==1)
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                         x_offset, y_offset, qpix_put, chroma_put,
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
    else
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1779 static inline void prefetch_motion(H264Context *h, int list){
1780 /* fetch pixels for estimated mv 4 macroblocks ahead
1781 * optimized for 64byte cache lines */
1782 MpegEncContext * const s = &h->s;
1783 const int refn = h->ref_cache[list][scan8[0]];
1784 if(refn >= 0){
1785 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1786 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1787 uint8_t **src= h->ref_list[list][refn].data;
1788 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1789 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1790 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1791 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1795 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1796 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1797 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1798 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1799 MpegEncContext * const s = &h->s;
1800 const int mb_xy= h->mb_xy;
1801 const int mb_type= s->current_picture.mb_type[mb_xy];
1803 assert(IS_INTER(mb_type));
1805 prefetch_motion(h, 0);
1807 if(IS_16X16(mb_type)){
1808 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1809 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1810 &weight_op[0], &weight_avg[0],
1811 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1812 }else if(IS_16X8(mb_type)){
1813 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1815 &weight_op[1], &weight_avg[1],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1818 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1819 &weight_op[1], &weight_avg[1],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 }else if(IS_8X16(mb_type)){
1822 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1823 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1824 &weight_op[2], &weight_avg[2],
1825 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1826 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1827 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1828 &weight_op[2], &weight_avg[2],
1829 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1830 }else{
1831 int i;
1833 assert(IS_8X8(mb_type));
1835 for(i=0; i<4; i++){
1836 const int sub_mb_type= h->sub_mb_type[i];
1837 const int n= 4*i;
1838 int x_offset= (i&1)<<2;
1839 int y_offset= (i&2)<<1;
1841 if(IS_SUB_8X8(sub_mb_type)){
1842 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1843 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1844 &weight_op[3], &weight_avg[3],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_8X4(sub_mb_type)){
1847 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1849 &weight_op[4], &weight_avg[4],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1852 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1853 &weight_op[4], &weight_avg[4],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else if(IS_SUB_4X8(sub_mb_type)){
1856 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1857 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1858 &weight_op[5], &weight_avg[5],
1859 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1860 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1861 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1862 &weight_op[5], &weight_avg[5],
1863 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1864 }else{
1865 int j;
1866 assert(IS_SUB_4X4(sub_mb_type));
1867 for(j=0; j<4; j++){
1868 int sub_x_offset= x_offset + 2*(j&1);
1869 int sub_y_offset= y_offset + (j&2);
1870 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1871 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1872 &weight_op[6], &weight_avg[6],
1873 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1879 prefetch_motion(h, 1);
1882 static av_cold void init_cavlc_level_tab(void){
1883 int suffix_length, mask;
1884 unsigned int i;
1886 for(suffix_length=0; suffix_length<7; suffix_length++){
1887 for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1888 int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1889 int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1891 mask= -(level_code&1);
1892 level_code= (((2+level_code)>>1) ^ mask) - mask;
1893 if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1894 cavlc_level_tab[suffix_length][i][0]= level_code;
1895 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1896 }else if(prefix + 1 <= LEVEL_TAB_BITS){
1897 cavlc_level_tab[suffix_length][i][0]= prefix+100;
1898 cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1899 }else{
1900 cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1901 cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1907 static av_cold void decode_init_vlc(void){
1908 static int done = 0;
1910 if (!done) {
1911 int i;
1912 int offset;
1913 done = 1;
1915 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1916 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1917 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1918 &chroma_dc_coeff_token_len [0], 1, 1,
1919 &chroma_dc_coeff_token_bits[0], 1, 1,
1920 INIT_VLC_USE_NEW_STATIC);
1922 offset = 0;
1923 for(i=0; i<4; i++){
1924 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1925 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1926 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1927 &coeff_token_len [i][0], 1, 1,
1928 &coeff_token_bits[i][0], 1, 1,
1929 INIT_VLC_USE_NEW_STATIC);
1930 offset += coeff_token_vlc_tables_size[i];
1933 * This is a one time safety check to make sure that
1934 * the packed static coeff_token_vlc table sizes
1935 * were initialized correctly.
1937 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1939 for(i=0; i<3; i++){
1940 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1941 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1942 init_vlc(&chroma_dc_total_zeros_vlc[i],
1943 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1944 &chroma_dc_total_zeros_len [i][0], 1, 1,
1945 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1946 INIT_VLC_USE_NEW_STATIC);
1948 for(i=0; i<15; i++){
1949 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1950 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1951 init_vlc(&total_zeros_vlc[i],
1952 TOTAL_ZEROS_VLC_BITS, 16,
1953 &total_zeros_len [i][0], 1, 1,
1954 &total_zeros_bits[i][0], 1, 1,
1955 INIT_VLC_USE_NEW_STATIC);
1958 for(i=0; i<6; i++){
1959 run_vlc[i].table = run_vlc_tables[i];
1960 run_vlc[i].table_allocated = run_vlc_tables_size;
1961 init_vlc(&run_vlc[i],
1962 RUN_VLC_BITS, 7,
1963 &run_len [i][0], 1, 1,
1964 &run_bits[i][0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
1967 run7_vlc.table = run7_vlc_table,
1968 run7_vlc.table_allocated = run7_vlc_table_size;
1969 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1970 &run_len [6][0], 1, 1,
1971 &run_bits[6][0], 1, 1,
1972 INIT_VLC_USE_NEW_STATIC);
1974 init_cavlc_level_tab();
1978 static void free_tables(H264Context *h){
1979 int i;
1980 H264Context *hx;
1981 av_freep(&h->intra4x4_pred_mode);
1982 av_freep(&h->chroma_pred_mode_table);
1983 av_freep(&h->cbp_table);
1984 av_freep(&h->mvd_table[0]);
1985 av_freep(&h->mvd_table[1]);
1986 av_freep(&h->direct_table);
1987 av_freep(&h->non_zero_count);
1988 av_freep(&h->slice_table_base);
1989 h->slice_table= NULL;
1991 av_freep(&h->mb2b_xy);
1992 av_freep(&h->mb2b8_xy);
1994 for(i = 0; i < MAX_THREADS; i++) {
1995 hx = h->thread_context[i];
1996 if(!hx) continue;
1997 av_freep(&hx->top_borders[1]);
1998 av_freep(&hx->top_borders[0]);
1999 av_freep(&hx->s.obmc_scratchpad);
2003 static void init_dequant8_coeff_table(H264Context *h){
2004 int i,q,x;
2005 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2006 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2007 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2009 for(i=0; i<2; i++ ){
2010 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2011 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2012 break;
2015 for(q=0; q<52; q++){
2016 int shift = div6[q];
2017 int idx = rem6[q];
2018 for(x=0; x<64; x++)
2019 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2020 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2021 h->pps.scaling_matrix8[i][x]) << shift;
2026 static void init_dequant4_coeff_table(H264Context *h){
2027 int i,j,q,x;
2028 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2029 for(i=0; i<6; i++ ){
2030 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2031 for(j=0; j<i; j++){
2032 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2033 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2034 break;
2037 if(j<i)
2038 continue;
2040 for(q=0; q<52; q++){
2041 int shift = div6[q] + 2;
2042 int idx = rem6[q];
2043 for(x=0; x<16; x++)
2044 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2045 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2046 h->pps.scaling_matrix4[i][x]) << shift;
2051 static void init_dequant_tables(H264Context *h){
2052 int i,x;
2053 init_dequant4_coeff_table(h);
2054 if(h->pps.transform_8x8_mode)
2055 init_dequant8_coeff_table(h);
2056 if(h->sps.transform_bypass){
2057 for(i=0; i<6; i++)
2058 for(x=0; x<16; x++)
2059 h->dequant4_coeff[i][0][x] = 1<<6;
2060 if(h->pps.transform_8x8_mode)
2061 for(i=0; i<2; i++)
2062 for(x=0; x<64; x++)
2063 h->dequant8_coeff[i][0][x] = 1<<6;
2069 * allocates tables.
2070 * needs width/height
2072 static int alloc_tables(H264Context *h){
2073 MpegEncContext * const s = &h->s;
2074 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2075 int x,y;
2077 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2079 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2080 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2081 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2083 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2084 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2085 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2086 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2088 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2089 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2091 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2092 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2093 for(y=0; y<s->mb_height; y++){
2094 for(x=0; x<s->mb_width; x++){
2095 const int mb_xy= x + y*s->mb_stride;
2096 const int b_xy = 4*x + 4*y*h->b_stride;
2097 const int b8_xy= 2*x + 2*y*h->b8_stride;
2099 h->mb2b_xy [mb_xy]= b_xy;
2100 h->mb2b8_xy[mb_xy]= b8_xy;
2104 s->obmc_scratchpad = NULL;
2106 if(!h->dequant4_coeff[0])
2107 init_dequant_tables(h);
2109 return 0;
2110 fail:
2111 free_tables(h);
2112 return -1;
2116 * Mimic alloc_tables(), but for every context thread.
2118 static void clone_tables(H264Context *dst, H264Context *src){
2119 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2120 dst->non_zero_count = src->non_zero_count;
2121 dst->slice_table = src->slice_table;
2122 dst->cbp_table = src->cbp_table;
2123 dst->mb2b_xy = src->mb2b_xy;
2124 dst->mb2b8_xy = src->mb2b8_xy;
2125 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2126 dst->mvd_table[0] = src->mvd_table[0];
2127 dst->mvd_table[1] = src->mvd_table[1];
2128 dst->direct_table = src->direct_table;
2130 dst->s.obmc_scratchpad = NULL;
2131 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2135 * Init context
2136 * Allocate buffers which are not shared amongst multiple threads.
2138 static int context_init(H264Context *h){
2139 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2140 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2142 return 0;
2143 fail:
2144 return -1; // free_tables will clean up for us
2147 static av_cold void common_init(H264Context *h){
2148 MpegEncContext * const s = &h->s;
2150 s->width = s->avctx->width;
2151 s->height = s->avctx->height;
2152 s->codec_id= s->avctx->codec->id;
2154 ff_h264_pred_init(&h->hpc, s->codec_id);
2156 h->dequant_coeff_pps= -1;
2157 s->unrestricted_mv=1;
2158 s->decode=1; //FIXME
2160 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
2162 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2163 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2167 * Reset SEI values at the beginning of the frame.
2169 * @param h H.264 context.
2171 static void reset_sei(H264Context *h) {
2172 h->sei_recovery_frame_cnt = -1;
2173 h->sei_dpb_output_delay = 0;
2174 h->sei_cpb_removal_delay = -1;
2175 h->sei_buffering_period_present = 0;
2178 static av_cold int decode_init(AVCodecContext *avctx){
2179 H264Context *h= avctx->priv_data;
2180 MpegEncContext * const s = &h->s;
2182 MPV_decode_defaults(s);
2184 s->avctx = avctx;
2185 common_init(h);
2187 s->out_format = FMT_H264;
2188 s->workaround_bugs= avctx->workaround_bugs;
2190 // set defaults
2191 // s->decode_mb= ff_h263_decode_mb;
2192 s->quarter_sample = 1;
2193 if(!avctx->has_b_frames)
2194 s->low_delay= 1;
2196 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2197 avctx->pix_fmt= PIX_FMT_VDPAU_H264;
2198 else
2199 avctx->pix_fmt= avctx->get_format(avctx, avctx->codec->pix_fmts);
2200 avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
2201 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
2203 decode_init_vlc();
2205 if(avctx->extradata_size > 0 && avctx->extradata &&
2206 *(char *)avctx->extradata == 1){
2207 h->is_avc = 1;
2208 h->got_avcC = 0;
2209 } else {
2210 h->is_avc = 0;
2213 h->thread_context[0] = h;
2214 h->outputed_poc = INT_MIN;
2215 h->prev_poc_msb= 1<<16;
2216 reset_sei(h);
2217 if(avctx->codec_id == CODEC_ID_H264){
2218 if(avctx->ticks_per_frame == 1){
2219 s->avctx->time_base.den *=2;
2221 avctx->ticks_per_frame = 2;
2223 return 0;
2226 static int frame_start(H264Context *h){
2227 MpegEncContext * const s = &h->s;
2228 int i;
2230 if(MPV_frame_start(s, s->avctx) < 0)
2231 return -1;
2232 ff_er_frame_start(s);
2234 * MPV_frame_start uses pict_type to derive key_frame.
2235 * This is incorrect for H.264; IDR markings must be used.
2236 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2237 * See decode_nal_units().
2239 s->current_picture_ptr->key_frame= 0;
2241 assert(s->linesize && s->uvlinesize);
2243 for(i=0; i<16; i++){
2244 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2245 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2247 for(i=0; i<4; i++){
2248 h->block_offset[16+i]=
2249 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2250 h->block_offset[24+16+i]=
2251 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2254 /* can't be in alloc_tables because linesize isn't known there.
2255 * FIXME: redo bipred weight to not require extra buffer? */
2256 for(i = 0; i < s->avctx->thread_count; i++)
2257 if(!h->thread_context[i]->s.obmc_scratchpad)
2258 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2260 /* some macroblocks will be accessed before they're available */
2261 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2262 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2264 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2266 // We mark the current picture as non-reference after allocating it, so
2267 // that if we break out due to an error it can be released automatically
2268 // in the next MPV_frame_start().
2269 // SVQ3 as well as most other codecs have only last/next/current and thus
2270 // get released even with set reference, besides SVQ3 and others do not
2271 // mark frames as reference later "naturally".
2272 if(s->codec_id != CODEC_ID_SVQ3)
2273 s->current_picture_ptr->reference= 0;
2275 s->current_picture_ptr->field_poc[0]=
2276 s->current_picture_ptr->field_poc[1]= INT_MAX;
2277 assert(s->current_picture_ptr->long_ref==0);
2279 return 0;
2282 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2283 MpegEncContext * const s = &h->s;
2284 int i;
2285 int step = 1;
2286 int offset = 1;
2287 int uvoffset= 1;
2288 int top_idx = 1;
2289 int skiplast= 0;
2291 src_y -= linesize;
2292 src_cb -= uvlinesize;
2293 src_cr -= uvlinesize;
2295 if(!simple && FRAME_MBAFF){
2296 if(s->mb_y&1){
2297 offset = MB_MBAFF ? 1 : 17;
2298 uvoffset= MB_MBAFF ? 1 : 9;
2299 if(!MB_MBAFF){
2300 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2301 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2302 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2303 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2304 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2307 }else{
2308 if(!MB_MBAFF){
2309 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2310 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2311 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2312 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2314 skiplast= 1;
2316 offset =
2317 uvoffset=
2318 top_idx = MB_MBAFF ? 0 : 1;
2320 step= MB_MBAFF ? 2 : 1;
2323 // There are two lines saved, the line above the the top macroblock of a pair,
2324 // and the line above the bottom macroblock
2325 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2326 for(i=1; i<17 - skiplast; i++){
2327 h->left_border[offset+i*step]= src_y[15+i* linesize];
2330 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2331 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2333 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2334 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2335 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2336 for(i=1; i<9 - skiplast; i++){
2337 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2338 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2340 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2341 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2345 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2346 MpegEncContext * const s = &h->s;
2347 int temp8, i;
2348 uint64_t temp64;
2349 int deblock_left;
2350 int deblock_top;
2351 int mb_xy;
2352 int step = 1;
2353 int offset = 1;
2354 int uvoffset= 1;
2355 int top_idx = 1;
2357 if(!simple && FRAME_MBAFF){
2358 if(s->mb_y&1){
2359 offset = MB_MBAFF ? 1 : 17;
2360 uvoffset= MB_MBAFF ? 1 : 9;
2361 }else{
2362 offset =
2363 uvoffset=
2364 top_idx = MB_MBAFF ? 0 : 1;
2366 step= MB_MBAFF ? 2 : 1;
2369 if(h->deblocking_filter == 2) {
2370 mb_xy = h->mb_xy;
2371 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2372 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2373 } else {
2374 deblock_left = (s->mb_x > 0);
2375 deblock_top = (s->mb_y > !!MB_FIELD);
2378 src_y -= linesize + 1;
2379 src_cb -= uvlinesize + 1;
2380 src_cr -= uvlinesize + 1;
2382 #define XCHG(a,b,t,xchg)\
2383 t= a;\
2384 if(xchg)\
2385 a= b;\
2386 b= t;
2388 if(deblock_left){
2389 for(i = !deblock_top; i<16; i++){
2390 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2392 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2395 if(deblock_top){
2396 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2397 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2398 if(s->mb_x+1 < s->mb_width){
2399 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2403 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2404 if(deblock_left){
2405 for(i = !deblock_top; i<8; i++){
2406 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2407 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2409 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2410 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2412 if(deblock_top){
2413 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2414 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2419 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2420 MpegEncContext * const s = &h->s;
2421 const int mb_x= s->mb_x;
2422 const int mb_y= s->mb_y;
2423 const int mb_xy= h->mb_xy;
2424 const int mb_type= s->current_picture.mb_type[mb_xy];
2425 uint8_t *dest_y, *dest_cb, *dest_cr;
2426 int linesize, uvlinesize /*dct_offset*/;
2427 int i;
2428 int *block_offset = &h->block_offset[0];
2429 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2430 /* is_h264 should always be true if SVQ3 is disabled. */
2431 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2432 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2433 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2435 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2436 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2437 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2439 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2440 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2442 if (!simple && MB_FIELD) {
2443 linesize = h->mb_linesize = s->linesize * 2;
2444 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2445 block_offset = &h->block_offset[24];
2446 if(mb_y&1){ //FIXME move out of this function?
2447 dest_y -= s->linesize*15;
2448 dest_cb-= s->uvlinesize*7;
2449 dest_cr-= s->uvlinesize*7;
2451 if(FRAME_MBAFF) {
2452 int list;
2453 for(list=0; list<h->list_count; list++){
2454 if(!USES_LIST(mb_type, list))
2455 continue;
2456 if(IS_16X16(mb_type)){
2457 int8_t *ref = &h->ref_cache[list][scan8[0]];
2458 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2459 }else{
2460 for(i=0; i<16; i+=4){
2461 int ref = h->ref_cache[list][scan8[i]];
2462 if(ref >= 0)
2463 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2468 } else {
2469 linesize = h->mb_linesize = s->linesize;
2470 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2471 // dct_offset = s->linesize * 16;
2474 if (!simple && IS_INTRA_PCM(mb_type)) {
2475 for (i=0; i<16; i++) {
2476 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2478 for (i=0; i<8; i++) {
2479 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2480 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2482 } else {
2483 if(IS_INTRA(mb_type)){
2484 if(h->deblocking_filter)
2485 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2487 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2488 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2489 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2492 if(IS_INTRA4x4(mb_type)){
2493 if(simple || !s->encoding){
2494 if(IS_8x8DCT(mb_type)){
2495 if(transform_bypass){
2496 idct_dc_add =
2497 idct_add = s->dsp.add_pixels8;
2498 }else{
2499 idct_dc_add = s->dsp.h264_idct8_dc_add;
2500 idct_add = s->dsp.h264_idct8_add;
2502 for(i=0; i<16; i+=4){
2503 uint8_t * const ptr= dest_y + block_offset[i];
2504 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2505 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2506 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2507 }else{
2508 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2509 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2510 (h->topright_samples_available<<i)&0x4000, linesize);
2511 if(nnz){
2512 if(nnz == 1 && h->mb[i*16])
2513 idct_dc_add(ptr, h->mb + i*16, linesize);
2514 else
2515 idct_add (ptr, h->mb + i*16, linesize);
2519 }else{
2520 if(transform_bypass){
2521 idct_dc_add =
2522 idct_add = s->dsp.add_pixels4;
2523 }else{
2524 idct_dc_add = s->dsp.h264_idct_dc_add;
2525 idct_add = s->dsp.h264_idct_add;
2527 for(i=0; i<16; i++){
2528 uint8_t * const ptr= dest_y + block_offset[i];
2529 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2531 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2532 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2533 }else{
2534 uint8_t *topright;
2535 int nnz, tr;
2536 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2537 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2538 assert(mb_y || linesize <= block_offset[i]);
2539 if(!topright_avail){
2540 tr= ptr[3 - linesize]*0x01010101;
2541 topright= (uint8_t*) &tr;
2542 }else
2543 topright= ptr + 4 - linesize;
2544 }else
2545 topright= NULL;
2547 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2548 nnz = h->non_zero_count_cache[ scan8[i] ];
2549 if(nnz){
2550 if(is_h264){
2551 if(nnz == 1 && h->mb[i*16])
2552 idct_dc_add(ptr, h->mb + i*16, linesize);
2553 else
2554 idct_add (ptr, h->mb + i*16, linesize);
2555 }else
2556 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2562 }else{
2563 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2564 if(is_h264){
2565 if(!transform_bypass)
2566 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2567 }else
2568 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2570 if(h->deblocking_filter)
2571 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2572 }else if(is_h264){
2573 hl_motion(h, dest_y, dest_cb, dest_cr,
2574 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2575 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2576 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2580 if(!IS_INTRA4x4(mb_type)){
2581 if(is_h264){
2582 if(IS_INTRA16x16(mb_type)){
2583 if(transform_bypass){
2584 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2585 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2586 }else{
2587 for(i=0; i<16; i++){
2588 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2589 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2592 }else{
2593 s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2595 }else if(h->cbp&15){
2596 if(transform_bypass){
2597 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2598 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2599 for(i=0; i<16; i+=di){
2600 if(h->non_zero_count_cache[ scan8[i] ]){
2601 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2604 }else{
2605 if(IS_8x8DCT(mb_type)){
2606 s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2607 }else{
2608 s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2612 }else{
2613 for(i=0; i<16; i++){
2614 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2615 uint8_t * const ptr= dest_y + block_offset[i];
2616 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2622 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
2623 uint8_t *dest[2] = {dest_cb, dest_cr};
2624 if(transform_bypass){
2625 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
2626 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
2627 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
2628 }else{
2629 idct_add = s->dsp.add_pixels4;
2630 for(i=16; i<16+8; i++){
2631 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2632 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2635 }else{
2636 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2637 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2638 if(is_h264){
2639 idct_add = s->dsp.h264_idct_add;
2640 idct_dc_add = s->dsp.h264_idct_dc_add;
2641 for(i=16; i<16+8; i++){
2642 if(h->non_zero_count_cache[ scan8[i] ])
2643 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2644 else if(h->mb[i*16])
2645 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2647 }else{
2648 for(i=16; i<16+8; i++){
2649 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2650 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2651 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2658 if(h->cbp || IS_INTRA(mb_type))
2659 s->dsp.clear_blocks(h->mb);
2661 if(h->deblocking_filter) {
2662 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2663 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2664 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2665 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2666 if (!simple && FRAME_MBAFF) {
2667 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2668 } else {
2669 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2675 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2677 static void hl_decode_mb_simple(H264Context *h){
2678 hl_decode_mb_internal(h, 1);
2682 * Process a macroblock; this handles edge cases, such as interlacing.
2684 static void av_noinline hl_decode_mb_complex(H264Context *h){
2685 hl_decode_mb_internal(h, 0);
/* Dispatch one macroblock to the simple or complex decode path.
 * The complex path is forced for small builds (CONFIG_SMALL), for
 * streams flagged complex, for intra-PCM macroblocks, and when
 * qscale==0 (lossless). */
2688 static void hl_decode_mb(H264Context *h){
2689 MpegEncContext * const s = &h->s;
2690 const int mb_xy= h->mb_xy;
2691 const int mb_type= s->current_picture.mb_type[mb_xy];
2692 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
2694 if (is_complex)
2695 hl_decode_mb_complex(h);
2696 else hl_decode_mb_simple(h);
2699 static void pic_as_field(Picture *pic, const int parity){
2700 int i;
2701 for (i = 0; i < 4; ++i) {
2702 if (parity == PICT_BOTTOM_FIELD)
2703 pic->data[i] += pic->linesize[i];
2704 pic->reference = parity;
2705 pic->linesize[i] *= 2;
2707 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy *src into *dest if src is referenced with the requested parity.
 * For field parities the copy is converted to a field picture and its
 * pic_id is doubled with id_add (0 or 1) appended, matching the
 * field pic_num numbering.
 * @return 1 if a copy was made, 0 otherwise */
2710 static int split_field_copy(Picture *dest, Picture *src,
2711 int parity, int id_add){
2712 int match = !!(src->reference & parity);
2714 if (match) {
2715 *dest = *src;
2716 if(parity != PICT_FRAME){
2717 pic_as_field(dest, parity);
2718 dest->pic_id *= 2;
2719 dest->pic_id += id_add;
2723 return match;
/* Append references from in[] to def[], alternating between the two
 * field parities starting with sel. i[0]/i[1] scan in[] independently
 * for entries referenced with parity sel resp. sel^3 (the opposite
 * parity, or both halves of PICT_FRAME). pic_id is the list index for
 * long term refs, frame_num for short term ones.
 * @return number of entries written to def */
2726 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2727 int i[2]={0};
2728 int index=0;
2730 while(i[0]<len || i[1]<len){
2731 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2732 i[0]++;
2733 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2734 i[1]++;
2735 if(i[0] < len){
2736 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2737 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2739 if(i[1] < len){
2740 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2741 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2745 return index;
/* Selection-sort pictures from src[] into sorted[] by POC, taking only
 * POCs strictly beyond limit. dir==0 selects decreasing POCs below
 * limit, dir==1 increasing POCs above limit (the XOR tricks flip both
 * comparisons). Each pass picks the extremal remaining POC; limit is
 * advanced past it so every picture is emitted once.
 * @return number of entries written to sorted */
2748 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2749 int i, best_poc;
2750 int out_i= 0;
2752 for(;;){
2753 best_poc= dir ? INT_MIN : INT_MAX;
2755 for(i=0; i<len; i++){
2756 const int poc= src[i]->poc;
2757 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2758 best_poc= poc;
2759 sorted[out_i]= src[i];
2762 if(best_poc == (dir ? INT_MIN : INT_MAX))
2763 break;
2764 limit= sorted[out_i++]->poc - dir;
2766 return out_i;
2770 * fills the default_ref_list.
2772 static int fill_default_ref_list(H264Context *h){
2773 MpegEncContext * const s = &h->s;
2774 int i, len;
2776 if(h->slice_type_nos==FF_B_TYPE){
2777 Picture *sorted[32];
2778 int cur_poc, list;
2779 int lens[2];
2781 if(FIELD_PICTURE)
2782 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2783 else
2784 cur_poc= s->current_picture_ptr->poc;
2786 for(list= 0; list<2; list++){
2787 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2788 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2789 assert(len<=32);
2790 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2791 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2792 assert(len<=32);
2794 if(len < h->ref_count[list])
2795 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2796 lens[list]= len;
2799 if(lens[0] == lens[1] && lens[1] > 1){
2800 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2801 if(i == lens[0])
2802 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2804 }else{
2805 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2806 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2807 assert(len <= 32);
2808 if(len < h->ref_count[0])
2809 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2811 #ifdef TRACE
2812 for (i=0; i<h->ref_count[0]; i++) {
2813 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2815 if(h->slice_type_nos==FF_B_TYPE){
2816 for (i=0; i<h->ref_count[1]; i++) {
2817 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2820 #endif
2821 return 0;
2824 static void print_short_term(H264Context *h);
2825 static void print_long_term(H264Context *h);
2828 * Extract structure information about the picture described by pic_num in
2829 * the current decoding context (frame or field). Note that pic_num is
2830 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2831 * @param pic_num picture number for which to extract structure information
2832 * @param structure one of PICT_XXX describing structure of picture
2833 * with pic_num
2834 * @return frame number (short term) or long term index of picture
2835 * described by pic_num
/* See the description above: splits a pic_num into the frame number /
 * long term index and the picture structure. In field pictures the LSB
 * of pic_num selects same (odd) vs. opposite (even) field parity. */
2837 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2838 MpegEncContext * const s = &h->s;
2840 *structure = s->picture_structure;
2841 if(FIELD_PICTURE){
2842 if (!(pic_num & 1))
2843 /* opposite field */
2844 *structure ^= PICT_FRAME;
2845 pic_num >>= 1;
2848 return pic_num;
/* Parse the ref_pic_list_reordering() syntax and apply it on top of the
 * default reference lists. For each list with the reordering flag set,
 * reordering_of_pic_nums_idc values 0/1 move a short term ref (by
 * signed pic_num delta), 2 moves a long term ref (by index), 3 ends the
 * loop. The chosen ref is inserted at the current index and the old
 * entries are shifted down. Afterwards any hole in either list is
 * patched with default_ref_list[list][0].
 * @return 0 on success, -1 on bitstream errors */
2851 static int decode_ref_pic_list_reordering(H264Context *h){
2852 MpegEncContext * const s = &h->s;
2853 int list, index, pic_structure;
2855 print_short_term(h);
2856 print_long_term(h);
2858 for(list=0; list<h->list_count; list++){
2859 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2861 if(get_bits1(&s->gb)){
2862 int pred= h->curr_pic_num;
2864 for(index=0; ; index++){
2865 unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
2866 unsigned int pic_id;
2867 int i;
2868 Picture *ref = NULL;
2870 if(reordering_of_pic_nums_idc==3)
2871 break;
2873 if(index >= h->ref_count[list]){
2874 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2875 return -1;
2878 if(reordering_of_pic_nums_idc<3){
2879 if(reordering_of_pic_nums_idc<2){
/* short term: pic_num is predicted; idc 0 subtracts, idc 1 adds the
 * abs_diff, with wraparound modulo max_pic_num */
2880 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2881 int frame_num;
2883 if(abs_diff_pic_num > h->max_pic_num){
2884 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2885 return -1;
2888 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2889 else pred+= abs_diff_pic_num;
2890 pred &= h->max_pic_num - 1;
2892 frame_num = pic_num_extract(h, pred, &pic_structure);
2894 for(i= h->short_ref_count-1; i>=0; i--){
2895 ref = h->short_ref[i];
2896 assert(ref->reference);
2897 assert(!ref->long_ref);
2899 ref->frame_num == frame_num &&
2900 (ref->reference & pic_structure)
2902 break;
2904 if(i>=0)
2905 ref->pic_id= pred;
2906 }else{
/* idc 2: long term reference selected by long_term_pic_idx */
2907 int long_idx;
2908 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2910 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2912 if(long_idx>31){
2913 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2914 return -1;
2916 ref = h->long_ref[long_idx];
2917 assert(!(ref && !ref->reference));
2918 if(ref && (ref->reference & pic_structure)){
2919 ref->pic_id= pic_id;
2920 assert(ref->long_ref);
2921 i=0;
2922 }else{
2923 i=-1;
2927 if (i < 0) {
2928 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2929 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2930 } else {
/* insert ref at index; shift the entries in between down by one */
2931 for(i=index; i+1<h->ref_count[list]; i++){
2932 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2933 break;
2935 for(; i > index; i--){
2936 h->ref_list[list][i]= h->ref_list[list][i-1];
2938 h->ref_list[list][index]= *ref;
2939 if (FIELD_PICTURE){
2940 pic_as_field(&h->ref_list[list][index], pic_structure);
2943 }else{
2944 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2945 return -1;
/* fill any remaining holes with the first default ref so later code
 * never sees a NULL data[0] entry */
2950 for(list=0; list<h->list_count; list++){
2951 for(index= 0; index < h->ref_count[list]; index++){
2952 if(!h->ref_list[list][index].data[0]){
2953 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2954 if(h->default_ref_list[list][0].data[0])
2955 h->ref_list[list][index]= h->default_ref_list[list][0];
2956 else
2957 return -1;
2962 return 0;
/* For MBAFF: derive per-field reference entries from each frame ref.
 * ref_list[list][16+2*i] / [16+2*i+1] become the top/bottom fields of
 * ref_list[list][i] (doubled linesize, bottom field data offset by one
 * line). Weight/offset tables are replicated for the field entries. */
2965 static void fill_mbaff_ref_list(H264Context *h){
2966 int list, i, j;
2967 for(list=0; list<2; list++){ //FIXME try list_count
2968 for(i=0; i<h->ref_count[list]; i++){
2969 Picture *frame = &h->ref_list[list][i];
2970 Picture *field = &h->ref_list[list][16+2*i];
2971 field[0] = *frame;
2972 for(j=0; j<3; j++)
2973 field[0].linesize[j] <<= 1;
2974 field[0].reference = PICT_TOP_FIELD;
2975 field[0].poc= field[0].field_poc[0];
2976 field[1] = field[0];
2977 for(j=0; j<3; j++)
2978 field[1].data[j] += frame->linesize[j];
2979 field[1].reference = PICT_BOTTOM_FIELD;
2980 field[1].poc= field[1].field_poc[1];
2982 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2983 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2984 for(j=0; j<2; j++){
2985 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2986 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* implicit weights are indexed [ref1][ref0]; replicate rows and columns
 * for the field entries as well */
2990 for(j=0; j<h->ref_count[1]; j++){
2991 for(i=0; i<h->ref_count[0]; i++)
2992 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2993 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2994 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table(): explicit luma/chroma weights and offsets
 * per reference of each list. Entries without a flag get the default
 * weight (1<<denom) and zero offset. use_weight/use_weight_chroma are
 * set only if some entry differs from the defaults. For non-B slices
 * only list 0 is parsed.
 * @return 0 */
2998 static int pred_weight_table(H264Context *h){
2999 MpegEncContext * const s = &h->s;
3000 int list, i;
3001 int luma_def, chroma_def;
3003 h->use_weight= 0;
3004 h->use_weight_chroma= 0;
3005 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3006 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3007 luma_def = 1<<h->luma_log2_weight_denom;
3008 chroma_def = 1<<h->chroma_log2_weight_denom;
3010 for(list=0; list<2; list++){
3011 h->luma_weight_flag[list] = 0;
3012 h->chroma_weight_flag[list] = 0;
3013 for(i=0; i<h->ref_count[list]; i++){
3014 int luma_weight_flag, chroma_weight_flag;
3016 luma_weight_flag= get_bits1(&s->gb);
3017 if(luma_weight_flag){
3018 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3019 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3020 if( h->luma_weight[list][i] != luma_def
3021 || h->luma_offset[list][i] != 0) {
3022 h->use_weight= 1;
3023 h->luma_weight_flag[list]= 1;
3025 }else{
3026 h->luma_weight[list][i]= luma_def;
3027 h->luma_offset[list][i]= 0;
3030 if(CHROMA){
3031 chroma_weight_flag= get_bits1(&s->gb);
3032 if(chroma_weight_flag){
3033 int j;
3034 for(j=0; j<2; j++){
3035 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3036 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3037 if( h->chroma_weight[list][i][j] != chroma_def
3038 || h->chroma_offset[list][i][j] != 0) {
3039 h->use_weight_chroma= 1;
3040 h->chroma_weight_flag[list]= 1;
3043 }else{
3044 int j;
3045 for(j=0; j<2; j++){
3046 h->chroma_weight[list][i][j]= chroma_def;
3047 h->chroma_offset[list][i][j]= 0;
3052 if(h->slice_type_nos != FF_B_TYPE) break;
3054 h->use_weight= h->use_weight || h->use_weight_chroma;
3055 return 0;
/* Compute the implicit (POC-distance based) bi-prediction weight table.
 * Shortcut: a single ref per list whose POCs straddle the current POC
 * symmetrically needs no weighting at all. Otherwise use_weight is set
 * to 2 (implicit mode) and each [ref0][ref1] pair gets a weight derived
 * from the clipped temporal distances tb/td, falling back to 32 (equal
 * weighting) when out of range or td==0. */
3058 static void implicit_weight_table(H264Context *h){
3059 MpegEncContext * const s = &h->s;
3060 int ref0, ref1, i;
3061 int cur_poc = s->current_picture_ptr->poc;
3063 for (i = 0; i < 2; i++) {
3064 h->luma_weight_flag[i] = 0;
3065 h->chroma_weight_flag[i] = 0;
3068 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3069 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3070 h->use_weight= 0;
3071 h->use_weight_chroma= 0;
3072 return;
3075 h->use_weight= 2;
3076 h->use_weight_chroma= 2;
3077 h->luma_log2_weight_denom= 5;
3078 h->chroma_log2_weight_denom= 5;
3080 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3081 int poc0 = h->ref_list[0][ref0].poc;
3082 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3083 int poc1 = h->ref_list[1][ref1].poc;
3084 int td = av_clip(poc1 - poc0, -128, 127);
3085 if(td){
3086 int tb = av_clip(cur_poc - poc0, -128, 127);
3087 int tx = (16384 + (FFABS(td) >> 1)) / td;
3088 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3089 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3090 h->implicit_weight[ref0][ref1] = 32;
3091 else
3092 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3093 }else
3094 h->implicit_weight[ref0][ref1] = 32;
3100 * Mark a picture as no longer needed for reference. The refmask
3101 * argument allows unreferencing of individual fields or the whole frame.
3102 * If the picture becomes entirely unreferenced, but is being held for
3103 * display purposes, it is marked as such.
3104 * @param refmask mask of fields to unreference; the mask is bitwise
3105 * anded with the reference marking of pic
3106 * @return non-zero if pic becomes entirely unreferenced (except possibly
3107 * for display purposes) zero if one of the fields remains in
3108 * reference
/* See the description above: clear the refmask bits from pic->reference.
 * If the picture is still referenced by a field, return 0; otherwise
 * mark it DELAYED_PIC_REF if it sits in the delayed output queue and
 * return 1 (fully unreferenced). */
3110 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3111 int i;
3112 if (pic->reference &= refmask) {
3113 return 0;
3114 } else {
3115 for(i = 0; h->delayed_pic[i]; i++)
3116 if(pic == h->delayed_pic[i]){
3117 pic->reference=DELAYED_PIC_REF;
3118 break;
3120 return 1;
3125 * instantaneous decoder refresh.
/* Handle an instantaneous decoder refresh: drop all long and short term
 * references and reset the frame_num/POC prediction state. */
3127 static void idr(H264Context *h){
3128 int i;
3130 for(i=0; i<16; i++){
3131 remove_long(h, i, 0);
3133 assert(h->long_ref_count==0);
3135 for(i=0; i<h->short_ref_count; i++){
3136 unreference_pic(h, h->short_ref[i], 0);
3137 h->short_ref[i]= NULL;
3139 h->short_ref_count=0;
3140 h->prev_frame_num= 0;
3141 h->prev_frame_num_offset= 0;
3142 h->prev_poc_msb=
3143 h->prev_poc_lsb= 0;
3146 /* forget old pics after a seek */
/* forget old pics after a seek: drop the delayed output queue, reset
 * the reference state via idr(), clear field tracking and SEI state,
 * then flush the underlying MPEG context. */
3147 static void flush_dpb(AVCodecContext *avctx){
3148 H264Context *h= avctx->priv_data;
3149 int i;
3150 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3151 if(h->delayed_pic[i])
3152 h->delayed_pic[i]->reference= 0;
3153 h->delayed_pic[i]= NULL;
3155 h->outputed_poc= INT_MIN;
3156 idr(h);
3157 if(h->s.current_picture_ptr)
3158 h->s.current_picture_ptr->reference= 0;
3159 h->s.first_field= 0;
3160 reset_sei(h);
3161 ff_mpeg_flush(avctx);
3165 * Find a Picture in the short term reference list by frame number.
3166 * @param frame_num frame number to search for
3167 * @param idx the index into h->short_ref where returned picture is found
3168 * undefined if no picture found.
3169 * @return pointer to the found picture, or NULL if no pic with the provided
3170 * frame number is found
3172 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3173 MpegEncContext * const s = &h->s;
3174 int i;
3176 for(i=0; i<h->short_ref_count; i++){
3177 Picture *pic= h->short_ref[i];
3178 if(s->avctx->debug&FF_DEBUG_MMCO)
3179 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3180 if(pic->frame_num == frame_num) {
3181 *idx = i;
3182 return pic;
3185 return NULL;
3189 * Remove a picture from the short term reference list by its index in
3190 * that list. This does no checking on the provided index; it is assumed
3191 * to be valid. Other list entries are shifted down.
3192 * @param i index into h->short_ref of picture to remove.
/* See the description above: drop entry i from the short term list and
 * close the gap by shifting the following pointers down. No validation
 * of i is performed. */
3194 static void remove_short_at_index(H264Context *h, int i){
3195 assert(i >= 0 && i < h->short_ref_count);
3196 h->short_ref[i]= NULL;
3197 if (--h->short_ref_count)
3198 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3203 * @return the removed picture or NULL if an error occurs
/* Unreference (per ref_mask) the short term picture with the given
 * frame_num; if it becomes fully unreferenced, remove it from the short
 * term list as well.
 * @return the picture, or NULL if no such frame_num is in the list */
3205 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3206 MpegEncContext * const s = &h->s;
3207 Picture *pic;
3208 int i;
3210 if(s->avctx->debug&FF_DEBUG_MMCO)
3211 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3213 pic = find_short(h, frame_num, &i);
3214 if (pic){
3215 if(unreference_pic(h, pic, ref_mask))
3216 remove_short_at_index(h, i);
3219 return pic;
3223 * Remove a picture from the long term reference list by its index in
3224 * that list.
3225 * @return the removed picture or NULL if an error occurs
/* Unreference (per ref_mask) the long term picture at index i; if it
 * becomes fully unreferenced, clear its long_ref flag and remove it
 * from the long term list.
 * @return the picture, or NULL if slot i was empty */
3227 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3228 Picture *pic;
3230 pic= h->long_ref[i];
3231 if (pic){
3232 if(unreference_pic(h, pic, ref_mask)){
3233 assert(h->long_ref[i]->long_ref == 1);
3234 h->long_ref[i]->long_ref= 0;
3235 h->long_ref[i]= NULL;
3236 h->long_ref_count--;
3240 return pic;
3244 * print short term list
/* Debug helper: dump the short term reference list when FF_DEBUG_MMCO
 * is enabled; no effect otherwise. */
3246 static void print_short_term(H264Context *h) {
3247 uint32_t i;
3248 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3249 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3250 for(i=0; i<h->short_ref_count; i++){
3251 Picture *pic= h->short_ref[i];
3252 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3258 * print long term list
/* Debug helper: dump the occupied slots of the long term reference
 * array when FF_DEBUG_MMCO is enabled; no effect otherwise. */
3260 static void print_long_term(H264Context *h) {
3261 uint32_t i;
3262 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3263 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3264 for(i = 0; i < 16; i++){
3265 Picture *pic= h->long_ref[i];
3266 if (pic) {
3267 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3274 * Executes the reference picture marking (memory management control operations).
/* Execute the memory management control operations (MMCOs) parsed from
 * the bitstream, then perform the implicit marking of the current
 * picture and enforce the DPB reference-count limit.
 * @return 0 */
3276 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3277 MpegEncContext * const s = &h->s;
3278 int i, av_uninit(j);
3279 int current_ref_assigned=0;
3280 Picture *av_uninit(pic);
3282 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3283 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3285 for(i=0; i<mmco_count; i++){
3286 int av_uninit(structure), av_uninit(frame_num);
3287 if(s->avctx->debug&FF_DEBUG_MMCO)
3288 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* MMCOs operating on a short term ref first resolve its frame_num and
 * structure, and look the picture up in the short term list */
3290 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3291 || mmco[i].opcode == MMCO_SHORT2LONG){
3292 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3293 pic = find_short(h, frame_num, &j);
3294 if(!pic){
3295 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3296 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3297 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3298 continue;
3302 switch(mmco[i].opcode){
3303 case MMCO_SHORT2UNUSED:
3304 if(s->avctx->debug&FF_DEBUG_MMCO)
3305 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3306 remove_short(h, frame_num, structure ^ PICT_FRAME);
3307 break;
3308 case MMCO_SHORT2LONG:
/* move a short term ref to long term slot long_arg, evicting any
 * different picture already occupying that slot */
3309 if (h->long_ref[mmco[i].long_arg] != pic)
3310 remove_long(h, mmco[i].long_arg, 0);
3312 remove_short_at_index(h, j);
3313 h->long_ref[ mmco[i].long_arg ]= pic;
3314 if (h->long_ref[ mmco[i].long_arg ]){
3315 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3316 h->long_ref_count++;
3318 break;
3319 case MMCO_LONG2UNUSED:
3320 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3321 pic = h->long_ref[j];
3322 if (pic) {
3323 remove_long(h, j, structure ^ PICT_FRAME);
3324 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3325 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3326 break;
3327 case MMCO_LONG:
3328 // Comment below left from previous code as it is an interesting note.
3329 /* First field in pair is in short term list or
3330 * at a different long term index.
3331 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3332 * Report the problem and keep the pair where it is,
3333 * and mark this field valid.
3336 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3337 remove_long(h, mmco[i].long_arg, 0);
3339 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3340 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3341 h->long_ref_count++;
3344 s->current_picture_ptr->reference |= s->picture_structure;
3345 current_ref_assigned=1;
3346 break;
3347 case MMCO_SET_MAX_LONG:
3348 assert(mmco[i].long_arg <= 16);
3349 // just remove the long term which index is greater than new max
3350 for(j = mmco[i].long_arg; j<16; j++){
3351 remove_long(h, j, 0);
3353 break;
3354 case MMCO_RESET:
/* full DPB reset: drop all references and zero the POC/frame_num
 * state of the current picture */
3355 while(h->short_ref_count){
3356 remove_short(h, h->short_ref[0]->frame_num, 0);
3358 for(j = 0; j < 16; j++) {
3359 remove_long(h, j, 0);
3361 s->current_picture_ptr->poc=
3362 s->current_picture_ptr->field_poc[0]=
3363 s->current_picture_ptr->field_poc[1]=
3364 h->poc_lsb=
3365 h->poc_msb=
3366 h->frame_num=
3367 s->current_picture_ptr->frame_num= 0;
3368 break;
3369 default: assert(0);
3373 if (!current_ref_assigned) {
3374 /* Second field of complementary field pair; the first field of
3375 * which is already referenced. If short referenced, it
3376 * should be first entry in short_ref. If not, it must exist
3377 * in long_ref; trying to put it on the short list here is an
3378 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3380 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3381 /* Just mark the second field valid */
3382 s->current_picture_ptr->reference = PICT_FRAME;
3383 } else if (s->current_picture_ptr->long_ref) {
3384 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3385 "assignment for second field "
3386 "in complementary field pair "
3387 "(first field is long term)\n");
3388 } else {
3389 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3390 if(pic){
3391 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* prepend the current picture to the short term list */
3394 if(h->short_ref_count)
3395 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3397 h->short_ref[0]= s->current_picture_ptr;
3398 h->short_ref_count++;
3399 s->current_picture_ptr->reference |= s->picture_structure;
3403 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3405 /* We have too many reference frames, probably due to corrupted
3406 * stream. Need to discard one frame. Prevents overrun of the
3407 * short_ref and long_ref buffers.
3409 av_log(h->s.avctx, AV_LOG_ERROR,
3410 "number of reference frames exceeds max (probably "
3411 "corrupt input), discarding one\n");
3413 if (h->long_ref_count && !h->short_ref_count) {
3414 for (i = 0; i < 16; ++i)
3415 if (h->long_ref[i])
3416 break;
3418 assert(i < 16);
3419 remove_long(h, i, 0);
3420 } else {
3421 pic = h->short_ref[h->short_ref_count - 1];
3422 remove_short(h, pic->frame_num, 0);
3426 print_short_term(h);
3427 print_long_term(h);
3428 return 0;
/* Parse dec_ref_pic_marking() into h->mmco[]/h->mmco_index.
 * IDR slices translate long_term_reference_flag into a single MMCO_LONG.
 * Otherwise, with adaptive marking, up to MAX_MMCO_COUNT opcodes with
 * their arguments are read and range-checked; with sliding-window
 * marking, a synthetic MMCO_SHORT2UNUSED is generated when the DPB is
 * already full (two of them for field pictures).
 * @return 0 on success, -1 on invalid opcodes/arguments */
3431 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3432 MpegEncContext * const s = &h->s;
3433 int i;
3435 h->mmco_index= 0;
3436 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3437 s->broken_link= get_bits1(gb) -1;
3438 if(get_bits1(gb)){
3439 h->mmco[0].opcode= MMCO_LONG;
3440 h->mmco[0].long_arg= 0;
3441 h->mmco_index= 1;
3443 }else{
3444 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3445 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3446 MMCOOpcode opcode= get_ue_golomb_31(gb);
3448 h->mmco[i].opcode= opcode;
3449 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3450 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3451 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3452 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3453 return -1;
3456 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3457 unsigned int long_arg= get_ue_golomb_31(gb);
3458 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3459 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3460 return -1;
3462 h->mmco[i].long_arg= long_arg;
3465 if(opcode > (unsigned)MMCO_LONG){
3466 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3467 return -1;
3469 if(opcode == MMCO_END)
3470 break;
3472 h->mmco_index= i;
3473 }else{
/* sliding window: implicitly drop the oldest short term ref once the
 * DPB holds ref_frame_count pictures */
3474 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3476 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3477 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3478 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3479 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3480 h->mmco_index= 1;
3481 if (FIELD_PICTURE) {
3482 h->mmco[0].short_pic_num *= 2;
3483 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3484 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3485 h->mmco_index= 2;
3491 return 0;
/* Compute the picture order count (POC) of the current picture for the
 * three POC types: type 0 (explicit poc_lsb with msb wrap prediction),
 * type 1 (expected POC from the SPS ref-frame offset cycle plus deltas)
 * and type 2 (POC derived directly from frame_num). Field POCs are
 * stored per structure; cur->poc becomes the minimum of the two.
 * @return 0 */
3494 static int init_poc(H264Context *h){
3495 MpegEncContext * const s = &h->s;
3496 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3497 int field_poc[2];
3498 Picture *cur = s->current_picture_ptr;
3500 h->frame_num_offset= h->prev_frame_num_offset;
3501 if(h->frame_num < h->prev_frame_num)
3502 h->frame_num_offset += max_frame_num;
3504 if(h->sps.poc_type==0){
3505 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* detect poc_lsb wraparound in either direction to pick poc_msb */
3507 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3508 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3509 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3510 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3511 else
3512 h->poc_msb = h->prev_poc_msb;
3513 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3514 field_poc[0] =
3515 field_poc[1] = h->poc_msb + h->poc_lsb;
3516 if(s->picture_structure == PICT_FRAME)
3517 field_poc[1] += h->delta_poc_bottom;
3518 }else if(h->sps.poc_type==1){
3519 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3520 int i;
3522 if(h->sps.poc_cycle_length != 0)
3523 abs_frame_num = h->frame_num_offset + h->frame_num;
3524 else
3525 abs_frame_num = 0;
3527 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3528 abs_frame_num--;
3530 expected_delta_per_poc_cycle = 0;
3531 for(i=0; i < h->sps.poc_cycle_length; i++)
3532 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3534 if(abs_frame_num > 0){
3535 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3536 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3538 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3539 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3540 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3541 } else
3542 expectedpoc = 0;
3544 if(h->nal_ref_idc == 0)
3545 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3547 field_poc[0] = expectedpoc + h->delta_poc[0];
3548 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3550 if(s->picture_structure == PICT_FRAME)
3551 field_poc[1] += h->delta_poc[1];
3552 }else{
3553 int poc= 2*(h->frame_num_offset + h->frame_num);
3555 if(!h->nal_ref_idc)
3556 poc--;
3558 field_poc[0]= poc;
3559 field_poc[1]= poc;
/* only update the POC of the field(s) the current picture covers */
3562 if(s->picture_structure != PICT_BOTTOM_FIELD)
3563 s->current_picture_ptr->field_poc[0]= field_poc[0];
3564 if(s->picture_structure != PICT_TOP_FIELD)
3565 s->current_picture_ptr->field_poc[1]= field_poc[1];
3566 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3568 return 0;
3573 * initialize scan tables
/* Build the coefficient scan tables. With the C IDCT the standard scan
 * orders are copied as-is; otherwise the T() permutations remap them to
 * the transposed coefficient layout the optimized IDCTs expect. The
 * *_q0 pointers select the unpermuted tables when transform_bypass is
 * active (qp==0 is lossless and skips the IDCT). */
3575 static void init_scan_tables(H264Context *h){
3576 MpegEncContext * const s = &h->s;
3577 int i;
3578 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3579 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3580 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3581 }else{
3582 for(i=0; i<16; i++){
3583 #define T(x) (x>>2) | ((x<<2) & 0xF)
3584 h->zigzag_scan[i] = T(zigzag_scan[i]);
3585 h-> field_scan[i] = T( field_scan[i]);
3586 #undef T
3589 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3590 memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
3591 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3592 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3593 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3594 }else{
3595 for(i=0; i<64; i++){
3596 #define T(x) (x>>3) | ((x&7)<<3)
3597 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
3598 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3599 h->field_scan8x8[i] = T(field_scan8x8[i]);
3600 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3601 #undef T
3604 if(h->sps.transform_bypass){ //FIXME same ugly
3605 h->zigzag_scan_q0 = zigzag_scan;
3606 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
3607 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3608 h->field_scan_q0 = field_scan;
3609 h->field_scan8x8_q0 = field_scan8x8;
3610 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3611 }else{
3612 h->zigzag_scan_q0 = h->zigzag_scan;
3613 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3614 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3615 h->field_scan_q0 = h->field_scan;
3616 h->field_scan8x8_q0 = h->field_scan8x8;
3617 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
/* Finish decoding the current field/frame: run reference marking (for
 * non-droppable pictures), update the POC/frame_num prediction state,
 * notify hwaccel/VDPAU backends, run error resilience (frames only) and
 * close the MPV frame. */
3621 static void field_end(H264Context *h){
3622 MpegEncContext * const s = &h->s;
3623 AVCodecContext * const avctx= s->avctx;
3624 s->mb_y= 0;
3626 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
3627 s->current_picture_ptr->pict_type= s->pict_type;
3629 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3630 ff_vdpau_h264_set_reference_frames(s);
3632 if(!s->dropable) {
3633 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
3634 h->prev_poc_msb= h->poc_msb;
3635 h->prev_poc_lsb= h->poc_lsb;
3637 h->prev_frame_num_offset= h->frame_num_offset;
3638 h->prev_frame_num= h->frame_num;
3640 if (avctx->hwaccel) {
3641 if (avctx->hwaccel->end_frame(avctx) < 0)
3642 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
3645 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
3646 ff_vdpau_h264_picture_complete(s);
3649 * FIXME: Error handling code does not seem to support interlaced
3650 * when slices span multiple rows
3651 * The ff_er_add_slice calls don't work right for bottom
3652 * fields; they cause massive erroneous error concealing
3653 * Error marking covers both fields (top and bottom).
3654 * This causes a mismatched s->error_count
3655 * and a bad error table. Further, the error count goes to
3656 * INT_MAX when called for bottom field, because mb_y is
3657 * past end by one (callers fault) and resync_mb_y != 0
3658 * causes problems for the first MB line, too.
3660 if (!FIELD_PICTURE)
3661 ff_er_frame_end(s);
3663 MPV_frame_end(s);
3665 h->current_slice=0;
3669 * Replicates H264 "master" context to thread contexts.
/* See the description above: copy the per-frame decoding state the
 * slice threads need (current picture, line sizes, POC prediction,
 * reference lists and dequant tables) from the master context. */
3671 static void clone_slice(H264Context *dst, H264Context *src)
3673 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3674 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3675 dst->s.current_picture = src->s.current_picture;
3676 dst->s.linesize = src->s.linesize;
3677 dst->s.uvlinesize = src->s.uvlinesize;
3678 dst->s.first_field = src->s.first_field;
3680 dst->prev_poc_msb = src->prev_poc_msb;
3681 dst->prev_poc_lsb = src->prev_poc_lsb;
3682 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3683 dst->prev_frame_num = src->prev_frame_num;
3684 dst->short_ref_count = src->short_ref_count;
3686 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3687 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3688 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3689 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3691 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3692 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3696 * decodes a slice header.
3697 * This will also call MPV_common_init() and frame_start() as needed.
3699 * @param h h264context
3700 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3702 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
static int decode_slice_header(H264Context *h, H264Context *h0){
    MpegEncContext * const s = &h->s;
    MpegEncContext * const s0 = &h0->s;
    unsigned int first_mb_in_slice;
    unsigned int pps_id;
    int num_ref_idx_active_override_flag;
    unsigned int slice_type, tmp, i, j;
    int default_ref_list_done = 0;
    int last_pic_structure;

    /* a slice with nal_ref_idc == 0 is never used as a reference */
    s->dropable= h->nal_ref_idc == 0;

    /* FAST mode may use the cheaper 2-tap qpel filters on non-reference slices */
    if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
        s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
        s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
    }else{
        s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
        s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
    }

    first_mb_in_slice= get_ue_golomb(&s->gb);

    if(first_mb_in_slice == 0){ //FIXME better field boundary detection
        if(h0->current_slice && FIELD_PICTURE){
            field_end(h);
        }

        h0->current_slice = 0;
        if (!s0->first_field)
            s->current_picture_ptr= NULL;
    }

    /* slice_type: values 5..9 mean "same type for the whole picture" */
    slice_type= get_ue_golomb_31(&s->gb);
    if(slice_type > 9){
        av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
        return -1;
    }
    if(slice_type > 4){
        slice_type -= 5;
        h->slice_type_fixed=1;
    }else
        h->slice_type_fixed=0;

    slice_type= golomb_to_pict_type[ slice_type ];
    /* I slices, and repeats of the previous slice type, can reuse the
       already-built default reference list */
    if (slice_type == FF_I_TYPE
        || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
        default_ref_list_done = 1;
    }
    h->slice_type= slice_type;
    h->slice_type_nos= slice_type & 3; /* strips the SI/SP distinction */

    s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
    if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
        av_log(h->s.avctx, AV_LOG_ERROR,
               "B picture before any references, skipping\n");
        return -1;
    }

    /* activate the referenced PPS and its SPS (copied by value) */
    pps_id= get_ue_golomb(&s->gb);
    if(pps_id>=MAX_PPS_COUNT){
        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
        return -1;
    }
    if(!h0->pps_buffers[pps_id]) {
        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
        return -1;
    }
    h->pps= *h0->pps_buffers[pps_id];

    if(!h0->sps_buffers[h->pps.sps_id]) {
        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
        return -1;
    }
    h->sps = *h0->sps_buffers[h->pps.sps_id];

    /* dequant tables depend on the PPS; rebuild only on the master context */
    if(h == h0 && h->dequant_coeff_pps != pps_id){
        h->dequant_coeff_pps = pps_id;
        init_dequant_tables(h);
    }

    s->mb_width= h->sps.mb_width;
    /* field-coded streams store half-height MBs; double for the frame */
    s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);

    h->b_stride=  s->mb_width*4;
    h->b8_stride= s->mb_width*2;

    /* apply SPS cropping; offsets are in chroma samples, hence the *2/*4 */
    s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
    if(h->sps.frame_mbs_only_flag)
        s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
    else
        s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);

    /* dimension change forces a full context teardown + re-init below */
    if (s->context_initialized
        && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
        if(h != h0)
            return -1;   // width / height changed during parallelized decoding
        free_tables(h);
        flush_dpb(s->avctx);
        MPV_common_end(s);
    }
    if (!s->context_initialized) {
        if(h != h0)
            return -1;  // we cant (re-)initialize context during parallel decoding
        if (MPV_common_init(s) < 0)
            return -1;
        s->first_field = 0;

        init_scan_tables(h);
        alloc_tables(h);

        /* create per-thread H264Contexts on top of the MpegEncContexts that
           MPV_common_init() allocated */
        for(i = 1; i < s->avctx->thread_count; i++) {
            H264Context *c;
            c = h->thread_context[i] = av_malloc(sizeof(H264Context));
            memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
            memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
            c->sps = h->sps;
            c->pps = h->pps;
            init_scan_tables(c);
            clone_tables(c, h);
        }

        for(i = 0; i < s->avctx->thread_count; i++)
            if(context_init(h->thread_context[i]) < 0)
                return -1;

        s->avctx->width = s->width;
        s->avctx->height = s->height;
        s->avctx->sample_aspect_ratio= h->sps.sar;
        if(!s->avctx->sample_aspect_ratio.den)
            s->avctx->sample_aspect_ratio.den = 1;

        if(h->sps.timing_info_present_flag){
            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
            /* work around old x264 writing half the correct time_scale */
            if(h->x264_build > 0 && h->x264_build < 44)
                s->avctx->time_base.den *= 2;
            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
        }
    }

    h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);

    h->mb_mbaff = 0;
    h->mb_aff_frame = 0;
    last_pic_structure = s0->picture_structure;
    if(h->sps.frame_mbs_only_flag){
        s->picture_structure= PICT_FRAME;
    }else{
        if(get_bits1(&s->gb)) { //field_pic_flag
            s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
        } else {
            s->picture_structure= PICT_FRAME;
            h->mb_aff_frame = h->sps.mb_aff;
        }
    }
    h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;

    if(h0->current_slice == 0){
        /* fill in dummy frames for any gap in frame_num (lost pictures) */
        while(h->frame_num !=  h->prev_frame_num &&
              h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
            av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
            if (frame_start(h) < 0)
                return -1;
            h->prev_frame_num++;
            h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
            s->current_picture_ptr->frame_num= h->prev_frame_num;
            execute_ref_pic_marking(h, NULL, 0);
        }

        /* See if we have a decoded first field looking for a pair... */
        if (s0->first_field) {
            assert(s0->current_picture_ptr);
            assert(s0->current_picture_ptr->data[0]);
            assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);

            /* figure out if we have a complementary field pair */
            if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
                /*
                 * Previous field is unmatched. Don't display it, but let it
                 * remain for reference if marked as such.
                 */
                s0->current_picture_ptr = NULL;
                s0->first_field = FIELD_PICTURE;

            } else {
                if (h->nal_ref_idc &&
                        s0->current_picture_ptr->reference &&
                        s0->current_picture_ptr->frame_num != h->frame_num) {
                    /*
                     * This and previous field were reference, but had
                     * different frame_nums. Consider this field first in
                     * pair. Throw away previous field except for reference
                     * purposes.
                     */
                    s0->first_field = 1;
                    s0->current_picture_ptr = NULL;

                } else {
                    /* Second field in complementary pair */
                    s0->first_field = 0;
                }
            }

        } else {
            /* Frame or first field in a potentially complementary pair */
            assert(!s0->current_picture_ptr);
            s0->first_field = FIELD_PICTURE;
        }

        if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
            s0->first_field = 0;
            return -1;
        }
    }
    if(h != h0)
        clone_slice(h, h0);

    s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup

    assert(s->mb_num == s->mb_width * s->mb_height);
    if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
       first_mb_in_slice                    >= s->mb_num){
        av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
        return -1;
    }
    s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
    s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
    if (s->picture_structure == PICT_BOTTOM_FIELD)
        s->resync_mb_y = s->mb_y = s->mb_y + 1;
    assert(s->mb_y < s->mb_height);

    /* field pictures number their fields individually (2*frame_num+1) */
    if(s->picture_structure==PICT_FRAME){
        h->curr_pic_num=   h->frame_num;
        h->max_pic_num= 1<< h->sps.log2_max_frame_num;
    }else{
        h->curr_pic_num= 2*h->frame_num + 1;
        h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
    }

    if(h->nal_unit_type == NAL_IDR_SLICE){
        get_ue_golomb(&s->gb); /* idr_pic_id */
    }

    /* picture order count information, layout depends on sps.poc_type */
    if(h->sps.poc_type==0){
        h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);

        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
            h->delta_poc_bottom= get_se_golomb(&s->gb);
        }
    }

    if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
        h->delta_poc[0]= get_se_golomb(&s->gb);

        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
            h->delta_poc[1]= get_se_golomb(&s->gb);
    }

    init_poc(h);

    if(h->pps.redundant_pic_cnt_present){
        h->redundant_pic_count= get_ue_golomb(&s->gb);
    }

    //set defaults, might be overridden a few lines later
    h->ref_count[0]= h->pps.ref_count[0];
    h->ref_count[1]= h->pps.ref_count[1];

    if(h->slice_type_nos != FF_I_TYPE){
        if(h->slice_type_nos == FF_B_TYPE){
            h->direct_spatial_mv_pred= get_bits1(&s->gb);
        }
        num_ref_idx_active_override_flag= get_bits1(&s->gb);

        if(num_ref_idx_active_override_flag){
            h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
            if(h->slice_type_nos==FF_B_TYPE)
                h->ref_count[1]= get_ue_golomb(&s->gb) + 1;

            if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
                av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
                h->ref_count[0]= h->ref_count[1]= 1;
                return -1;
            }
        }
        if(h->slice_type_nos == FF_B_TYPE)
            h->list_count= 2;
        else
            h->list_count= 1;
    }else
        h->list_count= 0;

    if(!default_ref_list_done){
        fill_default_ref_list(h);
    }

    if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
        return -1;

    /* expose the nearest references through the MpegEncContext aliases */
    if(h->slice_type_nos!=FF_I_TYPE){
        s->last_picture_ptr= &h->ref_list[0][0];
        ff_copy_picture(&s->last_picture, s->last_picture_ptr);
    }
    if(h->slice_type_nos==FF_B_TYPE){
        s->next_picture_ptr= &h->ref_list[1][0];
        ff_copy_picture(&s->next_picture, s->next_picture_ptr);
    }

    if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
       ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
        pred_weight_table(h);
    else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
        implicit_weight_table(h);
    else {
        h->use_weight = 0;
        for (i = 0; i < 2; i++) {
            h->luma_weight_flag[i]   = 0;
            h->chroma_weight_flag[i] = 0;
        }
    }

    if(h->nal_ref_idc)
        decode_ref_pic_marking(h0, &s->gb);

    if(FRAME_MBAFF)
        fill_mbaff_ref_list(h);

    if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
        direct_dist_scale_factor(h);
    direct_ref_list_init(h);

    if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
        tmp = get_ue_golomb_31(&s->gb);
        if(tmp > 2){
            av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
            return -1;
        }
        h->cabac_init_idc= tmp;
    }

    h->last_qscale_diff = 0;
    tmp = h->pps.init_qp + get_se_golomb(&s->gb);
    if(tmp>51){
        av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
        return -1;
    }
    s->qscale= tmp;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
    //FIXME qscale / qp ... stuff
    if(h->slice_type == FF_SP_TYPE){
        get_bits1(&s->gb); /* sp_for_switch_flag */
    }
    if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
        get_se_golomb(&s->gb); /* slice_qs_delta */
    }

    /* internal deblocking_filter convention: 0=off, 1=on, 2=on but not
       across slice boundaries — note the XOR below swaps the bitstream's
       0 (on) and 1 (off) encodings */
    h->deblocking_filter = 1;
    h->slice_alpha_c0_offset = 0;
    h->slice_beta_offset = 0;
    if( h->pps.deblocking_filter_parameters_present ) {
        tmp= get_ue_golomb_31(&s->gb);
        if(tmp > 2){
            av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
            return -1;
        }
        h->deblocking_filter= tmp;
        if(h->deblocking_filter < 2)
            h->deblocking_filter^= 1; // 1<->0

        if( h->deblocking_filter ) {
            h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
            h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
        }
    }

    if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
       ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
        h->deblocking_filter= 0;

    if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
        if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
            /* Cheat slightly for speed:
               Do not bother to deblock across slices. */
            h->deblocking_filter = 2;
        } else {
            h0->max_contexts = 1;
            if(!h0->single_decode_warning) {
                av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
                h0->single_decode_warning = 1;
            }
            if(h != h0)
                return 1; // deblocking switched inside frame
        }
    }

#if 0 //FMO
    if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
        slice_group_change_cycle= get_bits(&s->gb, ?);
#endif

    h0->last_slice_type = slice_type;
    h->slice_num = ++h0->current_slice;
    /* NOTE(review): only logs on overflow, does not return an error —
       slice_num is masked with MAX_SLICES-1 below; confirm intended */
    if(h->slice_num >= MAX_SLICES){
        av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
    }

    /* build the ref_index -> frame_num mapping used by the loop filter;
       entries 0..1 and 18..19 are -1 sentinels for unavailable refs */
    for(j=0; j<2; j++){
        int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
        ref2frm[0]=
        ref2frm[1]= -1;
        for(i=0; i<16; i++)
            ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
                          +(h->ref_list[j][i].reference&3);
        ref2frm[18+0]=
        ref2frm[18+1]= -1;
        for(i=16; i<48; i++)
            ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
                          +(h->ref_list[j][i].reference&3);
    }

    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
    h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;

    s->avctx->refs= h->sps.ref_frame_count;

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
               h->slice_num,
               (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
               first_mb_in_slice,
               av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
               pps_id, h->frame_num,
               s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
               h->ref_count[0], h->ref_count[1],
               s->qscale,
               h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
               h->use_weight,
               h->use_weight==1 && h->use_weight_chroma ? "c" : "",
               h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
               );
    }

    return 0;
}
4155 static inline int get_level_prefix(GetBitContext *gb){
4156 unsigned int buf;
4157 int log;
4159 OPEN_READER(re, gb);
4160 UPDATE_CACHE(re, gb);
4161 buf=GET_CACHE(re, gb);
4163 log= 32 - av_log2(buf);
4164 #ifdef TRACE
4165 print_bin(buf>>(32-log), log);
4166 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4167 #endif
4169 LAST_SKIP_BITS(re, gb, log);
4170 CLOSE_READER(re, gb);
4172 return log-1;
4175 static inline int get_dct8x8_allowed(H264Context *h){
4176 if(h->sps.direct_8x8_inference_flag)
4177 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
4178 else
4179 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
4183 * decodes a residual block.
4184 * @param n block index
4185 * @param scantable scantable
4186 * @param max_coeff number of coefficients in the block
4187 * @return <0 if an error occurred
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
    MpegEncContext * const s = &h->s;
    /* maps predicted nz count (0..16) to one of the 4 coeff_token tables */
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
    int level[16];
    int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;

    //FIXME put trailing_onex into the context

    /* coeff_token packs (total_coeff<<2)|trailing_ones; the VLC table used
       for luma depends on the neighbours' non-zero counts */
    if(n == CHROMA_DC_BLOCK_INDEX){
        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
        total_coeff= coeff_token>>2;
    }else{
        if(n == LUMA_DC_BLOCK_INDEX){
            total_coeff= pred_non_zero_count(h, 0);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }else{
            total_coeff= pred_non_zero_count(h, n);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
        }
    }

    //FIXME set last_non_zero?

    if(total_coeff==0)
        return 0;
    if(total_coeff > (unsigned)max_coeff) {
        av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
        return -1;
    }

    trailing_ones= coeff_token&3;
    tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
    assert(total_coeff<=16);

    /* speculatively decode up to 3 trailing-one sign bits from a 3-bit
       peek, but only actually consume trailing_ones of them */
    i = show_bits(gb, 3);
    skip_bits(gb, trailing_ones);
    level[0] = 1-((i&4)>>1);
    level[1] = 1-((i&2)   );
    level[2] = 1-((i&1)<<1);

    if(trailing_ones<total_coeff) {
        int mask, prefix;
        int suffix_length = total_coeff > 10 && trailing_ones < 3;
        /* cavlc_level_tab caches (value, bits-to-skip) for short codes;
           level_code >= 100 flags an escape that needs full decoding */
        int bitsi= show_bits(gb, LEVEL_TAB_BITS);
        int level_code= cavlc_level_tab[suffix_length][bitsi][0];

        skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
        if(level_code >= 100){
            prefix= level_code - 100;
            if(prefix == LEVEL_TAB_BITS)
                prefix += get_level_prefix(gb);

            //first coefficient has suffix_length equal to 0 or 1
            if(prefix<14){ //FIXME try to build a large unified VLC table for all this
                if(suffix_length)
                    level_code= (prefix<<1) + get_bits1(gb); //part
                else
                    level_code= prefix; //part
            }else if(prefix==14){
                if(suffix_length)
                    level_code= (prefix<<1) + get_bits1(gb); //part
                else
                    level_code= prefix + get_bits(gb, 4); //part
            }else{
                level_code= 30 + get_bits(gb, prefix-3); //part
                if(prefix>=16)
                    level_code += (1<<(prefix-3))-4096;
            }

            /* with fewer than 3 trailing ones, |level|==1 is impossible
               here, so codes are shifted by 2 */
            if(trailing_ones < 3) level_code += 2;

            suffix_length = 2;
            mask= -(level_code&1);
            /* unsign: even codes -> positive, odd codes -> negative */
            level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
        }else{
            if(trailing_ones < 3) level_code += (level_code>>31)|1;

            suffix_length = 1;
            if(level_code + 3U > 6U)
                suffix_length++;
            level[trailing_ones]= level_code;
        }

        //remaining coefficients have suffix_length > 0
        for(i=trailing_ones+1;i<total_coeff;i++) {
            /* thresholds above which suffix_length grows by one */
            static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
            int bitsi= show_bits(gb, LEVEL_TAB_BITS);
            level_code= cavlc_level_tab[suffix_length][bitsi][0];

            skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
            if(level_code >= 100){
                prefix= level_code - 100;
                if(prefix == LEVEL_TAB_BITS){
                    prefix += get_level_prefix(gb);
                }
                if(prefix<15){
                    level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
                }else{
                    level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
                    if(prefix>=16)
                        level_code += (1<<(prefix-3))-4096;
                }
                mask= -(level_code&1);
                level_code= (((2+level_code)>>1) ^ mask) - mask;
            }
            level[i]= level_code;

            if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
                suffix_length++;
        }
    }

    /* a full block cannot contain embedded zero runs */
    if(total_coeff == max_coeff)
        zeros_left=0;
    else{
        if(n == CHROMA_DC_BLOCK_INDEX)
            zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
        else
            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
    }

    /* place levels back-to-front along the scan; n > 24 blocks
       (presumably the DC paths — confirm against callers) skip the
       dequant multiply, the rest apply qmul with rounding */
    coeff_num = zeros_left + total_coeff - 1;
    j = scantable[coeff_num];
    if(n > 24){
        block[j] = level[0];
        for(i=1;i<total_coeff;i++) {
            if(zeros_left <= 0)
                run_before = 0;
            else if(zeros_left < 7){
                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
            }else{
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
            }
            zeros_left -= run_before;
            coeff_num -= 1 + run_before;
            j= scantable[ coeff_num ];

            block[j]= level[i];
        }
    }else{
        block[j] = (level[0] * qmul[j] + 32)>>6;
        for(i=1;i<total_coeff;i++) {
            if(zeros_left <= 0)
                run_before = 0;
            else if(zeros_left < 7){
                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
            }else{
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
            }
            zeros_left -= run_before;
            coeff_num -= 1 + run_before;
            j= scantable[ coeff_num ];

            block[j]= (level[i] * qmul[j] + 32)>>6;
        }
    }

    if(zeros_left<0){
        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    return 0;
}
4357 static void predict_field_decoding_flag(H264Context *h){
4358 MpegEncContext * const s = &h->s;
4359 const int mb_xy= h->mb_xy;
4360 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4361 ? s->current_picture.mb_type[mb_xy-1]
4362 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4363 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4364 : 0;
4365 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4369 * decodes a P_SKIP or B_SKIP macroblock
4371 static void decode_mb_skip(H264Context *h){
4372 MpegEncContext * const s = &h->s;
4373 const int mb_xy= h->mb_xy;
4374 int mb_type=0;
4376 memset(h->non_zero_count[mb_xy], 0, 16);
4377 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4379 if(MB_FIELD)
4380 mb_type|= MB_TYPE_INTERLACED;
4382 if( h->slice_type_nos == FF_B_TYPE )
4384 // just for fill_caches. pred_direct_motion will set the real mb_type
4385 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4387 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4388 pred_direct_motion(h, &mb_type);
4389 mb_type|= MB_TYPE_SKIP;
4391 else
4393 int mx, my;
4394 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4396 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4397 pred_pskip_motion(h, &mx, &my);
4398 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4399 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4402 write_back_motion(h, mb_type);
4403 s->current_picture.mb_type[mb_xy]= mb_type;
4404 s->current_picture.qscale_table[mb_xy]= s->qscale;
4405 h->slice_table[ mb_xy ]= h->slice_num;
4406 h->prev_mb_skipped= 1;
4410 * decodes a macroblock
4411 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4413 static int decode_mb_cavlc(H264Context *h){
4414 MpegEncContext * const s = &h->s;
4415 int mb_xy;
4416 int partition_count;
4417 unsigned int mb_type, cbp;
4418 int dct8x8_allowed= h->pps.transform_8x8_mode;
4420 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4422 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4423 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4424 down the code */
4425 if(h->slice_type_nos != FF_I_TYPE){
4426 if(s->mb_skip_run==-1)
4427 s->mb_skip_run= get_ue_golomb(&s->gb);
4429 if (s->mb_skip_run--) {
4430 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4431 if(s->mb_skip_run==0)
4432 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4433 else
4434 predict_field_decoding_flag(h);
4436 decode_mb_skip(h);
4437 return 0;
4440 if(FRAME_MBAFF){
4441 if( (s->mb_y&1) == 0 )
4442 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4445 h->prev_mb_skipped= 0;
4447 mb_type= get_ue_golomb(&s->gb);
4448 if(h->slice_type_nos == FF_B_TYPE){
4449 if(mb_type < 23){
4450 partition_count= b_mb_type_info[mb_type].partition_count;
4451 mb_type= b_mb_type_info[mb_type].type;
4452 }else{
4453 mb_type -= 23;
4454 goto decode_intra_mb;
4456 }else if(h->slice_type_nos == FF_P_TYPE){
4457 if(mb_type < 5){
4458 partition_count= p_mb_type_info[mb_type].partition_count;
4459 mb_type= p_mb_type_info[mb_type].type;
4460 }else{
4461 mb_type -= 5;
4462 goto decode_intra_mb;
4464 }else{
4465 assert(h->slice_type_nos == FF_I_TYPE);
4466 if(h->slice_type == FF_SI_TYPE && mb_type)
4467 mb_type--;
4468 decode_intra_mb:
4469 if(mb_type > 25){
4470 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4471 return -1;
4473 partition_count=0;
4474 cbp= i_mb_type_info[mb_type].cbp;
4475 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4476 mb_type= i_mb_type_info[mb_type].type;
4479 if(MB_FIELD)
4480 mb_type |= MB_TYPE_INTERLACED;
4482 h->slice_table[ mb_xy ]= h->slice_num;
4484 if(IS_INTRA_PCM(mb_type)){
4485 unsigned int x;
4487 // We assume these blocks are very rare so we do not optimize it.
4488 align_get_bits(&s->gb);
4490 // The pixels are stored in the same order as levels in h->mb array.
4491 for(x=0; x < (CHROMA ? 384 : 256); x++){
4492 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4495 // In deblocking, the quantizer is 0
4496 s->current_picture.qscale_table[mb_xy]= 0;
4497 // All coeffs are present
4498 memset(h->non_zero_count[mb_xy], 16, 16);
4500 s->current_picture.mb_type[mb_xy]= mb_type;
4501 return 0;
4504 if(MB_MBAFF){
4505 h->ref_count[0] <<= 1;
4506 h->ref_count[1] <<= 1;
4509 fill_caches(h, mb_type, 0);
4511 //mb_pred
4512 if(IS_INTRA(mb_type)){
4513 int pred_mode;
4514 // init_top_left_availability(h);
4515 if(IS_INTRA4x4(mb_type)){
4516 int i;
4517 int di = 1;
4518 if(dct8x8_allowed && get_bits1(&s->gb)){
4519 mb_type |= MB_TYPE_8x8DCT;
4520 di = 4;
4523 // fill_intra4x4_pred_table(h);
4524 for(i=0; i<16; i+=di){
4525 int mode= pred_intra_mode(h, i);
4527 if(!get_bits1(&s->gb)){
4528 const int rem_mode= get_bits(&s->gb, 3);
4529 mode = rem_mode + (rem_mode >= mode);
4532 if(di==4)
4533 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4534 else
4535 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4537 write_back_intra_pred_mode(h);
4538 if( check_intra4x4_pred_mode(h) < 0)
4539 return -1;
4540 }else{
4541 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4542 if(h->intra16x16_pred_mode < 0)
4543 return -1;
4545 if(CHROMA){
4546 pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
4547 if(pred_mode < 0)
4548 return -1;
4549 h->chroma_pred_mode= pred_mode;
4551 }else if(partition_count==4){
4552 int i, j, sub_partition_count[4], list, ref[2][4];
4554 if(h->slice_type_nos == FF_B_TYPE){
4555 for(i=0; i<4; i++){
4556 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4557 if(h->sub_mb_type[i] >=13){
4558 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4559 return -1;
4561 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4562 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4564 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4565 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4566 pred_direct_motion(h, &mb_type);
4567 h->ref_cache[0][scan8[4]] =
4568 h->ref_cache[1][scan8[4]] =
4569 h->ref_cache[0][scan8[12]] =
4570 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4572 }else{
4573 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4574 for(i=0; i<4; i++){
4575 h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
4576 if(h->sub_mb_type[i] >=4){
4577 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4578 return -1;
4580 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4581 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4585 for(list=0; list<h->list_count; list++){
4586 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4587 for(i=0; i<4; i++){
4588 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4589 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4590 unsigned int tmp;
4591 if(ref_count == 1){
4592 tmp= 0;
4593 }else if(ref_count == 2){
4594 tmp= get_bits1(&s->gb)^1;
4595 }else{
4596 tmp= get_ue_golomb_31(&s->gb);
4597 if(tmp>=ref_count){
4598 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4599 return -1;
4602 ref[list][i]= tmp;
4603 }else{
4604 //FIXME
4605 ref[list][i] = -1;
4610 if(dct8x8_allowed)
4611 dct8x8_allowed = get_dct8x8_allowed(h);
4613 for(list=0; list<h->list_count; list++){
4614 for(i=0; i<4; i++){
4615 if(IS_DIRECT(h->sub_mb_type[i])) {
4616 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4617 continue;
4619 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4620 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4622 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4623 const int sub_mb_type= h->sub_mb_type[i];
4624 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4625 for(j=0; j<sub_partition_count[i]; j++){
4626 int mx, my;
4627 const int index= 4*i + block_width*j;
4628 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4629 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4630 mx += get_se_golomb(&s->gb);
4631 my += get_se_golomb(&s->gb);
4632 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4634 if(IS_SUB_8X8(sub_mb_type)){
4635 mv_cache[ 1 ][0]=
4636 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4637 mv_cache[ 1 ][1]=
4638 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4639 }else if(IS_SUB_8X4(sub_mb_type)){
4640 mv_cache[ 1 ][0]= mx;
4641 mv_cache[ 1 ][1]= my;
4642 }else if(IS_SUB_4X8(sub_mb_type)){
4643 mv_cache[ 8 ][0]= mx;
4644 mv_cache[ 8 ][1]= my;
4646 mv_cache[ 0 ][0]= mx;
4647 mv_cache[ 0 ][1]= my;
4649 }else{
4650 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4651 p[0] = p[1]=
4652 p[8] = p[9]= 0;
4656 }else if(IS_DIRECT(mb_type)){
4657 pred_direct_motion(h, &mb_type);
4658 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4659 }else{
4660 int list, mx, my, i;
4661 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4662 if(IS_16X16(mb_type)){
4663 for(list=0; list<h->list_count; list++){
4664 unsigned int val;
4665 if(IS_DIR(mb_type, 0, list)){
4666 if(h->ref_count[list]==1){
4667 val= 0;
4668 }else if(h->ref_count[list]==2){
4669 val= get_bits1(&s->gb)^1;
4670 }else{
4671 val= get_ue_golomb_31(&s->gb);
4672 if(val >= h->ref_count[list]){
4673 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4674 return -1;
4677 }else
4678 val= LIST_NOT_USED&0xFF;
4679 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4681 for(list=0; list<h->list_count; list++){
4682 unsigned int val;
4683 if(IS_DIR(mb_type, 0, list)){
4684 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4685 mx += get_se_golomb(&s->gb);
4686 my += get_se_golomb(&s->gb);
4687 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4689 val= pack16to32(mx,my);
4690 }else
4691 val=0;
4692 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4695 else if(IS_16X8(mb_type)){
4696 for(list=0; list<h->list_count; list++){
4697 for(i=0; i<2; i++){
4698 unsigned int val;
4699 if(IS_DIR(mb_type, i, list)){
4700 if(h->ref_count[list] == 1){
4701 val= 0;
4702 }else if(h->ref_count[list] == 2){
4703 val= get_bits1(&s->gb)^1;
4704 }else{
4705 val= get_ue_golomb_31(&s->gb);
4706 if(val >= h->ref_count[list]){
4707 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4708 return -1;
4711 }else
4712 val= LIST_NOT_USED&0xFF;
4713 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4716 for(list=0; list<h->list_count; list++){
4717 for(i=0; i<2; i++){
4718 unsigned int val;
4719 if(IS_DIR(mb_type, i, list)){
4720 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4721 mx += get_se_golomb(&s->gb);
4722 my += get_se_golomb(&s->gb);
4723 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4725 val= pack16to32(mx,my);
4726 }else
4727 val=0;
4728 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4731 }else{
4732 assert(IS_8X16(mb_type));
4733 for(list=0; list<h->list_count; list++){
4734 for(i=0; i<2; i++){
4735 unsigned int val;
4736 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4737 if(h->ref_count[list]==1){
4738 val= 0;
4739 }else if(h->ref_count[list]==2){
4740 val= get_bits1(&s->gb)^1;
4741 }else{
4742 val= get_ue_golomb_31(&s->gb);
4743 if(val >= h->ref_count[list]){
4744 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4745 return -1;
4748 }else
4749 val= LIST_NOT_USED&0xFF;
4750 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4753 for(list=0; list<h->list_count; list++){
4754 for(i=0; i<2; i++){
4755 unsigned int val;
4756 if(IS_DIR(mb_type, i, list)){
4757 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4758 mx += get_se_golomb(&s->gb);
4759 my += get_se_golomb(&s->gb);
4760 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4762 val= pack16to32(mx,my);
4763 }else
4764 val=0;
4765 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4771 if(IS_INTER(mb_type))
4772 write_back_motion(h, mb_type);
4774 if(!IS_INTRA16x16(mb_type)){
4775 cbp= get_ue_golomb(&s->gb);
4776 if(cbp > 47){
4777 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4778 return -1;
4781 if(CHROMA){
4782 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4783 else cbp= golomb_to_inter_cbp [cbp];
4784 }else{
4785 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4786 else cbp= golomb_to_inter_cbp_gray[cbp];
4789 h->cbp = cbp;
4791 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4792 if(get_bits1(&s->gb)){
4793 mb_type |= MB_TYPE_8x8DCT;
4794 h->cbp_table[mb_xy]= cbp;
4797 s->current_picture.mb_type[mb_xy]= mb_type;
4799 if(cbp || IS_INTRA16x16(mb_type)){
4800 int i8x8, i4x4, chroma_idx;
4801 int dquant;
4802 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4803 const uint8_t *scan, *scan8x8, *dc_scan;
4805 // fill_non_zero_count_cache(h);
4807 if(IS_INTERLACED(mb_type)){
4808 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4809 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4810 dc_scan= luma_dc_field_scan;
4811 }else{
4812 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4813 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4814 dc_scan= luma_dc_zigzag_scan;
4817 dquant= get_se_golomb(&s->gb);
4819 if( dquant > 25 || dquant < -26 ){
4820 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4821 return -1;
4824 s->qscale += dquant;
4825 if(((unsigned)s->qscale) > 51){
4826 if(s->qscale<0) s->qscale+= 52;
4827 else s->qscale-= 52;
4830 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4831 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4832 if(IS_INTRA16x16(mb_type)){
4833 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4834 return -1; //FIXME continue if partitioned and other return -1 too
4837 assert((cbp&15) == 0 || (cbp&15) == 15);
4839 if(cbp&15){
4840 for(i8x8=0; i8x8<4; i8x8++){
4841 for(i4x4=0; i4x4<4; i4x4++){
4842 const int index= i4x4 + 4*i8x8;
4843 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4844 return -1;
4848 }else{
4849 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4851 }else{
4852 for(i8x8=0; i8x8<4; i8x8++){
4853 if(cbp & (1<<i8x8)){
4854 if(IS_8x8DCT(mb_type)){
4855 DCTELEM *buf = &h->mb[64*i8x8];
4856 uint8_t *nnz;
4857 for(i4x4=0; i4x4<4; i4x4++){
4858 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4859 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4860 return -1;
4862 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4863 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4864 }else{
4865 for(i4x4=0; i4x4<4; i4x4++){
4866 const int index= i4x4 + 4*i8x8;
4868 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4869 return -1;
4873 }else{
4874 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4875 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4880 if(cbp&0x30){
4881 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4882 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4883 return -1;
4887 if(cbp&0x20){
4888 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4889 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4890 for(i4x4=0; i4x4<4; i4x4++){
4891 const int index= 16 + 4*chroma_idx + i4x4;
4892 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4893 return -1;
4897 }else{
4898 uint8_t * const nnz= &h->non_zero_count_cache[0];
4899 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4900 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4902 }else{
4903 uint8_t * const nnz= &h->non_zero_count_cache[0];
4904 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4905 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4906 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4908 s->current_picture.qscale_table[mb_xy]= s->qscale;
4909 write_back_non_zero_count(h);
4911 if(MB_MBAFF){
4912 h->ref_count[0] >>= 1;
4913 h->ref_count[1] >>= 1;
4916 return 0;
4919 static int decode_cabac_field_decoding_flag(H264Context *h) {
4920 MpegEncContext * const s = &h->s;
4921 const int mb_x = s->mb_x;
4922 const int mb_y = s->mb_y & ~1;
4923 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4924 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4926 unsigned int ctx = 0;
4928 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4929 ctx += 1;
4931 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4932 ctx += 1;
4935 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4938 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4939 uint8_t *state= &h->cabac_state[ctx_base];
4940 int mb_type;
4942 if(intra_slice){
4943 MpegEncContext * const s = &h->s;
4944 const int mba_xy = h->left_mb_xy[0];
4945 const int mbb_xy = h->top_mb_xy;
4946 int ctx=0;
4947 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4948 ctx++;
4949 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4950 ctx++;
4951 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4952 return 0; /* I4x4 */
4953 state += 2;
4954 }else{
4955 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4956 return 0; /* I4x4 */
4959 if( get_cabac_terminate( &h->cabac ) )
4960 return 25; /* PCM */
4962 mb_type = 1; /* I16x16 */
4963 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4964 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4965 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4966 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4967 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4968 return mb_type;
4971 static int decode_cabac_mb_type_b( H264Context *h ) {
4972 MpegEncContext * const s = &h->s;
4974 const int mba_xy = h->left_mb_xy[0];
4975 const int mbb_xy = h->top_mb_xy;
4976 int ctx = 0;
4977 int bits;
4978 assert(h->slice_type_nos == FF_B_TYPE);
4980 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4981 ctx++;
4982 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4983 ctx++;
4985 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4986 return 0; /* B_Direct_16x16 */
4988 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4989 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4992 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4993 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4994 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4995 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4996 if( bits < 8 )
4997 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4998 else if( bits == 13 ) {
4999 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5000 } else if( bits == 14 )
5001 return 11; /* B_L1_L0_8x16 */
5002 else if( bits == 15 )
5003 return 22; /* B_8x8 */
5005 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5006 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5009 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5010 MpegEncContext * const s = &h->s;
5011 int mba_xy, mbb_xy;
5012 int ctx = 0;
5014 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5015 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5016 mba_xy = mb_xy - 1;
5017 if( (mb_y&1)
5018 && h->slice_table[mba_xy] == h->slice_num
5019 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5020 mba_xy += s->mb_stride;
5021 if( MB_FIELD ){
5022 mbb_xy = mb_xy - s->mb_stride;
5023 if( !(mb_y&1)
5024 && h->slice_table[mbb_xy] == h->slice_num
5025 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5026 mbb_xy -= s->mb_stride;
5027 }else
5028 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5029 }else{
5030 int mb_xy = h->mb_xy;
5031 mba_xy = mb_xy - 1;
5032 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5035 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5036 ctx++;
5037 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5038 ctx++;
5040 if( h->slice_type_nos == FF_B_TYPE )
5041 ctx += 13;
5042 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5045 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5046 int mode = 0;
5048 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5049 return pred_mode;
5051 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5052 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5053 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5055 if( mode >= pred_mode )
5056 return mode + 1;
5057 else
5058 return mode;
5061 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5062 const int mba_xy = h->left_mb_xy[0];
5063 const int mbb_xy = h->top_mb_xy;
5065 int ctx = 0;
5067 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5068 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5069 ctx++;
5071 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5072 ctx++;
5074 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5075 return 0;
5077 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5078 return 1;
5079 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5080 return 2;
5081 else
5082 return 3;
5085 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5086 int cbp_b, cbp_a, ctx, cbp = 0;
5088 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5089 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5091 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5092 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5093 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5094 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5095 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5096 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5097 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5098 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5099 return cbp;
5101 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5102 int ctx;
5103 int cbp_a, cbp_b;
5105 cbp_a = (h->left_cbp>>4)&0x03;
5106 cbp_b = (h-> top_cbp>>4)&0x03;
5108 ctx = 0;
5109 if( cbp_a > 0 ) ctx++;
5110 if( cbp_b > 0 ) ctx += 2;
5111 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5112 return 0;
5114 ctx = 4;
5115 if( cbp_a == 2 ) ctx++;
5116 if( cbp_b == 2 ) ctx += 2;
5117 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5119 static int decode_cabac_mb_dqp( H264Context *h) {
5120 int ctx= h->last_qscale_diff != 0;
5121 int val = 0;
5123 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5124 ctx= 2+(ctx>>1);
5125 val++;
5126 if(val > 102) //prevent infinite loop
5127 return INT_MIN;
5130 if( val&0x01 )
5131 return (val + 1)>>1 ;
5132 else
5133 return -((val + 1)>>1);
5135 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5136 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5137 return 0; /* 8x8 */
5138 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5139 return 1; /* 8x4 */
5140 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5141 return 2; /* 4x8 */
5142 return 3; /* 4x4 */
5144 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5145 int type;
5146 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5147 return 0; /* B_Direct_8x8 */
5148 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5149 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5150 type = 3;
5151 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5152 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5153 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5154 type += 4;
5156 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5157 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5158 return type;
5161 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5162 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5165 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5166 int refa = h->ref_cache[list][scan8[n] - 1];
5167 int refb = h->ref_cache[list][scan8[n] - 8];
5168 int ref = 0;
5169 int ctx = 0;
5171 if( h->slice_type_nos == FF_B_TYPE) {
5172 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5173 ctx++;
5174 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5175 ctx += 2;
5176 } else {
5177 if( refa > 0 )
5178 ctx++;
5179 if( refb > 0 )
5180 ctx += 2;
5183 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5184 ref++;
5185 ctx = (ctx>>2)+4;
5186 if(ref >= 32 /*h->ref_list[list]*/){
5187 return -1;
5190 return ref;
5193 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5194 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5195 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5196 int ctxbase = (l == 0) ? 40 : 47;
5197 int mvd;
5198 int ctx = (amvd>2) + (amvd>32);
5200 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5201 return 0;
5203 mvd= 1;
5204 ctx= 3;
5205 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5206 mvd++;
5207 if( ctx < 6 )
5208 ctx++;
5211 if( mvd >= 9 ) {
5212 int k = 3;
5213 while( get_cabac_bypass( &h->cabac ) ) {
5214 mvd += 1 << k;
5215 k++;
5216 if(k>24){
5217 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5218 return INT_MIN;
5221 while( k-- ) {
5222 if( get_cabac_bypass( &h->cabac ) )
5223 mvd += 1 << k;
5226 return get_cabac_bypass_sign( &h->cabac, -mvd );
5229 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5230 int nza, nzb;
5231 int ctx = 0;
5233 if( is_dc ) {
5234 if( cat == 0 ) {
5235 nza = h->left_cbp&0x100;
5236 nzb = h-> top_cbp&0x100;
5237 } else {
5238 nza = (h->left_cbp>>(6+idx))&0x01;
5239 nzb = (h-> top_cbp>>(6+idx))&0x01;
5241 } else {
5242 assert(cat == 1 || cat == 2 || cat == 4);
5243 nza = h->non_zero_count_cache[scan8[idx] - 1];
5244 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5247 if( nza > 0 )
5248 ctx++;
5250 if( nzb > 0 )
5251 ctx += 2;
5253 return ctx + 4 * cat;
5256 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5257 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5258 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5259 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5260 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5263 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5264 static const int significant_coeff_flag_offset[2][6] = {
5265 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5266 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5268 static const int last_coeff_flag_offset[2][6] = {
5269 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5270 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5272 static const int coeff_abs_level_m1_offset[6] = {
5273 227+0, 227+10, 227+20, 227+30, 227+39, 426
5275 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5276 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5277 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5278 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5279 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5280 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5281 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5282 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5283 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5285 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5286 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5287 * map node ctx => cabac ctx for level=1 */
5288 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5289 /* map node ctx => cabac ctx for level>1 */
5290 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5291 static const uint8_t coeff_abs_level_transition[2][8] = {
5292 /* update node ctx after decoding a level=1 */
5293 { 1, 2, 3, 3, 4, 5, 6, 7 },
5294 /* update node ctx after decoding a level>1 */
5295 { 4, 4, 4, 4, 5, 6, 7, 7 }
5298 int index[64];
5300 int av_unused last;
5301 int coeff_count = 0;
5302 int node_ctx = 0;
5304 uint8_t *significant_coeff_ctx_base;
5305 uint8_t *last_coeff_ctx_base;
5306 uint8_t *abs_level_m1_ctx_base;
5308 #if !ARCH_X86
5309 #define CABAC_ON_STACK
5310 #endif
5311 #ifdef CABAC_ON_STACK
5312 #define CC &cc
5313 CABACContext cc;
5314 cc.range = h->cabac.range;
5315 cc.low = h->cabac.low;
5316 cc.bytestream= h->cabac.bytestream;
5317 #else
5318 #define CC &h->cabac
5319 #endif
5322 /* cat: 0-> DC 16x16 n = 0
5323 * 1-> AC 16x16 n = luma4x4idx
5324 * 2-> Luma4x4 n = luma4x4idx
5325 * 3-> DC Chroma n = iCbCr
5326 * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
5327 * 5-> Luma8x8 n = 4 * luma8x8idx
5330 /* read coded block flag */
5331 if( is_dc || cat != 5 ) {
5332 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5333 if( !is_dc )
5334 h->non_zero_count_cache[scan8[n]] = 0;
5336 #ifdef CABAC_ON_STACK
5337 h->cabac.range = cc.range ;
5338 h->cabac.low = cc.low ;
5339 h->cabac.bytestream= cc.bytestream;
5340 #endif
5341 return;
5345 significant_coeff_ctx_base = h->cabac_state
5346 + significant_coeff_flag_offset[MB_FIELD][cat];
5347 last_coeff_ctx_base = h->cabac_state
5348 + last_coeff_flag_offset[MB_FIELD][cat];
5349 abs_level_m1_ctx_base = h->cabac_state
5350 + coeff_abs_level_m1_offset[cat];
5352 if( !is_dc && cat == 5 ) {
5353 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5354 for(last= 0; last < coefs; last++) { \
5355 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5356 if( get_cabac( CC, sig_ctx )) { \
5357 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5358 index[coeff_count++] = last; \
5359 if( get_cabac( CC, last_ctx ) ) { \
5360 last= max_coeff; \
5361 break; \
5365 if( last == max_coeff -1 ) {\
5366 index[coeff_count++] = last;\
5368 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5369 #if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
5370 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5371 } else {
5372 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5373 #else
5374 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5375 } else {
5376 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5377 #endif
5379 assert(coeff_count > 0);
5381 if( is_dc ) {
5382 if( cat == 0 )
5383 h->cbp_table[h->mb_xy] |= 0x100;
5384 else
5385 h->cbp_table[h->mb_xy] |= 0x40 << n;
5386 } else {
5387 if( cat == 5 )
5388 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5389 else {
5390 assert( cat == 1 || cat == 2 || cat == 4 );
5391 h->non_zero_count_cache[scan8[n]] = coeff_count;
5395 do {
5396 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5398 int j= scantable[index[--coeff_count]];
5400 if( get_cabac( CC, ctx ) == 0 ) {
5401 node_ctx = coeff_abs_level_transition[0][node_ctx];
5402 if( is_dc ) {
5403 block[j] = get_cabac_bypass_sign( CC, -1);
5404 }else{
5405 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5407 } else {
5408 int coeff_abs = 2;
5409 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5410 node_ctx = coeff_abs_level_transition[1][node_ctx];
5412 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5413 coeff_abs++;
5416 if( coeff_abs >= 15 ) {
5417 int j = 0;
5418 while( get_cabac_bypass( CC ) ) {
5419 j++;
5422 coeff_abs=1;
5423 while( j-- ) {
5424 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5426 coeff_abs+= 14;
5429 if( is_dc ) {
5430 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5431 }else{
5432 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5435 } while( coeff_count );
5436 #ifdef CABAC_ON_STACK
5437 h->cabac.range = cc.range ;
5438 h->cabac.low = cc.low ;
5439 h->cabac.bytestream= cc.bytestream;
5440 #endif
5444 #if !CONFIG_SMALL
5445 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5446 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5449 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5450 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5452 #endif
5454 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5455 #if CONFIG_SMALL
5456 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5457 #else
5458 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5459 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5460 #endif
5463 static inline void compute_mb_neighbors(H264Context *h)
5465 MpegEncContext * const s = &h->s;
5466 const int mb_xy = h->mb_xy;
5467 h->top_mb_xy = mb_xy - s->mb_stride;
5468 h->left_mb_xy[0] = mb_xy - 1;
5469 if(FRAME_MBAFF){
5470 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5471 const int top_pair_xy = pair_xy - s->mb_stride;
5472 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5473 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5474 const int curr_mb_field_flag = MB_FIELD;
5475 const int bottom = (s->mb_y & 1);
5477 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
5478 h->top_mb_xy -= s->mb_stride;
5480 if (!left_mb_field_flag == curr_mb_field_flag) {
5481 h->left_mb_xy[0] = pair_xy - 1;
5483 } else if (FIELD_PICTURE) {
5484 h->top_mb_xy -= s->mb_stride;
5486 return;
5490 * decodes a macroblock
5491 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5493 static int decode_mb_cabac(H264Context *h) {
5494 MpegEncContext * const s = &h->s;
5495 int mb_xy;
5496 int mb_type, partition_count, cbp = 0;
5497 int dct8x8_allowed= h->pps.transform_8x8_mode;
5499 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5501 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5502 if( h->slice_type_nos != FF_I_TYPE ) {
5503 int skip;
5504 /* a skipped mb needs the aff flag from the following mb */
5505 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5506 predict_field_decoding_flag(h);
5507 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5508 skip = h->next_mb_skipped;
5509 else
5510 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5511 /* read skip flags */
5512 if( skip ) {
5513 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5514 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5515 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5516 if(!h->next_mb_skipped)
5517 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5520 decode_mb_skip(h);
5522 h->cbp_table[mb_xy] = 0;
5523 h->chroma_pred_mode_table[mb_xy] = 0;
5524 h->last_qscale_diff = 0;
5526 return 0;
5530 if(FRAME_MBAFF){
5531 if( (s->mb_y&1) == 0 )
5532 h->mb_mbaff =
5533 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5536 h->prev_mb_skipped = 0;
5538 compute_mb_neighbors(h);
5540 if( h->slice_type_nos == FF_B_TYPE ) {
5541 mb_type = decode_cabac_mb_type_b( h );
5542 if( mb_type < 23 ){
5543 partition_count= b_mb_type_info[mb_type].partition_count;
5544 mb_type= b_mb_type_info[mb_type].type;
5545 }else{
5546 mb_type -= 23;
5547 goto decode_intra_mb;
5549 } else if( h->slice_type_nos == FF_P_TYPE ) {
5550 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5551 /* P-type */
5552 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5553 /* P_L0_D16x16, P_8x8 */
5554 mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5555 } else {
5556 /* P_L0_D8x16, P_L0_D16x8 */
5557 mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5559 partition_count= p_mb_type_info[mb_type].partition_count;
5560 mb_type= p_mb_type_info[mb_type].type;
5561 } else {
5562 mb_type= decode_cabac_intra_mb_type(h, 17, 0);
5563 goto decode_intra_mb;
5565 } else {
5566 mb_type= decode_cabac_intra_mb_type(h, 3, 1);
5567 if(h->slice_type == FF_SI_TYPE && mb_type)
5568 mb_type--;
5569 assert(h->slice_type_nos == FF_I_TYPE);
5570 decode_intra_mb:
5571 partition_count = 0;
5572 cbp= i_mb_type_info[mb_type].cbp;
5573 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5574 mb_type= i_mb_type_info[mb_type].type;
5576 if(MB_FIELD)
5577 mb_type |= MB_TYPE_INTERLACED;
5579 h->slice_table[ mb_xy ]= h->slice_num;
5581 if(IS_INTRA_PCM(mb_type)) {
5582 const uint8_t *ptr;
5584 // We assume these blocks are very rare so we do not optimize it.
5585 // FIXME The two following lines get the bitstream position in the cabac
5586 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5587 ptr= h->cabac.bytestream;
5588 if(h->cabac.low&0x1) ptr--;
5589 if(CABAC_BITS==16){
5590 if(h->cabac.low&0x1FF) ptr--;
5593 // The pixels are stored in the same order as levels in h->mb array.
5594 memcpy(h->mb, ptr, 256); ptr+=256;
5595 if(CHROMA){
5596 memcpy(h->mb+128, ptr, 128); ptr+=128;
5599 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5601 // All blocks are present
5602 h->cbp_table[mb_xy] = 0x1ef;
5603 h->chroma_pred_mode_table[mb_xy] = 0;
5604 // In deblocking, the quantizer is 0
5605 s->current_picture.qscale_table[mb_xy]= 0;
5606 // All coeffs are present
5607 memset(h->non_zero_count[mb_xy], 16, 16);
5608 s->current_picture.mb_type[mb_xy]= mb_type;
5609 h->last_qscale_diff = 0;
5610 return 0;
5613 if(MB_MBAFF){
5614 h->ref_count[0] <<= 1;
5615 h->ref_count[1] <<= 1;
5618 fill_caches(h, mb_type, 0);
5620 if( IS_INTRA( mb_type ) ) {
5621 int i, pred_mode;
5622 if( IS_INTRA4x4( mb_type ) ) {
5623 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5624 mb_type |= MB_TYPE_8x8DCT;
5625 for( i = 0; i < 16; i+=4 ) {
5626 int pred = pred_intra_mode( h, i );
5627 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5628 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5630 } else {
5631 for( i = 0; i < 16; i++ ) {
5632 int pred = pred_intra_mode( h, i );
5633 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5635 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5638 write_back_intra_pred_mode(h);
5639 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5640 } else {
5641 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5642 if( h->intra16x16_pred_mode < 0 ) return -1;
5644 if(CHROMA){
5645 h->chroma_pred_mode_table[mb_xy] =
5646 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5648 pred_mode= check_intra_pred_mode( h, pred_mode );
5649 if( pred_mode < 0 ) return -1;
5650 h->chroma_pred_mode= pred_mode;
5652 } else if( partition_count == 4 ) {
5653 int i, j, sub_partition_count[4], list, ref[2][4];
5655 if( h->slice_type_nos == FF_B_TYPE ) {
5656 for( i = 0; i < 4; i++ ) {
5657 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5658 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5659 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5661 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5662 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5663 pred_direct_motion(h, &mb_type);
5664 h->ref_cache[0][scan8[4]] =
5665 h->ref_cache[1][scan8[4]] =
5666 h->ref_cache[0][scan8[12]] =
5667 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5668 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5669 for( i = 0; i < 4; i++ )
5670 if( IS_DIRECT(h->sub_mb_type[i]) )
5671 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5674 } else {
5675 for( i = 0; i < 4; i++ ) {
5676 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5677 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5678 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5682 for( list = 0; list < h->list_count; list++ ) {
5683 for( i = 0; i < 4; i++ ) {
5684 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5685 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5686 if( h->ref_count[list] > 1 ){
5687 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5688 if(ref[list][i] >= (unsigned)h->ref_count[list]){
5689 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
5690 return -1;
5692 }else
5693 ref[list][i] = 0;
5694 } else {
5695 ref[list][i] = -1;
5697 h->ref_cache[list][ scan8[4*i]+1 ]=
5698 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5702 if(dct8x8_allowed)
5703 dct8x8_allowed = get_dct8x8_allowed(h);
5705 for(list=0; list<h->list_count; list++){
5706 for(i=0; i<4; i++){
5707 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5708 if(IS_DIRECT(h->sub_mb_type[i])){
5709 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5710 continue;
5713 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5714 const int sub_mb_type= h->sub_mb_type[i];
5715 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5716 for(j=0; j<sub_partition_count[i]; j++){
5717 int mpx, mpy;
5718 int mx, my;
5719 const int index= 4*i + block_width*j;
5720 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5721 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5722 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5724 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5725 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5726 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5728 if(IS_SUB_8X8(sub_mb_type)){
5729 mv_cache[ 1 ][0]=
5730 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5731 mv_cache[ 1 ][1]=
5732 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5734 mvd_cache[ 1 ][0]=
5735 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5736 mvd_cache[ 1 ][1]=
5737 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5738 }else if(IS_SUB_8X4(sub_mb_type)){
5739 mv_cache[ 1 ][0]= mx;
5740 mv_cache[ 1 ][1]= my;
5742 mvd_cache[ 1 ][0]= mx - mpx;
5743 mvd_cache[ 1 ][1]= my - mpy;
5744 }else if(IS_SUB_4X8(sub_mb_type)){
5745 mv_cache[ 8 ][0]= mx;
5746 mv_cache[ 8 ][1]= my;
5748 mvd_cache[ 8 ][0]= mx - mpx;
5749 mvd_cache[ 8 ][1]= my - mpy;
5751 mv_cache[ 0 ][0]= mx;
5752 mv_cache[ 0 ][1]= my;
5754 mvd_cache[ 0 ][0]= mx - mpx;
5755 mvd_cache[ 0 ][1]= my - mpy;
5757 }else{
5758 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5759 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5760 p[0] = p[1] = p[8] = p[9] = 0;
5761 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5765 } else if( IS_DIRECT(mb_type) ) {
5766 pred_direct_motion(h, &mb_type);
5767 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5768 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5769 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5770 } else {
5771 int list, mx, my, i, mpx, mpy;
5772 if(IS_16X16(mb_type)){
5773 for(list=0; list<h->list_count; list++){
5774 if(IS_DIR(mb_type, 0, list)){
5775 int ref;
5776 if(h->ref_count[list] > 1){
5777 ref= decode_cabac_mb_ref(h, list, 0);
5778 if(ref >= (unsigned)h->ref_count[list]){
5779 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5780 return -1;
5782 }else
5783 ref=0;
5784 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5785 }else
5786 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5788 for(list=0; list<h->list_count; list++){
5789 if(IS_DIR(mb_type, 0, list)){
5790 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5792 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5793 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5794 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5796 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5797 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5798 }else
5799 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5802 else if(IS_16X8(mb_type)){
5803 for(list=0; list<h->list_count; list++){
5804 for(i=0; i<2; i++){
5805 if(IS_DIR(mb_type, i, list)){
5806 int ref;
5807 if(h->ref_count[list] > 1){
5808 ref= decode_cabac_mb_ref( h, list, 8*i );
5809 if(ref >= (unsigned)h->ref_count[list]){
5810 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5811 return -1;
5813 }else
5814 ref=0;
5815 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5816 }else
5817 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5820 for(list=0; list<h->list_count; list++){
5821 for(i=0; i<2; i++){
5822 if(IS_DIR(mb_type, i, list)){
5823 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5824 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5825 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5826 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5828 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5829 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5830 }else{
5831 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5832 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5836 }else{
5837 assert(IS_8X16(mb_type));
5838 for(list=0; list<h->list_count; list++){
5839 for(i=0; i<2; i++){
5840 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5841 int ref;
5842 if(h->ref_count[list] > 1){
5843 ref= decode_cabac_mb_ref( h, list, 4*i );
5844 if(ref >= (unsigned)h->ref_count[list]){
5845 av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
5846 return -1;
5848 }else
5849 ref=0;
5850 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5851 }else
5852 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5855 for(list=0; list<h->list_count; list++){
5856 for(i=0; i<2; i++){
5857 if(IS_DIR(mb_type, i, list)){
5858 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5859 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5860 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5862 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5863 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5864 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5865 }else{
5866 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5867 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5874 if( IS_INTER( mb_type ) ) {
5875 h->chroma_pred_mode_table[mb_xy] = 0;
5876 write_back_motion( h, mb_type );
5879 if( !IS_INTRA16x16( mb_type ) ) {
5880 cbp = decode_cabac_mb_cbp_luma( h );
5881 if(CHROMA)
5882 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5885 h->cbp_table[mb_xy] = h->cbp = cbp;
5887 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5888 if( decode_cabac_mb_transform_size( h ) )
5889 mb_type |= MB_TYPE_8x8DCT;
5891 s->current_picture.mb_type[mb_xy]= mb_type;
5893 if( cbp || IS_INTRA16x16( mb_type ) ) {
5894 const uint8_t *scan, *scan8x8, *dc_scan;
5895 const uint32_t *qmul;
5896 int dqp;
5898 if(IS_INTERLACED(mb_type)){
5899 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5900 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5901 dc_scan= luma_dc_field_scan;
5902 }else{
5903 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5904 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5905 dc_scan= luma_dc_zigzag_scan;
5908 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5909 if( dqp == INT_MIN ){
5910 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5911 return -1;
5913 s->qscale += dqp;
5914 if(((unsigned)s->qscale) > 51){
5915 if(s->qscale<0) s->qscale+= 52;
5916 else s->qscale-= 52;
5918 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5919 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5921 if( IS_INTRA16x16( mb_type ) ) {
5922 int i;
5923 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5924 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5926 if( cbp&15 ) {
5927 qmul = h->dequant4_coeff[0][s->qscale];
5928 for( i = 0; i < 16; i++ ) {
5929 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5930 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5932 } else {
5933 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5935 } else {
5936 int i8x8, i4x4;
5937 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5938 if( cbp & (1<<i8x8) ) {
5939 if( IS_8x8DCT(mb_type) ) {
5940 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5941 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5942 } else {
5943 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5944 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5945 const int index = 4*i8x8 + i4x4;
5946 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5947 //START_TIMER
5948 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5949 //STOP_TIMER("decode_residual")
5952 } else {
5953 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5954 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5959 if( cbp&0x30 ){
5960 int c;
5961 for( c = 0; c < 2; c++ ) {
5962 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5963 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5967 if( cbp&0x20 ) {
5968 int c, i;
5969 for( c = 0; c < 2; c++ ) {
5970 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5971 for( i = 0; i < 4; i++ ) {
5972 const int index = 16 + 4 * c + i;
5973 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5974 decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
5977 } else {
5978 uint8_t * const nnz= &h->non_zero_count_cache[0];
5979 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5980 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5982 } else {
5983 uint8_t * const nnz= &h->non_zero_count_cache[0];
5984 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5985 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5986 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5987 h->last_qscale_diff = 0;
5990 s->current_picture.qscale_table[mb_xy]= s->qscale;
5991 write_back_non_zero_count(h);
5993 if(MB_MBAFF){
5994 h->ref_count[0] >>= 1;
5995 h->ref_count[1] >>= 1;
5998 return 0;
6002 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6003 const int index_a = qp + h->slice_alpha_c0_offset;
6004 const int alpha = (alpha_table+52)[index_a];
6005 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6006 if (alpha ==0 || beta == 0) return;
6008 if( bS[0] < 4 ) {
6009 int8_t tc[4];
6010 tc[0] = (tc0_table+52)[index_a][bS[0]];
6011 tc[1] = (tc0_table+52)[index_a][bS[1]];
6012 tc[2] = (tc0_table+52)[index_a][bS[2]];
6013 tc[3] = (tc0_table+52)[index_a][bS[3]];
6014 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6015 } else {
6016 h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
6019 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6020 const int index_a = qp + h->slice_alpha_c0_offset;
6021 const int alpha = (alpha_table+52)[index_a];
6022 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6023 if (alpha ==0 || beta == 0) return;
6025 if( bS[0] < 4 ) {
6026 int8_t tc[4];
6027 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6028 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6029 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6030 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6031 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6032 } else {
6033 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Deblock one vertical luma edge of an MBAFF macroblock pair.
 * Filters all 16 rows one pixel row at a time in C (rather than via the
 * dsputil loop filters) because adjacent rows may belong to different
 * fields and thus use different bS/QP values.
 *
 * @param pix    pointer to the first luma row right of the edge
 * @param stride luma line size
 * @param bS     8 boundary strengths (two interleaved sets of 4)
 * @param qp     2 luma QPs (already averaged with the left neighbour)
 */
static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
    int i;
    for( i = 0; i < 16; i++, pix += stride) {
        int index_a;
        int alpha;
        int beta;

        int qp_index;
        int bS_index = (i >> 1);
        if (!MB_FIELD) {
            /* frame MB: alternate rows pick alternate bS entries */
            bS_index &= ~1;
            bS_index |= (i & 1);
        }

        if( bS[bS_index] == 0 ) {
            continue;       /* nothing to filter on this row */
        }

        /* field MB: top/bottom half select qp[0]/qp[1]; frame MB: per-row parity */
        qp_index = MB_FIELD ? (i >> 3) : (i & 1);
        index_a = qp[qp_index] + h->slice_alpha_c0_offset;
        alpha = (alpha_table+52)[index_a];
        beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];

        if( bS[bS_index] < 4 ) {
            /* normal filtering, deltas clipped by tc (tc0 plus one per
             * additionally-filtered side) */
            const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
            const int p0 = pix[-1];
            const int p1 = pix[-2];
            const int p2 = pix[-3];
            const int q0 = pix[0];
            const int q1 = pix[1];
            const int q2 = pix[2];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {
                int tc = tc0;
                int i_delta;

                if( FFABS( p2 - p0 ) < beta ) {
                    /* p1' — also widens the p0/q0 clipping range */
                    pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    /* q1' — also widens the p0/q0 clipping range */
                    pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
                tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
            }
        }else{
            /* bS == 4: strong (intra-edge) filtering, no tc clipping */
            const int p0 = pix[-1];
            const int p1 = pix[-2];
            const int p2 = pix[-3];

            const int q0 = pix[0];
            const int q1 = pix[1];
            const int q2 = pix[2];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
                    if( FFABS( p2 - p0 ) < beta)
                    {
                        const int p3 = pix[-4];
                        /* p0', p1', p2' */
                        pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                        pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                        pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                    } else {
                        /* p0' */
                        pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                    }
                    if( FFABS( q2 - q0 ) < beta)
                    {
                        const int q3 = pix[3];
                        /* q0', q1', q2' */
                        pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                        pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                        pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                    } else {
                        /* q0' */
                        pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                    }
                }else{
                    /* weaker fallback: p0', q0' only */
                    pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                    pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                }
                tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
            }
        }
    }
}
/**
 * Deblock one vertical chroma edge of an MBAFF macroblock pair.
 * Chroma analogue of filter_mb_mbaff_edgev: 8 rows, filtered one at a
 * time because adjacent rows may use different bS/QP values.
 *
 * @param pix    pointer to the first chroma row right of the edge
 * @param stride chroma line size
 * @param bS     8 boundary strengths (one per row)
 * @param qp     2 chroma QPs (already averaged with the left neighbour)
 */
static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
    int i;
    for( i = 0; i < 8; i++, pix += stride) {
        int index_a;
        int alpha;
        int beta;

        int qp_index;
        int bS_index = i;

        if( bS[bS_index] == 0 ) {
            continue;       /* nothing to filter on this row */
        }

        /* field MB: top/bottom half select qp[0]/qp[1]; frame MB: per-row parity */
        qp_index = MB_FIELD ? (i >> 2) : (i & 1);
        index_a = qp[qp_index] + h->slice_alpha_c0_offset;
        alpha = (alpha_table+52)[index_a];
        beta  = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];

        if( bS[bS_index] < 4 ) {
            /* normal chroma filtering; clipping limit is tc0 + 1 */
            const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
            const int p0 = pix[-1];
            const int p1 = pix[-2];
            const int q0 = pix[0];
            const int q1 = pix[1];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {
                const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-1] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]  = av_clip_uint8( q0 - i_delta );    /* q0' */
                tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
            }
        }else{
            /* bS == 4: strong (intra-edge) chroma filtering */
            const int p0 = pix[-1];
            const int p1 = pix[-2];
            const int q0 = pix[0];
            const int q1 = pix[1];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
                pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
                tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
            }
        }
    }
}
6188 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6189 const int index_a = qp + h->slice_alpha_c0_offset;
6190 const int alpha = (alpha_table+52)[index_a];
6191 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6192 if (alpha ==0 || beta == 0) return;
6194 if( bS[0] < 4 ) {
6195 int8_t tc[4];
6196 tc[0] = (tc0_table+52)[index_a][bS[0]];
6197 tc[1] = (tc0_table+52)[index_a][bS[1]];
6198 tc[2] = (tc0_table+52)[index_a][bS[2]];
6199 tc[3] = (tc0_table+52)[index_a][bS[3]];
6200 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6201 } else {
6202 h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
6206 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6207 const int index_a = qp + h->slice_alpha_c0_offset;
6208 const int alpha = (alpha_table+52)[index_a];
6209 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6210 if (alpha ==0 || beta == 0) return;
6212 if( bS[0] < 4 ) {
6213 int8_t tc[4];
6214 tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
6215 tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
6216 tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
6217 tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
6218 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6219 } else {
6220 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/**
 * Fast-path deblocking of one macroblock.
 * Falls back to the full filter_mb() for cases it cannot handle (picture
 * edges, missing dsp strength function, per-plane chroma QP differences,
 * slice-boundary filtering mode 2). Otherwise computes boundary strengths
 * in bulk — via constants for intra MBs or the dsputil
 * h264_loop_filter_strength function for inter MBs — and filters edges
 * through the per-edge helpers.
 */
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
    MpegEncContext * const s = &h->s;
    int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
    int mb_xy, mb_type;
    int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;

    mb_xy = h->mb_xy;

    /* cases the fast path cannot handle: delegate to the full filter */
    if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
       !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
       (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
                                      h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
        filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
        return;
    }
    assert(!FRAME_MBAFF);

    /* QPs for the current MB and its left/top neighbours; edge filtering
     * uses the average of the two adjacent MBs' QPs */
    mb_type = s->current_picture.mb_type[mb_xy];
    qp      = s->current_picture.qscale_table[mb_xy];
    qp0     = s->current_picture.qscale_table[mb_xy-1];
    qp1     = s->current_picture.qscale_table[h->top_mb_xy];
    qpc  = get_chroma_qp( h, 0, qp );
    qpc0 = get_chroma_qp( h, 0, qp0 );
    qpc1 = get_chroma_qp( h, 0, qp1 );
    qp0  = (qp + qp0 + 1) >> 1;
    qp1  = (qp + qp1 + 1) >> 1;
    qpc0 = (qpc + qpc0 + 1) >> 1;
    qpc1 = (qpc + qpc1 + 1) >> 1;
    qp_thresh = 15 - h->slice_alpha_c0_offset;
    /* below this threshold the alpha/beta tables are all zero: no-op */
    if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
       qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
        return;

    if( IS_INTRA(mb_type) ) {
        /* intra MB: MB-boundary edges get bS 4 (3 for horizontal edges in
         * field pictures), internal edges bS 3 */
        int16_t bS4[4] = {4,4,4,4};
        int16_t bS3[4] = {3,3,3,3};
        int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
        if( IS_8x8DCT(mb_type) ) {
            /* 8x8 transform: only edges 0 and 2 exist */
            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
        } else {
            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
            filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
            filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
            filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
            filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
        }
        /* chroma: only even edges are filtered */
        filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
        filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
        filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
        filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
        filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
        filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
        filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
        filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
        return;
    } else {
        DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
        /* uint64_t view writes a whole 4-entry bS row at once */
        uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
        int edges;
        if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
            /* all relevant 8x8 blocks coded: every filtered edge gets bS 2 */
            edges = 4;
            bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
        } else {
            /* mask_edge*: edges whose mv-based bS is known to be 0 by
             * partition shape, so the strength function can skip them */
            int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
                             (mb_type & MB_TYPE_16x8) ? 1 : 0;
            int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
                             && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
                             ? 3 : 0;
            int step = IS_8x8DCT(mb_type) ? 2 : 1;
            edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
            s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
                                              (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
        }
        /* intra neighbours force maximum strength on the shared edge */
        if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
            bSv[0][0] = 0x0004000400040004ULL;
        if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
            bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;

/* filter one edge (dir 0 = vertical, 1 = horizontal); chroma only on even
 * edges; edge 0 uses the neighbour-averaged QP */
#define FILTER(hv,dir,edge)\
        if(bSv[dir][edge]) {\
            filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
            if(!(edge&1)) {\
                filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
                filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
            }\
        }
        if( edges == 1 ) {
            FILTER(v,0,0);
            FILTER(h,1,0);
        } else if( IS_8x8DCT(mb_type) ) {
            FILTER(v,0,0);
            FILTER(v,0,2);
            FILTER(h,1,0);
            FILTER(h,1,2);
        } else {
            FILTER(v,0,0);
            FILTER(v,0,1);
            FILTER(v,0,2);
            FILTER(v,0,3);
            FILTER(h,1,0);
            FILTER(h,1,1);
            FILTER(h,1,2);
            FILTER(h,1,3);
        }
#undef FILTER
    }
}
/**
 * Deblock all edges of one macroblock in one direction.
 * Computes the boundary strength (bS) for each 4-sample segment of each
 * edge per the H.264 rules, then calls the per-edge filter helpers.
 *
 * @param mb_xy    current MB index
 * @param mb_type  current MB type flags
 * @param mvy_limit vertical MV difference threshold for bS (2 or 4)
 * @param first_vertical_edge_done nonzero if the caller already filtered
 *                 the left MB edge (MBAFF special case)
 * @param dir      0 = vertical edges (left neighbour), 1 = horizontal
 *                 edges (top neighbour)
 */
static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
    MpegEncContext * const s = &h->s;
    int edge;
    /* mbm_xy: the neighbouring MB across edge 0 for this direction */
    const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
    const int mbm_type = s->current_picture.mb_type[mbm_xy];
    /* ref2frm maps reference indices to frame numbers so references can be
     * compared across slices; ref2frmm is the neighbour's mapping */
    int (*ref2frm) [64] = h->ref2frm[ h->slice_num          &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
    int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
    /* 0xFFFF slice table entry means the neighbour does not exist: skip edge 0 */
    int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;

    /* 16x16 skip MBs can only have a nonzero bS on edge 0 */
    const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
                              == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
    // how often to recheck mv-based bS when iterating between edges
    const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
                          (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
    // how often to recheck mv-based bS when iterating along each edge
    const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));

    if (first_vertical_edge_done) {
        start = 1;
    }

    /* deblocking_filter==2: do not filter across slice boundaries */
    if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
        start = 1;

    if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
        && !IS_INTERLACED(mb_type)
        && IS_INTERLACED(mbm_type)
        ) {
        // This is a special case in the norm where the filtering must
        // be done twice (one each of the field) even if we are in a
        // frame macroblock.
        //
        static const int nnz_idx[4] = {4,5,6,3};
        unsigned int tmp_linesize   = 2 *   linesize;
        unsigned int tmp_uvlinesize = 2 * uvlinesize;
        int mbn_xy = mb_xy - 2 * s->mb_stride;      /* top field MB of the pair above */
        int qp;
        int i, j;
        int16_t bS[4];

        for(j=0; j<2; j++, mbn_xy += s->mb_stride){
            if( IS_INTRA(mb_type) ||
                IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
                bS[0] = bS[1] = bS[2] = bS[3] = 3;
            } else {
                /* bS 2 if either side has coded coefficients, else 1 */
                const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
                for( i = 0; i < 4; i++ ) {
                    if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
                        mbn_nnz[nnz_idx[i]] != 0 )
                        bS[i] = 2;
                    else
                        bS[i] = 1;
                }
            }
            // Do not use s->qscale as luma quantizer because it has not the same
            // value in IPCM macroblocks.
            qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
            tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
            { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
            filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
            filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
                              ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
            filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
                              ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
        }

        start = 1;
    }

    /* Calculate bS */
    for( edge = start; edge < edges; edge++ ) {
        /* mbn_xy: neighbor macroblock */
        const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
        const int mbn_type = s->current_picture.mb_type[mbn_xy];
        int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
        int16_t bS[4];
        int qp;

        /* with 8x8 transform only every second edge exists */
        if( (edge&1) && IS_8x8DCT(mb_type) )
            continue;

        if( IS_INTRA(mb_type) ||
            IS_INTRA(mbn_type) ) {
            /* intra on either side: bS 4 on a vertical MB boundary (or any
             * boundary in progressive frames), 3 otherwise */
            int value;
            if (edge == 0) {
                if (   (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
                    || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
                ) {
                    value = 4;
                } else {
                    value = 3;
                }
            } else {
                value = 3;
            }
            bS[0] = bS[1] = bS[2] = bS[3] = value;
        } else {
            int i, l;
            int mv_done;    /* set when mv-based bS is already decided for all 4 segments */

            if( edge & mask_edge ) {
                /* partition shape guarantees identical mv/ref on both sides */
                bS[0] = bS[1] = bS[2] = bS[3] = 0;
                mv_done = 1;
            }
            else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
                /* mixed field/frame pair: bS is fixed at 1 */
                bS[0] = bS[1] = bS[2] = bS[3] = 1;
                mv_done = 1;
            }
            else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
                /* one partition per side: compare a single mv/ref pair */
                int b_idx= 8 + 4 + edge * (dir ? 8:1);
                int bn_idx= b_idx - (dir ? 8:1);
                int v = 0;

                for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
                    v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
                         FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
                         FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
                }

                if(h->slice_type_nos == FF_B_TYPE && v){
                    /* B slices: also allow a cross-list match (L0 vs L1) */
                    v=0;
                    for( l = 0; !v && l < 2; l++ ) {
                        int ln= 1-l;
                        v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
                    }
                }

                bS[0] = bS[1] = bS[2] = bS[3] = v;
                mv_done = 1;
            }
            else
                mv_done = 0;

            for( i = 0; i < 4; i++ ) {
                int x = dir == 0 ? edge : i;
                int y = dir == 0 ? i    : edge;
                int b_idx= 8 + 4 + x + 8*y;
                int bn_idx= b_idx - (dir ? 8:1);

                if( h->non_zero_count_cache[b_idx] |
                    h->non_zero_count_cache[bn_idx] ) {
                    /* coded coefficients on either side */
                    bS[i] = 2;
                }
                else if(!mv_done)
                {
                    /* per-segment mv/ref comparison */
                    bS[i] = 0;
                    for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
                        if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
                            FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
                            FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
                            bS[i] = 1;
                            break;
                        }
                    }

                    if(h->slice_type_nos == FF_B_TYPE && bS[i]){
                        /* B slices: also allow a cross-list match */
                        bS[i] = 0;
                        for( l = 0; l < 2; l++ ) {
                            int ln= 1-l;
                            if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
                                FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
                                FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
                                bS[i] = 1;
                                break;
                            }
                        }
                    }
                }
            }

            if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
                continue;       /* whole edge is a no-op */
        }

        /* Filter edge */
        // Do not use s->qscale as luma quantizer because it has not the same
        // value in IPCM macroblocks.
        qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
        //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
        tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
        { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
        if( dir == 0 ) {
            filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
            if( (edge&1) == 0 ) {
                filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
                                  ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
                filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
                                  ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
            }
        } else {
            filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
            if( (edge&1) == 0 ) {
                filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
                                  ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
                filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
                                  ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
            }
        }
    }
}
6542 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6543 MpegEncContext * const s = &h->s;
6544 const int mb_xy= mb_x + mb_y*s->mb_stride;
6545 const int mb_type = s->current_picture.mb_type[mb_xy];
6546 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6547 int first_vertical_edge_done = 0;
6548 av_unused int dir;
6550 //for sufficiently low qp, filtering wouldn't do anything
6551 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6552 if(!FRAME_MBAFF){
6553 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6554 int qp = s->current_picture.qscale_table[mb_xy];
6555 if(qp <= qp_thresh
6556 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6557 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6558 return;
6562 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6563 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6564 int top_type, left_type[2];
6565 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6566 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6567 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6569 if(IS_8x8DCT(top_type)){
6570 h->non_zero_count_cache[4+8*0]=
6571 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6572 h->non_zero_count_cache[6+8*0]=
6573 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6575 if(IS_8x8DCT(left_type[0])){
6576 h->non_zero_count_cache[3+8*1]=
6577 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6579 if(IS_8x8DCT(left_type[1])){
6580 h->non_zero_count_cache[3+8*3]=
6581 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6584 if(IS_8x8DCT(mb_type)){
6585 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6586 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
6588 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6589 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
6591 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6592 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
6594 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6595 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
6599 if (FRAME_MBAFF
6600 // left mb is in picture
6601 && h->slice_table[mb_xy-1] != 0xFFFF
6602 // and current and left pair do not have the same interlaced type
6603 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6604 // and left mb is in the same slice if deblocking_filter == 2
6605 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6606 /* First vertical edge is different in MBAFF frames
6607 * There are 8 different bS to compute and 2 different Qp
6609 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6610 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6611 int16_t bS[8];
6612 int qp[2];
6613 int bqp[2];
6614 int rqp[2];
6615 int mb_qp, mbn0_qp, mbn1_qp;
6616 int i;
6617 first_vertical_edge_done = 1;
6619 if( IS_INTRA(mb_type) )
6620 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6621 else {
6622 for( i = 0; i < 8; i++ ) {
6623 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6625 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6626 bS[i] = 4;
6627 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6628 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6629 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6631 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6632 bS[i] = 2;
6633 else
6634 bS[i] = 1;
6638 mb_qp = s->current_picture.qscale_table[mb_xy];
6639 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6640 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6641 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6642 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6643 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6644 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6645 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6646 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6647 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6648 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6649 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6650 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6652 /* Filter edge */
6653 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6654 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6655 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6656 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6657 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6660 #if CONFIG_SMALL
6661 for( dir = 0; dir < 2; dir++ )
6662 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
6663 #else
6664 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
6665 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
6666 #endif
6669 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6670 H264Context *h = *(void**)arg;
6671 MpegEncContext * const s = &h->s;
6672 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6674 s->mb_skip_run= -1;
6676 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
6677 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
6679 if( h->pps.cabac ) {
6680 int i;
6682 /* realign */
6683 align_get_bits( &s->gb );
6685 /* init cabac */
6686 ff_init_cabac_states( &h->cabac);
6687 ff_init_cabac_decoder( &h->cabac,
6688 s->gb.buffer + get_bits_count(&s->gb)/8,
6689 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6690 /* calculate pre-state */
6691 for( i= 0; i < 460; i++ ) {
6692 int pre;
6693 if( h->slice_type_nos == FF_I_TYPE )
6694 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6695 else
6696 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6698 if( pre <= 63 )
6699 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6700 else
6701 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6704 for(;;){
6705 //START_TIMER
6706 int ret = decode_mb_cabac(h);
6707 int eos;
6708 //STOP_TIMER("decode_mb_cabac")
6710 if(ret>=0) hl_decode_mb(h);
6712 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6713 s->mb_y++;
6715 ret = decode_mb_cabac(h);
6717 if(ret>=0) hl_decode_mb(h);
6718 s->mb_y--;
6720 eos = get_cabac_terminate( &h->cabac );
6722 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6723 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6724 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6725 return -1;
6728 if( ++s->mb_x >= s->mb_width ) {
6729 s->mb_x = 0;
6730 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6731 ++s->mb_y;
6732 if(FIELD_OR_MBAFF_PICTURE) {
6733 ++s->mb_y;
6737 if( eos || s->mb_y >= s->mb_height ) {
6738 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6739 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6740 return 0;
6744 } else {
6745 for(;;){
6746 int ret = decode_mb_cavlc(h);
6748 if(ret>=0) hl_decode_mb(h);
6750 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6751 s->mb_y++;
6752 ret = decode_mb_cavlc(h);
6754 if(ret>=0) hl_decode_mb(h);
6755 s->mb_y--;
6758 if(ret<0){
6759 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6760 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6762 return -1;
6765 if(++s->mb_x >= s->mb_width){
6766 s->mb_x=0;
6767 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6768 ++s->mb_y;
6769 if(FIELD_OR_MBAFF_PICTURE) {
6770 ++s->mb_y;
6772 if(s->mb_y >= s->mb_height){
6773 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6775 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6776 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6778 return 0;
6779 }else{
6780 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6782 return -1;
6787 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6788 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6789 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6790 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6792 return 0;
6793 }else{
6794 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6796 return -1;
6802 #if 0
6803 for(;s->mb_y < s->mb_height; s->mb_y++){
6804 for(;s->mb_x < s->mb_width; s->mb_x++){
6805 int ret= decode_mb(h);
6807 hl_decode_mb(h);
6809 if(ret<0){
6810 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6811 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6813 return -1;
6816 if(++s->mb_x >= s->mb_width){
6817 s->mb_x=0;
6818 if(++s->mb_y >= s->mb_height){
6819 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6820 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6822 return 0;
6823 }else{
6824 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6826 return -1;
6831 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6832 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6833 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6835 return 0;
6836 }else{
6837 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6839 return -1;
6843 s->mb_x=0;
6844 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6846 #endif
6847 return -1; //not reached
6850 static int decode_picture_timing(H264Context *h){
6851 MpegEncContext * const s = &h->s;
6852 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6853 h->sei_cpb_removal_delay = get_bits(&s->gb, h->sps.cpb_removal_delay_length);
6854 h->sei_dpb_output_delay = get_bits(&s->gb, h->sps.dpb_output_delay_length);
6856 if(h->sps.pic_struct_present_flag){
6857 unsigned int i, num_clock_ts;
6858 h->sei_pic_struct = get_bits(&s->gb, 4);
6859 h->sei_ct_type = 0;
6861 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6862 return -1;
6864 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6866 for (i = 0 ; i < num_clock_ts ; i++){
6867 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6868 unsigned int full_timestamp_flag;
6869 h->sei_ct_type |= 1<<get_bits(&s->gb, 2);
6870 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6871 skip_bits(&s->gb, 5); /* counting_type */
6872 full_timestamp_flag = get_bits(&s->gb, 1);
6873 skip_bits(&s->gb, 1); /* discontinuity_flag */
6874 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6875 skip_bits(&s->gb, 8); /* n_frames */
6876 if(full_timestamp_flag){
6877 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6878 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6879 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6880 }else{
6881 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6882 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6883 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6884 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6885 if(get_bits(&s->gb, 1)) /* hours_flag */
6886 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6890 if(h->sps.time_offset_length > 0)
6891 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6895 return 0;
6898 static int decode_unregistered_user_data(H264Context *h, int size){
6899 MpegEncContext * const s = &h->s;
6900 uint8_t user_data[16+256];
6901 int e, build, i;
6903 if(size<16)
6904 return -1;
6906 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6907 user_data[i]= get_bits(&s->gb, 8);
6910 user_data[i]= 0;
6911 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6912 if(e==1 && build>=0)
6913 h->x264_build= build;
6915 if(s->avctx->debug & FF_DEBUG_BUGS)
6916 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6918 for(; i<size; i++)
6919 skip_bits(&s->gb, 8);
6921 return 0;
6924 static int decode_recovery_point(H264Context *h){
6925 MpegEncContext * const s = &h->s;
6927 h->sei_recovery_frame_cnt = get_ue_golomb(&s->gb);
6928 skip_bits(&s->gb, 4); /* 1b exact_match_flag, 1b broken_link_flag, 2b changing_slice_group_idc */
6930 return 0;
6933 static int decode_buffering_period(H264Context *h){
6934 MpegEncContext * const s = &h->s;
6935 unsigned int sps_id;
6936 int sched_sel_idx;
6937 SPS *sps;
6939 sps_id = get_ue_golomb_31(&s->gb);
6940 if(sps_id > 31 || !h->sps_buffers[sps_id]) {
6941 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %d referenced in buffering period\n", sps_id);
6942 return -1;
6944 sps = h->sps_buffers[sps_id];
6946 // NOTE: This is really so duplicated in the standard... See H.264, D.1.1
6947 if (sps->nal_hrd_parameters_present_flag) {
6948 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6949 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6950 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6953 if (sps->vcl_hrd_parameters_present_flag) {
6954 for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
6955 h->initial_cpb_removal_delay[sched_sel_idx] = get_bits(&s->gb, sps->initial_cpb_removal_delay_length);
6956 skip_bits(&s->gb, sps->initial_cpb_removal_delay_length); // initial_cpb_removal_delay_offset
6960 h->sei_buffering_period_present = 1;
6961 return 0;
6964 int ff_h264_decode_sei(H264Context *h){
6965 MpegEncContext * const s = &h->s;
6967 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6968 int size, type;
6970 type=0;
6972 type+= show_bits(&s->gb, 8);
6973 }while(get_bits(&s->gb, 8) == 255);
6975 size=0;
6977 size+= show_bits(&s->gb, 8);
6978 }while(get_bits(&s->gb, 8) == 255);
6980 switch(type){
6981 case SEI_TYPE_PIC_TIMING: // Picture timing SEI
6982 if(decode_picture_timing(h) < 0)
6983 return -1;
6984 break;
6985 case SEI_TYPE_USER_DATA_UNREGISTERED:
6986 if(decode_unregistered_user_data(h, size) < 0)
6987 return -1;
6988 break;
6989 case SEI_TYPE_RECOVERY_POINT:
6990 if(decode_recovery_point(h) < 0)
6991 return -1;
6992 break;
6993 case SEI_BUFFERING_PERIOD:
6994 if(decode_buffering_period(h) < 0)
6995 return -1;
6996 break;
6997 default:
6998 skip_bits(&s->gb, 8*size);
7001 //FIXME check bits here
7002 align_get_bits(&s->gb);
7005 return 0;
7008 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
7009 MpegEncContext * const s = &h->s;
7010 int cpb_count, i;
7011 cpb_count = get_ue_golomb_31(&s->gb) + 1;
7013 if(cpb_count > 32U){
7014 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
7015 return -1;
7018 get_bits(&s->gb, 4); /* bit_rate_scale */
7019 get_bits(&s->gb, 4); /* cpb_size_scale */
7020 for(i=0; i<cpb_count; i++){
7021 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7022 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7023 get_bits1(&s->gb); /* cbr_flag */
7025 sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7026 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
7027 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
7028 sps->time_offset_length = get_bits(&s->gb, 5);
7029 sps->cpb_cnt = cpb_count;
7030 return 0;
7033 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7034 MpegEncContext * const s = &h->s;
7035 int aspect_ratio_info_present_flag;
7036 unsigned int aspect_ratio_idc;
7038 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7040 if( aspect_ratio_info_present_flag ) {
7041 aspect_ratio_idc= get_bits(&s->gb, 8);
7042 if( aspect_ratio_idc == EXTENDED_SAR ) {
7043 sps->sar.num= get_bits(&s->gb, 16);
7044 sps->sar.den= get_bits(&s->gb, 16);
7045 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
7046 sps->sar= pixel_aspect[aspect_ratio_idc];
7047 }else{
7048 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7049 return -1;
7051 }else{
7052 sps->sar.num=
7053 sps->sar.den= 0;
7055 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7057 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7058 get_bits1(&s->gb); /* overscan_appropriate_flag */
7061 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7062 get_bits(&s->gb, 3); /* video_format */
7063 get_bits1(&s->gb); /* video_full_range_flag */
7064 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7065 get_bits(&s->gb, 8); /* colour_primaries */
7066 get_bits(&s->gb, 8); /* transfer_characteristics */
7067 get_bits(&s->gb, 8); /* matrix_coefficients */
7071 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7072 s->avctx->chroma_sample_location = get_ue_golomb(&s->gb)+1; /* chroma_sample_location_type_top_field */
7073 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7076 sps->timing_info_present_flag = get_bits1(&s->gb);
7077 if(sps->timing_info_present_flag){
7078 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7079 sps->time_scale = get_bits_long(&s->gb, 32);
7080 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7083 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7084 if(sps->nal_hrd_parameters_present_flag)
7085 if(decode_hrd_parameters(h, sps) < 0)
7086 return -1;
7087 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7088 if(sps->vcl_hrd_parameters_present_flag)
7089 if(decode_hrd_parameters(h, sps) < 0)
7090 return -1;
7091 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7092 get_bits1(&s->gb); /* low_delay_hrd_flag */
7093 sps->pic_struct_present_flag = get_bits1(&s->gb);
7095 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7096 if(sps->bitstream_restriction_flag){
7097 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7098 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7099 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7100 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7101 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7102 sps->num_reorder_frames= get_ue_golomb(&s->gb);
7103 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7105 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7106 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
7107 return -1;
7111 return 0;
7114 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7115 const uint8_t *jvt_list, const uint8_t *fallback_list){
7116 MpegEncContext * const s = &h->s;
7117 int i, last = 8, next = 8;
7118 const uint8_t *scan = size == 16 ? zigzag_scan : ff_zigzag_direct;
7119 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7120 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7121 else
7122 for(i=0;i<size;i++){
7123 if(next)
7124 next = (last + get_se_golomb(&s->gb)) & 0xff;
7125 if(!i && !next){ /* matrix not written, we use the preset one */
7126 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7127 break;
7129 last = factors[scan[i]] = next ? next : last;
7133 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7134 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7135 MpegEncContext * const s = &h->s;
7136 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7137 const uint8_t *fallback[4] = {
7138 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7139 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7140 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7141 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7143 if(get_bits1(&s->gb)){
7144 sps->scaling_matrix_present |= is_sps;
7145 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7146 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7147 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7148 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7149 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7150 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7151 if(is_sps || pps->transform_8x8_mode){
7152 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7153 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7158 int ff_h264_decode_seq_parameter_set(H264Context *h){
7159 MpegEncContext * const s = &h->s;
7160 int profile_idc, level_idc;
7161 unsigned int sps_id;
7162 int i;
7163 SPS *sps;
7165 profile_idc= get_bits(&s->gb, 8);
7166 get_bits1(&s->gb); //constraint_set0_flag
7167 get_bits1(&s->gb); //constraint_set1_flag
7168 get_bits1(&s->gb); //constraint_set2_flag
7169 get_bits1(&s->gb); //constraint_set3_flag
7170 get_bits(&s->gb, 4); // reserved
7171 level_idc= get_bits(&s->gb, 8);
7172 sps_id= get_ue_golomb_31(&s->gb);
7174 if(sps_id >= MAX_SPS_COUNT) {
7175 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7176 return -1;
7178 sps= av_mallocz(sizeof(SPS));
7179 if(sps == NULL)
7180 return -1;
7182 sps->profile_idc= profile_idc;
7183 sps->level_idc= level_idc;
7185 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7186 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7187 sps->scaling_matrix_present = 0;
7189 if(sps->profile_idc >= 100){ //high profile
7190 sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
7191 if(sps->chroma_format_idc == 3)
7192 sps->residual_color_transform_flag = get_bits1(&s->gb);
7193 sps->bit_depth_luma = get_ue_golomb(&s->gb) + 8;
7194 sps->bit_depth_chroma = get_ue_golomb(&s->gb) + 8;
7195 sps->transform_bypass = get_bits1(&s->gb);
7196 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7197 }else{
7198 sps->chroma_format_idc= 1;
7201 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7202 sps->poc_type= get_ue_golomb_31(&s->gb);
7204 if(sps->poc_type == 0){ //FIXME #define
7205 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7206 } else if(sps->poc_type == 1){//FIXME #define
7207 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7208 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7209 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7210 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7212 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7213 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7214 goto fail;
7217 for(i=0; i<sps->poc_cycle_length; i++)
7218 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7219 }else if(sps->poc_type != 2){
7220 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7221 goto fail;
7224 sps->ref_frame_count= get_ue_golomb_31(&s->gb);
7225 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7226 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7227 goto fail;
7229 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7230 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7231 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7232 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7233 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7234 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7235 goto fail;
7238 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7239 if(!sps->frame_mbs_only_flag)
7240 sps->mb_aff= get_bits1(&s->gb);
7241 else
7242 sps->mb_aff= 0;
7244 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7246 #ifndef ALLOW_INTERLACE
7247 if(sps->mb_aff)
7248 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7249 #endif
7250 sps->crop= get_bits1(&s->gb);
7251 if(sps->crop){
7252 sps->crop_left = get_ue_golomb(&s->gb);
7253 sps->crop_right = get_ue_golomb(&s->gb);
7254 sps->crop_top = get_ue_golomb(&s->gb);
7255 sps->crop_bottom= get_ue_golomb(&s->gb);
7256 if(sps->crop_left || sps->crop_top){
7257 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7259 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7260 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7262 }else{
7263 sps->crop_left =
7264 sps->crop_right =
7265 sps->crop_top =
7266 sps->crop_bottom= 0;
7269 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7270 if( sps->vui_parameters_present_flag )
7271 decode_vui_parameters(h, sps);
7273 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7274 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s %d/%d\n",
7275 sps_id, sps->profile_idc, sps->level_idc,
7276 sps->poc_type,
7277 sps->ref_frame_count,
7278 sps->mb_width, sps->mb_height,
7279 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7280 sps->direct_8x8_inference_flag ? "8B8" : "",
7281 sps->crop_left, sps->crop_right,
7282 sps->crop_top, sps->crop_bottom,
7283 sps->vui_parameters_present_flag ? "VUI" : "",
7284 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc],
7285 sps->timing_info_present_flag ? sps->num_units_in_tick : 0,
7286 sps->timing_info_present_flag ? sps->time_scale : 0
7290 av_free(h->sps_buffers[sps_id]);
7291 h->sps_buffers[sps_id]= sps;
7292 h->sps = *sps;
7293 return 0;
7294 fail:
7295 av_free(sps);
7296 return -1;
7299 static void
7300 build_qp_table(PPS *pps, int t, int index)
7302 int i;
7303 for(i = 0; i < 52; i++)
7304 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
7307 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
7308 MpegEncContext * const s = &h->s;
7309 unsigned int pps_id= get_ue_golomb(&s->gb);
7310 PPS *pps;
7312 if(pps_id >= MAX_PPS_COUNT) {
7313 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7314 return -1;
7317 pps= av_mallocz(sizeof(PPS));
7318 if(pps == NULL)
7319 return -1;
7320 pps->sps_id= get_ue_golomb_31(&s->gb);
7321 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7322 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7323 goto fail;
7326 pps->cabac= get_bits1(&s->gb);
7327 pps->pic_order_present= get_bits1(&s->gb);
7328 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7329 if(pps->slice_group_count > 1 ){
7330 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7331 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7332 switch(pps->mb_slice_group_map_type){
7333 case 0:
7334 #if 0
7335 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7336 | run_length[ i ] |1 |ue(v) |
7337 #endif
7338 break;
7339 case 2:
7340 #if 0
7341 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7342 |{ | | |
7343 | top_left_mb[ i ] |1 |ue(v) |
7344 | bottom_right_mb[ i ] |1 |ue(v) |
7345 | } | | |
7346 #endif
7347 break;
7348 case 3:
7349 case 4:
7350 case 5:
7351 #if 0
7352 | slice_group_change_direction_flag |1 |u(1) |
7353 | slice_group_change_rate_minus1 |1 |ue(v) |
7354 #endif
7355 break;
7356 case 6:
7357 #if 0
7358 | slice_group_id_cnt_minus1 |1 |ue(v) |
7359 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7360 |) | | |
7361 | slice_group_id[ i ] |1 |u(v) |
7362 #endif
7363 break;
7366 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7367 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7368 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7369 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7370 goto fail;
7373 pps->weighted_pred= get_bits1(&s->gb);
7374 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7375 pps->init_qp= get_se_golomb(&s->gb) + 26;
7376 pps->init_qs= get_se_golomb(&s->gb) + 26;
7377 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7378 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7379 pps->constrained_intra_pred= get_bits1(&s->gb);
7380 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7382 pps->transform_8x8_mode= 0;
7383 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7384 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7385 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7387 if(get_bits_count(&s->gb) < bit_length){
7388 pps->transform_8x8_mode= get_bits1(&s->gb);
7389 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7390 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7391 } else {
7392 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7395 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7396 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7397 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7398 h->pps.chroma_qp_diff= 1;
7400 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7401 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7402 pps_id, pps->sps_id,
7403 pps->cabac ? "CABAC" : "CAVLC",
7404 pps->slice_group_count,
7405 pps->ref_count[0], pps->ref_count[1],
7406 pps->weighted_pred ? "weighted" : "",
7407 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7408 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7409 pps->constrained_intra_pred ? "CONSTR" : "",
7410 pps->redundant_pic_cnt_present ? "REDU" : "",
7411 pps->transform_8x8_mode ? "8x8DCT" : ""
7415 av_free(h->pps_buffers[pps_id]);
7416 h->pps_buffers[pps_id]= pps;
7417 return 0;
7418 fail:
7419 av_free(pps);
7420 return -1;
7424 * Call decode_slice() for each context.
7426 * @param h h264 master context
7427 * @param context_count number of contexts to execute
7429 static void execute_decode_slices(H264Context *h, int context_count){
7430 MpegEncContext * const s = &h->s;
7431 AVCodecContext * const avctx= s->avctx;
7432 H264Context *hx;
7433 int i;
7435 if (s->avctx->hwaccel)
7436 return;
7437 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
7438 return;
7439 if(context_count == 1) {
7440 decode_slice(avctx, &h);
7441 } else {
7442 for(i = 1; i < context_count; i++) {
7443 hx = h->thread_context[i];
7444 hx->s.error_recognition = avctx->error_recognition;
7445 hx->s.error_count = 0;
7448 avctx->execute(avctx, (void *)decode_slice,
7449 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7451 /* pull back stuff from slices to master context */
7452 hx = h->thread_context[context_count - 1];
7453 s->mb_x = hx->s.mb_x;
7454 s->mb_y = hx->s.mb_y;
7455 s->dropable = hx->s.dropable;
7456 s->picture_structure = hx->s.picture_structure;
7457 for(i = 1; i < context_count; i++)
7458 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Decode all NAL units contained in buf.
 *
 * Handles both AVC input (big-endian length-prefixed NALs, h->is_avc) and
 * Annex B input (00 00 01 start-code separated).  Each NAL is unescaped via
 * ff_h264_decode_nal() and dispatched on nal_unit_type; slices are queued
 * per thread context and flushed with execute_decode_slices() once
 * h->max_contexts are pending.
 *
 * @return number of input bytes consumed, or -1 on error
 *
 * NOTE(review): this chunk of the file is a scrape with original line
 * numbers baked into each line and some closing-brace lines missing; the
 * code below is deliberately left byte-identical.
 */
7463 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7464 MpegEncContext * const s = &h->s;
7465 AVCodecContext * const avctx= s->avctx;
7466 int buf_index=0;
7467 H264Context *hx; ///< thread context
7468 int context_count = 0;
7469 int next_avc= h->is_avc ? 0 : buf_size;
7471 h->max_contexts = avctx->thread_count;
7472 #if 0
7473 int i;
7474 for(i=0; i<50; i++){
7475 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7477 #endif
// Unless decoding in chunks, each call starts a fresh access unit.
7478 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7479 h->current_slice = 0;
7480 if (!s->first_field)
7481 s->current_picture_ptr= NULL;
7482 reset_sei(h);
7485 for(;;){
7486 int consumed;
7487 int dst_length;
7488 int bit_length;
7489 const uint8_t *ptr;
7490 int i, nalsize = 0;
7491 int err;
// AVC mode: read the big-endian NAL size prefix (h->nal_length_size bytes).
7493 if(buf_index >= next_avc) {
7494 if(buf_index >= buf_size) break;
7495 nalsize = 0;
7496 for(i = 0; i < h->nal_length_size; i++)
7497 nalsize = (nalsize << 8) | buf[buf_index++];
7498 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7499 if(nalsize == 1){
7500 buf_index++;
7501 continue;
7502 }else{
7503 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7504 break;
7507 next_avc= buf_index + nalsize;
7508 } else {
7509 // start code prefix search
7510 for(; buf_index + 3 < buf_size; buf_index++){
7511 // This should always succeed in the first iteration.
7512 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7513 break;
7516 if(buf_index+3 >= buf_size) break;
7518 buf_index+=3;
7521 hx = h->thread_context[context_count];
// Unescape the RBSP (strip emulation-prevention bytes) into hx's rbsp buffer.
7523 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
7524 if (ptr==NULL || dst_length < 0){
7525 return -1;
// Strip trailing zero bytes before locating the rbsp_stop_one_bit.
7527 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7528 dst_length--;
7529 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
7531 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7532 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
// Trailing padding after an AVC NAL is only an error if it contains non-zero bytes.
7535 if (h->is_avc && (nalsize != consumed) && nalsize){
7536 int i, debug_level = AV_LOG_DEBUG;
7537 for (i = consumed; i < nalsize; i++)
7538 if (buf[buf_index+i])
7539 debug_level = AV_LOG_ERROR;
7540 av_log(h->s.avctx, debug_level, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7543 buf_index += consumed;
7545 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7546 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7547 continue;
// Re-entry point when falling back from parallel to serial slice decoding.
7549 again:
7550 err = 0;
7551 switch(hx->nal_unit_type){
7552 case NAL_IDR_SLICE:
7553 if (h->nal_unit_type != NAL_IDR_SLICE) {
7554 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7555 return -1;
7557 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// fallthrough: an IDR slice is then parsed like a regular slice
7558 case NAL_SLICE:
7559 init_get_bits(&hx->s.gb, ptr, bit_length);
7560 hx->intra_gb_ptr=
7561 hx->inter_gb_ptr= &hx->s.gb;
7562 hx->s.data_partitioning = 0;
7564 if((err = decode_slice_header(hx, h)))
7565 break;
7567 if (s->avctx->hwaccel && h->current_slice == 1) {
7568 if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
7569 return -1;
7572 s->current_picture_ptr->key_frame |=
7573 (hx->nal_unit_type == NAL_IDR_SLICE) ||
7574 (h->sei_recovery_frame_cnt >= 0);
// Only queue the slice if it passes the redundant/skip/hurry filters.
7575 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7576 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7577 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7578 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7579 && avctx->skip_frame < AVDISCARD_ALL){
7580 if(avctx->hwaccel) {
7581 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
7582 return -1;
7583 }else
7584 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
// VDPAU gets the raw (still escaped) NAL, re-prefixed with a start code.
7585 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
7586 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
7587 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
7588 }else
7589 context_count++;
7591 break;
// Data partition A carries the slice header of a partitioned slice.
7592 case NAL_DPA:
7593 init_get_bits(&hx->s.gb, ptr, bit_length);
7594 hx->intra_gb_ptr=
7595 hx->inter_gb_ptr= NULL;
7596 hx->s.data_partitioning = 1;
7598 err = decode_slice_header(hx, h);
7599 break;
7600 case NAL_DPB:
7601 init_get_bits(&hx->intra_gb, ptr, bit_length);
7602 hx->intra_gb_ptr= &hx->intra_gb;
7603 break;
7604 case NAL_DPC:
7605 init_get_bits(&hx->inter_gb, ptr, bit_length);
7606 hx->inter_gb_ptr= &hx->inter_gb;
// Partition C completes the slice: queue it if A arrived and filters pass.
7608 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7609 && s->context_initialized
7610 && s->hurry_up < 5
7611 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7612 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7613 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7614 && avctx->skip_frame < AVDISCARD_ALL)
7615 context_count++;
7616 break;
7617 case NAL_SEI:
7618 init_get_bits(&s->gb, ptr, bit_length);
7619 ff_h264_decode_sei(h);
7620 break;
7621 case NAL_SPS:
7622 init_get_bits(&s->gb, ptr, bit_length);
7623 ff_h264_decode_seq_parameter_set(h);
7625 if(s->flags& CODEC_FLAG_LOW_DELAY)
7626 s->low_delay=1;
7628 if(avctx->has_b_frames < 2)
7629 avctx->has_b_frames= !s->low_delay;
7630 break;
7631 case NAL_PPS:
7632 init_get_bits(&s->gb, ptr, bit_length);
7634 ff_h264_decode_picture_parameter_set(h, bit_length);
7636 break;
7637 case NAL_AUD:
7638 case NAL_END_SEQUENCE:
7639 case NAL_END_STREAM:
7640 case NAL_FILLER_DATA:
7641 case NAL_SPS_EXT:
7642 case NAL_AUXILIARY_SLICE:
7643 break;
7644 default:
7645 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush queued slices once every thread context holds one.
7648 if(context_count == h->max_contexts) {
7649 execute_decode_slices(h, context_count);
7650 context_count = 0;
7653 if (err < 0)
7654 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7655 else if(err == 1) {
7656 /* Slice could not be decoded in parallel mode, copy down
7657 * NAL unit stuff to context 0 and restart. Note that
7658 * rbsp_buffer is not transferred, but since we no longer
7659 * run in parallel mode this should not be an issue. */
7660 h->nal_unit_type = hx->nal_unit_type;
7661 h->nal_ref_idc = hx->nal_ref_idc;
7662 hx = h;
7663 goto again;
// Flush whatever slices remain queued at end of input.
7666 if(context_count)
7667 execute_decode_slices(h, context_count);
7668 return buf_index;
7672 * returns the number of bytes consumed for building the current frame
7674 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7675 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7676 if(pos+10>buf_size) pos=buf_size; // oops ;)
7678 return pos;
/**
 * AVCodec.decode callback: decode one packet and, once reordering allows,
 * output one frame into *pict (with *data_size = sizeof(AVFrame)).
 *
 * Handles end-of-stream draining (buf_size == 0), one-time avcC/Annex B
 * extradata parsing, interlacing/top-field-first signalling and B-frame
 * display reordering through h->delayed_pic[].
 *
 * @return number of bytes consumed, or -1 on error
 *
 * NOTE(review): this chunk of the file is a scrape with original line
 * numbers baked into each line and some closing-brace lines missing; the
 * code below is deliberately left byte-identical.
 */
7681 static int decode_frame(AVCodecContext *avctx,
7682 void *data, int *data_size,
7683 AVPacket *avpkt)
7685 const uint8_t *buf = avpkt->data;
7686 int buf_size = avpkt->size;
7687 H264Context *h = avctx->priv_data;
7688 MpegEncContext *s = &h->s;
7689 AVFrame *pict = data;
7690 int buf_index;
7692 s->flags= avctx->flags;
7693 s->flags2= avctx->flags2;
7695 /* end of stream, output what is still in the buffers */
7696 if (buf_size == 0) {
7697 Picture *out;
7698 int i, out_idx;
7700 //FIXME factorize this with the output code below
// Pick the delayed picture with the smallest poc (next in display order).
7701 out = h->delayed_pic[0];
7702 out_idx = 0;
7703 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7704 if(h->delayed_pic[i]->poc < out->poc){
7705 out = h->delayed_pic[i];
7706 out_idx = i;
7709 for(i=out_idx; h->delayed_pic[i]; i++)
7710 h->delayed_pic[i] = h->delayed_pic[i+1];
7712 if(out){
7713 *data_size = sizeof(AVFrame);
7714 *pict= *(AVFrame*)out;
7717 return 0;
// First packet of AVC input: parse the avcC extradata (SPS/PPS sets).
7720 if(h->is_avc && !h->got_avcC) {
7721 int i, cnt, nalsize;
7722 unsigned char *p = avctx->extradata;
7723 if(avctx->extradata_size < 7) {
7724 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7725 return -1;
7727 if(*p != 1) {
7728 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7729 return -1;
7731 /* sps and pps in the avcC always have length coded with 2 bytes,
7732 so put a fake nal_length_size = 2 while parsing them */
7733 h->nal_length_size = 2;
7734 // Decode sps from avcC
7735 cnt = *(p+5) & 0x1f; // Number of sps
7736 p += 6;
7737 for (i = 0; i < cnt; i++) {
7738 nalsize = AV_RB16(p) + 2;
7739 if(decode_nal_units(h, p, nalsize) < 0) {
7740 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7741 return -1;
7743 p += nalsize;
7745 // Decode pps from avcC
7746 cnt = *(p++); // Number of pps
7747 for (i = 0; i < cnt; i++) {
7748 nalsize = AV_RB16(p) + 2;
7749 if(decode_nal_units(h, p, nalsize) != nalsize) {
7750 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7751 return -1;
7753 p += nalsize;
7755 // Now store right nal length size, that will be use to parse all other nals
7756 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7757 // Do not reparse avcC
7758 h->got_avcC = 1;
// Annex B style extradata is fed through the NAL parser exactly once.
7761 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7762 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7763 return -1;
7764 h->got_avcC = 1;
7767 buf_index=decode_nal_units(h, buf, buf_size);
7768 if(buf_index < 0)
7769 return -1;
7771 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7772 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7773 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7774 return -1;
// A complete picture was decoded (or the chunked decode reached frame end).
7777 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7778 Picture *out = s->current_picture_ptr;
7779 Picture *cur = s->current_picture_ptr;
7780 int i, pics, cross_idr, out_of_order, out_idx;
7782 field_end(h);
7784 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7785 /* Wait for second field. */
7786 *data_size = 0;
7788 } else {
7789 cur->repeat_pict = 0;
7791 /* Signal interlacing information externally. */
7792 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7793 if (h->sei_ct_type)
7794 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
7795 else
7796 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7798 if(h->sps.pic_struct_present_flag){
7799 switch (h->sei_pic_struct)
7801 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7802 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7803 // Signal the possibility of telecined film externally (pic_struct 5,6)
7804 // From these hints, let the applications decide if they apply deinterlacing.
7805 cur->repeat_pict = 1;
7806 break;
7807 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7808 // Force progressive here, as doubling interlaced frame is a bad idea.
7809 cur->interlaced_frame = 0;
7810 cur->repeat_pict = 2;
7811 break;
7812 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7813 cur->interlaced_frame = 0;
7814 cur->repeat_pict = 4;
7815 break;
7817 }else{
7818 /* Derive interlacing flag from used decoding process. */
7819 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7822 if (cur->field_poc[0] != cur->field_poc[1]){
7823 /* Derive top_field_first from field pocs. */
7824 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7825 }else{
7826 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7827 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7828 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7829 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7830 cur->top_field_first = 1;
7831 else
7832 cur->top_field_first = 0;
7833 }else{
7834 /* Most likely progressive */
7835 cur->top_field_first = 0;
7839 //FIXME do something with unavailable reference frames
7841 /* Sort B-frames into display order */
7843 if(h->sps.bitstream_restriction_flag
7844 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7845 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7846 s->low_delay = 0;
7849 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7850 && !h->sps.bitstream_restriction_flag){
7851 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7852 s->low_delay= 0;
// Append the new picture to the delayed list, keeping it referenced.
7855 pics = 0;
7856 while(h->delayed_pic[pics]) pics++;
7858 assert(pics <= MAX_DELAYED_PIC_COUNT);
7860 h->delayed_pic[pics++] = cur;
7861 if(cur->reference == 0)
7862 cur->reference = DELAYED_PIC_REF;
7864 out = h->delayed_pic[0];
7865 out_idx = 0;
7866 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7867 if(h->delayed_pic[i]->poc < out->poc){
7868 out = h->delayed_pic[i];
7869 out_idx = i;
7871 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7873 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7875 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7877 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7878 || (s->low_delay &&
7879 ((!cross_idr && out->poc > h->outputed_poc + 2)
7880 || cur->pict_type == FF_B_TYPE)))
7882 s->low_delay = 0;
7883 s->avctx->has_b_frames++;
// Emit (or drop, if out of order) the selected picture.
7886 if(out_of_order || pics > s->avctx->has_b_frames){
7887 out->reference &= ~DELAYED_PIC_REF;
7888 for(i=out_idx; h->delayed_pic[i]; i++)
7889 h->delayed_pic[i] = h->delayed_pic[i+1];
7891 if(!out_of_order && pics > s->avctx->has_b_frames){
7892 *data_size = sizeof(AVFrame);
7894 h->outputed_poc = out->poc;
7895 *pict= *(AVFrame*)out;
7896 }else{
7897 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7902 assert(pict->data[0] || !*data_size);
7903 ff_print_debug_info(s, pict);
7904 //printf("out %d\n", (int)pict->data[0]);
7905 #if 0 //?
7907 /* Return the Picture timestamp as the frame number */
7908 /* we subtract 1 because it is added on utils.c */
7909 avctx->frame_number = s->picture_number - 1;
7910 #endif
7911 return get_consumed_bytes(s, buf_index, buf_size);
7913 #if 0
/* Dead code (compiled out): fills h->mb_avail[] with neighbour-availability
 * flags for the current macroblock by comparing slice_table entries against
 * the current slice number. Kept byte-identical. */
7914 static inline void fill_mb_avail(H264Context *h){
7915 MpegEncContext * const s = &h->s;
7916 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7918 if(s->mb_y){
7919 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7920 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7921 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7922 }else{
7923 h->mb_avail[0]=
7924 h->mb_avail[1]=
7925 h->mb_avail[2]= 0;
7927 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7928 h->mb_avail[4]= 1; //FIXME move out
7929 h->mb_avail[5]= 0; //FIXME move out
7931 #endif
7933 #ifdef TEST
7934 #undef printf
7935 #undef random
7936 #define COUNT 8000
7937 #define SIZE (COUNT*40)
/**
 * Stand-alone self-test (built only with -DTEST): round-trips and times the
 * unsigned/signed exp-Golomb coders, plus disabled (#if 0) sections for the
 * 4x4 (I)DCT, the quantizer and the NAL escape/unescape layer.
 * NOTE(review): scrape with baked-in line numbers and missing brace lines;
 * code left byte-identical.
 */
7938 int main(void){
7939 int i;
7940 uint8_t temp[SIZE];
7941 PutBitContext pb;
7942 GetBitContext gb;
7943 // int int_temp[10000];
7944 DSPContext dsp;
7945 AVCodecContext avctx;
7947 dsputil_init(&dsp, &avctx);
7949 init_put_bits(&pb, temp, SIZE);
7950 printf("testing unsigned exp golomb\n");
7951 for(i=0; i<COUNT; i++){
7952 START_TIMER
7953 set_ue_golomb(&pb, i);
7954 STOP_TIMER("set_ue_golomb");
7956 flush_put_bits(&pb);
// Read back every value and check it decodes to what was written.
7958 init_get_bits(&gb, temp, 8*SIZE);
7959 for(i=0; i<COUNT; i++){
7960 int j, s;
7962 s= show_bits(&gb, 24);
7964 START_TIMER
7965 j= get_ue_golomb(&gb);
7966 if(j != i){
7967 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7968 // return -1;
7970 STOP_TIMER("get_ue_golomb");
7974 init_put_bits(&pb, temp, SIZE);
7975 printf("testing signed exp golomb\n");
7976 for(i=0; i<COUNT; i++){
7977 START_TIMER
7978 set_se_golomb(&pb, i - COUNT/2);
7979 STOP_TIMER("set_se_golomb");
7981 flush_put_bits(&pb);
7983 init_get_bits(&gb, temp, 8*SIZE);
7984 for(i=0; i<COUNT; i++){
7985 int j, s;
7987 s= show_bits(&gb, 24);
7989 START_TIMER
7990 j= get_se_golomb(&gb);
7991 if(j != i - COUNT/2){
7992 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7993 // return -1;
7995 STOP_TIMER("get_se_golomb");
7998 #if 0
7999 printf("testing 4x4 (I)DCT\n");
8001 DCTELEM block[16];
8002 uint8_t src[16], ref[16];
8003 uint64_t error= 0, max_error=0;
8005 for(i=0; i<COUNT; i++){
8006 int j;
8007 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8008 for(j=0; j<16; j++){
8009 ref[j]= random()%255;
8010 src[j]= random()%255;
8013 h264_diff_dct_c(block, src, ref, 4);
8015 //normalize
8016 for(j=0; j<16; j++){
8017 // printf("%d ", block[j]);
8018 block[j]= block[j]*4;
8019 if(j&1) block[j]= (block[j]*4 + 2)/5;
8020 if(j&4) block[j]= (block[j]*4 + 2)/5;
8022 // printf("\n");
8024 s->dsp.h264_idct_add(ref, block, 4);
8025 /* for(j=0; j<16; j++){
8026 printf("%d ", ref[j]);
8028 printf("\n");*/
8030 for(j=0; j<16; j++){
8031 int diff= FFABS(src[j] - ref[j]);
8033 error+= diff*diff;
8034 max_error= FFMAX(max_error, diff);
8037 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8038 printf("testing quantizer\n");
8039 for(qp=0; qp<52; qp++){
8040 for(i=0; i<16; i++)
8041 src1_block[i]= src2_block[i]= random()%255;
8044 printf("Testing NAL layer\n");
8046 uint8_t bitstream[COUNT];
8047 uint8_t nal[COUNT*2];
8048 H264Context h;
8049 memset(&h, 0, sizeof(H264Context));
8051 for(i=0; i<COUNT; i++){
8052 int zeros= i;
8053 int nal_length;
8054 int consumed;
8055 int out_length;
8056 uint8_t *out;
8057 int j;
// Build a random bitstream with exactly `zeros` zero bytes scattered in it.
8059 for(j=0; j<COUNT; j++){
8060 bitstream[j]= (random() % 255) + 1;
8063 for(j=0; j<zeros; j++){
8064 int pos= random() % COUNT;
8065 while(bitstream[pos] == 0){
8066 pos++;
8067 pos %= COUNT;
8069 bitstream[pos]=0;
8072 START_TIMER
// Escape then unescape; the result must match the original bitstream.
8074 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8075 if(nal_length<0){
8076 printf("encoding failed\n");
8077 return -1;
8080 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
8082 STOP_TIMER("NAL")
8084 if(out_length != COUNT){
8085 printf("incorrect length %d %d\n", out_length, COUNT);
8086 return -1;
8089 if(consumed != nal_length){
8090 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8091 return -1;
8094 if(memcmp(bitstream, out, COUNT)){
8095 printf("mismatch\n");
8096 return -1;
8099 #endif
8101 printf("Testing RBSP\n");
8104 return 0;
8106 #endif /* TEST */
8109 av_cold void ff_h264_free_context(H264Context *h)
8111 int i;
8113 av_freep(&h->rbsp_buffer[0]);
8114 av_freep(&h->rbsp_buffer[1]);
8115 free_tables(h); //FIXME cleanup init stuff perhaps
8117 for(i = 0; i < MAX_SPS_COUNT; i++)
8118 av_freep(h->sps_buffers + i);
8120 for(i = 0; i < MAX_PPS_COUNT; i++)
8121 av_freep(h->pps_buffers + i);
8124 static av_cold int decode_end(AVCodecContext *avctx)
8126 H264Context *h = avctx->priv_data;
8127 MpegEncContext *s = &h->s;
8129 ff_h264_free_context(h);
8131 MPV_common_end(s);
8133 // memset(h, 0, sizeof(H264Context));
8135 return 0;
/* Public descriptor for the software H.264 decoder.  Positional fields
 * follow the AVCodec layout of this FFmpeg revision (name, type, id,
 * priv_data_size, init, encode, close, decode, capabilities) —
 * NOTE(review): field order presumed from the callback names; verify
 * against avcodec.h. Kept byte-identical. */
8139 AVCodec h264_decoder = {
8140 "h264",
8141 CODEC_TYPE_VIDEO,
8142 CODEC_ID_H264,
8143 sizeof(H264Context),
8144 decode_init,
8145 NULL,
8146 decode_end,
8147 decode_frame,
8148 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8149 .flush= flush_dpb,
8150 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8151 .pix_fmts= ff_hwaccel_pixfmt_list_420,
8154 #if CONFIG_H264_VDPAU_DECODER
/* Descriptor for the VDPAU-accelerated H.264 decoder variant; shares the
 * init/close/decode callbacks with the software decoder and additionally
 * advertises CODEC_CAP_HWACCEL_VDPAU. Kept byte-identical. */
8155 AVCodec h264_vdpau_decoder = {
8156 "h264_vdpau",
8157 CODEC_TYPE_VIDEO,
8158 CODEC_ID_H264,
8159 sizeof(H264Context),
8160 decode_init,
8161 NULL,
8162 decode_end,
8163 decode_frame,
8164 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8165 .flush= flush_dpb,
8166 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8168 #endif
8170 #if CONFIG_SVQ3_DECODER
8171 #include "svq3.c"
8172 #endif