/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h"
33 #include "h264_parser.h"
34 #include "golomb.h"
35 #include "rectangle.h"
37 #include "cabac.h"
38 #ifdef ARCH_X86
39 #include "i386/h264_i386.h"
40 #endif
42 //#undef NDEBUG
43 #include <assert.h>
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
71 static VLC run7_vlc;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
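/* The tables above back the CAVLC VLCs (coeff_token, total_zeros and
 * run_before/run7).  The *_size constants give the number of rows available
 * in each statically allocated VLC_TYPE[][2] table, so the VLCs can be built
 * once at init time into static storage instead of being allocated per
 * decoder instance. */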
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + (a<<16);
#else
    return (a&0xFFFF) + (b<<16);
#endif
}
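/* pack16to32() packs two signed 16-bit values into one 32-bit word in memory
 * order, so a motion vector pair (mv_x, mv_y) can be read or written with a
 * single 32-bit access; e.g. on little endian pack16to32(1, -2) == 0xFFFE0001,
 * i.e. 0x0001 in the low half and 0xFFFE in the high half. */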
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};

static const int left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};
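/* rem6[] and div6[] give qp%6 and qp/6 without an integer division, e.g.
 * rem6[23] == 5 and div6[23] == 3 since 23 == 6*3 + 5; the dequant scale for
 * a given qp is then taken from the qp%6 entry of the base table shifted left
 * by qp/6.  left_block_options[] selects which sub-blocks of the left
 * neighbour are used, depending on the MBAFF frame/field pairing. */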
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 int * left_block;
110 int topleft_partition= -1;
111 int i;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
117 return;
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
126 if(FRAME_MBAFF){
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
138 if (bottom
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
144 if (bottom
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
154 if (bottom
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
163 if (bottom) {
164 left_block = left_block_options[1];
165 } else {
166 left_block= left_block_options[2];
168 } else {
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
178 if(for_deblock){
179 topleft_type = 0;
180 topright_type = 0;
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
186 int list;
187 for(list=0; list<h->list_count; list++){
//These values were changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
195 ref += h->b8_stride;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
201 }else{
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
230 }else{
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
239 }else{
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 }else{
259 int pred;
260 if(!(top_type & type_mask))
261 pred= -1;
262 else{
263 pred= 2;
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
270 for(i=0; i<2; i++){
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 }else{
275 int pred;
276 if(!(left_type[i] & type_mask))
277 pred= -1;
278 else{
279 pred= 2;
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/*
 0 . T T. T T T T
 1 L . .L . . . .
 2 L . .L . . . .
 3 . T TL . . . .
 4 L . .L . . . .
 5 L . .. . . . .
*/
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
299 if(top_type){
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
311 }else{
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
326 if(left_type[i]){
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
331 }else{
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
339 if( h->pps.cabac ) {
340 // top_cbp
341 if(top_type) {
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
344 h->top_cbp = 0x1C0;
345 } else {
346 h->top_cbp = 0;
348 // left_cbp
349 if (left_type[0]) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
352 h->left_cbp = 0x1C0;
353 } else {
354 h->left_cbp = 0;
356 if (left_type[0]) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
359 if (left_type[1]) {
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
364 #if 1
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
366 int list;
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
374 continue;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
389 }else{
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
397 for(i=0; i<2; i++){
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
406 }else{
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
415 continue;
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
422 }else{
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
432 }else{
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
438 continue;
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
451 if( h->pps.cabac ) {
/* XXX yuck, load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
459 }else{
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
469 }else{
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
477 }else{
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
496 }else{
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
504 else
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
511 else
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
516 if(FRAME_MBAFF){
517 #define MAP_MVS\
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
528 if(MB_FIELD){
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
535 MAP_MVS
536 #undef MAP_F2F
537 }else{
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
544 MAP_MVS
545 #undef MAP_F2F
550 #endif
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
574 int i;
576 if(!(h->top_samples_available&0x8000)){
577 for(i=0; i<4; i++){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
579 if(status<0){
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
581 return -1;
582 } else if(status){
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
590 for(i=0; i<4; i++){
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
593 if(status<0){
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
595 return -1;
596 } else if(status){
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
603 return 0;
604 } //FIXME cleanup like next
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
614 if(mode > 6U) {
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
616 return -1;
619 if(!(h->top_samples_available&0x8000)){
620 mode= top[ mode ];
621 if(mode<0){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
623 return -1;
627 if((h->left_samples_available&0x8080) != 0x8080){
628 mode= left[ mode ];
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
632 if(mode<0){
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
634 return -1;
638 return mode;
/**
 * gets the predicted intra4x4 prediction mode.
 */
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
653 else return min;
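/* The predicted intra4x4 mode is the smaller of the left and top neighbour
 * modes; if either neighbour is unavailable (-1 in the cache) the prediction
 * falls back to DC_PRED, which is the "min of neighbours, DC fallback" rule
 * from the spec. */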
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
/**
 * gets the predicted number of non-zero coefficients.
 * @param n block index
 */
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
684 int i= left + top;
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
690 return i&31;
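/* The predicted number of non-zero coefficients is the rounded average of the
 * left and top neighbours.  Unavailable neighbours are stored as 64 in the
 * cache: the sum is then >= 64, the halving is skipped, and the final &31
 * mask leaves just the other neighbour's count (0 if both are missing). */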
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
699 if(FRAME_MBAFF){
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
701 const int16_t *mv;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
705 if(!MB_FIELD
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 if(!MB_FIELD
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 if(MB_FIELD
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 && i >= scan8[0]+8){
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
736 #undef SET_DIAG_MV
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
741 return topright_ref;
742 }else{
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
/**
 * gets the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 const int16_t * C;
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
    /* mv_cache
       B . . A T T T T
       U . . L . . , .
       U . . L . . . .
       U . . L . . , .
       . . . L . . . .
    */
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
783 if(left_ref==ref){
784 *mx= A[0];
785 *my= A[1];
786 }else if(top_ref==ref){
787 *mx= B[0];
788 *my= B[1];
789 }else{
790 *mx= C[0];
791 *my= C[1];
793 }else{
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
795 *mx= A[0];
796 *my= A[1];
797 }else{
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
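/* Median motion vector prediction: each component of the predictor is the
 * median of the left (A), top (B) and diagonal (C) neighbours.  If exactly
 * one neighbour uses the requested reference index its vector is taken
 * directly, and if only the left neighbour is available at all, A is used;
 * this mirrors the luma MV prediction process of the spec. */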
/**
 * gets the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
813 if(n==0){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
819 if(top_ref == ref){
820 *mx= B[0];
821 *my= B[1];
822 return;
824 }else{
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
830 if(left_ref == ref){
831 *mx= A[0];
832 *my= A[1];
833 return;
837 //RARE
838 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
848 if(n==0){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
854 if(left_ref == ref){
855 *mx= A[0];
856 *my= A[1];
857 return;
859 }else{
860 const int16_t * C;
861 int diagonal_ref;
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
868 *mx= C[0];
869 *my= C[1];
870 return;
874 //RARE
875 pred_motion(h, n, 2, list, ref, mx, my);
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
888 *mx = *my = 0;
889 return;
892 pred_motion(h, 0, 4, 0, 0, mx, my);
894 return;
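/* P_Skip motion: the MV is forced to (0,0) when either neighbour is
 * unavailable, or when one of them uses ref_idx 0 with a zero vector;
 * otherwise the regular 16x16 median prediction above is applied with
 * ref_idx 0. */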
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
901 return 256;
902 }else{
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
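/* Worked example of the temporal-direct scale factor above: with
 * tb = poc - poc0 = 2 and td = poc1 - poc0 = 4,
 * tx = (16384 + 2) / 4 = 4096 and the result is (2*4096 + 32) >> 6 = 128,
 * i.e. colocated vectors get scaled by 128/256 = 1/2.  Long-term references
 * (and td == 0) use the neutral factor 256. */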
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
913 int i, field;
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
926 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
941 if (!interl)
942 poc |= 3;
else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isn't needed
944 poc= (poc&~3) + rfield + 1;
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
950 if(rfield == field)
951 map[list][old_ref] = cur_ref;
952 break;
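/* fill_colmap() builds the ref index map used by temporal direct prediction:
 * for every reference of the colocated picture (ref_list[1][0]) it looks up
 * the entry of the current reference list that refers to the same picture,
 * matching on 4*frame_num plus the field parity kept in the low bits of
 * Picture.reference; entries at offset 16 hold the MBAFF field variants. */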
959 static inline void direct_ref_list_init(H264Context * const h){
960 MpegEncContext * const s = &h->s;
961 Picture * const ref1 = &h->ref_list[1][0];
962 Picture * const cur = s->current_picture_ptr;
963 int list, j, field;
964 int sidx= (s->picture_structure&1)^1;
965 int ref1sidx= (ref1->reference&1)^1;
967 for(list=0; list<2; list++){
968 cur->ref_count[sidx][list] = h->ref_count[list];
969 for(j=0; j<h->ref_count[list]; j++)
970 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
973 if(s->picture_structure == PICT_FRAME){
974 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978 cur->mbaff= FRAME_MBAFF;
980 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
981 return;
983 for(list=0; list<2; list++){
984 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985 for(field=0; field<2; field++)
986 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991 MpegEncContext * const s = &h->s;
992 int b8_stride = h->b8_stride;
993 int b4_stride = h->b_stride;
994 int mb_xy = h->mb_xy;
995 int mb_type_col[2];
996 const int16_t (*l1mv0)[2], (*l1mv1)[2];
997 const int8_t *l1ref0, *l1ref1;
998 const int is_b8x8 = IS_8X8(*mb_type);
999 unsigned int sub_mb_type;
1000 int i8, i4;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1004 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1006 int cur_poc = s->current_picture_ptr->poc;
1007 int *col_poc = h->ref_list[1]->field_poc;
1008 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1010 b8_stride = 0;
1011 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1012 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013 mb_xy += s->mb_stride*fieldoff;
1015 goto single_col;
1016 }else{ // AFL/AFR/FR/FL -> AFR/FR
1017 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1018 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1021 b8_stride *= 3;
1022 b4_stride *= 6;
1023 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1026 && !is_b8x8){
1027 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1029 }else{
1030 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else{ // AFR/FR -> AFR/FR
1034 single_col:
1035 mb_type_col[0] =
1036 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1037 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039 * so we know exactly what block size to use */
1040 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1045 }else{
1046 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1052 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1056 if(!b8_stride){
1057 if(s->mb_y&1){
1058 l1ref0 += h->b8_stride;
1059 l1ref1 += h->b8_stride;
1060 l1mv0 += 2*b4_stride;
1061 l1mv1 += 2*b4_stride;
1065 if(h->direct_spatial_mv_pred){
1066 int ref[2];
1067 int mv[2][2];
1068 int list;
1070 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072 /* ref = min(neighbors) */
1073 for(list=0; list<2; list++){
1074 int refa = h->ref_cache[list][scan8[0] - 1];
1075 int refb = h->ref_cache[list][scan8[0] - 8];
1076 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077 if(refc == PART_NOT_AVAILABLE)
1078 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1079 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1080 if(ref[list] < 0)
1081 ref[list] = -1;
1084 if(ref[0] < 0 && ref[1] < 0){
1085 ref[0] = ref[1] = 0;
1086 mv[0][0] = mv[0][1] =
1087 mv[1][0] = mv[1][1] = 0;
1088 }else{
1089 for(list=0; list<2; list++){
1090 if(ref[list] >= 0)
1091 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1092 else
1093 mv[list][0] = mv[list][1] = 0;
1097 if(ref[1] < 0){
1098 if(!is_b8x8)
1099 *mb_type &= ~MB_TYPE_L1;
1100 sub_mb_type &= ~MB_TYPE_L1;
1101 }else if(ref[0] < 0){
1102 if(!is_b8x8)
1103 *mb_type &= ~MB_TYPE_L0;
1104 sub_mb_type &= ~MB_TYPE_L0;
1107 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108 for(i8=0; i8<4; i8++){
1109 int x8 = i8&1;
1110 int y8 = i8>>1;
1111 int xy8 = x8+y8*b8_stride;
1112 int xy4 = 3*x8+y8*b4_stride;
1113 int a=0, b=0;
1115 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1116 continue;
1117 h->sub_mb_type[i8] = sub_mb_type;
1119 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1121 if(!IS_INTRA(mb_type_col[y8])
1122 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1124 if(ref[0] > 0)
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 if(ref[1] > 0)
1127 b= pack16to32(mv[1][0],mv[1][1]);
1128 }else{
1129 a= pack16to32(mv[0][0],mv[0][1]);
1130 b= pack16to32(mv[1][0],mv[1][1]);
1132 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1135 }else if(IS_16X16(*mb_type)){
1136 int a=0, b=0;
1138 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1140 if(!IS_INTRA(mb_type_col[0])
1141 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1143 && (h->x264_build>33 || !h->x264_build)))){
1144 if(ref[0] > 0)
1145 a= pack16to32(mv[0][0],mv[0][1]);
1146 if(ref[1] > 0)
1147 b= pack16to32(mv[1][0],mv[1][1]);
1148 }else{
1149 a= pack16to32(mv[0][0],mv[0][1]);
1150 b= pack16to32(mv[1][0],mv[1][1]);
1152 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1154 }else{
1155 for(i8=0; i8<4; i8++){
1156 const int x8 = i8&1;
1157 const int y8 = i8>>1;
1159 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1160 continue;
1161 h->sub_mb_type[i8] = sub_mb_type;
1163 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1168 /* col_zero_flag */
1169 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1170 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171 && (h->x264_build>33 || !h->x264_build)))){
1172 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173 if(IS_SUB_8X8(sub_mb_type)){
1174 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1176 if(ref[0] == 0)
1177 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1178 if(ref[1] == 0)
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1181 }else
1182 for(i4=0; i4<4; i4++){
1183 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1185 if(ref[0] == 0)
1186 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1187 if(ref[1] == 0)
1188 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1194 }else{ /* direct temporal mv pred */
1195 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196 const int *dist_scale_factor = h->dist_scale_factor;
1197 int ref_offset= 0;
1199 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1200 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1204 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1205 ref_offset += 16;
1207 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208 /* FIXME assumes direct_8x8_inference == 1 */
1209 int y_shift = 2*!IS_INTERLACED(*mb_type);
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1214 int ref0, scale;
1215 const int16_t (*l1mv)[2]= l1mv0;
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1218 continue;
1219 h->sub_mb_type[i8] = sub_mb_type;
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1222 if(IS_INTRA(mb_type_col[y8])){
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1226 continue;
1229 ref0 = l1ref0[x8 + y8*b8_stride];
1230 if(ref0 >= 0)
1231 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1232 else{
1233 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1234 l1mv= l1mv1;
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2;
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1248 return;
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1254 int ref, mv0, mv1;
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col[0])){
1258 ref=mv0=mv1=0;
1259 }else{
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1264 int mv_l0[2];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1267 ref= ref0;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1274 }else{
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1278 int ref0, scale;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1282 continue;
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col[0])){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1289 continue;
1292 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1293 if(ref0 >= 0)
1294 ref0 = map_col_to_list0[0][ref0];
1295 else{
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1297 l1mv= l1mv1;
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1308 }else
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1326 int list;
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1332 int y;
1333 if(!USES_LIST(mb_type, list))
1334 continue;
1336 for(y=0; y<4; y++){
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340 if( h->pps.cabac ) {
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1343 else
1344 for(y=0; y<4; y++){
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Decodes a network abstraction layer unit.
 * @param consumed is the number of bytes used as input
 * @param length is the length of the array
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
 * @returns decoded bytes, might be src+1 if no escapes
 */
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1377 int i, si, di;
1378 uint8_t *dst;
1379 int bufidx;
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1385 src++; length--;
1386 #if 0
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
1389 #endif
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1394 if(src[i+2]!=3){
1395 /* startcode, so we must be past the end */
1396 length=i;
1398 break;
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
1405 return src;
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
1412 if (dst == NULL){
1413 return NULL;
1416 //printf("decoding esc\n");
1417 si=di=0;
1418 while(si<length){
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1422 dst[di++]= 0;
1423 dst[di++]= 0;
1424 si+=3;
1425 continue;
1426 }else //next start code
1427 break;
1430 dst[di++]= src[si++];
1433 *dst_length= di;
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1436 return dst;
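#if 0
/* Illustrative sketch (not compiled): the unescaping step above on its own.
 * H.264 inserts an emulation-prevention byte 0x03 after every 0x00 0x00 pair
 * in the RBSP so that start codes cannot appear inside a NAL unit; decoding
 * turns 00 00 03 xx back into 00 00 xx. */
static int unescape_rbsp_sketch(uint8_t *dst, const uint8_t *src, int length){
    int si=0, di=0;
    while(si<length){
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]==3){
            dst[di++]= 0;
            dst[di++]= 0;
            si+=3;           // skip the 0x03 emulation-prevention byte
            continue;
        }
        dst[di++]= src[si++];
    }
    return di;               // number of unescaped bytes written to dst
}
#endif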
/**
 * identifies the exact end of the bitstream
 * @return the length of the trailing, or 0 if damaged
 */
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1444 int v= *src;
1445 int r;
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1449 for(r=1; r<9; r++){
1450 if(v&1) return r;
1451 v>>=1;
1453 return 0;
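/* The RBSP trailing bits are a single '1' stop bit followed by zero padding
 * up to the byte boundary, so the return value is how many bits of the last
 * byte belong to the trailing: 0x80 -> 8 (the whole byte is trailing),
 * 0x01 -> 1, and 0x00 -> 0 (no stop bit found, i.e. damaged). */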
/**
 * IDCT transforms the 16 dc values and dequantizes them.
 * @param qp quantization parameter
 */
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1461 #define stride 16
1462 int i;
1463 int temp[16]; //FIXME check if this is a good idea
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
1468 //return;
1469 for(i=0; i<4; i++){
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
1476 temp[4*i+0]= z0+z3;
1477 temp[4*i+1]= z1+z2;
1478 temp[4*i+2]= z1-z2;
1479 temp[4*i+3]= z0-z3;
1482 for(i=0; i<4; i++){
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
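/* The code above is the inverse 4x4 Hadamard transform applied to the 16 luma
 * DC coefficients of an Intra_16x16 macroblock (x_offset/y_offset gather the
 * DC of each 4x4 block out of the 16x16 block array), followed by
 * dequantization with rounding: (coeff*qmul + 128) >> 8. */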
1496 #if 0
/**
 * DCT transforms the 16 dc values.
 * @param qp quantization parameter ??? FIXME
 */
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 // const int qmul= dequant_coeff[qp][0];
1503 int i;
1504 int temp[16]; //FIXME check if this is a good idea
1505 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1508 for(i=0; i<4; i++){
1509 const int offset= y_offset[i];
1510 const int z0= block[offset+stride*0] + block[offset+stride*4];
1511 const int z1= block[offset+stride*0] - block[offset+stride*4];
1512 const int z2= block[offset+stride*1] - block[offset+stride*5];
1513 const int z3= block[offset+stride*1] + block[offset+stride*5];
1515 temp[4*i+0]= z0+z3;
1516 temp[4*i+1]= z1+z2;
1517 temp[4*i+2]= z1-z2;
1518 temp[4*i+3]= z0-z3;
1521 for(i=0; i<4; i++){
1522 const int offset= x_offset[i];
1523 const int z0= temp[4*0+i] + temp[4*2+i];
1524 const int z1= temp[4*0+i] - temp[4*2+i];
1525 const int z2= temp[4*1+i] - temp[4*3+i];
1526 const int z3= temp[4*1+i] + temp[4*3+i];
1528 block[stride*0 +offset]= (z0 + z3)>>1;
1529 block[stride*2 +offset]= (z1 + z2)>>1;
1530 block[stride*8 +offset]= (z1 - z2)>>1;
1531 block[stride*10+offset]= (z0 - z3)>>1;
1534 #endif
1536 #undef xStride
1537 #undef stride
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540 const int stride= 16*2;
1541 const int xStride= 16;
1542 int a,b,c,d,e;
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
1549 e= a-b;
1550 a= a+b;
1551 b= c-d;
1552 c= c+d;
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
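/* Inverse 2x2 Hadamard (butterfly) on the four chroma DC coefficients,
 * followed by dequantization with qmul; the DCs sit 16 DCTELEMs apart because
 * each 4x4 chroma block occupies 16 consecutive entries of the block array,
 * hence the stride/xStride constants. */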
1560 #if 0
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1564 int a,b,c,d,e;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
1571 e= a-b;
1572 a= a+b;
1573 b= c-d;
1574 c= c+d;
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1581 #endif
/**
 * gets the chroma qp.
 */
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587 return h->pps.chroma_qp_table[t][qscale];
1590 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1591 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1592 int src_x_offset, int src_y_offset,
1593 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1594 MpegEncContext * const s = &h->s;
1595 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1596 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1597 const int luma_xy= (mx&3) + ((my&3)<<2);
1598 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1599 uint8_t * src_cb, * src_cr;
1600 int extra_width= h->emu_edge_width;
1601 int extra_height= h->emu_edge_height;
1602 int emu=0;
1603 const int full_mx= mx>>2;
1604 const int full_my= my>>2;
1605 const int pic_width = 16*s->mb_width;
1606 const int pic_height = 16*s->mb_height >> MB_FIELD;
1608 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
1609 return;
1611 if(mx&7) extra_width -= 3;
1612 if(my&7) extra_height -= 3;
1614 if( full_mx < 0-extra_width
1615 || full_my < 0-extra_height
1616 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1617 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1618 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1619 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1620 emu=1;
1623 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1624 if(!square){
1625 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1628 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1630 if(MB_FIELD){
1631 // chroma offset when predicting from a field of opposite parity
1632 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1633 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1635 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1636 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1638 if(emu){
1639 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1640 src_cb= s->edge_emu_buffer;
1642 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1644 if(emu){
1645 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1646 src_cr= s->edge_emu_buffer;
1648 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
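/* Illustrative sketch (not used anywhere; the helper name is made up): luma
 * motion vectors are in quarter-pel units and chroma vectors in eighth-pel
 * units, which is why mc_dir_part() above splits each component with >>2/&3
 * for luma and >>3/&7 for chroma. */
#if 0
static void split_mv_component(int mv, int *luma_full, int *luma_frac,
                               int *chroma_full, int *chroma_frac){
    *luma_full   = mv >> 2; /* full-pel luma position */
    *luma_frac   = mv &  3; /* quarter-pel phase, selects the qpel filter */
    *chroma_full = mv >> 3; /* full-pel chroma position */
    *chroma_frac = mv &  7; /* eighth-pel phase for the bilinear chroma filter */
}
#endif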
1651 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1652 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1653 int x_offset, int y_offset,
1654 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1655 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1656 int list0, int list1){
1657 MpegEncContext * const s = &h->s;
1658 qpel_mc_func *qpix_op= qpix_put;
1659 h264_chroma_mc_func chroma_op= chroma_put;
1661 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1662 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1663 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1664 x_offset += 8*s->mb_x;
1665 y_offset += 8*(s->mb_y >> MB_FIELD);
1667 if(list0){
1668 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1669 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1670 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1671 qpix_op, chroma_op);
1673 qpix_op= qpix_avg;
1674 chroma_op= chroma_avg;
1677 if(list1){
1678 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1679 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1680 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1681 qpix_op, chroma_op);
1685 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1686 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1687 int x_offset, int y_offset,
1688 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1689 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1690 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1691 int list0, int list1){
1692 MpegEncContext * const s = &h->s;
1694 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1695 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1696 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1697 x_offset += 8*s->mb_x;
1698 y_offset += 8*(s->mb_y >> MB_FIELD);
1700 if(list0 && list1){
1701 /* don't optimize for luma-only case, since B-frames usually
1702 * use implicit weights => chroma too. */
1703 uint8_t *tmp_cb = s->obmc_scratchpad;
1704 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1705 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1706 int refn0 = h->ref_cache[0][ scan8[n] ];
1707 int refn1 = h->ref_cache[1][ scan8[n] ];
1709 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1710 dest_y, dest_cb, dest_cr,
1711 x_offset, y_offset, qpix_put, chroma_put);
1712 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1713 tmp_y, tmp_cb, tmp_cr,
1714 x_offset, y_offset, qpix_put, chroma_put);
1716 if(h->use_weight == 2){
1717 int weight0 = h->implicit_weight[refn0][refn1];
1718 int weight1 = 64 - weight0;
1719 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1720 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1721 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1722 }else{
1723 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1724 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1725 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1726 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1727 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1728 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1729 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1730 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1731 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1733 }else{
1734 int list = list1 ? 1 : 0;
1735 int refn = h->ref_cache[list][ scan8[n] ];
1736 Picture *ref= &h->ref_list[list][refn];
1737 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1738 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1739 qpix_put, chroma_put);
1741 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1742 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1743 if(h->use_weight_chroma){
1744 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1745 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1746 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1747 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1752 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1753 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1754 int x_offset, int y_offset,
1755 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1756 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1757 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1758 int list0, int list1){
1759 if((h->use_weight==2 && list0 && list1
1760 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1761 || h->use_weight==1)
1762 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1763 x_offset, y_offset, qpix_put, chroma_put,
1764 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1765 else
1766 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1767 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
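/* Note (illustrative): mc_part() above takes the weighted path only for
 * explicit weighted prediction (use_weight==1) or for implicit weighting when
 * the derived weight pair is not the trivial 32/32; otherwise the plain
 * put/avg motion compensation of mc_part_std() is cheaper and gives the same
 * result. */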
1770 static inline void prefetch_motion(H264Context *h, int list){
1771 /* fetch pixels for estimated mv 4 macroblocks ahead
1772 * optimized for 64-byte cache lines */
1773 MpegEncContext * const s = &h->s;
1774 const int refn = h->ref_cache[list][scan8[0]];
1775 if(refn >= 0){
1776 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1777 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1778 uint8_t **src= h->ref_list[list][refn].data;
1779 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1780 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1781 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1782 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1786 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1787 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1788 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1789 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1790 MpegEncContext * const s = &h->s;
1791 const int mb_xy= h->mb_xy;
1792 const int mb_type= s->current_picture.mb_type[mb_xy];
1794 assert(IS_INTER(mb_type));
1796 prefetch_motion(h, 0);
1798 if(IS_16X16(mb_type)){
1799 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1800 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1801 &weight_op[0], &weight_avg[0],
1802 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1803 }else if(IS_16X8(mb_type)){
1804 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1805 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1806 &weight_op[1], &weight_avg[1],
1807 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1808 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1809 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1810 &weight_op[1], &weight_avg[1],
1811 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1812 }else if(IS_8X16(mb_type)){
1813 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1814 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1815 &weight_op[2], &weight_avg[2],
1816 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1817 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1818 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1819 &weight_op[2], &weight_avg[2],
1820 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1821 }else{
1822 int i;
1824 assert(IS_8X8(mb_type));
1826 for(i=0; i<4; i++){
1827 const int sub_mb_type= h->sub_mb_type[i];
1828 const int n= 4*i;
1829 int x_offset= (i&1)<<2;
1830 int y_offset= (i&2)<<1;
1832 if(IS_SUB_8X8(sub_mb_type)){
1833 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1834 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1835 &weight_op[3], &weight_avg[3],
1836 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1837 }else if(IS_SUB_8X4(sub_mb_type)){
1838 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1840 &weight_op[4], &weight_avg[4],
1841 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1842 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1843 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1844 &weight_op[4], &weight_avg[4],
1845 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846 }else if(IS_SUB_4X8(sub_mb_type)){
1847 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1848 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1849 &weight_op[5], &weight_avg[5],
1850 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1851 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1852 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1853 &weight_op[5], &weight_avg[5],
1854 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1855 }else{
1856 int j;
1857 assert(IS_SUB_4X4(sub_mb_type));
1858 for(j=0; j<4; j++){
1859 int sub_x_offset= x_offset + 2*(j&1);
1860 int sub_y_offset= y_offset + (j&2);
1861 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1862 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1863 &weight_op[6], &weight_avg[6],
1864 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1870 prefetch_motion(h, 1);
1873 static av_cold void decode_init_vlc(void){
1874 static int done = 0;
1876 if (!done) {
1877 int i;
1878 int offset;
1879 done = 1;
1881 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1882 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1883 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1884 &chroma_dc_coeff_token_len [0], 1, 1,
1885 &chroma_dc_coeff_token_bits[0], 1, 1,
1886 INIT_VLC_USE_NEW_STATIC);
1888 offset = 0;
1889 for(i=0; i<4; i++){
1890 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1891 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1892 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1893 &coeff_token_len [i][0], 1, 1,
1894 &coeff_token_bits[i][0], 1, 1,
1895 INIT_VLC_USE_NEW_STATIC);
1896 offset += coeff_token_vlc_tables_size[i];
1899 * This is a one-time safety check to make sure that
1900 * the packed static coeff_token_vlc table sizes
1901 * were initialized correctly.
1903 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1905 for(i=0; i<3; i++){
1906 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1907 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1908 init_vlc(&chroma_dc_total_zeros_vlc[i],
1909 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1910 &chroma_dc_total_zeros_len [i][0], 1, 1,
1911 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1912 INIT_VLC_USE_NEW_STATIC);
1914 for(i=0; i<15; i++){
1915 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1916 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1917 init_vlc(&total_zeros_vlc[i],
1918 TOTAL_ZEROS_VLC_BITS, 16,
1919 &total_zeros_len [i][0], 1, 1,
1920 &total_zeros_bits[i][0], 1, 1,
1921 INIT_VLC_USE_NEW_STATIC);
1924 for(i=0; i<6; i++){
1925 run_vlc[i].table = run_vlc_tables[i];
1926 run_vlc[i].table_allocated = run_vlc_tables_size;
1927 init_vlc(&run_vlc[i],
1928 RUN_VLC_BITS, 7,
1929 &run_len [i][0], 1, 1,
1930 &run_bits[i][0], 1, 1,
1931 INIT_VLC_USE_NEW_STATIC);
1933 run7_vlc.table = run7_vlc_table;
1934 run7_vlc.table_allocated = run7_vlc_table_size;
1935 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1936 &run_len [6][0], 1, 1,
1937 &run_bits[6][0], 1, 1,
1938 INIT_VLC_USE_NEW_STATIC);
1942 static void free_tables(H264Context *h){
1943 int i;
1944 H264Context *hx;
1945 av_freep(&h->intra4x4_pred_mode);
1946 av_freep(&h->chroma_pred_mode_table);
1947 av_freep(&h->cbp_table);
1948 av_freep(&h->mvd_table[0]);
1949 av_freep(&h->mvd_table[1]);
1950 av_freep(&h->direct_table);
1951 av_freep(&h->non_zero_count);
1952 av_freep(&h->slice_table_base);
1953 h->slice_table= NULL;
1955 av_freep(&h->mb2b_xy);
1956 av_freep(&h->mb2b8_xy);
1958 for(i = 0; i < h->s.avctx->thread_count; i++) {
1959 hx = h->thread_context[i];
1960 if(!hx) continue;
1961 av_freep(&hx->top_borders[1]);
1962 av_freep(&hx->top_borders[0]);
1963 av_freep(&hx->s.obmc_scratchpad);
1967 static void init_dequant8_coeff_table(H264Context *h){
1968 int i,q,x;
1969 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
1970 h->dequant8_coeff[0] = h->dequant8_buffer[0];
1971 h->dequant8_coeff[1] = h->dequant8_buffer[1];
1973 for(i=0; i<2; i++ ){
1974 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
1975 h->dequant8_coeff[1] = h->dequant8_buffer[0];
1976 break;
1979 for(q=0; q<52; q++){
1980 int shift = div6[q];
1981 int idx = rem6[q];
1982 for(x=0; x<64; x++)
1983 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
1984 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
1985 h->pps.scaling_matrix8[i][x]) << shift;
1990 static void init_dequant4_coeff_table(H264Context *h){
1991 int i,j,q,x;
1992 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
1993 for(i=0; i<6; i++ ){
1994 h->dequant4_coeff[i] = h->dequant4_buffer[i];
1995 for(j=0; j<i; j++){
1996 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
1997 h->dequant4_coeff[i] = h->dequant4_buffer[j];
1998 break;
2001 if(j<i)
2002 continue;
2004 for(q=0; q<52; q++){
2005 int shift = div6[q] + 2;
2006 int idx = rem6[q];
2007 for(x=0; x<16; x++)
2008 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2009 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2010 h->pps.scaling_matrix4[i][x]) << shift;
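/* Note (illustrative, not normative): the dequantization scale doubles every
 * 6 QP steps, which is why the loops above index a small base table with
 * rem6[q] and then left-shift by div6[q]; i.e. roughly
 * dequant(q, x) = base[q % 6][x] * scaling_matrix[x] << (q / 6). */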
2015 static void init_dequant_tables(H264Context *h){
2016 int i,x;
2017 init_dequant4_coeff_table(h);
2018 if(h->pps.transform_8x8_mode)
2019 init_dequant8_coeff_table(h);
2020 if(h->sps.transform_bypass){
2021 for(i=0; i<6; i++)
2022 for(x=0; x<16; x++)
2023 h->dequant4_coeff[i][0][x] = 1<<6;
2024 if(h->pps.transform_8x8_mode)
2025 for(i=0; i<2; i++)
2026 for(x=0; x<64; x++)
2027 h->dequant8_coeff[i][0][x] = 1<<6;
2033 * allocates tables.
2034 * needs width/height to have been set
2036 static int alloc_tables(H264Context *h){
2037 MpegEncContext * const s = &h->s;
2038 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2039 int x,y;
2041 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2043 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2044 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2045 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2047 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2048 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2049 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2050 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2052 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2053 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2055 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2056 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2057 for(y=0; y<s->mb_height; y++){
2058 for(x=0; x<s->mb_width; x++){
2059 const int mb_xy= x + y*s->mb_stride;
2060 const int b_xy = 4*x + 4*y*h->b_stride;
2061 const int b8_xy= 2*x + 2*y*h->b8_stride;
2063 h->mb2b_xy [mb_xy]= b_xy;
2064 h->mb2b8_xy[mb_xy]= b8_xy;
2068 s->obmc_scratchpad = NULL;
2070 if(!h->dequant4_coeff[0])
2071 init_dequant_tables(h);
2073 return 0;
2074 fail:
2075 free_tables(h);
2076 return -1;
2080 * Mimic alloc_tables(), but for every context thread.
2082 static void clone_tables(H264Context *dst, H264Context *src){
2083 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2084 dst->non_zero_count = src->non_zero_count;
2085 dst->slice_table = src->slice_table;
2086 dst->cbp_table = src->cbp_table;
2087 dst->mb2b_xy = src->mb2b_xy;
2088 dst->mb2b8_xy = src->mb2b8_xy;
2089 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2090 dst->mvd_table[0] = src->mvd_table[0];
2091 dst->mvd_table[1] = src->mvd_table[1];
2092 dst->direct_table = src->direct_table;
2094 dst->s.obmc_scratchpad = NULL;
2095 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2099 * Init context
2100 * Allocate buffers which are not shared amongst multiple threads.
2102 static int context_init(H264Context *h){
2103 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2106 return 0;
2107 fail:
2108 return -1; // free_tables will clean up for us
2111 static av_cold void common_init(H264Context *h){
2112 MpegEncContext * const s = &h->s;
2114 s->width = s->avctx->width;
2115 s->height = s->avctx->height;
2116 s->codec_id= s->avctx->codec->id;
2118 ff_h264_pred_init(&h->hpc, s->codec_id);
2120 h->dequant_coeff_pps= -1;
2121 s->unrestricted_mv=1;
2122 s->decode=1; //FIXME
2124 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2125 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2128 static av_cold int decode_init(AVCodecContext *avctx){
2129 H264Context *h= avctx->priv_data;
2130 MpegEncContext * const s = &h->s;
2132 MPV_decode_defaults(s);
2134 s->avctx = avctx;
2135 common_init(h);
2137 s->out_format = FMT_H264;
2138 s->workaround_bugs= avctx->workaround_bugs;
2140 // set defaults
2141 // s->decode_mb= ff_h263_decode_mb;
2142 s->quarter_sample = 1;
2143 s->low_delay= 1;
2145 if(avctx->codec_id == CODEC_ID_SVQ3)
2146 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2147 else
2148 avctx->pix_fmt= PIX_FMT_YUV420P;
2150 decode_init_vlc();
2152 if(avctx->extradata_size > 0 && avctx->extradata &&
2153 *(char *)avctx->extradata == 1){
2154 h->is_avc = 1;
2155 h->got_avcC = 0;
2156 } else {
2157 h->is_avc = 0;
2160 h->thread_context[0] = h;
2161 h->outputed_poc = INT_MIN;
2162 h->prev_poc_msb= 1<<16;
2163 return 0;
2166 static int frame_start(H264Context *h){
2167 MpegEncContext * const s = &h->s;
2168 int i;
2170 if(MPV_frame_start(s, s->avctx) < 0)
2171 return -1;
2172 ff_er_frame_start(s);
2174 * MPV_frame_start uses pict_type to derive key_frame.
2175 * This is incorrect for H.264; IDR markings must be used.
2176 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2177 * See decode_nal_units().
2179 s->current_picture_ptr->key_frame= 0;
2181 assert(s->linesize && s->uvlinesize);
2183 for(i=0; i<16; i++){
2184 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2185 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2187 for(i=0; i<4; i++){
2188 h->block_offset[16+i]=
2189 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2190 h->block_offset[24+16+i]=
2191 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2194 /* can't be in alloc_tables because linesize isn't known there.
2195 * FIXME: redo bipred weight to not require extra buffer? */
2196 for(i = 0; i < s->avctx->thread_count; i++)
2197 if(!h->thread_context[i]->s.obmc_scratchpad)
2198 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2200 /* some macroblocks will be accessed before they're available */
2201 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2202 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2204 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2206 // We mark the current picture as non-reference after allocating it, so
2207 // that if we break out due to an error it can be released automatically
2208 // in the next MPV_frame_start().
2209 // SVQ3, like most other codecs, has only last/next/current pictures, so they
2210 // get released even when marked as reference; besides, SVQ3 and the others do
2211 // not mark frames as reference later "naturally".
2212 if(s->codec_id != CODEC_ID_SVQ3)
2213 s->current_picture_ptr->reference= 0;
2215 s->current_picture_ptr->field_poc[0]=
2216 s->current_picture_ptr->field_poc[1]= INT_MAX;
2217 assert(s->current_picture_ptr->long_ref==0);
2219 return 0;
2222 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2223 MpegEncContext * const s = &h->s;
2224 int i;
2225 int step = 1;
2226 int offset = 1;
2227 int uvoffset= 1;
2228 int top_idx = 1;
2229 int skiplast= 0;
2231 src_y -= linesize;
2232 src_cb -= uvlinesize;
2233 src_cr -= uvlinesize;
2235 if(!simple && FRAME_MBAFF){
2236 if(s->mb_y&1){
2237 offset = MB_MBAFF ? 1 : 17;
2238 uvoffset= MB_MBAFF ? 1 : 9;
2239 if(!MB_MBAFF){
2240 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2241 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2242 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2243 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2244 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2247 }else{
2248 if(!MB_MBAFF){
2249 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2250 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2251 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2252 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2254 skiplast= 1;
2256 offset =
2257 uvoffset=
2258 top_idx = MB_MBAFF ? 0 : 1;
2260 step= MB_MBAFF ? 2 : 1;
2263 // There are two lines saved: the line above the top macroblock of a pair,
2264 // and the line above the bottom macroblock
2265 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2266 for(i=1; i<17 - skiplast; i++){
2267 h->left_border[offset+i*step]= src_y[15+i* linesize];
2270 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2271 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2273 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2274 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2275 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2276 for(i=1; i<9 - skiplast; i++){
2277 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2278 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2280 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2281 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2285 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2286 MpegEncContext * const s = &h->s;
2287 int temp8, i;
2288 uint64_t temp64;
2289 int deblock_left;
2290 int deblock_top;
2291 int mb_xy;
2292 int step = 1;
2293 int offset = 1;
2294 int uvoffset= 1;
2295 int top_idx = 1;
2297 if(!simple && FRAME_MBAFF){
2298 if(s->mb_y&1){
2299 offset = MB_MBAFF ? 1 : 17;
2300 uvoffset= MB_MBAFF ? 1 : 9;
2301 }else{
2302 offset =
2303 uvoffset=
2304 top_idx = MB_MBAFF ? 0 : 1;
2306 step= MB_MBAFF ? 2 : 1;
2309 if(h->deblocking_filter == 2) {
2310 mb_xy = h->mb_xy;
2311 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2312 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2313 } else {
2314 deblock_left = (s->mb_x > 0);
2315 deblock_top = (s->mb_y > !!MB_FIELD);
2318 src_y -= linesize + 1;
2319 src_cb -= uvlinesize + 1;
2320 src_cr -= uvlinesize + 1;
2322 #define XCHG(a,b,t,xchg)\
2323 t= a;\
2324 if(xchg)\
2325 a= b;\
2326 b= t;
2328 if(deblock_left){
2329 for(i = !deblock_top; i<16; i++){
2330 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2332 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2335 if(deblock_top){
2336 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2337 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2338 if(s->mb_x+1 < s->mb_width){
2339 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2343 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2344 if(deblock_left){
2345 for(i = !deblock_top; i<8; i++){
2346 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2347 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2349 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2350 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2352 if(deblock_top){
2353 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2354 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2359 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2360 MpegEncContext * const s = &h->s;
2361 const int mb_x= s->mb_x;
2362 const int mb_y= s->mb_y;
2363 const int mb_xy= h->mb_xy;
2364 const int mb_type= s->current_picture.mb_type[mb_xy];
2365 uint8_t *dest_y, *dest_cb, *dest_cr;
2366 int linesize, uvlinesize /*dct_offset*/;
2367 int i;
2368 int *block_offset = &h->block_offset[0];
2369 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2370 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2371 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2373 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2374 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2375 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2377 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2378 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2380 if (!simple && MB_FIELD) {
2381 linesize = h->mb_linesize = s->linesize * 2;
2382 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2383 block_offset = &h->block_offset[24];
2384 if(mb_y&1){ //FIXME move out of this function?
2385 dest_y -= s->linesize*15;
2386 dest_cb-= s->uvlinesize*7;
2387 dest_cr-= s->uvlinesize*7;
2389 if(FRAME_MBAFF) {
2390 int list;
2391 for(list=0; list<h->list_count; list++){
2392 if(!USES_LIST(mb_type, list))
2393 continue;
2394 if(IS_16X16(mb_type)){
2395 int8_t *ref = &h->ref_cache[list][scan8[0]];
2396 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2397 }else{
2398 for(i=0; i<16; i+=4){
2399 int ref = h->ref_cache[list][scan8[i]];
2400 if(ref >= 0)
2401 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2406 } else {
2407 linesize = h->mb_linesize = s->linesize;
2408 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2409 // dct_offset = s->linesize * 16;
2412 if(transform_bypass){
2413 idct_dc_add =
2414 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2415 }else if(IS_8x8DCT(mb_type)){
2416 idct_dc_add = s->dsp.h264_idct8_dc_add;
2417 idct_add = s->dsp.h264_idct8_add;
2418 }else{
2419 idct_dc_add = s->dsp.h264_idct_dc_add;
2420 idct_add = s->dsp.h264_idct_add;
2423 if (!simple && IS_INTRA_PCM(mb_type)) {
2424 for (i=0; i<16; i++) {
2425 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2427 for (i=0; i<8; i++) {
2428 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2429 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2431 } else {
2432 if(IS_INTRA(mb_type)){
2433 if(h->deblocking_filter)
2434 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2436 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2437 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2438 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2441 if(IS_INTRA4x4(mb_type)){
2442 if(simple || !s->encoding){
2443 if(IS_8x8DCT(mb_type)){
2444 for(i=0; i<16; i+=4){
2445 uint8_t * const ptr= dest_y + block_offset[i];
2446 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2447 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2448 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2449 (h->topright_samples_available<<i)&0x4000, linesize);
2450 if(nnz){
2451 if(nnz == 1 && h->mb[i*16])
2452 idct_dc_add(ptr, h->mb + i*16, linesize);
2453 else
2454 idct_add(ptr, h->mb + i*16, linesize);
2457 }else
2458 for(i=0; i<16; i++){
2459 uint8_t * const ptr= dest_y + block_offset[i];
2460 uint8_t *topright;
2461 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2462 int nnz, tr;
2464 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2465 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2466 assert(mb_y || linesize <= block_offset[i]);
2467 if(!topright_avail){
2468 tr= ptr[3 - linesize]*0x01010101;
2469 topright= (uint8_t*) &tr;
2470 }else
2471 topright= ptr + 4 - linesize;
2472 }else
2473 topright= NULL;
2475 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2476 nnz = h->non_zero_count_cache[ scan8[i] ];
2477 if(nnz){
2478 if(is_h264){
2479 if(nnz == 1 && h->mb[i*16])
2480 idct_dc_add(ptr, h->mb + i*16, linesize);
2481 else
2482 idct_add(ptr, h->mb + i*16, linesize);
2483 }else
2484 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2488 }else{
2489 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2490 if(is_h264){
2491 if(!transform_bypass)
2492 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2493 }else
2494 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2496 if(h->deblocking_filter)
2497 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2498 }else if(is_h264){
2499 hl_motion(h, dest_y, dest_cb, dest_cr,
2500 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2501 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2502 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2506 if(!IS_INTRA4x4(mb_type)){
2507 if(is_h264){
2508 if(IS_INTRA16x16(mb_type)){
2509 for(i=0; i<16; i++){
2510 if(h->non_zero_count_cache[ scan8[i] ])
2511 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2512 else if(h->mb[i*16])
2513 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2515 }else{
2516 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2517 for(i=0; i<16; i+=di){
2518 int nnz = h->non_zero_count_cache[ scan8[i] ];
2519 if(nnz){
2520 if(nnz==1 && h->mb[i*16])
2521 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2522 else
2523 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2527 }else{
2528 for(i=0; i<16; i++){
2529 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2530 uint8_t * const ptr= dest_y + block_offset[i];
2531 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2537 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2538 uint8_t *dest[2] = {dest_cb, dest_cr};
2539 if(transform_bypass){
2540 idct_add = idct_dc_add = s->dsp.add_pixels4;
2541 }else{
2542 idct_add = s->dsp.h264_idct_add;
2543 idct_dc_add = s->dsp.h264_idct_dc_add;
2544 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2545 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2547 if(is_h264){
2548 for(i=16; i<16+8; i++){
2549 if(h->non_zero_count_cache[ scan8[i] ])
2550 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2551 else if(h->mb[i*16])
2552 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2554 }else{
2555 for(i=16; i<16+8; i++){
2556 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2557 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2558 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2564 if(h->deblocking_filter) {
2565 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2566 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2567 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2568 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2569 if (!simple && FRAME_MBAFF) {
2570 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2571 } else {
2572 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2578 * Process a macroblock; this variant avoids checks for expensive, uncommon cases.
2580 static void hl_decode_mb_simple(H264Context *h){
2581 hl_decode_mb_internal(h, 1);
2585 * Process a macroblock; this handles edge cases, such as interlacing.
2587 static void av_noinline hl_decode_mb_complex(H264Context *h){
2588 hl_decode_mb_internal(h, 0);
2591 static void hl_decode_mb(H264Context *h){
2592 MpegEncContext * const s = &h->s;
2593 const int mb_xy= h->mb_xy;
2594 const int mb_type= s->current_picture.mb_type[mb_xy];
2595 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2596 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2598 if(ENABLE_H264_ENCODER && !s->decode)
2599 return;
2601 if (is_complex)
2602 hl_decode_mb_complex(h);
2603 else hl_decode_mb_simple(h);
2606 static void pic_as_field(Picture *pic, const int parity){
2607 int i;
2608 for (i = 0; i < 4; ++i) {
2609 if (parity == PICT_BOTTOM_FIELD)
2610 pic->data[i] += pic->linesize[i];
2611 pic->reference = parity;
2612 pic->linesize[i] *= 2;
2614 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2617 static int split_field_copy(Picture *dest, Picture *src,
2618 int parity, int id_add){
2619 int match = !!(src->reference & parity);
2621 if (match) {
2622 *dest = *src;
2623 if(parity != PICT_FRAME){
2624 pic_as_field(dest, parity);
2625 dest->pic_id *= 2;
2626 dest->pic_id += id_add;
2630 return match;
2633 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2634 int i[2]={0};
2635 int index=0;
2637 while(i[0]<len || i[1]<len){
2638 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2639 i[0]++;
2640 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2641 i[1]++;
2642 if(i[0] < len){
2643 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2644 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2646 if(i[1] < len){
2647 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2648 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2652 return index;
2655 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2656 int i, best_poc;
2657 int out_i= 0;
2659 for(;;){
2660 best_poc= dir ? INT_MIN : INT_MAX;
2662 for(i=0; i<len; i++){
2663 const int poc= src[i]->poc;
2664 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2665 best_poc= poc;
2666 sorted[out_i]= src[i];
2669 if(best_poc == (dir ? INT_MIN : INT_MAX))
2670 break;
2671 limit= sorted[out_i++]->poc - dir;
2673 return out_i;
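/* Illustrative example (values made up): for a B slice with cur_poc=6 and
 * short-term references with POCs {0,2,4,8,10}, the add_sorted() calls in
 * fill_default_ref_list() below produce
 *   list0: 4, 2, 0, 8, 10   (past refs by descending POC, then future refs by ascending POC)
 *   list1: 8, 10, 4, 2, 0   (future refs first, then past refs)
 * with any long-term references appended afterwards by build_def_list(). */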
2677 * fills the default_ref_list.
2679 static int fill_default_ref_list(H264Context *h){
2680 MpegEncContext * const s = &h->s;
2681 int i, len;
2683 if(h->slice_type_nos==FF_B_TYPE){
2684 Picture *sorted[32];
2685 int cur_poc, list;
2686 int lens[2];
2688 if(FIELD_PICTURE)
2689 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2690 else
2691 cur_poc= s->current_picture_ptr->poc;
2693 for(list= 0; list<2; list++){
2694 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2695 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2696 assert(len<=32);
2697 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2698 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2699 assert(len<=32);
2701 if(len < h->ref_count[list])
2702 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2703 lens[list]= len;
2706 if(lens[0] == lens[1] && lens[1] > 1){
2707 for(i=0; i<lens[0] && h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++);
2708 if(i == lens[0])
2709 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2711 }else{
2712 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2713 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2714 assert(len <= 32);
2715 if(len < h->ref_count[0])
2716 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2718 #ifdef TRACE
2719 for (i=0; i<h->ref_count[0]; i++) {
2720 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2722 if(h->slice_type_nos==FF_B_TYPE){
2723 for (i=0; i<h->ref_count[1]; i++) {
2724 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2727 #endif
2728 return 0;
2731 static void print_short_term(H264Context *h);
2732 static void print_long_term(H264Context *h);
2735 * Extract structure information about the picture described by pic_num in
2736 * the current decoding context (frame or field). Note that pic_num is
2737 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2738 * @param pic_num picture number for which to extract structure information
2739 * @param structure one of PICT_XXX describing structure of picture
2740 * with pic_num
2741 * @return frame number (short term) or long term index of picture
2742 * described by pic_num
2744 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2745 MpegEncContext * const s = &h->s;
2747 *structure = s->picture_structure;
2748 if(FIELD_PICTURE){
2749 if (!(pic_num & 1))
2750 /* opposite field */
2751 *structure ^= PICT_FRAME;
2752 pic_num >>= 1;
2755 return pic_num;
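/* Illustrative example: when decoding a field picture, an odd pic_num refers
 * to the field with the same parity as the current one and an even pic_num to
 * the opposite parity, so e.g. pic_num 7 maps to frame_num 3 / same parity
 * and pic_num 6 to frame_num 3 / opposite parity. */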
2758 static int decode_ref_pic_list_reordering(H264Context *h){
2759 MpegEncContext * const s = &h->s;
2760 int list, index, pic_structure;
2762 print_short_term(h);
2763 print_long_term(h);
2765 for(list=0; list<h->list_count; list++){
2766 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2768 if(get_bits1(&s->gb)){
2769 int pred= h->curr_pic_num;
2771 for(index=0; ; index++){
2772 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2773 unsigned int pic_id;
2774 int i;
2775 Picture *ref = NULL;
2777 if(reordering_of_pic_nums_idc==3)
2778 break;
2780 if(index >= h->ref_count[list]){
2781 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2782 return -1;
2785 if(reordering_of_pic_nums_idc<3){
2786 if(reordering_of_pic_nums_idc<2){
2787 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2788 int frame_num;
2790 if(abs_diff_pic_num > h->max_pic_num){
2791 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2792 return -1;
2795 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2796 else pred+= abs_diff_pic_num;
2797 pred &= h->max_pic_num - 1;
2799 frame_num = pic_num_extract(h, pred, &pic_structure);
2801 for(i= h->short_ref_count-1; i>=0; i--){
2802 ref = h->short_ref[i];
2803 assert(ref->reference);
2804 assert(!ref->long_ref);
2806 ref->frame_num == frame_num &&
2807 (ref->reference & pic_structure)
2809 break;
2811 if(i>=0)
2812 ref->pic_id= pred;
2813 }else{
2814 int long_idx;
2815 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2817 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2819 if(long_idx>31){
2820 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2821 return -1;
2823 ref = h->long_ref[long_idx];
2824 assert(!(ref && !ref->reference));
2825 if(ref && (ref->reference & pic_structure)){
2826 ref->pic_id= pic_id;
2827 assert(ref->long_ref);
2828 i=0;
2829 }else{
2830 i=-1;
2834 if (i < 0) {
2835 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2836 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2837 } else {
2838 for(i=index; i+1<h->ref_count[list]; i++){
2839 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2840 break;
2842 for(; i > index; i--){
2843 h->ref_list[list][i]= h->ref_list[list][i-1];
2845 h->ref_list[list][index]= *ref;
2846 if (FIELD_PICTURE){
2847 pic_as_field(&h->ref_list[list][index], pic_structure);
2850 }else{
2851 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2852 return -1;
2857 for(list=0; list<h->list_count; list++){
2858 for(index= 0; index < h->ref_count[list]; index++){
2859 if(!h->ref_list[list][index].data[0]){
2860 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2861 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2866 return 0;
2869 static void fill_mbaff_ref_list(H264Context *h){
2870 int list, i, j;
2871 for(list=0; list<2; list++){ //FIXME try list_count
2872 for(i=0; i<h->ref_count[list]; i++){
2873 Picture *frame = &h->ref_list[list][i];
2874 Picture *field = &h->ref_list[list][16+2*i];
2875 field[0] = *frame;
2876 for(j=0; j<3; j++)
2877 field[0].linesize[j] <<= 1;
2878 field[0].reference = PICT_TOP_FIELD;
2879 field[0].poc= field[0].field_poc[0];
2880 field[1] = field[0];
2881 for(j=0; j<3; j++)
2882 field[1].data[j] += frame->linesize[j];
2883 field[1].reference = PICT_BOTTOM_FIELD;
2884 field[1].poc= field[1].field_poc[1];
2886 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2887 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2888 for(j=0; j<2; j++){
2889 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2890 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2894 for(j=0; j<h->ref_count[1]; j++){
2895 for(i=0; i<h->ref_count[0]; i++)
2896 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2897 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2898 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2902 static int pred_weight_table(H264Context *h){
2903 MpegEncContext * const s = &h->s;
2904 int list, i;
2905 int luma_def, chroma_def;
2907 h->use_weight= 0;
2908 h->use_weight_chroma= 0;
2909 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2910 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2911 luma_def = 1<<h->luma_log2_weight_denom;
2912 chroma_def = 1<<h->chroma_log2_weight_denom;
2914 for(list=0; list<2; list++){
2915 for(i=0; i<h->ref_count[list]; i++){
2916 int luma_weight_flag, chroma_weight_flag;
2918 luma_weight_flag= get_bits1(&s->gb);
2919 if(luma_weight_flag){
2920 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2921 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2922 if( h->luma_weight[list][i] != luma_def
2923 || h->luma_offset[list][i] != 0)
2924 h->use_weight= 1;
2925 }else{
2926 h->luma_weight[list][i]= luma_def;
2927 h->luma_offset[list][i]= 0;
2930 if(CHROMA){
2931 chroma_weight_flag= get_bits1(&s->gb);
2932 if(chroma_weight_flag){
2933 int j;
2934 for(j=0; j<2; j++){
2935 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2936 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2937 if( h->chroma_weight[list][i][j] != chroma_def
2938 || h->chroma_offset[list][i][j] != 0)
2939 h->use_weight_chroma= 1;
2941 }else{
2942 int j;
2943 for(j=0; j<2; j++){
2944 h->chroma_weight[list][i][j]= chroma_def;
2945 h->chroma_offset[list][i][j]= 0;
2950 if(h->slice_type_nos != FF_B_TYPE) break;
2952 h->use_weight= h->use_weight || h->use_weight_chroma;
2953 return 0;
2956 static void implicit_weight_table(H264Context *h){
2957 MpegEncContext * const s = &h->s;
2958 int ref0, ref1;
2959 int cur_poc = s->current_picture_ptr->poc;
2961 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
2962 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
2963 h->use_weight= 0;
2964 h->use_weight_chroma= 0;
2965 return;
2968 h->use_weight= 2;
2969 h->use_weight_chroma= 2;
2970 h->luma_log2_weight_denom= 5;
2971 h->chroma_log2_weight_denom= 5;
2973 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
2974 int poc0 = h->ref_list[0][ref0].poc;
2975 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
2976 int poc1 = h->ref_list[1][ref1].poc;
2977 int td = av_clip(poc1 - poc0, -128, 127);
2978 if(td){
2979 int tb = av_clip(cur_poc - poc0, -128, 127);
2980 int tx = (16384 + (FFABS(td) >> 1)) / td;
2981 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
2982 if(dist_scale_factor < -64 || dist_scale_factor > 128)
2983 h->implicit_weight[ref0][ref1] = 32;
2984 else
2985 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
2986 }else
2987 h->implicit_weight[ref0][ref1] = 32;
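/* Illustrative sketch (not used by the decoder; the helper name is made up):
 * the implicit bi-prediction weight derived above depends only on the POC
 * distances of the two references from the current picture. For example, with
 * cur_poc=2, poc0=0, poc1=8 the list0 reference is three times closer, so it
 * gets weight 48 and the list1 reference gets 64-48=16 (denominator 64); with
 * the current picture exactly halfway, both weights are 32. */
#if 0
static int example_implicit_weight(int cur_poc, int poc0, int poc1){
    int td = av_clip(poc1 - poc0, -128, 127);
    int tb, tx, dist_scale_factor;
    if(!td)
        return 32;
    tb = av_clip(cur_poc - poc0, -128, 127);
    tx = (16384 + (FFABS(td) >> 1)) / td;
    dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
    if(dist_scale_factor < -64 || dist_scale_factor > 128)
        return 32;
    return 64 - dist_scale_factor; /* 48 for (2,0,8), 32 for (4,0,8) */
}
#endif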
2993 * Mark a picture as no longer needed for reference. The refmask
2994 * argument allows unreferencing of individual fields or the whole frame.
2995 * If the picture becomes entirely unreferenced, but is being held for
2996 * display purposes, it is marked as such.
2997 * @param refmask mask of fields to unreference; the mask is bitwise
2998 * ANDed with the reference marking of pic
2999 * @return non-zero if pic becomes entirely unreferenced (except possibly
3000 * for display purposes), zero if one of the fields remains in
3001 * reference
3003 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3004 int i;
3005 if (pic->reference &= refmask) {
3006 return 0;
3007 } else {
3008 for(i = 0; h->delayed_pic[i]; i++)
3009 if(pic == h->delayed_pic[i]){
3010 pic->reference=DELAYED_PIC_REF;
3011 break;
3013 return 1;
3018 * instantaneous decoder refresh.
3020 static void idr(H264Context *h){
3021 int i;
3023 for(i=0; i<16; i++){
3024 remove_long(h, i, 0);
3026 assert(h->long_ref_count==0);
3028 for(i=0; i<h->short_ref_count; i++){
3029 unreference_pic(h, h->short_ref[i], 0);
3030 h->short_ref[i]= NULL;
3032 h->short_ref_count=0;
3033 h->prev_frame_num= 0;
3034 h->prev_frame_num_offset= 0;
3035 h->prev_poc_msb=
3036 h->prev_poc_lsb= 0;
3039 /* forget old pics after a seek */
3040 static void flush_dpb(AVCodecContext *avctx){
3041 H264Context *h= avctx->priv_data;
3042 int i;
3043 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3044 if(h->delayed_pic[i])
3045 h->delayed_pic[i]->reference= 0;
3046 h->delayed_pic[i]= NULL;
3048 h->outputed_poc= INT_MIN;
3049 idr(h);
3050 if(h->s.current_picture_ptr)
3051 h->s.current_picture_ptr->reference= 0;
3052 h->s.first_field= 0;
3053 ff_mpeg_flush(avctx);
3057 * Find a Picture in the short term reference list by frame number.
3058 * @param frame_num frame number to search for
3059 * @param idx the index into h->short_ref where the returned picture is found;
3060 * undefined if no picture is found.
3061 * @return pointer to the found picture, or NULL if no pic with the provided
3062 * frame number is found
3064 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3065 MpegEncContext * const s = &h->s;
3066 int i;
3068 for(i=0; i<h->short_ref_count; i++){
3069 Picture *pic= h->short_ref[i];
3070 if(s->avctx->debug&FF_DEBUG_MMCO)
3071 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3072 if(pic->frame_num == frame_num) {
3073 *idx = i;
3074 return pic;
3077 return NULL;
3081 * Remove a picture from the short term reference list by its index in
3082 * that list. This does no checking on the provided index; it is assumed
3083 * to be valid. Other list entries are shifted down.
3084 * @param i index into h->short_ref of picture to remove.
3086 static void remove_short_at_index(H264Context *h, int i){
3087 assert(i >= 0 && i < h->short_ref_count);
3088 h->short_ref[i]= NULL;
3089 if (--h->short_ref_count)
3090 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3095 * @return the removed picture or NULL if an error occurs
3097 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3098 MpegEncContext * const s = &h->s;
3099 Picture *pic;
3100 int i;
3102 if(s->avctx->debug&FF_DEBUG_MMCO)
3103 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3105 pic = find_short(h, frame_num, &i);
3106 if (pic){
3107 if(unreference_pic(h, pic, ref_mask))
3108 remove_short_at_index(h, i);
3111 return pic;
3115 * Remove a picture from the long term reference list by its index in
3116 * that list.
3117 * @return the removed picture or NULL if an error occurs
3119 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3120 Picture *pic;
3122 pic= h->long_ref[i];
3123 if (pic){
3124 if(unreference_pic(h, pic, ref_mask)){
3125 assert(h->long_ref[i]->long_ref == 1);
3126 h->long_ref[i]->long_ref= 0;
3127 h->long_ref[i]= NULL;
3128 h->long_ref_count--;
3132 return pic;
3136 * print short term list
3138 static void print_short_term(H264Context *h) {
3139 uint32_t i;
3140 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3141 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3142 for(i=0; i<h->short_ref_count; i++){
3143 Picture *pic= h->short_ref[i];
3144 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3150 * print long term list
3152 static void print_long_term(H264Context *h) {
3153 uint32_t i;
3154 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3155 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3156 for(i = 0; i < 16; i++){
3157 Picture *pic= h->long_ref[i];
3158 if (pic) {
3159 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3166 * Executes the reference picture marking (memory management control operations).
3168 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3169 MpegEncContext * const s = &h->s;
3170 int i, j;
3171 int current_ref_assigned=0;
3172 Picture *pic;
3174 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3175 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3177 for(i=0; i<mmco_count; i++){
3178 int structure, frame_num;
3179 if(s->avctx->debug&FF_DEBUG_MMCO)
3180 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3182 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3183 || mmco[i].opcode == MMCO_SHORT2LONG){
3184 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3185 pic = find_short(h, frame_num, &j);
3186 if(!pic){
3187 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3188 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3189 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3190 continue;
3194 switch(mmco[i].opcode){
3195 case MMCO_SHORT2UNUSED:
3196 if(s->avctx->debug&FF_DEBUG_MMCO)
3197 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3198 remove_short(h, frame_num, structure ^ PICT_FRAME);
3199 break;
3200 case MMCO_SHORT2LONG:
3201 if (h->long_ref[mmco[i].long_arg] != pic)
3202 remove_long(h, mmco[i].long_arg, 0);
3204 remove_short_at_index(h, j);
3205 h->long_ref[ mmco[i].long_arg ]= pic;
3206 if (h->long_ref[ mmco[i].long_arg ]){
3207 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3208 h->long_ref_count++;
3210 break;
3211 case MMCO_LONG2UNUSED:
3212 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3213 pic = h->long_ref[j];
3214 if (pic) {
3215 remove_long(h, j, structure ^ PICT_FRAME);
3216 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3217 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3218 break;
3219 case MMCO_LONG:
3220 // Comment below kept from previous code as it is an interesting note.
3221 /* First field in pair is in short term list or
3222 * at a different long term index.
3223 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3224 * Report the problem and keep the pair where it is,
3225 * and mark this field valid.
3228 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3229 remove_long(h, mmco[i].long_arg, 0);
3231 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3232 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3233 h->long_ref_count++;
3236 s->current_picture_ptr->reference |= s->picture_structure;
3237 current_ref_assigned=1;
3238 break;
3239 case MMCO_SET_MAX_LONG:
3240 assert(mmco[i].long_arg <= 16);
3241 // just remove the long term refs whose index is greater than the new max
3242 for(j = mmco[i].long_arg; j<16; j++){
3243 remove_long(h, j, 0);
3245 break;
3246 case MMCO_RESET:
3247 while(h->short_ref_count){
3248 remove_short(h, h->short_ref[0]->frame_num, 0);
3250 for(j = 0; j < 16; j++) {
3251 remove_long(h, j, 0);
3253 s->current_picture_ptr->poc=
3254 s->current_picture_ptr->field_poc[0]=
3255 s->current_picture_ptr->field_poc[1]=
3256 h->poc_lsb=
3257 h->poc_msb=
3258 h->frame_num=
3259 s->current_picture_ptr->frame_num= 0;
3260 break;
3261 default: assert(0);
3265 if (!current_ref_assigned) {
3266 /* This is the second field of a complementary field pair, the first
3267 * field of which is already referenced. If short referenced, it
3268 * should be the first entry in short_ref. If not, it must exist
3269 * in long_ref; trying to put it on the short list here is an
3270 * error in the encoded bitstream (ref: 7.4.3.3, NOTE 2 and 3).
3272 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3273 /* Just mark the second field valid */
3274 s->current_picture_ptr->reference = PICT_FRAME;
3275 } else if (s->current_picture_ptr->long_ref) {
3276 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3277 "assignment for second field "
3278 "in complementary field pair "
3279 "(first field is long term)\n");
3280 } else {
3281 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3282 if(pic){
3283 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3286 if(h->short_ref_count)
3287 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3289 h->short_ref[0]= s->current_picture_ptr;
3290 h->short_ref_count++;
3291 s->current_picture_ptr->reference |= s->picture_structure;
3295 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3297 /* We have too many reference frames, probably due to corrupted
3298 * stream. Need to discard one frame. Prevents overrun of the
3299 * short_ref and long_ref buffers.
3301 av_log(h->s.avctx, AV_LOG_ERROR,
3302 "number of reference frames exceeds max (probably "
3303 "corrupt input), discarding one\n");
3305 if (h->long_ref_count && !h->short_ref_count) {
3306 for (i = 0; i < 16; ++i)
3307 if (h->long_ref[i])
3308 break;
3310 assert(i < 16);
3311 remove_long(h, i, 0);
3312 } else {
3313 pic = h->short_ref[h->short_ref_count - 1];
3314 remove_short(h, pic->frame_num, 0);
3318 print_short_term(h);
3319 print_long_term(h);
3320 return 0;
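/**
 * Parses the dec_ref_pic_marking() part of the slice header and fills
 * h->mmco[] / h->mmco_index; the operations themselves are applied later
 * by execute_ref_pic_marking().
 */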
3323 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3324 MpegEncContext * const s = &h->s;
3325 int i;
3327 h->mmco_index= 0;
3328 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3329 s->broken_link= get_bits1(gb) -1;
3330 if(get_bits1(gb)){
3331 h->mmco[0].opcode= MMCO_LONG;
3332 h->mmco[0].long_arg= 0;
3333 h->mmco_index= 1;
3335 }else{
3336 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3337 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3338 MMCOOpcode opcode= get_ue_golomb(gb);
3340 h->mmco[i].opcode= opcode;
3341 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3342 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3343 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3344 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3345 return -1;
3348 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3349 unsigned int long_arg= get_ue_golomb(gb);
3350 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3351 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3352 return -1;
3354 h->mmco[i].long_arg= long_arg;
3357 if(opcode > (unsigned)MMCO_LONG){
3358 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3359 return -1;
3361 if(opcode == MMCO_END)
3362 break;
3364 h->mmco_index= i;
3365 }else{
3366 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3368 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3369 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3370 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3371 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3372 h->mmco_index= 1;
3373 if (FIELD_PICTURE) {
3374 h->mmco[0].short_pic_num *= 2;
3375 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3376 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3377 h->mmco_index= 2;
3383 return 0;
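/**
 * Computes the picture order count (POC) of the current picture.
 * poc_type 0 reconstructs the MSB from pic_order_cnt_lsb, poc_type 1 uses
 * the expected delta per POC cycle signalled in the SPS, and poc_type 2
 * derives the POC directly from frame_num (see H.264 subclause 8.2.1).
 */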
3386 static int init_poc(H264Context *h){
3387 MpegEncContext * const s = &h->s;
3388 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3389 int field_poc[2];
3390 Picture *cur = s->current_picture_ptr;
3392 h->frame_num_offset= h->prev_frame_num_offset;
3393 if(h->frame_num < h->prev_frame_num)
3394 h->frame_num_offset += max_frame_num;
3396 if(h->sps.poc_type==0){
3397 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
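/* Detect wrap-around of pic_order_cnt_lsb relative to the previous reference
 * picture and step poc_msb up or down by max_poc_lsb accordingly (8.2.1.1). */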
3399 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3400 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3401 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3402 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3403 else
3404 h->poc_msb = h->prev_poc_msb;
3405 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3406 field_poc[0] =
3407 field_poc[1] = h->poc_msb + h->poc_lsb;
3408 if(s->picture_structure == PICT_FRAME)
3409 field_poc[1] += h->delta_poc_bottom;
3410 }else if(h->sps.poc_type==1){
3411 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3412 int i;
3414 if(h->sps.poc_cycle_length != 0)
3415 abs_frame_num = h->frame_num_offset + h->frame_num;
3416 else
3417 abs_frame_num = 0;
3419 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3420 abs_frame_num--;
3422 expected_delta_per_poc_cycle = 0;
3423 for(i=0; i < h->sps.poc_cycle_length; i++)
3424 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3426 if(abs_frame_num > 0){
3427 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3428 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3430 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3431 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3432 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3433 } else
3434 expectedpoc = 0;
3436 if(h->nal_ref_idc == 0)
3437 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3439 field_poc[0] = expectedpoc + h->delta_poc[0];
3440 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3442 if(s->picture_structure == PICT_FRAME)
3443 field_poc[1] += h->delta_poc[1];
3444 }else{
3445 int poc= 2*(h->frame_num_offset + h->frame_num);
3447 if(!h->nal_ref_idc)
3448 poc--;
3450 field_poc[0]= poc;
3451 field_poc[1]= poc;
3454 if(s->picture_structure != PICT_BOTTOM_FIELD)
3455 s->current_picture_ptr->field_poc[0]= field_poc[0];
3456 if(s->picture_structure != PICT_TOP_FIELD)
3457 s->current_picture_ptr->field_poc[1]= field_poc[1];
3458 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3460 return 0;
3465 * initialize scan tables
3467 static void init_scan_tables(H264Context *h){
3468 MpegEncContext * const s = &h->s;
3469 int i;
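/* If the DSP context uses a non-C IDCT, the scan tables are permuted so that
 * decoded coefficients land in the order that IDCT expects; presumably this
 * matches the transposed block layout of the SIMD implementations. */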
3470 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3471 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3472 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3473 }else{
3474 for(i=0; i<16; i++){
3475 #define T(x) (x>>2) | ((x<<2) & 0xF)
3476 h->zigzag_scan[i] = T(zigzag_scan[i]);
3477 h-> field_scan[i] = T( field_scan[i]);
3478 #undef T
3481 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3482 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3483 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3484 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3485 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3486 }else{
3487 for(i=0; i<64; i++){
3488 #define T(x) (x>>3) | ((x&7)<<3)
3489 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3490 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3491 h->field_scan8x8[i] = T(field_scan8x8[i]);
3492 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3493 #undef T
3496 if(h->sps.transform_bypass){ //FIXME same ugly
3497 h->zigzag_scan_q0 = zigzag_scan;
3498 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3499 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3500 h->field_scan_q0 = field_scan;
3501 h->field_scan8x8_q0 = field_scan8x8;
3502 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3503 }else{
3504 h->zigzag_scan_q0 = h->zigzag_scan;
3505 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3506 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3507 h->field_scan_q0 = h->field_scan;
3508 h->field_scan8x8_q0 = h->field_scan8x8;
3509 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3514 * Replicates H264 "master" context to thread contexts.
3516 static void clone_slice(H264Context *dst, H264Context *src)
3518 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3519 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3520 dst->s.current_picture = src->s.current_picture;
3521 dst->s.linesize = src->s.linesize;
3522 dst->s.uvlinesize = src->s.uvlinesize;
3523 dst->s.first_field = src->s.first_field;
3525 dst->prev_poc_msb = src->prev_poc_msb;
3526 dst->prev_poc_lsb = src->prev_poc_lsb;
3527 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3528 dst->prev_frame_num = src->prev_frame_num;
3529 dst->short_ref_count = src->short_ref_count;
3531 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3532 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3533 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3534 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3536 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3537 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3541 * decodes a slice header.
3542 * This will also call MPV_common_init() and frame_start() as needed.
3544 * @param h H264Context of the slice being decoded
3545 * @param h0 H264 master context (differs from 'h' when doing slice-based parallel decoding)
3547 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3549 static int decode_slice_header(H264Context *h, H264Context *h0){
3550 MpegEncContext * const s = &h->s;
3551 MpegEncContext * const s0 = &h0->s;
3552 unsigned int first_mb_in_slice;
3553 unsigned int pps_id;
3554 int num_ref_idx_active_override_flag;
3555 unsigned int slice_type, tmp, i, j;
3556 int default_ref_list_done = 0;
3557 int last_pic_structure;
3559 s->dropable= h->nal_ref_idc == 0;
3561 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3562 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3563 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3564 }else{
3565 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3566 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3569 first_mb_in_slice= get_ue_golomb(&s->gb);
3571 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3572 h0->current_slice = 0;
3573 if (!s0->first_field)
3574 s->current_picture_ptr= NULL;
3577 slice_type= get_ue_golomb(&s->gb);
3578 if(slice_type > 9){
3579 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3580 return -1;
3582 if(slice_type > 4){
3583 slice_type -= 5;
3584 h->slice_type_fixed=1;
3585 }else
3586 h->slice_type_fixed=0;
3588 slice_type= golomb_to_pict_type[ slice_type ];
3589 if (slice_type == FF_I_TYPE
3590 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3591 default_ref_list_done = 1;
3593 h->slice_type= slice_type;
3594 h->slice_type_nos= slice_type & 3;
3596 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3597 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3598 av_log(h->s.avctx, AV_LOG_ERROR,
3599 "B picture before any references, skipping\n");
3600 return -1;
3603 pps_id= get_ue_golomb(&s->gb);
3604 if(pps_id>=MAX_PPS_COUNT){
3605 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3606 return -1;
3608 if(!h0->pps_buffers[pps_id]) {
3609 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3610 return -1;
3612 h->pps= *h0->pps_buffers[pps_id];
3614 if(!h0->sps_buffers[h->pps.sps_id]) {
3615 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3616 return -1;
3618 h->sps = *h0->sps_buffers[h->pps.sps_id];
3620 if(h == h0 && h->dequant_coeff_pps != pps_id){
3621 h->dequant_coeff_pps = pps_id;
3622 init_dequant_tables(h);
3625 s->mb_width= h->sps.mb_width;
3626 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3628 h->b_stride= s->mb_width*4;
3629 h->b8_stride= s->mb_width*2;
3631 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3632 if(h->sps.frame_mbs_only_flag)
3633 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3634 else
3635 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3637 if (s->context_initialized
3638 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3639 if(h != h0)
3640 return -1; // width / height changed during parallelized decoding
3641 free_tables(h);
3642 flush_dpb(s->avctx);
3643 MPV_common_end(s);
3645 if (!s->context_initialized) {
3646 if(h != h0)
3647 return -1; // we can't (re-)initialize context during parallel decoding
3648 if (MPV_common_init(s) < 0)
3649 return -1;
3650 s->first_field = 0;
3652 init_scan_tables(h);
3653 alloc_tables(h);
3655 for(i = 1; i < s->avctx->thread_count; i++) {
3656 H264Context *c;
3657 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3658 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3659 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3660 c->sps = h->sps;
3661 c->pps = h->pps;
3662 init_scan_tables(c);
3663 clone_tables(c, h);
3666 for(i = 0; i < s->avctx->thread_count; i++)
3667 if(context_init(h->thread_context[i]) < 0)
3668 return -1;
3670 s->avctx->width = s->width;
3671 s->avctx->height = s->height;
3672 s->avctx->sample_aspect_ratio= h->sps.sar;
3673 if(!s->avctx->sample_aspect_ratio.den)
3674 s->avctx->sample_aspect_ratio.den = 1;
3676 if(h->sps.timing_info_present_flag){
3677 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3678 if(h->x264_build > 0 && h->x264_build < 44)
3679 s->avctx->time_base.den *= 2;
3680 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3681 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3685 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3687 h->mb_mbaff = 0;
3688 h->mb_aff_frame = 0;
3689 last_pic_structure = s0->picture_structure;
3690 if(h->sps.frame_mbs_only_flag){
3691 s->picture_structure= PICT_FRAME;
3692 }else{
3693 if(get_bits1(&s->gb)) { //field_pic_flag
3694 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3695 } else {
3696 s->picture_structure= PICT_FRAME;
3697 h->mb_aff_frame = h->sps.mb_aff;
3700 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3702 if(h0->current_slice == 0){
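/* A gap in frame_num means reference frames were skipped or lost; fake the
 * missing frames (frame_start() plus reference marking) so the short-term
 * reference list stays consistent with the bitstream's frame numbering. */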
3703 while(h->frame_num != h->prev_frame_num &&
3704 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3705 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3706 frame_start(h);
3707 h->prev_frame_num++;
3708 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3709 s->current_picture_ptr->frame_num= h->prev_frame_num;
3710 execute_ref_pic_marking(h, NULL, 0);
3713 /* See if we have a decoded first field looking for a pair... */
3714 if (s0->first_field) {
3715 assert(s0->current_picture_ptr);
3716 assert(s0->current_picture_ptr->data[0]);
3717 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3719 /* figure out if we have a complementary field pair */
3720 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3722 * Previous field is unmatched. Don't display it, but let it
3723 * remain for reference if marked as such.
3725 s0->current_picture_ptr = NULL;
3726 s0->first_field = FIELD_PICTURE;
3728 } else {
3729 if (h->nal_ref_idc &&
3730 s0->current_picture_ptr->reference &&
3731 s0->current_picture_ptr->frame_num != h->frame_num) {
3733 * This and previous field were reference, but had
3734 * different frame_nums. Consider this field first in
3735 * pair. Throw away previous field except for reference
3736 * purposes.
3738 s0->first_field = 1;
3739 s0->current_picture_ptr = NULL;
3741 } else {
3742 /* Second field in complementary pair */
3743 s0->first_field = 0;
3747 } else {
3748 /* Frame or first field in a potentially complementary pair */
3749 assert(!s0->current_picture_ptr);
3750 s0->first_field = FIELD_PICTURE;
3753 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3754 s0->first_field = 0;
3755 return -1;
3758 if(h != h0)
3759 clone_slice(h, h0);
3761 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3763 assert(s->mb_num == s->mb_width * s->mb_height);
3764 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3765 first_mb_in_slice >= s->mb_num){
3766 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3767 return -1;
3769 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3770 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3771 if (s->picture_structure == PICT_BOTTOM_FIELD)
3772 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3773 assert(s->mb_y < s->mb_height);
3775 if(s->picture_structure==PICT_FRAME){
3776 h->curr_pic_num= h->frame_num;
3777 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3778 }else{
3779 h->curr_pic_num= 2*h->frame_num + 1;
3780 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3783 if(h->nal_unit_type == NAL_IDR_SLICE){
3784 get_ue_golomb(&s->gb); /* idr_pic_id */
3787 if(h->sps.poc_type==0){
3788 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3790 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3791 h->delta_poc_bottom= get_se_golomb(&s->gb);
3795 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3796 h->delta_poc[0]= get_se_golomb(&s->gb);
3798 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3799 h->delta_poc[1]= get_se_golomb(&s->gb);
3802 init_poc(h);
3804 if(h->pps.redundant_pic_cnt_present){
3805 h->redundant_pic_count= get_ue_golomb(&s->gb);
3808 //set defaults, might be overridden a few lines later
3809 h->ref_count[0]= h->pps.ref_count[0];
3810 h->ref_count[1]= h->pps.ref_count[1];
3812 if(h->slice_type_nos != FF_I_TYPE){
3813 if(h->slice_type_nos == FF_B_TYPE){
3814 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3816 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3818 if(num_ref_idx_active_override_flag){
3819 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3820 if(h->slice_type_nos==FF_B_TYPE)
3821 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3823 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3824 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3825 h->ref_count[0]= h->ref_count[1]= 1;
3826 return -1;
3829 if(h->slice_type_nos == FF_B_TYPE)
3830 h->list_count= 2;
3831 else
3832 h->list_count= 1;
3833 }else
3834 h->list_count= 0;
3836 if(!default_ref_list_done){
3837 fill_default_ref_list(h);
3840 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3841 return -1;
3843 if(h->slice_type_nos!=FF_I_TYPE){
3844 s->last_picture_ptr= &h->ref_list[0][0];
3845 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3847 if(h->slice_type_nos==FF_B_TYPE){
3848 s->next_picture_ptr= &h->ref_list[1][0];
3849 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3852 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3853 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3854 pred_weight_table(h);
3855 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3856 implicit_weight_table(h);
3857 else
3858 h->use_weight = 0;
3860 if(h->nal_ref_idc)
3861 decode_ref_pic_marking(h0, &s->gb);
3863 if(FRAME_MBAFF)
3864 fill_mbaff_ref_list(h);
3866 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3867 direct_dist_scale_factor(h);
3868 direct_ref_list_init(h);
3870 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3871 tmp = get_ue_golomb(&s->gb);
3872 if(tmp > 2){
3873 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3874 return -1;
3876 h->cabac_init_idc= tmp;
3879 h->last_qscale_diff = 0;
3880 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3881 if(tmp>51){
3882 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3883 return -1;
3885 s->qscale= tmp;
3886 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3887 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3888 //FIXME qscale / qp ... stuff
3889 if(h->slice_type == FF_SP_TYPE){
3890 get_bits1(&s->gb); /* sp_for_switch_flag */
3892 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3893 get_se_golomb(&s->gb); /* slice_qs_delta */
3896 h->deblocking_filter = 1;
3897 h->slice_alpha_c0_offset = 0;
3898 h->slice_beta_offset = 0;
3899 if( h->pps.deblocking_filter_parameters_present ) {
3900 tmp= get_ue_golomb(&s->gb);
3901 if(tmp > 2){
3902 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3903 return -1;
3905 h->deblocking_filter= tmp;
3906 if(h->deblocking_filter < 2)
3907 h->deblocking_filter^= 1; // 1<->0
3909 if( h->deblocking_filter ) {
3910 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3911 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3915 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3916 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3917 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3918 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3919 h->deblocking_filter= 0;
3921 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3922 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3923 /* Cheat slightly for speed:
3924 Do not bother to deblock across slices. */
3925 h->deblocking_filter = 2;
3926 } else {
3927 h0->max_contexts = 1;
3928 if(!h0->single_decode_warning) {
3929 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3930 h0->single_decode_warning = 1;
3932 if(h != h0)
3933 return 1; // deblocking switched inside frame
3937 #if 0 //FMO
3938 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3939 slice_group_change_cycle= get_bits(&s->gb, ?);
3940 #endif
3942 h0->last_slice_type = slice_type;
3943 h->slice_num = ++h0->current_slice;
3944 if(h->slice_num >= MAX_SLICES){
3945 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
3948 for(j=0; j<2; j++){
3949 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
3950 ref2frm[0]=
3951 ref2frm[1]= -1;
3952 for(i=0; i<16; i++)
3953 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3954 +(h->ref_list[j][i].reference&3);
3955 ref2frm[18+0]=
3956 ref2frm[18+1]= -1;
3957 for(i=16; i<48; i++)
3958 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
3959 +(h->ref_list[j][i].reference&3);
3962 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
3963 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
3965 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3966 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
3967 h->slice_num,
3968 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
3969 first_mb_in_slice,
3970 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
3971 pps_id, h->frame_num,
3972 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
3973 h->ref_count[0], h->ref_count[1],
3974 s->qscale,
3975 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
3976 h->use_weight,
3977 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
3978 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
3982 return 0;
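/**
 * Reads the CAVLC level_prefix: the run of zero bits before the terminating
 * '1' bit; the '1' is consumed as well and the number of zeros is returned.
 */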
3988 static inline int get_level_prefix(GetBitContext *gb){
3989 unsigned int buf;
3990 int log;
3992 OPEN_READER(re, gb);
3993 UPDATE_CACHE(re, gb);
3994 buf=GET_CACHE(re, gb);
3996 log= 32 - av_log2(buf);
3997 #ifdef TRACE
3998 print_bin(buf>>(32-log), log);
3999 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4000 #endif
4002 LAST_SKIP_BITS(re, gb, log);
4003 CLOSE_READER(re, gb);
4005 return log-1;
4008 static inline int get_dct8x8_allowed(H264Context *h){
4009 int i;
4010 for(i=0; i<4; i++){
4011 if(!IS_SUB_8X8(h->sub_mb_type[i])
4012 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4013 return 0;
4015 return 1;
4019 * decodes a residual block.
4020 * @param n block index
4021 * @param scantable scantable
4022 * @param max_coeff number of coefficients in the block
4023 * @return <0 if an error occurred
4025 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4026 MpegEncContext * const s = &h->s;
4027 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4028 int level[16];
4029 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4031 //FIXME put trailing_ones into the context
4033 if(n == CHROMA_DC_BLOCK_INDEX){
4034 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4035 total_coeff= coeff_token>>2;
4036 }else{
4037 if(n == LUMA_DC_BLOCK_INDEX){
4038 total_coeff= pred_non_zero_count(h, 0);
4039 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4040 total_coeff= coeff_token>>2;
4041 }else{
4042 total_coeff= pred_non_zero_count(h, n);
4043 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4044 total_coeff= coeff_token>>2;
4045 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4049 //FIXME set last_non_zero?
4051 if(total_coeff==0)
4052 return 0;
4053 if(total_coeff > (unsigned)max_coeff) {
4054 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4055 return -1;
4058 trailing_ones= coeff_token&3;
4059 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4060 assert(total_coeff<=16);
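/* The first trailing_ones coefficients are +/-1 and carry only a sign bit;
 * the remaining levels are coded as level_prefix/suffix pairs whose suffix
 * length adapts to the magnitudes decoded so far. */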
4062 for(i=0; i<trailing_ones; i++){
4063 level[i]= 1 - 2*get_bits1(gb);
4066 if(i<total_coeff) {
4067 int level_code, mask;
4068 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4069 int prefix= get_level_prefix(gb);
4071 //first coefficient has suffix_length equal to 0 or 1
4072 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4073 if(suffix_length)
4074 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4075 else
4076 level_code= (prefix<<suffix_length); //part
4077 }else if(prefix==14){
4078 if(suffix_length)
4079 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4080 else
4081 level_code= prefix + get_bits(gb, 4); //part
4082 }else{
4083 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4084 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much) sense
4085 if(prefix>=16)
4086 level_code += (1<<(prefix-3))-4096;
4089 if(trailing_ones < 3) level_code += 2;
4091 suffix_length = 1;
4092 if(level_code > 5)
4093 suffix_length++;
4094 mask= -(level_code&1);
4095 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4096 i++;
4098 //remaining coefficients have suffix_length > 0
4099 for(;i<total_coeff;i++) {
4100 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4101 prefix = get_level_prefix(gb);
4102 if(prefix<15){
4103 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4104 }else{
4105 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4106 if(prefix>=16)
4107 level_code += (1<<(prefix-3))-4096;
4109 mask= -(level_code&1);
4110 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4111 if(level_code > suffix_limit[suffix_length])
4112 suffix_length++;
4116 if(total_coeff == max_coeff)
4117 zeros_left=0;
4118 else{
4119 if(n == CHROMA_DC_BLOCK_INDEX)
4120 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4121 else
4122 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4125 coeff_num = zeros_left + total_coeff - 1;
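/* Coefficients are placed starting at the highest occupied scan position and
 * moving backwards; run_before gives the number of zeros skipped before each
 * earlier coefficient. For the DC blocks (n > 24) no dequantization is done
 * here, it happens after the separate DC transform. */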
4126 j = scantable[coeff_num];
4127 if(n > 24){
4128 block[j] = level[0];
4129 for(i=1;i<total_coeff;i++) {
4130 if(zeros_left <= 0)
4131 run_before = 0;
4132 else if(zeros_left < 7){
4133 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4134 }else{
4135 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4137 zeros_left -= run_before;
4138 coeff_num -= 1 + run_before;
4139 j= scantable[ coeff_num ];
4141 block[j]= level[i];
4143 }else{
4144 block[j] = (level[0] * qmul[j] + 32)>>6;
4145 for(i=1;i<total_coeff;i++) {
4146 if(zeros_left <= 0)
4147 run_before = 0;
4148 else if(zeros_left < 7){
4149 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4150 }else{
4151 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4153 zeros_left -= run_before;
4154 coeff_num -= 1 + run_before;
4155 j= scantable[ coeff_num ];
4157 block[j]= (level[i] * qmul[j] + 32)>>6;
4161 if(zeros_left<0){
4162 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4163 return -1;
4166 return 0;
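/**
 * Predicts mb_field_decoding_flag of a skipped MBAFF macroblock pair from
 * the left or top neighbour belonging to the same slice.
 */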
4169 static void predict_field_decoding_flag(H264Context *h){
4170 MpegEncContext * const s = &h->s;
4171 const int mb_xy= h->mb_xy;
4172 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4173 ? s->current_picture.mb_type[mb_xy-1]
4174 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4175 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4176 : 0;
4177 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4181 * decodes a P_SKIP or B_SKIP macroblock
4183 static void decode_mb_skip(H264Context *h){
4184 MpegEncContext * const s = &h->s;
4185 const int mb_xy= h->mb_xy;
4186 int mb_type=0;
4188 memset(h->non_zero_count[mb_xy], 0, 16);
4189 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove (yuck)
4191 if(MB_FIELD)
4192 mb_type|= MB_TYPE_INTERLACED;
4194 if( h->slice_type_nos == FF_B_TYPE )
4196 // just for fill_caches. pred_direct_motion will set the real mb_type
4197 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4199 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4200 pred_direct_motion(h, &mb_type);
4201 mb_type|= MB_TYPE_SKIP;
4203 else
4205 int mx, my;
4206 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4208 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4209 pred_pskip_motion(h, &mx, &my);
4210 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4211 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4214 write_back_motion(h, mb_type);
4215 s->current_picture.mb_type[mb_xy]= mb_type;
4216 s->current_picture.qscale_table[mb_xy]= s->qscale;
4217 h->slice_table[ mb_xy ]= h->slice_num;
4218 h->prev_mb_skipped= 1;
4222 * decodes a macroblock
4223 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4225 static int decode_mb_cavlc(H264Context *h){
4226 MpegEncContext * const s = &h->s;
4227 int mb_xy;
4228 int partition_count;
4229 unsigned int mb_type, cbp;
4230 int dct8x8_allowed= h->pps.transform_8x8_mode;
4232 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4234 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
4236 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4237 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4238 down the code */
4239 if(h->slice_type_nos != FF_I_TYPE){
4240 if(s->mb_skip_run==-1)
4241 s->mb_skip_run= get_ue_golomb(&s->gb);
4243 if (s->mb_skip_run--) {
4244 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4245 if(s->mb_skip_run==0)
4246 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4247 else
4248 predict_field_decoding_flag(h);
4250 decode_mb_skip(h);
4251 return 0;
4254 if(FRAME_MBAFF){
4255 if( (s->mb_y&1) == 0 )
4256 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4259 h->prev_mb_skipped= 0;
4261 mb_type= get_ue_golomb(&s->gb);
4262 if(h->slice_type_nos == FF_B_TYPE){
4263 if(mb_type < 23){
4264 partition_count= b_mb_type_info[mb_type].partition_count;
4265 mb_type= b_mb_type_info[mb_type].type;
4266 }else{
4267 mb_type -= 23;
4268 goto decode_intra_mb;
4270 }else if(h->slice_type_nos == FF_P_TYPE){
4271 if(mb_type < 5){
4272 partition_count= p_mb_type_info[mb_type].partition_count;
4273 mb_type= p_mb_type_info[mb_type].type;
4274 }else{
4275 mb_type -= 5;
4276 goto decode_intra_mb;
4278 }else{
4279 assert(h->slice_type_nos == FF_I_TYPE);
4280 if(h->slice_type == FF_SI_TYPE && mb_type)
4281 mb_type--;
4282 decode_intra_mb:
4283 if(mb_type > 25){
4284 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4285 return -1;
4287 partition_count=0;
4288 cbp= i_mb_type_info[mb_type].cbp;
4289 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4290 mb_type= i_mb_type_info[mb_type].type;
4293 if(MB_FIELD)
4294 mb_type |= MB_TYPE_INTERLACED;
4296 h->slice_table[ mb_xy ]= h->slice_num;
4298 if(IS_INTRA_PCM(mb_type)){
4299 unsigned int x;
4301 // We assume these blocks are very rare so we do not optimize them.
4302 align_get_bits(&s->gb);
4304 // The pixels are stored in the same order as levels in h->mb array.
4305 for(x=0; x < (CHROMA ? 384 : 256); x++){
4306 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4309 // In deblocking, the quantizer is 0
4310 s->current_picture.qscale_table[mb_xy]= 0;
4311 // All coeffs are present
4312 memset(h->non_zero_count[mb_xy], 16, 16);
4314 s->current_picture.mb_type[mb_xy]= mb_type;
4315 return 0;
4318 if(MB_MBAFF){
4319 h->ref_count[0] <<= 1;
4320 h->ref_count[1] <<= 1;
4323 fill_caches(h, mb_type, 0);
4325 //mb_pred
4326 if(IS_INTRA(mb_type)){
4327 int pred_mode;
4328 // init_top_left_availability(h);
4329 if(IS_INTRA4x4(mb_type)){
4330 int i;
4331 int di = 1;
4332 if(dct8x8_allowed && get_bits1(&s->gb)){
4333 mb_type |= MB_TYPE_8x8DCT;
4334 di = 4;
4337 // fill_intra4x4_pred_table(h);
4338 for(i=0; i<16; i+=di){
4339 int mode= pred_intra_mode(h, i);
4341 if(!get_bits1(&s->gb)){
4342 const int rem_mode= get_bits(&s->gb, 3);
4343 mode = rem_mode + (rem_mode >= mode);
4346 if(di==4)
4347 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4348 else
4349 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4351 write_back_intra_pred_mode(h);
4352 if( check_intra4x4_pred_mode(h) < 0)
4353 return -1;
4354 }else{
4355 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4356 if(h->intra16x16_pred_mode < 0)
4357 return -1;
4359 if(CHROMA){
4360 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4361 if(pred_mode < 0)
4362 return -1;
4363 h->chroma_pred_mode= pred_mode;
4365 }else if(partition_count==4){
4366 int i, j, sub_partition_count[4], list, ref[2][4];
4368 if(h->slice_type_nos == FF_B_TYPE){
4369 for(i=0; i<4; i++){
4370 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4371 if(h->sub_mb_type[i] >=13){
4372 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4373 return -1;
4375 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4376 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4378 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4379 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4380 pred_direct_motion(h, &mb_type);
4381 h->ref_cache[0][scan8[4]] =
4382 h->ref_cache[1][scan8[4]] =
4383 h->ref_cache[0][scan8[12]] =
4384 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4386 }else{
4387 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4388 for(i=0; i<4; i++){
4389 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4390 if(h->sub_mb_type[i] >=4){
4391 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4392 return -1;
4394 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4395 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4399 for(list=0; list<h->list_count; list++){
4400 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4401 for(i=0; i<4; i++){
4402 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4403 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4404 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4405 if(tmp>=ref_count){
4406 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4407 return -1;
4409 ref[list][i]= tmp;
4410 }else{
4411 //FIXME
4412 ref[list][i] = -1;
4417 if(dct8x8_allowed)
4418 dct8x8_allowed = get_dct8x8_allowed(h);
4420 for(list=0; list<h->list_count; list++){
4421 for(i=0; i<4; i++){
4422 if(IS_DIRECT(h->sub_mb_type[i])) {
4423 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4424 continue;
4426 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4427 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4429 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4430 const int sub_mb_type= h->sub_mb_type[i];
4431 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4432 for(j=0; j<sub_partition_count[i]; j++){
4433 int mx, my;
4434 const int index= 4*i + block_width*j;
4435 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4436 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4437 mx += get_se_golomb(&s->gb);
4438 my += get_se_golomb(&s->gb);
4439 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4441 if(IS_SUB_8X8(sub_mb_type)){
4442 mv_cache[ 1 ][0]=
4443 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4444 mv_cache[ 1 ][1]=
4445 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4446 }else if(IS_SUB_8X4(sub_mb_type)){
4447 mv_cache[ 1 ][0]= mx;
4448 mv_cache[ 1 ][1]= my;
4449 }else if(IS_SUB_4X8(sub_mb_type)){
4450 mv_cache[ 8 ][0]= mx;
4451 mv_cache[ 8 ][1]= my;
4453 mv_cache[ 0 ][0]= mx;
4454 mv_cache[ 0 ][1]= my;
4456 }else{
4457 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4458 p[0] = p[1]=
4459 p[8] = p[9]= 0;
4463 }else if(IS_DIRECT(mb_type)){
4464 pred_direct_motion(h, &mb_type);
4465 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4466 }else{
4467 int list, mx, my, i;
4468 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4469 if(IS_16X16(mb_type)){
4470 for(list=0; list<h->list_count; list++){
4471 unsigned int val;
4472 if(IS_DIR(mb_type, 0, list)){
4473 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4474 if(val >= h->ref_count[list]){
4475 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4476 return -1;
4478 }else
4479 val= LIST_NOT_USED&0xFF;
4480 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4482 for(list=0; list<h->list_count; list++){
4483 unsigned int val;
4484 if(IS_DIR(mb_type, 0, list)){
4485 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4486 mx += get_se_golomb(&s->gb);
4487 my += get_se_golomb(&s->gb);
4488 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4490 val= pack16to32(mx,my);
4491 }else
4492 val=0;
4493 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4496 else if(IS_16X8(mb_type)){
4497 for(list=0; list<h->list_count; list++){
4498 for(i=0; i<2; i++){
4499 unsigned int val;
4500 if(IS_DIR(mb_type, i, list)){
4501 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4502 if(val >= h->ref_count[list]){
4503 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4504 return -1;
4506 }else
4507 val= LIST_NOT_USED&0xFF;
4508 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4511 for(list=0; list<h->list_count; list++){
4512 for(i=0; i<2; i++){
4513 unsigned int val;
4514 if(IS_DIR(mb_type, i, list)){
4515 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4516 mx += get_se_golomb(&s->gb);
4517 my += get_se_golomb(&s->gb);
4518 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4520 val= pack16to32(mx,my);
4521 }else
4522 val=0;
4523 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4526 }else{
4527 assert(IS_8X16(mb_type));
4528 for(list=0; list<h->list_count; list++){
4529 for(i=0; i<2; i++){
4530 unsigned int val;
4531 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4532 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4533 if(val >= h->ref_count[list]){
4534 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4535 return -1;
4537 }else
4538 val= LIST_NOT_USED&0xFF;
4539 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4542 for(list=0; list<h->list_count; list++){
4543 for(i=0; i<2; i++){
4544 unsigned int val;
4545 if(IS_DIR(mb_type, i, list)){
4546 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4547 mx += get_se_golomb(&s->gb);
4548 my += get_se_golomb(&s->gb);
4549 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4551 val= pack16to32(mx,my);
4552 }else
4553 val=0;
4554 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4560 if(IS_INTER(mb_type))
4561 write_back_motion(h, mb_type);
4563 if(!IS_INTRA16x16(mb_type)){
4564 cbp= get_ue_golomb(&s->gb);
4565 if(cbp > 47){
4566 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4567 return -1;
4570 if(CHROMA){
4571 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4572 else cbp= golomb_to_inter_cbp [cbp];
4573 }else{
4574 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4575 else cbp= golomb_to_inter_cbp_gray[cbp];
4578 h->cbp = cbp;
4580 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4581 if(get_bits1(&s->gb)){
4582 mb_type |= MB_TYPE_8x8DCT;
4583 h->cbp_table[mb_xy]= cbp;
4586 s->current_picture.mb_type[mb_xy]= mb_type;
4588 if(cbp || IS_INTRA16x16(mb_type)){
4589 int i8x8, i4x4, chroma_idx;
4590 int dquant;
4591 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4592 const uint8_t *scan, *scan8x8, *dc_scan;
4594 // fill_non_zero_count_cache(h);
4596 if(IS_INTERLACED(mb_type)){
4597 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4598 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4599 dc_scan= luma_dc_field_scan;
4600 }else{
4601 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4602 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4603 dc_scan= luma_dc_zigzag_scan;
4606 dquant= get_se_golomb(&s->gb);
4608 if( dquant > 25 || dquant < -26 ){
4609 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4610 return -1;
4613 s->qscale += dquant;
4614 if(((unsigned)s->qscale) > 51){
4615 if(s->qscale<0) s->qscale+= 52;
4616 else s->qscale-= 52;
4619 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4620 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4621 if(IS_INTRA16x16(mb_type)){
4622 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4623 return -1; //FIXME continue if partitioned and other return -1 too
4626 assert((cbp&15) == 0 || (cbp&15) == 15);
4628 if(cbp&15){
4629 for(i8x8=0; i8x8<4; i8x8++){
4630 for(i4x4=0; i4x4<4; i4x4++){
4631 const int index= i4x4 + 4*i8x8;
4632 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4633 return -1;
4637 }else{
4638 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4640 }else{
4641 for(i8x8=0; i8x8<4; i8x8++){
4642 if(cbp & (1<<i8x8)){
4643 if(IS_8x8DCT(mb_type)){
4644 DCTELEM *buf = &h->mb[64*i8x8];
4645 uint8_t *nnz;
4646 for(i4x4=0; i4x4<4; i4x4++){
4647 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4648 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4649 return -1;
4651 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4652 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4653 }else{
4654 for(i4x4=0; i4x4<4; i4x4++){
4655 const int index= i4x4 + 4*i8x8;
4657 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4658 return -1;
4662 }else{
4663 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4664 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4669 if(cbp&0x30){
4670 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4671 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4672 return -1;
4676 if(cbp&0x20){
4677 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4678 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4679 for(i4x4=0; i4x4<4; i4x4++){
4680 const int index= 16 + 4*chroma_idx + i4x4;
4681 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4682 return -1;
4686 }else{
4687 uint8_t * const nnz= &h->non_zero_count_cache[0];
4688 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4689 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4691 }else{
4692 uint8_t * const nnz= &h->non_zero_count_cache[0];
4693 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4694 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4695 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4697 s->current_picture.qscale_table[mb_xy]= s->qscale;
4698 write_back_non_zero_count(h);
4700 if(MB_MBAFF){
4701 h->ref_count[0] >>= 1;
4702 h->ref_count[1] >>= 1;
4705 return 0;
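/* CABAC mb_field_decoding_flag: the context (states 70..72) counts how many
 * of the left and above macroblock pairs in the same slice are field coded. */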
4708 static int decode_cabac_field_decoding_flag(H264Context *h) {
4709 MpegEncContext * const s = &h->s;
4710 const int mb_x = s->mb_x;
4711 const int mb_y = s->mb_y & ~1;
4712 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4713 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4715 unsigned int ctx = 0;
4717 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4718 ctx += 1;
4720 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4721 ctx += 1;
4724 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4727 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4728 uint8_t *state= &h->cabac_state[ctx_base];
4729 int mb_type;
4731 if(intra_slice){
4732 MpegEncContext * const s = &h->s;
4733 const int mba_xy = h->left_mb_xy[0];
4734 const int mbb_xy = h->top_mb_xy;
4735 int ctx=0;
4736 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4737 ctx++;
4738 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4739 ctx++;
4740 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4741 return 0; /* I4x4 */
4742 state += 2;
4743 }else{
4744 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4745 return 0; /* I4x4 */
4748 if( get_cabac_terminate( &h->cabac ) )
4749 return 25; /* PCM */
4751 mb_type = 1; /* I16x16 */
4752 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4753 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4754 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4755 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4756 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4757 return mb_type;
4760 static int decode_cabac_mb_type( H264Context *h ) {
4761 MpegEncContext * const s = &h->s;
4763 if( h->slice_type_nos == FF_I_TYPE ) {
4764 return decode_cabac_intra_mb_type(h, 3, 1);
4765 } else if( h->slice_type_nos == FF_P_TYPE ) {
4766 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4767 /* P-type */
4768 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4769 /* P_L0_D16x16, P_8x8 */
4770 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4771 } else {
4772 /* P_L0_D8x16, P_L0_D16x8 */
4773 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4775 } else {
4776 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4778 } else if( h->slice_type_nos == FF_B_TYPE ) {
4779 const int mba_xy = h->left_mb_xy[0];
4780 const int mbb_xy = h->top_mb_xy;
4781 int ctx = 0;
4782 int bits;
4784 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4785 ctx++;
4786 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4787 ctx++;
4789 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4790 return 0; /* B_Direct_16x16 */
4792 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4793 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4796 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4797 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4798 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4799 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4800 if( bits < 8 )
4801 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4802 else if( bits == 13 ) {
4803 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4804 } else if( bits == 14 )
4805 return 11; /* B_L1_L0_8x16 */
4806 else if( bits == 15 )
4807 return 22; /* B_8x8 */
4809 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4810 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4811 } else {
4812 /* TODO SI/SP frames? */
4813 return -1;
4817 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4818 MpegEncContext * const s = &h->s;
4819 int mba_xy, mbb_xy;
4820 int ctx = 0;
4822 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4823 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4824 mba_xy = mb_xy - 1;
4825 if( (mb_y&1)
4826 && h->slice_table[mba_xy] == h->slice_num
4827 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4828 mba_xy += s->mb_stride;
4829 if( MB_FIELD ){
4830 mbb_xy = mb_xy - s->mb_stride;
4831 if( !(mb_y&1)
4832 && h->slice_table[mbb_xy] == h->slice_num
4833 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4834 mbb_xy -= s->mb_stride;
4835 }else
4836 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4837 }else{
4838 int mb_xy = h->mb_xy;
4839 mba_xy = mb_xy - 1;
4840 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4843 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4844 ctx++;
4845 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4846 ctx++;
4848 if( h->slice_type_nos == FF_B_TYPE )
4849 ctx += 13;
4850 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4853 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4854 int mode = 0;
4856 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4857 return pred_mode;
4859 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4860 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4861 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4863 if( mode >= pred_mode )
4864 return mode + 1;
4865 else
4866 return mode;
4869 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4870 const int mba_xy = h->left_mb_xy[0];
4871 const int mbb_xy = h->top_mb_xy;
4873 int ctx = 0;
4875 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4876 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4877 ctx++;
4879 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4880 ctx++;
4882 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4883 return 0;
4885 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4886 return 1;
4887 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4888 return 2;
4889 else
4890 return 3;
4893 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4894 int cbp_b, cbp_a, ctx, cbp = 0;
4896 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4897 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4899 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4900 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4901 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4902 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4903 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4904 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4905 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4906 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
4907 return cbp;
4909 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4910 int ctx;
4911 int cbp_a, cbp_b;
4913 cbp_a = (h->left_cbp>>4)&0x03;
4914 cbp_b = (h-> top_cbp>>4)&0x03;
4916 ctx = 0;
4917 if( cbp_a > 0 ) ctx++;
4918 if( cbp_b > 0 ) ctx += 2;
4919 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4920 return 0;
4922 ctx = 4;
4923 if( cbp_a == 2 ) ctx++;
4924 if( cbp_b == 2 ) ctx += 2;
4925 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
4927 static int decode_cabac_mb_dqp( H264Context *h) {
4928 int ctx = 0;
4929 int val = 0;
4931 if( h->last_qscale_diff != 0 )
4932 ctx++;
4934 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4935 if( ctx < 2 )
4936 ctx = 2;
4937 else
4938 ctx = 3;
4939 val++;
4940 if(val > 102) //prevent infinite loop
4941 return INT_MIN;
4944 if( val&0x01 )
4945 return (val + 1)/2;
4946 else
4947 return -(val + 1)/2;
4949 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4950 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4951 return 0; /* 8x8 */
4952 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4953 return 1; /* 8x4 */
4954 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
4955 return 2; /* 4x8 */
4956 return 3; /* 4x4 */
4958 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
4959 int type;
4960 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
4961 return 0; /* B_Direct_8x8 */
4962 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
4963 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
4964 type = 3;
4965 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
4966 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
4967 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
4968 type += 4;
4970 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
4971 type += get_cabac( &h->cabac, &h->cabac_state[39] );
4972 return type;
4975 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
4976 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
4979 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
4980 int refa = h->ref_cache[list][scan8[n] - 1];
4981 int refb = h->ref_cache[list][scan8[n] - 8];
4982 int ref = 0;
4983 int ctx = 0;
4985 if( h->slice_type_nos == FF_B_TYPE) {
4986 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
4987 ctx++;
4988 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
4989 ctx += 2;
4990 } else {
4991 if( refa > 0 )
4992 ctx++;
4993 if( refb > 0 )
4994 ctx += 2;
4997 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
4998 ref++;
4999 if( ctx < 4 )
5000 ctx = 4;
5001 else
5002 ctx = 5;
5003 if(ref >= 32 /*h->ref_list[list]*/){
5004 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5005 return 0; //FIXME we should return -1 and check the return everywhere
5008 return ref;
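/**
 * Decodes one motion vector difference component with CABAC: the context is
 * chosen from the sum of the neighbouring |mvd| values, the magnitude uses a
 * truncated unary prefix (up to 9) followed by an exp-Golomb style suffix,
 * and the sign is a single bypass bin.
 */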
5011 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5012 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5013 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5014 int ctxbase = (l == 0) ? 40 : 47;
5015 int ctx, mvd;
5017 if( amvd < 3 )
5018 ctx = 0;
5019 else if( amvd > 32 )
5020 ctx = 2;
5021 else
5022 ctx = 1;
5024 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5025 return 0;
5027 mvd= 1;
5028 ctx= 3;
5029 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5030 mvd++;
5031 if( ctx < 6 )
5032 ctx++;
5035 if( mvd >= 9 ) {
5036 int k = 3;
5037 while( get_cabac_bypass( &h->cabac ) ) {
5038 mvd += 1 << k;
5039 k++;
5040 if(k>24){
5041 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5042 return INT_MIN;
5045 while( k-- ) {
5046 if( get_cabac_bypass( &h->cabac ) )
5047 mvd += 1 << k;
5050 return get_cabac_bypass_sign( &h->cabac, -mvd );
5053 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5054 int nza, nzb;
5055 int ctx = 0;
5057 if( is_dc ) {
5058 if( cat == 0 ) {
5059 nza = h->left_cbp&0x100;
5060 nzb = h-> top_cbp&0x100;
5061 } else {
5062 nza = (h->left_cbp>>(6+idx))&0x01;
5063 nzb = (h-> top_cbp>>(6+idx))&0x01;
5065 } else {
5066 if( cat == 4 ) {
5067 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5068 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5069 } else {
5070 assert(cat == 1 || cat == 2);
5071 nza = h->non_zero_count_cache[scan8[idx] - 1];
5072 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5076 if( nza > 0 )
5077 ctx++;
5079 if( nzb > 0 )
5080 ctx += 2;
5082 return ctx + 4 * cat;
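/* The coded_block_flag context increment is 4*cat + (left nonzero ? 1 : 0)
 * + (top nonzero ? 2 : 0); the caller adds the base offset 85 when indexing
 * cabac_state[]. */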
5085 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5086 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5087 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5088 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5089 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5092 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5093 static const int significant_coeff_flag_offset[2][6] = {
5094 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5095 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5097 static const int last_coeff_flag_offset[2][6] = {
5098 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5099 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5101 static const int coeff_abs_level_m1_offset[6] = {
5102 227+0, 227+10, 227+20, 227+30, 227+39, 426
5104 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5105 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5106 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5107 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5108 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5109 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5110 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5111 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5112 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5114 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5115 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5116 * map node ctx => cabac ctx for level=1 */
5117 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5118 /* map node ctx => cabac ctx for level>1 */
5119 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5120 static const uint8_t coeff_abs_level_transition[2][8] = {
5121 /* update node ctx after decoding a level=1 */
5122 { 1, 2, 3, 3, 4, 5, 6, 7 },
5123 /* update node ctx after decoding a level>1 */
5124 { 4, 4, 4, 4, 5, 6, 7, 7 }
5127 int index[64];
5129 int av_unused last;
5130 int coeff_count = 0;
5131 int node_ctx = 0;
5133 uint8_t *significant_coeff_ctx_base;
5134 uint8_t *last_coeff_ctx_base;
5135 uint8_t *abs_level_m1_ctx_base;
5137 #ifndef ARCH_X86
5138 #define CABAC_ON_STACK
5139 #endif
5140 #ifdef CABAC_ON_STACK
5141 #define CC &cc
5142 CABACContext cc;
5143 cc.range = h->cabac.range;
5144 cc.low = h->cabac.low;
5145 cc.bytestream= h->cabac.bytestream;
5146 #else
5147 #define CC &h->cabac
5148 #endif
5151 /* cat: 0-> DC 16x16 n = 0
5152 * 1-> AC 16x16 n = luma4x4idx
5153 * 2-> Luma4x4 n = luma4x4idx
5154 * 3-> DC Chroma n = iCbCr
5155 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5156 * 5-> Luma8x8 n = 4 * luma8x8idx
5159 /* read coded block flag */
5160 if( is_dc || cat != 5 ) {
5161 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5162 if( !is_dc ) {
5163 if( cat == 4 )
5164 h->non_zero_count_cache[scan8[16+n]] = 0;
5165 else
5166 h->non_zero_count_cache[scan8[n]] = 0;
5169 #ifdef CABAC_ON_STACK
5170 h->cabac.range = cc.range ;
5171 h->cabac.low = cc.low ;
5172 h->cabac.bytestream= cc.bytestream;
5173 #endif
5174 return;
5178 significant_coeff_ctx_base = h->cabac_state
5179 + significant_coeff_flag_offset[MB_FIELD][cat];
5180 last_coeff_ctx_base = h->cabac_state
5181 + last_coeff_flag_offset[MB_FIELD][cat];
5182 abs_level_m1_ctx_base = h->cabac_state
5183 + coeff_abs_level_m1_offset[cat];
5185 if( !is_dc && cat == 5 ) {
5186 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5187 for(last= 0; last < coefs; last++) { \
5188 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5189 if( get_cabac( CC, sig_ctx )) { \
5190 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5191 index[coeff_count++] = last; \
5192 if( get_cabac( CC, last_ctx ) ) { \
5193 last= max_coeff; \
5194 break; \
5198 if( last == max_coeff -1 ) {\
5199 index[coeff_count++] = last;\
5201 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5202 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5203 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5204 } else {
5205 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5206 #else
5207 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5208 } else {
5209 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5210 #endif
5212 assert(coeff_count > 0);
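/* At this point index[] holds the scan positions of the coeff_count
 * significant coefficients; their levels are decoded below in reverse scan
 * order. node_ctx counts the coefficients equal to 1 seen so far and, once
 * a magnitude > 1 occurs, switches to the abslevelgt1 states, selecting the
 * context for each level's first bin. */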
5214 if( is_dc ) {
5215 if( cat == 0 )
5216 h->cbp_table[h->mb_xy] |= 0x100;
5217 else
5218 h->cbp_table[h->mb_xy] |= 0x40 << n;
5219 } else {
5220 if( cat == 5 )
5221 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5222 else if( cat == 4 )
5223 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5224 else {
5225 assert( cat == 1 || cat == 2 );
5226 h->non_zero_count_cache[scan8[n]] = coeff_count;
5230 do {
5231 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5233 int j= scantable[index[--coeff_count]];
5235 if( get_cabac( CC, ctx ) == 0 ) {
5236 node_ctx = coeff_abs_level_transition[0][node_ctx];
5237 if( is_dc ) {
5238 block[j] = get_cabac_bypass_sign( CC, -1);
5239 }else{
5240 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5242 } else {
5243 int coeff_abs = 2;
5244 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5245 node_ctx = coeff_abs_level_transition[1][node_ctx];
5247 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5248 coeff_abs++;
5251 if( coeff_abs >= 15 ) {
5252 int j = 0;
5253 while( get_cabac_bypass( CC ) ) {
5254 j++;
5257 coeff_abs=1;
5258 while( j-- ) {
5259 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5261 coeff_abs+= 14;
5264 if( is_dc ) {
5265 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5266 }else{
5267 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5270 } while( coeff_count );
5271 #ifdef CABAC_ON_STACK
5272 h->cabac.range = cc.range ;
5273 h->cabac.low = cc.low ;
5274 h->cabac.bytestream= cc.bytestream;
5275 #endif
5279 #ifndef CONFIG_SMALL
5280 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5281 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5284 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5285 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5287 #endif
5289 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5290 #ifdef CONFIG_SMALL
5291 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5292 #else
5293 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5294 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5295 #endif
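/* cat 0 (luma DC) and cat 3 (chroma DC) are the DC block categories; unless
 * CONFIG_SMALL is set, two specialised copies of the residual decoder are
 * built so the is_dc branches can be folded away at compile time. */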
5298 static inline void compute_mb_neighbors(H264Context *h)
5300 MpegEncContext * const s = &h->s;
5301 const int mb_xy = h->mb_xy;
5302 h->top_mb_xy = mb_xy - s->mb_stride;
5303 h->left_mb_xy[0] = mb_xy - 1;
5304 if(FRAME_MBAFF){
5305 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5306 const int top_pair_xy = pair_xy - s->mb_stride;
5307 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5308 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5309 const int curr_mb_frame_flag = !MB_FIELD;
5310 const int bottom = (s->mb_y & 1);
5311 if (bottom
5312 ? !curr_mb_frame_flag // bottom macroblock
5313 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5315 h->top_mb_xy -= s->mb_stride;
5317 if (left_mb_frame_flag != curr_mb_frame_flag) {
5318 h->left_mb_xy[0] = pair_xy - 1;
5320 } else if (FIELD_PICTURE) {
5321 h->top_mb_xy -= s->mb_stride;
5323 return;
5327 * decodes a macroblock
5328 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5330 static int decode_mb_cabac(H264Context *h) {
5331 MpegEncContext * const s = &h->s;
5332 int mb_xy;
5333 int mb_type, partition_count, cbp = 0;
5334 int dct8x8_allowed= h->pps.transform_8x8_mode;
5336 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5338 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5340 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5341 if( h->slice_type_nos != FF_I_TYPE ) {
5342 int skip;
5343 /* a skipped mb needs the aff flag from the following mb */
5344 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5345 predict_field_decoding_flag(h);
5346 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5347 skip = h->next_mb_skipped;
5348 else
5349 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5350 /* read skip flags */
5351 if( skip ) {
5352 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5353 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5354 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5355 if(h->next_mb_skipped)
5356 predict_field_decoding_flag(h);
5357 else
5358 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5361 decode_mb_skip(h);
5363 h->cbp_table[mb_xy] = 0;
5364 h->chroma_pred_mode_table[mb_xy] = 0;
5365 h->last_qscale_diff = 0;
5367 return 0;
5371 if(FRAME_MBAFF){
5372 if( (s->mb_y&1) == 0 )
5373 h->mb_mbaff =
5374 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5377 h->prev_mb_skipped = 0;
5379 compute_mb_neighbors(h);
5380 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5381 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5382 return -1;
5385 if( h->slice_type_nos == FF_B_TYPE ) {
5386 if( mb_type < 23 ){
5387 partition_count= b_mb_type_info[mb_type].partition_count;
5388 mb_type= b_mb_type_info[mb_type].type;
5389 }else{
5390 mb_type -= 23;
5391 goto decode_intra_mb;
5393 } else if( h->slice_type_nos == FF_P_TYPE ) {
5394 if( mb_type < 5) {
5395 partition_count= p_mb_type_info[mb_type].partition_count;
5396 mb_type= p_mb_type_info[mb_type].type;
5397 } else {
5398 mb_type -= 5;
5399 goto decode_intra_mb;
5401 } else {
5402 if(h->slice_type == FF_SI_TYPE && mb_type)
5403 mb_type--;
5404 assert(h->slice_type_nos == FF_I_TYPE);
5405 decode_intra_mb:
5406 partition_count = 0;
5407 cbp= i_mb_type_info[mb_type].cbp;
5408 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5409 mb_type= i_mb_type_info[mb_type].type;
5411 if(MB_FIELD)
5412 mb_type |= MB_TYPE_INTERLACED;
5414 h->slice_table[ mb_xy ]= h->slice_num;
5416 if(IS_INTRA_PCM(mb_type)) {
5417 const uint8_t *ptr;
5419 // We assume these blocks are very rare, so we do not optimize for them.
5420 // FIXME The two following lines get the bitstream position in the cabac
5421 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5422 ptr= h->cabac.bytestream;
5423 if(h->cabac.low&0x1) ptr--;
5424 if(CABAC_BITS==16){
5425 if(h->cabac.low&0x1FF) ptr--;
5428 // The pixels are stored in the same order as the levels in the h->mb array.
5429 memcpy(h->mb, ptr, 256); ptr+=256;
5430 if(CHROMA){
5431 memcpy(h->mb+128, ptr, 128); ptr+=128;
5434 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5436 // All blocks are present
5437 h->cbp_table[mb_xy] = 0x1ef;
5438 h->chroma_pred_mode_table[mb_xy] = 0;
5439 // In deblocking, the quantizer is 0
5440 s->current_picture.qscale_table[mb_xy]= 0;
5441 // All coeffs are present
5442 memset(h->non_zero_count[mb_xy], 16, 16);
5443 s->current_picture.mb_type[mb_xy]= mb_type;
5444 h->last_qscale_diff = 0;
5445 return 0;
5448 if(MB_MBAFF){
5449 h->ref_count[0] <<= 1;
5450 h->ref_count[1] <<= 1;
5453 fill_caches(h, mb_type, 0);
5455 if( IS_INTRA( mb_type ) ) {
5456 int i, pred_mode;
5457 if( IS_INTRA4x4( mb_type ) ) {
5458 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5459 mb_type |= MB_TYPE_8x8DCT;
5460 for( i = 0; i < 16; i+=4 ) {
5461 int pred = pred_intra_mode( h, i );
5462 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5463 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5465 } else {
5466 for( i = 0; i < 16; i++ ) {
5467 int pred = pred_intra_mode( h, i );
5468 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5470 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5473 write_back_intra_pred_mode(h);
5474 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5475 } else {
5476 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5477 if( h->intra16x16_pred_mode < 0 ) return -1;
5479 if(CHROMA){
5480 h->chroma_pred_mode_table[mb_xy] =
5481 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5483 pred_mode= check_intra_pred_mode( h, pred_mode );
5484 if( pred_mode < 0 ) return -1;
5485 h->chroma_pred_mode= pred_mode;
5487 } else if( partition_count == 4 ) {
5488 int i, j, sub_partition_count[4], list, ref[2][4];
5490 if( h->slice_type_nos == FF_B_TYPE ) {
5491 for( i = 0; i < 4; i++ ) {
5492 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5493 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5494 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5496 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5497 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5498 pred_direct_motion(h, &mb_type);
5499 h->ref_cache[0][scan8[4]] =
5500 h->ref_cache[1][scan8[4]] =
5501 h->ref_cache[0][scan8[12]] =
5502 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5503 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5504 for( i = 0; i < 4; i++ )
5505 if( IS_DIRECT(h->sub_mb_type[i]) )
5506 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5509 } else {
5510 for( i = 0; i < 4; i++ ) {
5511 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5512 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5513 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5517 for( list = 0; list < h->list_count; list++ ) {
5518 for( i = 0; i < 4; i++ ) {
5519 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5520 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5521 if( h->ref_count[list] > 1 )
5522 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5523 else
5524 ref[list][i] = 0;
5525 } else {
5526 ref[list][i] = -1;
5528 h->ref_cache[list][ scan8[4*i]+1 ]=
5529 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5533 if(dct8x8_allowed)
5534 dct8x8_allowed = get_dct8x8_allowed(h);
5536 for(list=0; list<h->list_count; list++){
5537 for(i=0; i<4; i++){
5538 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5539 if(IS_DIRECT(h->sub_mb_type[i])){
5540 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5541 continue;
5544 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5545 const int sub_mb_type= h->sub_mb_type[i];
5546 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5547 for(j=0; j<sub_partition_count[i]; j++){
5548 int mpx, mpy;
5549 int mx, my;
5550 const int index= 4*i + block_width*j;
5551 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5552 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5553 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5555 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5556 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5557 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5559 if(IS_SUB_8X8(sub_mb_type)){
5560 mv_cache[ 1 ][0]=
5561 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5562 mv_cache[ 1 ][1]=
5563 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5565 mvd_cache[ 1 ][0]=
5566 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5567 mvd_cache[ 1 ][1]=
5568 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5569 }else if(IS_SUB_8X4(sub_mb_type)){
5570 mv_cache[ 1 ][0]= mx;
5571 mv_cache[ 1 ][1]= my;
5573 mvd_cache[ 1 ][0]= mx - mpx;
5574 mvd_cache[ 1 ][1]= my - mpy;
5575 }else if(IS_SUB_4X8(sub_mb_type)){
5576 mv_cache[ 8 ][0]= mx;
5577 mv_cache[ 8 ][1]= my;
5579 mvd_cache[ 8 ][0]= mx - mpx;
5580 mvd_cache[ 8 ][1]= my - mpy;
5582 mv_cache[ 0 ][0]= mx;
5583 mv_cache[ 0 ][1]= my;
5585 mvd_cache[ 0 ][0]= mx - mpx;
5586 mvd_cache[ 0 ][1]= my - mpy;
5588 }else{
5589 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5590 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5591 p[0] = p[1] = p[8] = p[9] = 0;
5592 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5596 } else if( IS_DIRECT(mb_type) ) {
5597 pred_direct_motion(h, &mb_type);
5598 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5599 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5600 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5601 } else {
5602 int list, mx, my, i, mpx, mpy;
5603 if(IS_16X16(mb_type)){
5604 for(list=0; list<h->list_count; list++){
5605 if(IS_DIR(mb_type, 0, list)){
5606 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5607 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5608 }else
5609 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5611 for(list=0; list<h->list_count; list++){
5612 if(IS_DIR(mb_type, 0, list)){
5613 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5615 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5616 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5617 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5619 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5620 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5621 }else
5622 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5625 else if(IS_16X8(mb_type)){
5626 for(list=0; list<h->list_count; list++){
5627 for(i=0; i<2; i++){
5628 if(IS_DIR(mb_type, i, list)){
5629 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5630 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5631 }else
5632 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5635 for(list=0; list<h->list_count; list++){
5636 for(i=0; i<2; i++){
5637 if(IS_DIR(mb_type, i, list)){
5638 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5639 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5640 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5641 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5643 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5644 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5645 }else{
5646 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5647 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5651 }else{
5652 assert(IS_8X16(mb_type));
5653 for(list=0; list<h->list_count; list++){
5654 for(i=0; i<2; i++){
5655 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5656 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5657 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5658 }else
5659 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5662 for(list=0; list<h->list_count; list++){
5663 for(i=0; i<2; i++){
5664 if(IS_DIR(mb_type, i, list)){
5665 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5666 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5667 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5669 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5670 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5671 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5672 }else{
5673 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5674 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5681 if( IS_INTER( mb_type ) ) {
5682 h->chroma_pred_mode_table[mb_xy] = 0;
5683 write_back_motion( h, mb_type );
5686 if( !IS_INTRA16x16( mb_type ) ) {
5687 cbp = decode_cabac_mb_cbp_luma( h );
5688 if(CHROMA)
5689 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5692 h->cbp_table[mb_xy] = h->cbp = cbp;
5694 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5695 if( decode_cabac_mb_transform_size( h ) )
5696 mb_type |= MB_TYPE_8x8DCT;
5698 s->current_picture.mb_type[mb_xy]= mb_type;
5700 if( cbp || IS_INTRA16x16( mb_type ) ) {
5701 const uint8_t *scan, *scan8x8, *dc_scan;
5702 const uint32_t *qmul;
5703 int dqp;
5705 if(IS_INTERLACED(mb_type)){
5706 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5707 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5708 dc_scan= luma_dc_field_scan;
5709 }else{
5710 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5711 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5712 dc_scan= luma_dc_zigzag_scan;
5715 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5716 if( dqp == INT_MIN ){
5717 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5718 return -1;
5720 s->qscale += dqp;
5721 if(((unsigned)s->qscale) > 51){
5722 if(s->qscale<0) s->qscale+= 52;
5723 else s->qscale-= 52;
5725 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5726 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5728 if( IS_INTRA16x16( mb_type ) ) {
5729 int i;
5730 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5731 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5733 if( cbp&15 ) {
5734 qmul = h->dequant4_coeff[0][s->qscale];
5735 for( i = 0; i < 16; i++ ) {
5736 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5737 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5739 } else {
5740 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5742 } else {
5743 int i8x8, i4x4;
5744 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5745 if( cbp & (1<<i8x8) ) {
5746 if( IS_8x8DCT(mb_type) ) {
5747 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5748 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5749 } else {
5750 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5751 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5752 const int index = 4*i8x8 + i4x4;
5753 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5754 //START_TIMER
5755 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5756 //STOP_TIMER("decode_residual")
5759 } else {
5760 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5761 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5766 if( cbp&0x30 ){
5767 int c;
5768 for( c = 0; c < 2; c++ ) {
5769 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5770 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5774 if( cbp&0x20 ) {
5775 int c, i;
5776 for( c = 0; c < 2; c++ ) {
5777 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5778 for( i = 0; i < 4; i++ ) {
5779 const int index = 16 + 4 * c + i;
5780 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5781 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5784 } else {
5785 uint8_t * const nnz= &h->non_zero_count_cache[0];
5786 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5787 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5789 } else {
5790 uint8_t * const nnz= &h->non_zero_count_cache[0];
5791 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5792 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5793 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5794 h->last_qscale_diff = 0;
5797 s->current_picture.qscale_table[mb_xy]= s->qscale;
5798 write_back_non_zero_count(h);
5800 if(MB_MBAFF){
5801 h->ref_count[0] >>= 1;
5802 h->ref_count[1] >>= 1;
5805 return 0;
5809 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5810 int i, d;
5811 const int index_a = qp + h->slice_alpha_c0_offset;
5812 const int alpha = (alpha_table+52)[index_a];
5813 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
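/* alpha and beta are the deblocking thresholds from the spec's tables,
 * indexed by the edge QP plus the slice alpha/beta offsets; a line of
 * samples is filtered only if |p0-q0| < alpha and both |p1-p0| and |q1-q0|
 * are below beta. */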
5815 if( bS[0] < 4 ) {
5816 int8_t tc[4];
5817 for(i=0; i<4; i++)
5818 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5819 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5820 } else {
5821 /* 16px edge length, because bS=4 is triggered by being at
5822 * the edge of an intra MB, so all 4 bS are the same */
5823 for( d = 0; d < 16; d++ ) {
5824 const int p0 = pix[-1];
5825 const int p1 = pix[-2];
5826 const int p2 = pix[-3];
5828 const int q0 = pix[0];
5829 const int q1 = pix[1];
5830 const int q2 = pix[2];
5832 if( FFABS( p0 - q0 ) < alpha &&
5833 FFABS( p1 - p0 ) < beta &&
5834 FFABS( q1 - q0 ) < beta ) {
5836 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5837 if( FFABS( p2 - p0 ) < beta)
5839 const int p3 = pix[-4];
5840 /* p0', p1', p2' */
5841 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5842 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5843 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5844 } else {
5845 /* p0' */
5846 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5848 if( FFABS( q2 - q0 ) < beta)
5850 const int q3 = pix[3];
5851 /* q0', q1', q2' */
5852 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5853 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5854 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5855 } else {
5856 /* q0' */
5857 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5859 }else{
5860 /* p0', q0' */
5861 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5862 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5864 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
5866 pix += stride;
5870 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5871 int i;
5872 const int index_a = qp + h->slice_alpha_c0_offset;
5873 const int alpha = (alpha_table+52)[index_a];
5874 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5876 if( bS[0] < 4 ) {
5877 int8_t tc[4];
5878 for(i=0; i<4; i++)
5879 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5880 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5881 } else {
5882 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5886 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5887 int i;
5888 for( i = 0; i < 16; i++, pix += stride) {
5889 int index_a;
5890 int alpha;
5891 int beta;
5893 int qp_index;
5894 int bS_index = (i >> 1);
5895 if (!MB_FIELD) {
5896 bS_index &= ~1;
5897 bS_index |= (i & 1);
5900 if( bS[bS_index] == 0 ) {
5901 continue;
5904 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5905 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5906 alpha = (alpha_table+52)[index_a];
5907 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5909 if( bS[bS_index] < 4 ) {
5910 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5911 const int p0 = pix[-1];
5912 const int p1 = pix[-2];
5913 const int p2 = pix[-3];
5914 const int q0 = pix[0];
5915 const int q1 = pix[1];
5916 const int q2 = pix[2];
5918 if( FFABS( p0 - q0 ) < alpha &&
5919 FFABS( p1 - p0 ) < beta &&
5920 FFABS( q1 - q0 ) < beta ) {
5921 int tc = tc0;
5922 int i_delta;
5924 if( FFABS( p2 - p0 ) < beta ) {
5925 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5926 tc++;
5928 if( FFABS( q2 - q0 ) < beta ) {
5929 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5930 tc++;
5933 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5934 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5935 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5936 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
5938 }else{
5939 const int p0 = pix[-1];
5940 const int p1 = pix[-2];
5941 const int p2 = pix[-3];
5943 const int q0 = pix[0];
5944 const int q1 = pix[1];
5945 const int q2 = pix[2];
5947 if( FFABS( p0 - q0 ) < alpha &&
5948 FFABS( p1 - p0 ) < beta &&
5949 FFABS( q1 - q0 ) < beta ) {
5951 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5952 if( FFABS( p2 - p0 ) < beta)
5954 const int p3 = pix[-4];
5955 /* p0', p1', p2' */
5956 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5957 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5958 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5959 } else {
5960 /* p0' */
5961 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5963 if( FFABS( q2 - q0 ) < beta)
5965 const int q3 = pix[3];
5966 /* q0', q1', q2' */
5967 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5968 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5969 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5970 } else {
5971 /* q0' */
5972 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5974 }else{
5975 /* p0', q0' */
5976 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5977 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5979 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
5984 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5985 int i;
5986 for( i = 0; i < 8; i++, pix += stride) {
5987 int index_a;
5988 int alpha;
5989 int beta;
5991 int qp_index;
5992 int bS_index = i;
5994 if( bS[bS_index] == 0 ) {
5995 continue;
5998 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
5999 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6000 alpha = (alpha_table+52)[index_a];
6001 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6003 if( bS[bS_index] < 4 ) {
6004 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6005 const int p0 = pix[-1];
6006 const int p1 = pix[-2];
6007 const int q0 = pix[0];
6008 const int q1 = pix[1];
6010 if( FFABS( p0 - q0 ) < alpha &&
6011 FFABS( p1 - p0 ) < beta &&
6012 FFABS( q1 - q0 ) < beta ) {
6013 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6015 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6016 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6017 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6019 }else{
6020 const int p0 = pix[-1];
6021 const int p1 = pix[-2];
6022 const int q0 = pix[0];
6023 const int q1 = pix[1];
6025 if( FFABS( p0 - q0 ) < alpha &&
6026 FFABS( p1 - p0 ) < beta &&
6027 FFABS( q1 - q0 ) < beta ) {
6029 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6030 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6031 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6037 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6038 int i, d;
6039 const int index_a = qp + h->slice_alpha_c0_offset;
6040 const int alpha = (alpha_table+52)[index_a];
6041 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6042 const int pix_next = stride;
6044 if( bS[0] < 4 ) {
6045 int8_t tc[4];
6046 for(i=0; i<4; i++)
6047 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6048 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6049 } else {
6050 /* 16px edge length, see filter_mb_edgev */
6051 for( d = 0; d < 16; d++ ) {
6052 const int p0 = pix[-1*pix_next];
6053 const int p1 = pix[-2*pix_next];
6054 const int p2 = pix[-3*pix_next];
6055 const int q0 = pix[0];
6056 const int q1 = pix[1*pix_next];
6057 const int q2 = pix[2*pix_next];
6059 if( FFABS( p0 - q0 ) < alpha &&
6060 FFABS( p1 - p0 ) < beta &&
6061 FFABS( q1 - q0 ) < beta ) {
6063 const int p3 = pix[-4*pix_next];
6064 const int q3 = pix[ 3*pix_next];
6066 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6067 if( FFABS( p2 - p0 ) < beta) {
6068 /* p0', p1', p2' */
6069 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6070 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6071 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6072 } else {
6073 /* p0' */
6074 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6076 if( FFABS( q2 - q0 ) < beta) {
6077 /* q0', q1', q2' */
6078 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6079 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6080 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6081 } else {
6082 /* q0' */
6083 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6085 }else{
6086 /* p0', q0' */
6087 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6088 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6090 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6092 pix++;
6097 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6098 int i;
6099 const int index_a = qp + h->slice_alpha_c0_offset;
6100 const int alpha = (alpha_table+52)[index_a];
6101 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6103 if( bS[0] < 4 ) {
6104 int8_t tc[4];
6105 for(i=0; i<4; i++)
6106 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6107 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6108 } else {
6109 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6113 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6114 MpegEncContext * const s = &h->s;
6115 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6116 int mb_xy, mb_type;
6117 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6119 mb_xy = h->mb_xy;
6121 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6122 1 ||
6123 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6124 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6125 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6126 return;
6128 assert(!FRAME_MBAFF);
6130 mb_type = s->current_picture.mb_type[mb_xy];
6131 qp = s->current_picture.qscale_table[mb_xy];
6132 qp0 = s->current_picture.qscale_table[mb_xy-1];
6133 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6134 qpc = get_chroma_qp( h, 0, qp );
6135 qpc0 = get_chroma_qp( h, 0, qp0 );
6136 qpc1 = get_chroma_qp( h, 0, qp1 );
6137 qp0 = (qp + qp0 + 1) >> 1;
6138 qp1 = (qp + qp1 + 1) >> 1;
6139 qpc0 = (qpc + qpc0 + 1) >> 1;
6140 qpc1 = (qpc + qpc1 + 1) >> 1;
6141 qp_thresh = 15 - h->slice_alpha_c0_offset;
6142 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6143 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6144 return;
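/* The early return above covers the case where every relevant QP is at or
 * below qp_thresh: then indexA stays below 16, alpha is 0 for all edges of
 * this macroblock, and the loop filter cannot change any sample. */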
6146 if( IS_INTRA(mb_type) ) {
6147 int16_t bS4[4] = {4,4,4,4};
6148 int16_t bS3[4] = {3,3,3,3};
6149 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6150 if( IS_8x8DCT(mb_type) ) {
6151 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6152 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6153 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6154 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6155 } else {
6156 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6157 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6158 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6159 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6160 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6161 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6162 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6163 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6165 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6166 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6167 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6168 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6169 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6170 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6171 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6172 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6173 return;
6174 } else {
6175 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6176 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6177 int edges;
6178 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6179 edges = 4;
6180 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6181 } else {
6182 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6183 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6184 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6185 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6186 ? 3 : 0;
6187 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6188 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6189 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6190 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6192 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6193 bSv[0][0] = 0x0004000400040004ULL;
6194 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6195 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6197 #define FILTER(hv,dir,edge)\
6198 if(bSv[dir][edge]) {\
6199 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6200 if(!(edge&1)) {\
6201 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6202 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6205 if( edges == 1 ) {
6206 FILTER(v,0,0);
6207 FILTER(h,1,0);
6208 } else if( IS_8x8DCT(mb_type) ) {
6209 FILTER(v,0,0);
6210 FILTER(v,0,2);
6211 FILTER(h,1,0);
6212 FILTER(h,1,2);
6213 } else {
6214 FILTER(v,0,0);
6215 FILTER(v,0,1);
6216 FILTER(v,0,2);
6217 FILTER(v,0,3);
6218 FILTER(h,1,0);
6219 FILTER(h,1,1);
6220 FILTER(h,1,2);
6221 FILTER(h,1,3);
6223 #undef FILTER
6227 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6228 MpegEncContext * const s = &h->s;
6229 const int mb_xy= mb_x + mb_y*s->mb_stride;
6230 const int mb_type = s->current_picture.mb_type[mb_xy];
6231 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6232 int first_vertical_edge_done = 0;
6233 int dir;
6235 //for sufficiently low qp, filtering wouldn't do anything
6236 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6237 if(!FRAME_MBAFF){
6238 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6239 int qp = s->current_picture.qscale_table[mb_xy];
6240 if(qp <= qp_thresh
6241 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6242 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6243 return;
6247 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6248 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6249 int top_type, left_type[2];
6250 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6251 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6252 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6254 if(IS_8x8DCT(top_type)){
6255 h->non_zero_count_cache[4+8*0]=
6256 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6257 h->non_zero_count_cache[6+8*0]=
6258 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6260 if(IS_8x8DCT(left_type[0])){
6261 h->non_zero_count_cache[3+8*1]=
6262 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6264 if(IS_8x8DCT(left_type[1])){
6265 h->non_zero_count_cache[3+8*3]=
6266 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6269 if(IS_8x8DCT(mb_type)){
6270 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6271 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6273 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6274 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6276 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6277 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6279 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6280 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6284 if (FRAME_MBAFF
6285 // left mb is in picture
6286 && h->slice_table[mb_xy-1] != 0xFFFF
6287 // and current and left pair do not have the same interlaced type
6288 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6289 // and left mb is in the same slice if deblocking_filter == 2
6290 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6291 /* First vertical edge is different in MBAFF frames
6292 * There are 8 different bS to compute and 2 different Qp
6294 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6295 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6296 int16_t bS[8];
6297 int qp[2];
6298 int bqp[2];
6299 int rqp[2];
6300 int mb_qp, mbn0_qp, mbn1_qp;
6301 int i;
6302 first_vertical_edge_done = 1;
6304 if( IS_INTRA(mb_type) )
6305 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6306 else {
6307 for( i = 0; i < 8; i++ ) {
6308 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6310 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6311 bS[i] = 4;
6312 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6313 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6314 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6316 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6317 bS[i] = 2;
6318 else
6319 bS[i] = 1;
6323 mb_qp = s->current_picture.qscale_table[mb_xy];
6324 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6325 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6326 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6327 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6328 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6329 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6330 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6331 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6332 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6333 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6334 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6335 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6337 /* Filter edge */
6338 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6339 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6340 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6341 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6342 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6344 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6345 for( dir = 0; dir < 2; dir++ )
6347 int edge;
6348 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6349 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6350 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6351 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6352 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6354 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6355 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6356 // how often to recheck mv-based bS when iterating between edges
6357 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6358 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6359 // how often to recheck mv-based bS when iterating along each edge
6360 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6362 if (first_vertical_edge_done) {
6363 start = 1;
6364 first_vertical_edge_done = 0;
6367 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6368 start = 1;
6370 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6371 && !IS_INTERLACED(mb_type)
6372 && IS_INTERLACED(mbm_type)
6374 // This is a special case in the standard where the filtering must
6375 // be done twice (once for each field) even if we are in a
6376 // frame macroblock.
6378 static const int nnz_idx[4] = {4,5,6,3};
6379 unsigned int tmp_linesize = 2 * linesize;
6380 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6381 int mbn_xy = mb_xy - 2 * s->mb_stride;
6382 int qp;
6383 int i, j;
6384 int16_t bS[4];
6386 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6387 if( IS_INTRA(mb_type) ||
6388 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6389 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6390 } else {
6391 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6392 for( i = 0; i < 4; i++ ) {
6393 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6394 mbn_nnz[nnz_idx[i]] != 0 )
6395 bS[i] = 2;
6396 else
6397 bS[i] = 1;
6400 // Do not use s->qscale as the luma quantizer because it does not have
6401 // the same value in IPCM macroblocks.
6402 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6403 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6404 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6405 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6406 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6407 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6408 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6409 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6412 start = 1;
6415 /* Calculate bS */
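/* bS per the standard: 4 on the external edge of an intra macroblock (3 for
 * horizontal edges in field/MBAFF coding), 3 on its internal edges, 2 if
 * either side of the edge has nonzero residual coefficients, 1 if the two
 * sides use different reference frames or their motion vectors differ by at
 * least 4 (quarter-pel units) horizontally or mvy_limit vertically, and 0
 * otherwise, in which case the edge is left unfiltered. */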
6416 for( edge = start; edge < edges; edge++ ) {
6417 /* mbn_xy: neighbor macroblock */
6418 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6419 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6420 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6421 int16_t bS[4];
6422 int qp;
6424 if( (edge&1) && IS_8x8DCT(mb_type) )
6425 continue;
6427 if( IS_INTRA(mb_type) ||
6428 IS_INTRA(mbn_type) ) {
6429 int value;
6430 if (edge == 0) {
6431 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6432 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6434 value = 4;
6435 } else {
6436 value = 3;
6438 } else {
6439 value = 3;
6441 bS[0] = bS[1] = bS[2] = bS[3] = value;
6442 } else {
6443 int i, l;
6444 int mv_done;
6446 if( edge & mask_edge ) {
6447 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6448 mv_done = 1;
6450 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6451 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6452 mv_done = 1;
6454 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6455 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6456 int bn_idx= b_idx - (dir ? 8:1);
6457 int v = 0;
6459 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6460 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6461 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6462 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6465 if(h->slice_type_nos == FF_B_TYPE && v){
6466 v=0;
6467 for( l = 0; !v && l < 2; l++ ) {
6468 int ln= 1-l;
6469 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6470 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6471 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6475 bS[0] = bS[1] = bS[2] = bS[3] = v;
6476 mv_done = 1;
6478 else
6479 mv_done = 0;
6481 for( i = 0; i < 4; i++ ) {
6482 int x = dir == 0 ? edge : i;
6483 int y = dir == 0 ? i : edge;
6484 int b_idx= 8 + 4 + x + 8*y;
6485 int bn_idx= b_idx - (dir ? 8:1);
6487 if( h->non_zero_count_cache[b_idx] != 0 ||
6488 h->non_zero_count_cache[bn_idx] != 0 ) {
6489 bS[i] = 2;
6491 else if(!mv_done)
6493 bS[i] = 0;
6494 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6495 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6496 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6497 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6498 bS[i] = 1;
6499 break;
6503 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6504 bS[i] = 0;
6505 for( l = 0; l < 2; l++ ) {
6506 int ln= 1-l;
6507 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6508 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6509 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6510 bS[i] = 1;
6511 break;
6518 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6519 continue;
6522 /* Filter edge */
6523 // Do not use s->qscale as the luma quantizer because it does not have
6524 // the same value in IPCM macroblocks.
6525 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6526 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6527 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6528 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6529 if( dir == 0 ) {
6530 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6531 if( (edge&1) == 0 ) {
6532 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6533 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6534 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6535 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6537 } else {
6538 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6539 if( (edge&1) == 0 ) {
6540 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6541 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6542 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6543 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6550 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6551 H264Context *h = *(void**)arg;
6552 MpegEncContext * const s = &h->s;
6553 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6555 s->mb_skip_run= -1;
6557 if( h->pps.cabac ) {
6558 int i;
6560 /* realign */
6561 align_get_bits( &s->gb );
6563 /* init cabac */
6564 ff_init_cabac_states( &h->cabac);
6565 ff_init_cabac_decoder( &h->cabac,
6566 s->gb.buffer + get_bits_count(&s->gb)/8,
6567 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6568 /* calculate pre-state */
6569 for( i= 0; i < 460; i++ ) {
6570 int pre;
6571 if( h->slice_type_nos == FF_I_TYPE )
6572 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6573 else
6574 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6576 if( pre <= 63 )
6577 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6578 else
6579 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
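/* Standard CABAC context initialisation: pre is the clipped preCtxState in
 * [1,126]; values <= 63 give MPS 0 with state 63-pre, values >= 64 give MPS
 * 1 with state pre-64, stored here packed as 2*state + MPS. */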
6582 for(;;){
6583 //START_TIMER
6584 int ret = decode_mb_cabac(h);
6585 int eos;
6586 //STOP_TIMER("decode_mb_cabac")
6588 if(ret>=0) hl_decode_mb(h);
6590 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6591 s->mb_y++;
6593 if(ret>=0) ret = decode_mb_cabac(h);
6595 if(ret>=0) hl_decode_mb(h);
6596 s->mb_y--;
6598 eos = get_cabac_terminate( &h->cabac );
6600 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6601 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6602 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6603 return -1;
6606 if( ++s->mb_x >= s->mb_width ) {
6607 s->mb_x = 0;
6608 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6609 ++s->mb_y;
6610 if(FIELD_OR_MBAFF_PICTURE) {
6611 ++s->mb_y;
6615 if( eos || s->mb_y >= s->mb_height ) {
6616 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6617 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6618 return 0;
6622 } else {
6623 for(;;){
6624 int ret = decode_mb_cavlc(h);
6626 if(ret>=0) hl_decode_mb(h);
6628 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6629 s->mb_y++;
6630 ret = decode_mb_cavlc(h);
6632 if(ret>=0) hl_decode_mb(h);
6633 s->mb_y--;
6636 if(ret<0){
6637 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6638 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6640 return -1;
6643 if(++s->mb_x >= s->mb_width){
6644 s->mb_x=0;
6645 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6646 ++s->mb_y;
6647 if(FIELD_OR_MBAFF_PICTURE) {
6648 ++s->mb_y;
6650 if(s->mb_y >= s->mb_height){
6651 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6653 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6654 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6656 return 0;
6657 }else{
6658 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6660 return -1;
6665 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6666 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6667 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6668 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6670 return 0;
6671 }else{
6672 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6674 return -1;
6680 #if 0
6681 for(;s->mb_y < s->mb_height; s->mb_y++){
6682 for(;s->mb_x < s->mb_width; s->mb_x++){
6683 int ret= decode_mb(h);
6685 hl_decode_mb(h);
6687 if(ret<0){
6688 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6689 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6691 return -1;
6694 if(++s->mb_x >= s->mb_width){
6695 s->mb_x=0;
6696 if(++s->mb_y >= s->mb_height){
6697 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6698 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6700 return 0;
6701 }else{
6702 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6704 return -1;
6709 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6710 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6711 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6713 return 0;
6714 }else{
6715 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6717 return -1;
6721 s->mb_x=0;
6722 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6724 #endif
6725 return -1; //not reached
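/**
 * Decode a picture timing SEI message: skip the CPB/DPB removal delays when
 * HRD parameters are present and, if pic_struct_present_flag is set, read
 * pic_struct (kept in h->sei_pic_struct) and skip the optional clock
 * timestamps.
 */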
6728 static int decode_picture_timing(H264Context *h){
6729 MpegEncContext * const s = &h->s;
6730 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6731 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6732 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6734 if(h->sps.pic_struct_present_flag){
6735 unsigned int i, num_clock_ts;
6736 h->sei_pic_struct = get_bits(&s->gb, 4);
6738 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6739 return -1;
6741 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6743 for (i = 0 ; i < num_clock_ts ; i++){
6744 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6745 unsigned int full_timestamp_flag;
6746 skip_bits(&s->gb, 2); /* ct_type */
6747 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6748 skip_bits(&s->gb, 5); /* counting_type */
6749 full_timestamp_flag = get_bits(&s->gb, 1);
6750 skip_bits(&s->gb, 1); /* discontinuity_flag */
6751 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6752 skip_bits(&s->gb, 8); /* n_frames */
6753 if(full_timestamp_flag){
6754 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6755 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6756 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6757 }else{
6758 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6759 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6760 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6761 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6762 if(get_bits(&s->gb, 1)) /* hours_flag */
6763 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6767 if(h->sps.time_offset_length > 0)
6768 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6772 return 0;
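/**
 * Decode an unregistered user data SEI message. The only payload recognized
 * is the version banner written by the x264 encoder; its build number is
 * stored in h->x264_build so that known encoder quirks can be handled later.
 */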
6775 static int decode_unregistered_user_data(H264Context *h, int size){
6776 MpegEncContext * const s = &h->s;
6777 uint8_t user_data[16+256];
6778 int e, build, i;
6780 if(size<16)
6781 return -1;
6783 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6784 user_data[i]= get_bits(&s->gb, 8);
6787 user_data[i]= 0;
6788 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6789 if(e==1 && build>=0)
6790 h->x264_build= build;
6792 if(s->avctx->debug & FF_DEBUG_BUGS)
6793 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6795 for(; i<size; i++)
6796 skip_bits(&s->gb, 8);
6798 return 0;
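/**
 * Decode SEI NAL units. Payload type and payload size both use the generic
 * SEI coding: every 0xFF byte adds 255 and the first non-0xFF byte terminates
 * the value. Unrecognized payload types are skipped.
 */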
6801 static int decode_sei(H264Context *h){
6802 MpegEncContext * const s = &h->s;
6804 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6805 int size, type;
6807 type=0;
6809 type+= show_bits(&s->gb, 8);
6810 }while(get_bits(&s->gb, 8) == 255);
6812 size=0;
6814 size+= show_bits(&s->gb, 8);
6815 }while(get_bits(&s->gb, 8) == 255);
6817 switch(type){
6818 case 1: // Picture timing SEI
6819 if(decode_picture_timing(h) < 0)
6820 return -1;
6821 break;
6822 case 5:
6823 if(decode_unregistered_user_data(h, size) < 0)
6824 return -1;
6825 break;
6826 default:
6827 skip_bits(&s->gb, 8*size);
6830 //FIXME check bits here
6831 align_get_bits(&s->gb);
6834 return 0;
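/**
 * Decode the HRD (hypothetical reference decoder) parameters from the VUI.
 * Most fields are only skipped; the removal/output delay lengths and
 * time_offset_length are stored because the picture timing SEI parser needs
 * them.
 */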
6837 static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
6838 MpegEncContext * const s = &h->s;
6839 int cpb_count, i;
6840 cpb_count = get_ue_golomb(&s->gb) + 1;
6842 if(cpb_count > 32U){
6843 av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
6844 return -1;
6847 get_bits(&s->gb, 4); /* bit_rate_scale */
6848 get_bits(&s->gb, 4); /* cpb_size_scale */
6849 for(i=0; i<cpb_count; i++){
6850 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6851 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6852 get_bits1(&s->gb); /* cbr_flag */
6854 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6855 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6856 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6857 sps->time_offset_length = get_bits(&s->gb, 5);
6858 return 0;
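/**
 * Decode the VUI (video usability information) part of the SPS: sample aspect
 * ratio, timing information, HRD parameters and the bitstream restriction
 * fields. Fields the decoder does not use are parsed and discarded.
 */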
6861 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6862 MpegEncContext * const s = &h->s;
6863 int aspect_ratio_info_present_flag;
6864 unsigned int aspect_ratio_idc;
6866 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6868 if( aspect_ratio_info_present_flag ) {
6869 aspect_ratio_idc= get_bits(&s->gb, 8);
6870 if( aspect_ratio_idc == EXTENDED_SAR ) {
6871 sps->sar.num= get_bits(&s->gb, 16);
6872 sps->sar.den= get_bits(&s->gb, 16);
6873 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6874 sps->sar= pixel_aspect[aspect_ratio_idc];
6875 }else{
6876 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6877 return -1;
6879 }else{
6880 sps->sar.num=
6881 sps->sar.den= 0;
6883 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6885 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6886 get_bits1(&s->gb); /* overscan_appropriate_flag */
6889 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6890 get_bits(&s->gb, 3); /* video_format */
6891 get_bits1(&s->gb); /* video_full_range_flag */
6892 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6893 get_bits(&s->gb, 8); /* colour_primaries */
6894 get_bits(&s->gb, 8); /* transfer_characteristics */
6895 get_bits(&s->gb, 8); /* matrix_coefficients */
6899 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6900 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6901 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6904 sps->timing_info_present_flag = get_bits1(&s->gb);
6905 if(sps->timing_info_present_flag){
6906 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6907 sps->time_scale = get_bits_long(&s->gb, 32);
6908 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6911 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6912 if(sps->nal_hrd_parameters_present_flag)
6913 if(decode_hrd_parameters(h, sps) < 0)
6914 return -1;
6915 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6916 if(sps->vcl_hrd_parameters_present_flag)
6917 if(decode_hrd_parameters(h, sps) < 0)
6918 return -1;
6919 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6920 get_bits1(&s->gb); /* low_delay_hrd_flag */
6921 sps->pic_struct_present_flag = get_bits1(&s->gb);
6923 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6924 if(sps->bitstream_restriction_flag){
6925 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6926 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6927 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6928 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6929 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6930 sps->num_reorder_frames= get_ue_golomb(&s->gb);
6931 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6933 if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6934 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
6935 return -1;
6939 return 0;
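/**
 * Decode a single quantization scaling list. Coefficients are coded as signed
 * deltas from the previous value (starting at 8) in zigzag order; a first
 * delta yielding 0 selects the JVT default matrix, and a later 0 repeats the
 * last value for all remaining coefficients. If the list is absent, the
 * fallback matrix is copied instead.
 */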
6942 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6943 const uint8_t *jvt_list, const uint8_t *fallback_list){
6944 MpegEncContext * const s = &h->s;
6945 int i, last = 8, next = 8;
6946 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6947 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6948 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6949 else
6950 for(i=0;i<size;i++){
6951 if(next)
6952 next = (last + get_se_golomb(&s->gb)) & 0xff;
6953 if(!i && !next){ /* matrix not written, we use the preset one */
6954 memcpy(factors, jvt_list, size*sizeof(uint8_t));
6955 break;
6957 last = factors[scan[i]] = next ? next : last;
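/**
 * Decode the full set of scaling matrices: six 4x4 lists and, when 8x8
 * transforms are possible, two 8x8 lists. Absent lists fall back to the
 * previously decoded list, to the SPS matrices (when parsing a PPS), or to
 * the JVT defaults.
 */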
6961 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6962 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6963 MpegEncContext * const s = &h->s;
6964 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6965 const uint8_t *fallback[4] = {
6966 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6967 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6968 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6969 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
6971 if(get_bits1(&s->gb)){
6972 sps->scaling_matrix_present |= is_sps;
6973 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6974 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6975 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6976 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6977 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6978 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
6979 if(is_sps || pps->transform_8x8_mode){
6980 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6981 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
6986 static inline int decode_seq_parameter_set(H264Context *h){
6987 MpegEncContext * const s = &h->s;
6988 int profile_idc, level_idc;
6989 unsigned int sps_id;
6990 int i;
6991 SPS *sps;
6993 profile_idc= get_bits(&s->gb, 8);
6994 get_bits1(&s->gb); //constraint_set0_flag
6995 get_bits1(&s->gb); //constraint_set1_flag
6996 get_bits1(&s->gb); //constraint_set2_flag
6997 get_bits1(&s->gb); //constraint_set3_flag
6998 get_bits(&s->gb, 4); // reserved
6999 level_idc= get_bits(&s->gb, 8);
7000 sps_id= get_ue_golomb(&s->gb);
7002 if(sps_id >= MAX_SPS_COUNT) {
7003 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
7004 return -1;
7006 sps= av_mallocz(sizeof(SPS));
7007 if(sps == NULL)
7008 return -1;
7010 sps->profile_idc= profile_idc;
7011 sps->level_idc= level_idc;
7013 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7014 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7015 sps->scaling_matrix_present = 0;
7017 if(sps->profile_idc >= 100){ //high profile
7018 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7019 if(sps->chroma_format_idc == 3)
7020 get_bits1(&s->gb); //residual_color_transform_flag
7021 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7022 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7023 sps->transform_bypass = get_bits1(&s->gb);
7024 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7025 }else{
7026 sps->chroma_format_idc= 1;
7029 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7030 sps->poc_type= get_ue_golomb(&s->gb);
7032 if(sps->poc_type == 0){ //FIXME #define
7033 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7034 } else if(sps->poc_type == 1){//FIXME #define
7035 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7036 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7037 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7038 sps->poc_cycle_length = get_ue_golomb(&s->gb);
7040 if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7041 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
7042 goto fail;
7045 for(i=0; i<sps->poc_cycle_length; i++)
7046 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7047 }else if(sps->poc_type != 2){
7048 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7049 goto fail;
7052 sps->ref_frame_count= get_ue_golomb(&s->gb);
7053 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
7054 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7055 goto fail;
7057 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7058 sps->mb_width = get_ue_golomb(&s->gb) + 1;
7059 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7060 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7061 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
7062 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7063 goto fail;
7066 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7067 if(!sps->frame_mbs_only_flag)
7068 sps->mb_aff= get_bits1(&s->gb);
7069 else
7070 sps->mb_aff= 0;
7072 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7074 #ifndef ALLOW_INTERLACE
7075 if(sps->mb_aff)
7076 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7077 #endif
7078 sps->crop= get_bits1(&s->gb);
7079 if(sps->crop){
7080 sps->crop_left = get_ue_golomb(&s->gb);
7081 sps->crop_right = get_ue_golomb(&s->gb);
7082 sps->crop_top = get_ue_golomb(&s->gb);
7083 sps->crop_bottom= get_ue_golomb(&s->gb);
7084 if(sps->crop_left || sps->crop_top){
7085 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7087 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7088 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7090 }else{
7091 sps->crop_left =
7092 sps->crop_right =
7093 sps->crop_top =
7094 sps->crop_bottom= 0;
7097 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7098 if( sps->vui_parameters_present_flag )
7099 decode_vui_parameters(h, sps);
7101 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7102 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7103 sps_id, sps->profile_idc, sps->level_idc,
7104 sps->poc_type,
7105 sps->ref_frame_count,
7106 sps->mb_width, sps->mb_height,
7107 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7108 sps->direct_8x8_inference_flag ? "8B8" : "",
7109 sps->crop_left, sps->crop_right,
7110 sps->crop_top, sps->crop_bottom,
7111 sps->vui_parameters_present_flag ? "VUI" : "",
7112 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
7115 av_free(h->sps_buffers[sps_id]);
7116 h->sps_buffers[sps_id]= sps;
7117 return 0;
7118 fail:
7119 av_free(sps);
7120 return -1;
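/**
 * Precompute the chroma QP lookup table for one chroma_qp_index_offset: for
 * every luma QP in 0..51 the offset is applied, the result clipped to the
 * valid range, and the corresponding chroma QP stored.
 */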
7123 static void
7124 build_qp_table(PPS *pps, int t, int index)
7126 int i;
7127 for(i = 0; i < 52; i++)
7128 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
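/**
 * Decode a picture parameter set. FMO is not supported: when
 * slice_group_count > 1 the slice group map syntax is not parsed. The scaling
 * matrices default to those of the referenced SPS and are only overridden
 * when the PPS carries its own.
 */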
7131 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7132 MpegEncContext * const s = &h->s;
7133 unsigned int pps_id= get_ue_golomb(&s->gb);
7134 PPS *pps;
7136 if(pps_id >= MAX_PPS_COUNT) {
7137 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
7138 return -1;
7141 pps= av_mallocz(sizeof(PPS));
7142 if(pps == NULL)
7143 return -1;
7144 pps->sps_id= get_ue_golomb(&s->gb);
7145 if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
7146 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7147 goto fail;
7150 pps->cabac= get_bits1(&s->gb);
7151 pps->pic_order_present= get_bits1(&s->gb);
7152 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7153 if(pps->slice_group_count > 1 ){
7154 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7155 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7156 switch(pps->mb_slice_group_map_type){
7157 case 0:
7158 #if 0
7159 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7160 | run_length[ i ] |1 |ue(v) |
7161 #endif
7162 break;
7163 case 2:
7164 #if 0
7165 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7166 |{ | | |
7167 | top_left_mb[ i ] |1 |ue(v) |
7168 | bottom_right_mb[ i ] |1 |ue(v) |
7169 | } | | |
7170 #endif
7171 break;
7172 case 3:
7173 case 4:
7174 case 5:
7175 #if 0
7176 | slice_group_change_direction_flag |1 |u(1) |
7177 | slice_group_change_rate_minus1 |1 |ue(v) |
7178 #endif
7179 break;
7180 case 6:
7181 #if 0
7182 | slice_group_id_cnt_minus1 |1 |ue(v) |
7183 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7184 |) | | |
7185 | slice_group_id[ i ] |1 |u(v) |
7186 #endif
7187 break;
7190 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7191 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7192 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7193 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7194 goto fail;
7197 pps->weighted_pred= get_bits1(&s->gb);
7198 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7199 pps->init_qp= get_se_golomb(&s->gb) + 26;
7200 pps->init_qs= get_se_golomb(&s->gb) + 26;
7201 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7202 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7203 pps->constrained_intra_pred= get_bits1(&s->gb);
7204 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7206 pps->transform_8x8_mode= 0;
7207 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7208 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7209 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7211 if(get_bits_count(&s->gb) < bit_length){
7212 pps->transform_8x8_mode= get_bits1(&s->gb);
7213 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7214 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7215 } else {
7216 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7219 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7220 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7221 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7222 h->pps.chroma_qp_diff= 1;
7224 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7225 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7226 pps_id, pps->sps_id,
7227 pps->cabac ? "CABAC" : "CAVLC",
7228 pps->slice_group_count,
7229 pps->ref_count[0], pps->ref_count[1],
7230 pps->weighted_pred ? "weighted" : "",
7231 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7232 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7233 pps->constrained_intra_pred ? "CONSTR" : "",
7234 pps->redundant_pic_cnt_present ? "REDU" : "",
7235 pps->transform_8x8_mode ? "8x8DCT" : ""
7239 av_free(h->pps_buffers[pps_id]);
7240 h->pps_buffers[pps_id]= pps;
7241 return 0;
7242 fail:
7243 av_free(pps);
7244 return -1;
7248 * Call decode_slice() for each context.
7250 * @param h h264 master context
7251 * @param context_count number of contexts to execute
7253 static void execute_decode_slices(H264Context *h, int context_count){
7254 MpegEncContext * const s = &h->s;
7255 AVCodecContext * const avctx= s->avctx;
7256 H264Context *hx;
7257 int i;
7259 if(context_count == 1) {
7260 decode_slice(avctx, &h);
7261 } else {
7262 for(i = 1; i < context_count; i++) {
7263 hx = h->thread_context[i];
7264 hx->s.error_recognition = avctx->error_recognition;
7265 hx->s.error_count = 0;
7268 avctx->execute(avctx, (void *)decode_slice,
7269 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7271 /* pull back stuff from slices to master context */
7272 hx = h->thread_context[context_count - 1];
7273 s->mb_x = hx->s.mb_x;
7274 s->mb_y = hx->s.mb_y;
7275 s->dropable = hx->s.dropable;
7276 s->picture_structure = hx->s.picture_structure;
7277 for(i = 1; i < context_count; i++)
7278 h->s.error_count += h->thread_context[i]->s.error_count;
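/**
 * Split the input buffer into NAL units and dispatch them. In avcC mode each
 * NAL unit is prefixed by its size; otherwise the 00 00 01 Annex B start code
 * is searched for. Slice NAL units are distributed over the available thread
 * contexts and decoded in batches of h->max_contexts via
 * execute_decode_slices().
 */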
7283 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7284 MpegEncContext * const s = &h->s;
7285 AVCodecContext * const avctx= s->avctx;
7286 int buf_index=0;
7287 H264Context *hx; ///< thread context
7288 int context_count = 0;
7290 h->max_contexts = avctx->thread_count;
7291 #if 0
7292 int i;
7293 for(i=0; i<50; i++){
7294 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7296 #endif
7297 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7298 h->current_slice = 0;
7299 if (!s->first_field)
7300 s->current_picture_ptr= NULL;
7303 for(;;){
7304 int consumed;
7305 int dst_length;
7306 int bit_length;
7307 const uint8_t *ptr;
7308 int i, nalsize = 0;
7309 int err;
7311 if(h->is_avc) {
7312 if(buf_index >= buf_size) break;
7313 nalsize = 0;
7314 for(i = 0; i < h->nal_length_size; i++)
7315 nalsize = (nalsize << 8) | buf[buf_index++];
7316 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7317 if(nalsize == 1){
7318 buf_index++;
7319 continue;
7320 }else{
7321 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7322 break;
7325 } else {
7326 // start code prefix search
7327 for(; buf_index + 3 < buf_size; buf_index++){
7328 // This should always succeed in the first iteration.
7329 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7330 break;
7333 if(buf_index+3 >= buf_size) break;
7335 buf_index+=3;
7338 hx = h->thread_context[context_count];
7340 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7341 if (ptr==NULL || dst_length < 0){
7342 return -1;
7344 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7345 dst_length--;
7346 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7348 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7349 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7352 if (h->is_avc && (nalsize != consumed)){
7353 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7354 consumed= nalsize;
7357 buf_index += consumed;
7359 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7360 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7361 continue;
7363 again:
7364 err = 0;
7365 switch(hx->nal_unit_type){
7366 case NAL_IDR_SLICE:
7367 if (h->nal_unit_type != NAL_IDR_SLICE) {
7368 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7369 return -1;
7371 idr(h); //FIXME ensure we don't lose some frames if there is reordering
7372 case NAL_SLICE:
7373 init_get_bits(&hx->s.gb, ptr, bit_length);
7374 hx->intra_gb_ptr=
7375 hx->inter_gb_ptr= &hx->s.gb;
7376 hx->s.data_partitioning = 0;
7378 if((err = decode_slice_header(hx, h)))
7379 break;
7381 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7382 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7383 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7384 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7385 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7386 && avctx->skip_frame < AVDISCARD_ALL)
7387 context_count++;
7388 break;
7389 case NAL_DPA:
7390 init_get_bits(&hx->s.gb, ptr, bit_length);
7391 hx->intra_gb_ptr=
7392 hx->inter_gb_ptr= NULL;
7393 hx->s.data_partitioning = 1;
7395 err = decode_slice_header(hx, h);
7396 break;
7397 case NAL_DPB:
7398 init_get_bits(&hx->intra_gb, ptr, bit_length);
7399 hx->intra_gb_ptr= &hx->intra_gb;
7400 break;
7401 case NAL_DPC:
7402 init_get_bits(&hx->inter_gb, ptr, bit_length);
7403 hx->inter_gb_ptr= &hx->inter_gb;
7405 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7406 && s->context_initialized
7407 && s->hurry_up < 5
7408 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7409 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7410 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7411 && avctx->skip_frame < AVDISCARD_ALL)
7412 context_count++;
7413 break;
7414 case NAL_SEI:
7415 init_get_bits(&s->gb, ptr, bit_length);
7416 decode_sei(h);
7417 break;
7418 case NAL_SPS:
7419 init_get_bits(&s->gb, ptr, bit_length);
7420 decode_seq_parameter_set(h);
7422 if(s->flags& CODEC_FLAG_LOW_DELAY)
7423 s->low_delay=1;
7425 if(avctx->has_b_frames < 2)
7426 avctx->has_b_frames= !s->low_delay;
7427 break;
7428 case NAL_PPS:
7429 init_get_bits(&s->gb, ptr, bit_length);
7431 decode_picture_parameter_set(h, bit_length);
7433 break;
7434 case NAL_AUD:
7435 case NAL_END_SEQUENCE:
7436 case NAL_END_STREAM:
7437 case NAL_FILLER_DATA:
7438 case NAL_SPS_EXT:
7439 case NAL_AUXILIARY_SLICE:
7440 break;
7441 default:
7442 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7445 if(context_count == h->max_contexts) {
7446 execute_decode_slices(h, context_count);
7447 context_count = 0;
7450 if (err < 0)
7451 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7452 else if(err == 1) {
7453 /* Slice could not be decoded in parallel mode, copy down
7454 * NAL unit stuff to context 0 and restart. Note that
7455 * rbsp_buffer is not transferred, but since we no longer
7456 * run in parallel mode this should not be an issue. */
7457 h->nal_unit_type = hx->nal_unit_type;
7458 h->nal_ref_idc = hx->nal_ref_idc;
7459 hx = h;
7460 goto again;
7463 if(context_count)
7464 execute_decode_slices(h, context_count);
7465 return buf_index;
7469 * Returns the number of bytes consumed for building the current frame.
7471 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7472 if(pos==0) pos=1; //avoid infinite loops (probably not needed, but just in case)
7473 if(pos+10>buf_size) pos=buf_size; //if fewer than about 10 bytes remain, consume the whole buffer
7475 return pos;
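/**
 * Top-level decode callback. A zero-sized packet flushes the delayed picture
 * buffer; otherwise any avcC extradata is parsed once, the NAL units of the
 * packet are decoded, and completed pictures are reordered by POC before one
 * is returned.
 */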
7478 static int decode_frame(AVCodecContext *avctx,
7479 void *data, int *data_size,
7480 const uint8_t *buf, int buf_size)
7482 H264Context *h = avctx->priv_data;
7483 MpegEncContext *s = &h->s;
7484 AVFrame *pict = data;
7485 int buf_index;
7487 s->flags= avctx->flags;
7488 s->flags2= avctx->flags2;
7490 /* end of stream, output what is still in the buffers */
7491 if (buf_size == 0) {
7492 Picture *out;
7493 int i, out_idx;
7495 //FIXME factorize this with the output code below
7496 out = h->delayed_pic[0];
7497 out_idx = 0;
7498 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7499 if(h->delayed_pic[i]->poc < out->poc){
7500 out = h->delayed_pic[i];
7501 out_idx = i;
7504 for(i=out_idx; h->delayed_pic[i]; i++)
7505 h->delayed_pic[i] = h->delayed_pic[i+1];
7507 if(out){
7508 *data_size = sizeof(AVFrame);
7509 *pict= *(AVFrame*)out;
7512 return 0;
7515 if(h->is_avc && !h->got_avcC) {
7516 int i, cnt, nalsize;
7517 unsigned char *p = avctx->extradata;
7518 if(avctx->extradata_size < 7) {
7519 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7520 return -1;
7522 if(*p != 1) {
7523 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7524 return -1;
7526 /* The SPS and PPS in the avcC always have their length coded on 2 bytes,
7527 so use a fake nal_length_size = 2 while parsing them. */
7528 h->nal_length_size = 2;
7529 // Decode sps from avcC
7530 cnt = *(p+5) & 0x1f; // Number of sps
7531 p += 6;
7532 for (i = 0; i < cnt; i++) {
7533 nalsize = AV_RB16(p) + 2;
7534 if(decode_nal_units(h, p, nalsize) < 0) {
7535 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7536 return -1;
7538 p += nalsize;
7540 // Decode pps from avcC
7541 cnt = *(p++); // Number of pps
7542 for (i = 0; i < cnt; i++) {
7543 nalsize = AV_RB16(p) + 2;
7544 if(decode_nal_units(h, p, nalsize) != nalsize) {
7545 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7546 return -1;
7548 p += nalsize;
7550 // Now store the correct NAL length size, which will be used to parse all remaining NALs
7551 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7552 // Do not reparse avcC
7553 h->got_avcC = 1;
7556 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7557 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7558 return -1;
7559 h->got_avcC = 1;
7562 buf_index=decode_nal_units(h, buf, buf_size);
7563 if(buf_index < 0)
7564 return -1;
7566 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7567 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7568 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7569 return -1;
7572 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7573 Picture *out = s->current_picture_ptr;
7574 Picture *cur = s->current_picture_ptr;
7575 int i, pics, cross_idr, out_of_order, out_idx;
7577 s->mb_y= 0;
7579 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7580 s->current_picture_ptr->pict_type= s->pict_type;
7582 if(!s->dropable) {
7583 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7584 h->prev_poc_msb= h->poc_msb;
7585 h->prev_poc_lsb= h->poc_lsb;
7587 h->prev_frame_num_offset= h->frame_num_offset;
7588 h->prev_frame_num= h->frame_num;
7591 * FIXME: The error handling code does not seem to support interlaced video
7592 * when slices span multiple rows.
7593 * The ff_er_add_slice calls don't work right for bottom
7594 * fields; they cause massive erroneous error concealment.
7595 * Error marking covers both fields (top and bottom),
7596 * which causes a mismatched s->error_count
7597 * and a bad error table. Further, the error count goes to
7598 * INT_MAX when called for the bottom field, because mb_y is
7599 * one past the end (the caller's fault), and resync_mb_y != 0
7600 * causes problems for the first MB line, too.
7602 if (!FIELD_PICTURE)
7603 ff_er_frame_end(s);
7605 MPV_frame_end(s);
7607 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7608 /* Wait for second field. */
7609 *data_size = 0;
7611 } else {
7612 cur->repeat_pict = 0;
7614 /* Signal interlacing information externally. */
7615 /* Prefer picture timing SEI information, when present, over what the decoding process implies. */
7616 if(h->sps.pic_struct_present_flag){
7617 switch (h->sei_pic_struct)
7619 case SEI_PIC_STRUCT_FRAME:
7620 cur->interlaced_frame = 0;
7621 break;
7622 case SEI_PIC_STRUCT_TOP_FIELD:
7623 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7624 case SEI_PIC_STRUCT_TOP_BOTTOM:
7625 case SEI_PIC_STRUCT_BOTTOM_TOP:
7626 cur->interlaced_frame = 1;
7627 break;
7628 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7629 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7630 // Signal the possibility of telecined film externally (pic_struct 5,6)
7631 // From these hints, let the application decide whether to apply deinterlacing.
7632 cur->repeat_pict = 1;
7633 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7634 break;
7635 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7636 // Force progressive here, as doubling interlaced frame is a bad idea.
7637 cur->interlaced_frame = 0;
7638 cur->repeat_pict = 2;
7639 break;
7640 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7641 cur->interlaced_frame = 0;
7642 cur->repeat_pict = 4;
7643 break;
7645 }else{
7646 /* Derive interlacing flag from used decoding process. */
7647 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7650 if (cur->field_poc[0] != cur->field_poc[1]){
7651 /* Derive top_field_first from field pocs. */
7652 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7653 }else{
7654 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7655 /* Use picture timing SEI information. Even if it comes from a past frame, it is better than nothing. */
7656 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7657 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7658 cur->top_field_first = 1;
7659 else
7660 cur->top_field_first = 0;
7661 }else{
7662 /* Most likely progressive */
7663 cur->top_field_first = 0;
7667 //FIXME do something with unavailable reference frames
7669 /* Sort B-frames into display order */
7671 if(h->sps.bitstream_restriction_flag
7672 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7673 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7674 s->low_delay = 0;
7677 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7678 && !h->sps.bitstream_restriction_flag){
7679 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7680 s->low_delay= 0;
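/* Output reordering: append the current picture to delayed_pic[], pick the
 * entry with the smallest POC (scanning up to the first key frame) as the
 * output candidate, and grow has_b_frames adaptively when pictures arrive
 * out of order. */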
7683 pics = 0;
7684 while(h->delayed_pic[pics]) pics++;
7686 assert(pics <= MAX_DELAYED_PIC_COUNT);
7688 h->delayed_pic[pics++] = cur;
7689 if(cur->reference == 0)
7690 cur->reference = DELAYED_PIC_REF;
7692 out = h->delayed_pic[0];
7693 out_idx = 0;
7694 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7695 if(h->delayed_pic[i]->poc < out->poc){
7696 out = h->delayed_pic[i];
7697 out_idx = i;
7699 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7701 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7703 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7705 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7706 || (s->low_delay &&
7707 ((!cross_idr && out->poc > h->outputed_poc + 2)
7708 || cur->pict_type == FF_B_TYPE)))
7710 s->low_delay = 0;
7711 s->avctx->has_b_frames++;
7714 if(out_of_order || pics > s->avctx->has_b_frames){
7715 out->reference &= ~DELAYED_PIC_REF;
7716 for(i=out_idx; h->delayed_pic[i]; i++)
7717 h->delayed_pic[i] = h->delayed_pic[i+1];
7719 if(!out_of_order && pics > s->avctx->has_b_frames){
7720 *data_size = sizeof(AVFrame);
7722 h->outputed_poc = out->poc;
7723 *pict= *(AVFrame*)out;
7724 }else{
7725 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7730 assert(pict->data[0] || !*data_size);
7731 ff_print_debug_info(s, pict);
7732 //printf("out %d\n", (int)pict->data[0]);
7733 #if 0 //?
7735 /* Return the Picture timestamp as the frame number. */
7736 /* We subtract 1 because it is added in utils.c. */
7737 avctx->frame_number = s->picture_number - 1;
7738 #endif
7739 return get_consumed_bytes(s, buf_index, buf_size);
7741 #if 0
7742 static inline void fill_mb_avail(H264Context *h){
7743 MpegEncContext * const s = &h->s;
7744 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7746 if(s->mb_y){
7747 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7748 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7749 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7750 }else{
7751 h->mb_avail[0]=
7752 h->mb_avail[1]=
7753 h->mb_avail[2]= 0;
7755 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7756 h->mb_avail[4]= 1; //FIXME move out
7757 h->mb_avail[5]= 0; //FIXME move out
7759 #endif
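/* Stand-alone self-test (built only when TEST is defined): exercises the
 * exp-Golomb reader/writer and, in currently disabled sections, the 4x4
 * transform and the NAL escaping/unescaping code. */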
7761 #ifdef TEST
7762 #undef printf
7763 #undef random
7764 #define COUNT 8000
7765 #define SIZE (COUNT*40)
7766 int main(void){
7767 int i;
7768 uint8_t temp[SIZE];
7769 PutBitContext pb;
7770 GetBitContext gb;
7771 // int int_temp[10000];
7772 DSPContext dsp;
7773 AVCodecContext avctx;
7775 dsputil_init(&dsp, &avctx);
7777 init_put_bits(&pb, temp, SIZE);
7778 printf("testing unsigned exp golomb\n");
7779 for(i=0; i<COUNT; i++){
7780 START_TIMER
7781 set_ue_golomb(&pb, i);
7782 STOP_TIMER("set_ue_golomb");
7784 flush_put_bits(&pb);
7786 init_get_bits(&gb, temp, 8*SIZE);
7787 for(i=0; i<COUNT; i++){
7788 int j, s;
7790 s= show_bits(&gb, 24);
7792 START_TIMER
7793 j= get_ue_golomb(&gb);
7794 if(j != i){
7795 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7796 // return -1;
7798 STOP_TIMER("get_ue_golomb");
7802 init_put_bits(&pb, temp, SIZE);
7803 printf("testing signed exp golomb\n");
7804 for(i=0; i<COUNT; i++){
7805 START_TIMER
7806 set_se_golomb(&pb, i - COUNT/2);
7807 STOP_TIMER("set_se_golomb");
7809 flush_put_bits(&pb);
7811 init_get_bits(&gb, temp, 8*SIZE);
7812 for(i=0; i<COUNT; i++){
7813 int j, s;
7815 s= show_bits(&gb, 24);
7817 START_TIMER
7818 j= get_se_golomb(&gb);
7819 if(j != i - COUNT/2){
7820 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7821 // return -1;
7823 STOP_TIMER("get_se_golomb");
7826 #if 0
7827 printf("testing 4x4 (I)DCT\n");
7829 DCTELEM block[16];
7830 uint8_t src[16], ref[16];
7831 uint64_t error= 0, max_error=0;
7833 for(i=0; i<COUNT; i++){
7834 int j;
7835 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7836 for(j=0; j<16; j++){
7837 ref[j]= random()%255;
7838 src[j]= random()%255;
7841 h264_diff_dct_c(block, src, ref, 4);
7843 //normalize
7844 for(j=0; j<16; j++){
7845 // printf("%d ", block[j]);
7846 block[j]= block[j]*4;
7847 if(j&1) block[j]= (block[j]*4 + 2)/5;
7848 if(j&4) block[j]= (block[j]*4 + 2)/5;
7850 // printf("\n");
7852 s->dsp.h264_idct_add(ref, block, 4);
7853 /* for(j=0; j<16; j++){
7854 printf("%d ", ref[j]);
7856 printf("\n");*/
7858 for(j=0; j<16; j++){
7859 int diff= FFABS(src[j] - ref[j]);
7861 error+= diff*diff;
7862 max_error= FFMAX(max_error, diff);
7865 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7866 printf("testing quantizer\n");
7867 for(qp=0; qp<52; qp++){
7868 for(i=0; i<16; i++)
7869 src1_block[i]= src2_block[i]= random()%255;
7872 printf("Testing NAL layer\n");
7874 uint8_t bitstream[COUNT];
7875 uint8_t nal[COUNT*2];
7876 H264Context h;
7877 memset(&h, 0, sizeof(H264Context));
7879 for(i=0; i<COUNT; i++){
7880 int zeros= i;
7881 int nal_length;
7882 int consumed;
7883 int out_length;
7884 uint8_t *out;
7885 int j;
7887 for(j=0; j<COUNT; j++){
7888 bitstream[j]= (random() % 255) + 1;
7891 for(j=0; j<zeros; j++){
7892 int pos= random() % COUNT;
7893 while(bitstream[pos] == 0){
7894 pos++;
7895 pos %= COUNT;
7897 bitstream[pos]=0;
7900 START_TIMER
7902 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7903 if(nal_length<0){
7904 printf("encoding failed\n");
7905 return -1;
7908 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7910 STOP_TIMER("NAL")
7912 if(out_length != COUNT){
7913 printf("incorrect length %d %d\n", out_length, COUNT);
7914 return -1;
7917 if(consumed != nal_length){
7918 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7919 return -1;
7922 if(memcmp(bitstream, out, COUNT)){
7923 printf("mismatch\n");
7924 return -1;
7927 #endif
7929 printf("Testing RBSP\n");
7932 return 0;
7934 #endif /* TEST */
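/**
 * Free all per-decoder state: the RBSP buffers, the macroblock tables and the
 * cached SPS/PPS parameter sets, then tear down the shared MpegEncContext.
 */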
7937 static av_cold int decode_end(AVCodecContext *avctx)
7939 H264Context *h = avctx->priv_data;
7940 MpegEncContext *s = &h->s;
7941 int i;
7943 av_freep(&h->rbsp_buffer[0]);
7944 av_freep(&h->rbsp_buffer[1]);
7945 free_tables(h); //FIXME cleanup init stuff perhaps
7947 for(i = 0; i < MAX_SPS_COUNT; i++)
7948 av_freep(h->sps_buffers + i);
7950 for(i = 0; i < MAX_PPS_COUNT; i++)
7951 av_freep(h->pps_buffers + i);
7953 MPV_common_end(s);
7955 // memset(h, 0, sizeof(H264Context));
7957 return 0;
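/* Public decoder description registered with libavcodec. The positional
 * fields are: name, media type, codec id, private context size, init,
 * encode (unused), close and decode callbacks, followed by the capability
 * flags. */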
7961 AVCodec h264_decoder = {
7962 "h264",
7963 CODEC_TYPE_VIDEO,
7964 CODEC_ID_H264,
7965 sizeof(H264Context),
7966 decode_init,
7967 NULL,
7968 decode_end,
7969 decode_frame,
7970 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7971 .flush= flush_dpb,
7972 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
7975 #include "svq3.c"