/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file h264.c
 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h"
33 #include "h264_parser.h"
34 #include "golomb.h"
35 #include "rectangle.h"
37 #include "cabac.h"
38 #ifdef ARCH_X86
39 #include "i386/h264_i386.h"
40 #endif
42 //#undef NDEBUG
43 #include <assert.h>
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
49 #define DELAYED_PIC_REF 4
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
71 static VLC run7_vlc;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
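/**
 * Packs two 16-bit values (e.g. the two components of a motion vector or two
 * reference indices) into one 32-bit word, swapping the halves on big-endian
 * hosts so that a single uint32_t store produces the same int16_t[2] memory
 * layout everywhere.  Used throughout this file to fill the mv/ref caches
 * with one store instead of two.
 */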
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
84 #else
85 return (a&0xFFFF) + (b<<16);
86 #endif
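/* Lookup tables for qp%6 and qp/6.  The H.264 dequantisation scale repeats
 * with a period of 6 (each +6 in qp doubles the step size), so (qp%6, qp/6)
 * selects a base scale and a shift; small tables avoid divisions in the
 * per-block code. */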
89 const uint8_t ff_rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
93 const uint8_t ff_div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
97 static const int left_block_options[4][8]={
98 {0,1,2,3,7,10,8,11},
99 {2,2,3,3,8,11,8,11},
100 {0,0,1,1,7,10,7,10},
101 {0,2,0,2,7,10,7,10}
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 int * left_block;
110 int topleft_partition= -1;
111 int i;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
117 return;
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
126 if(FRAME_MBAFF){
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
138 if (bottom
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
144 if (bottom
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
154 if (bottom
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
163 if (bottom) {
164 left_block = left_block_options[1];
165 } else {
166 left_block= left_block_options[2];
168 } else {
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
178 if(for_deblock){
179 topleft_type = 0;
180 topright_type = 0;
181 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
186 int list;
187 for(list=0; list<h->list_count; list++){
//These values were changed for ease of performing MC; we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
195 ref += h->b8_stride;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
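/* pack16to32(ref[0],ref[1]) & 0x00FF00FF keeps each 8-bit reference index in
 * the low byte of its 16-bit half; multiplying by 0x0101 duplicates it into
 * the other byte, so one 32-bit store writes ref[0],ref[0],ref[1],ref[1]
 * into four int8_t ref_cache entries at once. */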
201 }else{
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
230 }else{
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
239 }else{
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 }else{
259 int pred;
260 if(!(top_type & type_mask))
261 pred= -1;
262 else{
263 pred= 2;
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
270 for(i=0; i<2; i++){
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 }else{
275 int pred;
276 if(!(left_type[i] & type_mask))
277 pred= -1;
278 else{
279 pred= 2;
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
299 if(top_type){
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
311 }else{
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
326 if(left_type[i]){
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
331 }else{
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
339 if( h->pps.cabac ) {
340 // top_cbp
341 if(top_type) {
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
344 h->top_cbp = 0x1C0;
345 } else {
346 h->top_cbp = 0;
348 // left_cbp
349 if (left_type[0]) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
352 h->left_cbp = 0x1C0;
353 } else {
354 h->left_cbp = 0;
356 if (left_type[0]) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
359 if (left_type[1]) {
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
364 #if 1
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
366 int list;
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
374 continue;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
389 }else{
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
397 for(i=0; i<2; i++){
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
406 }else{
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
415 continue;
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
422 }else{
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
432 }else{
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
438 continue;
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
451 if( h->pps.cabac ) {
/* XXX yuck, load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
459 }else{
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
469 }else{
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
477 }else{
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
496 }else{
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
504 else
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
511 else
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
516 if(FRAME_MBAFF){
517 #define MAP_MVS\
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
528 if(MB_FIELD){
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
535 MAP_MVS
536 #undef MAP_F2F
537 }else{
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
544 MAP_MVS
545 #undef MAP_F2F
550 #endif
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
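/* In the function below (and in check_intra_pred_mode()) the static top[] and
 * left[] tables are indexed by the cached prediction mode of a block whose top
 * or left neighbour is missing: -1 means the mode really needs the missing
 * samples (a bitstream error), 0 means the mode does not use them and is kept,
 * and a *_DC_PRED value replaces plain DC prediction with a variant that only
 * uses the side that is still available. */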
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
574 int i;
576 if(!(h->top_samples_available&0x8000)){
577 for(i=0; i<4; i++){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
579 if(status<0){
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
581 return -1;
582 } else if(status){
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
590 for(i=0; i<4; i++){
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
593 if(status<0){
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
595 return -1;
596 } else if(status){
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
603 return 0;
604 } //FIXME cleanup like next
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
614 if(mode > 6U) {
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
616 return -1;
619 if(!(h->top_samples_available&0x8000)){
620 mode= top[ mode ];
621 if(mode<0){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
623 return -1;
627 if((h->left_samples_available&0x8080) != 0x8080){
628 mode= left[ mode ];
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
632 if(mode<0){
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
634 return -1;
638 return mode;
/**
 * gets the predicted intra4x4 prediction mode.
 */
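/* The predicted 4x4 mode is the smaller of the left and top neighbours' modes;
 * if either neighbour is unavailable (cached as -1) the prediction falls back
 * to DC_PRED.  E.g. left = VERT_PRED (0), top = DC_PRED (2) predicts VERT_PRED. */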
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
653 else return min;
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
/**
 * gets the predicted number of non-zero coefficients.
 * @param n block index
 */
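/* Unavailable neighbours are cached as 64 (CAVLC path), which makes the
 * arithmetic below work out:
 *   both present:  rounded average, e.g. left=3, top=5 -> (8+1)>>1 = 4
 *   one missing:   sum >= 64 skips the halving and &31 strips the 64,
 *                  e.g. left=64, top=5 -> 69&31 = 5
 *   both missing:  128&31 = 0, i.e. zero coefficients are predicted. */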
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
684 int i= left + top;
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
690 return i&31;
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
699 if(FRAME_MBAFF){
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
701 const int16_t *mv;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
705 if(!MB_FIELD
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 if(!MB_FIELD
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 if(MB_FIELD
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 && i >= scan8[0]+8){
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
736 #undef SET_DIAG_MV
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
741 return topright_ref;
742 }else{
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
/**
 * gets the predicted MV.
 * @param n the block index
 * @param part_width the width of the partition (4, 8, 16) -> (1, 2, 4)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
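/* Standard H.264 MV prediction: if exactly one of the left (A), top (B) and
 * diagonal (C) neighbours uses the requested reference, its MV is copied;
 * otherwise the component-wise median of A, B and C is used, e.g.
 * A=(1,4), B=(3,2), C=(5,0) -> (3,2).  If only the left neighbour is
 * available the prediction degenerates to A. */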
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 const int16_t * C;
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
/* mv_cache
B . . A T T T T
U . . L . . , .
U . . L . . . .
U . . L . . , .
. . . L . . . .
*/
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
783 if(left_ref==ref){
784 *mx= A[0];
785 *my= A[1];
786 }else if(top_ref==ref){
787 *mx= B[0];
788 *my= B[1];
789 }else{
790 *mx= C[0];
791 *my= C[1];
793 }else{
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
795 *mx= A[0];
796 *my= A[1];
797 }else{
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * gets the directionally predicted 16x8 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
813 if(n==0){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
819 if(top_ref == ref){
820 *mx= B[0];
821 *my= B[1];
822 return;
824 }else{
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
830 if(left_ref == ref){
831 *mx= A[0];
832 *my= A[1];
833 return;
837 //RARE
838 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
848 if(n==0){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
854 if(left_ref == ref){
855 *mx= A[0];
856 *my= A[1];
857 return;
859 }else{
860 const int16_t * C;
861 int diagonal_ref;
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
868 *mx= C[0];
869 *my= C[1];
870 return;
874 //RARE
875 pred_motion(h, n, 2, list, ref, mx, my);
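/* P_Skip motion (used by pred_pskip_motion() below): the MV is forced to (0,0)
 * when the left or top neighbour is unavailable, or when either of them
 * references picture 0 with a zero MV; otherwise the normal 16x16 median
 * prediction is used. */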
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
888 *mx = *my = 0;
889 return;
892 pred_motion(h, 0, 4, 0, 0, mx, my);
894 return;
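/* Temporal direct scaling: with td = poc(ref1) - poc(ref0) and
 * tb = poc(cur) - poc(ref0),
 *   tx = (16384 + |td|/2) / td
 *   dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023)
 * e.g. tb=2, td=4 -> tx=4096, factor = (2*4096+32)>>6 = 128, so the colocated
 * MV is halved for a B frame midway between its references.  td == 0 and
 * long-term references use the fixed value 256 (scale 1.0). */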
897 static inline void direct_dist_scale_factor(H264Context * const h){
898 MpegEncContext * const s = &h->s;
899 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
900 const int poc1 = h->ref_list[1][0].poc;
901 int i;
902 for(i=0; i<h->ref_count[0]; i++){
903 int poc0 = h->ref_list[0][i].poc;
904 int td = av_clip(poc1 - poc0, -128, 127);
905 if(td == 0 || h->ref_list[0][i].long_ref){
906 h->dist_scale_factor[i] = 256;
907 }else{
908 int tb = av_clip(poc - poc0, -128, 127);
909 int tx = (16384 + (FFABS(td) >> 1)) / td;
910 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
913 if(FRAME_MBAFF){
914 for(i=0; i<h->ref_count[0]; i++){
915 h->dist_scale_factor_field[2*i] =
916 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
920 static inline void direct_ref_list_init(H264Context * const h){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 Picture * const cur = s->current_picture_ptr;
924 int list, i, j;
925 int sidx= s->picture_structure&1;
926 int ref1sidx= ref1->reference&1;
927 for(list=0; list<2; list++){
928 cur->ref_count[sidx][list] = h->ref_count[list];
929 for(j=0; j<h->ref_count[list]; j++)
930 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
932 if(s->picture_structure == PICT_FRAME){
933 memcpy(cur->ref_count[0], cur->ref_count[1], sizeof(cur->ref_count[0]));
934 memcpy(cur->ref_poc [0], cur->ref_poc [1], sizeof(cur->ref_poc [0]));
936 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
937 return;
938 for(list=0; list<2; list++){
939 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
940 int poc = ref1->ref_poc[ref1sidx][list][i];
941 if(((poc&3) == 3) != (s->picture_structure == PICT_FRAME))
942 poc= (poc&~3) + s->picture_structure;
943 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
944 for(j=0; j<h->ref_count[list]; j++)
945 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
946 h->map_col_to_list0[list][i] = j;
947 break;
951 if(FRAME_MBAFF){
952 for(list=0; list<2; list++){
953 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
954 j = h->map_col_to_list0[list][i];
955 h->map_col_to_list0_field[list][2*i] = 2*j;
956 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
962 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
963 MpegEncContext * const s = &h->s;
964 int b8_stride = h->b8_stride;
965 int b4_stride = h->b_stride;
966 int mb_xy = h->mb_xy;
967 int mb_type_col[2];
968 const int16_t (*l1mv0)[2], (*l1mv1)[2];
969 const int8_t *l1ref0, *l1ref1;
970 const int is_b8x8 = IS_8X8(*mb_type);
971 unsigned int sub_mb_type;
972 int i8, i4;
974 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
976 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
977 if(h->ref_list[1][0].reference == PICT_FRAME){ // AFL/AFR/FR/FL -> AFL
978 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL
979 int cur_poc = s->current_picture_ptr->poc;
980 int *col_poc = h->ref_list[1]->field_poc;
981 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
982 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
983 b8_stride = 0;
985 }else if(!(s->picture_structure & h->ref_list[1][0].reference)){// FL -> FL & differ parity
986 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
987 mb_xy += s->mb_stride*fieldoff;
989 goto single_col;
990 }else{ // AFL/AFR/FR/FL -> AFR/FR
991 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
992 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
993 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
994 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
995 b8_stride *= 3;
996 b4_stride *= 6;
997 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
998 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
999 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1000 && !is_b8x8){
1001 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1002 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1003 }else{
1004 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1005 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1007 }else{ // AFR/FR -> AFR/FR
1008 single_col:
1009 mb_type_col[0] =
1010 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1011 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1012 /* FIXME save sub mb types from previous frames (or derive from MVs)
1013 * so we know exactly what block size to use */
1014 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1015 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1016 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1017 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1018 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1019 }else{
1020 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1021 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1026 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1027 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1028 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1029 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1030 if(!b8_stride){
1031 if(s->mb_y&1){
1032 l1ref0 += h->b8_stride;
1033 l1ref1 += h->b8_stride;
1034 l1mv0 += 2*b4_stride;
1035 l1mv1 += 2*b4_stride;
1039 if(h->direct_spatial_mv_pred){
1040 int ref[2];
1041 int mv[2][2];
1042 int list;
1044 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1046 /* ref = min(neighbors) */
1047 for(list=0; list<2; list++){
1048 int refa = h->ref_cache[list][scan8[0] - 1];
1049 int refb = h->ref_cache[list][scan8[0] - 8];
1050 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1051 if(refc == PART_NOT_AVAILABLE)
1052 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1053 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1054 if(ref[list] < 0)
1055 ref[list] = -1;
1058 if(ref[0] < 0 && ref[1] < 0){
1059 ref[0] = ref[1] = 0;
1060 mv[0][0] = mv[0][1] =
1061 mv[1][0] = mv[1][1] = 0;
1062 }else{
1063 for(list=0; list<2; list++){
1064 if(ref[list] >= 0)
1065 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1066 else
1067 mv[list][0] = mv[list][1] = 0;
1071 if(ref[1] < 0){
1072 if(!is_b8x8)
1073 *mb_type &= ~MB_TYPE_L1;
1074 sub_mb_type &= ~MB_TYPE_L1;
1075 }else if(ref[0] < 0){
1076 if(!is_b8x8)
1077 *mb_type &= ~MB_TYPE_L0;
1078 sub_mb_type &= ~MB_TYPE_L0;
1081 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1082 for(i8=0; i8<4; i8++){
1083 int x8 = i8&1;
1084 int y8 = i8>>1;
1085 int xy8 = x8+y8*b8_stride;
1086 int xy4 = 3*x8+y8*b4_stride;
1087 int a=0, b=0;
1089 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1090 continue;
1091 h->sub_mb_type[i8] = sub_mb_type;
1093 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1094 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1095 if(!IS_INTRA(mb_type_col[y8])
1096 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1097 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1098 if(ref[0] > 0)
1099 a= pack16to32(mv[0][0],mv[0][1]);
1100 if(ref[1] > 0)
1101 b= pack16to32(mv[1][0],mv[1][1]);
1102 }else{
1103 a= pack16to32(mv[0][0],mv[0][1]);
1104 b= pack16to32(mv[1][0],mv[1][1]);
1106 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1107 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1109 }else if(IS_16X16(*mb_type)){
1110 int a=0, b=0;
1112 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1113 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1114 if(!IS_INTRA(mb_type_col[0])
1115 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1116 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1117 && (h->x264_build>33 || !h->x264_build)))){
1118 if(ref[0] > 0)
1119 a= pack16to32(mv[0][0],mv[0][1]);
1120 if(ref[1] > 0)
1121 b= pack16to32(mv[1][0],mv[1][1]);
1122 }else{
1123 a= pack16to32(mv[0][0],mv[0][1]);
1124 b= pack16to32(mv[1][0],mv[1][1]);
1126 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1127 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1128 }else{
1129 for(i8=0; i8<4; i8++){
1130 const int x8 = i8&1;
1131 const int y8 = i8>>1;
1133 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1134 continue;
1135 h->sub_mb_type[i8] = sub_mb_type;
1137 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1138 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1139 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1140 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1142 /* col_zero_flag */
1143 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1144 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1145 && (h->x264_build>33 || !h->x264_build)))){
1146 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1147 if(IS_SUB_8X8(sub_mb_type)){
1148 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1149 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1150 if(ref[0] == 0)
1151 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1152 if(ref[1] == 0)
1153 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1155 }else
1156 for(i4=0; i4<4; i4++){
1157 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1158 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1159 if(ref[0] == 0)
1160 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1161 if(ref[1] == 0)
1162 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1168 }else{ /* direct temporal mv pred */
1169 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1170 const int *dist_scale_factor = h->dist_scale_factor;
1172 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1173 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1174 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1175 dist_scale_factor = h->dist_scale_factor_field;
1177 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1178 /* FIXME assumes direct_8x8_inference == 1 */
1179 int y_shift = 2*!IS_INTERLACED(*mb_type);
1180 int ref_shift= FRAME_MBAFF ? y_shift : 1;
1182 for(i8=0; i8<4; i8++){
1183 const int x8 = i8&1;
1184 const int y8 = i8>>1;
1185 int ref0, scale;
1186 const int16_t (*l1mv)[2]= l1mv0;
1188 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1189 continue;
1190 h->sub_mb_type[i8] = sub_mb_type;
1192 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1193 if(IS_INTRA(mb_type_col[y8])){
1194 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1195 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1196 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1197 continue;
1200 ref0 = l1ref0[x8 + y8*b8_stride];
1201 if(ref0 >= 0)
1202 ref0 = map_col_to_list0[0][ref0*2>>ref_shift];
1203 else{
1204 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]*2>>ref_shift];
1205 l1mv= l1mv1;
1207 scale = dist_scale_factor[ref0];
1208 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1211 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1212 int my_col = (mv_col[1]<<y_shift)/2;
1213 int mx = (scale * mv_col[0] + 128) >> 8;
1214 int my = (scale * my_col + 128) >> 8;
1215 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1216 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1219 return;
1222 /* one-to-one mv scaling */
1224 if(IS_16X16(*mb_type)){
1225 int ref, mv0, mv1;
1227 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1228 if(IS_INTRA(mb_type_col[0])){
1229 ref=mv0=mv1=0;
1230 }else{
1231 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1232 : map_col_to_list0[1][l1ref1[0]];
1233 const int scale = dist_scale_factor[ref0];
1234 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1235 int mv_l0[2];
1236 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1237 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1238 ref= ref0;
1239 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1240 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1242 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1243 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1244 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1245 }else{
1246 for(i8=0; i8<4; i8++){
1247 const int x8 = i8&1;
1248 const int y8 = i8>>1;
1249 int ref0, scale;
1250 const int16_t (*l1mv)[2]= l1mv0;
1252 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1253 continue;
1254 h->sub_mb_type[i8] = sub_mb_type;
1255 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1256 if(IS_INTRA(mb_type_col[0])){
1257 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1258 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1259 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1260 continue;
1263 ref0 = l1ref0[x8 + y8*b8_stride];
1264 if(ref0 >= 0)
1265 ref0 = map_col_to_list0[0][ref0];
1266 else{
1267 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]];
1268 l1mv= l1mv1;
1270 scale = dist_scale_factor[ref0];
1272 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1273 if(IS_SUB_8X8(sub_mb_type)){
1274 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1275 int mx = (scale * mv_col[0] + 128) >> 8;
1276 int my = (scale * mv_col[1] + 128) >> 8;
1277 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1278 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1279 }else
1280 for(i4=0; i4<4; i4++){
1281 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1282 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1283 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1284 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1285 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1286 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1293 static inline void write_back_motion(H264Context *h, int mb_type){
1294 MpegEncContext * const s = &h->s;
1295 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1296 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1297 int list;
1299 if(!USES_LIST(mb_type, 0))
1300 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1302 for(list=0; list<h->list_count; list++){
1303 int y;
1304 if(!USES_LIST(mb_type, list))
1305 continue;
1307 for(y=0; y<4; y++){
1308 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1309 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1311 if( h->pps.cabac ) {
1312 if(IS_SKIP(mb_type))
1313 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1314 else
1315 for(y=0; y<4; y++){
1316 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1317 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1322 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1323 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1324 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1325 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1326 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1330 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1331 if(IS_8X8(mb_type)){
1332 uint8_t *direct_table = &h->direct_table[b8_xy];
1333 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1334 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1335 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Decodes a network abstraction layer unit.
 * @param consumed is the number of bytes used as input
 * @param length is the length of the array
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
 * @returns decoded bytes, might be src+1 if no escapes
 */
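/* Escape handling illustrated: inside an RBSP the sequences 00 00 00, 00 00 01,
 * 00 00 02 and 00 00 03 are transmitted with an emulation prevention byte as
 * 00 00 03 0x, so two zero bytes followed by 03 mean "drop the 03", while two
 * zero bytes followed by 00/01/02 can only belong to the next start code and
 * terminate this NAL unit. */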
1347 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1348 int i, si, di;
1349 uint8_t *dst;
1350 int bufidx;
1352 // src[0]&0x80; //forbidden bit
1353 h->nal_ref_idc= src[0]>>5;
1354 h->nal_unit_type= src[0]&0x1F;
1356 src++; length--;
1357 #if 0
1358 for(i=0; i<length; i++)
1359 printf("%2X ", src[i]);
1360 #endif
1361 for(i=0; i+1<length; i+=2){
1362 if(src[i]) continue;
1363 if(i>0 && src[i-1]==0) i--;
1364 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1365 if(src[i+2]!=3){
1366 /* startcode, so we must be past the end */
1367 length=i;
1369 break;
1373 if(i>=length-1){ //no escaped 0
1374 *dst_length= length;
1375 *consumed= length+1; //+1 for the header
1376 return src;
1379 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1380 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1381 dst= h->rbsp_buffer[bufidx];
1383 if (dst == NULL){
1384 return NULL;
1387 //printf("decoding esc\n");
1388 si=di=0;
1389 while(si<length){
1390 //remove escapes (very rare 1:2^22)
1391 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1392 if(src[si+2]==3){ //escape
1393 dst[di++]= 0;
1394 dst[di++]= 0;
1395 si+=3;
1396 continue;
1397 }else //next start code
1398 break;
1401 dst[di++]= src[si++];
1404 *dst_length= di;
1405 *consumed= si + 1;//+1 for the header
1406 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1407 return dst;
/**
 * identifies the exact end of the bitstream
 * @return the length of the trailing bits, or 0 if damaged
 */
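/* The last RBSP byte ends with a single 1 bit (rbsp_stop_one_bit) followed by
 * zero bits up to the byte boundary, so the position of the lowest set bit in
 * that byte is the number of trailing bits: a final byte of xxx10000 returns 5,
 * 0x80 returns 8 (the whole byte is trailing), 0x00 returns 0 (damaged). */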
1414 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1415 int v= *src;
1416 int r;
1418 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1420 for(r=1; r<9; r++){
1421 if(v&1) return r;
1422 v>>=1;
1424 return 0;
/**
 * IDCT transforms the 16 dc values and dequantizes them.
 * @param qp quantization parameter
 */
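/* The 16 luma DC coefficients get a separable 4-point Hadamard-style butterfly
 * (the two passes below handle the two directions) instead of the regular 4x4
 * transform; the result is rescaled as (x*qmul + 128) >> 8 and written back to
 * the DC positions of the 16 luma blocks. */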
1431 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1432 #define stride 16
1433 int i;
1434 int temp[16]; //FIXME check if this is a good idea
1435 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1436 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1438 //memset(block, 64, 2*256);
1439 //return;
1440 for(i=0; i<4; i++){
1441 const int offset= y_offset[i];
1442 const int z0= block[offset+stride*0] + block[offset+stride*4];
1443 const int z1= block[offset+stride*0] - block[offset+stride*4];
1444 const int z2= block[offset+stride*1] - block[offset+stride*5];
1445 const int z3= block[offset+stride*1] + block[offset+stride*5];
1447 temp[4*i+0]= z0+z3;
1448 temp[4*i+1]= z1+z2;
1449 temp[4*i+2]= z1-z2;
1450 temp[4*i+3]= z0-z3;
1453 for(i=0; i<4; i++){
1454 const int offset= x_offset[i];
1455 const int z0= temp[4*0+i] + temp[4*2+i];
1456 const int z1= temp[4*0+i] - temp[4*2+i];
1457 const int z2= temp[4*1+i] - temp[4*3+i];
1458 const int z3= temp[4*1+i] + temp[4*3+i];
1460 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1461 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1462 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1463 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1467 #if 0
1469 * DCT transforms the 16 dc values.
1470 * @param qp quantization parameter ??? FIXME
1472 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1473 // const int qmul= dequant_coeff[qp][0];
1474 int i;
1475 int temp[16]; //FIXME check if this is a good idea
1476 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1477 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1479 for(i=0; i<4; i++){
1480 const int offset= y_offset[i];
1481 const int z0= block[offset+stride*0] + block[offset+stride*4];
1482 const int z1= block[offset+stride*0] - block[offset+stride*4];
1483 const int z2= block[offset+stride*1] - block[offset+stride*5];
1484 const int z3= block[offset+stride*1] + block[offset+stride*5];
1486 temp[4*i+0]= z0+z3;
1487 temp[4*i+1]= z1+z2;
1488 temp[4*i+2]= z1-z2;
1489 temp[4*i+3]= z0-z3;
1492 for(i=0; i<4; i++){
1493 const int offset= x_offset[i];
1494 const int z0= temp[4*0+i] + temp[4*2+i];
1495 const int z1= temp[4*0+i] - temp[4*2+i];
1496 const int z2= temp[4*1+i] - temp[4*3+i];
1497 const int z3= temp[4*1+i] + temp[4*3+i];
1499 block[stride*0 +offset]= (z0 + z3)>>1;
1500 block[stride*2 +offset]= (z1 + z2)>>1;
1501 block[stride*8 +offset]= (z1 - z2)>>1;
1502 block[stride*10+offset]= (z0 - z3)>>1;
1505 #endif
1507 #undef xStride
1508 #undef stride
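/* 2x2 inverse Hadamard on the four chroma DC values plus dequantisation:
 *   [a b]       [a+b+c+d  a-b+c-d]
 *   [c d]  ->   [a+b-c-d  a-b-c+d]   each scaled by (x*qmul) >> 7 */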
1510 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1511 const int stride= 16*2;
1512 const int xStride= 16;
1513 int a,b,c,d,e;
1515 a= block[stride*0 + xStride*0];
1516 b= block[stride*0 + xStride*1];
1517 c= block[stride*1 + xStride*0];
1518 d= block[stride*1 + xStride*1];
1520 e= a-b;
1521 a= a+b;
1522 b= c-d;
1523 c= c+d;
1525 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1526 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1527 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1528 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1531 #if 0
1532 static void chroma_dc_dct_c(DCTELEM *block){
1533 const int stride= 16*2;
1534 const int xStride= 16;
1535 int a,b,c,d,e;
1537 a= block[stride*0 + xStride*0];
1538 b= block[stride*0 + xStride*1];
1539 c= block[stride*1 + xStride*0];
1540 d= block[stride*1 + xStride*1];
1542 e= a-b;
1543 a= a+b;
1544 b= c-d;
1545 c= c+d;
1547 block[stride*0 + xStride*0]= (a+c);
1548 block[stride*0 + xStride*1]= (e+b);
1549 block[stride*1 + xStride*0]= (a-c);
1550 block[stride*1 + xStride*1]= (e-b);
1552 #endif
/**
 * gets the chroma qp.
 */
1557 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1558 return h->pps.chroma_qp_table[t][qscale];
1561 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1562 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
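/* quantize_c() below is a forward (encoder-side) dead-zone quantiser:
 * level = (|coeff| * quant_table[j] + bias) >> QUANT_SHIFT, with bias equal to
 * 1/3 (intra) or 1/6 (inter) of the quantisation unit (1<<QUANT_SHIFT); the
 * threshold test cheaply rejects coefficients that would quantise to zero, and
 * the DC path uses a different shift at low qp to avoid overflow. */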
1563 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1564 int i;
1565 const int * const quant_table= quant_coeff[qscale];
1566 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1567 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1568 const unsigned int threshold2= (threshold1<<1);
1569 int last_non_zero;
1571 if(separate_dc){
1572 if(qscale<=18){
1573 //avoid overflows
1574 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1575 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1576 const unsigned int dc_threshold2= (dc_threshold1<<1);
1578 int level= block[0]*quant_coeff[qscale+18][0];
1579 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1580 if(level>0){
1581 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1582 block[0]= level;
1583 }else{
1584 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1585 block[0]= -level;
1587 // last_non_zero = i;
1588 }else{
1589 block[0]=0;
1591 }else{
1592 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1593 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1594 const unsigned int dc_threshold2= (dc_threshold1<<1);
1596 int level= block[0]*quant_table[0];
1597 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1598 if(level>0){
1599 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1600 block[0]= level;
1601 }else{
1602 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1603 block[0]= -level;
1605 // last_non_zero = i;
1606 }else{
1607 block[0]=0;
1610 last_non_zero= 0;
1611 i=1;
1612 }else{
1613 last_non_zero= -1;
1614 i=0;
1617 for(; i<16; i++){
1618 const int j= scantable[i];
1619 int level= block[j]*quant_table[j];
1621 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1622 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1623 if(((unsigned)(level+threshold1))>threshold2){
1624 if(level>0){
1625 level= (bias + level)>>QUANT_SHIFT;
1626 block[j]= level;
1627 }else{
1628 level= (bias - level)>>QUANT_SHIFT;
1629 block[j]= -level;
1631 last_non_zero = i;
1632 }else{
1633 block[j]=0;
1637 return last_non_zero;
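/* Note on quantize_c() above: threshold2 is 2*threshold1, so the unsigned
 * test (unsigned)(level+threshold1) > threshold2 is a branch-free check for
 * |level| > threshold1; coefficients inside that dead zone are written back
 * as 0, everything else is rounded with the intra/inter bias and shifted
 * down by QUANT_SHIFT (with the DC coefficient handled at a different
 * precision when separate_dc is set). */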
1640 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1641 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1642 int src_x_offset, int src_y_offset,
1643 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1644 MpegEncContext * const s = &h->s;
1645 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1646 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1647 const int luma_xy= (mx&3) + ((my&3)<<2);
1648 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1649 uint8_t * src_cb, * src_cr;
1650 int extra_width= h->emu_edge_width;
1651 int extra_height= h->emu_edge_height;
1652 int emu=0;
1653 const int full_mx= mx>>2;
1654 const int full_my= my>>2;
1655 const int pic_width = 16*s->mb_width;
1656 const int pic_height = 16*s->mb_height >> MB_FIELD;
1658 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
1659 return;
1661 if(mx&7) extra_width -= 3;
1662 if(my&7) extra_height -= 3;
1664 if( full_mx < 0-extra_width
1665 || full_my < 0-extra_height
1666 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1667 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1668 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1669 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1670 emu=1;
1673 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1674 if(!square){
1675 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1678 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1680 if(MB_FIELD){
1681 // chroma offset when predicting from a field of opposite parity
1682 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1683 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1685 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1686 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1688 if(emu){
1689 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1690 src_cb= s->edge_emu_buffer;
1692 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1694 if(emu){
1695 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1696 src_cr= s->edge_emu_buffer;
1698 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
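/* Note on mc_dir_part() above: luma motion vectors are in quarter-pel units,
 * so (mx&3, my&3) select one of the 16 qpel interpolation functions and
 * (mx>>2, my>>2) is the full-pel position; chroma is half resolution and
 * uses eighth-pel fractions (mx&7, my&7). Blocks reaching outside the
 * reference picture are first copied through ff_emulated_edge_mc(). */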
1701 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1702 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1703 int x_offset, int y_offset,
1704 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1705 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1706 int list0, int list1){
1707 MpegEncContext * const s = &h->s;
1708 qpel_mc_func *qpix_op= qpix_put;
1709 h264_chroma_mc_func chroma_op= chroma_put;
1711 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1712 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1713 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1714 x_offset += 8*s->mb_x;
1715 y_offset += 8*(s->mb_y >> MB_FIELD);
1717 if(list0){
1718 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1719 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1720 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1721 qpix_op, chroma_op);
1723 qpix_op= qpix_avg;
1724 chroma_op= chroma_avg;
1727 if(list1){
1728 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1729 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1730 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1731 qpix_op, chroma_op);
1735 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1736 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1737 int x_offset, int y_offset,
1738 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1739 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1740 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1741 int list0, int list1){
1742 MpegEncContext * const s = &h->s;
1744 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1745 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1746 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1747 x_offset += 8*s->mb_x;
1748 y_offset += 8*(s->mb_y >> MB_FIELD);
1750 if(list0 && list1){
1751 /* don't optimize for luma-only case, since B-frames usually
1752 * use implicit weights => chroma too. */
1753 uint8_t *tmp_cb = s->obmc_scratchpad;
1754 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1755 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1756 int refn0 = h->ref_cache[0][ scan8[n] ];
1757 int refn1 = h->ref_cache[1][ scan8[n] ];
1759 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1760 dest_y, dest_cb, dest_cr,
1761 x_offset, y_offset, qpix_put, chroma_put);
1762 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1763 tmp_y, tmp_cb, tmp_cr,
1764 x_offset, y_offset, qpix_put, chroma_put);
1766 if(h->use_weight == 2){
1767 int weight0 = h->implicit_weight[refn0][refn1];
1768 int weight1 = 64 - weight0;
1769 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1770 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1771 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1772 }else{
1773 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1774 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1775 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1776 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1777 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1778 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1779 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1780 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1781 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1783 }else{
1784 int list = list1 ? 1 : 0;
1785 int refn = h->ref_cache[list][ scan8[n] ];
1786 Picture *ref= &h->ref_list[list][refn];
1787 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1788 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1789 qpix_put, chroma_put);
1791 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1792 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1793 if(h->use_weight_chroma){
1794 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1795 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1796 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1797 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1802 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1803 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1804 int x_offset, int y_offset,
1805 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1806 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1807 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1808 int list0, int list1){
1809 if((h->use_weight==2 && list0 && list1
1810 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1811 || h->use_weight==1)
1812 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1813 x_offset, y_offset, qpix_put, chroma_put,
1814 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1815 else
1816 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1817 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
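/* Note on mc_part() above: the weighted path is taken only for explicit
 * weighted prediction (use_weight==1) or for implicit weighting when the
 * weight pair is not the trivial 32/32 split; all other partitions go
 * through the plain put/avg motion compensation in mc_part_std(). */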
1820 static inline void prefetch_motion(H264Context *h, int list){
1821 /* fetch pixels for the estimated mv 4 macroblocks ahead;
1822 * optimized for 64-byte cache lines */
1823 MpegEncContext * const s = &h->s;
1824 const int refn = h->ref_cache[list][scan8[0]];
1825 if(refn >= 0){
1826 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1827 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1828 uint8_t **src= h->ref_list[list][refn].data;
1829 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1830 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1831 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1832 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1836 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1837 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1838 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1839 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1840 MpegEncContext * const s = &h->s;
1841 const int mb_xy= h->mb_xy;
1842 const int mb_type= s->current_picture.mb_type[mb_xy];
1844 assert(IS_INTER(mb_type));
1846 prefetch_motion(h, 0);
1848 if(IS_16X16(mb_type)){
1849 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1850 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1851 &weight_op[0], &weight_avg[0],
1852 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1853 }else if(IS_16X8(mb_type)){
1854 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1855 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1856 &weight_op[1], &weight_avg[1],
1857 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1858 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1859 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1860 &weight_op[1], &weight_avg[1],
1861 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1862 }else if(IS_8X16(mb_type)){
1863 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1864 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1865 &weight_op[2], &weight_avg[2],
1866 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1867 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1868 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1869 &weight_op[2], &weight_avg[2],
1870 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1871 }else{
1872 int i;
1874 assert(IS_8X8(mb_type));
1876 for(i=0; i<4; i++){
1877 const int sub_mb_type= h->sub_mb_type[i];
1878 const int n= 4*i;
1879 int x_offset= (i&1)<<2;
1880 int y_offset= (i&2)<<1;
1882 if(IS_SUB_8X8(sub_mb_type)){
1883 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1884 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1885 &weight_op[3], &weight_avg[3],
1886 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1887 }else if(IS_SUB_8X4(sub_mb_type)){
1888 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1889 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1890 &weight_op[4], &weight_avg[4],
1891 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1892 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1893 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1894 &weight_op[4], &weight_avg[4],
1895 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1896 }else if(IS_SUB_4X8(sub_mb_type)){
1897 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1898 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1899 &weight_op[5], &weight_avg[5],
1900 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1901 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1902 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1903 &weight_op[5], &weight_avg[5],
1904 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1905 }else{
1906 int j;
1907 assert(IS_SUB_4X4(sub_mb_type));
1908 for(j=0; j<4; j++){
1909 int sub_x_offset= x_offset + 2*(j&1);
1910 int sub_y_offset= y_offset + (j&2);
1911 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1912 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1913 &weight_op[6], &weight_avg[6],
1914 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1920 prefetch_motion(h, 1);
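/* Note on hl_motion() above: 16x16, 16x8 and 8x16 partitions are handled
 * directly; for 8x8 each of the four sub-macroblocks is dispatched again
 * according to its sub_mb_type, down to 4x4 blocks. prefetch_motion() is
 * called for list 0 before and for list 1 after the actual compensation. */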
1923 static av_cold void decode_init_vlc(void){
1924 static int done = 0;
1926 if (!done) {
1927 int i;
1928 int offset;
1929 done = 1;
1931 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1932 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1933 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1934 &chroma_dc_coeff_token_len [0], 1, 1,
1935 &chroma_dc_coeff_token_bits[0], 1, 1,
1936 INIT_VLC_USE_NEW_STATIC);
1938 offset = 0;
1939 for(i=0; i<4; i++){
1940 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1941 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1942 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1943 &coeff_token_len [i][0], 1, 1,
1944 &coeff_token_bits[i][0], 1, 1,
1945 INIT_VLC_USE_NEW_STATIC);
1946 offset += coeff_token_vlc_tables_size[i];
1949 * This is a one-time safety check to make sure that
1950 * the packed static coeff_token_vlc table sizes
1951 * were initialized correctly.
1953 assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));
1955 for(i=0; i<3; i++){
1956 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1957 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1958 init_vlc(&chroma_dc_total_zeros_vlc[i],
1959 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1960 &chroma_dc_total_zeros_len [i][0], 1, 1,
1961 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1962 INIT_VLC_USE_NEW_STATIC);
1964 for(i=0; i<15; i++){
1965 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1966 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1967 init_vlc(&total_zeros_vlc[i],
1968 TOTAL_ZEROS_VLC_BITS, 16,
1969 &total_zeros_len [i][0], 1, 1,
1970 &total_zeros_bits[i][0], 1, 1,
1971 INIT_VLC_USE_NEW_STATIC);
1974 for(i=0; i<6; i++){
1975 run_vlc[i].table = run_vlc_tables[i];
1976 run_vlc[i].table_allocated = run_vlc_tables_size;
1977 init_vlc(&run_vlc[i],
1978 RUN_VLC_BITS, 7,
1979 &run_len [i][0], 1, 1,
1980 &run_bits[i][0], 1, 1,
1981 INIT_VLC_USE_NEW_STATIC);
1983 run7_vlc.table = run7_vlc_table;
1984 run7_vlc.table_allocated = run7_vlc_table_size;
1985 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1986 &run_len [6][0], 1, 1,
1987 &run_bits[6][0], 1, 1,
1988 INIT_VLC_USE_NEW_STATIC);
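/* Note on decode_init_vlc() above: every VLC is pointed at a statically
 * allocated table (and its size) before init_vlc() is called with
 * INIT_VLC_USE_NEW_STATIC, so the tables are built once into those static
 * buffers and no per-context allocation is needed at runtime. */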
1992 static void free_tables(H264Context *h){
1993 int i;
1994 H264Context *hx;
1995 av_freep(&h->intra4x4_pred_mode);
1996 av_freep(&h->chroma_pred_mode_table);
1997 av_freep(&h->cbp_table);
1998 av_freep(&h->mvd_table[0]);
1999 av_freep(&h->mvd_table[1]);
2000 av_freep(&h->direct_table);
2001 av_freep(&h->non_zero_count);
2002 av_freep(&h->slice_table_base);
2003 h->slice_table= NULL;
2005 av_freep(&h->mb2b_xy);
2006 av_freep(&h->mb2b8_xy);
2008 for(i = 0; i < MAX_SPS_COUNT; i++)
2009 av_freep(h->sps_buffers + i);
2011 for(i = 0; i < MAX_PPS_COUNT; i++)
2012 av_freep(h->pps_buffers + i);
2014 for(i = 0; i < h->s.avctx->thread_count; i++) {
2015 hx = h->thread_context[i];
2016 if(!hx) continue;
2017 av_freep(&hx->top_borders[1]);
2018 av_freep(&hx->top_borders[0]);
2019 av_freep(&hx->s.obmc_scratchpad);
2023 static void init_dequant8_coeff_table(H264Context *h){
2024 int i,q,x;
2025 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2026 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2027 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2029 for(i=0; i<2; i++ ){
2030 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2031 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2032 break;
2035 for(q=0; q<52; q++){
2036 int shift = ff_div6[q];
2037 int idx = ff_rem6[q];
2038 for(x=0; x<64; x++)
2039 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2040 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2041 h->pps.scaling_matrix8[i][x]) << shift;
2046 static void init_dequant4_coeff_table(H264Context *h){
2047 int i,j,q,x;
2048 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2049 for(i=0; i<6; i++ ){
2050 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2051 for(j=0; j<i; j++){
2052 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2053 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2054 break;
2057 if(j<i)
2058 continue;
2060 for(q=0; q<52; q++){
2061 int shift = ff_div6[q] + 2;
2062 int idx = ff_rem6[q];
2063 for(x=0; x<16; x++)
2064 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2065 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2066 h->pps.scaling_matrix4[i][x]) << shift;
2071 static void init_dequant_tables(H264Context *h){
2072 int i,x;
2073 init_dequant4_coeff_table(h);
2074 if(h->pps.transform_8x8_mode)
2075 init_dequant8_coeff_table(h);
2076 if(h->sps.transform_bypass){
2077 for(i=0; i<6; i++)
2078 for(x=0; x<16; x++)
2079 h->dequant4_coeff[i][0][x] = 1<<6;
2080 if(h->pps.transform_8x8_mode)
2081 for(i=0; i<2; i++)
2082 for(x=0; x<64; x++)
2083 h->dequant8_coeff[i][0][x] = 1<<6;
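/* Note on the dequant tables above: for each QP the coefficient is the base
 * value selected by QP%6 (ff_rem6), multiplied by the scaling matrix and
 * shifted left by QP/6 (ff_div6, plus 2 in the 4x4 case), so the
 * dequantisation scale doubles every 6 QP steps. With transform_bypass the
 * QP==0 row is flattened to 1<<6, the row used while bypassing. */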
2089 * Allocates tables.
2090 * Needs width/height to have been set.
2092 static int alloc_tables(H264Context *h){
2093 MpegEncContext * const s = &h->s;
2094 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2095 int x,y;
2097 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2099 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2100 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2101 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2103 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2105 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2106 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2108 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2109 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2111 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2112 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2113 for(y=0; y<s->mb_height; y++){
2114 for(x=0; x<s->mb_width; x++){
2115 const int mb_xy= x + y*s->mb_stride;
2116 const int b_xy = 4*x + 4*y*h->b_stride;
2117 const int b8_xy= 2*x + 2*y*h->b8_stride;
2119 h->mb2b_xy [mb_xy]= b_xy;
2120 h->mb2b8_xy[mb_xy]= b8_xy;
2124 s->obmc_scratchpad = NULL;
2126 if(!h->dequant4_coeff[0])
2127 init_dequant_tables(h);
2129 return 0;
2130 fail:
2131 free_tables(h);
2132 return -1;
2136 * Mimic alloc_tables(), but for every context thread.
2138 static void clone_tables(H264Context *dst, H264Context *src){
2139 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2140 dst->non_zero_count = src->non_zero_count;
2141 dst->slice_table = src->slice_table;
2142 dst->cbp_table = src->cbp_table;
2143 dst->mb2b_xy = src->mb2b_xy;
2144 dst->mb2b8_xy = src->mb2b8_xy;
2145 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2146 dst->mvd_table[0] = src->mvd_table[0];
2147 dst->mvd_table[1] = src->mvd_table[1];
2148 dst->direct_table = src->direct_table;
2150 dst->s.obmc_scratchpad = NULL;
2151 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2155 * Initializes the context.
2156 * Allocates buffers which are not shared amongst multiple threads.
2158 static int context_init(H264Context *h){
2159 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2160 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2162 return 0;
2163 fail:
2164 return -1; // free_tables will clean up for us
2167 static av_cold void common_init(H264Context *h){
2168 MpegEncContext * const s = &h->s;
2170 s->width = s->avctx->width;
2171 s->height = s->avctx->height;
2172 s->codec_id= s->avctx->codec->id;
2174 ff_h264_pred_init(&h->hpc, s->codec_id);
2176 h->dequant_coeff_pps= -1;
2177 s->unrestricted_mv=1;
2178 s->decode=1; //FIXME
2180 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2181 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2184 static av_cold int decode_init(AVCodecContext *avctx){
2185 H264Context *h= avctx->priv_data;
2186 MpegEncContext * const s = &h->s;
2188 MPV_decode_defaults(s);
2190 s->avctx = avctx;
2191 common_init(h);
2193 s->out_format = FMT_H264;
2194 s->workaround_bugs= avctx->workaround_bugs;
2196 // set defaults
2197 // s->decode_mb= ff_h263_decode_mb;
2198 s->quarter_sample = 1;
2199 s->low_delay= 1;
2201 if(avctx->codec_id == CODEC_ID_SVQ3)
2202 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2203 else
2204 avctx->pix_fmt= PIX_FMT_YUV420P;
2206 decode_init_vlc();
2208 if(avctx->extradata_size > 0 && avctx->extradata &&
2209 *(char *)avctx->extradata == 1){
2210 h->is_avc = 1;
2211 h->got_avcC = 0;
2212 } else {
2213 h->is_avc = 0;
2216 h->thread_context[0] = h;
2217 h->outputed_poc = INT_MIN;
2218 return 0;
2221 static int frame_start(H264Context *h){
2222 MpegEncContext * const s = &h->s;
2223 int i;
2225 if(MPV_frame_start(s, s->avctx) < 0)
2226 return -1;
2227 ff_er_frame_start(s);
2229 * MPV_frame_start uses pict_type to derive key_frame.
2230 * This is incorrect for H.264; IDR markings must be used.
2231 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2232 * See decode_nal_units().
2234 s->current_picture_ptr->key_frame= 0;
2236 assert(s->linesize && s->uvlinesize);
2238 for(i=0; i<16; i++){
2239 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2240 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2242 for(i=0; i<4; i++){
2243 h->block_offset[16+i]=
2244 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2245 h->block_offset[24+16+i]=
2246 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2249 /* can't be in alloc_tables because linesize isn't known there.
2250 * FIXME: redo bipred weight to not require extra buffer? */
2251 for(i = 0; i < s->avctx->thread_count; i++)
2252 if(!h->thread_context[i]->s.obmc_scratchpad)
2253 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2255 /* some macroblocks will be accessed before they're available */
2256 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2257 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2259 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2261 // We mark the current picture as non-reference after allocating it, so
2262 // that if we break out due to an error it can be released automatically
2263 // in the next MPV_frame_start().
2264 // SVQ3, like most other codecs, has only last/next/current pictures and
2265 // thus gets released even with the reference flag set; besides, SVQ3 and
2266 // others do not mark frames as reference later "naturally".
2267 if(s->codec_id != CODEC_ID_SVQ3)
2268 s->current_picture_ptr->reference= 0;
2270 s->current_picture_ptr->field_poc[0]=
2271 s->current_picture_ptr->field_poc[1]= INT_MAX;
2272 assert(s->current_picture_ptr->long_ref==0);
2274 return 0;
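/* Note on the block_offset[] setup above: entries 0..23 hold the per-block
 * pixel offsets for frame (progressive) macroblocks, while entries 24..47
 * hold the same offsets with the line stride doubled, as used for field
 * (MBAFF) macroblocks; hl_decode_mb_internal() switches between the two. */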
2277 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2278 MpegEncContext * const s = &h->s;
2279 int i;
2280 int step = 1;
2281 int offset = 1;
2282 int uvoffset= 1;
2283 int top_idx = 1;
2284 int skiplast= 0;
2286 src_y -= linesize;
2287 src_cb -= uvlinesize;
2288 src_cr -= uvlinesize;
2290 if(!simple && FRAME_MBAFF){
2291 if(s->mb_y&1){
2292 offset = MB_MBAFF ? 1 : 17;
2293 uvoffset= MB_MBAFF ? 1 : 9;
2294 if(!MB_MBAFF){
2295 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2296 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2297 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2298 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2299 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2302 }else{
2303 if(!MB_MBAFF){
2304 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2305 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2306 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2307 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2309 skiplast= 1;
2311 offset =
2312 uvoffset=
2313 top_idx = MB_MBAFF ? 0 : 1;
2315 step= MB_MBAFF ? 2 : 1;
2318 // There are two lines saved: the line above the top macroblock of a pair,
2319 // and the line above the bottom macroblock.
2320 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2321 for(i=1; i<17 - skiplast; i++){
2322 h->left_border[offset+i*step]= src_y[15+i* linesize];
2325 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2326 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2328 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2329 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2330 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2331 for(i=1; i<9 - skiplast; i++){
2332 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2333 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2335 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2336 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2340 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2341 MpegEncContext * const s = &h->s;
2342 int temp8, i;
2343 uint64_t temp64;
2344 int deblock_left;
2345 int deblock_top;
2346 int mb_xy;
2347 int step = 1;
2348 int offset = 1;
2349 int uvoffset= 1;
2350 int top_idx = 1;
2352 if(!simple && FRAME_MBAFF){
2353 if(s->mb_y&1){
2354 offset = MB_MBAFF ? 1 : 17;
2355 uvoffset= MB_MBAFF ? 1 : 9;
2356 }else{
2357 offset =
2358 uvoffset=
2359 top_idx = MB_MBAFF ? 0 : 1;
2361 step= MB_MBAFF ? 2 : 1;
2364 if(h->deblocking_filter == 2) {
2365 mb_xy = h->mb_xy;
2366 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2367 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2368 } else {
2369 deblock_left = (s->mb_x > 0);
2370 deblock_top = (s->mb_y > 0);
2373 src_y -= linesize + 1;
2374 src_cb -= uvlinesize + 1;
2375 src_cr -= uvlinesize + 1;
2377 #define XCHG(a,b,t,xchg)\
2378 t= a;\
2379 if(xchg)\
2380 a= b;\
2381 b= t;
2383 if(deblock_left){
2384 for(i = !deblock_top; i<16; i++){
2385 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2387 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2390 if(deblock_top){
2391 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2392 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2393 if(s->mb_x+1 < s->mb_width){
2394 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2398 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2399 if(deblock_left){
2400 for(i = !deblock_top; i<8; i++){
2401 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2402 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2404 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2405 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2407 if(deblock_top){
2408 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2409 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
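/* Note on the XCHG() macro used above: b always receives the old value of a,
 * while a receives the old value of b only when xchg is set. Since a is the
 * saved border and b the picture, the call with xchg=1 swaps the saved
 * (pre-deblocking) border pixels into the picture for intra prediction, and
 * the later call with xchg=0 copies the stashed deblocked pixels back. */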
2414 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2415 MpegEncContext * const s = &h->s;
2416 const int mb_x= s->mb_x;
2417 const int mb_y= s->mb_y;
2418 const int mb_xy= h->mb_xy;
2419 const int mb_type= s->current_picture.mb_type[mb_xy];
2420 uint8_t *dest_y, *dest_cb, *dest_cr;
2421 int linesize, uvlinesize /*dct_offset*/;
2422 int i;
2423 int *block_offset = &h->block_offset[0];
2424 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2425 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2426 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2428 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2429 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2430 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2432 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2433 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2435 if (!simple && MB_FIELD) {
2436 linesize = h->mb_linesize = s->linesize * 2;
2437 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2438 block_offset = &h->block_offset[24];
2439 if(mb_y&1){ //FIXME move out of this function?
2440 dest_y -= s->linesize*15;
2441 dest_cb-= s->uvlinesize*7;
2442 dest_cr-= s->uvlinesize*7;
2444 if(FRAME_MBAFF) {
2445 int list;
2446 for(list=0; list<h->list_count; list++){
2447 if(!USES_LIST(mb_type, list))
2448 continue;
2449 if(IS_16X16(mb_type)){
2450 int8_t *ref = &h->ref_cache[list][scan8[0]];
2451 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2452 }else{
2453 for(i=0; i<16; i+=4){
2454 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2455 int ref = h->ref_cache[list][scan8[i]];
2456 if(ref >= 0)
2457 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2462 } else {
2463 linesize = h->mb_linesize = s->linesize;
2464 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2465 // dct_offset = s->linesize * 16;
2468 if(transform_bypass){
2469 idct_dc_add =
2470 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2471 }else if(IS_8x8DCT(mb_type)){
2472 idct_dc_add = s->dsp.h264_idct8_dc_add;
2473 idct_add = s->dsp.h264_idct8_add;
2474 }else{
2475 idct_dc_add = s->dsp.h264_idct_dc_add;
2476 idct_add = s->dsp.h264_idct_add;
2479 if (!simple && IS_INTRA_PCM(mb_type)) {
2480 for (i=0; i<16; i++) {
2481 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2483 for (i=0; i<8; i++) {
2484 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2485 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2487 } else {
2488 if(IS_INTRA(mb_type)){
2489 if(h->deblocking_filter)
2490 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2492 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2493 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2494 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2497 if(IS_INTRA4x4(mb_type)){
2498 if(simple || !s->encoding){
2499 if(IS_8x8DCT(mb_type)){
2500 for(i=0; i<16; i+=4){
2501 uint8_t * const ptr= dest_y + block_offset[i];
2502 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2503 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2504 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2505 (h->topright_samples_available<<i)&0x4000, linesize);
2506 if(nnz){
2507 if(nnz == 1 && h->mb[i*16])
2508 idct_dc_add(ptr, h->mb + i*16, linesize);
2509 else
2510 idct_add(ptr, h->mb + i*16, linesize);
2513 }else
2514 for(i=0; i<16; i++){
2515 uint8_t * const ptr= dest_y + block_offset[i];
2516 uint8_t *topright;
2517 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2518 int nnz, tr;
2520 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2521 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2522 assert(mb_y || linesize <= block_offset[i]);
2523 if(!topright_avail){
2524 tr= ptr[3 - linesize]*0x01010101;
2525 topright= (uint8_t*) &tr;
2526 }else
2527 topright= ptr + 4 - linesize;
2528 }else
2529 topright= NULL;
2531 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2532 nnz = h->non_zero_count_cache[ scan8[i] ];
2533 if(nnz){
2534 if(is_h264){
2535 if(nnz == 1 && h->mb[i*16])
2536 idct_dc_add(ptr, h->mb + i*16, linesize);
2537 else
2538 idct_add(ptr, h->mb + i*16, linesize);
2539 }else
2540 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2544 }else{
2545 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2546 if(is_h264){
2547 if(!transform_bypass)
2548 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2549 }else
2550 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2552 if(h->deblocking_filter)
2553 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2554 }else if(is_h264){
2555 hl_motion(h, dest_y, dest_cb, dest_cr,
2556 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2557 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2558 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2562 if(!IS_INTRA4x4(mb_type)){
2563 if(is_h264){
2564 if(IS_INTRA16x16(mb_type)){
2565 for(i=0; i<16; i++){
2566 if(h->non_zero_count_cache[ scan8[i] ])
2567 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2568 else if(h->mb[i*16])
2569 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2571 }else{
2572 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2573 for(i=0; i<16; i+=di){
2574 int nnz = h->non_zero_count_cache[ scan8[i] ];
2575 if(nnz){
2576 if(nnz==1 && h->mb[i*16])
2577 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2578 else
2579 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2583 }else{
2584 for(i=0; i<16; i++){
2585 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2586 uint8_t * const ptr= dest_y + block_offset[i];
2587 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2593 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2594 uint8_t *dest[2] = {dest_cb, dest_cr};
2595 if(transform_bypass){
2596 idct_add = idct_dc_add = s->dsp.add_pixels4;
2597 }else{
2598 idct_add = s->dsp.h264_idct_add;
2599 idct_dc_add = s->dsp.h264_idct_dc_add;
2600 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2601 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2603 if(is_h264){
2604 for(i=16; i<16+8; i++){
2605 if(h->non_zero_count_cache[ scan8[i] ])
2606 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2607 else if(h->mb[i*16])
2608 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2610 }else{
2611 for(i=16; i<16+8; i++){
2612 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2613 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2614 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2620 if(h->deblocking_filter) {
2621 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2622 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2623 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2624 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2625 if (!simple && FRAME_MBAFF) {
2626 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2627 } else {
2628 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2634 * Processes a macroblock; this variant avoids the checks needed for expensive, uncommon cases.
2636 static void hl_decode_mb_simple(H264Context *h){
2637 hl_decode_mb_internal(h, 1);
2641 * Processes a macroblock; this variant handles edge cases, such as interlacing.
2643 static void av_noinline hl_decode_mb_complex(H264Context *h){
2644 hl_decode_mb_internal(h, 0);
2647 static void hl_decode_mb(H264Context *h){
2648 MpegEncContext * const s = &h->s;
2649 const int mb_xy= h->mb_xy;
2650 const int mb_type= s->current_picture.mb_type[mb_xy];
2651 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2652 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2654 if(ENABLE_H264_ENCODER && !s->decode)
2655 return;
2657 if (is_complex)
2658 hl_decode_mb_complex(h);
2659 else hl_decode_mb_simple(h);
2662 static void pic_as_field(Picture *pic, const int parity){
2663 int i;
2664 for (i = 0; i < 4; ++i) {
2665 if (parity == PICT_BOTTOM_FIELD)
2666 pic->data[i] += pic->linesize[i];
2667 pic->reference = parity;
2668 pic->linesize[i] *= 2;
2670 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2673 static int split_field_copy(Picture *dest, Picture *src,
2674 int parity, int id_add){
2675 int match = !!(src->reference & parity);
2677 if (match) {
2678 *dest = *src;
2679 if(parity != PICT_FRAME){
2680 pic_as_field(dest, parity);
2681 dest->pic_id *= 2;
2682 dest->pic_id += id_add;
2686 return match;
2689 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2690 int i[2]={0};
2691 int index=0;
2693 while(i[0]<len || i[1]<len){
2694 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2695 i[0]++;
2696 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2697 i[1]++;
2698 if(i[0] < len){
2699 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2700 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2702 if(i[1] < len){
2703 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2704 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2708 return index;
2711 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2712 int i, best_poc;
2713 int out_i= 0;
2715 for(;;){
2716 best_poc= dir ? INT_MIN : INT_MAX;
2718 for(i=0; i<len; i++){
2719 const int poc= src[i]->poc;
2720 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2721 best_poc= poc;
2722 sorted[out_i]= src[i];
2725 if(best_poc == (dir ? INT_MIN : INT_MAX))
2726 break;
2727 limit= sorted[out_i++]->poc - dir;
2729 return out_i;
2733 * fills the default_ref_list.
2735 static int fill_default_ref_list(H264Context *h){
2736 MpegEncContext * const s = &h->s;
2737 int i, len;
2739 if(h->slice_type_nos==FF_B_TYPE){
2740 Picture *sorted[32];
2741 int cur_poc, list;
2742 int lens[2];
2744 if(FIELD_PICTURE)
2745 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2746 else
2747 cur_poc= s->current_picture_ptr->poc;
2749 for(list= 0; list<2; list++){
2750 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2751 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2752 assert(len<=32);
2753 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2754 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2755 assert(len<=32);
2757 if(len < h->ref_count[list])
2758 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2759 lens[list]= len;
2762 if(lens[0] == lens[1] && lens[1] > 1){
2763 for(i=0; i<lens[0] && h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++);
2764 if(i == lens[0])
2765 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2767 }else{
2768 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2769 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2770 assert(len <= 32);
2771 if(len < h->ref_count[0])
2772 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2774 #ifdef TRACE
2775 for (i=0; i<h->ref_count[0]; i++) {
2776 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2778 if(h->slice_type_nos==FF_B_TYPE){
2779 for (i=0; i<h->ref_count[1]; i++) {
2780 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2783 #endif
2784 return 0;
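/* Note on fill_default_ref_list() above: for B slices, list 0 starts with
 * the short-term references whose POC precedes the current picture (closest
 * first), then the ones following it (closest first), then the long-term
 * references; list 1 mirrors that ordering. For P slices the short-term
 * references are taken as ordered in h->short_ref (most recently decoded
 * first), followed by the long-term ones. */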
2787 static void print_short_term(H264Context *h);
2788 static void print_long_term(H264Context *h);
2791 * Extract structure information about the picture described by pic_num in
2792 * the current decoding context (frame or field). Note that pic_num is
2793 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2794 * @param pic_num picture number for which to extract structure information
2795 * @param structure one of PICT_XXX describing structure of picture
2796 * with pic_num
2797 * @return frame number (short term) or long term index of picture
2798 * described by pic_num
2800 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2801 MpegEncContext * const s = &h->s;
2803 *structure = s->picture_structure;
2804 if(FIELD_PICTURE){
2805 if (!(pic_num & 1))
2806 /* opposite field */
2807 *structure ^= PICT_FRAME;
2808 pic_num >>= 1;
2811 return pic_num;
2814 static int decode_ref_pic_list_reordering(H264Context *h){
2815 MpegEncContext * const s = &h->s;
2816 int list, index, pic_structure;
2818 print_short_term(h);
2819 print_long_term(h);
2821 for(list=0; list<h->list_count; list++){
2822 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2824 if(get_bits1(&s->gb)){
2825 int pred= h->curr_pic_num;
2827 for(index=0; ; index++){
2828 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2829 unsigned int pic_id;
2830 int i;
2831 Picture *ref = NULL;
2833 if(reordering_of_pic_nums_idc==3)
2834 break;
2836 if(index >= h->ref_count[list]){
2837 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2838 return -1;
2841 if(reordering_of_pic_nums_idc<3){
2842 if(reordering_of_pic_nums_idc<2){
2843 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2844 int frame_num;
2846 if(abs_diff_pic_num > h->max_pic_num){
2847 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2848 return -1;
2851 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2852 else pred+= abs_diff_pic_num;
2853 pred &= h->max_pic_num - 1;
2855 frame_num = pic_num_extract(h, pred, &pic_structure);
2857 for(i= h->short_ref_count-1; i>=0; i--){
2858 ref = h->short_ref[i];
2859 assert(ref->reference);
2860 assert(!ref->long_ref);
2862 ref->frame_num == frame_num &&
2863 (ref->reference & pic_structure)
2865 break;
2867 if(i>=0)
2868 ref->pic_id= pred;
2869 }else{
2870 int long_idx;
2871 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2873 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2875 if(long_idx>31){
2876 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2877 return -1;
2879 ref = h->long_ref[long_idx];
2880 assert(!(ref && !ref->reference));
2881 if(ref && (ref->reference & pic_structure)){
2882 ref->pic_id= pic_id;
2883 assert(ref->long_ref);
2884 i=0;
2885 }else{
2886 i=-1;
2890 if (i < 0) {
2891 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2892 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2893 } else {
2894 for(i=index; i+1<h->ref_count[list]; i++){
2895 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2896 break;
2898 for(; i > index; i--){
2899 h->ref_list[list][i]= h->ref_list[list][i-1];
2901 h->ref_list[list][index]= *ref;
2902 if (FIELD_PICTURE){
2903 pic_as_field(&h->ref_list[list][index], pic_structure);
2906 }else{
2907 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2908 return -1;
2913 for(list=0; list<h->list_count; list++){
2914 for(index= 0; index < h->ref_count[list]; index++){
2915 if(!h->ref_list[list][index].data[0]){
2916 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2917 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2922 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
2923 direct_dist_scale_factor(h);
2924 direct_ref_list_init(h);
2925 return 0;
2928 static void fill_mbaff_ref_list(H264Context *h){
2929 int list, i, j;
2930 for(list=0; list<2; list++){ //FIXME try list_count
2931 for(i=0; i<h->ref_count[list]; i++){
2932 Picture *frame = &h->ref_list[list][i];
2933 Picture *field = &h->ref_list[list][16+2*i];
2934 field[0] = *frame;
2935 for(j=0; j<3; j++)
2936 field[0].linesize[j] <<= 1;
2937 field[0].reference = PICT_TOP_FIELD;
2938 field[1] = field[0];
2939 for(j=0; j<3; j++)
2940 field[1].data[j] += frame->linesize[j];
2941 field[1].reference = PICT_BOTTOM_FIELD;
2943 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2944 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2945 for(j=0; j<2; j++){
2946 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2947 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2951 for(j=0; j<h->ref_count[1]; j++){
2952 for(i=0; i<h->ref_count[0]; i++)
2953 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2954 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2955 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2959 static int pred_weight_table(H264Context *h){
2960 MpegEncContext * const s = &h->s;
2961 int list, i;
2962 int luma_def, chroma_def;
2964 h->use_weight= 0;
2965 h->use_weight_chroma= 0;
2966 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2967 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2968 luma_def = 1<<h->luma_log2_weight_denom;
2969 chroma_def = 1<<h->chroma_log2_weight_denom;
2971 for(list=0; list<2; list++){
2972 for(i=0; i<h->ref_count[list]; i++){
2973 int luma_weight_flag, chroma_weight_flag;
2975 luma_weight_flag= get_bits1(&s->gb);
2976 if(luma_weight_flag){
2977 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2978 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2979 if( h->luma_weight[list][i] != luma_def
2980 || h->luma_offset[list][i] != 0)
2981 h->use_weight= 1;
2982 }else{
2983 h->luma_weight[list][i]= luma_def;
2984 h->luma_offset[list][i]= 0;
2987 if(CHROMA){
2988 chroma_weight_flag= get_bits1(&s->gb);
2989 if(chroma_weight_flag){
2990 int j;
2991 for(j=0; j<2; j++){
2992 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2993 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2994 if( h->chroma_weight[list][i][j] != chroma_def
2995 || h->chroma_offset[list][i][j] != 0)
2996 h->use_weight_chroma= 1;
2998 }else{
2999 int j;
3000 for(j=0; j<2; j++){
3001 h->chroma_weight[list][i][j]= chroma_def;
3002 h->chroma_offset[list][i][j]= 0;
3007 if(h->slice_type_nos != FF_B_TYPE) break;
3009 h->use_weight= h->use_weight || h->use_weight_chroma;
3010 return 0;
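/* Note on pred_weight_table() above: references without an explicit weight
 * get the default weight 1<<log2_weight_denom and offset 0; use_weight and
 * use_weight_chroma are only set once some reference deviates from those
 * defaults, so the weighted MC path is skipped for slices that signal but
 * do not actually use explicit weighting. */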
3013 static void implicit_weight_table(H264Context *h){
3014 MpegEncContext * const s = &h->s;
3015 int ref0, ref1;
3016 int cur_poc = s->current_picture_ptr->poc;
3018 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3019 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3020 h->use_weight= 0;
3021 h->use_weight_chroma= 0;
3022 return;
3025 h->use_weight= 2;
3026 h->use_weight_chroma= 2;
3027 h->luma_log2_weight_denom= 5;
3028 h->chroma_log2_weight_denom= 5;
3030 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3031 int poc0 = h->ref_list[0][ref0].poc;
3032 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3033 int poc1 = h->ref_list[1][ref1].poc;
3034 int td = av_clip(poc1 - poc0, -128, 127);
3035 if(td){
3036 int tb = av_clip(cur_poc - poc0, -128, 127);
3037 int tx = (16384 + (FFABS(td) >> 1)) / td;
3038 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3039 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3040 h->implicit_weight[ref0][ref1] = 32;
3041 else
3042 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3043 }else
3044 h->implicit_weight[ref0][ref1] = 32;
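/* Worked example for the implicit weights above: with poc0=0, poc1=4 and
 * cur_poc=2 (the current picture halfway between its references), td=4,
 * tb=2, tx=(16384+2)/4=4096 and dist_scale_factor=((2*4096+32)>>6)>>2=32,
 * so implicit_weight becomes 64-32=32 and both predictions are averaged
 * with equal 32/32 weights in mc_part_weighted(). */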
3050 * Mark a picture as no longer needed for reference. The refmask
3051 * argument allows unreferencing of individual fields or the whole frame.
3052 * If the picture becomes entirely unreferenced, but is being held for
3053 * display purposes, it is marked as such.
3054 * @param refmask mask of fields to unreference; the mask is bitwise
3055 * ANDed with the reference marking of pic
3056 * @return non-zero if pic becomes entirely unreferenced (except possibly
3057 * for display purposes), zero if one of the fields remains in
3058 * reference
3060 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3061 int i;
3062 if (pic->reference &= refmask) {
3063 return 0;
3064 } else {
3065 for(i = 0; h->delayed_pic[i]; i++)
3066 if(pic == h->delayed_pic[i]){
3067 pic->reference=DELAYED_PIC_REF;
3068 break;
3070 return 1;
3075 * Performs an instantaneous decoder refresh: all reference pictures are removed and the POC/frame_num prediction state is reset.
3077 static void idr(H264Context *h){
3078 int i;
3080 for(i=0; i<16; i++){
3081 remove_long(h, i, 0);
3083 assert(h->long_ref_count==0);
3085 for(i=0; i<h->short_ref_count; i++){
3086 unreference_pic(h, h->short_ref[i], 0);
3087 h->short_ref[i]= NULL;
3089 h->short_ref_count=0;
3090 h->prev_frame_num= 0;
3091 h->prev_frame_num_offset= 0;
3092 h->prev_poc_msb=
3093 h->prev_poc_lsb= 0;
3096 /* forget old pics after a seek */
3097 static void flush_dpb(AVCodecContext *avctx){
3098 H264Context *h= avctx->priv_data;
3099 int i;
3100 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3101 if(h->delayed_pic[i])
3102 h->delayed_pic[i]->reference= 0;
3103 h->delayed_pic[i]= NULL;
3105 h->outputed_poc= INT_MIN;
3106 idr(h);
3107 if(h->s.current_picture_ptr)
3108 h->s.current_picture_ptr->reference= 0;
3109 h->s.first_field= 0;
3110 ff_mpeg_flush(avctx);
3114 * Find a Picture in the short term reference list by frame number.
3115 * @param frame_num frame number to search for
3116 * @param idx the index into h->short_ref where the returned picture is found;
3117 * undefined if no picture is found.
3118 * @return pointer to the found picture, or NULL if no pic with the provided
3119 * frame number is found
3121 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3122 MpegEncContext * const s = &h->s;
3123 int i;
3125 for(i=0; i<h->short_ref_count; i++){
3126 Picture *pic= h->short_ref[i];
3127 if(s->avctx->debug&FF_DEBUG_MMCO)
3128 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3129 if(pic->frame_num == frame_num) {
3130 *idx = i;
3131 return pic;
3134 return NULL;
3138 * Remove a picture from the short term reference list by its index in
3139 * that list. This does no checking on the provided index; it is assumed
3140 * to be valid. Other list entries are shifted down.
3141 * @param i index into h->short_ref of picture to remove.
3143 static void remove_short_at_index(H264Context *h, int i){
3144 assert(i >= 0 && i < h->short_ref_count);
3145 h->short_ref[i]= NULL;
3146 if (--h->short_ref_count)
3147 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3152 * @return the removed picture or NULL if an error occurs
3154 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3155 MpegEncContext * const s = &h->s;
3156 Picture *pic;
3157 int i;
3159 if(s->avctx->debug&FF_DEBUG_MMCO)
3160 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3162 pic = find_short(h, frame_num, &i);
3163 if (pic){
3164 if(unreference_pic(h, pic, ref_mask))
3165 remove_short_at_index(h, i);
3168 return pic;
3172 * Remove a picture from the long term reference list by its index in
3173 * that list.
3174 * @return the removed picture or NULL if an error occurs
3176 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3177 Picture *pic;
3179 pic= h->long_ref[i];
3180 if (pic){
3181 if(unreference_pic(h, pic, ref_mask)){
3182 assert(h->long_ref[i]->long_ref == 1);
3183 h->long_ref[i]->long_ref= 0;
3184 h->long_ref[i]= NULL;
3185 h->long_ref_count--;
3189 return pic;
3193 * print short term list
3195 static void print_short_term(H264Context *h) {
3196 uint32_t i;
3197 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3198 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3199 for(i=0; i<h->short_ref_count; i++){
3200 Picture *pic= h->short_ref[i];
3201 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3207 * print long term list
3209 static void print_long_term(H264Context *h) {
3210 uint32_t i;
3211 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3212 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3213 for(i = 0; i < 16; i++){
3214 Picture *pic= h->long_ref[i];
3215 if (pic) {
3216 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3223 * Executes the reference picture marking (memory management control operations).
3225 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3226 MpegEncContext * const s = &h->s;
3227 int i, j;
3228 int current_ref_assigned=0;
3229 Picture *pic;
3231 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3232 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3234 for(i=0; i<mmco_count; i++){
3235 int structure, frame_num;
3236 if(s->avctx->debug&FF_DEBUG_MMCO)
3237 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3239 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3240 || mmco[i].opcode == MMCO_SHORT2LONG){
3241 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3242 pic = find_short(h, frame_num, &j);
3243 if(!pic){
3244 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3245 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3246 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3247 continue;
3251 switch(mmco[i].opcode){
3252 case MMCO_SHORT2UNUSED:
3253 if(s->avctx->debug&FF_DEBUG_MMCO)
3254 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3255 remove_short(h, frame_num, structure ^ PICT_FRAME);
3256 break;
3257 case MMCO_SHORT2LONG:
3258 if (h->long_ref[mmco[i].long_arg] != pic)
3259 remove_long(h, mmco[i].long_arg, 0);
3261 remove_short_at_index(h, j);
3262 h->long_ref[ mmco[i].long_arg ]= pic;
3263 if (h->long_ref[ mmco[i].long_arg ]){
3264 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3265 h->long_ref_count++;
3267 break;
3268 case MMCO_LONG2UNUSED:
3269 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3270 pic = h->long_ref[j];
3271 if (pic) {
3272 remove_long(h, j, structure ^ PICT_FRAME);
3273 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3274 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3275 break;
3276 case MMCO_LONG:
3277 // The comment below is left from previous code as it is an interesting note.
3278 /* First field in pair is in short term list or
3279 * at a different long term index.
3280 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3281 * Report the problem and keep the pair where it is,
3282 * and mark this field valid.
3285 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3286 remove_long(h, mmco[i].long_arg, 0);
3288 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3289 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3290 h->long_ref_count++;
3293 s->current_picture_ptr->reference |= s->picture_structure;
3294 current_ref_assigned=1;
3295 break;
3296 case MMCO_SET_MAX_LONG:
3297 assert(mmco[i].long_arg <= 16);
3298 // just remove the long-term references whose index is greater than the new max
3299 for(j = mmco[i].long_arg; j<16; j++){
3300 remove_long(h, j, 0);
3302 break;
3303 case MMCO_RESET:
3304 while(h->short_ref_count){
3305 remove_short(h, h->short_ref[0]->frame_num, 0);
3307 for(j = 0; j < 16; j++) {
3308 remove_long(h, j, 0);
3310 s->current_picture_ptr->poc=
3311 s->current_picture_ptr->field_poc[0]=
3312 s->current_picture_ptr->field_poc[1]=
3313 h->poc_lsb=
3314 h->poc_msb=
3315 h->frame_num=
3316 s->current_picture_ptr->frame_num= 0;
3317 break;
3318 default: assert(0);
3322 if (!current_ref_assigned) {
3323 /* Second field of a complementary field pair whose first field
3324 * is already referenced. If short-term referenced, it should be
3325 * the first entry in short_ref. If not, it must exist in
3326 * long_ref; trying to put it on the short list here is an
3327 * error in the encoded bitstream (ref: 7.4.3.3, NOTE 2 and 3).
3329 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3330 /* Just mark the second field valid */
3331 s->current_picture_ptr->reference = PICT_FRAME;
3332 } else if (s->current_picture_ptr->long_ref) {
3333 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3334 "assignment for second field "
3335 "in complementary field pair "
3336 "(first field is long term)\n");
3337 } else {
3338 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3339 if(pic){
3340 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3343 if(h->short_ref_count)
3344 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3346 h->short_ref[0]= s->current_picture_ptr;
3347 h->short_ref_count++;
3348 s->current_picture_ptr->reference |= s->picture_structure;
3352 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3354 /* We have too many reference frames, probably due to a corrupted
3355 * stream. Discard one frame to prevent overrunning the
3356 * short_ref and long_ref buffers.
3358 av_log(h->s.avctx, AV_LOG_ERROR,
3359 "number of reference frames exceeds max (probably "
3360 "corrupt input), discarding one\n");
3362 if (h->long_ref_count && !h->short_ref_count) {
3363 for (i = 0; i < 16; ++i)
3364 if (h->long_ref[i])
3365 break;
3367 assert(i < 16);
3368 remove_long(h, i, 0);
3369 } else {
3370 pic = h->short_ref[h->short_ref_count - 1];
3371 remove_short(h, pic->frame_num, 0);
3375 print_short_term(h);
3376 print_long_term(h);
3377 return 0;
3380 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3381 MpegEncContext * const s = &h->s;
3382 int i;
3384 h->mmco_index= 0;
3385 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
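/* For an IDR slice the spec defines two flags here:
 * no_output_of_prior_pics_flag (folded into broken_link below) and
 * long_term_reference_flag, which marks the IDR picture itself as the
 * long-term reference with index 0. */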
3386 s->broken_link= get_bits1(gb) -1;
3387 if(get_bits1(gb)){
3388 h->mmco[0].opcode= MMCO_LONG;
3389 h->mmco[0].long_arg= 0;
3390 h->mmco_index= 1;
3392 }else{
3393 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3394 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3395 MMCOOpcode opcode= get_ue_golomb(gb);
3397 h->mmco[i].opcode= opcode;
3398 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3399 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3400 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3401 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3402 return -1;
3405 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3406 unsigned int long_arg= get_ue_golomb(gb);
3407 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3408 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3409 return -1;
3411 h->mmco[i].long_arg= long_arg;
3414 if(opcode > (unsigned)MMCO_LONG){
3415 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3416 return -1;
3418 if(opcode == MMCO_END)
3419 break;
3421 h->mmco_index= i;
3422 }else{
3423 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
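/* Sliding-window marking: once the reference buffer is full, synthesize an
 * MMCO that unmarks the oldest short-term reference (both of its fields
 * when decoding field pictures). */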
3425 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3426 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3427 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3428 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3429 h->mmco_index= 1;
3430 if (FIELD_PICTURE) {
3431 h->mmco[0].short_pic_num *= 2;
3432 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3433 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3434 h->mmco_index= 2;
3440 return 0;
3443 static int init_poc(H264Context *h){
3444 MpegEncContext * const s = &h->s;
3445 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3446 int field_poc[2];
3447 Picture *cur = s->current_picture_ptr;
3449 h->frame_num_offset= h->prev_frame_num_offset;
3450 if(h->frame_num < h->prev_frame_num)
3451 h->frame_num_offset += max_frame_num;
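/* POC type 0: poc_lsb is coded in the slice header, poc_msb is inferred from
 * the previous reference picture. Worked example of the wrap handling below,
 * assuming max_poc_lsb = 16: going from prev_poc_lsb = 14 to poc_lsb = 2
 * (difference >= 8) means the LSB wrapped forwards, so poc_msb grows by 16;
 * going from 2 to 14 (difference < -8) means it wrapped backwards, so
 * poc_msb shrinks by 16. */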
3453 if(h->sps.poc_type==0){
3454 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3456 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3457 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3458 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3459 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3460 else
3461 h->poc_msb = h->prev_poc_msb;
3462 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3463 field_poc[0] =
3464 field_poc[1] = h->poc_msb + h->poc_lsb;
3465 if(s->picture_structure == PICT_FRAME)
3466 field_poc[1] += h->delta_poc_bottom;
3467 }else if(h->sps.poc_type==1){
3468 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3469 int i;
3471 if(h->sps.poc_cycle_length != 0)
3472 abs_frame_num = h->frame_num_offset + h->frame_num;
3473 else
3474 abs_frame_num = 0;
3476 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3477 abs_frame_num--;
3479 expected_delta_per_poc_cycle = 0;
3480 for(i=0; i < h->sps.poc_cycle_length; i++)
3481 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3483 if(abs_frame_num > 0){
3484 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3485 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3487 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3488 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3489 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3490 } else
3491 expectedpoc = 0;
3493 if(h->nal_ref_idc == 0)
3494 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3496 field_poc[0] = expectedpoc + h->delta_poc[0];
3497 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3499 if(s->picture_structure == PICT_FRAME)
3500 field_poc[1] += h->delta_poc[1];
3501 }else{
3502 int poc= 2*(h->frame_num_offset + h->frame_num);
3504 if(!h->nal_ref_idc)
3505 poc--;
3507 field_poc[0]= poc;
3508 field_poc[1]= poc;
3511 if(s->picture_structure != PICT_BOTTOM_FIELD)
3512 s->current_picture_ptr->field_poc[0]= field_poc[0];
3513 if(s->picture_structure != PICT_TOP_FIELD)
3514 s->current_picture_ptr->field_poc[1]= field_poc[1];
3515 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3517 return 0;
3522 * initialize scan tables
3524 static void init_scan_tables(H264Context *h){
3525 MpegEncContext * const s = &h->s;
3526 int i;
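/* When the plain C IDCT is used the standard scan order is kept; the
 * optimized (e.g. SIMD) IDCTs may expect a transposed block, so the scan
 * tables are transposed to match. The T() macros below swap the row and
 * column fields of a scan position: 2+2 bits for the 4x4 tables, 3+3 bits
 * for the 8x8 tables. */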
3527 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3528 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3529 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3530 }else{
3531 for(i=0; i<16; i++){
3532 #define T(x) (x>>2) | ((x<<2) & 0xF)
3533 h->zigzag_scan[i] = T(zigzag_scan[i]);
3534 h-> field_scan[i] = T( field_scan[i]);
3535 #undef T
3538 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3539 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3540 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3541 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3542 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3543 }else{
3544 for(i=0; i<64; i++){
3545 #define T(x) (x>>3) | ((x&7)<<3)
3546 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3547 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3548 h->field_scan8x8[i] = T(field_scan8x8[i]);
3549 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3550 #undef T
3553 if(h->sps.transform_bypass){ //FIXME same ugly
3554 h->zigzag_scan_q0 = zigzag_scan;
3555 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3556 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3557 h->field_scan_q0 = field_scan;
3558 h->field_scan8x8_q0 = field_scan8x8;
3559 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3560 }else{
3561 h->zigzag_scan_q0 = h->zigzag_scan;
3562 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3563 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3564 h->field_scan_q0 = h->field_scan;
3565 h->field_scan8x8_q0 = h->field_scan8x8;
3566 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3571 * Replicates H264 "master" context to thread contexts.
3573 static void clone_slice(H264Context *dst, H264Context *src)
3575 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3576 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3577 dst->s.current_picture = src->s.current_picture;
3578 dst->s.linesize = src->s.linesize;
3579 dst->s.uvlinesize = src->s.uvlinesize;
3580 dst->s.first_field = src->s.first_field;
3582 dst->prev_poc_msb = src->prev_poc_msb;
3583 dst->prev_poc_lsb = src->prev_poc_lsb;
3584 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3585 dst->prev_frame_num = src->prev_frame_num;
3586 dst->short_ref_count = src->short_ref_count;
3588 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3589 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3590 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3591 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3593 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3594 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3598 * decodes a slice header.
3599 * This will also call MPV_common_init() and frame_start() as needed.
3601 * @param h the H264Context
3602 * @param h0 the H264 master context (differs from 'h' when doing slice-based parallel decoding)
3604 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3606 static int decode_slice_header(H264Context *h, H264Context *h0){
3607 MpegEncContext * const s = &h->s;
3608 MpegEncContext * const s0 = &h0->s;
3609 unsigned int first_mb_in_slice;
3610 unsigned int pps_id;
3611 int num_ref_idx_active_override_flag;
3612 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3613 unsigned int slice_type, tmp, i, j;
3614 int default_ref_list_done = 0;
3615 int last_pic_structure;
3617 s->dropable= h->nal_ref_idc == 0;
3619 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3620 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3621 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3622 }else{
3623 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3624 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3627 first_mb_in_slice= get_ue_golomb(&s->gb);
3629 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3630 h0->current_slice = 0;
3631 if (!s0->first_field)
3632 s->current_picture_ptr= NULL;
3635 slice_type= get_ue_golomb(&s->gb);
3636 if(slice_type > 9){
3637 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", slice_type, s->mb_x, s->mb_y);
3638 return -1;
3640 if(slice_type > 4){
3641 slice_type -= 5;
3642 h->slice_type_fixed=1;
3643 }else
3644 h->slice_type_fixed=0;
3646 slice_type= slice_type_map[ slice_type ];
3647 if (slice_type == FF_I_TYPE
3648 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3649 default_ref_list_done = 1;
3651 h->slice_type= slice_type;
3652 h->slice_type_nos= slice_type & 3;
3654 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3655 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3656 av_log(h->s.avctx, AV_LOG_ERROR,
3657 "B picture before any references, skipping\n");
3658 return -1;
3661 pps_id= get_ue_golomb(&s->gb);
3662 if(pps_id>=MAX_PPS_COUNT){
3663 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3664 return -1;
3666 if(!h0->pps_buffers[pps_id]) {
3667 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3668 return -1;
3670 h->pps= *h0->pps_buffers[pps_id];
3672 if(!h0->sps_buffers[h->pps.sps_id]) {
3673 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3674 return -1;
3676 h->sps = *h0->sps_buffers[h->pps.sps_id];
3678 if(h == h0 && h->dequant_coeff_pps != pps_id){
3679 h->dequant_coeff_pps = pps_id;
3680 init_dequant_tables(h);
3683 s->mb_width= h->sps.mb_width;
3684 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3686 h->b_stride= s->mb_width*4;
3687 h->b8_stride= s->mb_width*2;
3689 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3690 if(h->sps.frame_mbs_only_flag)
3691 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3692 else
3693 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3695 if (s->context_initialized
3696 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3697 if(h != h0)
3698 return -1; // width / height changed during parallelized decoding
3699 free_tables(h);
3700 MPV_common_end(s);
3702 if (!s->context_initialized) {
3703 if(h != h0)
3704 return -1; // we can't (re-)initialize the context during parallel decoding
3705 if (MPV_common_init(s) < 0)
3706 return -1;
3707 s->first_field = 0;
3709 init_scan_tables(h);
3710 alloc_tables(h);
3712 for(i = 1; i < s->avctx->thread_count; i++) {
3713 H264Context *c;
3714 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3715 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3716 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3717 c->sps = h->sps;
3718 c->pps = h->pps;
3719 init_scan_tables(c);
3720 clone_tables(c, h);
3723 for(i = 0; i < s->avctx->thread_count; i++)
3724 if(context_init(h->thread_context[i]) < 0)
3725 return -1;
3727 s->avctx->width = s->width;
3728 s->avctx->height = s->height;
3729 s->avctx->sample_aspect_ratio= h->sps.sar;
3730 if(!s->avctx->sample_aspect_ratio.den)
3731 s->avctx->sample_aspect_ratio.den = 1;
3733 if(h->sps.timing_info_present_flag){
3734 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3735 if(h->x264_build > 0 && h->x264_build < 44)
3736 s->avctx->time_base.den *= 2;
3737 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3738 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3742 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3744 h->mb_mbaff = 0;
3745 h->mb_aff_frame = 0;
3746 last_pic_structure = s0->picture_structure;
3747 if(h->sps.frame_mbs_only_flag){
3748 s->picture_structure= PICT_FRAME;
3749 }else{
3750 if(get_bits1(&s->gb)) { //field_pic_flag
3751 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3752 } else {
3753 s->picture_structure= PICT_FRAME;
3754 h->mb_aff_frame = h->sps.mb_aff;
3757 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3759 if(h0->current_slice == 0){
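/* If frame_num jumped by more than one, synthesize the missing reference
 * frames (repeatedly calling frame_start() with the intermediate frame_num
 * values) so that reference list construction and MMCO handling stay
 * consistent. */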
3760 while(h->frame_num != h->prev_frame_num &&
3761 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3762 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3763 frame_start(h);
3764 h->prev_frame_num++;
3765 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3766 s->current_picture_ptr->frame_num= h->prev_frame_num;
3767 execute_ref_pic_marking(h, NULL, 0);
3770 /* See if we have a decoded first field looking for a pair... */
3771 if (s0->first_field) {
3772 assert(s0->current_picture_ptr);
3773 assert(s0->current_picture_ptr->data[0]);
3774 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3776 /* figure out if we have a complementary field pair */
3777 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3779 * Previous field is unmatched. Don't display it, but let it
3780 * remain for reference if marked as such.
3782 s0->current_picture_ptr = NULL;
3783 s0->first_field = FIELD_PICTURE;
3785 } else {
3786 if (h->nal_ref_idc &&
3787 s0->current_picture_ptr->reference &&
3788 s0->current_picture_ptr->frame_num != h->frame_num) {
3790 * This and the previous field were reference fields, but had
3791 * different frame_nums. Consider this field the first in a new
3792 * pair. Throw away the previous field except for reference
3793 * purposes.
3795 s0->first_field = 1;
3796 s0->current_picture_ptr = NULL;
3798 } else {
3799 /* Second field in complementary pair */
3800 s0->first_field = 0;
3804 } else {
3805 /* Frame or first field in a potentially complementary pair */
3806 assert(!s0->current_picture_ptr);
3807 s0->first_field = FIELD_PICTURE;
3810 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3811 s0->first_field = 0;
3812 return -1;
3815 if(h != h0)
3816 clone_slice(h, h0);
3818 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3820 assert(s->mb_num == s->mb_width * s->mb_height);
3821 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3822 first_mb_in_slice >= s->mb_num){
3823 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3824 return -1;
3826 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3827 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3828 if (s->picture_structure == PICT_BOTTOM_FIELD)
3829 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3830 assert(s->mb_y < s->mb_height);
3832 if(s->picture_structure==PICT_FRAME){
3833 h->curr_pic_num= h->frame_num;
3834 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3835 }else{
3836 h->curr_pic_num= 2*h->frame_num + 1;
3837 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3840 if(h->nal_unit_type == NAL_IDR_SLICE){
3841 get_ue_golomb(&s->gb); /* idr_pic_id */
3844 if(h->sps.poc_type==0){
3845 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3847 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3848 h->delta_poc_bottom= get_se_golomb(&s->gb);
3852 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3853 h->delta_poc[0]= get_se_golomb(&s->gb);
3855 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3856 h->delta_poc[1]= get_se_golomb(&s->gb);
3859 init_poc(h);
3861 if(h->pps.redundant_pic_cnt_present){
3862 h->redundant_pic_count= get_ue_golomb(&s->gb);
3865 //set defaults, might be overridden a few lines later
3866 h->ref_count[0]= h->pps.ref_count[0];
3867 h->ref_count[1]= h->pps.ref_count[1];
3869 if(h->slice_type_nos != FF_I_TYPE){
3870 if(h->slice_type_nos == FF_B_TYPE){
3871 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3873 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3875 if(num_ref_idx_active_override_flag){
3876 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3877 if(h->slice_type_nos==FF_B_TYPE)
3878 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3880 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3881 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3882 h->ref_count[0]= h->ref_count[1]= 1;
3883 return -1;
3886 if(h->slice_type_nos == FF_B_TYPE)
3887 h->list_count= 2;
3888 else
3889 h->list_count= 1;
3890 }else
3891 h->list_count= 0;
3893 if(!default_ref_list_done){
3894 fill_default_ref_list(h);
3897 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3898 return -1;
3900 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3901 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3902 pred_weight_table(h);
3903 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3904 implicit_weight_table(h);
3905 else
3906 h->use_weight = 0;
3908 if(h->nal_ref_idc)
3909 decode_ref_pic_marking(h0, &s->gb);
3911 if(FRAME_MBAFF)
3912 fill_mbaff_ref_list(h);
3914 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3915 tmp = get_ue_golomb(&s->gb);
3916 if(tmp > 2){
3917 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3918 return -1;
3920 h->cabac_init_idc= tmp;
3923 h->last_qscale_diff = 0;
3924 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3925 if(tmp>51){
3926 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3927 return -1;
3929 s->qscale= tmp;
3930 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3931 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3932 //FIXME qscale / qp ... stuff
3933 if(h->slice_type == FF_SP_TYPE){
3934 get_bits1(&s->gb); /* sp_for_switch_flag */
3936 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3937 get_se_golomb(&s->gb); /* slice_qs_delta */
3940 h->deblocking_filter = 1;
3941 h->slice_alpha_c0_offset = 0;
3942 h->slice_beta_offset = 0;
3943 if( h->pps.deblocking_filter_parameters_present ) {
3944 tmp= get_ue_golomb(&s->gb);
3945 if(tmp > 2){
3946 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3947 return -1;
3949 h->deblocking_filter= tmp;
3950 if(h->deblocking_filter < 2)
3951 h->deblocking_filter^= 1; // 1<->0 (bitstream: 0=filter on, 1=off; internally: 1=on, 0=off)
3953 if( h->deblocking_filter ) {
3954 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3955 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3959 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3960 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3961 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3962 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3963 h->deblocking_filter= 0;
3965 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3966 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3967 /* Cheat slightly for speed:
3968 Do not bother to deblock across slices. */
3969 h->deblocking_filter = 2;
3970 } else {
3971 h0->max_contexts = 1;
3972 if(!h0->single_decode_warning) {
3973 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3974 h0->single_decode_warning = 1;
3976 if(h != h0)
3977 return 1; // deblocking switched inside frame
3981 #if 0 //FMO
3982 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3983 slice_group_change_cycle= get_bits(&s->gb, ?);
3984 #endif
3986 h0->last_slice_type = slice_type;
3987 h->slice_num = ++h0->current_slice;
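/* Build the ref2frm tables: for each list, map a reference index to
 * 4*frame_num plus the reference's field flags, so the deblocking filter can
 * cheaply test whether two neighbouring blocks reference the same field or
 * frame. The leading entries of each half catch the "not used"/"not
 * available" ref values and map them to -1. */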
3989 for(j=0; j<2; j++){
3990 int *ref2frm= h->ref2frm[h->slice_num&15][j];
3991 ref2frm[0]=
3992 ref2frm[1]= -1;
3993 for(i=0; i<16; i++)
3994 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3995 +(h->ref_list[j][i].reference&3);
3996 ref2frm[18+0]=
3997 ref2frm[18+1]= -1;
3998 for(i=16; i<48; i++)
3999 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4000 +(h->ref_list[j][i].reference&3);
4003 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4004 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4006 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4007 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4008 h->slice_num,
4009 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4010 first_mb_in_slice,
4011 av_get_pict_type_char(h->slice_type),
4012 pps_id, h->frame_num,
4013 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4014 h->ref_count[0], h->ref_count[1],
4015 s->qscale,
4016 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4017 h->use_weight,
4018 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4019 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4023 return 0;
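/**
 * Reads a CAVLC level_prefix: the number of consecutive zero bits before the
 * next '1' bit, i.e. a unary code read straight from the bitstream.
 */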
4029 static inline int get_level_prefix(GetBitContext *gb){
4030 unsigned int buf;
4031 int log;
4033 OPEN_READER(re, gb);
4034 UPDATE_CACHE(re, gb);
4035 buf=GET_CACHE(re, gb);
4037 log= 32 - av_log2(buf);
4038 #ifdef TRACE
4039 print_bin(buf>>(32-log), log);
4040 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4041 #endif
4043 LAST_SKIP_BITS(re, gb, log);
4044 CLOSE_READER(re, gb);
4046 return log-1;
4049 static inline int get_dct8x8_allowed(H264Context *h){
4050 int i;
4051 for(i=0; i<4; i++){
4052 if(!IS_SUB_8X8(h->sub_mb_type[i])
4053 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4054 return 0;
4056 return 1;
4060 * decodes a residual block.
4061 * @param n block index
4062 * @param scantable scantable
4063 * @param max_coeff number of coefficients in the block
4064 * @return <0 if an error occurred
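 *
 * Rough CAVLC layout decoded below: coeff_token (total coefficients and
 * trailing ones), one sign bit per trailing one, level prefix/suffix pairs
 * for the remaining levels, total_zeros, and finally run_before codes that
 * place the levels along the scan.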
4066 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4067 MpegEncContext * const s = &h->s;
4068 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4069 int level[16];
4070 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4072 //FIXME put trailing_ones into the context
4074 if(n == CHROMA_DC_BLOCK_INDEX){
4075 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4076 total_coeff= coeff_token>>2;
4077 }else{
4078 if(n == LUMA_DC_BLOCK_INDEX){
4079 total_coeff= pred_non_zero_count(h, 0);
4080 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4081 total_coeff= coeff_token>>2;
4082 }else{
4083 total_coeff= pred_non_zero_count(h, n);
4084 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4085 total_coeff= coeff_token>>2;
4086 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4090 //FIXME set last_non_zero?
4092 if(total_coeff==0)
4093 return 0;
4094 if(total_coeff > (unsigned)max_coeff) {
4095 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4096 return -1;
4099 trailing_ones= coeff_token&3;
4100 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4101 assert(total_coeff<=16);
4103 for(i=0; i<trailing_ones; i++){
4104 level[i]= 1 - 2*get_bits1(gb);
4107 if(i<total_coeff) {
4108 int level_code, mask;
4109 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4110 int prefix= get_level_prefix(gb);
4112 //first coefficient has suffix_length equal to 0 or 1
4113 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4114 if(suffix_length)
4115 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4116 else
4117 level_code= (prefix<<suffix_length); //part
4118 }else if(prefix==14){
4119 if(suffix_length)
4120 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4121 else
4122 level_code= prefix + get_bits(gb, 4); //part
4123 }else{
4124 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4125 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much) sense
4126 if(prefix>=16)
4127 level_code += (1<<(prefix-3))-4096;
4130 if(trailing_ones < 3) level_code += 2;
4132 suffix_length = 1;
4133 if(level_code > 5)
4134 suffix_length++;
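/* Map the unsigned level_code onto a signed level:
 * 0 -> +1, 1 -> -1, 2 -> +2, 3 -> -2, ... (even codes positive, odd negative) */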
4135 mask= -(level_code&1);
4136 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4137 i++;
4139 //remaining coefficients have suffix_length > 0
4140 for(;i<total_coeff;i++) {
4141 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4142 prefix = get_level_prefix(gb);
4143 if(prefix<15){
4144 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4145 }else{
4146 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4147 if(prefix>=16)
4148 level_code += (1<<(prefix-3))-4096;
4150 mask= -(level_code&1);
4151 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4152 if(level_code > suffix_limit[suffix_length])
4153 suffix_length++;
4157 if(total_coeff == max_coeff)
4158 zeros_left=0;
4159 else{
4160 if(n == CHROMA_DC_BLOCK_INDEX)
4161 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4162 else
4163 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4166 coeff_num = zeros_left + total_coeff - 1;
4167 j = scantable[coeff_num];
4168 if(n > 24){
4169 block[j] = level[0];
4170 for(i=1;i<total_coeff;i++) {
4171 if(zeros_left <= 0)
4172 run_before = 0;
4173 else if(zeros_left < 7){
4174 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4175 }else{
4176 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4178 zeros_left -= run_before;
4179 coeff_num -= 1 + run_before;
4180 j= scantable[ coeff_num ];
4182 block[j]= level[i];
4184 }else{
4185 block[j] = (level[0] * qmul[j] + 32)>>6;
4186 for(i=1;i<total_coeff;i++) {
4187 if(zeros_left <= 0)
4188 run_before = 0;
4189 else if(zeros_left < 7){
4190 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4191 }else{
4192 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4194 zeros_left -= run_before;
4195 coeff_num -= 1 + run_before;
4196 j= scantable[ coeff_num ];
4198 block[j]= (level[i] * qmul[j] + 32)>>6;
4202 if(zeros_left<0){
4203 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4204 return -1;
4207 return 0;
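/**
 * Predicts mb_field_decoding_flag for a skipped macroblock pair in an MBAFF
 * frame: take it from the left neighbour if that is in the same slice,
 * otherwise from the one above, otherwise assume frame (non-field) coding.
 */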
4210 static void predict_field_decoding_flag(H264Context *h){
4211 MpegEncContext * const s = &h->s;
4212 const int mb_xy= h->mb_xy;
4213 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4214 ? s->current_picture.mb_type[mb_xy-1]
4215 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4216 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4217 : 0;
4218 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4222 * decodes a P_SKIP or B_SKIP macroblock
4224 static void decode_mb_skip(H264Context *h){
4225 MpegEncContext * const s = &h->s;
4226 const int mb_xy= h->mb_xy;
4227 int mb_type=0;
4229 memset(h->non_zero_count[mb_xy], 0, 16);
4230 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove (ugh)
4232 if(MB_FIELD)
4233 mb_type|= MB_TYPE_INTERLACED;
4235 if( h->slice_type_nos == FF_B_TYPE )
4237 // just for fill_caches. pred_direct_motion will set the real mb_type
4238 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4240 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4241 pred_direct_motion(h, &mb_type);
4242 mb_type|= MB_TYPE_SKIP;
4244 else
4246 int mx, my;
4247 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4249 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4250 pred_pskip_motion(h, &mx, &my);
4251 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4252 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4255 write_back_motion(h, mb_type);
4256 s->current_picture.mb_type[mb_xy]= mb_type;
4257 s->current_picture.qscale_table[mb_xy]= s->qscale;
4258 h->slice_table[ mb_xy ]= h->slice_num;
4259 h->prev_mb_skipped= 1;
4263 * decodes a macroblock
4264 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4266 static int decode_mb_cavlc(H264Context *h){
4267 MpegEncContext * const s = &h->s;
4268 int mb_xy;
4269 int partition_count;
4270 unsigned int mb_type, cbp;
4271 int dct8x8_allowed= h->pps.transform_8x8_mode;
4273 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4275 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
4277 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4278 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4279 down the code */
4280 if(h->slice_type_nos != FF_I_TYPE){
4281 if(s->mb_skip_run==-1)
4282 s->mb_skip_run= get_ue_golomb(&s->gb);
4284 if (s->mb_skip_run--) {
4285 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4286 if(s->mb_skip_run==0)
4287 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4288 else
4289 predict_field_decoding_flag(h);
4291 decode_mb_skip(h);
4292 return 0;
4295 if(FRAME_MBAFF){
4296 if( (s->mb_y&1) == 0 )
4297 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4300 h->prev_mb_skipped= 0;
4302 mb_type= get_ue_golomb(&s->gb);
4303 if(h->slice_type_nos == FF_B_TYPE){
4304 if(mb_type < 23){
4305 partition_count= b_mb_type_info[mb_type].partition_count;
4306 mb_type= b_mb_type_info[mb_type].type;
4307 }else{
4308 mb_type -= 23;
4309 goto decode_intra_mb;
4311 }else if(h->slice_type_nos == FF_P_TYPE){
4312 if(mb_type < 5){
4313 partition_count= p_mb_type_info[mb_type].partition_count;
4314 mb_type= p_mb_type_info[mb_type].type;
4315 }else{
4316 mb_type -= 5;
4317 goto decode_intra_mb;
4319 }else{
4320 assert(h->slice_type_nos == FF_I_TYPE);
4321 if(h->slice_type == FF_SI_TYPE && mb_type)
4322 mb_type--;
4323 decode_intra_mb:
4324 if(mb_type > 25){
4325 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4326 return -1;
4328 partition_count=0;
4329 cbp= i_mb_type_info[mb_type].cbp;
4330 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4331 mb_type= i_mb_type_info[mb_type].type;
4334 if(MB_FIELD)
4335 mb_type |= MB_TYPE_INTERLACED;
4337 h->slice_table[ mb_xy ]= h->slice_num;
4339 if(IS_INTRA_PCM(mb_type)){
4340 unsigned int x;
4342 // We assume these blocks are very rare so we do not optimize them.
4343 align_get_bits(&s->gb);
4345 // The pixels are stored in the same order as levels in h->mb array.
4346 for(x=0; x < (CHROMA ? 384 : 256); x++){
4347 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4350 // In deblocking, the quantizer is 0
4351 s->current_picture.qscale_table[mb_xy]= 0;
4352 // All coeffs are present
4353 memset(h->non_zero_count[mb_xy], 16, 16);
4355 s->current_picture.mb_type[mb_xy]= mb_type;
4356 return 0;
4359 if(MB_MBAFF){
4360 h->ref_count[0] <<= 1;
4361 h->ref_count[1] <<= 1;
4364 fill_caches(h, mb_type, 0);
4366 //mb_pred
4367 if(IS_INTRA(mb_type)){
4368 int pred_mode;
4369 // init_top_left_availability(h);
4370 if(IS_INTRA4x4(mb_type)){
4371 int i;
4372 int di = 1;
4373 if(dct8x8_allowed && get_bits1(&s->gb)){
4374 mb_type |= MB_TYPE_8x8DCT;
4375 di = 4;
4378 // fill_intra4x4_pred_table(h);
4379 for(i=0; i<16; i+=di){
4380 int mode= pred_intra_mode(h, i);
4382 if(!get_bits1(&s->gb)){
4383 const int rem_mode= get_bits(&s->gb, 3);
4384 mode = rem_mode + (rem_mode >= mode);
4387 if(di==4)
4388 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4389 else
4390 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4392 write_back_intra_pred_mode(h);
4393 if( check_intra4x4_pred_mode(h) < 0)
4394 return -1;
4395 }else{
4396 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4397 if(h->intra16x16_pred_mode < 0)
4398 return -1;
4400 if(CHROMA){
4401 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4402 if(pred_mode < 0)
4403 return -1;
4404 h->chroma_pred_mode= pred_mode;
4406 }else if(partition_count==4){
4407 int i, j, sub_partition_count[4], list, ref[2][4];
4409 if(h->slice_type_nos == FF_B_TYPE){
4410 for(i=0; i<4; i++){
4411 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4412 if(h->sub_mb_type[i] >=13){
4413 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4414 return -1;
4416 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4417 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4419 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4420 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4421 pred_direct_motion(h, &mb_type);
4422 h->ref_cache[0][scan8[4]] =
4423 h->ref_cache[1][scan8[4]] =
4424 h->ref_cache[0][scan8[12]] =
4425 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4427 }else{
4428 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4429 for(i=0; i<4; i++){
4430 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4431 if(h->sub_mb_type[i] >=4){
4432 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4433 return -1;
4435 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4436 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4440 for(list=0; list<h->list_count; list++){
4441 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4442 for(i=0; i<4; i++){
4443 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4444 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4445 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4446 if(tmp>=ref_count){
4447 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4448 return -1;
4450 ref[list][i]= tmp;
4451 }else{
4452 //FIXME
4453 ref[list][i] = -1;
4458 if(dct8x8_allowed)
4459 dct8x8_allowed = get_dct8x8_allowed(h);
4461 for(list=0; list<h->list_count; list++){
4462 for(i=0; i<4; i++){
4463 if(IS_DIRECT(h->sub_mb_type[i])) {
4464 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4465 continue;
4467 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4468 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4470 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4471 const int sub_mb_type= h->sub_mb_type[i];
4472 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4473 for(j=0; j<sub_partition_count[i]; j++){
4474 int mx, my;
4475 const int index= 4*i + block_width*j;
4476 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4477 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4478 mx += get_se_golomb(&s->gb);
4479 my += get_se_golomb(&s->gb);
4480 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4482 if(IS_SUB_8X8(sub_mb_type)){
4483 mv_cache[ 1 ][0]=
4484 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4485 mv_cache[ 1 ][1]=
4486 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4487 }else if(IS_SUB_8X4(sub_mb_type)){
4488 mv_cache[ 1 ][0]= mx;
4489 mv_cache[ 1 ][1]= my;
4490 }else if(IS_SUB_4X8(sub_mb_type)){
4491 mv_cache[ 8 ][0]= mx;
4492 mv_cache[ 8 ][1]= my;
4494 mv_cache[ 0 ][0]= mx;
4495 mv_cache[ 0 ][1]= my;
4497 }else{
4498 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4499 p[0] = p[1]=
4500 p[8] = p[9]= 0;
4504 }else if(IS_DIRECT(mb_type)){
4505 pred_direct_motion(h, &mb_type);
4506 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4507 }else{
4508 int list, mx, my, i;
4509 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4510 if(IS_16X16(mb_type)){
4511 for(list=0; list<h->list_count; list++){
4512 unsigned int val;
4513 if(IS_DIR(mb_type, 0, list)){
4514 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4515 if(val >= h->ref_count[list]){
4516 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4517 return -1;
4519 }else
4520 val= LIST_NOT_USED&0xFF;
4521 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4523 for(list=0; list<h->list_count; list++){
4524 unsigned int val;
4525 if(IS_DIR(mb_type, 0, list)){
4526 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4527 mx += get_se_golomb(&s->gb);
4528 my += get_se_golomb(&s->gb);
4529 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4531 val= pack16to32(mx,my);
4532 }else
4533 val=0;
4534 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4537 else if(IS_16X8(mb_type)){
4538 for(list=0; list<h->list_count; list++){
4539 for(i=0; i<2; i++){
4540 unsigned int val;
4541 if(IS_DIR(mb_type, i, list)){
4542 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4543 if(val >= h->ref_count[list]){
4544 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4545 return -1;
4547 }else
4548 val= LIST_NOT_USED&0xFF;
4549 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4552 for(list=0; list<h->list_count; list++){
4553 for(i=0; i<2; i++){
4554 unsigned int val;
4555 if(IS_DIR(mb_type, i, list)){
4556 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4557 mx += get_se_golomb(&s->gb);
4558 my += get_se_golomb(&s->gb);
4559 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4561 val= pack16to32(mx,my);
4562 }else
4563 val=0;
4564 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4567 }else{
4568 assert(IS_8X16(mb_type));
4569 for(list=0; list<h->list_count; list++){
4570 for(i=0; i<2; i++){
4571 unsigned int val;
4572 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4573 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4574 if(val >= h->ref_count[list]){
4575 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4576 return -1;
4578 }else
4579 val= LIST_NOT_USED&0xFF;
4580 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4583 for(list=0; list<h->list_count; list++){
4584 for(i=0; i<2; i++){
4585 unsigned int val;
4586 if(IS_DIR(mb_type, i, list)){
4587 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4588 mx += get_se_golomb(&s->gb);
4589 my += get_se_golomb(&s->gb);
4590 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4592 val= pack16to32(mx,my);
4593 }else
4594 val=0;
4595 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4601 if(IS_INTER(mb_type))
4602 write_back_motion(h, mb_type);
4604 if(!IS_INTRA16x16(mb_type)){
4605 cbp= get_ue_golomb(&s->gb);
4606 if(cbp > 47){
4607 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4608 return -1;
4611 if(CHROMA){
4612 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4613 else cbp= golomb_to_inter_cbp [cbp];
4614 }else{
4615 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4616 else cbp= golomb_to_inter_cbp_gray[cbp];
4619 h->cbp = cbp;
4621 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4622 if(get_bits1(&s->gb)){
4623 mb_type |= MB_TYPE_8x8DCT;
4624 h->cbp_table[mb_xy]= cbp;
4627 s->current_picture.mb_type[mb_xy]= mb_type;
4629 if(cbp || IS_INTRA16x16(mb_type)){
4630 int i8x8, i4x4, chroma_idx;
4631 int dquant;
4632 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4633 const uint8_t *scan, *scan8x8, *dc_scan;
4635 // fill_non_zero_count_cache(h);
4637 if(IS_INTERLACED(mb_type)){
4638 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4639 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4640 dc_scan= luma_dc_field_scan;
4641 }else{
4642 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4643 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4644 dc_scan= luma_dc_zigzag_scan;
4647 dquant= get_se_golomb(&s->gb);
4649 if( dquant > 25 || dquant < -26 ){
4650 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4651 return -1;
4654 s->qscale += dquant;
4655 if(((unsigned)s->qscale) > 51){
4656 if(s->qscale<0) s->qscale+= 52;
4657 else s->qscale-= 52;
4660 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4661 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4662 if(IS_INTRA16x16(mb_type)){
4663 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4664 return -1; //FIXME continue if partitioned and other return -1 too
4667 assert((cbp&15) == 0 || (cbp&15) == 15);
4669 if(cbp&15){
4670 for(i8x8=0; i8x8<4; i8x8++){
4671 for(i4x4=0; i4x4<4; i4x4++){
4672 const int index= i4x4 + 4*i8x8;
4673 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4674 return -1;
4678 }else{
4679 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4681 }else{
4682 for(i8x8=0; i8x8<4; i8x8++){
4683 if(cbp & (1<<i8x8)){
4684 if(IS_8x8DCT(mb_type)){
4685 DCTELEM *buf = &h->mb[64*i8x8];
4686 uint8_t *nnz;
4687 for(i4x4=0; i4x4<4; i4x4++){
4688 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4689 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4690 return -1;
4692 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4693 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4694 }else{
4695 for(i4x4=0; i4x4<4; i4x4++){
4696 const int index= i4x4 + 4*i8x8;
4698 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4699 return -1;
4703 }else{
4704 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4705 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4710 if(cbp&0x30){
4711 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4712 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4713 return -1;
4717 if(cbp&0x20){
4718 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4719 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4720 for(i4x4=0; i4x4<4; i4x4++){
4721 const int index= 16 + 4*chroma_idx + i4x4;
4722 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4723 return -1;
4727 }else{
4728 uint8_t * const nnz= &h->non_zero_count_cache[0];
4729 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4730 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4732 }else{
4733 uint8_t * const nnz= &h->non_zero_count_cache[0];
4734 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4735 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4736 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4738 s->current_picture.qscale_table[mb_xy]= s->qscale;
4739 write_back_non_zero_count(h);
4741 if(MB_MBAFF){
4742 h->ref_count[0] >>= 1;
4743 h->ref_count[1] >>= 1;
4746 return 0;
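/**
 * Decodes mb_field_decoding_flag with CABAC. The context index is 70 plus the
 * number of neighbouring (left and above) macroblock pairs that belong to the
 * same slice and are field coded.
 */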
4749 static int decode_cabac_field_decoding_flag(H264Context *h) {
4750 MpegEncContext * const s = &h->s;
4751 const int mb_x = s->mb_x;
4752 const int mb_y = s->mb_y & ~1;
4753 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4754 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4756 unsigned int ctx = 0;
4758 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4759 ctx += 1;
4761 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4762 ctx += 1;
4765 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4768 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4769 uint8_t *state= &h->cabac_state[ctx_base];
4770 int mb_type;
4772 if(intra_slice){
4773 MpegEncContext * const s = &h->s;
4774 const int mba_xy = h->left_mb_xy[0];
4775 const int mbb_xy = h->top_mb_xy;
4776 int ctx=0;
4777 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4778 ctx++;
4779 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4780 ctx++;
4781 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4782 return 0; /* I4x4 */
4783 state += 2;
4784 }else{
4785 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4786 return 0; /* I4x4 */
4789 if( get_cabac_terminate( &h->cabac ) )
4790 return 25; /* PCM */
4792 mb_type = 1; /* I16x16 */
4793 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4794 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4795 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4796 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4797 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4798 return mb_type;
4801 static int decode_cabac_mb_type( H264Context *h ) {
4802 MpegEncContext * const s = &h->s;
4804 if( h->slice_type_nos == FF_I_TYPE ) {
4805 return decode_cabac_intra_mb_type(h, 3, 1);
4806 } else if( h->slice_type_nos == FF_P_TYPE ) {
4807 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4808 /* P-type */
4809 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4810 /* P_L0_D16x16, P_8x8 */
4811 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4812 } else {
4813 /* P_L0_D8x16, P_L0_D16x8 */
4814 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4816 } else {
4817 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4819 } else if( h->slice_type_nos == FF_B_TYPE ) {
4820 const int mba_xy = h->left_mb_xy[0];
4821 const int mbb_xy = h->top_mb_xy;
4822 int ctx = 0;
4823 int bits;
4825 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4826 ctx++;
4827 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4828 ctx++;
4830 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4831 return 0; /* B_Direct_16x16 */
4833 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4834 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4837 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4838 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4839 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4840 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4841 if( bits < 8 )
4842 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4843 else if( bits == 13 ) {
4844 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4845 } else if( bits == 14 )
4846 return 11; /* B_L1_L0_8x16 */
4847 else if( bits == 15 )
4848 return 22; /* B_8x8 */
4850 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4851 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4852 } else {
4853 /* TODO SI/SP frames? */
4854 return -1;
4858 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4859 MpegEncContext * const s = &h->s;
4860 int mba_xy, mbb_xy;
4861 int ctx = 0;
4863 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4864 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4865 mba_xy = mb_xy - 1;
4866 if( (mb_y&1)
4867 && h->slice_table[mba_xy] == h->slice_num
4868 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4869 mba_xy += s->mb_stride;
4870 if( MB_FIELD ){
4871 mbb_xy = mb_xy - s->mb_stride;
4872 if( !(mb_y&1)
4873 && h->slice_table[mbb_xy] == h->slice_num
4874 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4875 mbb_xy -= s->mb_stride;
4876 }else
4877 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4878 }else{
4879 int mb_xy = h->mb_xy;
4880 mba_xy = mb_xy - 1;
4881 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4884 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4885 ctx++;
4886 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4887 ctx++;
4889 if( h->slice_type_nos == FF_B_TYPE )
4890 ctx += 13;
4891 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4894 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4895 int mode = 0;
4897 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4898 return pred_mode;
4900 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4901 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4902 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4904 if( mode >= pred_mode )
4905 return mode + 1;
4906 else
4907 return mode;
4910 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4911 const int mba_xy = h->left_mb_xy[0];
4912 const int mbb_xy = h->top_mb_xy;
4914 int ctx = 0;
4916 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4917 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4918 ctx++;
4920 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4921 ctx++;
4923 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4924 return 0;
4926 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4927 return 1;
4928 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4929 return 2;
4930 else
4931 return 3;
4934 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4935 int cbp_b, cbp_a, ctx, cbp = 0;
4937 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4938 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4940 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4941 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4942 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4943 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4944 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4945 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4946 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4947 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
4948 return cbp;
4950 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4951 int ctx;
4952 int cbp_a, cbp_b;
4954 cbp_a = (h->left_cbp>>4)&0x03;
4955 cbp_b = (h-> top_cbp>>4)&0x03;
4957 ctx = 0;
4958 if( cbp_a > 0 ) ctx++;
4959 if( cbp_b > 0 ) ctx += 2;
4960 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4961 return 0;
4963 ctx = 4;
4964 if( cbp_a == 2 ) ctx++;
4965 if( cbp_b == 2 ) ctx += 2;
4966 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
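/**
 * Decodes mb_qp_delta with CABAC: a unary-coded magnitude mapped onto a
 * signed delta as 0, +1, -1, +2, -2, ...
 */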
4968 static int decode_cabac_mb_dqp( H264Context *h) {
4969 int ctx = 0;
4970 int val = 0;
4972 if( h->last_qscale_diff != 0 )
4973 ctx++;
4975 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4976 if( ctx < 2 )
4977 ctx = 2;
4978 else
4979 ctx = 3;
4980 val++;
4981 if(val > 102) //prevent infinite loop
4982 return INT_MIN;
4985 if( val&0x01 )
4986 return (val + 1)/2;
4987 else
4988 return -(val + 1)/2;
4990 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4991 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4992 return 0; /* 8x8 */
4993 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4994 return 1; /* 8x4 */
4995 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
4996 return 2; /* 4x8 */
4997 return 3; /* 4x4 */
4999 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5000 int type;
5001 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5002 return 0; /* B_Direct_8x8 */
5003 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5004 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5005 type = 3;
5006 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5007 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5008 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5009 type += 4;
5011 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5012 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5013 return type;
5016 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5017 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
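/**
 * Decodes a reference index with CABAC: a unary code whose first bin uses a
 * context derived from the left and top reference indices, while later bins
 * use fixed contexts.
 */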
5020 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5021 int refa = h->ref_cache[list][scan8[n] - 1];
5022 int refb = h->ref_cache[list][scan8[n] - 8];
5023 int ref = 0;
5024 int ctx = 0;
5026 if( h->slice_type_nos == FF_B_TYPE) {
5027 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5028 ctx++;
5029 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5030 ctx += 2;
5031 } else {
5032 if( refa > 0 )
5033 ctx++;
5034 if( refb > 0 )
5035 ctx += 2;
5038 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5039 ref++;
5040 if( ctx < 4 )
5041 ctx = 4;
5042 else
5043 ctx = 5;
5044 if(ref >= 32 /*h->ref_list[list]*/){
5045 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5046 return 0; //FIXME we should return -1 and check the return everywhere
5049 return ref;
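/**
 * Decodes one motion vector difference component with CABAC: a truncated
 * unary prefix (context chosen from the neighbouring MVD magnitudes),
 * followed for large values by a bypass-coded exp-Golomb style suffix; the
 * sign is read last as a bypass bin.
 */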
5052 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5053 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5054 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5055 int ctxbase = (l == 0) ? 40 : 47;
5056 int ctx, mvd;
5058 if( amvd < 3 )
5059 ctx = 0;
5060 else if( amvd > 32 )
5061 ctx = 2;
5062 else
5063 ctx = 1;
5065 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5066 return 0;
5068 mvd= 1;
5069 ctx= 3;
5070 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5071 mvd++;
5072 if( ctx < 6 )
5073 ctx++;
5076 if( mvd >= 9 ) {
5077 int k = 3;
5078 while( get_cabac_bypass( &h->cabac ) ) {
5079 mvd += 1 << k;
5080 k++;
5081 if(k>24){
5082 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5083 return INT_MIN;
5086 while( k-- ) {
5087 if( get_cabac_bypass( &h->cabac ) )
5088 mvd += 1 << k;
5091 return get_cabac_bypass_sign( &h->cabac, -mvd );
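/* Returns the coded_block_flag context: (left neighbour coded) +
 * 2*(top neighbour coded) + 4*cat. The neighbour information comes from the
 * cached cbp bits for the DC categories and from the non_zero_count cache for
 * the AC and 4x4 categories. */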
5094 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5095 int nza, nzb;
5096 int ctx = 0;
5098 if( is_dc ) {
5099 if( cat == 0 ) {
5100 nza = h->left_cbp&0x100;
5101 nzb = h-> top_cbp&0x100;
5102 } else {
5103 nza = (h->left_cbp>>(6+idx))&0x01;
5104 nzb = (h-> top_cbp>>(6+idx))&0x01;
5106 } else {
5107 if( cat == 4 ) {
5108 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5109 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5110 } else {
5111 assert(cat == 1 || cat == 2);
5112 nza = h->non_zero_count_cache[scan8[idx] - 1];
5113 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5117 if( nza > 0 )
5118 ctx++;
5120 if( nzb > 0 )
5121 ctx += 2;
5123 return ctx + 4 * cat;
5126 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5127 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5128 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5129 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5130 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5133 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5134 static const int significant_coeff_flag_offset[2][6] = {
5135 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5136 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5138 static const int last_coeff_flag_offset[2][6] = {
5139 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5140 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5142 static const int coeff_abs_level_m1_offset[6] = {
5143 227+0, 227+10, 227+20, 227+30, 227+39, 426
5145 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5146 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5147 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5148 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5149 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5150 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5151 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5152 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5153 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5155 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5156 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5157 * map node ctx => cabac ctx for level=1 */
5158 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5159 /* map node ctx => cabac ctx for level>1 */
5160 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5161 static const uint8_t coeff_abs_level_transition[2][8] = {
5162 /* update node ctx after decoding a level=1 */
5163 { 1, 2, 3, 3, 4, 5, 6, 7 },
5164 /* update node ctx after decoding a level>1 */
5165 { 4, 4, 4, 4, 5, 6, 7, 7 }
5168 int index[64];
5170 int av_unused last;
5171 int coeff_count = 0;
5172 int node_ctx = 0;
5174 uint8_t *significant_coeff_ctx_base;
5175 uint8_t *last_coeff_ctx_base;
5176 uint8_t *abs_level_m1_ctx_base;
5178 #ifndef ARCH_X86
5179 #define CABAC_ON_STACK
5180 #endif
5181 #ifdef CABAC_ON_STACK
5182 #define CC &cc
5183 CABACContext cc;
5184 cc.range = h->cabac.range;
5185 cc.low = h->cabac.low;
5186 cc.bytestream= h->cabac.bytestream;
5187 #else
5188 #define CC &h->cabac
5189 #endif
5192 /* cat: 0-> DC 16x16 n = 0
5193 * 1-> AC 16x16 n = luma4x4idx
5194 * 2-> Luma4x4 n = luma4x4idx
5195 * 3-> DC Chroma n = iCbCr
5196 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5197 * 5-> Luma8x8 n = 4 * luma8x8idx
5200 /* read coded block flag */
5201 if( is_dc || cat != 5 ) {
5202 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5203 if( !is_dc ) {
5204 if( cat == 4 )
5205 h->non_zero_count_cache[scan8[16+n]] = 0;
5206 else
5207 h->non_zero_count_cache[scan8[n]] = 0;
5210 #ifdef CABAC_ON_STACK
5211 h->cabac.range = cc.range ;
5212 h->cabac.low = cc.low ;
5213 h->cabac.bytestream= cc.bytestream;
5214 #endif
5215 return;
5219 significant_coeff_ctx_base = h->cabac_state
5220 + significant_coeff_flag_offset[MB_FIELD][cat];
5221 last_coeff_ctx_base = h->cabac_state
5222 + last_coeff_flag_offset[MB_FIELD][cat];
5223 abs_level_m1_ctx_base = h->cabac_state
5224 + coeff_abs_level_m1_offset[cat];
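/* Significance map: one significant_coeff_flag is decoded per scan position;
 * every set flag is followed by a last_significant_coeff_flag which, when set,
 * ends the map early. For 8x8 blocks several positions share a context,
 * selected via significant_coeff_flag_offset_8x8 and
 * last_coeff_flag_offset_8x8 above. */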
5226 if( !is_dc && cat == 5 ) {
5227 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5228 for(last= 0; last < coefs; last++) { \
5229 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5230 if( get_cabac( CC, sig_ctx )) { \
5231 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5232 index[coeff_count++] = last; \
5233 if( get_cabac( CC, last_ctx ) ) { \
5234 last= max_coeff; \
5235 break; \
5239 if( last == max_coeff -1 ) {\
5240 index[coeff_count++] = last;\
5242 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5243 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5244 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5245 } else {
5246 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5247 #else
5248 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5249 } else {
5250 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5251 #endif
5253 assert(coeff_count > 0);
5255 if( is_dc ) {
5256 if( cat == 0 )
5257 h->cbp_table[h->mb_xy] |= 0x100;
5258 else
5259 h->cbp_table[h->mb_xy] |= 0x40 << n;
5260 } else {
5261 if( cat == 5 )
5262 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5263 else if( cat == 4 )
5264 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5265 else {
5266 assert( cat == 1 || cat == 2 );
5267 h->non_zero_count_cache[scan8[n]] = coeff_count;
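/* Levels are decoded in reverse scan order. The first context-coded bin
 * distinguishes |level| == 1 from |level| > 1; larger magnitudes continue as a
 * truncated unary prefix, with a bypass-coded 0th-order Exp-Golomb suffix once
 * the prefix saturates. The sign is a single bypass bin, and qmul (when
 * present) dequantizes the coefficient on the fly. */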
5271 do {
5272 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5274 int j= scantable[index[--coeff_count]];
5276 if( get_cabac( CC, ctx ) == 0 ) {
5277 node_ctx = coeff_abs_level_transition[0][node_ctx];
5278 if( is_dc ) {
5279 block[j] = get_cabac_bypass_sign( CC, -1);
5280 }else{
5281 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5283 } else {
5284 int coeff_abs = 2;
5285 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5286 node_ctx = coeff_abs_level_transition[1][node_ctx];
5288 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5289 coeff_abs++;
5292 if( coeff_abs >= 15 ) {
5293 int j = 0;
5294 while( get_cabac_bypass( CC ) ) {
5295 j++;
5298 coeff_abs=1;
5299 while( j-- ) {
5300 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5302 coeff_abs+= 14;
5305 if( is_dc ) {
5306 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5307 }else{
5308 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5311 } while( coeff_count );
5312 #ifdef CABAC_ON_STACK
5313 h->cabac.range = cc.range ;
5314 h->cabac.low = cc.low ;
5315 h->cabac.bytestream= cc.bytestream;
5316 #endif
5320 #ifndef CONFIG_SMALL
5321 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5322 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5325 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5326 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5328 #endif
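/* With CONFIG_SMALL a single function handles every category; otherwise the
 * two wrappers above let the compiler specialize
 * decode_cabac_residual_internal separately for the DC (cat 0/3) and non-DC
 * cases, so the is_dc checks can be resolved at compile time. */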
5330 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5331 #ifdef CONFIG_SMALL
5332 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5333 #else
5334 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5335 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5336 #endif
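/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock. In
 * the plain frame case these are mb_xy - mb_stride and mb_xy - 1; with MBAFF
 * they are adjusted so that a frame macroblock bordering a field pair (or
 * vice versa) references the proper member of the neighbouring pair, and in
 * field pictures, whose rows are interleaved in the frame-sized tables, the
 * top neighbour is two rows up. */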
5339 static inline void compute_mb_neighbors(H264Context *h)
5341 MpegEncContext * const s = &h->s;
5342 const int mb_xy = h->mb_xy;
5343 h->top_mb_xy = mb_xy - s->mb_stride;
5344 h->left_mb_xy[0] = mb_xy - 1;
5345 if(FRAME_MBAFF){
5346 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5347 const int top_pair_xy = pair_xy - s->mb_stride;
5348 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5349 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5350 const int curr_mb_frame_flag = !MB_FIELD;
5351 const int bottom = (s->mb_y & 1);
5352 if (bottom
5353 ? !curr_mb_frame_flag // bottom macroblock
5354 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5356 h->top_mb_xy -= s->mb_stride;
5358 if (left_mb_frame_flag != curr_mb_frame_flag) {
5359 h->left_mb_xy[0] = pair_xy - 1;
5361 } else if (FIELD_PICTURE) {
5362 h->top_mb_xy -= s->mb_stride;
5364 return;
5368 * decodes a macroblock
5369 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is detected
5371 static int decode_mb_cabac(H264Context *h) {
5372 MpegEncContext * const s = &h->s;
5373 int mb_xy;
5374 int mb_type, partition_count, cbp = 0;
5375 int dct8x8_allowed= h->pps.transform_8x8_mode;
5377 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5379 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5381 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5382 if( h->slice_type_nos != FF_I_TYPE ) {
5383 int skip;
5384 /* a skipped mb needs the aff flag from the following mb */
5385 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5386 predict_field_decoding_flag(h);
5387 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5388 skip = h->next_mb_skipped;
5389 else
5390 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5391 /* read skip flags */
5392 if( skip ) {
5393 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5394 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5395 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5396 if(h->next_mb_skipped)
5397 predict_field_decoding_flag(h);
5398 else
5399 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5402 decode_mb_skip(h);
5404 h->cbp_table[mb_xy] = 0;
5405 h->chroma_pred_mode_table[mb_xy] = 0;
5406 h->last_qscale_diff = 0;
5408 return 0;
5412 if(FRAME_MBAFF){
5413 if( (s->mb_y&1) == 0 )
5414 h->mb_mbaff =
5415 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5418 h->prev_mb_skipped = 0;
5420 compute_mb_neighbors(h);
5421 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5422 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5423 return -1;
5426 if( h->slice_type_nos == FF_B_TYPE ) {
5427 if( mb_type < 23 ){
5428 partition_count= b_mb_type_info[mb_type].partition_count;
5429 mb_type= b_mb_type_info[mb_type].type;
5430 }else{
5431 mb_type -= 23;
5432 goto decode_intra_mb;
5434 } else if( h->slice_type_nos == FF_P_TYPE ) {
5435 if( mb_type < 5) {
5436 partition_count= p_mb_type_info[mb_type].partition_count;
5437 mb_type= p_mb_type_info[mb_type].type;
5438 } else {
5439 mb_type -= 5;
5440 goto decode_intra_mb;
5442 } else {
5443 if(h->slice_type == FF_SI_TYPE && mb_type)
5444 mb_type--;
5445 assert(h->slice_type_nos == FF_I_TYPE);
5446 decode_intra_mb:
5447 partition_count = 0;
5448 cbp= i_mb_type_info[mb_type].cbp;
5449 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5450 mb_type= i_mb_type_info[mb_type].type;
5452 if(MB_FIELD)
5453 mb_type |= MB_TYPE_INTERLACED;
5455 h->slice_table[ mb_xy ]= h->slice_num;
5457 if(IS_INTRA_PCM(mb_type)) {
5458 const uint8_t *ptr;
5460 // We assume these blocks are very rare so we do not optimize them.
5461 // FIXME The two following lines get the bitstream position in the cabac
5462 // decoder; I think it should be done by a function in cabac.h (or cabac.c).
5463 ptr= h->cabac.bytestream;
5464 if(h->cabac.low&0x1) ptr--;
5465 if(CABAC_BITS==16){
5466 if(h->cabac.low&0x1FF) ptr--;
5470 // The pixels are stored in the same order as the levels in the h->mb array.
5470 memcpy(h->mb, ptr, 256); ptr+=256;
5471 if(CHROMA){
5472 memcpy(h->mb+128, ptr, 128); ptr+=128;
5475 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5477 // All blocks are present
5478 h->cbp_table[mb_xy] = 0x1ef;
5479 h->chroma_pred_mode_table[mb_xy] = 0;
5480 // In deblocking, the quantizer is 0
5481 s->current_picture.qscale_table[mb_xy]= 0;
5482 // All coeffs are present
5483 memset(h->non_zero_count[mb_xy], 16, 16);
5484 s->current_picture.mb_type[mb_xy]= mb_type;
5485 h->last_qscale_diff = 0;
5486 return 0;
5489 if(MB_MBAFF){
5490 h->ref_count[0] <<= 1;
5491 h->ref_count[1] <<= 1;
5494 fill_caches(h, mb_type, 0);
5496 if( IS_INTRA( mb_type ) ) {
5497 int i, pred_mode;
5498 if( IS_INTRA4x4( mb_type ) ) {
5499 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5500 mb_type |= MB_TYPE_8x8DCT;
5501 for( i = 0; i < 16; i+=4 ) {
5502 int pred = pred_intra_mode( h, i );
5503 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5504 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5506 } else {
5507 for( i = 0; i < 16; i++ ) {
5508 int pred = pred_intra_mode( h, i );
5509 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5511 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5514 write_back_intra_pred_mode(h);
5515 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5516 } else {
5517 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5518 if( h->intra16x16_pred_mode < 0 ) return -1;
5520 if(CHROMA){
5521 h->chroma_pred_mode_table[mb_xy] =
5522 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5524 pred_mode= check_intra_pred_mode( h, pred_mode );
5525 if( pred_mode < 0 ) return -1;
5526 h->chroma_pred_mode= pred_mode;
5528 } else if( partition_count == 4 ) {
5529 int i, j, sub_partition_count[4], list, ref[2][4];
5531 if( h->slice_type_nos == FF_B_TYPE ) {
5532 for( i = 0; i < 4; i++ ) {
5533 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5534 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5535 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5537 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5538 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5539 pred_direct_motion(h, &mb_type);
5540 h->ref_cache[0][scan8[4]] =
5541 h->ref_cache[1][scan8[4]] =
5542 h->ref_cache[0][scan8[12]] =
5543 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5544 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5545 for( i = 0; i < 4; i++ )
5546 if( IS_DIRECT(h->sub_mb_type[i]) )
5547 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5550 } else {
5551 for( i = 0; i < 4; i++ ) {
5552 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5553 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5554 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5558 for( list = 0; list < h->list_count; list++ ) {
5559 for( i = 0; i < 4; i++ ) {
5560 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5561 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5562 if( h->ref_count[list] > 1 )
5563 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5564 else
5565 ref[list][i] = 0;
5566 } else {
5567 ref[list][i] = -1;
5569 h->ref_cache[list][ scan8[4*i]+1 ]=
5570 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5574 if(dct8x8_allowed)
5575 dct8x8_allowed = get_dct8x8_allowed(h);
5577 for(list=0; list<h->list_count; list++){
5578 for(i=0; i<4; i++){
5579 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5580 if(IS_DIRECT(h->sub_mb_type[i])){
5581 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5582 continue;
5585 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5586 const int sub_mb_type= h->sub_mb_type[i];
5587 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5588 for(j=0; j<sub_partition_count[i]; j++){
5589 int mpx, mpy;
5590 int mx, my;
5591 const int index= 4*i + block_width*j;
5592 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5593 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5594 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5596 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5597 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5598 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5600 if(IS_SUB_8X8(sub_mb_type)){
5601 mv_cache[ 1 ][0]=
5602 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5603 mv_cache[ 1 ][1]=
5604 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5606 mvd_cache[ 1 ][0]=
5607 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5608 mvd_cache[ 1 ][1]=
5609 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5610 }else if(IS_SUB_8X4(sub_mb_type)){
5611 mv_cache[ 1 ][0]= mx;
5612 mv_cache[ 1 ][1]= my;
5614 mvd_cache[ 1 ][0]= mx - mpx;
5615 mvd_cache[ 1 ][1]= my - mpy;
5616 }else if(IS_SUB_4X8(sub_mb_type)){
5617 mv_cache[ 8 ][0]= mx;
5618 mv_cache[ 8 ][1]= my;
5620 mvd_cache[ 8 ][0]= mx - mpx;
5621 mvd_cache[ 8 ][1]= my - mpy;
5623 mv_cache[ 0 ][0]= mx;
5624 mv_cache[ 0 ][1]= my;
5626 mvd_cache[ 0 ][0]= mx - mpx;
5627 mvd_cache[ 0 ][1]= my - mpy;
5629 }else{
5630 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5631 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5632 p[0] = p[1] = p[8] = p[9] = 0;
5633 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5637 } else if( IS_DIRECT(mb_type) ) {
5638 pred_direct_motion(h, &mb_type);
5639 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5640 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5641 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5642 } else {
5643 int list, mx, my, i, mpx, mpy;
5644 if(IS_16X16(mb_type)){
5645 for(list=0; list<h->list_count; list++){
5646 if(IS_DIR(mb_type, 0, list)){
5647 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5648 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5649 }else
5650 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize this and the other fill_rect below too
5652 for(list=0; list<h->list_count; list++){
5653 if(IS_DIR(mb_type, 0, list)){
5654 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5656 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5657 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5658 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5660 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5661 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5662 }else
5663 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5666 else if(IS_16X8(mb_type)){
5667 for(list=0; list<h->list_count; list++){
5668 for(i=0; i<2; i++){
5669 if(IS_DIR(mb_type, i, list)){
5670 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5671 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5672 }else
5673 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5676 for(list=0; list<h->list_count; list++){
5677 for(i=0; i<2; i++){
5678 if(IS_DIR(mb_type, i, list)){
5679 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5680 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5681 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5682 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5684 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5685 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5686 }else{
5687 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5688 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5692 }else{
5693 assert(IS_8X16(mb_type));
5694 for(list=0; list<h->list_count; list++){
5695 for(i=0; i<2; i++){
5696 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5697 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5698 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5699 }else
5700 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5703 for(list=0; list<h->list_count; list++){
5704 for(i=0; i<2; i++){
5705 if(IS_DIR(mb_type, i, list)){
5706 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5707 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5708 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5710 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5711 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5712 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5713 }else{
5714 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5715 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5722 if( IS_INTER( mb_type ) ) {
5723 h->chroma_pred_mode_table[mb_xy] = 0;
5724 write_back_motion( h, mb_type );
5727 if( !IS_INTRA16x16( mb_type ) ) {
5728 cbp = decode_cabac_mb_cbp_luma( h );
5729 if(CHROMA)
5730 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5733 h->cbp_table[mb_xy] = h->cbp = cbp;
5735 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5736 if( decode_cabac_mb_transform_size( h ) )
5737 mb_type |= MB_TYPE_8x8DCT;
5739 s->current_picture.mb_type[mb_xy]= mb_type;
5741 if( cbp || IS_INTRA16x16( mb_type ) ) {
5742 const uint8_t *scan, *scan8x8, *dc_scan;
5743 const uint32_t *qmul;
5744 int dqp;
5746 if(IS_INTERLACED(mb_type)){
5747 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5748 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5749 dc_scan= luma_dc_field_scan;
5750 }else{
5751 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5752 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5753 dc_scan= luma_dc_zigzag_scan;
5756 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5757 if( dqp == INT_MIN ){
5758 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5759 return -1;
5761 s->qscale += dqp;
5762 if(((unsigned)s->qscale) > 51){
5763 if(s->qscale<0) s->qscale+= 52;
5764 else s->qscale-= 52;
5766 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5767 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5769 if( IS_INTRA16x16( mb_type ) ) {
5770 int i;
5771 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5772 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5774 if( cbp&15 ) {
5775 qmul = h->dequant4_coeff[0][s->qscale];
5776 for( i = 0; i < 16; i++ ) {
5777 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5778 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5780 } else {
5781 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5783 } else {
5784 int i8x8, i4x4;
5785 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5786 if( cbp & (1<<i8x8) ) {
5787 if( IS_8x8DCT(mb_type) ) {
5788 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5789 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5790 } else {
5791 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5792 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5793 const int index = 4*i8x8 + i4x4;
5794 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5795 //START_TIMER
5796 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5797 //STOP_TIMER("decode_residual")
5800 } else {
5801 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5802 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5807 if( cbp&0x30 ){
5808 int c;
5809 for( c = 0; c < 2; c++ ) {
5810 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5811 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5815 if( cbp&0x20 ) {
5816 int c, i;
5817 for( c = 0; c < 2; c++ ) {
5818 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5819 for( i = 0; i < 4; i++ ) {
5820 const int index = 16 + 4 * c + i;
5821 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5822 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5825 } else {
5826 uint8_t * const nnz= &h->non_zero_count_cache[0];
5827 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5828 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5830 } else {
5831 uint8_t * const nnz= &h->non_zero_count_cache[0];
5832 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5833 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5834 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5835 h->last_qscale_diff = 0;
5838 s->current_picture.qscale_table[mb_xy]= s->qscale;
5839 write_back_non_zero_count(h);
5841 if(MB_MBAFF){
5842 h->ref_count[0] >>= 1;
5843 h->ref_count[1] >>= 1;
5846 return 0;
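/* Deblocks one vertical luma edge (16 rows). For bS < 4 the normal filter is
 * applied through the dsputil hook with per-4-row tc0 clipping values;
 * bS == 4 only occurs on intra macroblock edges, where the strong filter
 * below may rewrite up to three pixels on each side of the edge when the
 * p2/q2 activity checks pass. */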
5850 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5851 int i, d;
5852 const int index_a = qp + h->slice_alpha_c0_offset;
5853 const int alpha = (alpha_table+52)[index_a];
5854 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5856 if( bS[0] < 4 ) {
5857 int8_t tc[4];
5858 for(i=0; i<4; i++)
5859 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5860 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5861 } else {
5862 /* 16px edge length, because bS=4 is triggered by being at
5863 * the edge of an intra MB, so all 4 bS are the same */
5864 for( d = 0; d < 16; d++ ) {
5865 const int p0 = pix[-1];
5866 const int p1 = pix[-2];
5867 const int p2 = pix[-3];
5869 const int q0 = pix[0];
5870 const int q1 = pix[1];
5871 const int q2 = pix[2];
5873 if( FFABS( p0 - q0 ) < alpha &&
5874 FFABS( p1 - p0 ) < beta &&
5875 FFABS( q1 - q0 ) < beta ) {
5877 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5878 if( FFABS( p2 - p0 ) < beta)
5880 const int p3 = pix[-4];
5881 /* p0', p1', p2' */
5882 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5883 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5884 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5885 } else {
5886 /* p0' */
5887 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5889 if( FFABS( q2 - q0 ) < beta)
5891 const int q3 = pix[3];
5892 /* q0', q1', q2' */
5893 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5894 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5895 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5896 } else {
5897 /* q0' */
5898 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5900 }else{
5901 /* p0', q0' */
5902 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5903 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5905 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
5907 pix += stride;
5911 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5912 int i;
5913 const int index_a = qp + h->slice_alpha_c0_offset;
5914 const int alpha = (alpha_table+52)[index_a];
5915 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5917 if( bS[0] < 4 ) {
5918 int8_t tc[4];
5919 for(i=0; i<4; i++)
5920 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5921 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5922 } else {
5923 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5927 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5928 int i;
5929 for( i = 0; i < 16; i++, pix += stride) {
5930 int index_a;
5931 int alpha;
5932 int beta;
5934 int qp_index;
5935 int bS_index = (i >> 1);
5936 if (!MB_FIELD) {
5937 bS_index &= ~1;
5938 bS_index |= (i & 1);
5941 if( bS[bS_index] == 0 ) {
5942 continue;
5945 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5946 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5947 alpha = (alpha_table+52)[index_a];
5948 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5950 if( bS[bS_index] < 4 ) {
5951 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5952 const int p0 = pix[-1];
5953 const int p1 = pix[-2];
5954 const int p2 = pix[-3];
5955 const int q0 = pix[0];
5956 const int q1 = pix[1];
5957 const int q2 = pix[2];
5959 if( FFABS( p0 - q0 ) < alpha &&
5960 FFABS( p1 - p0 ) < beta &&
5961 FFABS( q1 - q0 ) < beta ) {
5962 int tc = tc0;
5963 int i_delta;
5965 if( FFABS( p2 - p0 ) < beta ) {
5966 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5967 tc++;
5969 if( FFABS( q2 - q0 ) < beta ) {
5970 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5971 tc++;
5974 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5975 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5976 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5977 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
5979 }else{
5980 const int p0 = pix[-1];
5981 const int p1 = pix[-2];
5982 const int p2 = pix[-3];
5984 const int q0 = pix[0];
5985 const int q1 = pix[1];
5986 const int q2 = pix[2];
5988 if( FFABS( p0 - q0 ) < alpha &&
5989 FFABS( p1 - p0 ) < beta &&
5990 FFABS( q1 - q0 ) < beta ) {
5992 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5993 if( FFABS( p2 - p0 ) < beta)
5995 const int p3 = pix[-4];
5996 /* p0', p1', p2' */
5997 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5998 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5999 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6000 } else {
6001 /* p0' */
6002 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6004 if( FFABS( q2 - q0 ) < beta)
6006 const int q3 = pix[3];
6007 /* q0', q1', q2' */
6008 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6009 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6010 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6011 } else {
6012 /* q0' */
6013 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6015 }else{
6016 /* p0', q0' */
6017 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6018 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6020 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6025 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6026 int i;
6027 for( i = 0; i < 8; i++, pix += stride) {
6028 int index_a;
6029 int alpha;
6030 int beta;
6032 int qp_index;
6033 int bS_index = i;
6035 if( bS[bS_index] == 0 ) {
6036 continue;
6039 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6040 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6041 alpha = (alpha_table+52)[index_a];
6042 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6044 if( bS[bS_index] < 4 ) {
6045 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6046 const int p0 = pix[-1];
6047 const int p1 = pix[-2];
6048 const int q0 = pix[0];
6049 const int q1 = pix[1];
6051 if( FFABS( p0 - q0 ) < alpha &&
6052 FFABS( p1 - p0 ) < beta &&
6053 FFABS( q1 - q0 ) < beta ) {
6054 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6056 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6057 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6058 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6060 }else{
6061 const int p0 = pix[-1];
6062 const int p1 = pix[-2];
6063 const int q0 = pix[0];
6064 const int q1 = pix[1];
6066 if( FFABS( p0 - q0 ) < alpha &&
6067 FFABS( p1 - p0 ) < beta &&
6068 FFABS( q1 - q0 ) < beta ) {
6070 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6071 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6072 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6078 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6079 int i, d;
6080 const int index_a = qp + h->slice_alpha_c0_offset;
6081 const int alpha = (alpha_table+52)[index_a];
6082 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6083 const int pix_next = stride;
6085 if( bS[0] < 4 ) {
6086 int8_t tc[4];
6087 for(i=0; i<4; i++)
6088 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6089 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6090 } else {
6091 /* 16px edge length, see filter_mb_edgev */
6092 for( d = 0; d < 16; d++ ) {
6093 const int p0 = pix[-1*pix_next];
6094 const int p1 = pix[-2*pix_next];
6095 const int p2 = pix[-3*pix_next];
6096 const int q0 = pix[0];
6097 const int q1 = pix[1*pix_next];
6098 const int q2 = pix[2*pix_next];
6100 if( FFABS( p0 - q0 ) < alpha &&
6101 FFABS( p1 - p0 ) < beta &&
6102 FFABS( q1 - q0 ) < beta ) {
6104 const int p3 = pix[-4*pix_next];
6105 const int q3 = pix[ 3*pix_next];
6107 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6108 if( FFABS( p2 - p0 ) < beta) {
6109 /* p0', p1', p2' */
6110 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6111 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6112 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6113 } else {
6114 /* p0' */
6115 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6117 if( FFABS( q2 - q0 ) < beta) {
6118 /* q0', q1', q2' */
6119 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6120 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6121 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6122 } else {
6123 /* q0' */
6124 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6126 }else{
6127 /* p0', q0' */
6128 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6129 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6131 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6133 pix++;
6138 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6139 int i;
6140 const int index_a = qp + h->slice_alpha_c0_offset;
6141 const int alpha = (alpha_table+52)[index_a];
6142 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6144 if( bS[0] < 4 ) {
6145 int8_t tc[4];
6146 for(i=0; i<4; i++)
6147 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6148 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6149 } else {
6150 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6154 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6155 MpegEncContext * const s = &h->s;
6156 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6157 int mb_xy, mb_type;
6158 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6160 mb_xy = h->mb_xy;
6162 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6163 1 ||
6164 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6165 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6166 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6167 return;
6169 assert(!FRAME_MBAFF);
6171 mb_type = s->current_picture.mb_type[mb_xy];
6172 qp = s->current_picture.qscale_table[mb_xy];
6173 qp0 = s->current_picture.qscale_table[mb_xy-1];
6174 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6175 qpc = get_chroma_qp( h, 0, qp );
6176 qpc0 = get_chroma_qp( h, 0, qp0 );
6177 qpc1 = get_chroma_qp( h, 0, qp1 );
6178 qp0 = (qp + qp0 + 1) >> 1;
6179 qp1 = (qp + qp1 + 1) >> 1;
6180 qpc0 = (qpc + qpc0 + 1) >> 1;
6181 qpc1 = (qpc + qpc1 + 1) >> 1;
6182 qp_thresh = 15 - h->slice_alpha_c0_offset;
6183 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6184 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6185 return;
6187 if( IS_INTRA(mb_type) ) {
6188 int16_t bS4[4] = {4,4,4,4};
6189 int16_t bS3[4] = {3,3,3,3};
6190 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6191 if( IS_8x8DCT(mb_type) ) {
6192 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6193 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6194 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6195 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6196 } else {
6197 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6198 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6199 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6200 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6201 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6202 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6203 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6204 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6206 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6207 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6208 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6209 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6210 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6211 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6212 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6213 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6214 return;
6215 } else {
6216 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6217 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6218 int edges;
6219 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6220 edges = 4;
6221 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6222 } else {
6223 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6224 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6225 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6226 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6227 ? 3 : 0;
6228 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6229 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6230 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6231 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6233 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6234 bSv[0][0] = 0x0004000400040004ULL;
6235 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6236 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6238 #define FILTER(hv,dir,edge)\
6239 if(bSv[dir][edge]) {\
6240 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6241 if(!(edge&1)) {\
6242 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6243 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6246 if( edges == 1 ) {
6247 FILTER(v,0,0);
6248 FILTER(h,1,0);
6249 } else if( IS_8x8DCT(mb_type) ) {
6250 FILTER(v,0,0);
6251 FILTER(v,0,2);
6252 FILTER(h,1,0);
6253 FILTER(h,1,2);
6254 } else {
6255 FILTER(v,0,0);
6256 FILTER(v,0,1);
6257 FILTER(v,0,2);
6258 FILTER(v,0,3);
6259 FILTER(h,1,0);
6260 FILTER(h,1,1);
6261 FILTER(h,1,2);
6262 FILTER(h,1,3);
6264 #undef FILTER
6268 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6269 MpegEncContext * const s = &h->s;
6270 const int mb_xy= mb_x + mb_y*s->mb_stride;
6271 const int mb_type = s->current_picture.mb_type[mb_xy];
6272 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6273 int first_vertical_edge_done = 0;
6274 int dir;
6276 //for sufficiently low qp, filtering wouldn't do anything
6277 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
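//(alpha_table and beta_table are zero up to index 15, so edges whose averaged
// QP stays at or below qp_thresh cannot modify any pixel)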
6278 if(!FRAME_MBAFF){
6279 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6280 int qp = s->current_picture.qscale_table[mb_xy];
6281 if(qp <= qp_thresh
6282 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6283 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6284 return;
6288 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6289 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6290 int top_type, left_type[2];
6291 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6292 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6293 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6295 if(IS_8x8DCT(top_type)){
6296 h->non_zero_count_cache[4+8*0]=
6297 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6298 h->non_zero_count_cache[6+8*0]=
6299 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6301 if(IS_8x8DCT(left_type[0])){
6302 h->non_zero_count_cache[3+8*1]=
6303 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6305 if(IS_8x8DCT(left_type[1])){
6306 h->non_zero_count_cache[3+8*3]=
6307 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6310 if(IS_8x8DCT(mb_type)){
6311 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6312 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6314 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6315 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6317 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6318 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6320 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6321 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6325 if (FRAME_MBAFF
6326 // left mb is in picture
6327 && h->slice_table[mb_xy-1] != 255
6328 // and current and left pair do not have the same interlaced type
6329 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6330 // and left mb is in the same slice if deblocking_filter == 2
6331 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6332 /* First vertical edge is different in MBAFF frames
6333 * There are 8 different bS to compute and 2 different Qp
6335 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6336 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6337 int16_t bS[8];
6338 int qp[2];
6339 int bqp[2];
6340 int rqp[2];
6341 int mb_qp, mbn0_qp, mbn1_qp;
6342 int i;
6343 first_vertical_edge_done = 1;
6345 if( IS_INTRA(mb_type) )
6346 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6347 else {
6348 for( i = 0; i < 8; i++ ) {
6349 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6351 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6352 bS[i] = 4;
6353 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6354 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6355 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
6356 bS[i] = 2;
6357 else
6358 bS[i] = 1;
6362 mb_qp = s->current_picture.qscale_table[mb_xy];
6363 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6364 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6365 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6366 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6367 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6368 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6369 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6370 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6371 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6372 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6373 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6374 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6376 /* Filter edge */
6377 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6378 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6379 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6380 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6381 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6383 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6384 for( dir = 0; dir < 2; dir++ )
6386 int edge;
6387 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6388 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6389 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &15 ][0] + (MB_MBAFF ? 20 : 2);
6390 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&15 ][0] + (MB_MBAFF ? 20 : 2);
6391 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6393 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6394 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6395 // how often to recheck mv-based bS when iterating between edges
6396 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6397 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6398 // how often to recheck mv-based bS when iterating along each edge
6399 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6401 if (first_vertical_edge_done) {
6402 start = 1;
6403 first_vertical_edge_done = 0;
6406 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6407 start = 1;
6409 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6410 && !IS_INTERLACED(mb_type)
6411 && IS_INTERLACED(mbm_type)
6413 // This is a special case in the standard where the filtering must
6414 // be done twice (once for each field) even if we are in a
6415 // frame macroblock.
6417 static const int nnz_idx[4] = {4,5,6,3};
6418 unsigned int tmp_linesize = 2 * linesize;
6419 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6420 int mbn_xy = mb_xy - 2 * s->mb_stride;
6421 int qp;
6422 int i, j;
6423 int16_t bS[4];
6425 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6426 if( IS_INTRA(mb_type) ||
6427 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6428 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6429 } else {
6430 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6431 for( i = 0; i < 4; i++ ) {
6432 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6433 mbn_nnz[nnz_idx[i]] != 0 )
6434 bS[i] = 2;
6435 else
6436 bS[i] = 1;
6439 // Do not use s->qscale as the luma quantizer because it does not have
6440 // the same value in IPCM macroblocks.
6441 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6442 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6443 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6444 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6445 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6446 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6447 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6448 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6451 start = 1;
6454 /* Calculate bS */
6455 for( edge = start; edge < edges; edge++ ) {
6456 /* mbn_xy: neighbor macroblock */
6457 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6458 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6459 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6460 int16_t bS[4];
6461 int qp;
6463 if( (edge&1) && IS_8x8DCT(mb_type) )
6464 continue;
6466 if( IS_INTRA(mb_type) ||
6467 IS_INTRA(mbn_type) ) {
6468 int value;
6469 if (edge == 0) {
6470 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6471 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6473 value = 4;
6474 } else {
6475 value = 3;
6477 } else {
6478 value = 3;
6480 bS[0] = bS[1] = bS[2] = bS[3] = value;
6481 } else {
6482 int i, l;
6483 int mv_done;
6485 if( edge & mask_edge ) {
6486 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6487 mv_done = 1;
6489 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6490 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6491 mv_done = 1;
6493 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6494 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6495 int bn_idx= b_idx - (dir ? 8:1);
6496 int v = 0;
6498 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6499 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6500 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6501 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6504 if(h->slice_type_nos == FF_B_TYPE && v){
6505 v=0;
6506 for( l = 0; !v && l < 2; l++ ) {
6507 int ln= 1-l;
6508 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6509 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6510 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6514 bS[0] = bS[1] = bS[2] = bS[3] = v;
6515 mv_done = 1;
6517 else
6518 mv_done = 0;
6520 for( i = 0; i < 4; i++ ) {
6521 int x = dir == 0 ? edge : i;
6522 int y = dir == 0 ? i : edge;
6523 int b_idx= 8 + 4 + x + 8*y;
6524 int bn_idx= b_idx - (dir ? 8:1);
6526 if( h->non_zero_count_cache[b_idx] != 0 ||
6527 h->non_zero_count_cache[bn_idx] != 0 ) {
6528 bS[i] = 2;
6530 else if(!mv_done)
6532 bS[i] = 0;
6533 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6534 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6535 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6536 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6537 bS[i] = 1;
6538 break;
6542 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6543 bS[i] = 0;
6544 for( l = 0; l < 2; l++ ) {
6545 int ln= 1-l;
6546 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6547 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6548 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6549 bS[i] = 1;
6550 break;
6557 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6558 continue;
6561 /* Filter edge */
6562 // Do not use s->qscale as the luma quantizer because it does not have
6563 // the same value in IPCM macroblocks.
6564 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6565 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6566 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6567 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6568 if( dir == 0 ) {
6569 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6570 if( (edge&1) == 0 ) {
6571 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6572 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6573 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6574 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6576 } else {
6577 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6578 if( (edge&1) == 0 ) {
6579 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6580 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6581 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6582 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6589 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6590 MpegEncContext * const s = &h->s;
6591 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6593 s->mb_skip_run= -1;
6595 if( h->pps.cabac ) {
6596 int i;
6598 /* realign */
6599 align_get_bits( &s->gb );
6601 /* init cabac */
6602 ff_init_cabac_states( &h->cabac);
6603 ff_init_cabac_decoder( &h->cabac,
6604 s->gb.buffer + get_bits_count(&s->gb)/8,
6605 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6606 /* calculate pre-state */
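/* per the spec: preCtxState = clip(((m * SliceQPy) >> 4) + n, 1, 126); values
 * <= 63 give MPS 0 with state 63 - preCtxState, larger values MPS 1 with
 * state preCtxState - 64; both are packed below as 2*state + MPS */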
6607 for( i= 0; i < 460; i++ ) {
6608 int pre;
6609 if( h->slice_type_nos == FF_I_TYPE )
6610 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6611 else
6612 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6614 if( pre <= 63 )
6615 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6616 else
6617 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6620 for(;;){
6621 //START_TIMER
6622 int ret = decode_mb_cabac(h);
6623 int eos;
6624 //STOP_TIMER("decode_mb_cabac")
6626 if(ret>=0) hl_decode_mb(h);
6628 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6629 s->mb_y++;
6631 if(ret>=0) ret = decode_mb_cabac(h);
6633 if(ret>=0) hl_decode_mb(h);
6634 s->mb_y--;
6636 eos = get_cabac_terminate( &h->cabac );
6638 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6639 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6640 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6641 return -1;
6644 if( ++s->mb_x >= s->mb_width ) {
6645 s->mb_x = 0;
6646 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6647 ++s->mb_y;
6648 if(FIELD_OR_MBAFF_PICTURE) {
6649 ++s->mb_y;
6653 if( eos || s->mb_y >= s->mb_height ) {
6654 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6655 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6656 return 0;
6660 } else {
6661 for(;;){
6662 int ret = decode_mb_cavlc(h);
6664 if(ret>=0) hl_decode_mb(h);
6666 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6667 s->mb_y++;
6668 ret = decode_mb_cavlc(h);
6670 if(ret>=0) hl_decode_mb(h);
6671 s->mb_y--;
6674 if(ret<0){
6675 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6676 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6678 return -1;
6681 if(++s->mb_x >= s->mb_width){
6682 s->mb_x=0;
6683 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6684 ++s->mb_y;
6685 if(FIELD_OR_MBAFF_PICTURE) {
6686 ++s->mb_y;
6688 if(s->mb_y >= s->mb_height){
6689 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6691 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6692 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6694 return 0;
6695 }else{
6696 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6698 return -1;
6703 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6704 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6705 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6706 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6708 return 0;
6709 }else{
6710 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6712 return -1;
6718 #if 0
6719 for(;s->mb_y < s->mb_height; s->mb_y++){
6720 for(;s->mb_x < s->mb_width; s->mb_x++){
6721 int ret= decode_mb(h);
6723 hl_decode_mb(h);
6725 if(ret<0){
6726 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6727 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6729 return -1;
6732 if(++s->mb_x >= s->mb_width){
6733 s->mb_x=0;
6734 if(++s->mb_y >= s->mb_height){
6735 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6736 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6738 return 0;
6739 }else{
6740 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6742 return -1;
6747             if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6748 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6749 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6751 return 0;
6752 }else{
6753 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6755 return -1;
6759 s->mb_x=0;
6760 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6762 #endif
6763 return -1; //not reached
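/* Illustration only: the CABAC pre-state computed near the top of
 * decode_slice() packs the probability state index and the MPS bit into
 * one byte.  A minimal standalone sketch of that mapping (hypothetical
 * helper name, not part of the decoder): */
#if 0
static uint8_t pack_cabac_state(int pre)   /* pre is already clipped to 1..126 */
{
    if (pre <= 63)
        return 2 * (63 - pre) + 0;         /* MPS == 0 in the low bit */
    else
        return 2 * (pre - 64) + 1;         /* MPS == 1 in the low bit */
}
#endif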
6766 static int decode_unregistered_user_data(H264Context *h, int size){
6767 MpegEncContext * const s = &h->s;
6768 uint8_t user_data[16+256];
6769 int e, build, i;
6771 if(size<16)
6772 return -1;
6774 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6775 user_data[i]= get_bits(&s->gb, 8);
6778 user_data[i]= 0;
6779 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6780 if(e==1 && build>=0)
6781 h->x264_build= build;
6783 if(s->avctx->debug & FF_DEBUG_BUGS)
6784 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6786 for(; i<size; i++)
6787 skip_bits(&s->gb, 8);
6789 return 0;
6792 static int decode_sei(H264Context *h){
6793 MpegEncContext * const s = &h->s;
6795 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6796 int size, type;
6798 type=0;
6800 type+= show_bits(&s->gb, 8);
6801 }while(get_bits(&s->gb, 8) == 255);
6803 size=0;
6805 size+= show_bits(&s->gb, 8);
6806 }while(get_bits(&s->gb, 8) == 255);
6808 switch(type){
6809 case 5:
6810 if(decode_unregistered_user_data(h, size) < 0)
6811 return -1;
6812 break;
6813 default:
6814 skip_bits(&s->gb, 8*size);
6817 //FIXME check bits here
6818 align_get_bits(&s->gb);
6821 return 0;
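/* Illustration only: the SEI payload type and size read above use the
 * byte-wise coding where every 0xFF byte adds 255 and the first byte
 * below 255 terminates the value, e.g. 0xFF 0x2D codes 255 + 45 = 300.
 * A standalone sketch with a hypothetical helper name: */
#if 0
static int read_sei_value(GetBitContext *gb)
{
    int value = 0;
    while (show_bits(gb, 8) == 255) {
        skip_bits(gb, 8);
        value += 255;
    }
    return value + get_bits(gb, 8);
}
#endif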
6824 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6825 MpegEncContext * const s = &h->s;
6826 int cpb_count, i;
6827 cpb_count = get_ue_golomb(&s->gb) + 1;
6828 get_bits(&s->gb, 4); /* bit_rate_scale */
6829 get_bits(&s->gb, 4); /* cpb_size_scale */
6830 for(i=0; i<cpb_count; i++){
6831 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6832 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6833 get_bits1(&s->gb); /* cbr_flag */
6835 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6836 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6837 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6838 get_bits(&s->gb, 5); /* time_offset_length */
6841 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6842 MpegEncContext * const s = &h->s;
6843 int aspect_ratio_info_present_flag;
6844 unsigned int aspect_ratio_idc;
6845 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6847 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6849 if( aspect_ratio_info_present_flag ) {
6850 aspect_ratio_idc= get_bits(&s->gb, 8);
6851 if( aspect_ratio_idc == EXTENDED_SAR ) {
6852 sps->sar.num= get_bits(&s->gb, 16);
6853 sps->sar.den= get_bits(&s->gb, 16);
6854 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
6855 sps->sar= pixel_aspect[aspect_ratio_idc];
6856 }else{
6857 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6858 return -1;
6860 }else{
6861 sps->sar.num=
6862 sps->sar.den= 0;
6864 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6866 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6867 get_bits1(&s->gb); /* overscan_appropriate_flag */
6870 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6871 get_bits(&s->gb, 3); /* video_format */
6872 get_bits1(&s->gb); /* video_full_range_flag */
6873 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6874 get_bits(&s->gb, 8); /* colour_primaries */
6875 get_bits(&s->gb, 8); /* transfer_characteristics */
6876 get_bits(&s->gb, 8); /* matrix_coefficients */
6880 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6881 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6882 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6885 sps->timing_info_present_flag = get_bits1(&s->gb);
6886 if(sps->timing_info_present_flag){
6887 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6888 sps->time_scale = get_bits_long(&s->gb, 32);
6889 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6892 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6893 if(nal_hrd_parameters_present_flag)
6894 decode_hrd_parameters(h, sps);
6895 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6896 if(vcl_hrd_parameters_present_flag)
6897 decode_hrd_parameters(h, sps);
6898 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6899 get_bits1(&s->gb); /* low_delay_hrd_flag */
6900 get_bits1(&s->gb); /* pic_struct_present_flag */
6902 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6903 if(sps->bitstream_restriction_flag){
6904 unsigned int num_reorder_frames;
6905 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6906 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6907 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6908 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6909 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6910 num_reorder_frames= get_ue_golomb(&s->gb);
6911 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6913 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6914 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6915 return -1;
6918 sps->num_reorder_frames= num_reorder_frames;
6921 return 0;
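/* Illustration only: the sample aspect ratio parsed above relates coded
 * and display geometry as hinted by the commented-out aspect_ratio line
 * further up.  A minimal sketch (hypothetical helper, float math only
 * for clarity): */
#if 0
static float display_aspect_ratio(int width, int height, AVRational sar)
{
    if (!sar.num || !sar.den)
        return (float)width / height;                 /* SAR unknown: assume square pixels */
    return (float)(width * sar.num) / (height * sar.den);
}
#endif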
6924 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6925 const uint8_t *jvt_list, const uint8_t *fallback_list){
6926 MpegEncContext * const s = &h->s;
6927 int i, last = 8, next = 8;
6928 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6929 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6930 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6931 else
6932 for(i=0;i<size;i++){
6933 if(next)
6934 next = (last + get_se_golomb(&s->gb)) & 0xff;
6935 if(!i && !next){ /* matrix not written, we use the preset one */
6936 memcpy(factors, jvt_list, size*sizeof(uint8_t));
6937 break;
6939 last = factors[scan[i]] = next ? next : last;
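/* Illustration only: the list above is delta-coded in zig-zag order and a
 * delta that brings "next" to 0 means "repeat the previous value for the
 * rest of the list".  For example, starting from last = 8, the deltas
 * +2, +2, -12 produce 10, 12 and then 12 for every remaining position. */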
6943 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6944 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6945 MpegEncContext * const s = &h->s;
6946 int fallback_sps = !is_sps && sps->scaling_matrix_present;
6947 const uint8_t *fallback[4] = {
6948 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6949 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6950 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6951 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
6953 if(get_bits1(&s->gb)){
6954 sps->scaling_matrix_present |= is_sps;
6955 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6956 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6957 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6958 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6959 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6960 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
6961 if(is_sps || pps->transform_8x8_mode){
6962 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6963 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
6965 } else if(fallback_sps) {
6966 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
6967 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
6972 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
6974 static void *
6975 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
6976 const size_t size, const char *name)
6978 if(id>=max) {
6979 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
6980 return NULL;
6983 if(!vec[id]) {
6984 vec[id] = av_mallocz(size);
6985 if(vec[id] == NULL)
6986 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
6988 return vec[id];
6991 static inline int decode_seq_parameter_set(H264Context *h){
6992 MpegEncContext * const s = &h->s;
6993 int profile_idc, level_idc;
6994 unsigned int sps_id, tmp, mb_width, mb_height;
6995 int i;
6996 SPS *sps;
6998 profile_idc= get_bits(&s->gb, 8);
6999 get_bits1(&s->gb); //constraint_set0_flag
7000 get_bits1(&s->gb); //constraint_set1_flag
7001 get_bits1(&s->gb); //constraint_set2_flag
7002 get_bits1(&s->gb); //constraint_set3_flag
7003 get_bits(&s->gb, 4); // reserved
7004 level_idc= get_bits(&s->gb, 8);
7005 sps_id= get_ue_golomb(&s->gb);
7007 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7008 if(sps == NULL)
7009 return -1;
7011 sps->profile_idc= profile_idc;
7012 sps->level_idc= level_idc;
7014 if(sps->profile_idc >= 100){ //high profile
7015 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7016 if(sps->chroma_format_idc == 3)
7017 get_bits1(&s->gb); //residual_color_transform_flag
7018 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7019 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7020 sps->transform_bypass = get_bits1(&s->gb);
7021 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7022 }else{
7023 sps->scaling_matrix_present = 0;
7024 sps->chroma_format_idc= 1;
7027 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7028 sps->poc_type= get_ue_golomb(&s->gb);
7030 if(sps->poc_type == 0){ //FIXME #define
7031 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7032 } else if(sps->poc_type == 1){//FIXME #define
7033 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7034 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7035 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7036 tmp= get_ue_golomb(&s->gb);
7038 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7039 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7040 return -1;
7042 sps->poc_cycle_length= tmp;
7044 for(i=0; i<sps->poc_cycle_length; i++)
7045 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7046 }else if(sps->poc_type != 2){
7047 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7048 return -1;
7051 tmp= get_ue_golomb(&s->gb);
7052 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7053 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7054 return -1;
7056 sps->ref_frame_count= tmp;
7057 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7058 mb_width= get_ue_golomb(&s->gb) + 1;
7059 mb_height= get_ue_golomb(&s->gb) + 1;
7060 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7061 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7062 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7063 return -1;
7065 sps->mb_width = mb_width;
7066 sps->mb_height= mb_height;
7068 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7069 if(!sps->frame_mbs_only_flag)
7070 sps->mb_aff= get_bits1(&s->gb);
7071 else
7072 sps->mb_aff= 0;
7074 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7076 #ifndef ALLOW_INTERLACE
7077 if(sps->mb_aff)
7078 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7079 #endif
7080 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7081 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7083 sps->crop= get_bits1(&s->gb);
7084 if(sps->crop){
7085 sps->crop_left = get_ue_golomb(&s->gb);
7086 sps->crop_right = get_ue_golomb(&s->gb);
7087 sps->crop_top = get_ue_golomb(&s->gb);
7088 sps->crop_bottom= get_ue_golomb(&s->gb);
7089 if(sps->crop_left || sps->crop_top){
7090 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7092 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7093 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7095 }else{
7096 sps->crop_left =
7097 sps->crop_right =
7098 sps->crop_top =
7099 sps->crop_bottom= 0;
7102 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7103 if( sps->vui_parameters_present_flag )
7104 decode_vui_parameters(h, sps);
7106 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7107 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7108 sps_id, sps->profile_idc, sps->level_idc,
7109 sps->poc_type,
7110 sps->ref_frame_count,
7111 sps->mb_width, sps->mb_height,
7112 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7113 sps->direct_8x8_inference_flag ? "8B8" : "",
7114 sps->crop_left, sps->crop_right,
7115 sps->crop_top, sps->crop_bottom,
7116 sps->vui_parameters_present_flag ? "VUI" : "",
7117 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
7120 return 0;
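/* Illustration only: most SPS/PPS syntax elements above are ue(v)
 * Exp-Golomb codes read with get_ue_golomb(); the matching writer
 * set_ue_golomb() is exercised in the TEST block at the end of this
 * file.  A minimal standalone sketch of the coding (hypothetical helper
 * name): codeNum 0 -> "1", 1 -> "010", 2 -> "011", 3 -> "00100", ... */
#if 0
static void write_ue_golomb_sketch(PutBitContext *pb, unsigned int k)
{
    int n = av_log2(k + 1);       /* k+1 needs n+1 bits */
    if (n)
        put_bits(pb, n, 0);       /* n leading zero bits */
    put_bits(pb, n + 1, k + 1);   /* k+1 in binary, MSB first (that MSB is the terminating 1) */
}
#endif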
7123 static void
7124 build_qp_table(PPS *pps, int t, int index)
7126 int i;
7127 for(i = 0; i < 52; i++)
7128 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
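/* Illustration only: with the tables built above, mapping a luma QP to
 * the corresponding chroma QP is a single lookup, with the index offset
 * already folded in and clipped to 0..51; this is roughly what
 * get_chroma_qp(), used by the deblocking code, amounts to.  Sketch with
 * a hypothetical helper name: */
#if 0
static int chroma_qp_for(const PPS *pps, int t, int luma_qp)
{
    /* t selects chroma_qp_index_offset[0] or [1] */
    return pps->chroma_qp_table[t][luma_qp];
}
#endif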
7131 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7132 MpegEncContext * const s = &h->s;
7133 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7134 PPS *pps;
7136 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7137 if(pps == NULL)
7138 return -1;
7140 tmp= get_ue_golomb(&s->gb);
7141 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7142 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7143 return -1;
7145 pps->sps_id= tmp;
7147 pps->cabac= get_bits1(&s->gb);
7148 pps->pic_order_present= get_bits1(&s->gb);
7149 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7150 if(pps->slice_group_count > 1 ){
7151 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7152 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7153 switch(pps->mb_slice_group_map_type){
7154 case 0:
7155 #if 0
7156 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7157 | run_length[ i ] |1 |ue(v) |
7158 #endif
7159 break;
7160 case 2:
7161 #if 0
7162 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7163 |{ | | |
7164 | top_left_mb[ i ] |1 |ue(v) |
7165 | bottom_right_mb[ i ] |1 |ue(v) |
7166 | } | | |
7167 #endif
7168 break;
7169 case 3:
7170 case 4:
7171 case 5:
7172 #if 0
7173 | slice_group_change_direction_flag |1 |u(1) |
7174 | slice_group_change_rate_minus1 |1 |ue(v) |
7175 #endif
7176 break;
7177 case 6:
7178 #if 0
7179 | slice_group_id_cnt_minus1 |1 |ue(v) |
7180 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7181 |) | | |
7182 | slice_group_id[ i ] |1 |u(v) |
7183 #endif
7184 break;
7187 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7188 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7189 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7190 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7191 pps->ref_count[0]= pps->ref_count[1]= 1;
7192 return -1;
7195 pps->weighted_pred= get_bits1(&s->gb);
7196 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7197 pps->init_qp= get_se_golomb(&s->gb) + 26;
7198 pps->init_qs= get_se_golomb(&s->gb) + 26;
7199 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7200 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7201 pps->constrained_intra_pred= get_bits1(&s->gb);
7202 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7204 pps->transform_8x8_mode= 0;
7205 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7206 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7207 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7209 if(get_bits_count(&s->gb) < bit_length){
7210 pps->transform_8x8_mode= get_bits1(&s->gb);
7211 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7212 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7213 } else {
7214 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7217 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7218 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7219 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7220 h->pps.chroma_qp_diff= 1;
7222 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7223 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7224 pps_id, pps->sps_id,
7225 pps->cabac ? "CABAC" : "CAVLC",
7226 pps->slice_group_count,
7227 pps->ref_count[0], pps->ref_count[1],
7228 pps->weighted_pred ? "weighted" : "",
7229 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7230 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7231 pps->constrained_intra_pred ? "CONSTR" : "",
7232 pps->redundant_pic_cnt_present ? "REDU" : "",
7233 pps->transform_8x8_mode ? "8x8DCT" : ""
7237 return 0;
7241 * Call decode_slice() for each context.
7243 * @param h h264 master context
7244 * @param context_count number of contexts to execute
7246 static void execute_decode_slices(H264Context *h, int context_count){
7247 MpegEncContext * const s = &h->s;
7248 AVCodecContext * const avctx= s->avctx;
7249 H264Context *hx;
7250 int i;
7252 if(context_count == 1) {
7253 decode_slice(avctx, h);
7254 } else {
7255 for(i = 1; i < context_count; i++) {
7256 hx = h->thread_context[i];
7257 hx->s.error_resilience = avctx->error_resilience;
7258 hx->s.error_count = 0;
7261 avctx->execute(avctx, (void *)decode_slice,
7262 (void **)h->thread_context, NULL, context_count);
7264 /* pull back stuff from slices to master context */
7265 hx = h->thread_context[context_count - 1];
7266 s->mb_x = hx->s.mb_x;
7267 s->mb_y = hx->s.mb_y;
7268 s->dropable = hx->s.dropable;
7269 s->picture_structure = hx->s.picture_structure;
7270 for(i = 1; i < context_count; i++)
7271 h->s.error_count += h->thread_context[i]->s.error_count;
7276 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7277 MpegEncContext * const s = &h->s;
7278 AVCodecContext * const avctx= s->avctx;
7279 int buf_index=0;
7280 H264Context *hx; ///< thread context
7281 int context_count = 0;
7283 h->max_contexts = avctx->thread_count;
7284 #if 0
7285 int i;
7286 for(i=0; i<50; i++){
7287 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7289 #endif
7290 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7291 h->current_slice = 0;
7292 if (!s->first_field)
7293 s->current_picture_ptr= NULL;
7296 for(;;){
7297 int consumed;
7298 int dst_length;
7299 int bit_length;
7300 const uint8_t *ptr;
7301 int i, nalsize = 0;
7302 int err;
7304 if(h->is_avc) {
7305 if(buf_index >= buf_size) break;
7306 nalsize = 0;
7307 for(i = 0; i < h->nal_length_size; i++)
7308 nalsize = (nalsize << 8) | buf[buf_index++];
7309 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7310 if(nalsize == 1){
7311 buf_index++;
7312 continue;
7313 }else{
7314 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7315 break;
7318 } else {
7319 // start code prefix search
7320 for(; buf_index + 3 < buf_size; buf_index++){
7321 // This should always succeed in the first iteration.
7322 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7323 break;
7326 if(buf_index+3 >= buf_size) break;
7328 buf_index+=3;
7331 hx = h->thread_context[context_count];
7333 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7334 if (ptr==NULL || dst_length < 0){
7335 return -1;
7337         while(dst_length > 0 && ptr[dst_length - 1] == 0)
7338 dst_length--;
7339 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7341 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7342 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7345 if (h->is_avc && (nalsize != consumed)){
7346 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7347 consumed= nalsize;
7350 buf_index += consumed;
7352 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7353 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7354 continue;
7356 again:
7357 err = 0;
7358 switch(hx->nal_unit_type){
7359 case NAL_IDR_SLICE:
7360 if (h->nal_unit_type != NAL_IDR_SLICE) {
7361                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices\n");
7362 return -1;
7364             idr(h); //FIXME ensure we don't lose some frames if there is reordering
7365 case NAL_SLICE:
7366 init_get_bits(&hx->s.gb, ptr, bit_length);
7367 hx->intra_gb_ptr=
7368 hx->inter_gb_ptr= &hx->s.gb;
7369 hx->s.data_partitioning = 0;
7371 if((err = decode_slice_header(hx, h)))
7372 break;
7374 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7375 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7376 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7377 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7378 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7379 && avctx->skip_frame < AVDISCARD_ALL)
7380 context_count++;
7381 break;
7382 case NAL_DPA:
7383 init_get_bits(&hx->s.gb, ptr, bit_length);
7384 hx->intra_gb_ptr=
7385 hx->inter_gb_ptr= NULL;
7386 hx->s.data_partitioning = 1;
7388 err = decode_slice_header(hx, h);
7389 break;
7390 case NAL_DPB:
7391 init_get_bits(&hx->intra_gb, ptr, bit_length);
7392 hx->intra_gb_ptr= &hx->intra_gb;
7393 break;
7394 case NAL_DPC:
7395 init_get_bits(&hx->inter_gb, ptr, bit_length);
7396 hx->inter_gb_ptr= &hx->inter_gb;
7398 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7399 && s->context_initialized
7400 && s->hurry_up < 5
7401 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7402 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7403 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7404 && avctx->skip_frame < AVDISCARD_ALL)
7405 context_count++;
7406 break;
7407 case NAL_SEI:
7408 init_get_bits(&s->gb, ptr, bit_length);
7409 decode_sei(h);
7410 break;
7411 case NAL_SPS:
7412 init_get_bits(&s->gb, ptr, bit_length);
7413 decode_seq_parameter_set(h);
7415 if(s->flags& CODEC_FLAG_LOW_DELAY)
7416 s->low_delay=1;
7418 if(avctx->has_b_frames < 2)
7419 avctx->has_b_frames= !s->low_delay;
7420 break;
7421 case NAL_PPS:
7422 init_get_bits(&s->gb, ptr, bit_length);
7424 decode_picture_parameter_set(h, bit_length);
7426 break;
7427 case NAL_AUD:
7428 case NAL_END_SEQUENCE:
7429 case NAL_END_STREAM:
7430 case NAL_FILLER_DATA:
7431 case NAL_SPS_EXT:
7432 case NAL_AUXILIARY_SLICE:
7433 break;
7434 default:
7435 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7438 if(context_count == h->max_contexts) {
7439 execute_decode_slices(h, context_count);
7440 context_count = 0;
7443 if (err < 0)
7444 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7445 else if(err == 1) {
7446 /* Slice could not be decoded in parallel mode, copy down
7447 * NAL unit stuff to context 0 and restart. Note that
7448 * rbsp_buffer is not transferred, but since we no longer
7449 * run in parallel mode this should not be an issue. */
7450 h->nal_unit_type = hx->nal_unit_type;
7451 h->nal_ref_idc = hx->nal_ref_idc;
7452 hx = h;
7453 goto again;
7456 if(context_count)
7457 execute_decode_slices(h, context_count);
7458 return buf_index;
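/* Illustration only: decode_nal_units() accepts the two common NAL
 * framings handled in the loop above:
 *
 *   Annex B:  00 00 01 <NAL> 00 00 01 <NAL> ...   (start-code prefixed)
 *   avcC/MP4: <len> <NAL> <len> <NAL> ...         (big-endian length of
 *                                                  nal_length_size bytes)
 *
 * A standalone sketch of the length-prefixed case (hypothetical helper
 * name): */
#if 0
static int next_nal_size_avc(const uint8_t *buf, int nal_length_size)
{
    int i, nalsize = 0;
    for (i = 0; i < nal_length_size; i++)
        nalsize = (nalsize << 8) | buf[i];
    return nalsize;
}
#endif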
7462  * Returns the number of bytes consumed for building the current frame.
7464 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7465     if(pos==0) pos=1; // avoid infinite loops (I doubt this is needed, but ...)
7466 if(pos+10>buf_size) pos=buf_size; // oops ;)
7468 return pos;
7471 static int decode_frame(AVCodecContext *avctx,
7472 void *data, int *data_size,
7473 const uint8_t *buf, int buf_size)
7475 H264Context *h = avctx->priv_data;
7476 MpegEncContext *s = &h->s;
7477 AVFrame *pict = data;
7478 int buf_index;
7480 s->flags= avctx->flags;
7481 s->flags2= avctx->flags2;
7483 /* end of stream, output what is still in the buffers */
7484 if (buf_size == 0) {
7485 Picture *out;
7486 int i, out_idx;
7488 //FIXME factorize this with the output code below
7489 out = h->delayed_pic[0];
7490 out_idx = 0;
7491 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7492 if(h->delayed_pic[i]->poc < out->poc){
7493 out = h->delayed_pic[i];
7494 out_idx = i;
7497 for(i=out_idx; h->delayed_pic[i]; i++)
7498 h->delayed_pic[i] = h->delayed_pic[i+1];
7500 if(out){
7501 *data_size = sizeof(AVFrame);
7502 *pict= *(AVFrame*)out;
7505 return 0;
7508 if(h->is_avc && !h->got_avcC) {
7509 int i, cnt, nalsize;
7510 unsigned char *p = avctx->extradata;
7511 if(avctx->extradata_size < 7) {
7512 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7513 return -1;
7515 if(*p != 1) {
7516 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7517 return -1;
7519         /* The SPS and PPS in the avcC always have their length coded on 2 bytes,
7520            so use a fake nal_length_size = 2 while parsing them. */
7521 h->nal_length_size = 2;
7522 // Decode sps from avcC
7523 cnt = *(p+5) & 0x1f; // Number of sps
7524 p += 6;
7525 for (i = 0; i < cnt; i++) {
7526 nalsize = AV_RB16(p) + 2;
7527 if(decode_nal_units(h, p, nalsize) < 0) {
7528 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7529 return -1;
7531 p += nalsize;
7533 // Decode pps from avcC
7534 cnt = *(p++); // Number of pps
7535 for (i = 0; i < cnt; i++) {
7536 nalsize = AV_RB16(p) + 2;
7537 if(decode_nal_units(h, p, nalsize) != nalsize) {
7538 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7539 return -1;
7541 p += nalsize;
7543         // Now store the right NAL length size, which will be used to parse all other NALs
7544 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7545 // Do not reparse avcC
7546 h->got_avcC = 1;
7549 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7550 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7551 return -1;
7554 buf_index=decode_nal_units(h, buf, buf_size);
7555 if(buf_index < 0)
7556 return -1;
7558 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7559 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7560 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7561 return -1;
7564 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7565 Picture *out = s->current_picture_ptr;
7566 Picture *cur = s->current_picture_ptr;
7567 int i, pics, cross_idr, out_of_order, out_idx;
7569 s->mb_y= 0;
7571 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7572 s->current_picture_ptr->pict_type= s->pict_type;
7574 if(!s->dropable) {
7575 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7576 h->prev_poc_msb= h->poc_msb;
7577 h->prev_poc_lsb= h->poc_lsb;
7579 h->prev_frame_num_offset= h->frame_num_offset;
7580 h->prev_frame_num= h->frame_num;
7583          * FIXME: The error handling code does not seem to support interlaced
7584          * video when slices span multiple rows.
7585          * The ff_er_add_slice() calls do not work correctly for bottom
7586          * fields; they cause massive erroneous error concealment.
7587          * Error marking covers both fields (top and bottom),
7588          * which causes a mismatched s->error_count
7589          * and a bad error table. Further, the error count goes to
7590          * INT_MAX when called for the bottom field, because mb_y is
7591          * past the end by one (the caller's fault) and resync_mb_y != 0
7592          * causes problems for the first MB line, too.
7594 if (!FIELD_PICTURE)
7595 ff_er_frame_end(s);
7597 MPV_frame_end(s);
7599 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7600 /* Wait for second field. */
7601 *data_size = 0;
7603 } else {
7604 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7605 /* Derive top_field_first from field pocs. */
7606 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7608 //FIXME do something with unavailable reference frames
7610 /* Sort B-frames into display order */
7612 if(h->sps.bitstream_restriction_flag
7613 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7614 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7615 s->low_delay = 0;
7618 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7619 && !h->sps.bitstream_restriction_flag){
7620 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7621 s->low_delay= 0;
7624 pics = 0;
7625 while(h->delayed_pic[pics]) pics++;
7627 assert(pics <= MAX_DELAYED_PIC_COUNT);
7629 h->delayed_pic[pics++] = cur;
7630 if(cur->reference == 0)
7631 cur->reference = DELAYED_PIC_REF;
7633 out = h->delayed_pic[0];
7634 out_idx = 0;
7635 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7636 if(h->delayed_pic[i]->poc < out->poc){
7637 out = h->delayed_pic[i];
7638 out_idx = i;
7640 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7642 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7644 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7646 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7647 || (s->low_delay &&
7648 ((!cross_idr && out->poc > h->outputed_poc + 2)
7649 || cur->pict_type == FF_B_TYPE)))
7651 s->low_delay = 0;
7652 s->avctx->has_b_frames++;
7655 if(out_of_order || pics > s->avctx->has_b_frames){
7656 out->reference &= ~DELAYED_PIC_REF;
7657 for(i=out_idx; h->delayed_pic[i]; i++)
7658 h->delayed_pic[i] = h->delayed_pic[i+1];
7660 if(!out_of_order && pics > s->avctx->has_b_frames){
7661 *data_size = sizeof(AVFrame);
7663 h->outputed_poc = out->poc;
7664 *pict= *(AVFrame*)out;
7665 }else{
7666 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7671 assert(pict->data[0] || !*data_size);
7672 ff_print_debug_info(s, pict);
7673 //printf("out %d\n", (int)pict->data[0]);
7674 #if 0 //?
7676 /* Return the Picture timestamp as the frame number */
7677         /* we subtract 1 because it is added in utils.c */
7678 avctx->frame_number = s->picture_number - 1;
7679 #endif
7680 return get_consumed_bytes(s, buf_index, buf_size);
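/* Illustration only: the output logic above buffers decoded pictures in
 * h->delayed_pic[] and, once more than avctx->has_b_frames of them are
 * pending, returns the one with the smallest POC.  A stripped-down
 * sketch of that selection (hypothetical helper, key-frame/IDR handling
 * omitted): */
#if 0
static Picture *pick_lowest_poc(Picture **delayed, int count)
{
    int i, best = 0;
    for (i = 1; i < count; i++)
        if (delayed[i]->poc < delayed[best]->poc)
            best = i;
    return delayed[best];
}
#endif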
7682 #if 0
7683 static inline void fill_mb_avail(H264Context *h){
7684 MpegEncContext * const s = &h->s;
7685 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7687 if(s->mb_y){
7688 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7689 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7690 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7691 }else{
7692 h->mb_avail[0]=
7693 h->mb_avail[1]=
7694 h->mb_avail[2]= 0;
7696 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7697 h->mb_avail[4]= 1; //FIXME move out
7698 h->mb_avail[5]= 0; //FIXME move out
7700 #endif
7702 #ifdef TEST
7703 #undef printf
7704 #undef random
7705 #define COUNT 8000
7706 #define SIZE (COUNT*40)
7707 int main(void){
7708 int i;
7709 uint8_t temp[SIZE];
7710 PutBitContext pb;
7711 GetBitContext gb;
7712 // int int_temp[10000];
7713 DSPContext dsp;
7714 AVCodecContext avctx;
7716 dsputil_init(&dsp, &avctx);
7718 init_put_bits(&pb, temp, SIZE);
7719 printf("testing unsigned exp golomb\n");
7720 for(i=0; i<COUNT; i++){
7721 START_TIMER
7722 set_ue_golomb(&pb, i);
7723 STOP_TIMER("set_ue_golomb");
7725 flush_put_bits(&pb);
7727 init_get_bits(&gb, temp, 8*SIZE);
7728 for(i=0; i<COUNT; i++){
7729 int j, s;
7731 s= show_bits(&gb, 24);
7733 START_TIMER
7734 j= get_ue_golomb(&gb);
7735 if(j != i){
7736 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7737 // return -1;
7739 STOP_TIMER("get_ue_golomb");
7743 init_put_bits(&pb, temp, SIZE);
7744 printf("testing signed exp golomb\n");
7745 for(i=0; i<COUNT; i++){
7746 START_TIMER
7747 set_se_golomb(&pb, i - COUNT/2);
7748 STOP_TIMER("set_se_golomb");
7750 flush_put_bits(&pb);
7752 init_get_bits(&gb, temp, 8*SIZE);
7753 for(i=0; i<COUNT; i++){
7754 int j, s;
7756 s= show_bits(&gb, 24);
7758 START_TIMER
7759 j= get_se_golomb(&gb);
7760 if(j != i - COUNT/2){
7761 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7762 // return -1;
7764 STOP_TIMER("get_se_golomb");
7767 #if 0
7768 printf("testing 4x4 (I)DCT\n");
7770 DCTELEM block[16];
7771 uint8_t src[16], ref[16];
7772 uint64_t error= 0, max_error=0;
7774 for(i=0; i<COUNT; i++){
7775 int j;
7776 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7777 for(j=0; j<16; j++){
7778 ref[j]= random()%255;
7779 src[j]= random()%255;
7782 h264_diff_dct_c(block, src, ref, 4);
7784 //normalize
7785 for(j=0; j<16; j++){
7786 // printf("%d ", block[j]);
7787 block[j]= block[j]*4;
7788 if(j&1) block[j]= (block[j]*4 + 2)/5;
7789 if(j&4) block[j]= (block[j]*4 + 2)/5;
7791 // printf("\n");
7793 s->dsp.h264_idct_add(ref, block, 4);
7794 /* for(j=0; j<16; j++){
7795 printf("%d ", ref[j]);
7797 printf("\n");*/
7799 for(j=0; j<16; j++){
7800 int diff= FFABS(src[j] - ref[j]);
7802 error+= diff*diff;
7803 max_error= FFMAX(max_error, diff);
7806 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7807 printf("testing quantizer\n");
7808 for(qp=0; qp<52; qp++){
7809 for(i=0; i<16; i++)
7810 src1_block[i]= src2_block[i]= random()%255;
7813 printf("Testing NAL layer\n");
7815 uint8_t bitstream[COUNT];
7816 uint8_t nal[COUNT*2];
7817 H264Context h;
7818 memset(&h, 0, sizeof(H264Context));
7820 for(i=0; i<COUNT; i++){
7821 int zeros= i;
7822 int nal_length;
7823 int consumed;
7824 int out_length;
7825 uint8_t *out;
7826 int j;
7828 for(j=0; j<COUNT; j++){
7829 bitstream[j]= (random() % 255) + 1;
7832 for(j=0; j<zeros; j++){
7833 int pos= random() % COUNT;
7834 while(bitstream[pos] == 0){
7835 pos++;
7836 pos %= COUNT;
7838 bitstream[pos]=0;
7841 START_TIMER
7843 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7844 if(nal_length<0){
7845 printf("encoding failed\n");
7846 return -1;
7849 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7851 STOP_TIMER("NAL")
7853 if(out_length != COUNT){
7854 printf("incorrect length %d %d\n", out_length, COUNT);
7855 return -1;
7858 if(consumed != nal_length){
7859 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7860 return -1;
7863 if(memcmp(bitstream, out, COUNT)){
7864 printf("mismatch\n");
7865 return -1;
7868 #endif
7870 printf("Testing RBSP\n");
7873 return 0;
7875 #endif /* TEST */
7878 static av_cold int decode_end(AVCodecContext *avctx)
7880 H264Context *h = avctx->priv_data;
7881 MpegEncContext *s = &h->s;
7883 av_freep(&h->rbsp_buffer[0]);
7884 av_freep(&h->rbsp_buffer[1]);
7885 free_tables(h); //FIXME cleanup init stuff perhaps
7886 MPV_common_end(s);
7888 // memset(h, 0, sizeof(H264Context));
7890 return 0;
7894 AVCodec h264_decoder = {
7895 "h264",
7896 CODEC_TYPE_VIDEO,
7897 CODEC_ID_H264,
7898 sizeof(H264Context),
7899 decode_init,
7900 NULL,
7901 decode_end,
7902 decode_frame,
7903 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7904 .flush= flush_dpb,
7905 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
7908 #include "svq3.c"