Use table symmetry to reduce their size by half.
[ffmpeg-lucabe.git] / libavcodec / h264.c
blob 98866c77af1a20f63a3900b81ccafe5cdf70f85a
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h"
33 #include "h264_parser.h"
34 #include "golomb.h"
35 #include "rectangle.h"
37 #include "cabac.h"
38 #ifdef ARCH_X86
39 #include "i386/h264_i386.h"
40 #endif
42 //#undef NDEBUG
43 #include <assert.h>
45 /**
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
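/* Statically allocated VLC lookup tables for CAVLC residual decoding;
 * the *_size constants record how many table entries each VLC may use. */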
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
71 static VLC run7_vlc;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
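/**
 * Packs two 16-bit values (typically a motion vector pair) into one 32-bit
 * word so that a single 32-bit store writes both components in memory order,
 * independent of host endianness.
 */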
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
84 #else
85 return (a&0xFFFF) + (b<<16);
86 #endif
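/* Lookup tables giving qp%6 and qp/6 for all 52 QP values, so the dequant
 * code can avoid divisions. */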
89 const uint8_t ff_rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
93 const uint8_t ff_div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
97 static const int left_block_options[4][8]={
98 {0,1,2,3,7,10,8,11},
99 {2,2,3,3,8,11,8,11},
100 {0,0,1,1,7,10,7,10},
101 {0,2,0,2,7,10,7,10}
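/**
 * Fills the per-macroblock caches (intra prediction modes, non-zero counts,
 * motion vectors, reference indices, ...) from the neighbouring macroblocks,
 * taking MBAFF and slice boundaries into account.
 * @param for_deblock nonzero when the caches are filled for the loop filter
 *                    rather than for decoding
 */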
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 int * left_block;
110 int topleft_partition= -1;
111 int i;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
117 return;
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
126 if(FRAME_MBAFF){
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
138 if (bottom
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
144 if (bottom
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
154 if (bottom
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
163 if (bottom) {
164 left_block = left_block_options[1];
165 } else {
166 left_block= left_block_options[2];
168 } else {
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
178 if(for_deblock){
179 topleft_type = 0;
180 topright_type = 0;
181 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
186 int list;
187 for(list=0; list<h->list_count; list++){
188 //These values were changed for ease of performing MC; we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
195 ref += h->b8_stride;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
201 }else{
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
230 }else{
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
239 }else{
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 }else{
259 int pred;
260 if(!(top_type & type_mask))
261 pred= -1;
262 else{
263 pred= 2;
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
270 for(i=0; i<2; i++){
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 }else{
275 int pred;
276 if(!(left_type[i] & type_mask))
277 pred= -1;
278 else{
279 pred= 2;
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/*
291 0 . T T. T T T T
292 1 L . .L . . . .
293 2 L . .L . . . .
294 3 . T TL . . . .
295 4 L . .L . . . .
296 5 L . .. . . . .
*/
298 //FIXME constrained_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
299 if(top_type){
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
311 }else{
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
326 if(left_type[i]){
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
331 }else{
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
339 if( h->pps.cabac ) {
340 // top_cbp
341 if(top_type) {
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
344 h->top_cbp = 0x1C0;
345 } else {
346 h->top_cbp = 0;
348 // left_cbp
349 if (left_type[0]) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
352 h->left_cbp = 0x1C0;
353 } else {
354 h->left_cbp = 0;
356 if (left_type[0]) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
359 if (left_type[1]) {
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
364 #if 1
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
366 int list;
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
374 continue;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
389 }else{
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
397 for(i=0; i<2; i++){
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
406 }else{
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
415 continue;
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
422 }else{
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
432 }else{
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
438 continue;
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
451 if( h->pps.cabac ) {
452 /* XXX yuck, load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
459 }else{
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
469 }else{
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
477 }else{
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
496 }else{
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
504 else
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
511 else
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
516 if(FRAME_MBAFF){
517 #define MAP_MVS\
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
528 if(MB_FIELD){
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
535 MAP_MVS
536 #undef MAP_F2F
537 }else{
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
544 MAP_MVS
545 #undef MAP_F2F
550 #endif
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
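/**
 * Copies the intra4x4 prediction modes of the current macroblock from the
 * cache back into the frame-wide intra4x4_pred_mode array.
 */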
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
574 int i;
576 if(!(h->top_samples_available&0x8000)){
577 for(i=0; i<4; i++){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
579 if(status<0){
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
581 return -1;
582 } else if(status){
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
590 for(i=0; i<4; i++){
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
593 if(status<0){
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
595 return -1;
596 } else if(status){
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
603 return 0;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
614 if(mode > 6U) {
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
616 return -1;
619 if(!(h->top_samples_available&0x8000)){
620 mode= top[ mode ];
621 if(mode<0){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
623 return -1;
627 if((h->left_samples_available&0x8080) != 0x8080){
628 mode= left[ mode ];
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
632 if(mode<0){
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
634 return -1;
638 return mode;
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
653 else return min;
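/**
 * Copies the non-zero coefficient counts of the current macroblock from the
 * cache back into the frame-wide non_zero_count array.
 */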
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
684 int i= left + top;
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
690 return i&31;
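/**
 * Returns the reference index of the top-right (diagonal) neighbour and sets
 * *C to its motion vector, falling back to the top-left neighbour when the
 * top-right block is unavailable; contains the special cases needed for
 * MBAFF frame/field mixing.
 */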
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
699 if(FRAME_MBAFF){
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
701 const int16_t *mv;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
705 if(!MB_FIELD
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 if(!MB_FIELD
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 if(MB_FIELD
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 && i >= scan8[0]+8){
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
736 #undef SET_DIAG_MV
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
741 return topright_ref;
742 }else{
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 const int16_t * C;
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
768 /* mv_cache
769 B . . A T T T T
770 U . . L . . , .
771 U . . L . . . .
772 U . . L . . , .
773 . . . L . . . .
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
783 if(left_ref==ref){
784 *mx= A[0];
785 *my= A[1];
786 }else if(top_ref==ref){
787 *mx= B[0];
788 *my= B[1];
789 }else{
790 *mx= C[0];
791 *my= C[1];
793 }else{
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
795 *mx= A[0];
796 *my= A[1];
797 }else{
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
813 if(n==0){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
819 if(top_ref == ref){
820 *mx= B[0];
821 *my= B[1];
822 return;
824 }else{
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
830 if(left_ref == ref){
831 *mx= A[0];
832 *my= A[1];
833 return;
837 //RARE
838 pred_motion(h, n, 4, list, ref, mx, my);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
848 if(n==0){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
854 if(left_ref == ref){
855 *mx= A[0];
856 *my= A[1];
857 return;
859 }else{
860 const int16_t * C;
861 int diagonal_ref;
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
868 *mx= C[0];
869 *my= C[1];
870 return;
874 //RARE
875 pred_motion(h, n, 2, list, ref, mx, my);
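/**
 * Gets the predicted MV for a P_Skip macroblock: zero if the top or left
 * block is unavailable or has a zero MV with reference index 0, otherwise
 * the median prediction.
 */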
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
888 *mx = *my = 0;
889 return;
892 pred_motion(h, 0, 4, 0, 0, mx, my);
894 return;
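/**
 * Computes the temporal direct distance scale factor for every list0
 * reference from the POC distances tb and td; long-term references and
 * td == 0 get the neutral factor 256.
 */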
897 static inline void direct_dist_scale_factor(H264Context * const h){
898 MpegEncContext * const s = &h->s;
899 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
900 const int poc1 = h->ref_list[1][0].poc;
901 int i;
902 for(i=0; i<h->ref_count[0]; i++){
903 int poc0 = h->ref_list[0][i].poc;
904 int td = av_clip(poc1 - poc0, -128, 127);
905 if(td == 0 || h->ref_list[0][i].long_ref){
906 h->dist_scale_factor[i] = 256;
907 }else{
908 int tb = av_clip(poc - poc0, -128, 127);
909 int tx = (16384 + (FFABS(td) >> 1)) / td;
910 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
913 if(FRAME_MBAFF){
914 for(i=0; i<h->ref_count[0]; i++){
915 h->dist_scale_factor_field[2*i] =
916 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
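/**
 * Stores the reference counts and POCs of the current picture and, for
 * temporal direct prediction, builds map_col_to_list0[], mapping each
 * reference of the co-located picture to the matching list0 index.
 */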
920 static inline void direct_ref_list_init(H264Context * const h){
921 MpegEncContext * const s = &h->s;
922 Picture * const ref1 = &h->ref_list[1][0];
923 Picture * const cur = s->current_picture_ptr;
924 int list, i, j;
925 int sidx= s->picture_structure&1;
926 int ref1sidx= ref1->reference&1;
927 for(list=0; list<2; list++){
928 cur->ref_count[sidx][list] = h->ref_count[list];
929 for(j=0; j<h->ref_count[list]; j++)
930 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
932 if(s->picture_structure == PICT_FRAME){
933 memcpy(cur->ref_count[0], cur->ref_count[1], sizeof(cur->ref_count[0]));
934 memcpy(cur->ref_poc [0], cur->ref_poc [1], sizeof(cur->ref_poc [0]));
936 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
937 return;
938 for(list=0; list<2; list++){
939 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
940 int poc = ref1->ref_poc[ref1sidx][list][i];
941 if(((poc&3) == 3) != (s->picture_structure == PICT_FRAME))
942 poc= (poc&~3) + s->picture_structure;
943 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
944 for(j=0; j<h->ref_count[list]; j++)
945 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
946 h->map_col_to_list0[list][i] = j;
947 break;
951 if(FRAME_MBAFF){
952 for(list=0; list<2; list++){
953 for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
954 j = h->map_col_to_list0[list][i];
955 h->map_col_to_list0_field[list][2*i] = 2*j;
956 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
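/**
 * Derives motion vectors, reference indices and sub-macroblock types for a
 * B-direct macroblock, using either spatial or temporal direct prediction
 * and handling the frame/field combinations of the co-located picture.
 */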
962 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
963 MpegEncContext * const s = &h->s;
964 int b8_stride = h->b8_stride;
965 int b4_stride = h->b_stride;
966 int mb_xy = h->mb_xy;
967 int mb_type_col[2];
968 const int16_t (*l1mv0)[2], (*l1mv1)[2];
969 const int8_t *l1ref0, *l1ref1;
970 const int is_b8x8 = IS_8X8(*mb_type);
971 unsigned int sub_mb_type;
972 int i8, i4;
974 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
976 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
977 if(h->ref_list[1][0].reference == PICT_FRAME){ // AFL/AFR/FR/FL -> AFL
978 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL
979 int cur_poc = s->current_picture_ptr->poc;
980 int *col_poc = h->ref_list[1]->field_poc;
981 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
982 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
983 b8_stride = 0;
985 }else if(!(s->picture_structure & h->ref_list[1][0].reference)){// FL -> FL & differ parity
986 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
987 mb_xy += s->mb_stride*fieldoff;
989 goto single_col;
990 }else{ // AFL/AFR/FR/FL -> AFR/FR
991 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
992 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
993 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
994 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
995 b8_stride *= 3;
996 b4_stride *= 6;
997 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
998 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
999 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1000 && !is_b8x8){
1001 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1002 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1003 }else{
1004 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1005 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1007 }else{ // AFR/FR -> AFR/FR
1008 single_col:
1009 mb_type_col[0] =
1010 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1011 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1012 /* FIXME save sub mb types from previous frames (or derive from MVs)
1013 * so we know exactly what block size to use */
1014 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1015 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1016 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1017 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1018 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1019 }else{
1020 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1021 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1026 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1027 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1028 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1029 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1030 if(!b8_stride){
1031 if(s->mb_y&1){
1032 l1ref0 += h->b8_stride;
1033 l1ref1 += h->b8_stride;
1034 l1mv0 += 2*b4_stride;
1035 l1mv1 += 2*b4_stride;
1039 if(h->direct_spatial_mv_pred){
1040 int ref[2];
1041 int mv[2][2];
1042 int list;
1044 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1046 /* ref = min(neighbors) */
1047 for(list=0; list<2; list++){
1048 int refa = h->ref_cache[list][scan8[0] - 1];
1049 int refb = h->ref_cache[list][scan8[0] - 8];
1050 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1051 if(refc == PART_NOT_AVAILABLE)
1052 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1053 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1054 if(ref[list] < 0)
1055 ref[list] = -1;
1058 if(ref[0] < 0 && ref[1] < 0){
1059 ref[0] = ref[1] = 0;
1060 mv[0][0] = mv[0][1] =
1061 mv[1][0] = mv[1][1] = 0;
1062 }else{
1063 for(list=0; list<2; list++){
1064 if(ref[list] >= 0)
1065 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1066 else
1067 mv[list][0] = mv[list][1] = 0;
1071 if(ref[1] < 0){
1072 if(!is_b8x8)
1073 *mb_type &= ~MB_TYPE_L1;
1074 sub_mb_type &= ~MB_TYPE_L1;
1075 }else if(ref[0] < 0){
1076 if(!is_b8x8)
1077 *mb_type &= ~MB_TYPE_L0;
1078 sub_mb_type &= ~MB_TYPE_L0;
1081 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1082 for(i8=0; i8<4; i8++){
1083 int x8 = i8&1;
1084 int y8 = i8>>1;
1085 int xy8 = x8+y8*b8_stride;
1086 int xy4 = 3*x8+y8*b4_stride;
1087 int a=0, b=0;
1089 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1090 continue;
1091 h->sub_mb_type[i8] = sub_mb_type;
1093 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1094 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1095 if(!IS_INTRA(mb_type_col[y8])
1096 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1097 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1098 if(ref[0] > 0)
1099 a= pack16to32(mv[0][0],mv[0][1]);
1100 if(ref[1] > 0)
1101 b= pack16to32(mv[1][0],mv[1][1]);
1102 }else{
1103 a= pack16to32(mv[0][0],mv[0][1]);
1104 b= pack16to32(mv[1][0],mv[1][1]);
1106 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1107 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1109 }else if(IS_16X16(*mb_type)){
1110 int a=0, b=0;
1112 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1113 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1114 if(!IS_INTRA(mb_type_col[0])
1115 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1116 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1117 && (h->x264_build>33 || !h->x264_build)))){
1118 if(ref[0] > 0)
1119 a= pack16to32(mv[0][0],mv[0][1]);
1120 if(ref[1] > 0)
1121 b= pack16to32(mv[1][0],mv[1][1]);
1122 }else{
1123 a= pack16to32(mv[0][0],mv[0][1]);
1124 b= pack16to32(mv[1][0],mv[1][1]);
1126 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1127 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1128 }else{
1129 for(i8=0; i8<4; i8++){
1130 const int x8 = i8&1;
1131 const int y8 = i8>>1;
1133 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1134 continue;
1135 h->sub_mb_type[i8] = sub_mb_type;
1137 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1138 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1139 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1140 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1142 /* col_zero_flag */
1143 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1144 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1145 && (h->x264_build>33 || !h->x264_build)))){
1146 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1147 if(IS_SUB_8X8(sub_mb_type)){
1148 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1149 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1150 if(ref[0] == 0)
1151 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1152 if(ref[1] == 0)
1153 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1155 }else
1156 for(i4=0; i4<4; i4++){
1157 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1158 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1159 if(ref[0] == 0)
1160 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1161 if(ref[1] == 0)
1162 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1168 }else{ /* direct temporal mv pred */
1169 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1170 const int *dist_scale_factor = h->dist_scale_factor;
1172 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1173 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1174 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1175 dist_scale_factor = h->dist_scale_factor_field;
1177 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1178 /* FIXME assumes direct_8x8_inference == 1 */
1179 int y_shift = 2*!IS_INTERLACED(*mb_type);
1180 int ref_shift= FRAME_MBAFF ? y_shift : 1;
1182 for(i8=0; i8<4; i8++){
1183 const int x8 = i8&1;
1184 const int y8 = i8>>1;
1185 int ref0, scale;
1186 const int16_t (*l1mv)[2]= l1mv0;
1188 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1189 continue;
1190 h->sub_mb_type[i8] = sub_mb_type;
1192 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1193 if(IS_INTRA(mb_type_col[y8])){
1194 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1195 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1196 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1197 continue;
1200 ref0 = l1ref0[x8 + y8*b8_stride];
1201 if(ref0 >= 0)
1202 ref0 = map_col_to_list0[0][ref0*2>>ref_shift];
1203 else{
1204 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]*2>>ref_shift];
1205 l1mv= l1mv1;
1207 scale = dist_scale_factor[ref0];
1208 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1211 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1212 int my_col = (mv_col[1]<<y_shift)/2;
1213 int mx = (scale * mv_col[0] + 128) >> 8;
1214 int my = (scale * my_col + 128) >> 8;
1215 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1216 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1219 return;
1222 /* one-to-one mv scaling */
1224 if(IS_16X16(*mb_type)){
1225 int ref, mv0, mv1;
1227 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1228 if(IS_INTRA(mb_type_col[0])){
1229 ref=mv0=mv1=0;
1230 }else{
1231 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1232 : map_col_to_list0[1][l1ref1[0]];
1233 const int scale = dist_scale_factor[ref0];
1234 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1235 int mv_l0[2];
1236 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1237 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1238 ref= ref0;
1239 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1240 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1242 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1243 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1244 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1245 }else{
1246 for(i8=0; i8<4; i8++){
1247 const int x8 = i8&1;
1248 const int y8 = i8>>1;
1249 int ref0, scale;
1250 const int16_t (*l1mv)[2]= l1mv0;
1252 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1253 continue;
1254 h->sub_mb_type[i8] = sub_mb_type;
1255 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1256 if(IS_INTRA(mb_type_col[0])){
1257 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1258 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1259 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1260 continue;
1263 ref0 = l1ref0[x8 + y8*b8_stride];
1264 if(ref0 >= 0)
1265 ref0 = map_col_to_list0[0][ref0];
1266 else{
1267 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]];
1268 l1mv= l1mv1;
1270 scale = dist_scale_factor[ref0];
1272 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1273 if(IS_SUB_8X8(sub_mb_type)){
1274 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1275 int mx = (scale * mv_col[0] + 128) >> 8;
1276 int my = (scale * mv_col[1] + 128) >> 8;
1277 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1278 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1279 }else
1280 for(i4=0; i4<4; i4++){
1281 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1282 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1283 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1284 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1285 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1286 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
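/**
 * Copies the motion vectors, reference indices, (for CABAC) motion vector
 * differences and, for B slices, the direct flags of the current macroblock
 * from the caches back into the frame-wide tables.
 */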
1293 static inline void write_back_motion(H264Context *h, int mb_type){
1294 MpegEncContext * const s = &h->s;
1295 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1296 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1297 int list;
1299 if(!USES_LIST(mb_type, 0))
1300 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1302 for(list=0; list<h->list_count; list++){
1303 int y;
1304 if(!USES_LIST(mb_type, list))
1305 continue;
1307 for(y=0; y<4; y++){
1308 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1309 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1311 if( h->pps.cabac ) {
1312 if(IS_SKIP(mb_type))
1313 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1314 else
1315 for(y=0; y<4; y++){
1316 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1317 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1322 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1323 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1324 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1325 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1326 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1330 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1331 if(IS_8X8(mb_type)){
1332 uint8_t *direct_table = &h->direct_table[b8_xy];
1333 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1334 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1335 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1341 * Decodes a network abstraction layer unit.
1342 * @param consumed is the number of bytes used as input
1343 * @param length is the length of the array
1344 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1345 * @returns decoded bytes, might be src+1 if no escapes
1347 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1348 int i, si, di;
1349 uint8_t *dst;
1350 int bufidx;
1352 // src[0]&0x80; //forbidden bit
1353 h->nal_ref_idc= src[0]>>5;
1354 h->nal_unit_type= src[0]&0x1F;
1356 src++; length--;
1357 #if 0
1358 for(i=0; i<length; i++)
1359 printf("%2X ", src[i]);
1360 #endif
1361 for(i=0; i+1<length; i+=2){
1362 if(src[i]) continue;
1363 if(i>0 && src[i-1]==0) i--;
1364 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1365 if(src[i+2]!=3){
1366 /* startcode, so we must be past the end */
1367 length=i;
1369 break;
1373 if(i>=length-1){ //no escaped 0
1374 *dst_length= length;
1375 *consumed= length+1; //+1 for the header
1376 return src;
1379 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1380 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1381 dst= h->rbsp_buffer[bufidx];
1383 if (dst == NULL){
1384 return NULL;
1387 //printf("decoding esc\n");
1388 si=di=0;
1389 while(si<length){
1390 //remove escapes (very rare 1:2^22)
1391 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1392 if(src[si+2]==3){ //escape
1393 dst[di++]= 0;
1394 dst[di++]= 0;
1395 si+=3;
1396 continue;
1397 }else //next start code
1398 break;
1401 dst[di++]= src[si++];
1404 *dst_length= di;
1405 *consumed= si + 1;//+1 for the header
1406 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1407 return dst;
1411 * identifies the exact end of the bitstream
1412 * @return the length of the trailing, or 0 if damaged
1414 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1415 int v= *src;
1416 int r;
1418 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1420 for(r=1; r<9; r++){
1421 if(v&1) return r;
1422 v>>=1;
1424 return 0;
1428 * IDCT transforms the 16 dc values and dequantizes them.
1429 * @param qp quantization parameter
1431 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1432 #define stride 16
1433 int i;
1434 int temp[16]; //FIXME check if this is a good idea
1435 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1436 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1438 //memset(block, 64, 2*256);
1439 //return;
1440 for(i=0; i<4; i++){
1441 const int offset= y_offset[i];
1442 const int z0= block[offset+stride*0] + block[offset+stride*4];
1443 const int z1= block[offset+stride*0] - block[offset+stride*4];
1444 const int z2= block[offset+stride*1] - block[offset+stride*5];
1445 const int z3= block[offset+stride*1] + block[offset+stride*5];
1447 temp[4*i+0]= z0+z3;
1448 temp[4*i+1]= z1+z2;
1449 temp[4*i+2]= z1-z2;
1450 temp[4*i+3]= z0-z3;
1453 for(i=0; i<4; i++){
1454 const int offset= x_offset[i];
1455 const int z0= temp[4*0+i] + temp[4*2+i];
1456 const int z1= temp[4*0+i] - temp[4*2+i];
1457 const int z2= temp[4*1+i] - temp[4*3+i];
1458 const int z3= temp[4*1+i] + temp[4*3+i];
1460 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1461 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1462 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1463 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1467 #if 0
1469 * DCT transforms the 16 dc values.
1470 * @param qp quantization parameter ??? FIXME
1472 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1473 // const int qmul= dequant_coeff[qp][0];
1474 int i;
1475 int temp[16]; //FIXME check if this is a good idea
1476 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1477 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1479 for(i=0; i<4; i++){
1480 const int offset= y_offset[i];
1481 const int z0= block[offset+stride*0] + block[offset+stride*4];
1482 const int z1= block[offset+stride*0] - block[offset+stride*4];
1483 const int z2= block[offset+stride*1] - block[offset+stride*5];
1484 const int z3= block[offset+stride*1] + block[offset+stride*5];
1486 temp[4*i+0]= z0+z3;
1487 temp[4*i+1]= z1+z2;
1488 temp[4*i+2]= z1-z2;
1489 temp[4*i+3]= z0-z3;
1492 for(i=0; i<4; i++){
1493 const int offset= x_offset[i];
1494 const int z0= temp[4*0+i] + temp[4*2+i];
1495 const int z1= temp[4*0+i] - temp[4*2+i];
1496 const int z2= temp[4*1+i] - temp[4*3+i];
1497 const int z3= temp[4*1+i] + temp[4*3+i];
1499 block[stride*0 +offset]= (z0 + z3)>>1;
1500 block[stride*2 +offset]= (z1 + z2)>>1;
1501 block[stride*8 +offset]= (z1 - z2)>>1;
1502 block[stride*10+offset]= (z0 - z3)>>1;
1505 #endif
1507 #undef xStride
1508 #undef stride
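/**
 * Transforms and dequantizes the 2x2 chroma DC block in place.
 */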
1510 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1511 const int stride= 16*2;
1512 const int xStride= 16;
1513 int a,b,c,d,e;
1515 a= block[stride*0 + xStride*0];
1516 b= block[stride*0 + xStride*1];
1517 c= block[stride*1 + xStride*0];
1518 d= block[stride*1 + xStride*1];
1520 e= a-b;
1521 a= a+b;
1522 b= c-d;
1523 c= c+d;
1525 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1526 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1527 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1528 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1531 #if 0
1532 static void chroma_dc_dct_c(DCTELEM *block){
1533 const int stride= 16*2;
1534 const int xStride= 16;
1535 int a,b,c,d,e;
1537 a= block[stride*0 + xStride*0];
1538 b= block[stride*0 + xStride*1];
1539 c= block[stride*1 + xStride*0];
1540 d= block[stride*1 + xStride*1];
1542 e= a-b;
1543 a= a+b;
1544 b= c-d;
1545 c= c+d;
1547 block[stride*0 + xStride*0]= (a+c);
1548 block[stride*0 + xStride*1]= (e+b);
1549 block[stride*1 + xStride*0]= (a-c);
1550 block[stride*1 + xStride*1]= (e-b);
1552 #endif
1555 * gets the chroma qp.
1557 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1558 return h->pps.chroma_qp_table[t][qscale];
1561 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1562 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
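/**
 * Quantizes a block of coefficients in place (encoder path) and returns the
 * scan position of the last non-zero coefficient.
 */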
1563 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1564 int i;
1565 const int * const quant_table= quant_coeff[qscale];
1566 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1567 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1568 const unsigned int threshold2= (threshold1<<1);
1569 int last_non_zero;
1571 if(separate_dc){
1572 if(qscale<=18){
1573 //avoid overflows
1574 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1575 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1576 const unsigned int dc_threshold2= (dc_threshold1<<1);
1578 int level= block[0]*quant_coeff[qscale+18][0];
1579 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1580 if(level>0){
1581 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1582 block[0]= level;
1583 }else{
1584 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1585 block[0]= -level;
1587 // last_non_zero = i;
1588 }else{
1589 block[0]=0;
1591 }else{
1592 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1593 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1594 const unsigned int dc_threshold2= (dc_threshold1<<1);
1596 int level= block[0]*quant_table[0];
1597 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1598 if(level>0){
1599 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1600 block[0]= level;
1601 }else{
1602 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1603 block[0]= -level;
1605 // last_non_zero = i;
1606 }else{
1607 block[0]=0;
1610 last_non_zero= 0;
1611 i=1;
1612 }else{
1613 last_non_zero= -1;
1614 i=0;
1617 for(; i<16; i++){
1618 const int j= scantable[i];
1619 int level= block[j]*quant_table[j];
1621 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1622 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1623 if(((unsigned)(level+threshold1))>threshold2){
1624 if(level>0){
1625 level= (bias + level)>>QUANT_SHIFT;
1626 block[j]= level;
1627 }else{
1628 level= (bias - level)>>QUANT_SHIFT;
1629 block[j]= -level;
1631 last_non_zero = i;
1632 }else{
1633 block[j]=0;
1637 return last_non_zero;
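/* Note on the dead zone above (illustrative): each level is quantized as
 *   |out| = (|block[j]|*quant_table[j] + bias) >> QUANT_SHIFT
 * with bias = (1<<QUANT_SHIFT)/3 for intra and /6 for inter blocks, and
 * anything whose scaled magnitude stays below threshold1 = (1<<QUANT_SHIFT) - bias - 1
 * is forced to zero, so inter blocks get a wider dead zone around zero than intra blocks. */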
1640 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1641 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1642 int src_x_offset, int src_y_offset,
1643 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1644 MpegEncContext * const s = &h->s;
1645 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1646 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1647 const int luma_xy= (mx&3) + ((my&3)<<2);
1648 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1649 uint8_t * src_cb, * src_cr;
1650 int extra_width= h->emu_edge_width;
1651 int extra_height= h->emu_edge_height;
1652 int emu=0;
1653 const int full_mx= mx>>2;
1654 const int full_my= my>>2;
1655 const int pic_width = 16*s->mb_width;
1656 const int pic_height = 16*s->mb_height >> MB_FIELD;
1658 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
1659 return;
1661 if(mx&7) extra_width -= 3;
1662 if(my&7) extra_height -= 3;
1664 if( full_mx < 0-extra_width
1665 || full_my < 0-extra_height
1666 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1667 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1668 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1669 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1670 emu=1;
1673 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1674 if(!square){
1675 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1678 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1680 if(MB_FIELD){
1681 // chroma offset when predicting from a field of opposite parity
1682 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1683 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1685 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1686 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1688 if(emu){
1689 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1690 src_cb= s->edge_emu_buffer;
1692 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1694 if(emu){
1695 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1696 src_cr= s->edge_emu_buffer;
1698 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1701 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1702 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1703 int x_offset, int y_offset,
1704 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1705 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1706 int list0, int list1){
1707 MpegEncContext * const s = &h->s;
1708 qpel_mc_func *qpix_op= qpix_put;
1709 h264_chroma_mc_func chroma_op= chroma_put;
1711 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1712 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1713 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1714 x_offset += 8*s->mb_x;
1715 y_offset += 8*(s->mb_y >> MB_FIELD);
1717 if(list0){
1718 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1719 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1720 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1721 qpix_op, chroma_op);
1723 qpix_op= qpix_avg;
1724 chroma_op= chroma_avg;
1727 if(list1){
1728 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1729 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1730 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1731 qpix_op, chroma_op);
1735 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1736 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1737 int x_offset, int y_offset,
1738 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1739 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1740 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1741 int list0, int list1){
1742 MpegEncContext * const s = &h->s;
1744 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1745 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1746 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1747 x_offset += 8*s->mb_x;
1748 y_offset += 8*(s->mb_y >> MB_FIELD);
1750 if(list0 && list1){
1751 /* don't optimize for luma-only case, since B-frames usually
1752 * use implicit weights => chroma too. */
1753 uint8_t *tmp_cb = s->obmc_scratchpad;
1754 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1755 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1756 int refn0 = h->ref_cache[0][ scan8[n] ];
1757 int refn1 = h->ref_cache[1][ scan8[n] ];
1759 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1760 dest_y, dest_cb, dest_cr,
1761 x_offset, y_offset, qpix_put, chroma_put);
1762 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1763 tmp_y, tmp_cb, tmp_cr,
1764 x_offset, y_offset, qpix_put, chroma_put);
1766 if(h->use_weight == 2){
1767 int weight0 = h->implicit_weight[refn0][refn1];
1768 int weight1 = 64 - weight0;
1769 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1770 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1771 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1772 }else{
1773 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1774 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1775 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1776 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1777 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1778 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1779 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1780 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1781 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1783 }else{
1784 int list = list1 ? 1 : 0;
1785 int refn = h->ref_cache[list][ scan8[n] ];
1786 Picture *ref= &h->ref_list[list][refn];
1787 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1788 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1789 qpix_put, chroma_put);
1791 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1792 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1793 if(h->use_weight_chroma){
1794 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1795 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1796 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1797 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1802 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1803 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1804 int x_offset, int y_offset,
1805 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1806 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1807 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1808 int list0, int list1){
1809 if((h->use_weight==2 && list0 && list1
1810 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1811 || h->use_weight==1)
1812 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1813 x_offset, y_offset, qpix_put, chroma_put,
1814 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1815 else
1816 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1817 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1820 static inline void prefetch_motion(H264Context *h, int list){
1821 /* fetch pixels for estimated mv 4 macroblocks ahead
1822 * optimized for 64-byte cache lines */
1823 MpegEncContext * const s = &h->s;
1824 const int refn = h->ref_cache[list][scan8[0]];
1825 if(refn >= 0){
1826 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1827 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1828 uint8_t **src= h->ref_list[list][refn].data;
1829 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1830 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1831 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1832 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1836 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1837 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1838 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1839 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1840 MpegEncContext * const s = &h->s;
1841 const int mb_xy= h->mb_xy;
1842 const int mb_type= s->current_picture.mb_type[mb_xy];
1844 assert(IS_INTER(mb_type));
1846 prefetch_motion(h, 0);
1848 if(IS_16X16(mb_type)){
1849 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1850 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1851 &weight_op[0], &weight_avg[0],
1852 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1853 }else if(IS_16X8(mb_type)){
1854 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1855 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1856 &weight_op[1], &weight_avg[1],
1857 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1858 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1859 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1860 &weight_op[1], &weight_avg[1],
1861 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1862 }else if(IS_8X16(mb_type)){
1863 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1864 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1865 &weight_op[2], &weight_avg[2],
1866 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1867 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1868 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1869 &weight_op[2], &weight_avg[2],
1870 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1871 }else{
1872 int i;
1874 assert(IS_8X8(mb_type));
1876 for(i=0; i<4; i++){
1877 const int sub_mb_type= h->sub_mb_type[i];
1878 const int n= 4*i;
1879 int x_offset= (i&1)<<2;
1880 int y_offset= (i&2)<<1;
1882 if(IS_SUB_8X8(sub_mb_type)){
1883 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1884 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1885 &weight_op[3], &weight_avg[3],
1886 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1887 }else if(IS_SUB_8X4(sub_mb_type)){
1888 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1889 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1890 &weight_op[4], &weight_avg[4],
1891 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1892 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1893 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1894 &weight_op[4], &weight_avg[4],
1895 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1896 }else if(IS_SUB_4X8(sub_mb_type)){
1897 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1898 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1899 &weight_op[5], &weight_avg[5],
1900 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1901 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1902 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1903 &weight_op[5], &weight_avg[5],
1904 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1905 }else{
1906 int j;
1907 assert(IS_SUB_4X4(sub_mb_type));
1908 for(j=0; j<4; j++){
1909 int sub_x_offset= x_offset + 2*(j&1);
1910 int sub_y_offset= y_offset + (j&2);
1911 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1912 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1913 &weight_op[6], &weight_avg[6],
1914 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1920 prefetch_motion(h, 1);
1923 static av_cold void decode_init_vlc(void){
1924 static int done = 0;
1926 if (!done) {
1927 int i;
1928 int offset;
1929 done = 1;
1931 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1932 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1933 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1934 &chroma_dc_coeff_token_len [0], 1, 1,
1935 &chroma_dc_coeff_token_bits[0], 1, 1,
1936 INIT_VLC_USE_NEW_STATIC);
1938 offset = 0;
1939 for(i=0; i<4; i++){
1940 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1941 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1942 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1943 &coeff_token_len [i][0], 1, 1,
1944 &coeff_token_bits[i][0], 1, 1,
1945 INIT_VLC_USE_NEW_STATIC);
1946 offset += coeff_token_vlc_tables_size[i];
1949 * This is a one-time safety check to make sure that
1950 * the packed static coeff_token_vlc table sizes
1951 * were initialized correctly.
1953 assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));
1955 for(i=0; i<3; i++){
1956 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1957 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1958 init_vlc(&chroma_dc_total_zeros_vlc[i],
1959 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1960 &chroma_dc_total_zeros_len [i][0], 1, 1,
1961 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1962 INIT_VLC_USE_NEW_STATIC);
1964 for(i=0; i<15; i++){
1965 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1966 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1967 init_vlc(&total_zeros_vlc[i],
1968 TOTAL_ZEROS_VLC_BITS, 16,
1969 &total_zeros_len [i][0], 1, 1,
1970 &total_zeros_bits[i][0], 1, 1,
1971 INIT_VLC_USE_NEW_STATIC);
1974 for(i=0; i<6; i++){
1975 run_vlc[i].table = run_vlc_tables[i];
1976 run_vlc[i].table_allocated = run_vlc_tables_size;
1977 init_vlc(&run_vlc[i],
1978 RUN_VLC_BITS, 7,
1979 &run_len [i][0], 1, 1,
1980 &run_bits[i][0], 1, 1,
1981 INIT_VLC_USE_NEW_STATIC);
1983 run7_vlc.table = run7_vlc_table;
1984 run7_vlc.table_allocated = run7_vlc_table_size;
1985 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1986 &run_len [6][0], 1, 1,
1987 &run_bits[6][0], 1, 1,
1988 INIT_VLC_USE_NEW_STATIC);
1992 static void free_tables(H264Context *h){
1993 int i;
1994 H264Context *hx;
1995 av_freep(&h->intra4x4_pred_mode);
1996 av_freep(&h->chroma_pred_mode_table);
1997 av_freep(&h->cbp_table);
1998 av_freep(&h->mvd_table[0]);
1999 av_freep(&h->mvd_table[1]);
2000 av_freep(&h->direct_table);
2001 av_freep(&h->non_zero_count);
2002 av_freep(&h->slice_table_base);
2003 h->slice_table= NULL;
2005 av_freep(&h->mb2b_xy);
2006 av_freep(&h->mb2b8_xy);
2008 for(i = 0; i < MAX_SPS_COUNT; i++)
2009 av_freep(h->sps_buffers + i);
2011 for(i = 0; i < MAX_PPS_COUNT; i++)
2012 av_freep(h->pps_buffers + i);
2014 for(i = 0; i < h->s.avctx->thread_count; i++) {
2015 hx = h->thread_context[i];
2016 if(!hx) continue;
2017 av_freep(&hx->top_borders[1]);
2018 av_freep(&hx->top_borders[0]);
2019 av_freep(&hx->s.obmc_scratchpad);
2023 static void init_dequant8_coeff_table(H264Context *h){
2024 int i,q,x;
2025 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2026 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2027 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2029 for(i=0; i<2; i++ ){
2030 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2031 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2032 break;
2035 for(q=0; q<52; q++){
2036 int shift = ff_div6[q];
2037 int idx = ff_rem6[q];
2038 for(x=0; x<64; x++)
2039 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2040 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2041 h->pps.scaling_matrix8[i][x]) << shift;
2046 static void init_dequant4_coeff_table(H264Context *h){
2047 int i,j,q,x;
2048 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2049 for(i=0; i<6; i++ ){
2050 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2051 for(j=0; j<i; j++){
2052 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2053 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2054 break;
2057 if(j<i)
2058 continue;
2060 for(q=0; q<52; q++){
2061 int shift = ff_div6[q] + 2;
2062 int idx = ff_rem6[q];
2063 for(x=0; x<16; x++)
2064 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2065 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2066 h->pps.scaling_matrix4[i][x]) << shift;
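/* Worked example (illustrative, assuming the default flat scaling matrix of
 * 16 set in common_init()): shift = ff_div6[q] + 2 grows by one every 6 QP
 * steps, so dequant4_coeff[i][q+6][x] == 2*dequant4_coeff[i][q][x]; with
 * dequant4_coeff_init[0][0] == 10 this gives 10*16<<2 = 640 at q=0 and
 * 10*16<<3 = 1280 at q=6. */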
2071 static void init_dequant_tables(H264Context *h){
2072 int i,x;
2073 init_dequant4_coeff_table(h);
2074 if(h->pps.transform_8x8_mode)
2075 init_dequant8_coeff_table(h);
2076 if(h->sps.transform_bypass){
2077 for(i=0; i<6; i++)
2078 for(x=0; x<16; x++)
2079 h->dequant4_coeff[i][0][x] = 1<<6;
2080 if(h->pps.transform_8x8_mode)
2081 for(i=0; i<2; i++)
2082 for(x=0; x<64; x++)
2083 h->dequant8_coeff[i][0][x] = 1<<6;
2089 * allocates tables.
2090 * needs width/height
2092 static int alloc_tables(H264Context *h){
2093 MpegEncContext * const s = &h->s;
2094 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2095 int x,y;
2097 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2099 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2100 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2101 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2103 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2105 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2106 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2108 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2109 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2111 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2112 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2113 for(y=0; y<s->mb_height; y++){
2114 for(x=0; x<s->mb_width; x++){
2115 const int mb_xy= x + y*s->mb_stride;
2116 const int b_xy = 4*x + 4*y*h->b_stride;
2117 const int b8_xy= 2*x + 2*y*h->b8_stride;
2119 h->mb2b_xy [mb_xy]= b_xy;
2120 h->mb2b8_xy[mb_xy]= b8_xy;
2124 s->obmc_scratchpad = NULL;
2126 if(!h->dequant4_coeff[0])
2127 init_dequant_tables(h);
2129 return 0;
2130 fail:
2131 free_tables(h);
2132 return -1;
2136 * Mimic alloc_tables(), but for every context thread.
2138 static void clone_tables(H264Context *dst, H264Context *src){
2139 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2140 dst->non_zero_count = src->non_zero_count;
2141 dst->slice_table = src->slice_table;
2142 dst->cbp_table = src->cbp_table;
2143 dst->mb2b_xy = src->mb2b_xy;
2144 dst->mb2b8_xy = src->mb2b8_xy;
2145 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2146 dst->mvd_table[0] = src->mvd_table[0];
2147 dst->mvd_table[1] = src->mvd_table[1];
2148 dst->direct_table = src->direct_table;
2150 dst->s.obmc_scratchpad = NULL;
2151 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2155 * Init context
2156 * Allocate buffers which are not shared amongst multiple threads.
2158 static int context_init(H264Context *h){
2159 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2160 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2162 return 0;
2163 fail:
2164 return -1; // free_tables will clean up for us
2167 static av_cold void common_init(H264Context *h){
2168 MpegEncContext * const s = &h->s;
2170 s->width = s->avctx->width;
2171 s->height = s->avctx->height;
2172 s->codec_id= s->avctx->codec->id;
2174 ff_h264_pred_init(&h->hpc, s->codec_id);
2176 h->dequant_coeff_pps= -1;
2177 s->unrestricted_mv=1;
2178 s->decode=1; //FIXME
2180 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2181 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2184 static av_cold int decode_init(AVCodecContext *avctx){
2185 H264Context *h= avctx->priv_data;
2186 MpegEncContext * const s = &h->s;
2188 MPV_decode_defaults(s);
2190 s->avctx = avctx;
2191 common_init(h);
2193 s->out_format = FMT_H264;
2194 s->workaround_bugs= avctx->workaround_bugs;
2196 // set defaults
2197 // s->decode_mb= ff_h263_decode_mb;
2198 s->quarter_sample = 1;
2199 s->low_delay= 1;
2201 if(avctx->codec_id == CODEC_ID_SVQ3)
2202 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2203 else
2204 avctx->pix_fmt= PIX_FMT_YUV420P;
2206 decode_init_vlc();
2208 if(avctx->extradata_size > 0 && avctx->extradata &&
2209 *(char *)avctx->extradata == 1){
2210 h->is_avc = 1;
2211 h->got_avcC = 0;
2212 } else {
2213 h->is_avc = 0;
2216 h->thread_context[0] = h;
2217 h->outputed_poc = INT_MIN;
2218 h->prev_poc_msb= 1<<16;
2219 return 0;
2222 static int frame_start(H264Context *h){
2223 MpegEncContext * const s = &h->s;
2224 int i;
2226 if(MPV_frame_start(s, s->avctx) < 0)
2227 return -1;
2228 ff_er_frame_start(s);
2230 * MPV_frame_start uses pict_type to derive key_frame.
2231 * This is incorrect for H.264; IDR markings must be used.
2232 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2233 * See decode_nal_units().
2235 s->current_picture_ptr->key_frame= 0;
2237 assert(s->linesize && s->uvlinesize);
2239 for(i=0; i<16; i++){
2240 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2241 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2243 for(i=0; i<4; i++){
2244 h->block_offset[16+i]=
2245 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2246 h->block_offset[24+16+i]=
2247 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2250 /* can't be in alloc_tables because linesize isn't known there.
2251 * FIXME: redo bipred weight to not require extra buffer? */
2252 for(i = 0; i < s->avctx->thread_count; i++)
2253 if(!h->thread_context[i]->s.obmc_scratchpad)
2254 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2256 /* some macroblocks will be accessed before they're available */
2257 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2258 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2260 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2262 // We mark the current picture as non-reference after allocating it, so
2263 // that if we break out due to an error it can be released automatically
2264 // in the next MPV_frame_start().
2265 // SVQ3, as well as most other codecs, has only last/next/current and thus
2266 // gets released even with the reference set; besides, SVQ3 and others do not
2267 // mark frames as reference later "naturally".
2268 if(s->codec_id != CODEC_ID_SVQ3)
2269 s->current_picture_ptr->reference= 0;
2271 s->current_picture_ptr->field_poc[0]=
2272 s->current_picture_ptr->field_poc[1]= INT_MAX;
2273 assert(s->current_picture_ptr->long_ref==0);
2275 return 0;
2278 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2279 MpegEncContext * const s = &h->s;
2280 int i;
2281 int step = 1;
2282 int offset = 1;
2283 int uvoffset= 1;
2284 int top_idx = 1;
2285 int skiplast= 0;
2287 src_y -= linesize;
2288 src_cb -= uvlinesize;
2289 src_cr -= uvlinesize;
2291 if(!simple && FRAME_MBAFF){
2292 if(s->mb_y&1){
2293 offset = MB_MBAFF ? 1 : 17;
2294 uvoffset= MB_MBAFF ? 1 : 9;
2295 if(!MB_MBAFF){
2296 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2297 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2298 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2299 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2300 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2303 }else{
2304 if(!MB_MBAFF){
2305 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2306 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2307 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2308 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2310 skiplast= 1;
2312 offset =
2313 uvoffset=
2314 top_idx = MB_MBAFF ? 0 : 1;
2316 step= MB_MBAFF ? 2 : 1;
2319 // There are two lines saved, the line above the top macroblock of a pair,
2320 // and the line above the bottom macroblock
2321 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2322 for(i=1; i<17 - skiplast; i++){
2323 h->left_border[offset+i*step]= src_y[15+i* linesize];
2326 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2327 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2329 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2330 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2331 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2332 for(i=1; i<9 - skiplast; i++){
2333 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2334 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2336 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2337 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2341 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2342 MpegEncContext * const s = &h->s;
2343 int temp8, i;
2344 uint64_t temp64;
2345 int deblock_left;
2346 int deblock_top;
2347 int mb_xy;
2348 int step = 1;
2349 int offset = 1;
2350 int uvoffset= 1;
2351 int top_idx = 1;
2353 if(!simple && FRAME_MBAFF){
2354 if(s->mb_y&1){
2355 offset = MB_MBAFF ? 1 : 17;
2356 uvoffset= MB_MBAFF ? 1 : 9;
2357 }else{
2358 offset =
2359 uvoffset=
2360 top_idx = MB_MBAFF ? 0 : 1;
2362 step= MB_MBAFF ? 2 : 1;
2365 if(h->deblocking_filter == 2) {
2366 mb_xy = h->mb_xy;
2367 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2368 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2369 } else {
2370 deblock_left = (s->mb_x > 0);
2371 deblock_top = (s->mb_y > 0);
2374 src_y -= linesize + 1;
2375 src_cb -= uvlinesize + 1;
2376 src_cr -= uvlinesize + 1;
2378 #define XCHG(a,b,t,xchg)\
2379 t= a;\
2380 if(xchg)\
2381 a= b;\
2382 b= t;
2384 if(deblock_left){
2385 for(i = !deblock_top; i<16; i++){
2386 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2388 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2391 if(deblock_top){
2392 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2393 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2394 if(s->mb_x+1 < s->mb_width){
2395 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2399 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2400 if(deblock_left){
2401 for(i = !deblock_top; i<8; i++){
2402 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2403 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2405 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2406 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2408 if(deblock_top){
2409 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2410 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2415 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2416 MpegEncContext * const s = &h->s;
2417 const int mb_x= s->mb_x;
2418 const int mb_y= s->mb_y;
2419 const int mb_xy= h->mb_xy;
2420 const int mb_type= s->current_picture.mb_type[mb_xy];
2421 uint8_t *dest_y, *dest_cb, *dest_cr;
2422 int linesize, uvlinesize /*dct_offset*/;
2423 int i;
2424 int *block_offset = &h->block_offset[0];
2425 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2426 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2427 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2429 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2430 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2431 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2433 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2434 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2436 if (!simple && MB_FIELD) {
2437 linesize = h->mb_linesize = s->linesize * 2;
2438 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2439 block_offset = &h->block_offset[24];
2440 if(mb_y&1){ //FIXME move out of this function?
2441 dest_y -= s->linesize*15;
2442 dest_cb-= s->uvlinesize*7;
2443 dest_cr-= s->uvlinesize*7;
2445 if(FRAME_MBAFF) {
2446 int list;
2447 for(list=0; list<h->list_count; list++){
2448 if(!USES_LIST(mb_type, list))
2449 continue;
2450 if(IS_16X16(mb_type)){
2451 int8_t *ref = &h->ref_cache[list][scan8[0]];
2452 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2453 }else{
2454 for(i=0; i<16; i+=4){
2455 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2456 int ref = h->ref_cache[list][scan8[i]];
2457 if(ref >= 0)
2458 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2463 } else {
2464 linesize = h->mb_linesize = s->linesize;
2465 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2466 // dct_offset = s->linesize * 16;
2469 if(transform_bypass){
2470 idct_dc_add =
2471 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2472 }else if(IS_8x8DCT(mb_type)){
2473 idct_dc_add = s->dsp.h264_idct8_dc_add;
2474 idct_add = s->dsp.h264_idct8_add;
2475 }else{
2476 idct_dc_add = s->dsp.h264_idct_dc_add;
2477 idct_add = s->dsp.h264_idct_add;
2480 if (!simple && IS_INTRA_PCM(mb_type)) {
2481 for (i=0; i<16; i++) {
2482 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2484 for (i=0; i<8; i++) {
2485 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2486 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2488 } else {
2489 if(IS_INTRA(mb_type)){
2490 if(h->deblocking_filter)
2491 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2493 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2494 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2495 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2498 if(IS_INTRA4x4(mb_type)){
2499 if(simple || !s->encoding){
2500 if(IS_8x8DCT(mb_type)){
2501 for(i=0; i<16; i+=4){
2502 uint8_t * const ptr= dest_y + block_offset[i];
2503 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2504 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2505 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2506 (h->topright_samples_available<<i)&0x4000, linesize);
2507 if(nnz){
2508 if(nnz == 1 && h->mb[i*16])
2509 idct_dc_add(ptr, h->mb + i*16, linesize);
2510 else
2511 idct_add(ptr, h->mb + i*16, linesize);
2514 }else
2515 for(i=0; i<16; i++){
2516 uint8_t * const ptr= dest_y + block_offset[i];
2517 uint8_t *topright;
2518 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2519 int nnz, tr;
2521 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2522 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2523 assert(mb_y || linesize <= block_offset[i]);
2524 if(!topright_avail){
2525 tr= ptr[3 - linesize]*0x01010101;
2526 topright= (uint8_t*) &tr;
2527 }else
2528 topright= ptr + 4 - linesize;
2529 }else
2530 topright= NULL;
2532 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2533 nnz = h->non_zero_count_cache[ scan8[i] ];
2534 if(nnz){
2535 if(is_h264){
2536 if(nnz == 1 && h->mb[i*16])
2537 idct_dc_add(ptr, h->mb + i*16, linesize);
2538 else
2539 idct_add(ptr, h->mb + i*16, linesize);
2540 }else
2541 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2545 }else{
2546 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2547 if(is_h264){
2548 if(!transform_bypass)
2549 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2550 }else
2551 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2553 if(h->deblocking_filter)
2554 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2555 }else if(is_h264){
2556 hl_motion(h, dest_y, dest_cb, dest_cr,
2557 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2558 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2559 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2563 if(!IS_INTRA4x4(mb_type)){
2564 if(is_h264){
2565 if(IS_INTRA16x16(mb_type)){
2566 for(i=0; i<16; i++){
2567 if(h->non_zero_count_cache[ scan8[i] ])
2568 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2569 else if(h->mb[i*16])
2570 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2572 }else{
2573 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2574 for(i=0; i<16; i+=di){
2575 int nnz = h->non_zero_count_cache[ scan8[i] ];
2576 if(nnz){
2577 if(nnz==1 && h->mb[i*16])
2578 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2579 else
2580 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2584 }else{
2585 for(i=0; i<16; i++){
2586 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2587 uint8_t * const ptr= dest_y + block_offset[i];
2588 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2594 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2595 uint8_t *dest[2] = {dest_cb, dest_cr};
2596 if(transform_bypass){
2597 idct_add = idct_dc_add = s->dsp.add_pixels4;
2598 }else{
2599 idct_add = s->dsp.h264_idct_add;
2600 idct_dc_add = s->dsp.h264_idct_dc_add;
2601 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2602 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2604 if(is_h264){
2605 for(i=16; i<16+8; i++){
2606 if(h->non_zero_count_cache[ scan8[i] ])
2607 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2608 else if(h->mb[i*16])
2609 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2611 }else{
2612 for(i=16; i<16+8; i++){
2613 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2614 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2615 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2621 if(h->deblocking_filter) {
2622 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2623 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2624 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2625 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2626 if (!simple && FRAME_MBAFF) {
2627 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2628 } else {
2629 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2635 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2637 static void hl_decode_mb_simple(H264Context *h){
2638 hl_decode_mb_internal(h, 1);
2642 * Process a macroblock; this handles edge cases, such as interlacing.
2644 static void av_noinline hl_decode_mb_complex(H264Context *h){
2645 hl_decode_mb_internal(h, 0);
2648 static void hl_decode_mb(H264Context *h){
2649 MpegEncContext * const s = &h->s;
2650 const int mb_xy= h->mb_xy;
2651 const int mb_type= s->current_picture.mb_type[mb_xy];
2652 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2653 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2655 if(ENABLE_H264_ENCODER && !s->decode)
2656 return;
2658 if (is_complex)
2659 hl_decode_mb_complex(h);
2660 else hl_decode_mb_simple(h);
2663 static void pic_as_field(Picture *pic, const int parity){
2664 int i;
2665 for (i = 0; i < 4; ++i) {
2666 if (parity == PICT_BOTTOM_FIELD)
2667 pic->data[i] += pic->linesize[i];
2668 pic->reference = parity;
2669 pic->linesize[i] *= 2;
2671 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
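/* Usage sketch (illustrative): after pic_as_field(pic, PICT_BOTTOM_FIELD) the
 * plane pointers start one line lower, the linesizes are doubled so only every
 * second source line is addressed, pic->reference holds the field parity and
 * pic->poc is the bottom-field POC. */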
2674 static int split_field_copy(Picture *dest, Picture *src,
2675 int parity, int id_add){
2676 int match = !!(src->reference & parity);
2678 if (match) {
2679 *dest = *src;
2680 if(parity != PICT_FRAME){
2681 pic_as_field(dest, parity);
2682 dest->pic_id *= 2;
2683 dest->pic_id += id_add;
2687 return match;
2690 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2691 int i[2]={0};
2692 int index=0;
2694 while(i[0]<len || i[1]<len){
2695 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2696 i[0]++;
2697 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2698 i[1]++;
2699 if(i[0] < len){
2700 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2701 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2703 if(i[1] < len){
2704 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2705 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2709 return index;
2712 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2713 int i, best_poc;
2714 int out_i= 0;
2716 for(;;){
2717 best_poc= dir ? INT_MIN : INT_MAX;
2719 for(i=0; i<len; i++){
2720 const int poc= src[i]->poc;
2721 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2722 best_poc= poc;
2723 sorted[out_i]= src[i];
2726 if(best_poc == (dir ? INT_MIN : INT_MAX))
2727 break;
2728 limit= sorted[out_i++]->poc - dir;
2730 return out_i;
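/* Illustrative note: for B slices fill_default_ref_list() below calls this
 * twice per list; list0 first collects short term refs with poc at or below
 * cur_poc in descending poc order and then those above cur_poc in ascending
 * order, while list1 uses the opposite split, matching the default B
 * reference order of the specification. */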
2734 * fills the default_ref_list.
2736 static int fill_default_ref_list(H264Context *h){
2737 MpegEncContext * const s = &h->s;
2738 int i, len;
2740 if(h->slice_type_nos==FF_B_TYPE){
2741 Picture *sorted[32];
2742 int cur_poc, list;
2743 int lens[2];
2745 if(FIELD_PICTURE)
2746 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2747 else
2748 cur_poc= s->current_picture_ptr->poc;
2750 for(list= 0; list<2; list++){
2751 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2752 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2753 assert(len<=32);
2754 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2755 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2756 assert(len<=32);
2758 if(len < h->ref_count[list])
2759 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2760 lens[list]= len;
2763 if(lens[0] == lens[1] && lens[1] > 1){
2764 for(i=0; i<lens[0] && h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++);
2765 if(i == lens[0])
2766 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2768 }else{
2769 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2770 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2771 assert(len <= 32);
2772 if(len < h->ref_count[0])
2773 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2775 #ifdef TRACE
2776 for (i=0; i<h->ref_count[0]; i++) {
2777 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2779 if(h->slice_type_nos==FF_B_TYPE){
2780 for (i=0; i<h->ref_count[1]; i++) {
2781 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2784 #endif
2785 return 0;
2788 static void print_short_term(H264Context *h);
2789 static void print_long_term(H264Context *h);
2792 * Extract structure information about the picture described by pic_num in
2793 * the current decoding context (frame or field). Note that pic_num is
2794 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2795 * @param pic_num picture number for which to extract structure information
2796 * @param structure one of PICT_XXX describing structure of picture
2797 * with pic_num
2798 * @return frame number (short term) or long term index of picture
2799 * described by pic_num
2801 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2802 MpegEncContext * const s = &h->s;
2804 *structure = s->picture_structure;
2805 if(FIELD_PICTURE){
2806 if (!(pic_num & 1))
2807 /* opposite field */
2808 *structure ^= PICT_FRAME;
2809 pic_num >>= 1;
2812 return pic_num;
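/* Worked example (illustrative): in a field picture, an odd pic_num refers to
 * a field of the same parity as the current one and an even pic_num to the
 * opposite parity, so with the current field being PICT_TOP_FIELD both
 * pic_num 7 and pic_num 6 map to frame number 3, the former as a top field
 * and the latter as a bottom field. */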
2815 static int decode_ref_pic_list_reordering(H264Context *h){
2816 MpegEncContext * const s = &h->s;
2817 int list, index, pic_structure;
2819 print_short_term(h);
2820 print_long_term(h);
2822 for(list=0; list<h->list_count; list++){
2823 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2825 if(get_bits1(&s->gb)){
2826 int pred= h->curr_pic_num;
2828 for(index=0; ; index++){
2829 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2830 unsigned int pic_id;
2831 int i;
2832 Picture *ref = NULL;
2834 if(reordering_of_pic_nums_idc==3)
2835 break;
2837 if(index >= h->ref_count[list]){
2838 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2839 return -1;
2842 if(reordering_of_pic_nums_idc<3){
2843 if(reordering_of_pic_nums_idc<2){
2844 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2845 int frame_num;
2847 if(abs_diff_pic_num > h->max_pic_num){
2848 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2849 return -1;
2852 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2853 else pred+= abs_diff_pic_num;
2854 pred &= h->max_pic_num - 1;
2856 frame_num = pic_num_extract(h, pred, &pic_structure);
2858 for(i= h->short_ref_count-1; i>=0; i--){
2859 ref = h->short_ref[i];
2860 assert(ref->reference);
2861 assert(!ref->long_ref);
2863 if( ref->frame_num == frame_num &&
2864 (ref->reference & pic_structure))
2866 break;
2868 if(i>=0)
2869 ref->pic_id= pred;
2870 }else{
2871 int long_idx;
2872 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2874 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2876 if(long_idx>31){
2877 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2878 return -1;
2880 ref = h->long_ref[long_idx];
2881 assert(!(ref && !ref->reference));
2882 if(ref && (ref->reference & pic_structure)){
2883 ref->pic_id= pic_id;
2884 assert(ref->long_ref);
2885 i=0;
2886 }else{
2887 i=-1;
2891 if (i < 0) {
2892 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2893 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2894 } else {
2895 for(i=index; i+1<h->ref_count[list]; i++){
2896 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2897 break;
2899 for(; i > index; i--){
2900 h->ref_list[list][i]= h->ref_list[list][i-1];
2902 h->ref_list[list][index]= *ref;
2903 if (FIELD_PICTURE){
2904 pic_as_field(&h->ref_list[list][index], pic_structure);
2907 }else{
2908 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2909 return -1;
2914 for(list=0; list<h->list_count; list++){
2915 for(index= 0; index < h->ref_count[list]; index++){
2916 if(!h->ref_list[list][index].data[0]){
2917 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2918 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2923 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
2924 direct_dist_scale_factor(h);
2925 direct_ref_list_init(h);
2926 return 0;
2929 static void fill_mbaff_ref_list(H264Context *h){
2930 int list, i, j;
2931 for(list=0; list<2; list++){ //FIXME try list_count
2932 for(i=0; i<h->ref_count[list]; i++){
2933 Picture *frame = &h->ref_list[list][i];
2934 Picture *field = &h->ref_list[list][16+2*i];
2935 field[0] = *frame;
2936 for(j=0; j<3; j++)
2937 field[0].linesize[j] <<= 1;
2938 field[0].reference = PICT_TOP_FIELD;
2939 field[1] = field[0];
2940 for(j=0; j<3; j++)
2941 field[1].data[j] += frame->linesize[j];
2942 field[1].reference = PICT_BOTTOM_FIELD;
2944 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2945 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2946 for(j=0; j<2; j++){
2947 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2948 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2952 for(j=0; j<h->ref_count[1]; j++){
2953 for(i=0; i<h->ref_count[0]; i++)
2954 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2955 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2956 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
2960 static int pred_weight_table(H264Context *h){
2961 MpegEncContext * const s = &h->s;
2962 int list, i;
2963 int luma_def, chroma_def;
2965 h->use_weight= 0;
2966 h->use_weight_chroma= 0;
2967 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2968 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2969 luma_def = 1<<h->luma_log2_weight_denom;
2970 chroma_def = 1<<h->chroma_log2_weight_denom;
2972 for(list=0; list<2; list++){
2973 for(i=0; i<h->ref_count[list]; i++){
2974 int luma_weight_flag, chroma_weight_flag;
2976 luma_weight_flag= get_bits1(&s->gb);
2977 if(luma_weight_flag){
2978 h->luma_weight[list][i]= get_se_golomb(&s->gb);
2979 h->luma_offset[list][i]= get_se_golomb(&s->gb);
2980 if( h->luma_weight[list][i] != luma_def
2981 || h->luma_offset[list][i] != 0)
2982 h->use_weight= 1;
2983 }else{
2984 h->luma_weight[list][i]= luma_def;
2985 h->luma_offset[list][i]= 0;
2988 if(CHROMA){
2989 chroma_weight_flag= get_bits1(&s->gb);
2990 if(chroma_weight_flag){
2991 int j;
2992 for(j=0; j<2; j++){
2993 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
2994 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
2995 if( h->chroma_weight[list][i][j] != chroma_def
2996 || h->chroma_offset[list][i][j] != 0)
2997 h->use_weight_chroma= 1;
2999 }else{
3000 int j;
3001 for(j=0; j<2; j++){
3002 h->chroma_weight[list][i][j]= chroma_def;
3003 h->chroma_offset[list][i][j]= 0;
3008 if(h->slice_type_nos != FF_B_TYPE) break;
3010 h->use_weight= h->use_weight || h->use_weight_chroma;
3011 return 0;
3014 static void implicit_weight_table(H264Context *h){
3015 MpegEncContext * const s = &h->s;
3016 int ref0, ref1;
3017 int cur_poc = s->current_picture_ptr->poc;
3019 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3020 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3021 h->use_weight= 0;
3022 h->use_weight_chroma= 0;
3023 return;
3026 h->use_weight= 2;
3027 h->use_weight_chroma= 2;
3028 h->luma_log2_weight_denom= 5;
3029 h->chroma_log2_weight_denom= 5;
3031 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3032 int poc0 = h->ref_list[0][ref0].poc;
3033 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3034 int poc1 = h->ref_list[1][ref1].poc;
3035 int td = av_clip(poc1 - poc0, -128, 127);
3036 if(td){
3037 int tb = av_clip(cur_poc - poc0, -128, 127);
3038 int tx = (16384 + (FFABS(td) >> 1)) / td;
3039 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3040 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3041 h->implicit_weight[ref0][ref1] = 32;
3042 else
3043 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3044 }else
3045 h->implicit_weight[ref0][ref1] = 32;
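/* Worked example (illustrative): with poc0=0, poc1=4 and cur_poc=1 we get
 * td=4, tb=1, tx=(16384+2)/4=4096 and dist_scale_factor=((1*4096+32)>>6)>>2=16,
 * so implicit_weight[ref0][ref1]=64-16=48; the list0 reference, being closer
 * to the current picture, receives the larger weight and list1 gets 64-48=16. */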
3051 * Mark a picture as no longer needed for reference. The refmask
3052 * argument allows unreferencing of individual fields or the whole frame.
3053 * If the picture becomes entirely unreferenced, but is being held for
3054 * display purposes, it is marked as such.
3055 * @param refmask mask of fields to unreference; the mask is bitwise
3056 * ANDed with the reference marking of pic
3057 * @return non-zero if pic becomes entirely unreferenced (except possibly
3058 * for display purposes), zero if one of the fields remains in
3059 * reference
3061 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3062 int i;
3063 if (pic->reference &= refmask) {
3064 return 0;
3065 } else {
3066 for(i = 0; h->delayed_pic[i]; i++)
3067 if(pic == h->delayed_pic[i]){
3068 pic->reference=DELAYED_PIC_REF;
3069 break;
3071 return 1;
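/* Usage sketch (illustrative): for a picture referenced as a full frame
 * (reference == PICT_FRAME), unreference_pic(h, pic, PICT_BOTTOM_FIELD) keeps
 * the bottom field referenced and returns 0; a later call with refmask 0
 * clears the remaining field and returns 1, switching the picture to
 * DELAYED_PIC_REF if it is still queued in delayed_pic[] for output. */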
3076 * instantaneous decoder refresh.
3078 static void idr(H264Context *h){
3079 int i;
3081 for(i=0; i<16; i++){
3082 remove_long(h, i, 0);
3084 assert(h->long_ref_count==0);
3086 for(i=0; i<h->short_ref_count; i++){
3087 unreference_pic(h, h->short_ref[i], 0);
3088 h->short_ref[i]= NULL;
3090 h->short_ref_count=0;
3091 h->prev_frame_num= 0;
3092 h->prev_frame_num_offset= 0;
3093 h->prev_poc_msb=
3094 h->prev_poc_lsb= 0;
3097 /* forget old pics after a seek */
3098 static void flush_dpb(AVCodecContext *avctx){
3099 H264Context *h= avctx->priv_data;
3100 int i;
3101 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3102 if(h->delayed_pic[i])
3103 h->delayed_pic[i]->reference= 0;
3104 h->delayed_pic[i]= NULL;
3106 h->outputed_poc= INT_MIN;
3107 idr(h);
3108 if(h->s.current_picture_ptr)
3109 h->s.current_picture_ptr->reference= 0;
3110 h->s.first_field= 0;
3111 ff_mpeg_flush(avctx);
3115 * Find a Picture in the short term reference list by frame number.
3116 * @param frame_num frame number to search for
3117 * @param idx the index into h->short_ref where the returned picture is found;
3118 * undefined if no picture is found.
3119 * @return pointer to the found picture, or NULL if no pic with the provided
3120 * frame number is found
3122 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3123 MpegEncContext * const s = &h->s;
3124 int i;
3126 for(i=0; i<h->short_ref_count; i++){
3127 Picture *pic= h->short_ref[i];
3128 if(s->avctx->debug&FF_DEBUG_MMCO)
3129 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3130 if(pic->frame_num == frame_num) {
3131 *idx = i;
3132 return pic;
3135 return NULL;
3139 * Remove a picture from the short term reference list by its index in
3140 * that list. This does no checking on the provided index; it is assumed
3141 * to be valid. Other list entries are shifted down.
3142 * @param i index into h->short_ref of picture to remove.
3144 static void remove_short_at_index(H264Context *h, int i){
3145 assert(i >= 0 && i < h->short_ref_count);
3146 h->short_ref[i]= NULL;
3147 if (--h->short_ref_count)
3148 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3153 * @return the removed picture or NULL if an error occurs
3155 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3156 MpegEncContext * const s = &h->s;
3157 Picture *pic;
3158 int i;
3160 if(s->avctx->debug&FF_DEBUG_MMCO)
3161 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3163 pic = find_short(h, frame_num, &i);
3164 if (pic){
3165 if(unreference_pic(h, pic, ref_mask))
3166 remove_short_at_index(h, i);
3169 return pic;
3173 * Remove a picture from the long term reference list by its index in
3174 * that list.
3175 * @return the removed picture or NULL if an error occurs
3177 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3178 Picture *pic;
3180 pic= h->long_ref[i];
3181 if (pic){
3182 if(unreference_pic(h, pic, ref_mask)){
3183 assert(h->long_ref[i]->long_ref == 1);
3184 h->long_ref[i]->long_ref= 0;
3185 h->long_ref[i]= NULL;
3186 h->long_ref_count--;
3190 return pic;
3194 * print short term list
3196 static void print_short_term(H264Context *h) {
3197 uint32_t i;
3198 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3199 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3200 for(i=0; i<h->short_ref_count; i++){
3201 Picture *pic= h->short_ref[i];
3202 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3208 * print long term list
3210 static void print_long_term(H264Context *h) {
3211 uint32_t i;
3212 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3213 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3214 for(i = 0; i < 16; i++){
3215 Picture *pic= h->long_ref[i];
3216 if (pic) {
3217 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3224 * Executes the reference picture marking (memory management control operations).
3226 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3227 MpegEncContext * const s = &h->s;
3228 int i, j;
3229 int current_ref_assigned=0;
3230 Picture *pic;
3232 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3233 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3235 for(i=0; i<mmco_count; i++){
3236 int structure, frame_num;
3237 if(s->avctx->debug&FF_DEBUG_MMCO)
3238 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3240 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3241 || mmco[i].opcode == MMCO_SHORT2LONG){
3242 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3243 pic = find_short(h, frame_num, &j);
3244 if(!pic){
3245 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3246 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3247 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3248 continue;
3252 switch(mmco[i].opcode){
3253 case MMCO_SHORT2UNUSED:
3254 if(s->avctx->debug&FF_DEBUG_MMCO)
3255 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3256 remove_short(h, frame_num, structure ^ PICT_FRAME);
3257 break;
3258 case MMCO_SHORT2LONG:
3259 if (h->long_ref[mmco[i].long_arg] != pic)
3260 remove_long(h, mmco[i].long_arg, 0);
3262 remove_short_at_index(h, j);
3263 h->long_ref[ mmco[i].long_arg ]= pic;
3264 if (h->long_ref[ mmco[i].long_arg ]){
3265 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3266 h->long_ref_count++;
3268 break;
3269 case MMCO_LONG2UNUSED:
3270 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3271 pic = h->long_ref[j];
3272 if (pic) {
3273 remove_long(h, j, structure ^ PICT_FRAME);
3274 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3275 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3276 break;
3277 case MMCO_LONG:
3278 // The comment below is left from previous code as it is an interesting note.
3279 /* First field in pair is in short term list or
3280 * at a different long term index.
3281 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3282 * Report the problem and keep the pair where it is,
3283 * and mark this field valid.
3286 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3287 remove_long(h, mmco[i].long_arg, 0);
3289 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3290 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3291 h->long_ref_count++;
3294 s->current_picture_ptr->reference |= s->picture_structure;
3295 current_ref_assigned=1;
3296 break;
3297 case MMCO_SET_MAX_LONG:
3298 assert(mmco[i].long_arg <= 16);
3299 // just remove the long-term entries whose index is greater than the new max
3300 for(j = mmco[i].long_arg; j<16; j++){
3301 remove_long(h, j, 0);
3303 break;
3304 case MMCO_RESET:
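// MMCO 5: mark all short-term and long-term reference pictures unused and reset POC and frame_num state.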
3305 while(h->short_ref_count){
3306 remove_short(h, h->short_ref[0]->frame_num, 0);
3308 for(j = 0; j < 16; j++) {
3309 remove_long(h, j, 0);
3311 s->current_picture_ptr->poc=
3312 s->current_picture_ptr->field_poc[0]=
3313 s->current_picture_ptr->field_poc[1]=
3314 h->poc_lsb=
3315 h->poc_msb=
3316 h->frame_num=
3317 s->current_picture_ptr->frame_num= 0;
3318 break;
3319 default: assert(0);
3323 if (!current_ref_assigned) {
3324 /* Second field of a complementary field pair, the first field of
3325 * which is already referenced. If short-term referenced, it
3326 * should be the first entry in short_ref. If not, it must exist
3327 * in long_ref; trying to put it on the short list here is an
3328 * error in the encoded bitstream (ref: 7.4.3.3, NOTE 2 and 3).
3330 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3331 /* Just mark the second field valid */
3332 s->current_picture_ptr->reference = PICT_FRAME;
3333 } else if (s->current_picture_ptr->long_ref) {
3334 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3335 "assignment for second field "
3336 "in complementary field pair "
3337 "(first field is long term)\n");
3338 } else {
3339 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3340 if(pic){
3341 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3344 if(h->short_ref_count)
3345 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3347 h->short_ref[0]= s->current_picture_ptr;
3348 h->short_ref_count++;
3349 s->current_picture_ptr->reference |= s->picture_structure;
3353 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3355 /* We have too many reference frames, probably due to corrupted
3356 * stream. Need to discard one frame. Prevents overrun of the
3357 * short_ref and long_ref buffers.
3359 av_log(h->s.avctx, AV_LOG_ERROR,
3360 "number of reference frames exceeds max (probably "
3361 "corrupt input), discarding one\n");
3363 if (h->long_ref_count && !h->short_ref_count) {
3364 for (i = 0; i < 16; ++i)
3365 if (h->long_ref[i])
3366 break;
3368 assert(i < 16);
3369 remove_long(h, i, 0);
3370 } else {
3371 pic = h->short_ref[h->short_ref_count - 1];
3372 remove_short(h, pic->frame_num, 0);
3376 print_short_term(h);
3377 print_long_term(h);
3378 return 0;
3381 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3382 MpegEncContext * const s = &h->s;
3383 int i;
3385 h->mmco_index= 0;
3386 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
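// IDR slice: dec_ref_pic_marking() carries no_output_of_prior_pics_flag (stashed in broken_link) and long_term_reference_flag, which marks the IDR picture as long-term reference 0.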
3387 s->broken_link= get_bits1(gb) -1;
3388 if(get_bits1(gb)){
3389 h->mmco[0].opcode= MMCO_LONG;
3390 h->mmco[0].long_arg= 0;
3391 h->mmco_index= 1;
3393 }else{
3394 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3395 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3396 MMCOOpcode opcode= get_ue_golomb(gb);
3398 h->mmco[i].opcode= opcode;
3399 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3400 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3401 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3402 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3403 return -1;
3406 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3407 unsigned int long_arg= get_ue_golomb(gb);
3408 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3409 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3410 return -1;
3412 h->mmco[i].long_arg= long_arg;
3415 if(opcode > (unsigned)MMCO_LONG){
3416 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3417 return -1;
3419 if(opcode == MMCO_END)
3420 break;
3422 h->mmco_index= i;
3423 }else{
3424 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3426 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3427 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3428 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3429 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3430 h->mmco_index= 1;
3431 if (FIELD_PICTURE) {
3432 h->mmco[0].short_pic_num *= 2;
3433 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3434 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3435 h->mmco_index= 2;
3441 return 0;
3444 static int init_poc(H264Context *h){
3445 MpegEncContext * const s = &h->s;
3446 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3447 int field_poc[2];
3448 Picture *cur = s->current_picture_ptr;
3450 h->frame_num_offset= h->prev_frame_num_offset;
3451 if(h->frame_num < h->prev_frame_num)
3452 h->frame_num_offset += max_frame_num;
3454 if(h->sps.poc_type==0){
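// poc_type 0: POC = poc_msb + poc_lsb; poc_lsb comes from the slice header and poc_msb is inferred from poc_lsb wrap-around relative to the previous picture.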
3455 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3457 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3458 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3459 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3460 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3461 else
3462 h->poc_msb = h->prev_poc_msb;
3463 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3464 field_poc[0] =
3465 field_poc[1] = h->poc_msb + h->poc_lsb;
3466 if(s->picture_structure == PICT_FRAME)
3467 field_poc[1] += h->delta_poc_bottom;
3468 }else if(h->sps.poc_type==1){
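// poc_type 1: POC follows an expected pattern given by the SPS offset_for_ref_frame table and frame_num, plus deltas signalled in the slice header.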
3469 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3470 int i;
3472 if(h->sps.poc_cycle_length != 0)
3473 abs_frame_num = h->frame_num_offset + h->frame_num;
3474 else
3475 abs_frame_num = 0;
3477 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3478 abs_frame_num--;
3480 expected_delta_per_poc_cycle = 0;
3481 for(i=0; i < h->sps.poc_cycle_length; i++)
3482 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3484 if(abs_frame_num > 0){
3485 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3486 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3488 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3489 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3490 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3491 } else
3492 expectedpoc = 0;
3494 if(h->nal_ref_idc == 0)
3495 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3497 field_poc[0] = expectedpoc + h->delta_poc[0];
3498 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3500 if(s->picture_structure == PICT_FRAME)
3501 field_poc[1] += h->delta_poc[1];
3502 }else{
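// poc_type 2: POC is derived directly from frame_num, so output order equals decoding order.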
3503 int poc= 2*(h->frame_num_offset + h->frame_num);
3505 if(!h->nal_ref_idc)
3506 poc--;
3508 field_poc[0]= poc;
3509 field_poc[1]= poc;
3512 if(s->picture_structure != PICT_BOTTOM_FIELD)
3513 s->current_picture_ptr->field_poc[0]= field_poc[0];
3514 if(s->picture_structure != PICT_TOP_FIELD)
3515 s->current_picture_ptr->field_poc[1]= field_poc[1];
3516 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3518 return 0;
3523 * initialize scan tables
3525 static void init_scan_tables(H264Context *h){
3526 MpegEncContext * const s = &h->s;
3527 int i;
3528 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME a little ugly
3529 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3530 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3531 }else{
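// Optimized IDCT: permute the scan tables (a transpose of the block indices) so coefficients land in the layout the non-C IDCT expects.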
3532 for(i=0; i<16; i++){
3533 #define T(x) (x>>2) | ((x<<2) & 0xF)
3534 h->zigzag_scan[i] = T(zigzag_scan[i]);
3535 h-> field_scan[i] = T( field_scan[i]);
3536 #undef T
3539 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3540 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3541 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3542 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3543 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3544 }else{
3545 for(i=0; i<64; i++){
3546 #define T(x) (x>>3) | ((x&7)<<3)
3547 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3548 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3549 h->field_scan8x8[i] = T(field_scan8x8[i]);
3550 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3551 #undef T
3554 if(h->sps.transform_bypass){ //FIXME same ugliness as above
3555 h->zigzag_scan_q0 = zigzag_scan;
3556 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3557 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3558 h->field_scan_q0 = field_scan;
3559 h->field_scan8x8_q0 = field_scan8x8;
3560 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3561 }else{
3562 h->zigzag_scan_q0 = h->zigzag_scan;
3563 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3564 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3565 h->field_scan_q0 = h->field_scan;
3566 h->field_scan8x8_q0 = h->field_scan8x8;
3567 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3572 * Replicates H264 "master" context to thread contexts.
3574 static void clone_slice(H264Context *dst, H264Context *src)
3576 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3577 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3578 dst->s.current_picture = src->s.current_picture;
3579 dst->s.linesize = src->s.linesize;
3580 dst->s.uvlinesize = src->s.uvlinesize;
3581 dst->s.first_field = src->s.first_field;
3583 dst->prev_poc_msb = src->prev_poc_msb;
3584 dst->prev_poc_lsb = src->prev_poc_lsb;
3585 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3586 dst->prev_frame_num = src->prev_frame_num;
3587 dst->short_ref_count = src->short_ref_count;
3589 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3590 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3591 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3592 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3594 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3595 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3599 * decodes a slice header.
3600 * This will also call MPV_common_init() and frame_start() as needed.
3602 * @param h H264 context
3603 * @param h0 H264 master context (differs from 'h' when doing slice-based parallel decoding)
3605 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3607 static int decode_slice_header(H264Context *h, H264Context *h0){
3608 MpegEncContext * const s = &h->s;
3609 MpegEncContext * const s0 = &h0->s;
3610 unsigned int first_mb_in_slice;
3611 unsigned int pps_id;
3612 int num_ref_idx_active_override_flag;
3613 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3614 unsigned int slice_type, tmp, i, j;
3615 int default_ref_list_done = 0;
3616 int last_pic_structure;
3618 s->dropable= h->nal_ref_idc == 0;
3620 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3621 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3622 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3623 }else{
3624 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3625 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3628 first_mb_in_slice= get_ue_golomb(&s->gb);
3630 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3631 h0->current_slice = 0;
3632 if (!s0->first_field)
3633 s->current_picture_ptr= NULL;
3636 slice_type= get_ue_golomb(&s->gb);
3637 if(slice_type > 9){
3638 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", slice_type, s->mb_x, s->mb_y);
3639 return -1;
3641 if(slice_type > 4){
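// slice_type values 5..9 signal that every slice of the picture has the same type.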
3642 slice_type -= 5;
3643 h->slice_type_fixed=1;
3644 }else
3645 h->slice_type_fixed=0;
3647 slice_type= slice_type_map[ slice_type ];
3648 if (slice_type == FF_I_TYPE
3649 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3650 default_ref_list_done = 1;
3652 h->slice_type= slice_type;
3653 h->slice_type_nos= slice_type & 3;
3655 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3656 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3657 av_log(h->s.avctx, AV_LOG_ERROR,
3658 "B picture before any references, skipping\n");
3659 return -1;
3662 pps_id= get_ue_golomb(&s->gb);
3663 if(pps_id>=MAX_PPS_COUNT){
3664 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3665 return -1;
3667 if(!h0->pps_buffers[pps_id]) {
3668 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3669 return -1;
3671 h->pps= *h0->pps_buffers[pps_id];
3673 if(!h0->sps_buffers[h->pps.sps_id]) {
3674 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3675 return -1;
3677 h->sps = *h0->sps_buffers[h->pps.sps_id];
3679 if(h == h0 && h->dequant_coeff_pps != pps_id){
3680 h->dequant_coeff_pps = pps_id;
3681 init_dequant_tables(h);
3684 s->mb_width= h->sps.mb_width;
3685 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3687 h->b_stride= s->mb_width*4;
3688 h->b8_stride= s->mb_width*2;
3690 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3691 if(h->sps.frame_mbs_only_flag)
3692 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3693 else
3694 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
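// crop_right/crop_bottom are in units of 2 luma samples horizontally and 2 (frame) or 4 (field-coded) samples vertically, hence the scaling and clamping above.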
3696 if (s->context_initialized
3697 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3698 if(h != h0)
3699 return -1; // width / height changed during parallelized decoding
3700 free_tables(h);
3701 MPV_common_end(s);
3703 if (!s->context_initialized) {
3704 if(h != h0)
3705 return -1; // we can't (re-)initialize the context during parallel decoding
3706 if (MPV_common_init(s) < 0)
3707 return -1;
3708 s->first_field = 0;
3710 init_scan_tables(h);
3711 alloc_tables(h);
3713 for(i = 1; i < s->avctx->thread_count; i++) {
3714 H264Context *c;
3715 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3716 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3717 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3718 c->sps = h->sps;
3719 c->pps = h->pps;
3720 init_scan_tables(c);
3721 clone_tables(c, h);
3724 for(i = 0; i < s->avctx->thread_count; i++)
3725 if(context_init(h->thread_context[i]) < 0)
3726 return -1;
3728 s->avctx->width = s->width;
3729 s->avctx->height = s->height;
3730 s->avctx->sample_aspect_ratio= h->sps.sar;
3731 if(!s->avctx->sample_aspect_ratio.den)
3732 s->avctx->sample_aspect_ratio.den = 1;
3734 if(h->sps.timing_info_present_flag){
3735 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3736 if(h->x264_build > 0 && h->x264_build < 44)
3737 s->avctx->time_base.den *= 2;
3738 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3739 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3743 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3745 h->mb_mbaff = 0;
3746 h->mb_aff_frame = 0;
3747 last_pic_structure = s0->picture_structure;
3748 if(h->sps.frame_mbs_only_flag){
3749 s->picture_structure= PICT_FRAME;
3750 }else{
3751 if(get_bits1(&s->gb)) { //field_pic_flag
3752 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3753 } else {
3754 s->picture_structure= PICT_FRAME;
3755 h->mb_aff_frame = h->sps.mb_aff;
3758 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3760 if(h0->current_slice == 0){
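// Conceal gaps in frame_num by generating placeholder frames so the short-term reference list stays consistent.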
3761 while(h->frame_num != h->prev_frame_num &&
3762 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3763 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3764 frame_start(h);
3765 h->prev_frame_num++;
3766 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3767 s->current_picture_ptr->frame_num= h->prev_frame_num;
3768 execute_ref_pic_marking(h, NULL, 0);
3771 /* See if we have a decoded first field looking for a pair... */
3772 if (s0->first_field) {
3773 assert(s0->current_picture_ptr);
3774 assert(s0->current_picture_ptr->data[0]);
3775 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3777 /* figure out if we have a complementary field pair */
3778 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3780 * Previous field is unmatched. Don't display it, but let it
3781 * remain for reference if marked as such.
3783 s0->current_picture_ptr = NULL;
3784 s0->first_field = FIELD_PICTURE;
3786 } else {
3787 if (h->nal_ref_idc &&
3788 s0->current_picture_ptr->reference &&
3789 s0->current_picture_ptr->frame_num != h->frame_num) {
3791 * This and the previous field were reference fields but had
3792 * different frame_nums. Consider this field the first of a
3793 * new pair; throw away the previous field except for
3794 * reference purposes.
3796 s0->first_field = 1;
3797 s0->current_picture_ptr = NULL;
3799 } else {
3800 /* Second field in complementary pair */
3801 s0->first_field = 0;
3805 } else {
3806 /* Frame or first field in a potentially complementary pair */
3807 assert(!s0->current_picture_ptr);
3808 s0->first_field = FIELD_PICTURE;
3811 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3812 s0->first_field = 0;
3813 return -1;
3816 if(h != h0)
3817 clone_slice(h, h0);
3819 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3821 assert(s->mb_num == s->mb_width * s->mb_height);
3822 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3823 first_mb_in_slice >= s->mb_num){
3824 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3825 return -1;
3827 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3828 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3829 if (s->picture_structure == PICT_BOTTOM_FIELD)
3830 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3831 assert(s->mb_y < s->mb_height);
3833 if(s->picture_structure==PICT_FRAME){
3834 h->curr_pic_num= h->frame_num;
3835 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3836 }else{
3837 h->curr_pic_num= 2*h->frame_num + 1;
3838 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3841 if(h->nal_unit_type == NAL_IDR_SLICE){
3842 get_ue_golomb(&s->gb); /* idr_pic_id */
3845 if(h->sps.poc_type==0){
3846 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3848 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3849 h->delta_poc_bottom= get_se_golomb(&s->gb);
3853 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3854 h->delta_poc[0]= get_se_golomb(&s->gb);
3856 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3857 h->delta_poc[1]= get_se_golomb(&s->gb);
3860 init_poc(h);
3862 if(h->pps.redundant_pic_cnt_present){
3863 h->redundant_pic_count= get_ue_golomb(&s->gb);
3866 //set defaults, might be overridden a few lines later
3867 h->ref_count[0]= h->pps.ref_count[0];
3868 h->ref_count[1]= h->pps.ref_count[1];
3870 if(h->slice_type_nos != FF_I_TYPE){
3871 if(h->slice_type_nos == FF_B_TYPE){
3872 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3874 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3876 if(num_ref_idx_active_override_flag){
3877 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3878 if(h->slice_type_nos==FF_B_TYPE)
3879 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3881 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3882 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3883 h->ref_count[0]= h->ref_count[1]= 1;
3884 return -1;
3887 if(h->slice_type_nos == FF_B_TYPE)
3888 h->list_count= 2;
3889 else
3890 h->list_count= 1;
3891 }else
3892 h->list_count= 0;
3894 if(!default_ref_list_done){
3895 fill_default_ref_list(h);
3898 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3899 return -1;
3901 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3902 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3903 pred_weight_table(h);
3904 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3905 implicit_weight_table(h);
3906 else
3907 h->use_weight = 0;
3909 if(h->nal_ref_idc)
3910 decode_ref_pic_marking(h0, &s->gb);
3912 if(FRAME_MBAFF)
3913 fill_mbaff_ref_list(h);
3915 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3916 tmp = get_ue_golomb(&s->gb);
3917 if(tmp > 2){
3918 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3919 return -1;
3921 h->cabac_init_idc= tmp;
3924 h->last_qscale_diff = 0;
3925 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3926 if(tmp>51){
3927 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3928 return -1;
3930 s->qscale= tmp;
3931 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3932 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3933 //FIXME qscale / qp ... stuff
3934 if(h->slice_type == FF_SP_TYPE){
3935 get_bits1(&s->gb); /* sp_for_switch_flag */
3937 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3938 get_se_golomb(&s->gb); /* slice_qs_delta */
3941 h->deblocking_filter = 1;
3942 h->slice_alpha_c0_offset = 0;
3943 h->slice_beta_offset = 0;
3944 if( h->pps.deblocking_filter_parameters_present ) {
3945 tmp= get_ue_golomb(&s->gb);
3946 if(tmp > 2){
3947 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3948 return -1;
3950 h->deblocking_filter= tmp;
3951 if(h->deblocking_filter < 2)
3952 h->deblocking_filter^= 1; // 1<->0
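// Bitstream semantics: 0 = filter on, 1 = off, 2 = on but not across slice boundaries; internally 1 = on, 0 = off, 2 = on without crossing slice edges.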
3954 if( h->deblocking_filter ) {
3955 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3956 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3960 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3961 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3962 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3963 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3964 h->deblocking_filter= 0;
3966 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
3967 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
3968 /* Cheat slightly for speed:
3969 Do not bother to deblock across slices. */
3970 h->deblocking_filter = 2;
3971 } else {
3972 h0->max_contexts = 1;
3973 if(!h0->single_decode_warning) {
3974 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
3975 h0->single_decode_warning = 1;
3977 if(h != h0)
3978 return 1; // deblocking switched inside frame
3982 #if 0 //FMO
3983 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3984 slice_group_change_cycle= get_bits(&s->gb, ?);
3985 #endif
3987 h0->last_slice_type = slice_type;
3988 h->slice_num = ++h0->current_slice;
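// Build the per-slice ref2frm map (reference index -> 4*frame_num + field parity); the two leading -1 entries catch unavailable references. Used when computing deblocking boundary strength.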
3990 for(j=0; j<2; j++){
3991 int *ref2frm= h->ref2frm[h->slice_num&15][j];
3992 ref2frm[0]=
3993 ref2frm[1]= -1;
3994 for(i=0; i<16; i++)
3995 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
3996 +(h->ref_list[j][i].reference&3);
3997 ref2frm[18+0]=
3998 ref2frm[18+1]= -1;
3999 for(i=16; i<48; i++)
4000 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4001 +(h->ref_list[j][i].reference&3);
4004 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4005 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4007 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4008 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4009 h->slice_num,
4010 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4011 first_mb_in_slice,
4012 av_get_pict_type_char(h->slice_type),
4013 pps_id, h->frame_num,
4014 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4015 h->ref_count[0], h->ref_count[1],
4016 s->qscale,
4017 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4018 h->use_weight,
4019 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4020 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4024 return 0;
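/**
 * Reads the CAVLC level_prefix: the number of leading zero bits before the first 1 bit.
 */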
4030 static inline int get_level_prefix(GetBitContext *gb){
4031 unsigned int buf;
4032 int log;
4034 OPEN_READER(re, gb);
4035 UPDATE_CACHE(re, gb);
4036 buf=GET_CACHE(re, gb);
4038 log= 32 - av_log2(buf);
4039 #ifdef TRACE
4040 print_bin(buf>>(32-log), log);
4041 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4042 #endif
4044 LAST_SKIP_BITS(re, gb, log);
4045 CLOSE_READER(re, gb);
4047 return log-1;
4050 static inline int get_dct8x8_allowed(H264Context *h){
4051 int i;
4052 for(i=0; i<4; i++){
4053 if(!IS_SUB_8X8(h->sub_mb_type[i])
4054 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4055 return 0;
4057 return 1;
4061 * decodes a residual block.
4062 * @param n block index
4063 * @param scantable scantable
4064 * @param max_coeff number of coefficients in the block
4065 * @return <0 if an error occurred
4067 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4068 MpegEncContext * const s = &h->s;
4069 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4070 int level[16];
4071 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4073 //FIXME put trailing_ones into the context
4075 if(n == CHROMA_DC_BLOCK_INDEX){
4076 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4077 total_coeff= coeff_token>>2;
4078 }else{
4079 if(n == LUMA_DC_BLOCK_INDEX){
4080 total_coeff= pred_non_zero_count(h, 0);
4081 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4082 total_coeff= coeff_token>>2;
4083 }else{
4084 total_coeff= pred_non_zero_count(h, n);
4085 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4086 total_coeff= coeff_token>>2;
4087 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4091 //FIXME set last_non_zero?
4093 if(total_coeff==0)
4094 return 0;
4095 if(total_coeff > (unsigned)max_coeff) {
4096 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4097 return -1;
4100 trailing_ones= coeff_token&3;
4101 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4102 assert(total_coeff<=16);
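// The last 'trailing_ones' non-zero coefficients are +/-1; only their sign bits are coded.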
4104 for(i=0; i<trailing_ones; i++){
4105 level[i]= 1 - 2*get_bits1(gb);
4108 if(i<total_coeff) {
4109 int level_code, mask;
4110 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4111 int prefix= get_level_prefix(gb);
4113 //first coefficient has suffix_length equal to 0 or 1
4114 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4115 if(suffix_length)
4116 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4117 else
4118 level_code= (prefix<<suffix_length); //part
4119 }else if(prefix==14){
4120 if(suffix_length)
4121 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4122 else
4123 level_code= prefix + get_bits(gb, 4); //part
4124 }else{
4125 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4126 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much) sense
4127 if(prefix>=16)
4128 level_code += (1<<(prefix-3))-4096;
4131 if(trailing_ones < 3) level_code += 2;
4133 suffix_length = 1;
4134 if(level_code > 5)
4135 suffix_length++;
4136 mask= -(level_code&1);
4137 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4138 i++;
4140 //remaining coefficients have suffix_length > 0
4141 for(;i<total_coeff;i++) {
4142 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4143 prefix = get_level_prefix(gb);
4144 if(prefix<15){
4145 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4146 }else{
4147 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4148 if(prefix>=16)
4149 level_code += (1<<(prefix-3))-4096;
4151 mask= -(level_code&1);
4152 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4153 if(level_code > suffix_limit[suffix_length])
4154 suffix_length++;
4158 if(total_coeff == max_coeff)
4159 zeros_left=0;
4160 else{
4161 if(n == CHROMA_DC_BLOCK_INDEX)
4162 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4163 else
4164 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4167 coeff_num = zeros_left + total_coeff - 1;
4168 j = scantable[coeff_num];
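// Levels are placed back to front: start at the highest occupied scan position and step down by run_before+1 per coefficient; the two branches differ only in whether dequantization (qmul) is applied here.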
4169 if(n > 24){
4170 block[j] = level[0];
4171 for(i=1;i<total_coeff;i++) {
4172 if(zeros_left <= 0)
4173 run_before = 0;
4174 else if(zeros_left < 7){
4175 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4176 }else{
4177 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4179 zeros_left -= run_before;
4180 coeff_num -= 1 + run_before;
4181 j= scantable[ coeff_num ];
4183 block[j]= level[i];
4185 }else{
4186 block[j] = (level[0] * qmul[j] + 32)>>6;
4187 for(i=1;i<total_coeff;i++) {
4188 if(zeros_left <= 0)
4189 run_before = 0;
4190 else if(zeros_left < 7){
4191 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4192 }else{
4193 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4195 zeros_left -= run_before;
4196 coeff_num -= 1 + run_before;
4197 j= scantable[ coeff_num ];
4199 block[j]= (level[i] * qmul[j] + 32)>>6;
4203 if(zeros_left<0){
4204 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4205 return -1;
4208 return 0;
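/**
 * Guesses mb_field_decoding_flag for a skipped macroblock pair from the left or top neighbour in the same slice.
 */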
4211 static void predict_field_decoding_flag(H264Context *h){
4212 MpegEncContext * const s = &h->s;
4213 const int mb_xy= h->mb_xy;
4214 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4215 ? s->current_picture.mb_type[mb_xy-1]
4216 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4217 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4218 : 0;
4219 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4223 * decodes a P_SKIP or B_SKIP macroblock
4225 static void decode_mb_skip(H264Context *h){
4226 MpegEncContext * const s = &h->s;
4227 const int mb_xy= h->mb_xy;
4228 int mb_type=0;
4230 memset(h->non_zero_count[mb_xy], 0, 16);
4231 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove this (ugh)
4233 if(MB_FIELD)
4234 mb_type|= MB_TYPE_INTERLACED;
4236 if( h->slice_type_nos == FF_B_TYPE )
4238 // just for fill_caches. pred_direct_motion will set the real mb_type
4239 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4241 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4242 pred_direct_motion(h, &mb_type);
4243 mb_type|= MB_TYPE_SKIP;
4245 else
4247 int mx, my;
4248 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4250 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4251 pred_pskip_motion(h, &mx, &my);
4252 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4253 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4256 write_back_motion(h, mb_type);
4257 s->current_picture.mb_type[mb_xy]= mb_type;
4258 s->current_picture.qscale_table[mb_xy]= s->qscale;
4259 h->slice_table[ mb_xy ]= h->slice_num;
4260 h->prev_mb_skipped= 1;
4264 * decodes a macroblock
4265 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4267 static int decode_mb_cavlc(H264Context *h){
4268 MpegEncContext * const s = &h->s;
4269 int mb_xy;
4270 int partition_count;
4271 unsigned int mb_type, cbp;
4272 int dct8x8_allowed= h->pps.transform_8x8_mode;
4274 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4276 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
4278 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4279 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4280 down the code */
4281 if(h->slice_type_nos != FF_I_TYPE){
4282 if(s->mb_skip_run==-1)
4283 s->mb_skip_run= get_ue_golomb(&s->gb);
4285 if (s->mb_skip_run--) {
4286 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4287 if(s->mb_skip_run==0)
4288 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4289 else
4290 predict_field_decoding_flag(h);
4292 decode_mb_skip(h);
4293 return 0;
4296 if(FRAME_MBAFF){
4297 if( (s->mb_y&1) == 0 )
4298 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4301 h->prev_mb_skipped= 0;
4303 mb_type= get_ue_golomb(&s->gb);
4304 if(h->slice_type_nos == FF_B_TYPE){
4305 if(mb_type < 23){
4306 partition_count= b_mb_type_info[mb_type].partition_count;
4307 mb_type= b_mb_type_info[mb_type].type;
4308 }else{
4309 mb_type -= 23;
4310 goto decode_intra_mb;
4312 }else if(h->slice_type_nos == FF_P_TYPE){
4313 if(mb_type < 5){
4314 partition_count= p_mb_type_info[mb_type].partition_count;
4315 mb_type= p_mb_type_info[mb_type].type;
4316 }else{
4317 mb_type -= 5;
4318 goto decode_intra_mb;
4320 }else{
4321 assert(h->slice_type_nos == FF_I_TYPE);
4322 if(h->slice_type == FF_SI_TYPE && mb_type)
4323 mb_type--;
4324 decode_intra_mb:
4325 if(mb_type > 25){
4326 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4327 return -1;
4329 partition_count=0;
4330 cbp= i_mb_type_info[mb_type].cbp;
4331 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4332 mb_type= i_mb_type_info[mb_type].type;
4335 if(MB_FIELD)
4336 mb_type |= MB_TYPE_INTERLACED;
4338 h->slice_table[ mb_xy ]= h->slice_num;
4340 if(IS_INTRA_PCM(mb_type)){
4341 unsigned int x;
4343 // We assume these blocks are very rare so we do not optimize them.
4344 align_get_bits(&s->gb);
4346 // The pixels are stored in the same order as the levels in the h->mb array.
4347 for(x=0; x < (CHROMA ? 384 : 256); x++){
4348 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4351 // In deblocking, the quantizer is 0
4352 s->current_picture.qscale_table[mb_xy]= 0;
4353 // All coeffs are present
4354 memset(h->non_zero_count[mb_xy], 16, 16);
4356 s->current_picture.mb_type[mb_xy]= mb_type;
4357 return 0;
4360 if(MB_MBAFF){
4361 h->ref_count[0] <<= 1;
4362 h->ref_count[1] <<= 1;
4365 fill_caches(h, mb_type, 0);
4367 //mb_pred
4368 if(IS_INTRA(mb_type)){
4369 int pred_mode;
4370 // init_top_left_availability(h);
4371 if(IS_INTRA4x4(mb_type)){
4372 int i;
4373 int di = 1;
4374 if(dct8x8_allowed && get_bits1(&s->gb)){
4375 mb_type |= MB_TYPE_8x8DCT;
4376 di = 4;
4379 // fill_intra4x4_pred_table(h);
4380 for(i=0; i<16; i+=di){
4381 int mode= pred_intra_mode(h, i);
4383 if(!get_bits1(&s->gb)){
4384 const int rem_mode= get_bits(&s->gb, 3);
4385 mode = rem_mode + (rem_mode >= mode);
4388 if(di==4)
4389 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4390 else
4391 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4393 write_back_intra_pred_mode(h);
4394 if( check_intra4x4_pred_mode(h) < 0)
4395 return -1;
4396 }else{
4397 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4398 if(h->intra16x16_pred_mode < 0)
4399 return -1;
4401 if(CHROMA){
4402 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4403 if(pred_mode < 0)
4404 return -1;
4405 h->chroma_pred_mode= pred_mode;
4407 }else if(partition_count==4){
4408 int i, j, sub_partition_count[4], list, ref[2][4];
4410 if(h->slice_type_nos == FF_B_TYPE){
4411 for(i=0; i<4; i++){
4412 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4413 if(h->sub_mb_type[i] >=13){
4414 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4415 return -1;
4417 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4418 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4420 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4421 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4422 pred_direct_motion(h, &mb_type);
4423 h->ref_cache[0][scan8[4]] =
4424 h->ref_cache[1][scan8[4]] =
4425 h->ref_cache[0][scan8[12]] =
4426 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4428 }else{
4429 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4430 for(i=0; i<4; i++){
4431 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4432 if(h->sub_mb_type[i] >=4){
4433 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4434 return -1;
4436 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4437 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4441 for(list=0; list<h->list_count; list++){
4442 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4443 for(i=0; i<4; i++){
4444 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4445 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4446 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4447 if(tmp>=ref_count){
4448 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4449 return -1;
4451 ref[list][i]= tmp;
4452 }else{
4453 //FIXME
4454 ref[list][i] = -1;
4459 if(dct8x8_allowed)
4460 dct8x8_allowed = get_dct8x8_allowed(h);
4462 for(list=0; list<h->list_count; list++){
4463 for(i=0; i<4; i++){
4464 if(IS_DIRECT(h->sub_mb_type[i])) {
4465 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4466 continue;
4468 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4469 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4471 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4472 const int sub_mb_type= h->sub_mb_type[i];
4473 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4474 for(j=0; j<sub_partition_count[i]; j++){
4475 int mx, my;
4476 const int index= 4*i + block_width*j;
4477 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4478 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4479 mx += get_se_golomb(&s->gb);
4480 my += get_se_golomb(&s->gb);
4481 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4483 if(IS_SUB_8X8(sub_mb_type)){
4484 mv_cache[ 1 ][0]=
4485 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4486 mv_cache[ 1 ][1]=
4487 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4488 }else if(IS_SUB_8X4(sub_mb_type)){
4489 mv_cache[ 1 ][0]= mx;
4490 mv_cache[ 1 ][1]= my;
4491 }else if(IS_SUB_4X8(sub_mb_type)){
4492 mv_cache[ 8 ][0]= mx;
4493 mv_cache[ 8 ][1]= my;
4495 mv_cache[ 0 ][0]= mx;
4496 mv_cache[ 0 ][1]= my;
4498 }else{
4499 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4500 p[0] = p[1]=
4501 p[8] = p[9]= 0;
4505 }else if(IS_DIRECT(mb_type)){
4506 pred_direct_motion(h, &mb_type);
4507 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4508 }else{
4509 int list, mx, my, i;
4510 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4511 if(IS_16X16(mb_type)){
4512 for(list=0; list<h->list_count; list++){
4513 unsigned int val;
4514 if(IS_DIR(mb_type, 0, list)){
4515 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4516 if(val >= h->ref_count[list]){
4517 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4518 return -1;
4520 }else
4521 val= LIST_NOT_USED&0xFF;
4522 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4524 for(list=0; list<h->list_count; list++){
4525 unsigned int val;
4526 if(IS_DIR(mb_type, 0, list)){
4527 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4528 mx += get_se_golomb(&s->gb);
4529 my += get_se_golomb(&s->gb);
4530 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4532 val= pack16to32(mx,my);
4533 }else
4534 val=0;
4535 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4538 else if(IS_16X8(mb_type)){
4539 for(list=0; list<h->list_count; list++){
4540 for(i=0; i<2; i++){
4541 unsigned int val;
4542 if(IS_DIR(mb_type, i, list)){
4543 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4544 if(val >= h->ref_count[list]){
4545 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4546 return -1;
4548 }else
4549 val= LIST_NOT_USED&0xFF;
4550 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4553 for(list=0; list<h->list_count; list++){
4554 for(i=0; i<2; i++){
4555 unsigned int val;
4556 if(IS_DIR(mb_type, i, list)){
4557 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4558 mx += get_se_golomb(&s->gb);
4559 my += get_se_golomb(&s->gb);
4560 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4562 val= pack16to32(mx,my);
4563 }else
4564 val=0;
4565 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4568 }else{
4569 assert(IS_8X16(mb_type));
4570 for(list=0; list<h->list_count; list++){
4571 for(i=0; i<2; i++){
4572 unsigned int val;
4573 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4574 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4575 if(val >= h->ref_count[list]){
4576 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4577 return -1;
4579 }else
4580 val= LIST_NOT_USED&0xFF;
4581 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4584 for(list=0; list<h->list_count; list++){
4585 for(i=0; i<2; i++){
4586 unsigned int val;
4587 if(IS_DIR(mb_type, i, list)){
4588 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4589 mx += get_se_golomb(&s->gb);
4590 my += get_se_golomb(&s->gb);
4591 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4593 val= pack16to32(mx,my);
4594 }else
4595 val=0;
4596 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4602 if(IS_INTER(mb_type))
4603 write_back_motion(h, mb_type);
4605 if(!IS_INTRA16x16(mb_type)){
4606 cbp= get_ue_golomb(&s->gb);
4607 if(cbp > 47){
4608 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4609 return -1;
4612 if(CHROMA){
4613 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4614 else cbp= golomb_to_inter_cbp [cbp];
4615 }else{
4616 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4617 else cbp= golomb_to_inter_cbp_gray[cbp];
4620 h->cbp = cbp;
4622 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4623 if(get_bits1(&s->gb)){
4624 mb_type |= MB_TYPE_8x8DCT;
4625 h->cbp_table[mb_xy]= cbp;
4628 s->current_picture.mb_type[mb_xy]= mb_type;
4630 if(cbp || IS_INTRA16x16(mb_type)){
4631 int i8x8, i4x4, chroma_idx;
4632 int dquant;
4633 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4634 const uint8_t *scan, *scan8x8, *dc_scan;
4636 // fill_non_zero_count_cache(h);
4638 if(IS_INTERLACED(mb_type)){
4639 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4640 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4641 dc_scan= luma_dc_field_scan;
4642 }else{
4643 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4644 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4645 dc_scan= luma_dc_zigzag_scan;
4648 dquant= get_se_golomb(&s->gb);
4650 if( dquant > 25 || dquant < -26 ){
4651 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4652 return -1;
4655 s->qscale += dquant;
4656 if(((unsigned)s->qscale) > 51){
4657 if(s->qscale<0) s->qscale+= 52;
4658 else s->qscale-= 52;
4661 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4662 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4663 if(IS_INTRA16x16(mb_type)){
4664 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4665 return -1; //FIXME continue if partitioned and other return -1 too
4668 assert((cbp&15) == 0 || (cbp&15) == 15);
4670 if(cbp&15){
4671 for(i8x8=0; i8x8<4; i8x8++){
4672 for(i4x4=0; i4x4<4; i4x4++){
4673 const int index= i4x4 + 4*i8x8;
4674 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4675 return -1;
4679 }else{
4680 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4682 }else{
4683 for(i8x8=0; i8x8<4; i8x8++){
4684 if(cbp & (1<<i8x8)){
4685 if(IS_8x8DCT(mb_type)){
4686 DCTELEM *buf = &h->mb[64*i8x8];
4687 uint8_t *nnz;
4688 for(i4x4=0; i4x4<4; i4x4++){
4689 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4690 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4691 return -1;
4693 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4694 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4695 }else{
4696 for(i4x4=0; i4x4<4; i4x4++){
4697 const int index= i4x4 + 4*i8x8;
4699 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4700 return -1;
4704 }else{
4705 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4706 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4711 if(cbp&0x30){
4712 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4713 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4714 return -1;
4718 if(cbp&0x20){
4719 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4720 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4721 for(i4x4=0; i4x4<4; i4x4++){
4722 const int index= 16 + 4*chroma_idx + i4x4;
4723 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4724 return -1;
4728 }else{
4729 uint8_t * const nnz= &h->non_zero_count_cache[0];
4730 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4731 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4733 }else{
4734 uint8_t * const nnz= &h->non_zero_count_cache[0];
4735 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4736 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4737 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4739 s->current_picture.qscale_table[mb_xy]= s->qscale;
4740 write_back_non_zero_count(h);
4742 if(MB_MBAFF){
4743 h->ref_count[0] >>= 1;
4744 h->ref_count[1] >>= 1;
4747 return 0;
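/**
 * Decodes mb_field_decoding_flag with CABAC; the context depends on whether the left and top macroblock pairs of the same slice are field coded.
 */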
4750 static int decode_cabac_field_decoding_flag(H264Context *h) {
4751 MpegEncContext * const s = &h->s;
4752 const int mb_x = s->mb_x;
4753 const int mb_y = s->mb_y & ~1;
4754 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4755 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4757 unsigned int ctx = 0;
4759 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4760 ctx += 1;
4762 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4763 ctx += 1;
4766 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4769 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4770 uint8_t *state= &h->cabac_state[ctx_base];
4771 int mb_type;
4773 if(intra_slice){
4774 MpegEncContext * const s = &h->s;
4775 const int mba_xy = h->left_mb_xy[0];
4776 const int mbb_xy = h->top_mb_xy;
4777 int ctx=0;
4778 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4779 ctx++;
4780 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4781 ctx++;
4782 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4783 return 0; /* I4x4 */
4784 state += 2;
4785 }else{
4786 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4787 return 0; /* I4x4 */
4790 if( get_cabac_terminate( &h->cabac ) )
4791 return 25; /* PCM */
4793 mb_type = 1; /* I16x16 */
4794 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4795 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4796 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4797 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4798 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4799 return mb_type;
4802 static int decode_cabac_mb_type( H264Context *h ) {
4803 MpegEncContext * const s = &h->s;
4805 if( h->slice_type_nos == FF_I_TYPE ) {
4806 return decode_cabac_intra_mb_type(h, 3, 1);
4807 } else if( h->slice_type_nos == FF_P_TYPE ) {
4808 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4809 /* P-type */
4810 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4811 /* P_L0_D16x16, P_8x8 */
4812 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4813 } else {
4814 /* P_L0_D8x16, P_L0_D16x8 */
4815 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4817 } else {
4818 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4820 } else if( h->slice_type_nos == FF_B_TYPE ) {
4821 const int mba_xy = h->left_mb_xy[0];
4822 const int mbb_xy = h->top_mb_xy;
4823 int ctx = 0;
4824 int bits;
4826 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4827 ctx++;
4828 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4829 ctx++;
4831 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4832 return 0; /* B_Direct_16x16 */
4834 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4835 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4838 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4839 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4840 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4841 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4842 if( bits < 8 )
4843 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4844 else if( bits == 13 ) {
4845 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4846 } else if( bits == 14 )
4847 return 11; /* B_L1_L0_8x16 */
4848 else if( bits == 15 )
4849 return 22; /* B_8x8 */
4851 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4852 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4853 } else {
4854 /* TODO SI/SP frames? */
4855 return -1;
4859 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4860 MpegEncContext * const s = &h->s;
4861 int mba_xy, mbb_xy;
4862 int ctx = 0;
4864 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4865 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4866 mba_xy = mb_xy - 1;
4867 if( (mb_y&1)
4868 && h->slice_table[mba_xy] == h->slice_num
4869 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4870 mba_xy += s->mb_stride;
4871 if( MB_FIELD ){
4872 mbb_xy = mb_xy - s->mb_stride;
4873 if( !(mb_y&1)
4874 && h->slice_table[mbb_xy] == h->slice_num
4875 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4876 mbb_xy -= s->mb_stride;
4877 }else
4878 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4879 }else{
4880 int mb_xy = h->mb_xy;
4881 mba_xy = mb_xy - 1;
4882 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4885 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4886 ctx++;
4887 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4888 ctx++;
4890 if( h->slice_type_nos == FF_B_TYPE )
4891 ctx += 13;
4892 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4895 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4896 int mode = 0;
4898 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4899 return pred_mode;
4901 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4902 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4903 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4905 if( mode >= pred_mode )
4906 return mode + 1;
4907 else
4908 return mode;
4911 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4912 const int mba_xy = h->left_mb_xy[0];
4913 const int mbb_xy = h->top_mb_xy;
4915 int ctx = 0;
4917 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4918 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4919 ctx++;
4921 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4922 ctx++;
4924 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4925 return 0;
4927 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4928 return 1;
4929 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4930 return 2;
4931 else
4932 return 3;
4935 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4936 int cbp_b, cbp_a, ctx, cbp = 0;
4938 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4939 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4941 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4942 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4943 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4944 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4945 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4946 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4947 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4948 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
4949 return cbp;
4951 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4952 int ctx;
4953 int cbp_a, cbp_b;
4955 cbp_a = (h->left_cbp>>4)&0x03;
4956 cbp_b = (h-> top_cbp>>4)&0x03;
4958 ctx = 0;
4959 if( cbp_a > 0 ) ctx++;
4960 if( cbp_b > 0 ) ctx += 2;
4961 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4962 return 0;
4964 ctx = 4;
4965 if( cbp_a == 2 ) ctx++;
4966 if( cbp_b == 2 ) ctx += 2;
4967 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
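/**
 * Decodes mb_qp_delta with CABAC: a truncated unary value mapped back to a signed delta (+1, -1, +2, -2, ...).
 */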
4969 static int decode_cabac_mb_dqp( H264Context *h) {
4970 int ctx = 0;
4971 int val = 0;
4973 if( h->last_qscale_diff != 0 )
4974 ctx++;
4976 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
4977 if( ctx < 2 )
4978 ctx = 2;
4979 else
4980 ctx = 3;
4981 val++;
4982 if(val > 102) //prevent infinite loop
4983 return INT_MIN;
4986 if( val&0x01 )
4987 return (val + 1)/2;
4988 else
4989 return -(val + 1)/2;
4991 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
4992 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
4993 return 0; /* 8x8 */
4994 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
4995 return 1; /* 8x4 */
4996 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
4997 return 2; /* 4x8 */
4998 return 3; /* 4x4 */
5000 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5001 int type;
5002 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5003 return 0; /* B_Direct_8x8 */
5004 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5005 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5006 type = 3;
5007 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5008 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5009 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5010 type += 4;
5012 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5013 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5014 return type;
5017 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5018 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5021 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5022 int refa = h->ref_cache[list][scan8[n] - 1];
5023 int refb = h->ref_cache[list][scan8[n] - 8];
5024 int ref = 0;
5025 int ctx = 0;
5027 if( h->slice_type_nos == FF_B_TYPE) {
5028 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5029 ctx++;
5030 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5031 ctx += 2;
5032 } else {
5033 if( refa > 0 )
5034 ctx++;
5035 if( refb > 0 )
5036 ctx += 2;
5039 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5040 ref++;
5041 if( ctx < 4 )
5042 ctx = 4;
5043 else
5044 ctx = 5;
5045 if(ref >= 32 /*h->ref_list[list]*/){
5046 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5047 return 0; //FIXME we should return -1 and check the return everywhere
5050 return ref;
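/**
 * Decodes one motion vector difference component with CABAC: the context is chosen from the neighbours' MVD magnitudes, followed by a unary prefix up to 9, a bypass-coded exp-Golomb style suffix and a sign bit.
 */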
5053 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5054 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5055 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5056 int ctxbase = (l == 0) ? 40 : 47;
5057 int ctx, mvd;
5059 if( amvd < 3 )
5060 ctx = 0;
5061 else if( amvd > 32 )
5062 ctx = 2;
5063 else
5064 ctx = 1;
5066 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5067 return 0;
5069 mvd= 1;
5070 ctx= 3;
5071 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5072 mvd++;
5073 if( ctx < 6 )
5074 ctx++;
5077 if( mvd >= 9 ) {
5078 int k = 3;
5079 while( get_cabac_bypass( &h->cabac ) ) {
5080 mvd += 1 << k;
5081 k++;
5082 if(k>24){
5083 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5084 return INT_MIN;
5087 while( k-- ) {
5088 if( get_cabac_bypass( &h->cabac ) )
5089 mvd += 1 << k;
5092 return get_cabac_bypass_sign( &h->cabac, -mvd );
5095 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5096 int nza, nzb;
5097 int ctx = 0;
5099 if( is_dc ) {
5100 if( cat == 0 ) {
5101 nza = h->left_cbp&0x100;
5102 nzb = h-> top_cbp&0x100;
5103 } else {
5104 nza = (h->left_cbp>>(6+idx))&0x01;
5105 nzb = (h-> top_cbp>>(6+idx))&0x01;
5107 } else {
5108 if( cat == 4 ) {
5109 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5110 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5111 } else {
5112 assert(cat == 1 || cat == 2);
5113 nza = h->non_zero_count_cache[scan8[idx] - 1];
5114 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5118 if( nza > 0 )
5119 ctx++;
5121 if( nzb > 0 )
5122 ctx += 2;
5124 return ctx + 4 * cat;
5127 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5128 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5129 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5130 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5131 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
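/**
 * Decodes one CABAC-coded residual block: coded_block_flag, significance map
 * and last-significant flags, then the coefficient levels and signs.
 * Non-DC levels are scaled by qmul[] and written to block[]; DC blocks keep
 * the raw levels. is_dc selects the DC-only variants (cat 0 and 3).
 */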
5134 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5135 static const int significant_coeff_flag_offset[2][6] = {
5136 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5137 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5139 static const int last_coeff_flag_offset[2][6] = {
5140 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5141 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5143 static const int coeff_abs_level_m1_offset[6] = {
5144 227+0, 227+10, 227+20, 227+30, 227+39, 426
5146 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5147 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5148 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5149 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5150 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5151 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5152 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5153 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5154 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5156 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5157 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5158 * map node ctx => cabac ctx for level=1 */
5159 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5160 /* map node ctx => cabac ctx for level>1 */
5161 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5162 static const uint8_t coeff_abs_level_transition[2][8] = {
5163 /* update node ctx after decoding a level=1 */
5164 { 1, 2, 3, 3, 4, 5, 6, 7 },
5165 /* update node ctx after decoding a level>1 */
5166 { 4, 4, 4, 4, 5, 6, 7, 7 }
5169 int index[64];
5171 int av_unused last;
5172 int coeff_count = 0;
5173 int node_ctx = 0;
5175 uint8_t *significant_coeff_ctx_base;
5176 uint8_t *last_coeff_ctx_base;
5177 uint8_t *abs_level_m1_ctx_base;
5179 #ifndef ARCH_X86
5180 #define CABAC_ON_STACK
5181 #endif
5182 #ifdef CABAC_ON_STACK
5183 #define CC &cc
5184 CABACContext cc;
5185 cc.range = h->cabac.range;
5186 cc.low = h->cabac.low;
5187 cc.bytestream= h->cabac.bytestream;
5188 #else
5189 #define CC &h->cabac
5190 #endif
5193 /* cat: 0-> DC 16x16 n = 0
5194 * 1-> AC 16x16 n = luma4x4idx
5195 * 2-> Luma4x4 n = luma4x4idx
5196 * 3-> DC Chroma n = iCbCr
5197 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5198 * 5-> Luma8x8 n = 4 * luma8x8idx
5201 /* read coded block flag */
5202 if( is_dc || cat != 5 ) {
5203 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5204 if( !is_dc ) {
5205 if( cat == 4 )
5206 h->non_zero_count_cache[scan8[16+n]] = 0;
5207 else
5208 h->non_zero_count_cache[scan8[n]] = 0;
5211 #ifdef CABAC_ON_STACK
5212 h->cabac.range = cc.range ;
5213 h->cabac.low = cc.low ;
5214 h->cabac.bytestream= cc.bytestream;
5215 #endif
5216 return;
5220 significant_coeff_ctx_base = h->cabac_state
5221 + significant_coeff_flag_offset[MB_FIELD][cat];
5222 last_coeff_ctx_base = h->cabac_state
5223 + last_coeff_flag_offset[MB_FIELD][cat];
5224 abs_level_m1_ctx_base = h->cabac_state
5225 + coeff_abs_level_m1_offset[cat];
5227 if( !is_dc && cat == 5 ) {
5228 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5229 for(last= 0; last < coefs; last++) { \
5230 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5231 if( get_cabac( CC, sig_ctx )) { \
5232 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5233 index[coeff_count++] = last; \
5234 if( get_cabac( CC, last_ctx ) ) { \
5235 last= max_coeff; \
5236 break; \
5240 if( last == max_coeff -1 ) {\
5241 index[coeff_count++] = last;\
5243 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5244 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5245 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5246 } else {
5247 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5248 #else
5249 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5250 } else {
5251 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5252 #endif
5254 assert(coeff_count > 0);
5256 if( is_dc ) {
5257 if( cat == 0 )
5258 h->cbp_table[h->mb_xy] |= 0x100;
5259 else
5260 h->cbp_table[h->mb_xy] |= 0x40 << n;
5261 } else {
5262 if( cat == 5 )
5263 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5264 else if( cat == 4 )
5265 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5266 else {
5267 assert( cat == 1 || cat == 2 );
5268 h->non_zero_count_cache[scan8[n]] = coeff_count;
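/* Decode the coefficient levels in reverse scanning order: for each significant
 * coefficient a level==1 flag, then a truncated unary part with a bypass-coded
 * Exp-Golomb escape for levels >= 15, then the sign. node_ctx tracks the level
 * context state machine via coeff_abs_level_transition[]. */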
5272 do {
5273 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5275 int j= scantable[index[--coeff_count]];
5277 if( get_cabac( CC, ctx ) == 0 ) {
5278 node_ctx = coeff_abs_level_transition[0][node_ctx];
5279 if( is_dc ) {
5280 block[j] = get_cabac_bypass_sign( CC, -1);
5281 }else{
5282 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5284 } else {
5285 int coeff_abs = 2;
5286 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5287 node_ctx = coeff_abs_level_transition[1][node_ctx];
5289 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5290 coeff_abs++;
5293 if( coeff_abs >= 15 ) {
5294 int j = 0;
5295 while( get_cabac_bypass( CC ) ) {
5296 j++;
5299 coeff_abs=1;
5300 while( j-- ) {
5301 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5303 coeff_abs+= 14;
5306 if( is_dc ) {
5307 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5308 }else{
5309 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5312 } while( coeff_count );
5313 #ifdef CABAC_ON_STACK
5314 h->cabac.range = cc.range ;
5315 h->cabac.low = cc.low ;
5316 h->cabac.bytestream= cc.bytestream;
5317 #endif
5321 #ifndef CONFIG_SMALL
5322 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5323 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5326 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5327 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5329 #endif
5331 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5332 #ifdef CONFIG_SMALL
5333 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5334 #else
5335 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5336 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5337 #endif
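/**
 * Computes top_mb_xy and left_mb_xy[0] for the current macroblock, adjusting
 * the top neighbour for MBAFF macroblock pairs and field pictures, so that the
 * CABAC neighbour contexts are derived from the correct macroblocks.
 */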
5340 static inline void compute_mb_neighbors(H264Context *h)
5342 MpegEncContext * const s = &h->s;
5343 const int mb_xy = h->mb_xy;
5344 h->top_mb_xy = mb_xy - s->mb_stride;
5345 h->left_mb_xy[0] = mb_xy - 1;
5346 if(FRAME_MBAFF){
5347 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5348 const int top_pair_xy = pair_xy - s->mb_stride;
5349 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5350 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5351 const int curr_mb_frame_flag = !MB_FIELD;
5352 const int bottom = (s->mb_y & 1);
5353 if (bottom
5354 ? !curr_mb_frame_flag // bottom macroblock
5355 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5357 h->top_mb_xy -= s->mb_stride;
5359 if (left_mb_frame_flag != curr_mb_frame_flag) {
5360 h->left_mb_xy[0] = pair_xy - 1;
5362 } else if (FIELD_PICTURE) {
5363 h->top_mb_xy -= s->mb_stride;
5365 return;
5369 * decodes a macroblock
5370 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5372 static int decode_mb_cabac(H264Context *h) {
5373 MpegEncContext * const s = &h->s;
5374 int mb_xy;
5375 int mb_type, partition_count, cbp = 0;
5376 int dct8x8_allowed= h->pps.transform_8x8_mode;
5378 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5380 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5382 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5383 if( h->slice_type_nos != FF_I_TYPE ) {
5384 int skip;
5385 /* a skipped mb needs the aff flag from the following mb */
5386 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5387 predict_field_decoding_flag(h);
5388 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5389 skip = h->next_mb_skipped;
5390 else
5391 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5392 /* read skip flags */
5393 if( skip ) {
5394 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5395 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5396 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5397 if(h->next_mb_skipped)
5398 predict_field_decoding_flag(h);
5399 else
5400 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5403 decode_mb_skip(h);
5405 h->cbp_table[mb_xy] = 0;
5406 h->chroma_pred_mode_table[mb_xy] = 0;
5407 h->last_qscale_diff = 0;
5409 return 0;
5413 if(FRAME_MBAFF){
5414 if( (s->mb_y&1) == 0 )
5415 h->mb_mbaff =
5416 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5419 h->prev_mb_skipped = 0;
5421 compute_mb_neighbors(h);
5422 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5423 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5424 return -1;
5427 if( h->slice_type_nos == FF_B_TYPE ) {
5428 if( mb_type < 23 ){
5429 partition_count= b_mb_type_info[mb_type].partition_count;
5430 mb_type= b_mb_type_info[mb_type].type;
5431 }else{
5432 mb_type -= 23;
5433 goto decode_intra_mb;
5435 } else if( h->slice_type_nos == FF_P_TYPE ) {
5436 if( mb_type < 5) {
5437 partition_count= p_mb_type_info[mb_type].partition_count;
5438 mb_type= p_mb_type_info[mb_type].type;
5439 } else {
5440 mb_type -= 5;
5441 goto decode_intra_mb;
5443 } else {
5444 if(h->slice_type == FF_SI_TYPE && mb_type)
5445 mb_type--;
5446 assert(h->slice_type_nos == FF_I_TYPE);
5447 decode_intra_mb:
5448 partition_count = 0;
5449 cbp= i_mb_type_info[mb_type].cbp;
5450 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5451 mb_type= i_mb_type_info[mb_type].type;
5453 if(MB_FIELD)
5454 mb_type |= MB_TYPE_INTERLACED;
5456 h->slice_table[ mb_xy ]= h->slice_num;
5458 if(IS_INTRA_PCM(mb_type)) {
5459 const uint8_t *ptr;
5461 // We assume these blocks are very rare so we do not optimize them.
5462 // FIXME The following two lines get the bitstream position in the CABAC
5463 // decoder; this should probably be done by a function in cabac.h (or cabac.c).
5464 ptr= h->cabac.bytestream;
5465 if(h->cabac.low&0x1) ptr--;
5466 if(CABAC_BITS==16){
5467 if(h->cabac.low&0x1FF) ptr--;
5470 // The pixels are stored in the same order as levels in h->mb array.
5471 memcpy(h->mb, ptr, 256); ptr+=256;
5472 if(CHROMA){
5473 memcpy(h->mb+128, ptr, 128); ptr+=128;
5476 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5478 // All blocks are present
5479 h->cbp_table[mb_xy] = 0x1ef;
5480 h->chroma_pred_mode_table[mb_xy] = 0;
5481 // In deblocking, the quantizer is 0
5482 s->current_picture.qscale_table[mb_xy]= 0;
5483 // All coeffs are present
5484 memset(h->non_zero_count[mb_xy], 16, 16);
5485 s->current_picture.mb_type[mb_xy]= mb_type;
5486 h->last_qscale_diff = 0;
5487 return 0;
5490 if(MB_MBAFF){
5491 h->ref_count[0] <<= 1;
5492 h->ref_count[1] <<= 1;
5495 fill_caches(h, mb_type, 0);
5497 if( IS_INTRA( mb_type ) ) {
5498 int i, pred_mode;
5499 if( IS_INTRA4x4( mb_type ) ) {
5500 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5501 mb_type |= MB_TYPE_8x8DCT;
5502 for( i = 0; i < 16; i+=4 ) {
5503 int pred = pred_intra_mode( h, i );
5504 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5505 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5507 } else {
5508 for( i = 0; i < 16; i++ ) {
5509 int pred = pred_intra_mode( h, i );
5510 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5512 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5515 write_back_intra_pred_mode(h);
5516 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5517 } else {
5518 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5519 if( h->intra16x16_pred_mode < 0 ) return -1;
5521 if(CHROMA){
5522 h->chroma_pred_mode_table[mb_xy] =
5523 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5525 pred_mode= check_intra_pred_mode( h, pred_mode );
5526 if( pred_mode < 0 ) return -1;
5527 h->chroma_pred_mode= pred_mode;
5529 } else if( partition_count == 4 ) {
5530 int i, j, sub_partition_count[4], list, ref[2][4];
5532 if( h->slice_type_nos == FF_B_TYPE ) {
5533 for( i = 0; i < 4; i++ ) {
5534 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5535 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5536 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5538 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5539 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5540 pred_direct_motion(h, &mb_type);
5541 h->ref_cache[0][scan8[4]] =
5542 h->ref_cache[1][scan8[4]] =
5543 h->ref_cache[0][scan8[12]] =
5544 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5545 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5546 for( i = 0; i < 4; i++ )
5547 if( IS_DIRECT(h->sub_mb_type[i]) )
5548 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5551 } else {
5552 for( i = 0; i < 4; i++ ) {
5553 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5554 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5555 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5559 for( list = 0; list < h->list_count; list++ ) {
5560 for( i = 0; i < 4; i++ ) {
5561 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5562 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5563 if( h->ref_count[list] > 1 )
5564 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5565 else
5566 ref[list][i] = 0;
5567 } else {
5568 ref[list][i] = -1;
5570 h->ref_cache[list][ scan8[4*i]+1 ]=
5571 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5575 if(dct8x8_allowed)
5576 dct8x8_allowed = get_dct8x8_allowed(h);
5578 for(list=0; list<h->list_count; list++){
5579 for(i=0; i<4; i++){
5580 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5581 if(IS_DIRECT(h->sub_mb_type[i])){
5582 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5583 continue;
5586 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5587 const int sub_mb_type= h->sub_mb_type[i];
5588 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5589 for(j=0; j<sub_partition_count[i]; j++){
5590 int mpx, mpy;
5591 int mx, my;
5592 const int index= 4*i + block_width*j;
5593 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5594 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5595 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5597 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5598 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5599 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5601 if(IS_SUB_8X8(sub_mb_type)){
5602 mv_cache[ 1 ][0]=
5603 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5604 mv_cache[ 1 ][1]=
5605 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5607 mvd_cache[ 1 ][0]=
5608 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5609 mvd_cache[ 1 ][1]=
5610 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5611 }else if(IS_SUB_8X4(sub_mb_type)){
5612 mv_cache[ 1 ][0]= mx;
5613 mv_cache[ 1 ][1]= my;
5615 mvd_cache[ 1 ][0]= mx - mpx;
5616 mvd_cache[ 1 ][1]= my - mpy;
5617 }else if(IS_SUB_4X8(sub_mb_type)){
5618 mv_cache[ 8 ][0]= mx;
5619 mv_cache[ 8 ][1]= my;
5621 mvd_cache[ 8 ][0]= mx - mpx;
5622 mvd_cache[ 8 ][1]= my - mpy;
5624 mv_cache[ 0 ][0]= mx;
5625 mv_cache[ 0 ][1]= my;
5627 mvd_cache[ 0 ][0]= mx - mpx;
5628 mvd_cache[ 0 ][1]= my - mpy;
5630 }else{
5631 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5632 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5633 p[0] = p[1] = p[8] = p[9] = 0;
5634 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5638 } else if( IS_DIRECT(mb_type) ) {
5639 pred_direct_motion(h, &mb_type);
5640 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5641 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5642 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5643 } else {
5644 int list, mx, my, i, mpx, mpy;
5645 if(IS_16X16(mb_type)){
5646 for(list=0; list<h->list_count; list++){
5647 if(IS_DIR(mb_type, 0, list)){
5648 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5649 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5650 }else
5651 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5653 for(list=0; list<h->list_count; list++){
5654 if(IS_DIR(mb_type, 0, list)){
5655 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5657 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5658 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5659 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5661 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5662 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5663 }else
5664 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5667 else if(IS_16X8(mb_type)){
5668 for(list=0; list<h->list_count; list++){
5669 for(i=0; i<2; i++){
5670 if(IS_DIR(mb_type, i, list)){
5671 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5672 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5673 }else
5674 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5677 for(list=0; list<h->list_count; list++){
5678 for(i=0; i<2; i++){
5679 if(IS_DIR(mb_type, i, list)){
5680 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5681 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5682 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5683 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5685 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5686 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5687 }else{
5688 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5689 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5693 }else{
5694 assert(IS_8X16(mb_type));
5695 for(list=0; list<h->list_count; list++){
5696 for(i=0; i<2; i++){
5697 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5698 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5699 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5700 }else
5701 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5704 for(list=0; list<h->list_count; list++){
5705 for(i=0; i<2; i++){
5706 if(IS_DIR(mb_type, i, list)){
5707 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5708 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5709 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5711 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5712 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5713 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5714 }else{
5715 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5716 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5723 if( IS_INTER( mb_type ) ) {
5724 h->chroma_pred_mode_table[mb_xy] = 0;
5725 write_back_motion( h, mb_type );
5728 if( !IS_INTRA16x16( mb_type ) ) {
5729 cbp = decode_cabac_mb_cbp_luma( h );
5730 if(CHROMA)
5731 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5734 h->cbp_table[mb_xy] = h->cbp = cbp;
5736 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5737 if( decode_cabac_mb_transform_size( h ) )
5738 mb_type |= MB_TYPE_8x8DCT;
5740 s->current_picture.mb_type[mb_xy]= mb_type;
5742 if( cbp || IS_INTRA16x16( mb_type ) ) {
5743 const uint8_t *scan, *scan8x8, *dc_scan;
5744 const uint32_t *qmul;
5745 int dqp;
5747 if(IS_INTERLACED(mb_type)){
5748 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5749 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5750 dc_scan= luma_dc_field_scan;
5751 }else{
5752 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5753 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5754 dc_scan= luma_dc_zigzag_scan;
5757 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5758 if( dqp == INT_MIN ){
5759 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5760 return -1;
5762 s->qscale += dqp;
5763 if(((unsigned)s->qscale) > 51){
5764 if(s->qscale<0) s->qscale+= 52;
5765 else s->qscale-= 52;
5767 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5768 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5770 if( IS_INTRA16x16( mb_type ) ) {
5771 int i;
5772 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5773 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5775 if( cbp&15 ) {
5776 qmul = h->dequant4_coeff[0][s->qscale];
5777 for( i = 0; i < 16; i++ ) {
5778 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5779 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5781 } else {
5782 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5784 } else {
5785 int i8x8, i4x4;
5786 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5787 if( cbp & (1<<i8x8) ) {
5788 if( IS_8x8DCT(mb_type) ) {
5789 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5790 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5791 } else {
5792 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5793 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5794 const int index = 4*i8x8 + i4x4;
5795 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5796 //START_TIMER
5797 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5798 //STOP_TIMER("decode_residual")
5801 } else {
5802 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5803 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5808 if( cbp&0x30 ){
5809 int c;
5810 for( c = 0; c < 2; c++ ) {
5811 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5812 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5816 if( cbp&0x20 ) {
5817 int c, i;
5818 for( c = 0; c < 2; c++ ) {
5819 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5820 for( i = 0; i < 4; i++ ) {
5821 const int index = 16 + 4 * c + i;
5822 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5823 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5826 } else {
5827 uint8_t * const nnz= &h->non_zero_count_cache[0];
5828 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5829 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5831 } else {
5832 uint8_t * const nnz= &h->non_zero_count_cache[0];
5833 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5834 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5835 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5836 h->last_qscale_diff = 0;
5839 s->current_picture.qscale_table[mb_xy]= s->qscale;
5840 write_back_non_zero_count(h);
5842 if(MB_MBAFF){
5843 h->ref_count[0] >>= 1;
5844 h->ref_count[1] >>= 1;
5847 return 0;
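/**
 * Deblocks one vertical luma edge. For bS < 4 the normal tc0-clipped filter
 * from dsputil is used; bS == 4 (intra macroblock edge) runs the strong filter
 * inline over all 16 rows of the edge.
 */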
5851 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5852 int i, d;
5853 const int index_a = qp + h->slice_alpha_c0_offset;
5854 const int alpha = (alpha_table+52)[index_a];
5855 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5857 if( bS[0] < 4 ) {
5858 int8_t tc[4];
5859 for(i=0; i<4; i++)
5860 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5861 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5862 } else {
5863 /* 16px edge length, because bS=4 is triggered by being at
5864 * the edge of an intra MB, so all 4 bS are the same */
5865 for( d = 0; d < 16; d++ ) {
5866 const int p0 = pix[-1];
5867 const int p1 = pix[-2];
5868 const int p2 = pix[-3];
5870 const int q0 = pix[0];
5871 const int q1 = pix[1];
5872 const int q2 = pix[2];
5874 if( FFABS( p0 - q0 ) < alpha &&
5875 FFABS( p1 - p0 ) < beta &&
5876 FFABS( q1 - q0 ) < beta ) {
5878 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5879 if( FFABS( p2 - p0 ) < beta)
5881 const int p3 = pix[-4];
5882 /* p0', p1', p2' */
5883 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5884 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5885 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5886 } else {
5887 /* p0' */
5888 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5890 if( FFABS( q2 - q0 ) < beta)
5892 const int q3 = pix[3];
5893 /* q0', q1', q2' */
5894 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5895 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5896 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5897 } else {
5898 /* q0' */
5899 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5901 }else{
5902 /* p0', q0' */
5903 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5904 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5906 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
5908 pix += stride;
5912 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5913 int i;
5914 const int index_a = qp + h->slice_alpha_c0_offset;
5915 const int alpha = (alpha_table+52)[index_a];
5916 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5918 if( bS[0] < 4 ) {
5919 int8_t tc[4];
5920 for(i=0; i<4; i++)
5921 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5922 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5923 } else {
5924 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5928 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5929 int i;
5930 for( i = 0; i < 16; i++, pix += stride) {
5931 int index_a;
5932 int alpha;
5933 int beta;
5935 int qp_index;
5936 int bS_index = (i >> 1);
5937 if (!MB_FIELD) {
5938 bS_index &= ~1;
5939 bS_index |= (i & 1);
5942 if( bS[bS_index] == 0 ) {
5943 continue;
5946 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5947 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5948 alpha = (alpha_table+52)[index_a];
5949 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5951 if( bS[bS_index] < 4 ) {
5952 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5953 const int p0 = pix[-1];
5954 const int p1 = pix[-2];
5955 const int p2 = pix[-3];
5956 const int q0 = pix[0];
5957 const int q1 = pix[1];
5958 const int q2 = pix[2];
5960 if( FFABS( p0 - q0 ) < alpha &&
5961 FFABS( p1 - p0 ) < beta &&
5962 FFABS( q1 - q0 ) < beta ) {
5963 int tc = tc0;
5964 int i_delta;
5966 if( FFABS( p2 - p0 ) < beta ) {
5967 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
5968 tc++;
5970 if( FFABS( q2 - q0 ) < beta ) {
5971 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
5972 tc++;
5975 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
5976 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
5977 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
5978 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
5980 }else{
5981 const int p0 = pix[-1];
5982 const int p1 = pix[-2];
5983 const int p2 = pix[-3];
5985 const int q0 = pix[0];
5986 const int q1 = pix[1];
5987 const int q2 = pix[2];
5989 if( FFABS( p0 - q0 ) < alpha &&
5990 FFABS( p1 - p0 ) < beta &&
5991 FFABS( q1 - q0 ) < beta ) {
5993 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5994 if( FFABS( p2 - p0 ) < beta)
5996 const int p3 = pix[-4];
5997 /* p0', p1', p2' */
5998 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5999 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6000 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6001 } else {
6002 /* p0' */
6003 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6005 if( FFABS( q2 - q0 ) < beta)
6007 const int q3 = pix[3];
6008 /* q0', q1', q2' */
6009 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6010 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6011 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6012 } else {
6013 /* q0' */
6014 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6016 }else{
6017 /* p0', q0' */
6018 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6019 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6021 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6026 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6027 int i;
6028 for( i = 0; i < 8; i++, pix += stride) {
6029 int index_a;
6030 int alpha;
6031 int beta;
6033 int qp_index;
6034 int bS_index = i;
6036 if( bS[bS_index] == 0 ) {
6037 continue;
6040 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6041 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6042 alpha = (alpha_table+52)[index_a];
6043 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6045 if( bS[bS_index] < 4 ) {
6046 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6047 const int p0 = pix[-1];
6048 const int p1 = pix[-2];
6049 const int q0 = pix[0];
6050 const int q1 = pix[1];
6052 if( FFABS( p0 - q0 ) < alpha &&
6053 FFABS( p1 - p0 ) < beta &&
6054 FFABS( q1 - q0 ) < beta ) {
6055 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6057 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6058 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6059 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6061 }else{
6062 const int p0 = pix[-1];
6063 const int p1 = pix[-2];
6064 const int q0 = pix[0];
6065 const int q1 = pix[1];
6067 if( FFABS( p0 - q0 ) < alpha &&
6068 FFABS( p1 - p0 ) < beta &&
6069 FFABS( q1 - q0 ) < beta ) {
6071 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6072 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6073 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6079 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6080 int i, d;
6081 const int index_a = qp + h->slice_alpha_c0_offset;
6082 const int alpha = (alpha_table+52)[index_a];
6083 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6084 const int pix_next = stride;
6086 if( bS[0] < 4 ) {
6087 int8_t tc[4];
6088 for(i=0; i<4; i++)
6089 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6090 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6091 } else {
6092 /* 16px edge length, see filter_mb_edgev */
6093 for( d = 0; d < 16; d++ ) {
6094 const int p0 = pix[-1*pix_next];
6095 const int p1 = pix[-2*pix_next];
6096 const int p2 = pix[-3*pix_next];
6097 const int q0 = pix[0];
6098 const int q1 = pix[1*pix_next];
6099 const int q2 = pix[2*pix_next];
6101 if( FFABS( p0 - q0 ) < alpha &&
6102 FFABS( p1 - p0 ) < beta &&
6103 FFABS( q1 - q0 ) < beta ) {
6105 const int p3 = pix[-4*pix_next];
6106 const int q3 = pix[ 3*pix_next];
6108 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6109 if( FFABS( p2 - p0 ) < beta) {
6110 /* p0', p1', p2' */
6111 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6112 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6113 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6114 } else {
6115 /* p0' */
6116 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6118 if( FFABS( q2 - q0 ) < beta) {
6119 /* q0', q1', q2' */
6120 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6121 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6122 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6123 } else {
6124 /* q0' */
6125 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6127 }else{
6128 /* p0', q0' */
6129 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6130 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6132 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6134 pix++;
6139 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6140 int i;
6141 const int index_a = qp + h->slice_alpha_c0_offset;
6142 const int alpha = (alpha_table+52)[index_a];
6143 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6145 if( bS[0] < 4 ) {
6146 int8_t tc[4];
6147 for(i=0; i<4; i++)
6148 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6149 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6150 } else {
6151 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
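/**
 * Simplified deblocking for the common non-MBAFF case: boundary strengths are
 * computed with dsp.h264_loop_filter_strength(), and the whole macroblock is
 * skipped when all relevant QPs are at or below the alpha/beta threshold.
 * Falls back to filter_mb() whenever the fast path cannot be used.
 */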
6155 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6156 MpegEncContext * const s = &h->s;
6157 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6158 int mb_xy, mb_type;
6159 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6161 mb_xy = h->mb_xy;
6163 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6164 1 || // always true: force the full filter_mb() path below
6165 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6166 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6167 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6168 return;
6170 assert(!FRAME_MBAFF);
6172 mb_type = s->current_picture.mb_type[mb_xy];
6173 qp = s->current_picture.qscale_table[mb_xy];
6174 qp0 = s->current_picture.qscale_table[mb_xy-1];
6175 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6176 qpc = get_chroma_qp( h, 0, qp );
6177 qpc0 = get_chroma_qp( h, 0, qp0 );
6178 qpc1 = get_chroma_qp( h, 0, qp1 );
6179 qp0 = (qp + qp0 + 1) >> 1;
6180 qp1 = (qp + qp1 + 1) >> 1;
6181 qpc0 = (qpc + qpc0 + 1) >> 1;
6182 qpc1 = (qpc + qpc1 + 1) >> 1;
6183 qp_thresh = 15 - h->slice_alpha_c0_offset;
6184 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6185 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6186 return;
6188 if( IS_INTRA(mb_type) ) {
6189 int16_t bS4[4] = {4,4,4,4};
6190 int16_t bS3[4] = {3,3,3,3};
6191 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6192 if( IS_8x8DCT(mb_type) ) {
6193 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6194 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6195 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6196 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6197 } else {
6198 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6199 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6200 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6201 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6202 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6203 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6204 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6205 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6207 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6208 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6209 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6210 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6211 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6212 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6213 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6214 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6215 return;
6216 } else {
6217 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6218 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6219 int edges;
6220 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6221 edges = 4;
6222 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6223 } else {
6224 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6225 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6226 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6227 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6228 ? 3 : 0;
6229 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6230 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6231 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6232 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6234 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6235 bSv[0][0] = 0x0004000400040004ULL;
6236 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6237 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6239 #define FILTER(hv,dir,edge)\
6240 if(bSv[dir][edge]) {\
6241 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6242 if(!(edge&1)) {\
6243 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6244 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6247 if( edges == 1 ) {
6248 FILTER(v,0,0);
6249 FILTER(h,1,0);
6250 } else if( IS_8x8DCT(mb_type) ) {
6251 FILTER(v,0,0);
6252 FILTER(v,0,2);
6253 FILTER(h,1,0);
6254 FILTER(h,1,2);
6255 } else {
6256 FILTER(v,0,0);
6257 FILTER(v,0,1);
6258 FILTER(v,0,2);
6259 FILTER(v,0,3);
6260 FILTER(h,1,0);
6261 FILTER(h,1,1);
6262 FILTER(h,1,2);
6263 FILTER(h,1,3);
6265 #undef FILTER
6269 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6270 MpegEncContext * const s = &h->s;
6271 const int mb_xy= mb_x + mb_y*s->mb_stride;
6272 const int mb_type = s->current_picture.mb_type[mb_xy];
6273 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6274 int first_vertical_edge_done = 0;
6275 int dir;
6277 //for sufficiently low qp, filtering wouldn't do anything
6278 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6279 if(!FRAME_MBAFF){
6280 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6281 int qp = s->current_picture.qscale_table[mb_xy];
6282 if(qp <= qp_thresh
6283 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6284 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6285 return;
6289 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6290 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6291 int top_type, left_type[2];
6292 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6293 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6294 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6296 if(IS_8x8DCT(top_type)){
6297 h->non_zero_count_cache[4+8*0]=
6298 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6299 h->non_zero_count_cache[6+8*0]=
6300 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6302 if(IS_8x8DCT(left_type[0])){
6303 h->non_zero_count_cache[3+8*1]=
6304 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6306 if(IS_8x8DCT(left_type[1])){
6307 h->non_zero_count_cache[3+8*3]=
6308 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6311 if(IS_8x8DCT(mb_type)){
6312 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6313 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6315 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6316 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6318 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6319 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6321 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6322 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6326 if (FRAME_MBAFF
6327 // left mb is in picture
6328 && h->slice_table[mb_xy-1] != 255
6329 // and current and left pair do not have the same interlaced type
6330 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6331 // and left mb is in the same slice if deblocking_filter == 2
6332 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6333 /* First vertical edge is different in MBAFF frames
6334 * There are 8 different bS to compute and 2 different Qp
6336 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6337 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6338 int16_t bS[8];
6339 int qp[2];
6340 int bqp[2];
6341 int rqp[2];
6342 int mb_qp, mbn0_qp, mbn1_qp;
6343 int i;
6344 first_vertical_edge_done = 1;
6346 if( IS_INTRA(mb_type) )
6347 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6348 else {
6349 for( i = 0; i < 8; i++ ) {
6350 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6352 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6353 bS[i] = 4;
6354 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6355 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6356 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
6357 bS[i] = 2;
6358 else
6359 bS[i] = 1;
6363 mb_qp = s->current_picture.qscale_table[mb_xy];
6364 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6365 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6366 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6367 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6368 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6369 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6370 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6371 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6372 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6373 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6374 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6375 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6377 /* Filter edge */
6378 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6379 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6380 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6381 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6382 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6384 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6385 for( dir = 0; dir < 2; dir++ )
6387 int edge;
6388 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6389 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6390 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &15 ][0] + (MB_MBAFF ? 20 : 2);
6391 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&15 ][0] + (MB_MBAFF ? 20 : 2);
6392 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6394 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6395 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6396 // how often to recheck mv-based bS when iterating between edges
6397 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6398 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6399 // how often to recheck mv-based bS when iterating along each edge
6400 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6402 if (first_vertical_edge_done) {
6403 start = 1;
6404 first_vertical_edge_done = 0;
6407 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6408 start = 1;
6410 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6411 && !IS_INTERLACED(mb_type)
6412 && IS_INTERLACED(mbm_type)
6414 // This is a special case in the standard where the filtering must
6415 // be done twice (once for each field) even if we are in a
6416 // frame macroblock.
6418 static const int nnz_idx[4] = {4,5,6,3};
6419 unsigned int tmp_linesize = 2 * linesize;
6420 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6421 int mbn_xy = mb_xy - 2 * s->mb_stride;
6422 int qp;
6423 int i, j;
6424 int16_t bS[4];
6426 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6427 if( IS_INTRA(mb_type) ||
6428 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6429 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6430 } else {
6431 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6432 for( i = 0; i < 4; i++ ) {
6433 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6434 mbn_nnz[nnz_idx[i]] != 0 )
6435 bS[i] = 2;
6436 else
6437 bS[i] = 1;
6440 // Do not use s->qscale as the luma quantizer because it does not have
6441 // the same value in IPCM macroblocks.
6442 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6443 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6444 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6445 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6446 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6447 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6448 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6449 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6452 start = 1;
6455 /* Calculate bS */
6456 for( edge = start; edge < edges; edge++ ) {
6457 /* mbn_xy: neighbor macroblock */
6458 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6459 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6460 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6461 int16_t bS[4];
6462 int qp;
6464 if( (edge&1) && IS_8x8DCT(mb_type) )
6465 continue;
6467 if( IS_INTRA(mb_type) ||
6468 IS_INTRA(mbn_type) ) {
6469 int value;
6470 if (edge == 0) {
6471 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6472 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6474 value = 4;
6475 } else {
6476 value = 3;
6478 } else {
6479 value = 3;
6481 bS[0] = bS[1] = bS[2] = bS[3] = value;
6482 } else {
6483 int i, l;
6484 int mv_done;
6486 if( edge & mask_edge ) {
6487 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6488 mv_done = 1;
6490 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6491 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6492 mv_done = 1;
6494 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6495 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6496 int bn_idx= b_idx - (dir ? 8:1);
6497 int v = 0;
6499 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6500 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6501 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6502 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6505 if(h->slice_type_nos == FF_B_TYPE && v){
6506 v=0;
6507 for( l = 0; !v && l < 2; l++ ) {
6508 int ln= 1-l;
6509 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6510 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6511 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6515 bS[0] = bS[1] = bS[2] = bS[3] = v;
6516 mv_done = 1;
6518 else
6519 mv_done = 0;
6521 for( i = 0; i < 4; i++ ) {
6522 int x = dir == 0 ? edge : i;
6523 int y = dir == 0 ? i : edge;
6524 int b_idx= 8 + 4 + x + 8*y;
6525 int bn_idx= b_idx - (dir ? 8:1);
6527 if( h->non_zero_count_cache[b_idx] != 0 ||
6528 h->non_zero_count_cache[bn_idx] != 0 ) {
6529 bS[i] = 2;
6531 else if(!mv_done)
6533 bS[i] = 0;
6534 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6535 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6536 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6537 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6538 bS[i] = 1;
6539 break;
6543 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6544 bS[i] = 0;
6545 for( l = 0; l < 2; l++ ) {
6546 int ln= 1-l;
6547 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6548 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6549 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6550 bS[i] = 1;
6551 break;
6558 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6559 continue;
6562 /* Filter edge */
6563 // Do not use s->qscale as the luma quantizer because it does not have
6564 // the same value in IPCM macroblocks.
6565 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6566 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6567 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6568 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6569 if( dir == 0 ) {
6570 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6571 if( (edge&1) == 0 ) {
6572 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6573 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6574 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6575 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6577 } else {
6578 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6579 if( (edge&1) == 0 ) {
6580 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6581 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6582 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6583 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
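/**
 * Decodes the macroblocks of one slice: initializes the CABAC decoder and its
 * context states from the cabac_context_init tables (or takes the CAVLC path),
 * then loops over macroblocks and reports the decoded region to the error
 * concealment code.
 */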
6590 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6591 MpegEncContext * const s = &h->s;
6592 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6594 s->mb_skip_run= -1;
6596 if( h->pps.cabac ) {
6597 int i;
6599 /* realign */
6600 align_get_bits( &s->gb );
6602 /* init cabac */
6603 ff_init_cabac_states( &h->cabac);
6604 ff_init_cabac_decoder( &h->cabac,
6605 s->gb.buffer + get_bits_count(&s->gb)/8,
6606 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6607 /* calculate pre-state */
6608 for( i= 0; i < 460; i++ ) {
6609 int pre;
6610 if( h->slice_type_nos == FF_I_TYPE )
6611 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6612 else
6613 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6615 if( pre <= 63 )
6616 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6617 else
6618 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6621 for(;;){
6622 //START_TIMER
6623 int ret = decode_mb_cabac(h);
6624 int eos;
6625 //STOP_TIMER("decode_mb_cabac")
6627 if(ret>=0) hl_decode_mb(h);
6629 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6630 s->mb_y++;
6632 if(ret>=0) ret = decode_mb_cabac(h);
6634 if(ret>=0) hl_decode_mb(h);
6635 s->mb_y--;
6637 eos = get_cabac_terminate( &h->cabac );
6639 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6640 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6641 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6642 return -1;
6645 if( ++s->mb_x >= s->mb_width ) {
6646 s->mb_x = 0;
6647 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6648 ++s->mb_y;
6649 if(FIELD_OR_MBAFF_PICTURE) {
6650 ++s->mb_y;
6654 if( eos || s->mb_y >= s->mb_height ) {
6655 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6656 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6657 return 0;
6661 } else {
6662 for(;;){
6663 int ret = decode_mb_cavlc(h);
6665 if(ret>=0) hl_decode_mb(h);
6667 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6668 s->mb_y++;
6669 ret = decode_mb_cavlc(h);
6671 if(ret>=0) hl_decode_mb(h);
6672 s->mb_y--;
6675 if(ret<0){
6676 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6677 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6679 return -1;
6682 if(++s->mb_x >= s->mb_width){
6683 s->mb_x=0;
6684 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6685 ++s->mb_y;
6686 if(FIELD_OR_MBAFF_PICTURE) {
6687 ++s->mb_y;
6689 if(s->mb_y >= s->mb_height){
6690 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6692 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6693 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6695 return 0;
6696 }else{
6697 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6699 return -1;
6704 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6705 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6706 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6707 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6709 return 0;
6710 }else{
6711 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6713 return -1;
6719 #if 0
6720 for(;s->mb_y < s->mb_height; s->mb_y++){
6721 for(;s->mb_x < s->mb_width; s->mb_x++){
6722 int ret= decode_mb(h);
6724 hl_decode_mb(h);
6726 if(ret<0){
6727 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6728 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6730 return -1;
6733 if(++s->mb_x >= s->mb_width){
6734 s->mb_x=0;
6735 if(++s->mb_y >= s->mb_height){
6736 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6737 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6739 return 0;
6740 }else{
6741 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6743 return -1;
6748 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6749 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6750 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6752 return 0;
6753 }else{
6754 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6756 return -1;
6760 s->mb_x=0;
6761 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6763 #endif
6764 return -1; //not reached
6767 static int decode_unregistered_user_data(H264Context *h, int size){
6768 MpegEncContext * const s = &h->s;
6769 uint8_t user_data[16+256];
6770 int e, build, i;
6772 if(size<16)
6773 return -1;
6775 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6776 user_data[i]= get_bits(&s->gb, 8);
6779 user_data[i]= 0;
6780 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6781 if(e==1 && build>=0)
6782 h->x264_build= build;
6784 if(s->avctx->debug & FF_DEBUG_BUGS)
6785 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6787 for(; i<size; i++)
6788 skip_bits(&s->gb, 8);
6790 return 0;
6793 static int decode_sei(H264Context *h){
6794 MpegEncContext * const s = &h->s;
6796 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6797 int size, type;
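/* The SEI payloadType and payloadSize are coded as a run of 0xFF bytes,
 * each adding 255, terminated by one final byte; the loops below
 * accumulate the bytes until a non-0xFF byte is consumed. */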
6799 type=0;
6801 type+= show_bits(&s->gb, 8);
6802 }while(get_bits(&s->gb, 8) == 255);
6804 size=0;
6806 size+= show_bits(&s->gb, 8);
6807 }while(get_bits(&s->gb, 8) == 255);
6809 switch(type){
6810 case 5:
6811 if(decode_unregistered_user_data(h, size) < 0)
6812 return -1;
6813 break;
6814 default:
6815 skip_bits(&s->gb, 8*size);
6818 //FIXME check bits here
6819 align_get_bits(&s->gb);
6822 return 0;
6825 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6826 MpegEncContext * const s = &h->s;
6827 int cpb_count, i;
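/* hrd_parameters() as defined in Annex E; the fields are only parsed to
 * keep the bitstream position in sync, their values are discarded. */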
6828 cpb_count = get_ue_golomb(&s->gb) + 1;
6829 get_bits(&s->gb, 4); /* bit_rate_scale */
6830 get_bits(&s->gb, 4); /* cpb_size_scale */
6831 for(i=0; i<cpb_count; i++){
6832 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6833 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6834 get_bits1(&s->gb); /* cbr_flag */
6836 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6837 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6838 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6839 get_bits(&s->gb, 5); /* time_offset_length */
6842 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6843 MpegEncContext * const s = &h->s;
6844 int aspect_ratio_info_present_flag;
6845 unsigned int aspect_ratio_idc;
6846 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6848 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6850 if( aspect_ratio_info_present_flag ) {
6851 aspect_ratio_idc= get_bits(&s->gb, 8);
6852 if( aspect_ratio_idc == EXTENDED_SAR ) {
6853 sps->sar.num= get_bits(&s->gb, 16);
6854 sps->sar.den= get_bits(&s->gb, 16);
6855 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
6856 sps->sar= pixel_aspect[aspect_ratio_idc];
6857 }else{
6858 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6859 return -1;
6861 }else{
6862 sps->sar.num=
6863 sps->sar.den= 0;
6865 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6867 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6868 get_bits1(&s->gb); /* overscan_appropriate_flag */
6871 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6872 get_bits(&s->gb, 3); /* video_format */
6873 get_bits1(&s->gb); /* video_full_range_flag */
6874 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6875 get_bits(&s->gb, 8); /* colour_primaries */
6876 get_bits(&s->gb, 8); /* transfer_characteristics */
6877 get_bits(&s->gb, 8); /* matrix_coefficients */
6881 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6882 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6883 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6886 sps->timing_info_present_flag = get_bits1(&s->gb);
6887 if(sps->timing_info_present_flag){
6888 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6889 sps->time_scale = get_bits_long(&s->gb, 32);
6890 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6893 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6894 if(nal_hrd_parameters_present_flag)
6895 decode_hrd_parameters(h, sps);
6896 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6897 if(vcl_hrd_parameters_present_flag)
6898 decode_hrd_parameters(h, sps);
6899 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6900 get_bits1(&s->gb); /* low_delay_hrd_flag */
6901 get_bits1(&s->gb); /* pic_struct_present_flag */
6903 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6904 if(sps->bitstream_restriction_flag){
6905 unsigned int num_reorder_frames;
6906 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6907 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6908 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6909 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6910 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6911 num_reorder_frames= get_ue_golomb(&s->gb);
6912 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
6914 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6915 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6916 return -1;
6919 sps->num_reorder_frames= num_reorder_frames;
6922 return 0;
6925 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6926 const uint8_t *jvt_list, const uint8_t *fallback_list){
6927 MpegEncContext * const s = &h->s;
6928 int i, last = 8, next = 8;
6929 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
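/* scaling_list() syntax: each delta is se(v) and
 * nextScale = (lastScale + delta) & 0xff. A nextScale of 0 on the very
 * first coefficient selects the built-in JVT default matrix; a 0 later on
 * means the remaining coefficients repeat the previous value. */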
6930 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6931 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6932 else
6933 for(i=0;i<size;i++){
6934 if(next)
6935 next = (last + get_se_golomb(&s->gb)) & 0xff;
6936 if(!i && !next){ /* matrix not written, we use the preset one */
6937 memcpy(factors, jvt_list, size*sizeof(uint8_t));
6938 break;
6940 last = factors[scan[i]] = next ? next : last;
6944 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6945 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6946 MpegEncContext * const s = &h->s;
6947 int fallback_sps = !is_sps && sps->scaling_matrix_present;
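/* Fall-back rules from the spec: when a list is absent, the luma 4x4 and
 * the two 8x8 lists fall back either to the JVT defaults (in an SPS, or a
 * PPS whose SPS carries no matrices) or to the corresponding SPS list (in
 * a PPS whose SPS does carry matrices); the chroma 4x4 lists fall back to
 * the previously decoded list of the same prediction type. */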
6948 const uint8_t *fallback[4] = {
6949 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
6950 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
6951 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
6952 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
6954 if(get_bits1(&s->gb)){
6955 sps->scaling_matrix_present |= is_sps;
6956 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
6957 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
6958 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
6959 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
6960 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
6961 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
6962 if(is_sps || pps->transform_8x8_mode){
6963 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
6964 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
6966 } else if(fallback_sps) {
6967 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
6968 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
6973 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
6975 static void *
6976 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
6977 const size_t size, const char *name)
6979 if(id>=max) {
6980 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
6981 return NULL;
6984 if(!vec[id]) {
6985 vec[id] = av_mallocz(size);
6986 if(vec[id] == NULL)
6987 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
6989 return vec[id];
6992 static inline int decode_seq_parameter_set(H264Context *h){
6993 MpegEncContext * const s = &h->s;
6994 int profile_idc, level_idc;
6995 unsigned int sps_id, tmp, mb_width, mb_height;
6996 int i;
6997 SPS *sps;
6999 profile_idc= get_bits(&s->gb, 8);
7000 get_bits1(&s->gb); //constraint_set0_flag
7001 get_bits1(&s->gb); //constraint_set1_flag
7002 get_bits1(&s->gb); //constraint_set2_flag
7003 get_bits1(&s->gb); //constraint_set3_flag
7004 get_bits(&s->gb, 4); // reserved
7005 level_idc= get_bits(&s->gb, 8);
7006 sps_id= get_ue_golomb(&s->gb);
7008 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7009 if(sps == NULL)
7010 return -1;
7012 sps->profile_idc= profile_idc;
7013 sps->level_idc= level_idc;
7015 if(sps->profile_idc >= 100){ //high profile
7016 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7017 if(sps->chroma_format_idc == 3)
7018 get_bits1(&s->gb); //residual_color_transform_flag
7019 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7020 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7021 sps->transform_bypass = get_bits1(&s->gb);
7022 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7023 }else{
7024 sps->scaling_matrix_present = 0;
7025 sps->chroma_format_idc= 1;
7028 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7029 sps->poc_type= get_ue_golomb(&s->gb);
7031 if(sps->poc_type == 0){ //FIXME #define
7032 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7033 } else if(sps->poc_type == 1){//FIXME #define
7034 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7035 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7036 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7037 tmp= get_ue_golomb(&s->gb);
7039 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7040 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7041 return -1;
7043 sps->poc_cycle_length= tmp;
7045 for(i=0; i<sps->poc_cycle_length; i++)
7046 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7047 }else if(sps->poc_type != 2){
7048 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7049 return -1;
7052 tmp= get_ue_golomb(&s->gb);
7053 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7054 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7055 return -1;
7057 sps->ref_frame_count= tmp;
7058 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7059 mb_width= get_ue_golomb(&s->gb) + 1;
7060 mb_height= get_ue_golomb(&s->gb) + 1;
7061 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7062 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7063 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7064 return -1;
7066 sps->mb_width = mb_width;
7067 sps->mb_height= mb_height;
7069 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7070 if(!sps->frame_mbs_only_flag)
7071 sps->mb_aff= get_bits1(&s->gb);
7072 else
7073 sps->mb_aff= 0;
7075 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7077 #ifndef ALLOW_INTERLACE
7078 if(sps->mb_aff)
7079 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7080 #endif
7081 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7082 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7084 sps->crop= get_bits1(&s->gb);
7085 if(sps->crop){
7086 sps->crop_left = get_ue_golomb(&s->gb);
7087 sps->crop_right = get_ue_golomb(&s->gb);
7088 sps->crop_top = get_ue_golomb(&s->gb);
7089 sps->crop_bottom= get_ue_golomb(&s->gb);
7090 if(sps->crop_left || sps->crop_top){
7091 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7093 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7094 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7096 }else{
7097 sps->crop_left =
7098 sps->crop_right =
7099 sps->crop_top =
7100 sps->crop_bottom= 0;
7103 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7104 if( sps->vui_parameters_present_flag )
7105 decode_vui_parameters(h, sps);
7107 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7108 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7109 sps_id, sps->profile_idc, sps->level_idc,
7110 sps->poc_type,
7111 sps->ref_frame_count,
7112 sps->mb_width, sps->mb_height,
7113 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7114 sps->direct_8x8_inference_flag ? "8B8" : "",
7115 sps->crop_left, sps->crop_right,
7116 sps->crop_top, sps->crop_bottom,
7117 sps->vui_parameters_present_flag ? "VUI" : "",
7118 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
7121 return 0;
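/**
 * Precompute the luma QP -> chroma QP mapping for one chroma_qp_index_offset:
 * the offset is added to the luma QP, the result is clipped to [0, 51] and
 * then mapped through the chroma_qp[] table.
 */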
7124 static void
7125 build_qp_table(PPS *pps, int t, int index)
7127 int i;
7128 for(i = 0; i < 52; i++)
7129 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
7132 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7133 MpegEncContext * const s = &h->s;
7134 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7135 PPS *pps;
7137 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7138 if(pps == NULL)
7139 return -1;
7141 tmp= get_ue_golomb(&s->gb);
7142 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7143 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7144 return -1;
7146 pps->sps_id= tmp;
7148 pps->cabac= get_bits1(&s->gb);
7149 pps->pic_order_present= get_bits1(&s->gb);
7150 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7151 if(pps->slice_group_count > 1 ){
7152 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7153 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7154 switch(pps->mb_slice_group_map_type){
7155 case 0:
7156 #if 0
7157 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7158 | run_length[ i ] |1 |ue(v) |
7159 #endif
7160 break;
7161 case 2:
7162 #if 0
7163 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7164 |{ | | |
7165 | top_left_mb[ i ] |1 |ue(v) |
7166 | bottom_right_mb[ i ] |1 |ue(v) |
7167 | } | | |
7168 #endif
7169 break;
7170 case 3:
7171 case 4:
7172 case 5:
7173 #if 0
7174 | slice_group_change_direction_flag |1 |u(1) |
7175 | slice_group_change_rate_minus1 |1 |ue(v) |
7176 #endif
7177 break;
7178 case 6:
7179 #if 0
7180 | slice_group_id_cnt_minus1 |1 |ue(v) |
7181 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7182 |) | | |
7183 | slice_group_id[ i ] |1 |u(v) |
7184 #endif
7185 break;
7188 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7189 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7190 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7191 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7192 pps->ref_count[0]= pps->ref_count[1]= 1;
7193 return -1;
7196 pps->weighted_pred= get_bits1(&s->gb);
7197 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7198 pps->init_qp= get_se_golomb(&s->gb) + 26;
7199 pps->init_qs= get_se_golomb(&s->gb) + 26;
7200 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7201 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7202 pps->constrained_intra_pred= get_bits1(&s->gb);
7203 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7205 pps->transform_8x8_mode= 0;
7206 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7207 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7208 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
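/* Any RBSP data remaining at this point means the PPS carries the high
 * profile extension: transform_8x8_mode_flag, an optional picture scaling
 * matrix and second_chroma_qp_index_offset. */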
7210 if(get_bits_count(&s->gb) < bit_length){
7211 pps->transform_8x8_mode= get_bits1(&s->gb);
7212 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7213 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7214 } else {
7215 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7218 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7219 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7220 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7221 h->pps.chroma_qp_diff= 1;
7223 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7224 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7225 pps_id, pps->sps_id,
7226 pps->cabac ? "CABAC" : "CAVLC",
7227 pps->slice_group_count,
7228 pps->ref_count[0], pps->ref_count[1],
7229 pps->weighted_pred ? "weighted" : "",
7230 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7231 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7232 pps->constrained_intra_pred ? "CONSTR" : "",
7233 pps->redundant_pic_cnt_present ? "REDU" : "",
7234 pps->transform_8x8_mode ? "8x8DCT" : ""
7238 return 0;
7242 * Call decode_slice() for each context.
7244 * @param h h264 master context
7245 * @param context_count number of contexts to execute
7247 static void execute_decode_slices(H264Context *h, int context_count){
7248 MpegEncContext * const s = &h->s;
7249 AVCodecContext * const avctx= s->avctx;
7250 H264Context *hx;
7251 int i;
7253 if(context_count == 1) {
7254 decode_slice(avctx, h);
7255 } else {
7256 for(i = 1; i < context_count; i++) {
7257 hx = h->thread_context[i];
7258 hx->s.error_resilience = avctx->error_resilience;
7259 hx->s.error_count = 0;
7262 avctx->execute(avctx, (void *)decode_slice,
7263 (void **)h->thread_context, NULL, context_count);
7265 /* pull back stuff from slices to master context */
7266 hx = h->thread_context[context_count - 1];
7267 s->mb_x = hx->s.mb_x;
7268 s->mb_y = hx->s.mb_y;
7269 s->dropable = hx->s.dropable;
7270 s->picture_structure = hx->s.picture_structure;
7271 for(i = 1; i < context_count; i++)
7272 h->s.error_count += h->thread_context[i]->s.error_count;
7277 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7278 MpegEncContext * const s = &h->s;
7279 AVCodecContext * const avctx= s->avctx;
7280 int buf_index=0;
7281 H264Context *hx; ///< thread context
7282 int context_count = 0;
7284 h->max_contexts = avctx->thread_count;
7285 #if 0
7286 int i;
7287 for(i=0; i<50; i++){
7288 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7290 #endif
7291 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7292 h->current_slice = 0;
7293 if (!s->first_field)
7294 s->current_picture_ptr= NULL;
7297 for(;;){
7298 int consumed;
7299 int dst_length;
7300 int bit_length;
7301 const uint8_t *ptr;
7302 int i, nalsize = 0;
7303 int err;
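/* Two NAL demarcation schemes are handled: "AVC" streams (is_avc) prefix
 * every NAL unit with a big-endian length of nal_length_size bytes (taken
 * from the avcC extradata), while Annex B streams separate NAL units with
 * 00 00 01 start codes that have to be searched for. */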
7305 if(h->is_avc) {
7306 if(buf_index >= buf_size) break;
7307 nalsize = 0;
7308 for(i = 0; i < h->nal_length_size; i++)
7309 nalsize = (nalsize << 8) | buf[buf_index++];
7310 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7311 if(nalsize == 1){
7312 buf_index++;
7313 continue;
7314 }else{
7315 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7316 break;
7319 } else {
7320 // start code prefix search
7321 for(; buf_index + 3 < buf_size; buf_index++){
7322 // This should always succeed in the first iteration.
7323 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7324 break;
7327 if(buf_index+3 >= buf_size) break;
7329 buf_index+=3;
7332 hx = h->thread_context[context_count];
7334 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7335 if (ptr==NULL || dst_length < 0){
7336 return -1;
7338 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7339 dst_length--;
7340 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7342 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7343 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7346 if (h->is_avc && (nalsize != consumed)){
7347 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7348 consumed= nalsize;
7351 buf_index += consumed;
7353 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7354 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7355 continue;
7357 again:
7358 err = 0;
7359 switch(hx->nal_unit_type){
7360 case NAL_IDR_SLICE:
7361 if (h->nal_unit_type != NAL_IDR_SLICE) {
7362 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7363 return -1;
7365 idr(h); //FIXME ensure we don't lose frames if there is reordering
7366 case NAL_SLICE:
7367 init_get_bits(&hx->s.gb, ptr, bit_length);
7368 hx->intra_gb_ptr=
7369 hx->inter_gb_ptr= &hx->s.gb;
7370 hx->s.data_partitioning = 0;
7372 if((err = decode_slice_header(hx, h)))
7373 break;
7375 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7376 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7377 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7378 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7379 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7380 && avctx->skip_frame < AVDISCARD_ALL)
7381 context_count++;
7382 break;
7383 case NAL_DPA:
7384 init_get_bits(&hx->s.gb, ptr, bit_length);
7385 hx->intra_gb_ptr=
7386 hx->inter_gb_ptr= NULL;
7387 hx->s.data_partitioning = 1;
7389 err = decode_slice_header(hx, h);
7390 break;
7391 case NAL_DPB:
7392 init_get_bits(&hx->intra_gb, ptr, bit_length);
7393 hx->intra_gb_ptr= &hx->intra_gb;
7394 break;
7395 case NAL_DPC:
7396 init_get_bits(&hx->inter_gb, ptr, bit_length);
7397 hx->inter_gb_ptr= &hx->inter_gb;
7399 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7400 && s->context_initialized
7401 && s->hurry_up < 5
7402 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7403 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7404 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7405 && avctx->skip_frame < AVDISCARD_ALL)
7406 context_count++;
7407 break;
7408 case NAL_SEI:
7409 init_get_bits(&s->gb, ptr, bit_length);
7410 decode_sei(h);
7411 break;
7412 case NAL_SPS:
7413 init_get_bits(&s->gb, ptr, bit_length);
7414 decode_seq_parameter_set(h);
7416 if(s->flags& CODEC_FLAG_LOW_DELAY)
7417 s->low_delay=1;
7419 if(avctx->has_b_frames < 2)
7420 avctx->has_b_frames= !s->low_delay;
7421 break;
7422 case NAL_PPS:
7423 init_get_bits(&s->gb, ptr, bit_length);
7425 decode_picture_parameter_set(h, bit_length);
7427 break;
7428 case NAL_AUD:
7429 case NAL_END_SEQUENCE:
7430 case NAL_END_STREAM:
7431 case NAL_FILLER_DATA:
7432 case NAL_SPS_EXT:
7433 case NAL_AUXILIARY_SLICE:
7434 break;
7435 default:
7436 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7439 if(context_count == h->max_contexts) {
7440 execute_decode_slices(h, context_count);
7441 context_count = 0;
7444 if (err < 0)
7445 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7446 else if(err == 1) {
7447 /* Slice could not be decoded in parallel mode, copy down
7448 * NAL unit stuff to context 0 and restart. Note that
7449 * rbsp_buffer is not transferred, but since we no longer
7450 * run in parallel mode this should not be an issue. */
7451 h->nal_unit_type = hx->nal_unit_type;
7452 h->nal_ref_idc = hx->nal_ref_idc;
7453 hx = h;
7454 goto again;
7457 if(context_count)
7458 execute_decode_slices(h, context_count);
7459 return buf_index;
7463 * returns the number of bytes consumed for building the current frame
7465 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7466 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7467 if(pos+10>buf_size) pos=buf_size; // oops ;)
7469 return pos;
7472 static int decode_frame(AVCodecContext *avctx,
7473 void *data, int *data_size,
7474 const uint8_t *buf, int buf_size)
7476 H264Context *h = avctx->priv_data;
7477 MpegEncContext *s = &h->s;
7478 AVFrame *pict = data;
7479 int buf_index;
7481 s->flags= avctx->flags;
7482 s->flags2= avctx->flags2;
7484 /* end of stream, output what is still in the buffers */
7485 if (buf_size == 0) {
7486 Picture *out;
7487 int i, out_idx;
7489 //FIXME factorize this with the output code below
7490 out = h->delayed_pic[0];
7491 out_idx = 0;
7492 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7493 if(h->delayed_pic[i]->poc < out->poc){
7494 out = h->delayed_pic[i];
7495 out_idx = i;
7498 for(i=out_idx; h->delayed_pic[i]; i++)
7499 h->delayed_pic[i] = h->delayed_pic[i+1];
7501 if(out){
7502 *data_size = sizeof(AVFrame);
7503 *pict= *(AVFrame*)out;
7506 return 0;
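/* Layout of the avcC (AVCDecoderConfigurationRecord) extradata parsed
 * below: byte 0 is configurationVersion (must be 1), bytes 1-3 carry
 * profile, profile compatibility and level, the low 2 bits of byte 4 are
 * lengthSizeMinusOne and the low 5 bits of byte 5 give the number of SPS;
 * each SPS/PPS is stored with a 16-bit big-endian length prefix. */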
7509 if(h->is_avc && !h->got_avcC) {
7510 int i, cnt, nalsize;
7511 unsigned char *p = avctx->extradata;
7512 if(avctx->extradata_size < 7) {
7513 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7514 return -1;
7516 if(*p != 1) {
7517 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7518 return -1;
7520 /* sps and pps in the avcC always have length coded with 2 bytes,
7521 so put a fake nal_length_size = 2 while parsing them */
7522 h->nal_length_size = 2;
7523 // Decode sps from avcC
7524 cnt = *(p+5) & 0x1f; // Number of sps
7525 p += 6;
7526 for (i = 0; i < cnt; i++) {
7527 nalsize = AV_RB16(p) + 2;
7528 if(decode_nal_units(h, p, nalsize) < 0) {
7529 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7530 return -1;
7532 p += nalsize;
7534 // Decode pps from avcC
7535 cnt = *(p++); // Number of pps
7536 for (i = 0; i < cnt; i++) {
7537 nalsize = AV_RB16(p) + 2;
7538 if(decode_nal_units(h, p, nalsize) != nalsize) {
7539 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7540 return -1;
7542 p += nalsize;
7544 // Now store the correct NAL length size, which will be used to parse all other NALs
7545 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7546 // Do not reparse avcC
7547 h->got_avcC = 1;
7550 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7551 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7552 return -1;
7555 buf_index=decode_nal_units(h, buf, buf_size);
7556 if(buf_index < 0)
7557 return -1;
7559 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7560 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7561 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7562 return -1;
7565 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7566 Picture *out = s->current_picture_ptr;
7567 Picture *cur = s->current_picture_ptr;
7568 int i, pics, cross_idr, out_of_order, out_idx;
7570 s->mb_y= 0;
7572 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7573 s->current_picture_ptr->pict_type= s->pict_type;
7575 if(!s->dropable) {
7576 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7577 h->prev_poc_msb= h->poc_msb;
7578 h->prev_poc_lsb= h->poc_lsb;
7580 h->prev_frame_num_offset= h->frame_num_offset;
7581 h->prev_frame_num= h->frame_num;
7584 * FIXME: Error handling code does not seem to support interlaced
7585 * content when slices span multiple rows.
7586 * The ff_er_add_slice calls don't work right for bottom
7587 * fields; they cause massive erroneous error concealing.
7588 * Error marking covers both fields (top and bottom).
7589 * This causes a mismatched s->error_count
7590 * and a bad error table. Further, the error count goes to
7591 * INT_MAX when called for the bottom field, because mb_y is
7592 * past the end by one (caller's fault) and resync_mb_y != 0
7593 * causes problems for the first MB line, too.
7595 if (!FIELD_PICTURE)
7596 ff_er_frame_end(s);
7598 MPV_frame_end(s);
7600 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7601 /* Wait for second field. */
7602 *data_size = 0;
7604 } else {
7605 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7606 /* Derive top_field_first from field pocs. */
7607 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7609 //FIXME do something with unavailable reference frames
7611 /* Sort B-frames into display order */
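/* Roughly: decoded pictures are buffered in delayed_pic[] and released in
 * POC order. The candidate for output is the pending picture with the
 * smallest POC (the scan stops at a keyframe or a POC of 0, which restart
 * the order); it is only returned once more than avctx->has_b_frames
 * pictures are buffered, and if a picture would still come out in the
 * wrong order, has_b_frames is increased instead to add reorder delay. */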
7613 if(h->sps.bitstream_restriction_flag
7614 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7615 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7616 s->low_delay = 0;
7619 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7620 && !h->sps.bitstream_restriction_flag){
7621 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7622 s->low_delay= 0;
7625 pics = 0;
7626 while(h->delayed_pic[pics]) pics++;
7628 assert(pics <= MAX_DELAYED_PIC_COUNT);
7630 h->delayed_pic[pics++] = cur;
7631 if(cur->reference == 0)
7632 cur->reference = DELAYED_PIC_REF;
7634 out = h->delayed_pic[0];
7635 out_idx = 0;
7636 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7637 if(h->delayed_pic[i]->poc < out->poc){
7638 out = h->delayed_pic[i];
7639 out_idx = i;
7641 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7643 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7645 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7647 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7648 || (s->low_delay &&
7649 ((!cross_idr && out->poc > h->outputed_poc + 2)
7650 || cur->pict_type == FF_B_TYPE)))
7652 s->low_delay = 0;
7653 s->avctx->has_b_frames++;
7656 if(out_of_order || pics > s->avctx->has_b_frames){
7657 out->reference &= ~DELAYED_PIC_REF;
7658 for(i=out_idx; h->delayed_pic[i]; i++)
7659 h->delayed_pic[i] = h->delayed_pic[i+1];
7661 if(!out_of_order && pics > s->avctx->has_b_frames){
7662 *data_size = sizeof(AVFrame);
7664 h->outputed_poc = out->poc;
7665 *pict= *(AVFrame*)out;
7666 }else{
7667 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7672 assert(pict->data[0] || !*data_size);
7673 ff_print_debug_info(s, pict);
7674 //printf("out %d\n", (int)pict->data[0]);
7675 #if 0 //?
7677 /* Return the Picture timestamp as the frame number */
7678 /* we subtract 1 because it is added on utils.c */
7679 avctx->frame_number = s->picture_number - 1;
7680 #endif
7681 return get_consumed_bytes(s, buf_index, buf_size);
7683 #if 0
7684 static inline void fill_mb_avail(H264Context *h){
7685 MpegEncContext * const s = &h->s;
7686 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7688 if(s->mb_y){
7689 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7690 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7691 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7692 }else{
7693 h->mb_avail[0]=
7694 h->mb_avail[1]=
7695 h->mb_avail[2]= 0;
7697 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7698 h->mb_avail[4]= 1; //FIXME move out
7699 h->mb_avail[5]= 0; //FIXME move out
7701 #endif
7703 #ifdef TEST
7704 #undef printf
7705 #undef random
7706 #define COUNT 8000
7707 #define SIZE (COUNT*40)
7708 int main(void){
7709 int i;
7710 uint8_t temp[SIZE];
7711 PutBitContext pb;
7712 GetBitContext gb;
7713 // int int_temp[10000];
7714 DSPContext dsp;
7715 AVCodecContext avctx;
7717 dsputil_init(&dsp, &avctx);
7719 init_put_bits(&pb, temp, SIZE);
7720 printf("testing unsigned exp golomb\n");
7721 for(i=0; i<COUNT; i++){
7722 START_TIMER
7723 set_ue_golomb(&pb, i);
7724 STOP_TIMER("set_ue_golomb");
7726 flush_put_bits(&pb);
7728 init_get_bits(&gb, temp, 8*SIZE);
7729 for(i=0; i<COUNT; i++){
7730 int j, s;
7732 s= show_bits(&gb, 24);
7734 START_TIMER
7735 j= get_ue_golomb(&gb);
7736 if(j != i){
7737 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7738 // return -1;
7740 STOP_TIMER("get_ue_golomb");
7744 init_put_bits(&pb, temp, SIZE);
7745 printf("testing signed exp golomb\n");
7746 for(i=0; i<COUNT; i++){
7747 START_TIMER
7748 set_se_golomb(&pb, i - COUNT/2);
7749 STOP_TIMER("set_se_golomb");
7751 flush_put_bits(&pb);
7753 init_get_bits(&gb, temp, 8*SIZE);
7754 for(i=0; i<COUNT; i++){
7755 int j, s;
7757 s= show_bits(&gb, 24);
7759 START_TIMER
7760 j= get_se_golomb(&gb);
7761 if(j != i - COUNT/2){
7762 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7763 // return -1;
7765 STOP_TIMER("get_se_golomb");
7768 #if 0
7769 printf("testing 4x4 (I)DCT\n");
7771 DCTELEM block[16];
7772 uint8_t src[16], ref[16];
7773 uint64_t error= 0, max_error=0;
7775 for(i=0; i<COUNT; i++){
7776 int j;
7777 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7778 for(j=0; j<16; j++){
7779 ref[j]= random()%255;
7780 src[j]= random()%255;
7783 h264_diff_dct_c(block, src, ref, 4);
7785 //normalize
7786 for(j=0; j<16; j++){
7787 // printf("%d ", block[j]);
7788 block[j]= block[j]*4;
7789 if(j&1) block[j]= (block[j]*4 + 2)/5;
7790 if(j&4) block[j]= (block[j]*4 + 2)/5;
7792 // printf("\n");
7794 s->dsp.h264_idct_add(ref, block, 4);
7795 /* for(j=0; j<16; j++){
7796 printf("%d ", ref[j]);
7798 printf("\n");*/
7800 for(j=0; j<16; j++){
7801 int diff= FFABS(src[j] - ref[j]);
7803 error+= diff*diff;
7804 max_error= FFMAX(max_error, diff);
7807 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7808 printf("testing quantizer\n");
7809 for(qp=0; qp<52; qp++){
7810 for(i=0; i<16; i++)
7811 src1_block[i]= src2_block[i]= random()%255;
7814 printf("Testing NAL layer\n");
7816 uint8_t bitstream[COUNT];
7817 uint8_t nal[COUNT*2];
7818 H264Context h;
7819 memset(&h, 0, sizeof(H264Context));
7821 for(i=0; i<COUNT; i++){
7822 int zeros= i;
7823 int nal_length;
7824 int consumed;
7825 int out_length;
7826 uint8_t *out;
7827 int j;
7829 for(j=0; j<COUNT; j++){
7830 bitstream[j]= (random() % 255) + 1;
7833 for(j=0; j<zeros; j++){
7834 int pos= random() % COUNT;
7835 while(bitstream[pos] == 0){
7836 pos++;
7837 pos %= COUNT;
7839 bitstream[pos]=0;
7842 START_TIMER
7844 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7845 if(nal_length<0){
7846 printf("encoding failed\n");
7847 return -1;
7850 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7852 STOP_TIMER("NAL")
7854 if(out_length != COUNT){
7855 printf("incorrect length %d %d\n", out_length, COUNT);
7856 return -1;
7859 if(consumed != nal_length){
7860 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7861 return -1;
7864 if(memcmp(bitstream, out, COUNT)){
7865 printf("mismatch\n");
7866 return -1;
7869 #endif
7871 printf("Testing RBSP\n");
7874 return 0;
7876 #endif /* TEST */
7879 static av_cold int decode_end(AVCodecContext *avctx)
7881 H264Context *h = avctx->priv_data;
7882 MpegEncContext *s = &h->s;
7884 av_freep(&h->rbsp_buffer[0]);
7885 av_freep(&h->rbsp_buffer[1]);
7886 free_tables(h); //FIXME cleanup init stuff perhaps
7887 MPV_common_end(s);
7889 // memset(h, 0, sizeof(H264Context));
7891 return 0;
7895 AVCodec h264_decoder = {
7896 "h264",
7897 CODEC_TYPE_VIDEO,
7898 CODEC_ID_H264,
7899 sizeof(H264Context),
7900 decode_init,
7901 NULL,
7902 decode_end,
7903 decode_frame,
7904 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7905 .flush= flush_dpb,
7906 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
7909 #include "svq3.c"