Use golomb_to_pict_type instead of its duplicate, slice_type_map.
[FFMpeg-mirror/ffmpeg-vdpau.git] / libavcodec / h264.c
blob 6782d7f076abc3523809d854fe9dd13b2e19e60d
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
*/
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h"
33 #include "h264_parser.h"
34 #include "golomb.h"
35 #include "rectangle.h"
37 #include "cabac.h"
38 #ifdef ARCH_X86
39 #include "i386/h264_i386.h"
40 #endif
42 //#undef NDEBUG
43 #include <assert.h>
45 /**
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
*/
49 #define DELAYED_PIC_REF 4
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
71 static VLC run7_vlc;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
84 #else
85 return (a&0xFFFF) + (b<<16);
86 #endif
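/* Illustrative sketch (hypothetical helper, illustration only): packing a
 * motion-vector pair with pack16to32() and writing it through a uint32_t
 * pointer gives the same byte layout as storing the two int16_t components
 * one after the other, which is why the caches below are filled with single
 * 32-bit stores. */
#if 0
static void pack16to32_sketch(void)
{
    int16_t mv[2];
    *(uint32_t*)mv = pack16to32(3, -7);   /* mx=3, my=-7 packed into one word */
    assert(mv[0] == 3 && mv[1] == -7);    /* identical to mv[0]=3; mv[1]=-7;   */
}
#endif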
89 static const uint8_t rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
93 static const uint8_t div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
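/* The two tables above are just precomputed qp%6 and qp/6 for the legal luma
 * QP range 0..51, used by the dequantisation code.  A minimal sketch of the
 * invariant they satisfy (hypothetical check function, assuming qp stays in
 * that range): */
#if 0
static void check_rem6_div6(void)
{
    int qp;
    for(qp=0; qp<52; qp++)
        assert(rem6[qp] == qp % 6 && div6[qp] == qp / 6);
}
#endif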
97 static const int left_block_options[4][8]={
98 {0,1,2,3,7,10,8,11},
99 {2,2,3,3,8,11,8,11},
100 {0,0,1,1,7,10,7,10},
101 {0,2,0,2,7,10,7,10}
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 int * left_block;
110 int topleft_partition= -1;
111 int i;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
117 return;
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
126 if(FRAME_MBAFF){
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
138 if (bottom
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
144 if (bottom
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
154 if (bottom
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
163 if (bottom) {
164 left_block = left_block_options[1];
165 } else {
166 left_block= left_block_options[2];
168 } else {
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
178 if(for_deblock){
179 topleft_type = 0;
180 topright_type = 0;
181 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
186 int list;
187 for(list=0; list<h->list_count; list++){
188 //These values were changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
195 ref += h->b8_stride;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
201 }else{
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
230 }else{
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
239 }else{
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 }else{
259 int pred;
260 if(!(top_type & type_mask))
261 pred= -1;
262 else{
263 pred= 2;
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
270 for(i=0; i<2; i++){
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 }else{
275 int pred;
276 if(!(left_type[i] & type_mask))
277 pred= -1;
278 else{
279 pred= 2;
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/*
291 0 . T T. T T T T
292 1 L . .L . . . .
293 2 L . .L . . . .
294 3 . T TL . . . .
295 4 L . .L . . . .
296 5 L . .. . . . .
*/
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
299 if(top_type){
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
311 }else{
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
326 if(left_type[i]){
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
331 }else{
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
339 if( h->pps.cabac ) {
340 // top_cbp
341 if(top_type) {
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
344 h->top_cbp = 0x1C0;
345 } else {
346 h->top_cbp = 0;
348 // left_cbp
349 if (left_type[0]) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
352 h->left_cbp = 0x1C0;
353 } else {
354 h->left_cbp = 0;
356 if (left_type[0]) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
359 if (left_type[1]) {
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
364 #if 1
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
366 int list;
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
}*/
374 continue;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
389 }else{
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
397 for(i=0; i<2; i++){
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
406 }else{
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
415 continue;
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
422 }else{
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
432 }else{
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
438 continue;
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
451 if( h->pps.cabac ) {
452 /* XXX yuck, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
459 }else{
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
469 }else{
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
477 }else{
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
496 }else{
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
504 else
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
511 else
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
516 if(FRAME_MBAFF){
517 #define MAP_MVS\
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
528 if(MB_FIELD){
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
535 MAP_MVS
536 #undef MAP_F2F
537 }else{
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
544 MAP_MVS
545 #undef MAP_F2F
550 #endif
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
574 int i;
576 if(!(h->top_samples_available&0x8000)){
577 for(i=0; i<4; i++){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
579 if(status<0){
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
581 return -1;
582 } else if(status){
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
590 for(i=0; i<4; i++){
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
593 if(status<0){
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
595 return -1;
596 } else if(status){
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
603 return 0;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
614 if(mode > 6U) {
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
616 return -1;
619 if(!(h->top_samples_available&0x8000)){
620 mode= top[ mode ];
621 if(mode<0){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
623 return -1;
627 if((h->left_samples_available&0x8080) != 0x8080){
628 mode= left[ mode ];
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
632 if(mode<0){
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
634 return -1;
638 return mode;
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
653 else return min;
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
684 int i= left + top;
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
690 return i&31;
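/* Worked example of the prediction above: unavailable neighbours are cached
 * as 64, so only the low five bits carry a real count.  left=3, top=5 gives
 * the rounded average (3+5+1)>>1 = 4; left=3 with the top neighbour missing
 * gives (3+64)&31 = 3; with both missing, 128&31 = 0. */
#if 0
static void pred_nnz_sketch(void)     /* hypothetical, illustration only */
{
    assert(((((3+5)+1)>>1) & 31) == 4);
    assert(((3+64) & 31) == 3);
    assert(((64+64) & 31) == 0);
}
#endif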
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
699 if(FRAME_MBAFF){
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
701 const int16_t *mv;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
705 if(!MB_FIELD
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 if(!MB_FIELD
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 if(MB_FIELD
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 && i >= scan8[0]+8){
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
736 #undef SET_DIAG_MV
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
741 return topright_ref;
742 }else{
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 const int16_t * C;
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
768 /* mv_cache
769 B . . A T T T T
770 U . . L . . , .
771 U . . L . . . .
772 U . . L . . , .
773 . . . L . . . .
*/
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
783 if(left_ref==ref){
784 *mx= A[0];
785 *my= A[1];
786 }else if(top_ref==ref){
787 *mx= B[0];
788 *my= B[1];
789 }else{
790 *mx= C[0];
791 *my= C[1];
793 }else{
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
795 *mx= A[0];
796 *my= A[1];
797 }else{
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
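/* Small worked example of the median rule above (mid_pred is a median of
 * three): with neighbours A=(4,0), B=(10,2), C=(6,-2) and no unique
 * reference match, the predicted MV is (6,0), componentwise. */
#if 0
static void pred_motion_sketch(void)  /* hypothetical, illustration only */
{
    assert(mid_pred(4, 10,  6) == 6); /* x component */
    assert(mid_pred(0,  2, -2) == 0); /* y component */
}
#endif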
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
813 if(n==0){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
819 if(top_ref == ref){
820 *mx= B[0];
821 *my= B[1];
822 return;
824 }else{
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
830 if(left_ref == ref){
831 *mx= A[0];
832 *my= A[1];
833 return;
837 //RARE
838 pred_motion(h, n, 4, list, ref, mx, my);
842 * gets the directionally predicted 8x16 MV.
843 * @param n the block index
844 * @param mx the x component of the predicted motion vector
845 * @param my the y component of the predicted motion vector
847 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
848 if(n==0){
849 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
850 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
852 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
854 if(left_ref == ref){
855 *mx= A[0];
856 *my= A[1];
857 return;
859 }else{
860 const int16_t * C;
861 int diagonal_ref;
863 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
865 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
867 if(diagonal_ref == ref){
868 *mx= C[0];
869 *my= C[1];
870 return;
874 //RARE
875 pred_motion(h, n, 2, list, ref, mx, my);
878 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
879 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
880 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
882 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
884 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
885 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
886 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
888 *mx = *my = 0;
889 return;
892 pred_motion(h, 0, 4, 0, 0, mx, my);
894 return;
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
901 return 256;
902 }else{
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
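/* Worked example of the temporal-direct scale factor computed above: with
 * the current picture (poc=2) exactly half way between the list0 reference
 * (poc0=0) and the list1 reference (poc1=4), td=4, tb=2, tx=(16384+2)/4=4096
 * and the scale is (2*4096+32)>>6 = 128, so the derived L0 motion vector,
 * (scale*mv_col+128)>>8, becomes mv_col/2 as expected. */
#if 0
static void dist_scale_factor_sketch(void)  /* hypothetical, illustration only */
{
    int td = 4, tb = 2;
    int tx = (16384 + (FFABS(td) >> 1)) / td;
    assert(av_clip((tb*tx + 32) >> 6, -1024, 1023) == 128);
}
#endif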
909 static inline void direct_dist_scale_factor(H264Context * const h){
910 MpegEncContext * const s = &h->s;
911 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
912 const int poc1 = h->ref_list[1][0].poc;
913 int i, field;
914 for(field=0; field<2; field++){
915 const int poc = h->s.current_picture_ptr->field_poc[field];
916 const int poc1 = h->ref_list[1][0].field_poc[field];
917 for(i=0; i < 2*h->ref_count[0]; i++)
918 h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
921 for(i=0; i<h->ref_count[0]; i++){
922 h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
926 static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
927 MpegEncContext * const s = &h->s;
928 Picture * const ref1 = &h->ref_list[1][0];
929 int j, old_ref, rfield;
930 int start= mbafi ? 16 : 0;
931 int end = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
932 int interl= mbafi || s->picture_structure != PICT_FRAME;
934 /* bogus; fills in for missing frames */
935 memset(map[list], 0, sizeof(map[list]));
937 for(rfield=0; rfield<2; rfield++){
938 for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
939 int poc = ref1->ref_poc[colfield][list][old_ref];
941 if (!interl)
942 poc |= 3;
943 else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isn't needed
944 poc= (poc&~3) + rfield + 1;
946 for(j=start; j<end; j++){
947 if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
948 int cur_ref= mbafi ? (j-16)^field : j;
949 map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
950 if(rfield == field)
951 map[list][old_ref] = cur_ref;
952 break;
959 static inline void direct_ref_list_init(H264Context * const h){
960 MpegEncContext * const s = &h->s;
961 Picture * const ref1 = &h->ref_list[1][0];
962 Picture * const cur = s->current_picture_ptr;
963 int list, j, field;
964 int sidx= (s->picture_structure&1)^1;
965 int ref1sidx= (ref1->reference&1)^1;
967 for(list=0; list<2; list++){
968 cur->ref_count[sidx][list] = h->ref_count[list];
969 for(j=0; j<h->ref_count[list]; j++)
970 cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
973 if(s->picture_structure == PICT_FRAME){
974 memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
975 memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
978 cur->mbaff= FRAME_MBAFF;
980 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
981 return;
983 for(list=0; list<2; list++){
984 fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
985 for(field=0; field<2; field++)
986 fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
990 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
991 MpegEncContext * const s = &h->s;
992 int b8_stride = h->b8_stride;
993 int b4_stride = h->b_stride;
994 int mb_xy = h->mb_xy;
995 int mb_type_col[2];
996 const int16_t (*l1mv0)[2], (*l1mv1)[2];
997 const int8_t *l1ref0, *l1ref1;
998 const int is_b8x8 = IS_8X8(*mb_type);
999 unsigned int sub_mb_type;
1000 int i8, i4;
1002 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1004 if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
1005 if(!IS_INTERLACED(*mb_type)){ // AFR/FR -> AFL/FL
1006 int cur_poc = s->current_picture_ptr->poc;
1007 int *col_poc = h->ref_list[1]->field_poc;
1008 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1009 mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1010 b8_stride = 0;
1011 }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1012 int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1013 mb_xy += s->mb_stride*fieldoff;
1015 goto single_col;
1016 }else{ // AFL/AFR/FR/FL -> AFR/FR
1017 if(IS_INTERLACED(*mb_type)){ // AFL /FL -> AFR/FR
1018 mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1019 mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1020 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1021 b8_stride *= 3;
1022 b4_stride *= 6;
1023 //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1024 if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1025 && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1026 && !is_b8x8){
1027 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1028 *mb_type |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1029 }else{
1030 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1031 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1033 }else{ // AFR/FR -> AFR/FR
1034 single_col:
1035 mb_type_col[0] =
1036 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1037 if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1038 /* FIXME save sub mb types from previous frames (or derive from MVs)
1039 * so we know exactly what block size to use */
1040 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1041 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1042 }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1043 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1044 *mb_type |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1045 }else{
1046 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1047 *mb_type |= MB_TYPE_8x8|MB_TYPE_L0L1;
1052 l1mv0 = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1053 l1mv1 = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1054 l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1055 l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1056 if(!b8_stride){
1057 if(s->mb_y&1){
1058 l1ref0 += h->b8_stride;
1059 l1ref1 += h->b8_stride;
1060 l1mv0 += 2*b4_stride;
1061 l1mv1 += 2*b4_stride;
1065 if(h->direct_spatial_mv_pred){
1066 int ref[2];
1067 int mv[2][2];
1068 int list;
1070 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1072 /* ref = min(neighbors) */
1073 for(list=0; list<2; list++){
1074 int refa = h->ref_cache[list][scan8[0] - 1];
1075 int refb = h->ref_cache[list][scan8[0] - 8];
1076 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1077 if(refc == PART_NOT_AVAILABLE)
1078 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1079 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1080 if(ref[list] < 0)
1081 ref[list] = -1;
1084 if(ref[0] < 0 && ref[1] < 0){
1085 ref[0] = ref[1] = 0;
1086 mv[0][0] = mv[0][1] =
1087 mv[1][0] = mv[1][1] = 0;
1088 }else{
1089 for(list=0; list<2; list++){
1090 if(ref[list] >= 0)
1091 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1092 else
1093 mv[list][0] = mv[list][1] = 0;
1097 if(ref[1] < 0){
1098 if(!is_b8x8)
1099 *mb_type &= ~MB_TYPE_L1;
1100 sub_mb_type &= ~MB_TYPE_L1;
1101 }else if(ref[0] < 0){
1102 if(!is_b8x8)
1103 *mb_type &= ~MB_TYPE_L0;
1104 sub_mb_type &= ~MB_TYPE_L0;
1107 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1108 for(i8=0; i8<4; i8++){
1109 int x8 = i8&1;
1110 int y8 = i8>>1;
1111 int xy8 = x8+y8*b8_stride;
1112 int xy4 = 3*x8+y8*b4_stride;
1113 int a=0, b=0;
1115 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1116 continue;
1117 h->sub_mb_type[i8] = sub_mb_type;
1119 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1120 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1121 if(!IS_INTRA(mb_type_col[y8])
1122 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1123 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1124 if(ref[0] > 0)
1125 a= pack16to32(mv[0][0],mv[0][1]);
1126 if(ref[1] > 0)
1127 b= pack16to32(mv[1][0],mv[1][1]);
1128 }else{
1129 a= pack16to32(mv[0][0],mv[0][1]);
1130 b= pack16to32(mv[1][0],mv[1][1]);
1132 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1133 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1135 }else if(IS_16X16(*mb_type)){
1136 int a=0, b=0;
1138 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1139 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1140 if(!IS_INTRA(mb_type_col[0])
1141 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1142 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1143 && (h->x264_build>33 || !h->x264_build)))){
1144 if(ref[0] > 0)
1145 a= pack16to32(mv[0][0],mv[0][1]);
1146 if(ref[1] > 0)
1147 b= pack16to32(mv[1][0],mv[1][1]);
1148 }else{
1149 a= pack16to32(mv[0][0],mv[0][1]);
1150 b= pack16to32(mv[1][0],mv[1][1]);
1152 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1153 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1154 }else{
1155 for(i8=0; i8<4; i8++){
1156 const int x8 = i8&1;
1157 const int y8 = i8>>1;
1159 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1160 continue;
1161 h->sub_mb_type[i8] = sub_mb_type;
1163 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1164 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1165 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1166 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1168 /* col_zero_flag */
1169 if(!IS_INTRA(mb_type_col[0]) && ( l1ref0[x8 + y8*b8_stride] == 0
1170 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1171 && (h->x264_build>33 || !h->x264_build)))){
1172 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1173 if(IS_SUB_8X8(sub_mb_type)){
1174 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1175 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1176 if(ref[0] == 0)
1177 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1178 if(ref[1] == 0)
1179 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1181 }else
1182 for(i4=0; i4<4; i4++){
1183 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1184 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1185 if(ref[0] == 0)
1186 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1187 if(ref[1] == 0)
1188 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1194 }else{ /* direct temporal mv pred */
1195 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1196 const int *dist_scale_factor = h->dist_scale_factor;
1197 int ref_offset= 0;
1199 if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1200 map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1201 map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1202 dist_scale_factor =h->dist_scale_factor_field[s->mb_y&1];
1204 if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1205 ref_offset += 16;
1207 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1208 /* FIXME assumes direct_8x8_inference == 1 */
1209 int y_shift = 2*!IS_INTERLACED(*mb_type);
1211 for(i8=0; i8<4; i8++){
1212 const int x8 = i8&1;
1213 const int y8 = i8>>1;
1214 int ref0, scale;
1215 const int16_t (*l1mv)[2]= l1mv0;
1217 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1218 continue;
1219 h->sub_mb_type[i8] = sub_mb_type;
1221 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1222 if(IS_INTRA(mb_type_col[y8])){
1223 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1225 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1226 continue;
1229 ref0 = l1ref0[x8 + y8*b8_stride];
1230 if(ref0 >= 0)
1231 ref0 = map_col_to_list0[0][ref0 + ref_offset];
1232 else{
1233 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1234 l1mv= l1mv1;
1236 scale = dist_scale_factor[ref0];
1237 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1240 const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1241 int my_col = (mv_col[1]<<y_shift)/2;
1242 int mx = (scale * mv_col[0] + 128) >> 8;
1243 int my = (scale * my_col + 128) >> 8;
1244 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1245 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1248 return;
1251 /* one-to-one mv scaling */
1253 if(IS_16X16(*mb_type)){
1254 int ref, mv0, mv1;
1256 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1257 if(IS_INTRA(mb_type_col[0])){
1258 ref=mv0=mv1=0;
1259 }else{
1260 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1261 : map_col_to_list0[1][l1ref1[0] + ref_offset];
1262 const int scale = dist_scale_factor[ref0];
1263 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1264 int mv_l0[2];
1265 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1266 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1267 ref= ref0;
1268 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1269 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1271 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1272 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1273 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1274 }else{
1275 for(i8=0; i8<4; i8++){
1276 const int x8 = i8&1;
1277 const int y8 = i8>>1;
1278 int ref0, scale;
1279 const int16_t (*l1mv)[2]= l1mv0;
1281 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1282 continue;
1283 h->sub_mb_type[i8] = sub_mb_type;
1284 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1285 if(IS_INTRA(mb_type_col[0])){
1286 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1287 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1288 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1289 continue;
1292 ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1293 if(ref0 >= 0)
1294 ref0 = map_col_to_list0[0][ref0];
1295 else{
1296 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1297 l1mv= l1mv1;
1299 scale = dist_scale_factor[ref0];
1301 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1302 if(IS_SUB_8X8(sub_mb_type)){
1303 const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1304 int mx = (scale * mv_col[0] + 128) >> 8;
1305 int my = (scale * mv_col[1] + 128) >> 8;
1306 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1307 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1308 }else
1309 for(i4=0; i4<4; i4++){
1310 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1311 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1312 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1313 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1314 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1315 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1322 static inline void write_back_motion(H264Context *h, int mb_type){
1323 MpegEncContext * const s = &h->s;
1324 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1325 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1326 int list;
1328 if(!USES_LIST(mb_type, 0))
1329 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1331 for(list=0; list<h->list_count; list++){
1332 int y;
1333 if(!USES_LIST(mb_type, list))
1334 continue;
1336 for(y=0; y<4; y++){
1337 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1338 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1340 if( h->pps.cabac ) {
1341 if(IS_SKIP(mb_type))
1342 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1343 else
1344 for(y=0; y<4; y++){
1345 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1346 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1351 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1352 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1353 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1354 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1355 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1359 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1360 if(IS_8X8(mb_type)){
1361 uint8_t *direct_table = &h->direct_table[b8_xy];
1362 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1363 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1364 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1377 int i, si, di;
1378 uint8_t *dst;
1379 int bufidx;
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1385 src++; length--;
1386 #if 0
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
1389 #endif
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1394 if(src[i+2]!=3){
1395 /* startcode, so we must be past the end */
1396 length=i;
1398 break;
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
1405 return src;
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
1412 if (dst == NULL){
1413 return NULL;
1416 //printf("decoding esc\n");
1417 si=di=0;
1418 while(si<length){
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1422 dst[di++]= 0;
1423 dst[di++]= 0;
1424 si+=3;
1425 continue;
1426 }else //next start code
1427 break;
1430 dst[di++]= src[si++];
1433 *dst_length= di;
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1436 return dst;
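/* Worked example of the escape removal above, assuming a valid H264Context
 * with allocated rbsp buffers (hypothetical caller, illustration only): the
 * encoder inserts an emulation-prevention byte 0x03 after every 00 00 so
 * that start codes cannot appear inside a NAL unit, and decode_nal() strips
 * it again. */
#if 0
static void decode_nal_sketch(H264Context *h)
{
    static const uint8_t nal[6] = { 0x65, 0x00, 0x00, 0x03, 0x01, 0x80 };
    int dst_length, consumed;
    const uint8_t *rbsp = decode_nal(h, nal, &dst_length, &consumed, sizeof(nal));
    /* rbsp now holds 00 00 01 80, dst_length == 4, consumed == 6 */
}
#endif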
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1444 int v= *src;
1445 int r;
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1449 for(r=1; r<9; r++){
1450 if(v&1) return r;
1451 v>>=1;
1453 return 0;
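/* Worked example: the RBSP ends with a single '1' stop bit followed by zero
 * alignment bits.  For a final byte of 0x80 (1000 0000) the whole byte is
 * trailing and 8 is returned; for 0xA0 (1010 0000) the payload still owns
 * the top two bits, so 6 is returned; a final byte of 0x00 has no stop bit
 * and is reported as damaged (0). */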
1457 * IDCT transforms the 16 dc values and dequantizes them.
1458 * @param qp quantization parameter
1460 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1461 #define stride 16
1462 int i;
1463 int temp[16]; //FIXME check if this is a good idea
1464 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1465 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1467 //memset(block, 64, 2*256);
1468 //return;
1469 for(i=0; i<4; i++){
1470 const int offset= y_offset[i];
1471 const int z0= block[offset+stride*0] + block[offset+stride*4];
1472 const int z1= block[offset+stride*0] - block[offset+stride*4];
1473 const int z2= block[offset+stride*1] - block[offset+stride*5];
1474 const int z3= block[offset+stride*1] + block[offset+stride*5];
1476 temp[4*i+0]= z0+z3;
1477 temp[4*i+1]= z1+z2;
1478 temp[4*i+2]= z1-z2;
1479 temp[4*i+3]= z0-z3;
1482 for(i=0; i<4; i++){
1483 const int offset= x_offset[i];
1484 const int z0= temp[4*0+i] + temp[4*2+i];
1485 const int z1= temp[4*0+i] - temp[4*2+i];
1486 const int z2= temp[4*1+i] - temp[4*3+i];
1487 const int z3= temp[4*1+i] + temp[4*3+i];
1489 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1490 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1491 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1492 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
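/* Worked example of the transform above: it is a 4x4 Hadamard over the 16
 * luma DC coefficients followed by the (x*qmul+128)>>8 scale.  With only the
 * top-left DC set to 64 and qmul=32, every one of the 16 outputs becomes
 * (64*32+128)>>8 = 8, i.e. a lone DC coefficient is spread evenly over the
 * 16 4x4 luma blocks. */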
1496 #if 0
1498 * DCT transforms the 16 dc values.
1499 * @param qp quantization parameter ??? FIXME
1501 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1502 // const int qmul= dequant_coeff[qp][0];
1503 int i;
1504 int temp[16]; //FIXME check if this is a good idea
1505 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1506 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1508 for(i=0; i<4; i++){
1509 const int offset= y_offset[i];
1510 const int z0= block[offset+stride*0] + block[offset+stride*4];
1511 const int z1= block[offset+stride*0] - block[offset+stride*4];
1512 const int z2= block[offset+stride*1] - block[offset+stride*5];
1513 const int z3= block[offset+stride*1] + block[offset+stride*5];
1515 temp[4*i+0]= z0+z3;
1516 temp[4*i+1]= z1+z2;
1517 temp[4*i+2]= z1-z2;
1518 temp[4*i+3]= z0-z3;
1521 for(i=0; i<4; i++){
1522 const int offset= x_offset[i];
1523 const int z0= temp[4*0+i] + temp[4*2+i];
1524 const int z1= temp[4*0+i] - temp[4*2+i];
1525 const int z2= temp[4*1+i] - temp[4*3+i];
1526 const int z3= temp[4*1+i] + temp[4*3+i];
1528 block[stride*0 +offset]= (z0 + z3)>>1;
1529 block[stride*2 +offset]= (z1 + z2)>>1;
1530 block[stride*8 +offset]= (z1 - z2)>>1;
1531 block[stride*10+offset]= (z0 - z3)>>1;
1534 #endif
1536 #undef xStride
1537 #undef stride
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540 const int stride= 16*2;
1541 const int xStride= 16;
1542 int a,b,c,d,e;
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
1549 e= a-b;
1550 a= a+b;
1551 b= c-d;
1552 c= c+d;
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
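/* Worked example of the 2x2 transform above: a 2x2 Hadamard butterfly over
 * the four chroma DC coefficients followed by the (x*qmul)>>7 scale.  With
 * only a=64 set and qmul=16, all four outputs become (64*16)>>7 = 8, so the
 * DC energy is spread evenly over the four 4x4 chroma blocks. */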
1560 #if 0
1561 static void chroma_dc_dct_c(DCTELEM *block){
1562 const int stride= 16*2;
1563 const int xStride= 16;
1564 int a,b,c,d,e;
1566 a= block[stride*0 + xStride*0];
1567 b= block[stride*0 + xStride*1];
1568 c= block[stride*1 + xStride*0];
1569 d= block[stride*1 + xStride*1];
1571 e= a-b;
1572 a= a+b;
1573 b= c-d;
1574 c= c+d;
1576 block[stride*0 + xStride*0]= (a+c);
1577 block[stride*0 + xStride*1]= (e+b);
1578 block[stride*1 + xStride*0]= (a-c);
1579 block[stride*1 + xStride*1]= (e-b);
1581 #endif
1584 * gets the chroma qp.
1586 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1587 return h->pps.chroma_qp_table[t][qscale];
1590 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1591 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
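/**
 * Quantize a 4x4 coefficient block in the given scan order, writing the
 * quantized levels back into block[].
 * @return scan position of the last non-zero coefficient
 */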
1592 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1593 int i;
1594 const int * const quant_table= quant_coeff[qscale];
1595 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1596 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1597 const unsigned int threshold2= (threshold1<<1);
1598 int last_non_zero;
1600 if(separate_dc){
1601 if(qscale<=18){
1602 //avoid overflows
1603 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1604 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1605 const unsigned int dc_threshold2= (dc_threshold1<<1);
1607 int level= block[0]*quant_coeff[qscale+18][0];
1608 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1609 if(level>0){
1610 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1611 block[0]= level;
1612 }else{
1613 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1614 block[0]= -level;
1616 // last_non_zero = i;
1617 }else{
1618 block[0]=0;
1620 }else{
1621 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1622 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1623 const unsigned int dc_threshold2= (dc_threshold1<<1);
1625 int level= block[0]*quant_table[0];
1626 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1627 if(level>0){
1628 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1629 block[0]= level;
1630 }else{
1631 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1632 block[0]= -level;
1634 // last_non_zero = i;
1635 }else{
1636 block[0]=0;
1639 last_non_zero= 0;
1640 i=1;
1641 }else{
1642 last_non_zero= -1;
1643 i=0;
1646 for(; i<16; i++){
1647 const int j= scantable[i];
1648 int level= block[j]*quant_table[j];
1650 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1651 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1652 if(((unsigned)(level+threshold1))>threshold2){
1653 if(level>0){
1654 level= (bias + level)>>QUANT_SHIFT;
1655 block[j]= level;
1656 }else{
1657 level= (bias - level)>>QUANT_SHIFT;
1658 block[j]= -level;
1660 last_non_zero = i;
1661 }else{
1662 block[j]=0;
1666 return last_non_zero;
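/**
 * Motion-compensate one partition for a single prediction direction (list):
 * quarter-pel luma and eighth-pel chroma interpolation from the given
 * reference picture, with edge emulation when the motion vector points
 * outside the picture.
 */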
1669 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1670 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1671 int src_x_offset, int src_y_offset,
1672 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1673 MpegEncContext * const s = &h->s;
1674 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1675 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1676 const int luma_xy= (mx&3) + ((my&3)<<2);
1677 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1678 uint8_t * src_cb, * src_cr;
1679 int extra_width= h->emu_edge_width;
1680 int extra_height= h->emu_edge_height;
1681 int emu=0;
1682 const int full_mx= mx>>2;
1683 const int full_my= my>>2;
1684 const int pic_width = 16*s->mb_width;
1685 const int pic_height = 16*s->mb_height >> MB_FIELD;
1687 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
1688 return;
1690 if(mx&7) extra_width -= 3;
1691 if(my&7) extra_height -= 3;
1693 if( full_mx < 0-extra_width
1694 || full_my < 0-extra_height
1695 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1696 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1697 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1698 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1699 emu=1;
1702 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1703 if(!square){
1704 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1707 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1709 if(MB_FIELD){
1710 // chroma offset when predicting from a field of opposite parity
1711 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1712 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1714 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1715 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1717 if(emu){
1718 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1719 src_cb= s->edge_emu_buffer;
1721 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1723 if(emu){
1724 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1725 src_cr= s->edge_emu_buffer;
1727 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
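/**
 * Unweighted prediction of one partition: the list0 prediction is written
 * with the put functions; for bi-prediction the list1 prediction is then
 * averaged in with the avg functions.
 */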
1730 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1731 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1732 int x_offset, int y_offset,
1733 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1734 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1735 int list0, int list1){
1736 MpegEncContext * const s = &h->s;
1737 qpel_mc_func *qpix_op= qpix_put;
1738 h264_chroma_mc_func chroma_op= chroma_put;
1740 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1741 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1742 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1743 x_offset += 8*s->mb_x;
1744 y_offset += 8*(s->mb_y >> MB_FIELD);
1746 if(list0){
1747 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1748 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1749 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1750 qpix_op, chroma_op);
1752 qpix_op= qpix_avg;
1753 chroma_op= chroma_avg;
1756 if(list1){
1757 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1758 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1759 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1760 qpix_op, chroma_op);
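/**
 * Weighted prediction of one partition: for bi-prediction the list1 block is
 * rendered into the obmc scratchpad and blended with the list0 block using
 * the explicit or implicit weight tables; for uni-prediction the weight and
 * offset are applied in place.
 */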
1764 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1765 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1766 int x_offset, int y_offset,
1767 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1768 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1769 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1770 int list0, int list1){
1771 MpegEncContext * const s = &h->s;
1773 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1774 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1775 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1776 x_offset += 8*s->mb_x;
1777 y_offset += 8*(s->mb_y >> MB_FIELD);
1779 if(list0 && list1){
1780 /* don't optimize for luma-only case, since B-frames usually
1781 * use implicit weights => chroma too. */
1782 uint8_t *tmp_cb = s->obmc_scratchpad;
1783 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1784 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1785 int refn0 = h->ref_cache[0][ scan8[n] ];
1786 int refn1 = h->ref_cache[1][ scan8[n] ];
1788 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1789 dest_y, dest_cb, dest_cr,
1790 x_offset, y_offset, qpix_put, chroma_put);
1791 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1792 tmp_y, tmp_cb, tmp_cr,
1793 x_offset, y_offset, qpix_put, chroma_put);
1795 if(h->use_weight == 2){
1796 int weight0 = h->implicit_weight[refn0][refn1];
1797 int weight1 = 64 - weight0;
1798 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1799 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1800 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1801 }else{
1802 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1803 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1804 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1805 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1806 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1807 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1808 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1809 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1810 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1812 }else{
1813 int list = list1 ? 1 : 0;
1814 int refn = h->ref_cache[list][ scan8[n] ];
1815 Picture *ref= &h->ref_list[list][refn];
1816 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1817 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1818 qpix_put, chroma_put);
1820 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1821 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1822 if(h->use_weight_chroma){
1823 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1824 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1825 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1826 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
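/**
 * Choose between weighted and unweighted prediction for one partition: the
 * weighted path is used for explicit weighted prediction (use_weight==1) and
 * for implicit bi-prediction whose weight pair differs from the default 32/32.
 */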
1831 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1832 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1833 int x_offset, int y_offset,
1834 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1835 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1836 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1837 int list0, int list1){
1838 if((h->use_weight==2 && list0 && list1
1839 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1840 || h->use_weight==1)
1841 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1842 x_offset, y_offset, qpix_put, chroma_put,
1843 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1844 else
1845 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1849 static inline void prefetch_motion(H264Context *h, int list){
1850 /* fetch pixels for estimated mv 4 macroblocks ahead
1851 * optimized for 64-byte cache lines */
1852 MpegEncContext * const s = &h->s;
1853 const int refn = h->ref_cache[list][scan8[0]];
1854 if(refn >= 0){
1855 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1856 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1857 uint8_t **src= h->ref_list[list][refn].data;
1858 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1859 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1860 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1861 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
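/**
 * Motion compensation of a whole inter macroblock: partitions are dispatched
 * according to mb_type and sub_mb_type down to 4x4 blocks, with list0
 * reference prefetching before and list1 prefetching after.
 */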
1865 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1866 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1867 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1868 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1869 MpegEncContext * const s = &h->s;
1870 const int mb_xy= h->mb_xy;
1871 const int mb_type= s->current_picture.mb_type[mb_xy];
1873 assert(IS_INTER(mb_type));
1875 prefetch_motion(h, 0);
1877 if(IS_16X16(mb_type)){
1878 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1879 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1880 &weight_op[0], &weight_avg[0],
1881 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1882 }else if(IS_16X8(mb_type)){
1883 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1884 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1885 &weight_op[1], &weight_avg[1],
1886 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1887 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1891 }else if(IS_8X16(mb_type)){
1892 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1893 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1894 &weight_op[2], &weight_avg[2],
1895 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1896 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1900 }else{
1901 int i;
1903 assert(IS_8X8(mb_type));
1905 for(i=0; i<4; i++){
1906 const int sub_mb_type= h->sub_mb_type[i];
1907 const int n= 4*i;
1908 int x_offset= (i&1)<<2;
1909 int y_offset= (i&2)<<1;
1911 if(IS_SUB_8X8(sub_mb_type)){
1912 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1913 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1914 &weight_op[3], &weight_avg[3],
1915 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1916 }else if(IS_SUB_8X4(sub_mb_type)){
1917 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1918 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1919 &weight_op[4], &weight_avg[4],
1920 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1921 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 }else if(IS_SUB_4X8(sub_mb_type)){
1926 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1927 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1928 &weight_op[5], &weight_avg[5],
1929 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1930 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1934 }else{
1935 int j;
1936 assert(IS_SUB_4X4(sub_mb_type));
1937 for(j=0; j<4; j++){
1938 int sub_x_offset= x_offset + 2*(j&1);
1939 int sub_y_offset= y_offset + (j&2);
1940 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1941 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1942 &weight_op[6], &weight_avg[6],
1943 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1949 prefetch_motion(h, 1);
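/**
 * One-time initialization of the static CAVLC tables (coeff_token,
 * chroma DC coeff_token, total_zeros and run VLCs) into their
 * preallocated static storage.
 */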
1952 static av_cold void decode_init_vlc(void){
1953 static int done = 0;
1955 if (!done) {
1956 int i;
1957 int offset;
1958 done = 1;
1960 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1961 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1962 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1963 &chroma_dc_coeff_token_len [0], 1, 1,
1964 &chroma_dc_coeff_token_bits[0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
1967 offset = 0;
1968 for(i=0; i<4; i++){
1969 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1970 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1971 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1972 &coeff_token_len [i][0], 1, 1,
1973 &coeff_token_bits[i][0], 1, 1,
1974 INIT_VLC_USE_NEW_STATIC);
1975 offset += coeff_token_vlc_tables_size[i];
1978 * This is a one time safety check to make sure that
1979 * the packed static coeff_token_vlc table sizes
1980 * were initialized correctly.
1982 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1984 for(i=0; i<3; i++){
1985 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1986 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1987 init_vlc(&chroma_dc_total_zeros_vlc[i],
1988 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1989 &chroma_dc_total_zeros_len [i][0], 1, 1,
1990 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1991 INIT_VLC_USE_NEW_STATIC);
1993 for(i=0; i<15; i++){
1994 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1995 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1996 init_vlc(&total_zeros_vlc[i],
1997 TOTAL_ZEROS_VLC_BITS, 16,
1998 &total_zeros_len [i][0], 1, 1,
1999 &total_zeros_bits[i][0], 1, 1,
2000 INIT_VLC_USE_NEW_STATIC);
2003 for(i=0; i<6; i++){
2004 run_vlc[i].table = run_vlc_tables[i];
2005 run_vlc[i].table_allocated = run_vlc_tables_size;
2006 init_vlc(&run_vlc[i],
2007 RUN_VLC_BITS, 7,
2008 &run_len [i][0], 1, 1,
2009 &run_bits[i][0], 1, 1,
2010 INIT_VLC_USE_NEW_STATIC);
2012 run7_vlc.table = run7_vlc_table;
2013 run7_vlc.table_allocated = run7_vlc_table_size;
2014 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2015 &run_len [6][0], 1, 1,
2016 &run_bits[6][0], 1, 1,
2017 INIT_VLC_USE_NEW_STATIC);
2021 static void free_tables(H264Context *h){
2022 int i;
2023 H264Context *hx;
2024 av_freep(&h->intra4x4_pred_mode);
2025 av_freep(&h->chroma_pred_mode_table);
2026 av_freep(&h->cbp_table);
2027 av_freep(&h->mvd_table[0]);
2028 av_freep(&h->mvd_table[1]);
2029 av_freep(&h->direct_table);
2030 av_freep(&h->non_zero_count);
2031 av_freep(&h->slice_table_base);
2032 h->slice_table= NULL;
2034 av_freep(&h->mb2b_xy);
2035 av_freep(&h->mb2b8_xy);
2037 for(i = 0; i < h->s.avctx->thread_count; i++) {
2038 hx = h->thread_context[i];
2039 if(!hx) continue;
2040 av_freep(&hx->top_borders[1]);
2041 av_freep(&hx->top_borders[0]);
2042 av_freep(&hx->s.obmc_scratchpad);
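/*
 * The dequant tables below are built as
 *   dequant[i][q][x] = base[q%6][...] * scaling_matrix[i][x] << (q/6)
 * (the 4x4 case uses q/6 + 2 as shift), so a QP step of 6 doubles the scale;
 * identical scaling matrices share a single table.
 */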
2046 static void init_dequant8_coeff_table(H264Context *h){
2047 int i,q,x;
2048 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2049 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2050 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2052 for(i=0; i<2; i++ ){
2053 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2054 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2055 break;
2058 for(q=0; q<52; q++){
2059 int shift = div6[q];
2060 int idx = rem6[q];
2061 for(x=0; x<64; x++)
2062 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2063 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2064 h->pps.scaling_matrix8[i][x]) << shift;
2069 static void init_dequant4_coeff_table(H264Context *h){
2070 int i,j,q,x;
2071 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2072 for(i=0; i<6; i++ ){
2073 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2074 for(j=0; j<i; j++){
2075 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2076 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2077 break;
2080 if(j<i)
2081 continue;
2083 for(q=0; q<52; q++){
2084 int shift = div6[q] + 2;
2085 int idx = rem6[q];
2086 for(x=0; x<16; x++)
2087 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2088 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2089 h->pps.scaling_matrix4[i][x]) << shift;
2094 static void init_dequant_tables(H264Context *h){
2095 int i,x;
2096 init_dequant4_coeff_table(h);
2097 if(h->pps.transform_8x8_mode)
2098 init_dequant8_coeff_table(h);
2099 if(h->sps.transform_bypass){
2100 for(i=0; i<6; i++)
2101 for(x=0; x<16; x++)
2102 h->dequant4_coeff[i][0][x] = 1<<6;
2103 if(h->pps.transform_8x8_mode)
2104 for(i=0; i<2; i++)
2105 for(x=0; x<64; x++)
2106 h->dequant8_coeff[i][0][x] = 1<<6;
2112 * allocates tables.
2113 * needs width/height
2115 static int alloc_tables(H264Context *h){
2116 MpegEncContext * const s = &h->s;
2117 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2118 int x,y;
2120 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2122 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2123 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2124 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2126 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2127 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2128 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2129 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2131 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
2132 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2134 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2135 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2136 for(y=0; y<s->mb_height; y++){
2137 for(x=0; x<s->mb_width; x++){
2138 const int mb_xy= x + y*s->mb_stride;
2139 const int b_xy = 4*x + 4*y*h->b_stride;
2140 const int b8_xy= 2*x + 2*y*h->b8_stride;
2142 h->mb2b_xy [mb_xy]= b_xy;
2143 h->mb2b8_xy[mb_xy]= b8_xy;
2147 s->obmc_scratchpad = NULL;
2149 if(!h->dequant4_coeff[0])
2150 init_dequant_tables(h);
2152 return 0;
2153 fail:
2154 free_tables(h);
2155 return -1;
2159 * Mimic alloc_tables(), but for every context thread.
2161 static void clone_tables(H264Context *dst, H264Context *src){
2162 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2163 dst->non_zero_count = src->non_zero_count;
2164 dst->slice_table = src->slice_table;
2165 dst->cbp_table = src->cbp_table;
2166 dst->mb2b_xy = src->mb2b_xy;
2167 dst->mb2b8_xy = src->mb2b8_xy;
2168 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2169 dst->mvd_table[0] = src->mvd_table[0];
2170 dst->mvd_table[1] = src->mvd_table[1];
2171 dst->direct_table = src->direct_table;
2173 dst->s.obmc_scratchpad = NULL;
2174 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2178 * Init context
2179 * Allocate buffers which are not shared amongst multiple threads.
2181 static int context_init(H264Context *h){
2182 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2183 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2185 return 0;
2186 fail:
2187 return -1; // free_tables will clean up for us
2190 static av_cold void common_init(H264Context *h){
2191 MpegEncContext * const s = &h->s;
2193 s->width = s->avctx->width;
2194 s->height = s->avctx->height;
2195 s->codec_id= s->avctx->codec->id;
2197 ff_h264_pred_init(&h->hpc, s->codec_id);
2199 h->dequant_coeff_pps= -1;
2200 s->unrestricted_mv=1;
2201 s->decode=1; //FIXME
2203 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2204 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2207 static av_cold int decode_init(AVCodecContext *avctx){
2208 H264Context *h= avctx->priv_data;
2209 MpegEncContext * const s = &h->s;
2211 MPV_decode_defaults(s);
2213 s->avctx = avctx;
2214 common_init(h);
2216 s->out_format = FMT_H264;
2217 s->workaround_bugs= avctx->workaround_bugs;
2219 // set defaults
2220 // s->decode_mb= ff_h263_decode_mb;
2221 s->quarter_sample = 1;
2222 s->low_delay= 1;
2224 if(avctx->codec_id == CODEC_ID_SVQ3)
2225 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2226 else
2227 avctx->pix_fmt= PIX_FMT_YUV420P;
2229 decode_init_vlc();
2231 if(avctx->extradata_size > 0 && avctx->extradata &&
2232 *(char *)avctx->extradata == 1){
2233 h->is_avc = 1;
2234 h->got_avcC = 0;
2235 } else {
2236 h->is_avc = 0;
2239 h->thread_context[0] = h;
2240 h->outputed_poc = INT_MIN;
2241 h->prev_poc_msb= 1<<16;
2242 return 0;
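/**
 * Per-frame/field setup before decoding slices: start the MPV frame,
 * precompute block_offset[] and allocate the bi-prediction scratch buffer.
 */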
2245 static int frame_start(H264Context *h){
2246 MpegEncContext * const s = &h->s;
2247 int i;
2249 if(MPV_frame_start(s, s->avctx) < 0)
2250 return -1;
2251 ff_er_frame_start(s);
2253 * MPV_frame_start uses pict_type to derive key_frame.
2254 * This is incorrect for H.264; IDR markings must be used.
2255 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2256 * See decode_nal_units().
2258 s->current_picture_ptr->key_frame= 0;
2260 assert(s->linesize && s->uvlinesize);
2262 for(i=0; i<16; i++){
2263 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2264 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2266 for(i=0; i<4; i++){
2267 h->block_offset[16+i]=
2268 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2269 h->block_offset[24+16+i]=
2270 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2273 /* can't be in alloc_tables because linesize isn't known there.
2274 * FIXME: redo bipred weight to not require extra buffer? */
2275 for(i = 0; i < s->avctx->thread_count; i++)
2276 if(!h->thread_context[i]->s.obmc_scratchpad)
2277 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2279 /* some macroblocks will be accessed before they're available */
2280 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2281 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
2283 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2285 // We mark the current picture as non-reference after allocating it, so
2286 // that if we break out due to an error it can be released automatically
2287 // in the next MPV_frame_start().
2288 // SVQ3, as well as most other codecs, has only last/next/current and thus
2289 // gets released even with the reference flag set; besides, SVQ3 and others
2290 // do not mark frames as reference later "naturally".
2291 if(s->codec_id != CODEC_ID_SVQ3)
2292 s->current_picture_ptr->reference= 0;
2294 s->current_picture_ptr->field_poc[0]=
2295 s->current_picture_ptr->field_poc[1]= INT_MAX;
2296 assert(s->current_picture_ptr->long_ref==0);
2298 return 0;
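/**
 * Save the bottom row and right column of the just-decoded macroblock into
 * top_borders/left_border so that intra prediction of following macroblocks
 * can use these pre-deblocking samples.
 */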
2301 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2302 MpegEncContext * const s = &h->s;
2303 int i;
2304 int step = 1;
2305 int offset = 1;
2306 int uvoffset= 1;
2307 int top_idx = 1;
2308 int skiplast= 0;
2310 src_y -= linesize;
2311 src_cb -= uvlinesize;
2312 src_cr -= uvlinesize;
2314 if(!simple && FRAME_MBAFF){
2315 if(s->mb_y&1){
2316 offset = MB_MBAFF ? 1 : 17;
2317 uvoffset= MB_MBAFF ? 1 : 9;
2318 if(!MB_MBAFF){
2319 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2320 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2321 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2322 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2323 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2326 }else{
2327 if(!MB_MBAFF){
2328 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2329 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2330 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2331 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2333 skiplast= 1;
2335 offset =
2336 uvoffset=
2337 top_idx = MB_MBAFF ? 0 : 1;
2339 step= MB_MBAFF ? 2 : 1;
2342 // There are two lines saved, the line above the top macroblock of a pair,
2343 // and the line above the bottom macroblock
2344 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2345 for(i=1; i<17 - skiplast; i++){
2346 h->left_border[offset+i*step]= src_y[15+i* linesize];
2349 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2350 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2352 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2353 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2354 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2355 for(i=1; i<9 - skiplast; i++){
2356 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2357 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2359 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2360 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
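/**
 * Exchange the neighbouring edge pixels in the frame with the saved
 * pre-deblocking borders: called with xchg=1 before intra prediction (swap in
 * the unfiltered samples) and with xchg=0 afterwards (restore the filtered
 * ones).
 */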
2364 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2365 MpegEncContext * const s = &h->s;
2366 int temp8, i;
2367 uint64_t temp64;
2368 int deblock_left;
2369 int deblock_top;
2370 int mb_xy;
2371 int step = 1;
2372 int offset = 1;
2373 int uvoffset= 1;
2374 int top_idx = 1;
2376 if(!simple && FRAME_MBAFF){
2377 if(s->mb_y&1){
2378 offset = MB_MBAFF ? 1 : 17;
2379 uvoffset= MB_MBAFF ? 1 : 9;
2380 }else{
2381 offset =
2382 uvoffset=
2383 top_idx = MB_MBAFF ? 0 : 1;
2385 step= MB_MBAFF ? 2 : 1;
2388 if(h->deblocking_filter == 2) {
2389 mb_xy = h->mb_xy;
2390 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2391 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2392 } else {
2393 deblock_left = (s->mb_x > 0);
2394 deblock_top = (s->mb_y > !!MB_FIELD);
2397 src_y -= linesize + 1;
2398 src_cb -= uvlinesize + 1;
2399 src_cr -= uvlinesize + 1;
2401 #define XCHG(a,b,t,xchg)\
2402 t= a;\
2403 if(xchg)\
2404 a= b;\
2405 b= t;
2407 if(deblock_left){
2408 for(i = !deblock_top; i<16; i++){
2409 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2411 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2414 if(deblock_top){
2415 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2416 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2417 if(s->mb_x+1 < s->mb_width){
2418 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2422 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2423 if(deblock_left){
2424 for(i = !deblock_top; i<8; i++){
2425 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2426 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2428 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2429 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2431 if(deblock_top){
2432 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2433 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
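/**
 * Decode one macroblock to pixels: intra prediction or motion compensation,
 * inverse transforms, then deblocking. With simple=1 the MBAFF, PCM,
 * grayscale and non-H.264 paths are compiled away.
 */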
2438 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2439 MpegEncContext * const s = &h->s;
2440 const int mb_x= s->mb_x;
2441 const int mb_y= s->mb_y;
2442 const int mb_xy= h->mb_xy;
2443 const int mb_type= s->current_picture.mb_type[mb_xy];
2444 uint8_t *dest_y, *dest_cb, *dest_cr;
2445 int linesize, uvlinesize /*dct_offset*/;
2446 int i;
2447 int *block_offset = &h->block_offset[0];
2448 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2449 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2450 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2452 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2453 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2454 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2456 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2457 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2459 if (!simple && MB_FIELD) {
2460 linesize = h->mb_linesize = s->linesize * 2;
2461 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2462 block_offset = &h->block_offset[24];
2463 if(mb_y&1){ //FIXME move out of this function?
2464 dest_y -= s->linesize*15;
2465 dest_cb-= s->uvlinesize*7;
2466 dest_cr-= s->uvlinesize*7;
2468 if(FRAME_MBAFF) {
2469 int list;
2470 for(list=0; list<h->list_count; list++){
2471 if(!USES_LIST(mb_type, list))
2472 continue;
2473 if(IS_16X16(mb_type)){
2474 int8_t *ref = &h->ref_cache[list][scan8[0]];
2475 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2476 }else{
2477 for(i=0; i<16; i+=4){
2478 int ref = h->ref_cache[list][scan8[i]];
2479 if(ref >= 0)
2480 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2485 } else {
2486 linesize = h->mb_linesize = s->linesize;
2487 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2488 // dct_offset = s->linesize * 16;
2491 if(transform_bypass){
2492 idct_dc_add =
2493 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2494 }else if(IS_8x8DCT(mb_type)){
2495 idct_dc_add = s->dsp.h264_idct8_dc_add;
2496 idct_add = s->dsp.h264_idct8_add;
2497 }else{
2498 idct_dc_add = s->dsp.h264_idct_dc_add;
2499 idct_add = s->dsp.h264_idct_add;
2502 if (!simple && IS_INTRA_PCM(mb_type)) {
2503 for (i=0; i<16; i++) {
2504 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2506 for (i=0; i<8; i++) {
2507 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2508 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2510 } else {
2511 if(IS_INTRA(mb_type)){
2512 if(h->deblocking_filter)
2513 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2515 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2516 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2517 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2520 if(IS_INTRA4x4(mb_type)){
2521 if(simple || !s->encoding){
2522 if(IS_8x8DCT(mb_type)){
2523 for(i=0; i<16; i+=4){
2524 uint8_t * const ptr= dest_y + block_offset[i];
2525 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2526 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2527 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2528 (h->topright_samples_available<<i)&0x4000, linesize);
2529 if(nnz){
2530 if(nnz == 1 && h->mb[i*16])
2531 idct_dc_add(ptr, h->mb + i*16, linesize);
2532 else
2533 idct_add(ptr, h->mb + i*16, linesize);
2536 }else
2537 for(i=0; i<16; i++){
2538 uint8_t * const ptr= dest_y + block_offset[i];
2539 uint8_t *topright;
2540 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2541 int nnz, tr;
2543 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2544 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2545 assert(mb_y || linesize <= block_offset[i]);
2546 if(!topright_avail){
2547 tr= ptr[3 - linesize]*0x01010101;
2548 topright= (uint8_t*) &tr;
2549 }else
2550 topright= ptr + 4 - linesize;
2551 }else
2552 topright= NULL;
2554 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2555 nnz = h->non_zero_count_cache[ scan8[i] ];
2556 if(nnz){
2557 if(is_h264){
2558 if(nnz == 1 && h->mb[i*16])
2559 idct_dc_add(ptr, h->mb + i*16, linesize);
2560 else
2561 idct_add(ptr, h->mb + i*16, linesize);
2562 }else
2563 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2567 }else{
2568 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2569 if(is_h264){
2570 if(!transform_bypass)
2571 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2572 }else
2573 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2575 if(h->deblocking_filter)
2576 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2577 }else if(is_h264){
2578 hl_motion(h, dest_y, dest_cb, dest_cr,
2579 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2580 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2581 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2585 if(!IS_INTRA4x4(mb_type)){
2586 if(is_h264){
2587 if(IS_INTRA16x16(mb_type)){
2588 for(i=0; i<16; i++){
2589 if(h->non_zero_count_cache[ scan8[i] ])
2590 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2591 else if(h->mb[i*16])
2592 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2594 }else{
2595 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2596 for(i=0; i<16; i+=di){
2597 int nnz = h->non_zero_count_cache[ scan8[i] ];
2598 if(nnz){
2599 if(nnz==1 && h->mb[i*16])
2600 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2601 else
2602 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2606 }else{
2607 for(i=0; i<16; i++){
2608 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2609 uint8_t * const ptr= dest_y + block_offset[i];
2610 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2616 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2617 uint8_t *dest[2] = {dest_cb, dest_cr};
2618 if(transform_bypass){
2619 idct_add = idct_dc_add = s->dsp.add_pixels4;
2620 }else{
2621 idct_add = s->dsp.h264_idct_add;
2622 idct_dc_add = s->dsp.h264_idct_dc_add;
2623 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2624 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2626 if(is_h264){
2627 for(i=16; i<16+8; i++){
2628 if(h->non_zero_count_cache[ scan8[i] ])
2629 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2630 else if(h->mb[i*16])
2631 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2633 }else{
2634 for(i=16; i<16+8; i++){
2635 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2636 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2637 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2643 if(h->deblocking_filter) {
2644 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2645 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2646 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2647 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2648 if (!simple && FRAME_MBAFF) {
2649 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2650 } else {
2651 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2657 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2659 static void hl_decode_mb_simple(H264Context *h){
2660 hl_decode_mb_internal(h, 1);
2664 * Process a macroblock; this handles edge cases, such as interlacing.
2666 static void av_noinline hl_decode_mb_complex(H264Context *h){
2667 hl_decode_mb_internal(h, 0);
2670 static void hl_decode_mb(H264Context *h){
2671 MpegEncContext * const s = &h->s;
2672 const int mb_xy= h->mb_xy;
2673 const int mb_type= s->current_picture.mb_type[mb_xy];
2674 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2675 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2677 if(ENABLE_H264_ENCODER && !s->decode)
2678 return;
2680 if (is_complex)
2681 hl_decode_mb_complex(h);
2682 else hl_decode_mb_simple(h);
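/**
 * Convert a frame Picture into a view of one of its fields: offset the data
 * pointers for the bottom field, double the linesizes and take the poc and
 * reference marking of the selected field.
 */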
2685 static void pic_as_field(Picture *pic, const int parity){
2686 int i;
2687 for (i = 0; i < 4; ++i) {
2688 if (parity == PICT_BOTTOM_FIELD)
2689 pic->data[i] += pic->linesize[i];
2690 pic->reference = parity;
2691 pic->linesize[i] *= 2;
2693 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
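/**
 * Copy *src into *dest as a reference of the given parity if src is
 * referenced in that parity; field copies are converted with pic_as_field()
 * and pic_id is rescaled to field numbering.
 * @return 1 if a copy was made, 0 otherwise
 */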
2696 static int split_field_copy(Picture *dest, Picture *src,
2697 int parity, int id_add){
2698 int match = !!(src->reference & parity);
2700 if (match) {
2701 *dest = *src;
2702 if(parity != PICT_FRAME){
2703 pic_as_field(dest, parity);
2704 dest->pic_id *= 2;
2705 dest->pic_id += id_add;
2709 return match;
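/**
 * Build a default reference list in def[] from in[]: for field decoding,
 * pictures referenced in the parity selected by sel are interleaved with
 * those of the opposite parity; for frame decoding this reduces to copying
 * the referenced pictures in order.
 * @return number of entries written to def
 */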
2712 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2713 int i[2]={0};
2714 int index=0;
2716 while(i[0]<len || i[1]<len){
2717 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2718 i[0]++;
2719 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2720 i[1]++;
2721 if(i[0] < len){
2722 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2723 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2725 if(i[1] < len){
2726 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2727 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2731 return index;
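/**
 * Append to sorted[] the pictures from src[] ordered by POC relative to
 * limit: dir=0 takes POCs above the limit in ascending order, dir=1 takes
 * POCs at or below it in descending order (the two halves of a B-slice
 * default list).
 * @return number of entries appended
 */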
2734 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2735 int i, best_poc;
2736 int out_i= 0;
2738 for(;;){
2739 best_poc= dir ? INT_MIN : INT_MAX;
2741 for(i=0; i<len; i++){
2742 const int poc= src[i]->poc;
2743 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2744 best_poc= poc;
2745 sorted[out_i]= src[i];
2748 if(best_poc == (dir ? INT_MIN : INT_MAX))
2749 break;
2750 limit= sorted[out_i++]->poc - dir;
2752 return out_i;
2756 * fills the default_ref_list.
2758 static int fill_default_ref_list(H264Context *h){
2759 MpegEncContext * const s = &h->s;
2760 int i, len;
2762 if(h->slice_type_nos==FF_B_TYPE){
2763 Picture *sorted[32];
2764 int cur_poc, list;
2765 int lens[2];
2767 if(FIELD_PICTURE)
2768 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2769 else
2770 cur_poc= s->current_picture_ptr->poc;
2772 for(list= 0; list<2; list++){
2773 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2774 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2775 assert(len<=32);
2776 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2777 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2778 assert(len<=32);
2780 if(len < h->ref_count[list])
2781 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2782 lens[list]= len;
2785 if(lens[0] == lens[1] && lens[1] > 1){
2786 for(i=0; i<lens[0] && h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++);
2787 if(i == lens[0])
2788 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2790 }else{
2791 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2792 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2793 assert(len <= 32);
2794 if(len < h->ref_count[0])
2795 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2797 #ifdef TRACE
2798 for (i=0; i<h->ref_count[0]; i++) {
2799 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2801 if(h->slice_type_nos==FF_B_TYPE){
2802 for (i=0; i<h->ref_count[1]; i++) {
2803 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2806 #endif
2807 return 0;
2810 static void print_short_term(H264Context *h);
2811 static void print_long_term(H264Context *h);
2814 * Extract structure information about the picture described by pic_num in
2815 * the current decoding context (frame or field). Note that pic_num is
2816 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2817 * @param pic_num picture number for which to extract structure information
2818 * @param structure one of PICT_XXX describing structure of picture
2819 * with pic_num
2820 * @return frame number (short term) or long term index of picture
2821 * described by pic_num
2823 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2824 MpegEncContext * const s = &h->s;
2826 *structure = s->picture_structure;
2827 if(FIELD_PICTURE){
2828 if (!(pic_num & 1))
2829 /* opposite field */
2830 *structure ^= PICT_FRAME;
2831 pic_num >>= 1;
2834 return pic_num;
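/**
 * Parse the ref_pic_list_reordering() syntax from the slice header and apply
 * the requested reordering to ref_list[], starting from the default lists.
 */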
2837 static int decode_ref_pic_list_reordering(H264Context *h){
2838 MpegEncContext * const s = &h->s;
2839 int list, index, pic_structure;
2841 print_short_term(h);
2842 print_long_term(h);
2844 for(list=0; list<h->list_count; list++){
2845 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2847 if(get_bits1(&s->gb)){
2848 int pred= h->curr_pic_num;
2850 for(index=0; ; index++){
2851 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2852 unsigned int pic_id;
2853 int i;
2854 Picture *ref = NULL;
2856 if(reordering_of_pic_nums_idc==3)
2857 break;
2859 if(index >= h->ref_count[list]){
2860 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2861 return -1;
2864 if(reordering_of_pic_nums_idc<3){
2865 if(reordering_of_pic_nums_idc<2){
2866 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2867 int frame_num;
2869 if(abs_diff_pic_num > h->max_pic_num){
2870 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2871 return -1;
2874 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2875 else pred+= abs_diff_pic_num;
2876 pred &= h->max_pic_num - 1;
2878 frame_num = pic_num_extract(h, pred, &pic_structure);
2880 for(i= h->short_ref_count-1; i>=0; i--){
2881 ref = h->short_ref[i];
2882 assert(ref->reference);
2883 assert(!ref->long_ref);
2885 ref->frame_num == frame_num &&
2886 (ref->reference & pic_structure)
2888 break;
2890 if(i>=0)
2891 ref->pic_id= pred;
2892 }else{
2893 int long_idx;
2894 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2896 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2898 if(long_idx>31){
2899 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2900 return -1;
2902 ref = h->long_ref[long_idx];
2903 assert(!(ref && !ref->reference));
2904 if(ref && (ref->reference & pic_structure)){
2905 ref->pic_id= pic_id;
2906 assert(ref->long_ref);
2907 i=0;
2908 }else{
2909 i=-1;
2913 if (i < 0) {
2914 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2915 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2916 } else {
2917 for(i=index; i+1<h->ref_count[list]; i++){
2918 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2919 break;
2921 for(; i > index; i--){
2922 h->ref_list[list][i]= h->ref_list[list][i-1];
2924 h->ref_list[list][index]= *ref;
2925 if (FIELD_PICTURE){
2926 pic_as_field(&h->ref_list[list][index], pic_structure);
2929 }else{
2930 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2931 return -1;
2936 for(list=0; list<h->list_count; list++){
2937 for(index= 0; index < h->ref_count[list]; index++){
2938 if(!h->ref_list[list][index].data[0]){
2939 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2940 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2945 return 0;
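/**
 * For MBAFF slices, append field variants of every frame reference:
 * ref_list[list][16+2*i] is the top and [16+2*i+1] the bottom field of frame
 * reference i, with the prediction weights duplicated to match.
 */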
2948 static void fill_mbaff_ref_list(H264Context *h){
2949 int list, i, j;
2950 for(list=0; list<2; list++){ //FIXME try list_count
2951 for(i=0; i<h->ref_count[list]; i++){
2952 Picture *frame = &h->ref_list[list][i];
2953 Picture *field = &h->ref_list[list][16+2*i];
2954 field[0] = *frame;
2955 for(j=0; j<3; j++)
2956 field[0].linesize[j] <<= 1;
2957 field[0].reference = PICT_TOP_FIELD;
2958 field[0].poc= field[0].field_poc[0];
2959 field[1] = field[0];
2960 for(j=0; j<3; j++)
2961 field[1].data[j] += frame->linesize[j];
2962 field[1].reference = PICT_BOTTOM_FIELD;
2963 field[1].poc= field[1].field_poc[1];
2965 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2966 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2967 for(j=0; j<2; j++){
2968 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2969 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
2973 for(j=0; j<h->ref_count[1]; j++){
2974 for(i=0; i<h->ref_count[0]; i++)
2975 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2976 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2977 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
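/**
 * Parse pred_weight_table() from the slice header: explicit luma/chroma
 * weights and offsets per reference, setting use_weight/use_weight_chroma
 * when any of them differ from the defaults.
 */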
2981 static int pred_weight_table(H264Context *h){
2982 MpegEncContext * const s = &h->s;
2983 int list, i;
2984 int luma_def, chroma_def;
2986 h->use_weight= 0;
2987 h->use_weight_chroma= 0;
2988 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2989 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
2990 luma_def = 1<<h->luma_log2_weight_denom;
2991 chroma_def = 1<<h->chroma_log2_weight_denom;
2993 for(list=0; list<2; list++){
2994 for(i=0; i<h->ref_count[list]; i++){
2995 int luma_weight_flag, chroma_weight_flag;
2997 luma_weight_flag= get_bits1(&s->gb);
2998 if(luma_weight_flag){
2999 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3000 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3001 if( h->luma_weight[list][i] != luma_def
3002 || h->luma_offset[list][i] != 0)
3003 h->use_weight= 1;
3004 }else{
3005 h->luma_weight[list][i]= luma_def;
3006 h->luma_offset[list][i]= 0;
3009 if(CHROMA){
3010 chroma_weight_flag= get_bits1(&s->gb);
3011 if(chroma_weight_flag){
3012 int j;
3013 for(j=0; j<2; j++){
3014 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3015 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3016 if( h->chroma_weight[list][i][j] != chroma_def
3017 || h->chroma_offset[list][i][j] != 0)
3018 h->use_weight_chroma= 1;
3020 }else{
3021 int j;
3022 for(j=0; j<2; j++){
3023 h->chroma_weight[list][i][j]= chroma_def;
3024 h->chroma_offset[list][i][j]= 0;
3029 if(h->slice_type_nos != FF_B_TYPE) break;
3031 h->use_weight= h->use_weight || h->use_weight_chroma;
3032 return 0;
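/*
 * Derive implicit bi-prediction weights from POC distances:
 * implicit_weight[ref0][ref1] stores the list0 weight w0 = 64 - dist_scale_factor
 * (the list1 weight is 64 - w0), falling back to equal 32/32 weights when the
 * POC distance is zero or the scale factor is out of range.
 */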
3035 static void implicit_weight_table(H264Context *h){
3036 MpegEncContext * const s = &h->s;
3037 int ref0, ref1;
3038 int cur_poc = s->current_picture_ptr->poc;
3040 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3041 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3042 h->use_weight= 0;
3043 h->use_weight_chroma= 0;
3044 return;
3047 h->use_weight= 2;
3048 h->use_weight_chroma= 2;
3049 h->luma_log2_weight_denom= 5;
3050 h->chroma_log2_weight_denom= 5;
3052 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3053 int poc0 = h->ref_list[0][ref0].poc;
3054 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3055 int poc1 = h->ref_list[1][ref1].poc;
3056 int td = av_clip(poc1 - poc0, -128, 127);
3057 if(td){
3058 int tb = av_clip(cur_poc - poc0, -128, 127);
3059 int tx = (16384 + (FFABS(td) >> 1)) / td;
3060 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3061 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3062 h->implicit_weight[ref0][ref1] = 32;
3063 else
3064 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3065 }else
3066 h->implicit_weight[ref0][ref1] = 32;
3072 * Mark a picture as no longer needed for reference. The refmask
3073 * argument allows unreferencing of individual fields or the whole frame.
3074 * If the picture becomes entirely unreferenced, but is being held for
3075 * display purposes, it is marked as such.
3076 * @param refmask mask of fields to unreference; the mask is bitwise
3077 * anded with the reference marking of pic
3078 * @return non-zero if pic becomes entirely unreferenced (except possibly
3079 * for display purposes), zero if one of the fields remains in
3080 * reference
3082 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3083 int i;
3084 if (pic->reference &= refmask) {
3085 return 0;
3086 } else {
3087 for(i = 0; h->delayed_pic[i]; i++)
3088 if(pic == h->delayed_pic[i]){
3089 pic->reference=DELAYED_PIC_REF;
3090 break;
3092 return 1;
3097 * instantaneous decoder refresh.
3099 static void idr(H264Context *h){
3100 int i;
3102 for(i=0; i<16; i++){
3103 remove_long(h, i, 0);
3105 assert(h->long_ref_count==0);
3107 for(i=0; i<h->short_ref_count; i++){
3108 unreference_pic(h, h->short_ref[i], 0);
3109 h->short_ref[i]= NULL;
3111 h->short_ref_count=0;
3112 h->prev_frame_num= 0;
3113 h->prev_frame_num_offset= 0;
3114 h->prev_poc_msb=
3115 h->prev_poc_lsb= 0;
3118 /* forget old pics after a seek */
3119 static void flush_dpb(AVCodecContext *avctx){
3120 H264Context *h= avctx->priv_data;
3121 int i;
3122 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3123 if(h->delayed_pic[i])
3124 h->delayed_pic[i]->reference= 0;
3125 h->delayed_pic[i]= NULL;
3127 h->outputed_poc= INT_MIN;
3128 idr(h);
3129 if(h->s.current_picture_ptr)
3130 h->s.current_picture_ptr->reference= 0;
3131 h->s.first_field= 0;
3132 ff_mpeg_flush(avctx);
3136 * Find a Picture in the short term reference list by frame number.
3137 * @param frame_num frame number to search for
3138 * @param idx the index into h->short_ref where the returned picture is found;
3139 * undefined if no picture is found.
3140 * @return pointer to the found picture, or NULL if no pic with the provided
3141 * frame number is found
3143 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3144 MpegEncContext * const s = &h->s;
3145 int i;
3147 for(i=0; i<h->short_ref_count; i++){
3148 Picture *pic= h->short_ref[i];
3149 if(s->avctx->debug&FF_DEBUG_MMCO)
3150 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3151 if(pic->frame_num == frame_num) {
3152 *idx = i;
3153 return pic;
3156 return NULL;
3160 * Remove a picture from the short term reference list by its index in
3161 * that list. This does no checking on the provided index; it is assumed
3162 * to be valid. Other list entries are shifted down.
3163 * @param i index into h->short_ref of picture to remove.
3165 static void remove_short_at_index(H264Context *h, int i){
3166 assert(i >= 0 && i < h->short_ref_count);
3167 h->short_ref[i]= NULL;
3168 if (--h->short_ref_count)
3169 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3174 * @return the removed picture or NULL if an error occurs
3176 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3177 MpegEncContext * const s = &h->s;
3178 Picture *pic;
3179 int i;
3181 if(s->avctx->debug&FF_DEBUG_MMCO)
3182 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3184 pic = find_short(h, frame_num, &i);
3185 if (pic){
3186 if(unreference_pic(h, pic, ref_mask))
3187 remove_short_at_index(h, i);
3190 return pic;
3194 * Remove a picture from the long term reference list by its index in
3195 * that list.
3196 * @return the removed picture or NULL if an error occurs
3198 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3199 Picture *pic;
3201 pic= h->long_ref[i];
3202 if (pic){
3203 if(unreference_pic(h, pic, ref_mask)){
3204 assert(h->long_ref[i]->long_ref == 1);
3205 h->long_ref[i]->long_ref= 0;
3206 h->long_ref[i]= NULL;
3207 h->long_ref_count--;
3211 return pic;
3215 * print short term list
3217 static void print_short_term(H264Context *h) {
3218 uint32_t i;
3219 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3220 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3221 for(i=0; i<h->short_ref_count; i++){
3222 Picture *pic= h->short_ref[i];
3223 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3229 * print long term list
3231 static void print_long_term(H264Context *h) {
3232 uint32_t i;
3233 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3234 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3235 for(i = 0; i < 16; i++){
3236 Picture *pic= h->long_ref[i];
3237 if (pic) {
3238 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3245 * Executes the reference picture marking (memory management control operations).
3247 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3248 MpegEncContext * const s = &h->s;
3249 int i, j;
3250 int current_ref_assigned=0;
3251 Picture *pic;
3253 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3254 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3256 for(i=0; i<mmco_count; i++){
3257 int structure, frame_num;
3258 if(s->avctx->debug&FF_DEBUG_MMCO)
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3261 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3262 || mmco[i].opcode == MMCO_SHORT2LONG){
3263 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3264 pic = find_short(h, frame_num, &j);
3265 if(!pic){
3266 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3267 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3268 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3269 continue;
3273 switch(mmco[i].opcode){
3274 case MMCO_SHORT2UNUSED:
3275 if(s->avctx->debug&FF_DEBUG_MMCO)
3276 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3277 remove_short(h, frame_num, structure ^ PICT_FRAME);
3278 break;
3279 case MMCO_SHORT2LONG:
3280 if (h->long_ref[mmco[i].long_arg] != pic)
3281 remove_long(h, mmco[i].long_arg, 0);
3283 remove_short_at_index(h, j);
3284 h->long_ref[ mmco[i].long_arg ]= pic;
3285 if (h->long_ref[ mmco[i].long_arg ]){
3286 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3287 h->long_ref_count++;
3289 break;
3290 case MMCO_LONG2UNUSED:
3291 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3292 pic = h->long_ref[j];
3293 if (pic) {
3294 remove_long(h, j, structure ^ PICT_FRAME);
3295 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3296 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3297 break;
3298 case MMCO_LONG:
3299 // Comment below left from previous code as it is an interesting note.
3300 /* First field in pair is in short term list or
3301 * at a different long term index.
3302 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3303 * Report the problem and keep the pair where it is,
3304 * and mark this field valid. */
3307 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3308 remove_long(h, mmco[i].long_arg, 0);
3310 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3311 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3312 h->long_ref_count++;
3315 s->current_picture_ptr->reference |= s->picture_structure;
3316 current_ref_assigned=1;
3317 break;
3318 case MMCO_SET_MAX_LONG:
3319 assert(mmco[i].long_arg <= 16);
3320 // just remove the long term refs whose index is greater than the new max
3321 for(j = mmco[i].long_arg; j<16; j++){
3322 remove_long(h, j, 0);
3324 break;
3325 case MMCO_RESET:
3326 while(h->short_ref_count){
3327 remove_short(h, h->short_ref[0]->frame_num, 0);
3329 for(j = 0; j < 16; j++) {
3330 remove_long(h, j, 0);
3332 s->current_picture_ptr->poc=
3333 s->current_picture_ptr->field_poc[0]=
3334 s->current_picture_ptr->field_poc[1]=
3335 h->poc_lsb=
3336 h->poc_msb=
3337 h->frame_num=
3338 s->current_picture_ptr->frame_num= 0;
3339 break;
3340 default: assert(0);
3344 if (!current_ref_assigned) {
3345 /* Second field of complementary field pair; the first field of
3346 * which is already referenced. If short referenced, it
3347 * should be first entry in short_ref. If not, it must exist
3348 * in long_ref; trying to put it on the short list here is an
3349 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3). */
3351 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3352 /* Just mark the second field valid */
3353 s->current_picture_ptr->reference = PICT_FRAME;
3354 } else if (s->current_picture_ptr->long_ref) {
3355 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3356 "assignment for second field "
3357 "in complementary field pair "
3358 "(first field is long term)\n");
3359 } else {
3360 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3361 if(pic){
3362 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3365 if(h->short_ref_count)
3366 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3368 h->short_ref[0]= s->current_picture_ptr;
3369 h->short_ref_count++;
3370 s->current_picture_ptr->reference |= s->picture_structure;
3374 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3376 /* We have too many reference frames, probably due to corrupted
3377 * stream. Need to discard one frame. Prevents overrun of the
3378 * short_ref and long_ref buffers. */
3380 av_log(h->s.avctx, AV_LOG_ERROR,
3381 "number of reference frames exceeds max (probably "
3382 "corrupt input), discarding one\n");
3384 if (h->long_ref_count && !h->short_ref_count) {
3385 for (i = 0; i < 16; ++i)
3386 if (h->long_ref[i])
3387 break;
3389 assert(i < 16);
3390 remove_long(h, i, 0);
3391 } else {
3392 pic = h->short_ref[h->short_ref_count - 1];
3393 remove_short(h, pic->frame_num, 0);
3397 print_short_term(h);
3398 print_long_term(h);
3399 return 0;
3402 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3403 MpegEncContext * const s = &h->s;
3404 int i;
3406 h->mmco_index= 0;
3407 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3408 s->broken_link= get_bits1(gb) -1;
3409 if(get_bits1(gb)){
3410 h->mmco[0].opcode= MMCO_LONG;
3411 h->mmco[0].long_arg= 0;
3412 h->mmco_index= 1;
3414 }else{
3415 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3416 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3417 MMCOOpcode opcode= get_ue_golomb(gb);
3419 h->mmco[i].opcode= opcode;
3420 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3421 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3422 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3423 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3424 return -1; } */
3427 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3428 unsigned int long_arg= get_ue_golomb(gb);
3429 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3430 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3431 return -1;
3433 h->mmco[i].long_arg= long_arg;
3436 if(opcode > (unsigned)MMCO_LONG){
3437 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3438 return -1;
3440 if(opcode == MMCO_END)
3441 break;
3443 h->mmco_index= i;
3444 }else{
3445 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3447 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3448 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3449 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3450 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3451 h->mmco_index= 1;
3452 if (FIELD_PICTURE) {
3453 h->mmco[0].short_pic_num *= 2;
3454 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3455 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3456 h->mmco_index= 2;
3462 return 0;
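/* init_poc() below derives the picture order count for the current picture:
 * poc_type 0 reconstructs it from pic_order_cnt_lsb with MSB wrap-around detection,
 * poc_type 1 derives it from frame_num using the offset table coded in the SPS,
 * and poc_type 2 uses 2*(frame_num_offset + frame_num), minus one for
 * non-reference pictures. */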
3465 static int init_poc(H264Context *h){
3466 MpegEncContext * const s = &h->s;
3467 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3468 int field_poc[2];
3469 Picture *cur = s->current_picture_ptr;
3471 h->frame_num_offset= h->prev_frame_num_offset;
3472 if(h->frame_num < h->prev_frame_num)
3473 h->frame_num_offset += max_frame_num;
3475 if(h->sps.poc_type==0){
3476 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3478 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3479 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3480 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3481 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3482 else
3483 h->poc_msb = h->prev_poc_msb;
3484 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3485 field_poc[0] =
3486 field_poc[1] = h->poc_msb + h->poc_lsb;
3487 if(s->picture_structure == PICT_FRAME)
3488 field_poc[1] += h->delta_poc_bottom;
3489 }else if(h->sps.poc_type==1){
3490 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3491 int i;
3493 if(h->sps.poc_cycle_length != 0)
3494 abs_frame_num = h->frame_num_offset + h->frame_num;
3495 else
3496 abs_frame_num = 0;
3498 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3499 abs_frame_num--;
3501 expected_delta_per_poc_cycle = 0;
3502 for(i=0; i < h->sps.poc_cycle_length; i++)
3503 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3505 if(abs_frame_num > 0){
3506 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3507 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3509 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3510 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3511 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3512 } else
3513 expectedpoc = 0;
3515 if(h->nal_ref_idc == 0)
3516 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3518 field_poc[0] = expectedpoc + h->delta_poc[0];
3519 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3521 if(s->picture_structure == PICT_FRAME)
3522 field_poc[1] += h->delta_poc[1];
3523 }else{
3524 int poc= 2*(h->frame_num_offset + h->frame_num);
3526 if(!h->nal_ref_idc)
3527 poc--;
3529 field_poc[0]= poc;
3530 field_poc[1]= poc;
3533 if(s->picture_structure != PICT_BOTTOM_FIELD)
3534 s->current_picture_ptr->field_poc[0]= field_poc[0];
3535 if(s->picture_structure != PICT_TOP_FIELD)
3536 s->current_picture_ptr->field_poc[1]= field_poc[1];
3537 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3539 return 0;
3544 /** initialize scan tables */
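/* Note: when a non-C IDCT is in use, the T() permutation below transposes each
 * scan position (row/column swap), presumably so that coefficients end up in the
 * layout the optimized IDCT implementations expect. */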
3546 static void init_scan_tables(H264Context *h){
3547 MpegEncContext * const s = &h->s;
3548 int i;
3549 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3550 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3551 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3552 }else{
3553 for(i=0; i<16; i++){
3554 #define T(x) (x>>2) | ((x<<2) & 0xF)
3555 h->zigzag_scan[i] = T(zigzag_scan[i]);
3556 h-> field_scan[i] = T( field_scan[i]);
3557 #undef T
3560 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3561 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3562 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3563 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3564 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3565 }else{
3566 for(i=0; i<64; i++){
3567 #define T(x) (x>>3) | ((x&7)<<3)
3568 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3569 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3570 h->field_scan8x8[i] = T(field_scan8x8[i]);
3571 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3572 #undef T
3575 if(h->sps.transform_bypass){ //FIXME same ugly
3576 h->zigzag_scan_q0 = zigzag_scan;
3577 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3578 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3579 h->field_scan_q0 = field_scan;
3580 h->field_scan8x8_q0 = field_scan8x8;
3581 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3582 }else{
3583 h->zigzag_scan_q0 = h->zigzag_scan;
3584 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3585 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3586 h->field_scan_q0 = h->field_scan;
3587 h->field_scan8x8_q0 = h->field_scan8x8;
3588 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3593 /** Replicates H264 "master" context to thread contexts. */
3595 static void clone_slice(H264Context *dst, H264Context *src)
3597 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3598 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3599 dst->s.current_picture = src->s.current_picture;
3600 dst->s.linesize = src->s.linesize;
3601 dst->s.uvlinesize = src->s.uvlinesize;
3602 dst->s.first_field = src->s.first_field;
3604 dst->prev_poc_msb = src->prev_poc_msb;
3605 dst->prev_poc_lsb = src->prev_poc_lsb;
3606 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3607 dst->prev_frame_num = src->prev_frame_num;
3608 dst->short_ref_count = src->short_ref_count;
3610 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3611 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3612 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3613 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3615 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3616 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3620 /** decodes a slice header.
3621 * This will also call MPV_common_init() and frame_start() as needed.
3623 * @param h h264context
3624 * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
3626 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded */
3628 static int decode_slice_header(H264Context *h, H264Context *h0){
3629 MpegEncContext * const s = &h->s;
3630 MpegEncContext * const s0 = &h0->s;
3631 unsigned int first_mb_in_slice;
3632 unsigned int pps_id;
3633 int num_ref_idx_active_override_flag;
3634 unsigned int slice_type, tmp, i, j;
3635 int default_ref_list_done = 0;
3636 int last_pic_structure;
3638 s->dropable= h->nal_ref_idc == 0;
3640 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3641 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3642 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3643 }else{
3644 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3645 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3648 first_mb_in_slice= get_ue_golomb(&s->gb);
3650 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3651 h0->current_slice = 0;
3652 if (!s0->first_field)
3653 s->current_picture_ptr= NULL;
3656 slice_type= get_ue_golomb(&s->gb);
3657 if(slice_type > 9){
3658 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3659 return -1;
3661 if(slice_type > 4){
3662 slice_type -= 5;
3663 h->slice_type_fixed=1;
3664 }else
3665 h->slice_type_fixed=0;
3667 slice_type= golomb_to_pict_type[ slice_type ];
3668 if (slice_type == FF_I_TYPE
3669 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3670 default_ref_list_done = 1;
3672 h->slice_type= slice_type;
3673 h->slice_type_nos= slice_type & 3;
3675 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3676 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3677 av_log(h->s.avctx, AV_LOG_ERROR,
3678 "B picture before any references, skipping\n");
3679 return -1;
3682 pps_id= get_ue_golomb(&s->gb);
3683 if(pps_id>=MAX_PPS_COUNT){
3684 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3685 return -1;
3687 if(!h0->pps_buffers[pps_id]) {
3688 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3689 return -1;
3691 h->pps= *h0->pps_buffers[pps_id];
3693 if(!h0->sps_buffers[h->pps.sps_id]) {
3694 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3695 return -1;
3697 h->sps = *h0->sps_buffers[h->pps.sps_id];
3699 if(h == h0 && h->dequant_coeff_pps != pps_id){
3700 h->dequant_coeff_pps = pps_id;
3701 init_dequant_tables(h);
3704 s->mb_width= h->sps.mb_width;
3705 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3707 h->b_stride= s->mb_width*4;
3708 h->b8_stride= s->mb_width*2;
3710 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3711 if(h->sps.frame_mbs_only_flag)
3712 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3713 else
3714 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
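/* The SPS cropping offsets are in units of two luma samples horizontally, and twice
 * that vertically when the stream is not frame_mbs_only (field coding), hence the
 * 2* and 4* factors above; the FFMIN clamps look like a safety limit of this decoder
 * rather than something required by the spec. */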
3716 if (s->context_initialized
3717 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3718 if(h != h0)
3719 return -1; // width / height changed during parallelized decoding
3720 free_tables(h);
3721 MPV_common_end(s);
3723 if (!s->context_initialized) {
3724 if(h != h0)
3725 return -1; // we can't (re-)initialize context during parallel decoding
3726 if (MPV_common_init(s) < 0)
3727 return -1;
3728 s->first_field = 0;
3730 init_scan_tables(h);
3731 alloc_tables(h);
3733 for(i = 1; i < s->avctx->thread_count; i++) {
3734 H264Context *c;
3735 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3736 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3737 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3738 c->sps = h->sps;
3739 c->pps = h->pps;
3740 init_scan_tables(c);
3741 clone_tables(c, h);
3744 for(i = 0; i < s->avctx->thread_count; i++)
3745 if(context_init(h->thread_context[i]) < 0)
3746 return -1;
3748 s->avctx->width = s->width;
3749 s->avctx->height = s->height;
3750 s->avctx->sample_aspect_ratio= h->sps.sar;
3751 if(!s->avctx->sample_aspect_ratio.den)
3752 s->avctx->sample_aspect_ratio.den = 1;
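/* VUI timing: num_units_in_tick/time_scale is nominally a field period, so the frame
 * duration used as time_base is 2*num_units_in_tick/time_scale. The x264_build check
 * below appears to compensate for old x264 versions that wrote these values without
 * the factor of two. */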
3754 if(h->sps.timing_info_present_flag){
3755 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3756 if(h->x264_build > 0 && h->x264_build < 44)
3757 s->avctx->time_base.den *= 2;
3758 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3759 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3763 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3765 h->mb_mbaff = 0;
3766 h->mb_aff_frame = 0;
3767 last_pic_structure = s0->picture_structure;
3768 if(h->sps.frame_mbs_only_flag){
3769 s->picture_structure= PICT_FRAME;
3770 }else{
3771 if(get_bits1(&s->gb)) { //field_pic_flag
3772 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3773 } else {
3774 s->picture_structure= PICT_FRAME;
3775 h->mb_aff_frame = h->sps.mb_aff;
3778 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3780 if(h0->current_slice == 0){
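/* If frame_num jumped by more than one, synthesize the missing ("non-existing")
 * frames so that the short term reference list and POC state stay consistent:
 * each gap frame goes through frame_start() and reference marking with no MMCOs
 * (i.e. the sliding window). */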
3781 while(h->frame_num != h->prev_frame_num &&
3782 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3783 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3784 frame_start(h);
3785 h->prev_frame_num++;
3786 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3787 s->current_picture_ptr->frame_num= h->prev_frame_num;
3788 execute_ref_pic_marking(h, NULL, 0);
3791 /* See if we have a decoded first field looking for a pair... */
3792 if (s0->first_field) {
3793 assert(s0->current_picture_ptr);
3794 assert(s0->current_picture_ptr->data[0]);
3795 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3797 /* figure out if we have a complementary field pair */
3798 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3800 /* Previous field is unmatched. Don't display it, but let it
3801 * remain for reference if marked as such. */
3803 s0->current_picture_ptr = NULL;
3804 s0->first_field = FIELD_PICTURE;
3806 } else {
3807 if (h->nal_ref_idc &&
3808 s0->current_picture_ptr->reference &&
3809 s0->current_picture_ptr->frame_num != h->frame_num) {
3811 /* This and previous field were reference, but had
3812 * different frame_nums. Consider this field first in
3813 * pair. Throw away previous field except for reference
3814 * purposes. */
3816 s0->first_field = 1;
3817 s0->current_picture_ptr = NULL;
3819 } else {
3820 /* Second field in complementary pair */
3821 s0->first_field = 0;
3825 } else {
3826 /* Frame or first field in a potentially complementary pair */
3827 assert(!s0->current_picture_ptr);
3828 s0->first_field = FIELD_PICTURE;
3831 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3832 s0->first_field = 0;
3833 return -1;
3836 if(h != h0)
3837 clone_slice(h, h0);
3839 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3841 assert(s->mb_num == s->mb_width * s->mb_height);
3842 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3843 first_mb_in_slice >= s->mb_num){
3844 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3845 return -1;
3847 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3848 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3849 if (s->picture_structure == PICT_BOTTOM_FIELD)
3850 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3851 assert(s->mb_y < s->mb_height);
3853 if(s->picture_structure==PICT_FRAME){
3854 h->curr_pic_num= h->frame_num;
3855 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3856 }else{
3857 h->curr_pic_num= 2*h->frame_num + 1;
3858 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3861 if(h->nal_unit_type == NAL_IDR_SLICE){
3862 get_ue_golomb(&s->gb); /* idr_pic_id */
3865 if(h->sps.poc_type==0){
3866 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3868 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3869 h->delta_poc_bottom= get_se_golomb(&s->gb);
3873 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3874 h->delta_poc[0]= get_se_golomb(&s->gb);
3876 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3877 h->delta_poc[1]= get_se_golomb(&s->gb);
3880 init_poc(h);
3882 if(h->pps.redundant_pic_cnt_present){
3883 h->redundant_pic_count= get_ue_golomb(&s->gb);
3886 //set defaults, might be overridden a few lines later
3887 h->ref_count[0]= h->pps.ref_count[0];
3888 h->ref_count[1]= h->pps.ref_count[1];
3890 if(h->slice_type_nos != FF_I_TYPE){
3891 if(h->slice_type_nos == FF_B_TYPE){
3892 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3894 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3896 if(num_ref_idx_active_override_flag){
3897 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3898 if(h->slice_type_nos==FF_B_TYPE)
3899 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3901 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3902 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3903 h->ref_count[0]= h->ref_count[1]= 1;
3904 return -1;
3907 if(h->slice_type_nos == FF_B_TYPE)
3908 h->list_count= 2;
3909 else
3910 h->list_count= 1;
3911 }else
3912 h->list_count= 0;
3914 if(!default_ref_list_done){
3915 fill_default_ref_list(h);
3918 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3919 return -1;
3921 if(h->slice_type_nos!=FF_I_TYPE){
3922 s->last_picture_ptr= &h->ref_list[0][0];
3923 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3925 if(h->slice_type_nos==FF_B_TYPE){
3926 s->next_picture_ptr= &h->ref_list[1][0];
3927 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3930 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3931 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3932 pred_weight_table(h);
3933 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3934 implicit_weight_table(h);
3935 else
3936 h->use_weight = 0;
3938 if(h->nal_ref_idc)
3939 decode_ref_pic_marking(h0, &s->gb);
3941 if(FRAME_MBAFF)
3942 fill_mbaff_ref_list(h);
3944 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3945 direct_dist_scale_factor(h);
3946 direct_ref_list_init(h);
3948 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3949 tmp = get_ue_golomb(&s->gb);
3950 if(tmp > 2){
3951 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3952 return -1;
3954 h->cabac_init_idc= tmp;
3957 h->last_qscale_diff = 0;
3958 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3959 if(tmp>51){
3960 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3961 return -1;
3963 s->qscale= tmp;
3964 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3965 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3966 //FIXME qscale / qp ... stuff
3967 if(h->slice_type == FF_SP_TYPE){
3968 get_bits1(&s->gb); /* sp_for_switch_flag */
3970 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3971 get_se_golomb(&s->gb); /* slice_qs_delta */
3974 h->deblocking_filter = 1;
3975 h->slice_alpha_c0_offset = 0;
3976 h->slice_beta_offset = 0;
3977 if( h->pps.deblocking_filter_parameters_present ) {
3978 tmp= get_ue_golomb(&s->gb);
3979 if(tmp > 2){
3980 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3981 return -1;
3983 h->deblocking_filter= tmp;
3984 if(h->deblocking_filter < 2)
3985 h->deblocking_filter^= 1; // 1<->0
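/* After the swap above: 0 = deblocking disabled, 1 = enabled, 2 = enabled but not
 * across slice boundaries (the bitstream's disable_deblocking_filter_idc uses the
 * opposite meaning for values 0 and 1). */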
3987 if( h->deblocking_filter ) {
3988 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3989 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3993 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3994 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3995 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3996 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3997 h->deblocking_filter= 0;
3999 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4000 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4001 /* Cheat slightly for speed:
4002 Do not bother to deblock across slices. */
4003 h->deblocking_filter = 2;
4004 } else {
4005 h0->max_contexts = 1;
4006 if(!h0->single_decode_warning) {
4007 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4008 h0->single_decode_warning = 1;
4010 if(h != h0)
4011 return 1; // deblocking switched inside frame
4015 #if 0 //FMO
4016 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4017 slice_group_change_cycle= get_bits(&s->gb, ?);
4018 #endif
4020 h0->last_slice_type = slice_type;
4021 h->slice_num = ++h0->current_slice;
4022 if(h->slice_num >= MAX_SLICES){
4023 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
4026 for(j=0; j<2; j++){
4027 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
4028 ref2frm[0]=
4029 ref2frm[1]= -1;
4030 for(i=0; i<16; i++)
4031 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4032 +(h->ref_list[j][i].reference&3);
4033 ref2frm[18+0]=
4034 ref2frm[18+1]= -1;
4035 for(i=16; i<48; i++)
4036 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4037 +(h->ref_list[j][i].reference&3);
4040 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4041 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4043 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4044 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4045 h->slice_num,
4046 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4047 first_mb_in_slice,
4048 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4049 pps_id, h->frame_num,
4050 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4051 h->ref_count[0], h->ref_count[1],
4052 s->qscale,
4053 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4054 h->use_weight,
4055 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4056 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4060 return 0;
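/* get_level_prefix() below reads the unary level_prefix of CAVLC level coding:
 * it returns the number of leading zero bits before the first 1 and consumes
 * prefix+1 bits from the bitstream. */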
4066 static inline int get_level_prefix(GetBitContext *gb){
4067 unsigned int buf;
4068 int log;
4070 OPEN_READER(re, gb);
4071 UPDATE_CACHE(re, gb);
4072 buf=GET_CACHE(re, gb);
4074 log= 32 - av_log2(buf);
4075 #ifdef TRACE
4076 print_bin(buf>>(32-log), log);
4077 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4078 #endif
4080 LAST_SKIP_BITS(re, gb, log);
4081 CLOSE_READER(re, gb);
4083 return log-1;
4086 static inline int get_dct8x8_allowed(H264Context *h){
4087 int i;
4088 for(i=0; i<4; i++){
4089 if(!IS_SUB_8X8(h->sub_mb_type[i])
4090 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4091 return 0;
4093 return 1;
4097 /** decodes a residual block.
4098 * @param n block index
4099 * @param scantable scan table mapping coefficient index to block position
4100 * @param max_coeff number of coefficients in the block
4101 * @return <0 if an error occurred */
4103 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4104 MpegEncContext * const s = &h->s;
4105 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4106 int level[16];
4107 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4109 //FIXME put trailing_ones into the context
4111 if(n == CHROMA_DC_BLOCK_INDEX){
4112 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4113 total_coeff= coeff_token>>2;
4114 }else{
4115 if(n == LUMA_DC_BLOCK_INDEX){
4116 total_coeff= pred_non_zero_count(h, 0);
4117 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4118 total_coeff= coeff_token>>2;
4119 }else{
4120 total_coeff= pred_non_zero_count(h, n);
4121 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4122 total_coeff= coeff_token>>2;
4123 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4127 //FIXME set last_non_zero?
4129 if(total_coeff==0)
4130 return 0;
4131 if(total_coeff > (unsigned)max_coeff) {
4132 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4133 return -1;
4136 trailing_ones= coeff_token&3;
4137 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4138 assert(total_coeff<=16);
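/* CAVLC residual layout: after coeff_token (total_coeff/trailing_ones) come the signs
 * of the trailing ones, then the remaining levels coded as a unary prefix plus a
 * suffix whose length adapts to the magnitudes seen so far, then total_zeros and a
 * run_before value per coefficient. */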
4140 for(i=0; i<trailing_ones; i++){
4141 level[i]= 1 - 2*get_bits1(gb);
4144 if(i<total_coeff) {
4145 int level_code, mask;
4146 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4147 int prefix= get_level_prefix(gb);
4149 //first coefficient has suffix_length equal to 0 or 1
4150 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4151 if(suffix_length)
4152 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4153 else
4154 level_code= (prefix<<suffix_length); //part
4155 }else if(prefix==14){
4156 if(suffix_length)
4157 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4158 else
4159 level_code= prefix + get_bits(gb, 4); //part
4160 }else{
4161 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4162 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much) sense
4163 if(prefix>=16)
4164 level_code += (1<<(prefix-3))-4096;
4167 if(trailing_ones < 3) level_code += 2;
4169 suffix_length = 1;
4170 if(level_code > 5)
4171 suffix_length++;
4172 mask= -(level_code&1);
4173 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4174 i++;
4176 //remaining coefficients have suffix_length > 0
4177 for(;i<total_coeff;i++) {
4178 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4179 prefix = get_level_prefix(gb);
4180 if(prefix<15){
4181 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4182 }else{
4183 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4184 if(prefix>=16)
4185 level_code += (1<<(prefix-3))-4096;
4187 mask= -(level_code&1);
4188 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4189 if(level_code > suffix_limit[suffix_length])
4190 suffix_length++;
4194 if(total_coeff == max_coeff)
4195 zeros_left=0;
4196 else{
4197 if(n == CHROMA_DC_BLOCK_INDEX)
4198 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4199 else
4200 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4203 coeff_num = zeros_left + total_coeff - 1;
4204 j = scantable[coeff_num];
4205 if(n > 24){
4206 block[j] = level[0];
4207 for(i=1;i<total_coeff;i++) {
4208 if(zeros_left <= 0)
4209 run_before = 0;
4210 else if(zeros_left < 7){
4211 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4212 }else{
4213 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4215 zeros_left -= run_before;
4216 coeff_num -= 1 + run_before;
4217 j= scantable[ coeff_num ];
4219 block[j]= level[i];
4221 }else{
4222 block[j] = (level[0] * qmul[j] + 32)>>6;
4223 for(i=1;i<total_coeff;i++) {
4224 if(zeros_left <= 0)
4225 run_before = 0;
4226 else if(zeros_left < 7){
4227 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4228 }else{
4229 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4231 zeros_left -= run_before;
4232 coeff_num -= 1 + run_before;
4233 j= scantable[ coeff_num ];
4235 block[j]= (level[i] * qmul[j] + 32)>>6;
4239 if(zeros_left<0){
4240 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4241 return -1;
4244 return 0;
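/* For skipped macroblock pairs in MBAFF frames no mb_field_decoding_flag is coded;
 * predict_field_decoding_flag() below infers it from the left neighbour if that is in
 * the same slice, otherwise from the top neighbour, otherwise frame decoding is
 * assumed. */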
4247 static void predict_field_decoding_flag(H264Context *h){
4248 MpegEncContext * const s = &h->s;
4249 const int mb_xy= h->mb_xy;
4250 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4251 ? s->current_picture.mb_type[mb_xy-1]
4252 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4253 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4254 : 0;
4255 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4259 /** decodes a P_SKIP or B_SKIP macroblock */
4261 static void decode_mb_skip(H264Context *h){
4262 MpegEncContext * const s = &h->s;
4263 const int mb_xy= h->mb_xy;
4264 int mb_type=0;
4266 memset(h->non_zero_count[mb_xy], 0, 16);
4267 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4269 if(MB_FIELD)
4270 mb_type|= MB_TYPE_INTERLACED;
4272 if( h->slice_type_nos == FF_B_TYPE )
4274 // just for fill_caches. pred_direct_motion will set the real mb_type
4275 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4277 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4278 pred_direct_motion(h, &mb_type);
4279 mb_type|= MB_TYPE_SKIP;
4281 else
4283 int mx, my;
4284 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4286 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4287 pred_pskip_motion(h, &mx, &my);
4288 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4289 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4292 write_back_motion(h, mb_type);
4293 s->current_picture.mb_type[mb_xy]= mb_type;
4294 s->current_picture.qscale_table[mb_xy]= s->qscale;
4295 h->slice_table[ mb_xy ]= h->slice_num;
4296 h->prev_mb_skipped= 1;
4300 /** decodes a macroblock
4301 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed */
4303 static int decode_mb_cavlc(H264Context *h){
4304 MpegEncContext * const s = &h->s;
4305 int mb_xy;
4306 int partition_count;
4307 unsigned int mb_type, cbp;
4308 int dct8x8_allowed= h->pps.transform_8x8_mode;
4310 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4312 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
4314 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4315 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4316 down the code */
4317 if(h->slice_type_nos != FF_I_TYPE){
4318 if(s->mb_skip_run==-1)
4319 s->mb_skip_run= get_ue_golomb(&s->gb);
4321 if (s->mb_skip_run--) {
4322 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4323 if(s->mb_skip_run==0)
4324 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4325 else
4326 predict_field_decoding_flag(h);
4328 decode_mb_skip(h);
4329 return 0;
4332 if(FRAME_MBAFF){
4333 if( (s->mb_y&1) == 0 )
4334 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4337 h->prev_mb_skipped= 0;
4339 mb_type= get_ue_golomb(&s->gb);
4340 if(h->slice_type_nos == FF_B_TYPE){
4341 if(mb_type < 23){
4342 partition_count= b_mb_type_info[mb_type].partition_count;
4343 mb_type= b_mb_type_info[mb_type].type;
4344 }else{
4345 mb_type -= 23;
4346 goto decode_intra_mb;
4348 }else if(h->slice_type_nos == FF_P_TYPE){
4349 if(mb_type < 5){
4350 partition_count= p_mb_type_info[mb_type].partition_count;
4351 mb_type= p_mb_type_info[mb_type].type;
4352 }else{
4353 mb_type -= 5;
4354 goto decode_intra_mb;
4356 }else{
4357 assert(h->slice_type_nos == FF_I_TYPE);
4358 if(h->slice_type == FF_SI_TYPE && mb_type)
4359 mb_type--;
4360 decode_intra_mb:
4361 if(mb_type > 25){
4362 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4363 return -1;
4365 partition_count=0;
4366 cbp= i_mb_type_info[mb_type].cbp;
4367 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4368 mb_type= i_mb_type_info[mb_type].type;
4371 if(MB_FIELD)
4372 mb_type |= MB_TYPE_INTERLACED;
4374 h->slice_table[ mb_xy ]= h->slice_num;
4376 if(IS_INTRA_PCM(mb_type)){
4377 unsigned int x;
4379 // We assume these blocks are very rare so we do not optimize them.
4380 align_get_bits(&s->gb);
4382 // The pixels are stored in the same order as levels in h->mb array.
4383 for(x=0; x < (CHROMA ? 384 : 256); x++){
4384 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4387 // In deblocking, the quantizer is 0
4388 s->current_picture.qscale_table[mb_xy]= 0;
4389 // All coeffs are present
4390 memset(h->non_zero_count[mb_xy], 16, 16);
4392 s->current_picture.mb_type[mb_xy]= mb_type;
4393 return 0;
4396 if(MB_MBAFF){
4397 h->ref_count[0] <<= 1;
4398 h->ref_count[1] <<= 1;
4401 fill_caches(h, mb_type, 0);
4403 //mb_pred
4404 if(IS_INTRA(mb_type)){
4405 int pred_mode;
4406 // init_top_left_availability(h);
4407 if(IS_INTRA4x4(mb_type)){
4408 int i;
4409 int di = 1;
4410 if(dct8x8_allowed && get_bits1(&s->gb)){
4411 mb_type |= MB_TYPE_8x8DCT;
4412 di = 4;
4415 // fill_intra4x4_pred_table(h);
4416 for(i=0; i<16; i+=di){
4417 int mode= pred_intra_mode(h, i);
4419 if(!get_bits1(&s->gb)){
4420 const int rem_mode= get_bits(&s->gb, 3);
4421 mode = rem_mode + (rem_mode >= mode);
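// The 3-bit rem_intra4x4_pred_mode skips the predicted mode, so its 8 code values
// cover the 8 remaining intra prediction modes.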
4424 if(di==4)
4425 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4426 else
4427 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4429 write_back_intra_pred_mode(h);
4430 if( check_intra4x4_pred_mode(h) < 0)
4431 return -1;
4432 }else{
4433 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4434 if(h->intra16x16_pred_mode < 0)
4435 return -1;
4437 if(CHROMA){
4438 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4439 if(pred_mode < 0)
4440 return -1;
4441 h->chroma_pred_mode= pred_mode;
4443 }else if(partition_count==4){
4444 int i, j, sub_partition_count[4], list, ref[2][4];
4446 if(h->slice_type_nos == FF_B_TYPE){
4447 for(i=0; i<4; i++){
4448 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4449 if(h->sub_mb_type[i] >=13){
4450 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4451 return -1;
4453 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4454 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4456 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4457 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4458 pred_direct_motion(h, &mb_type);
4459 h->ref_cache[0][scan8[4]] =
4460 h->ref_cache[1][scan8[4]] =
4461 h->ref_cache[0][scan8[12]] =
4462 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4464 }else{
4465 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4466 for(i=0; i<4; i++){
4467 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4468 if(h->sub_mb_type[i] >=4){
4469 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4470 return -1;
4472 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4473 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4477 for(list=0; list<h->list_count; list++){
4478 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4479 for(i=0; i<4; i++){
4480 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4481 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4482 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4483 if(tmp>=ref_count){
4484 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4485 return -1;
4487 ref[list][i]= tmp;
4488 }else{
4489 //FIXME
4490 ref[list][i] = -1;
4495 if(dct8x8_allowed)
4496 dct8x8_allowed = get_dct8x8_allowed(h);
4498 for(list=0; list<h->list_count; list++){
4499 for(i=0; i<4; i++){
4500 if(IS_DIRECT(h->sub_mb_type[i])) {
4501 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4502 continue;
4504 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4505 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4507 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4508 const int sub_mb_type= h->sub_mb_type[i];
4509 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4510 for(j=0; j<sub_partition_count[i]; j++){
4511 int mx, my;
4512 const int index= 4*i + block_width*j;
4513 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4514 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4515 mx += get_se_golomb(&s->gb);
4516 my += get_se_golomb(&s->gb);
4517 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4519 if(IS_SUB_8X8(sub_mb_type)){
4520 mv_cache[ 1 ][0]=
4521 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4522 mv_cache[ 1 ][1]=
4523 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4524 }else if(IS_SUB_8X4(sub_mb_type)){
4525 mv_cache[ 1 ][0]= mx;
4526 mv_cache[ 1 ][1]= my;
4527 }else if(IS_SUB_4X8(sub_mb_type)){
4528 mv_cache[ 8 ][0]= mx;
4529 mv_cache[ 8 ][1]= my;
4531 mv_cache[ 0 ][0]= mx;
4532 mv_cache[ 0 ][1]= my;
4534 }else{
4535 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4536 p[0] = p[1]=
4537 p[8] = p[9]= 0;
4541 }else if(IS_DIRECT(mb_type)){
4542 pred_direct_motion(h, &mb_type);
4543 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4544 }else{
4545 int list, mx, my, i;
4546 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4547 if(IS_16X16(mb_type)){
4548 for(list=0; list<h->list_count; list++){
4549 unsigned int val;
4550 if(IS_DIR(mb_type, 0, list)){
4551 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4552 if(val >= h->ref_count[list]){
4553 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4554 return -1;
4556 }else
4557 val= LIST_NOT_USED&0xFF;
4558 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4560 for(list=0; list<h->list_count; list++){
4561 unsigned int val;
4562 if(IS_DIR(mb_type, 0, list)){
4563 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4564 mx += get_se_golomb(&s->gb);
4565 my += get_se_golomb(&s->gb);
4566 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4568 val= pack16to32(mx,my);
4569 }else
4570 val=0;
4571 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4574 else if(IS_16X8(mb_type)){
4575 for(list=0; list<h->list_count; list++){
4576 for(i=0; i<2; i++){
4577 unsigned int val;
4578 if(IS_DIR(mb_type, i, list)){
4579 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4580 if(val >= h->ref_count[list]){
4581 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4582 return -1;
4584 }else
4585 val= LIST_NOT_USED&0xFF;
4586 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4589 for(list=0; list<h->list_count; list++){
4590 for(i=0; i<2; i++){
4591 unsigned int val;
4592 if(IS_DIR(mb_type, i, list)){
4593 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4594 mx += get_se_golomb(&s->gb);
4595 my += get_se_golomb(&s->gb);
4596 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4598 val= pack16to32(mx,my);
4599 }else
4600 val=0;
4601 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4604 }else{
4605 assert(IS_8X16(mb_type));
4606 for(list=0; list<h->list_count; list++){
4607 for(i=0; i<2; i++){
4608 unsigned int val;
4609 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4610 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4611 if(val >= h->ref_count[list]){
4612 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4613 return -1;
4615 }else
4616 val= LIST_NOT_USED&0xFF;
4617 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4620 for(list=0; list<h->list_count; list++){
4621 for(i=0; i<2; i++){
4622 unsigned int val;
4623 if(IS_DIR(mb_type, i, list)){
4624 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4625 mx += get_se_golomb(&s->gb);
4626 my += get_se_golomb(&s->gb);
4627 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4629 val= pack16to32(mx,my);
4630 }else
4631 val=0;
4632 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4638 if(IS_INTER(mb_type))
4639 write_back_motion(h, mb_type);
4641 if(!IS_INTRA16x16(mb_type)){
4642 cbp= get_ue_golomb(&s->gb);
4643 if(cbp > 47){
4644 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4645 return -1;
4648 if(CHROMA){
4649 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4650 else cbp= golomb_to_inter_cbp [cbp];
4651 }else{
4652 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4653 else cbp= golomb_to_inter_cbp_gray[cbp];
4656 h->cbp = cbp;
4658 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4659 if(get_bits1(&s->gb)){
4660 mb_type |= MB_TYPE_8x8DCT;
4661 h->cbp_table[mb_xy]= cbp;
4664 s->current_picture.mb_type[mb_xy]= mb_type;
4666 if(cbp || IS_INTRA16x16(mb_type)){
4667 int i8x8, i4x4, chroma_idx;
4668 int dquant;
4669 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4670 const uint8_t *scan, *scan8x8, *dc_scan;
4672 // fill_non_zero_count_cache(h);
4674 if(IS_INTERLACED(mb_type)){
4675 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4676 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4677 dc_scan= luma_dc_field_scan;
4678 }else{
4679 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4680 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4681 dc_scan= luma_dc_zigzag_scan;
4684 dquant= get_se_golomb(&s->gb);
4686 if( dquant > 25 || dquant < -26 ){
4687 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4688 return -1;
4691 s->qscale += dquant;
4692 if(((unsigned)s->qscale) > 51){
4693 if(s->qscale<0) s->qscale+= 52;
4694 else s->qscale-= 52;
4697 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4698 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4699 if(IS_INTRA16x16(mb_type)){
4700 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4701 return -1; //FIXME continue if partitioned and other return -1 too
4704 assert((cbp&15) == 0 || (cbp&15) == 15);
4706 if(cbp&15){
4707 for(i8x8=0; i8x8<4; i8x8++){
4708 for(i4x4=0; i4x4<4; i4x4++){
4709 const int index= i4x4 + 4*i8x8;
4710 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4711 return -1;
4715 }else{
4716 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4718 }else{
4719 for(i8x8=0; i8x8<4; i8x8++){
4720 if(cbp & (1<<i8x8)){
4721 if(IS_8x8DCT(mb_type)){
4722 DCTELEM *buf = &h->mb[64*i8x8];
4723 uint8_t *nnz;
4724 for(i4x4=0; i4x4<4; i4x4++){
4725 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4726 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4727 return -1;
4729 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4730 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4731 }else{
4732 for(i4x4=0; i4x4<4; i4x4++){
4733 const int index= i4x4 + 4*i8x8;
4735 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4736 return -1;
4740 }else{
4741 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4742 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4747 if(cbp&0x30){
4748 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4749 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4750 return -1;
4754 if(cbp&0x20){
4755 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4756 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4757 for(i4x4=0; i4x4<4; i4x4++){
4758 const int index= 16 + 4*chroma_idx + i4x4;
4759 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4760 return -1;
4764 }else{
4765 uint8_t * const nnz= &h->non_zero_count_cache[0];
4766 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4767 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4769 }else{
4770 uint8_t * const nnz= &h->non_zero_count_cache[0];
4771 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4772 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4773 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4775 s->current_picture.qscale_table[mb_xy]= s->qscale;
4776 write_back_non_zero_count(h);
4778 if(MB_MBAFF){
4779 h->ref_count[0] >>= 1;
4780 h->ref_count[1] >>= 1;
4783 return 0;
4786 static int decode_cabac_field_decoding_flag(H264Context *h) {
4787 MpegEncContext * const s = &h->s;
4788 const int mb_x = s->mb_x;
4789 const int mb_y = s->mb_y & ~1;
4790 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4791 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4793 unsigned int ctx = 0;
4795 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4796 ctx += 1;
4798 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4799 ctx += 1;
4802 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
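/* CABAC intra mb_type binarization handled below: the first bin (with a context from
 * the neighbouring macroblocks in intra slices) selects I_4x4, a terminating symbol
 * selects I_PCM, and the remaining bins build the I_16x16 type from the luma cbp
 * flag, the chroma cbp and the prediction mode. */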
4805 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4806 uint8_t *state= &h->cabac_state[ctx_base];
4807 int mb_type;
4809 if(intra_slice){
4810 MpegEncContext * const s = &h->s;
4811 const int mba_xy = h->left_mb_xy[0];
4812 const int mbb_xy = h->top_mb_xy;
4813 int ctx=0;
4814 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4815 ctx++;
4816 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4817 ctx++;
4818 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4819 return 0; /* I4x4 */
4820 state += 2;
4821 }else{
4822 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4823 return 0; /* I4x4 */
4826 if( get_cabac_terminate( &h->cabac ) )
4827 return 25; /* PCM */
4829 mb_type = 1; /* I16x16 */
4830 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4831 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4832 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4833 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4834 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4835 return mb_type;
4838 static int decode_cabac_mb_type( H264Context *h ) {
4839 MpegEncContext * const s = &h->s;
4841 if( h->slice_type_nos == FF_I_TYPE ) {
4842 return decode_cabac_intra_mb_type(h, 3, 1);
4843 } else if( h->slice_type_nos == FF_P_TYPE ) {
4844 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4845 /* P-type */
4846 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4847 /* P_L0_D16x16, P_8x8 */
4848 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4849 } else {
4850 /* P_L0_D8x16, P_L0_D16x8 */
4851 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4853 } else {
4854 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4856 } else if( h->slice_type_nos == FF_B_TYPE ) {
4857 const int mba_xy = h->left_mb_xy[0];
4858 const int mbb_xy = h->top_mb_xy;
4859 int ctx = 0;
4860 int bits;
4862 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4863 ctx++;
4864 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4865 ctx++;
4867 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4868 return 0; /* B_Direct_16x16 */
4870 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4871 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4874 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4875 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4876 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4877 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4878 if( bits < 8 )
4879 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4880 else if( bits == 13 ) {
4881 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4882 } else if( bits == 14 )
4883 return 11; /* B_L1_L0_8x16 */
4884 else if( bits == 15 )
4885 return 22; /* B_8x8 */
4887 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4888 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4889 } else {
4890 /* TODO SI/SP frames? */
4891 return -1;
4895 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4896 MpegEncContext * const s = &h->s;
4897 int mba_xy, mbb_xy;
4898 int ctx = 0;
4900 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4901 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4902 mba_xy = mb_xy - 1;
4903 if( (mb_y&1)
4904 && h->slice_table[mba_xy] == h->slice_num
4905 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4906 mba_xy += s->mb_stride;
4907 if( MB_FIELD ){
4908 mbb_xy = mb_xy - s->mb_stride;
4909 if( !(mb_y&1)
4910 && h->slice_table[mbb_xy] == h->slice_num
4911 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4912 mbb_xy -= s->mb_stride;
4913 }else
4914 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4915 }else{
4916 int mb_xy = h->mb_xy;
4917 mba_xy = mb_xy - 1;
4918 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4921 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4922 ctx++;
4923 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4924 ctx++;
4926 if( h->slice_type_nos == FF_B_TYPE )
4927 ctx += 13;
4928 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4931 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4932 int mode = 0;
4934 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4935 return pred_mode;
4937 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4938 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4939 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4941 if( mode >= pred_mode )
4942 return mode + 1;
4943 else
4944 return mode;
4947 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4948 const int mba_xy = h->left_mb_xy[0];
4949 const int mbb_xy = h->top_mb_xy;
4951 int ctx = 0;
4953 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4954 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4955 ctx++;
4957 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4958 ctx++;
4960 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4961 return 0;
4963 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4964 return 1;
4965 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4966 return 2;
4967 else
4968 return 3;
4971 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4972 int cbp_b, cbp_a, ctx, cbp = 0;
4974 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4975 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4977 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4978 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4979 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4980 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4981 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4982 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4983 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4984 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
4985 return cbp;
4987 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4988 int ctx;
4989 int cbp_a, cbp_b;
4991 cbp_a = (h->left_cbp>>4)&0x03;
4992 cbp_b = (h-> top_cbp>>4)&0x03;
4994 ctx = 0;
4995 if( cbp_a > 0 ) ctx++;
4996 if( cbp_b > 0 ) ctx += 2;
4997 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4998 return 0;
5000 ctx = 4;
5001 if( cbp_a == 2 ) ctx++;
5002 if( cbp_b == 2 ) ctx += 2;
5003 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
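/* mb_qp_delta is coded as a unary bin string (the first context depends on whether
 * the previous delta was nonzero); decode_cabac_mb_dqp() below maps the unsigned
 * value k back to a signed delta: odd k -> +(k+1)/2, even k -> -k/2. */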
5005 static int decode_cabac_mb_dqp( H264Context *h) {
5006 int ctx = 0;
5007 int val = 0;
5009 if( h->last_qscale_diff != 0 )
5010 ctx++;
5012 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5013 if( ctx < 2 )
5014 ctx = 2;
5015 else
5016 ctx = 3;
5017 val++;
5018 if(val > 102) //prevent infinite loop
5019 return INT_MIN;
5022 if( val&0x01 )
5023 return (val + 1)/2;
5024 else
5025 return -(val + 1)/2;
5027 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5028 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5029 return 0; /* 8x8 */
5030 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5031 return 1; /* 8x4 */
5032 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5033 return 2; /* 4x8 */
5034 return 3; /* 4x4 */
5036 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5037 int type;
5038 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5039 return 0; /* B_Direct_8x8 */
5040 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5041 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5042 type = 3;
5043 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5044 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5045 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5046 type += 4;
5048 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5049 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5050 return type;
5053 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5054 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
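/* decode_cabac_mb_ref(): the reference index is decoded as a unary bin string.
 * The context of the first bin (54..57) depends on whether the blocks to the
 * left and above use a reference index > 0 (direct-predicted blocks are
 * ignored in B slices); the remaining bins use contexts 58 and 59. */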
5057 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5058 int refa = h->ref_cache[list][scan8[n] - 1];
5059 int refb = h->ref_cache[list][scan8[n] - 8];
5060 int ref = 0;
5061 int ctx = 0;
5063 if( h->slice_type_nos == FF_B_TYPE) {
5064 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5065 ctx++;
5066 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5067 ctx += 2;
5068 } else {
5069 if( refa > 0 )
5070 ctx++;
5071 if( refb > 0 )
5072 ctx += 2;
5075 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5076 ref++;
5077 if( ctx < 4 )
5078 ctx = 4;
5079 else
5080 ctx = 5;
5081 if(ref >= 32 /*h->ref_list[list]*/){
5082 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5083 return 0; //FIXME we should return -1 and check the return everywhere
5086 return ref;
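/* decode_cabac_mb_mvd(): one mvd component (l=0: x, l=1: y) is decoded with
 * the UEG3 binarization: the context of the first bin (base 40 for x, 47 for
 * y) depends on the sum of the neighbouring |mvd| values (<3, 3..32, >32);
 * magnitudes 1..8 use a truncated unary prefix with contexts base+3..base+6,
 * larger magnitudes add a 3rd order Exp-Golomb suffix in bypass mode, and the
 * sign is a single bypass bin. */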
5089 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5090 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5091 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5092 int ctxbase = (l == 0) ? 40 : 47;
5093 int ctx, mvd;
5095 if( amvd < 3 )
5096 ctx = 0;
5097 else if( amvd > 32 )
5098 ctx = 2;
5099 else
5100 ctx = 1;
5102 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5103 return 0;
5105 mvd= 1;
5106 ctx= 3;
5107 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5108 mvd++;
5109 if( ctx < 6 )
5110 ctx++;
5113 if( mvd >= 9 ) {
5114 int k = 3;
5115 while( get_cabac_bypass( &h->cabac ) ) {
5116 mvd += 1 << k;
5117 k++;
5118 if(k>24){
5119 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5120 return INT_MIN;
5123 while( k-- ) {
5124 if( get_cabac_bypass( &h->cabac ) )
5125 mvd += 1 << k;
5128 return get_cabac_bypass_sign( &h->cabac, -mvd );
5131 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5132 int nza, nzb;
5133 int ctx = 0;
5135 if( is_dc ) {
5136 if( cat == 0 ) {
5137 nza = h->left_cbp&0x100;
5138 nzb = h-> top_cbp&0x100;
5139 } else {
5140 nza = (h->left_cbp>>(6+idx))&0x01;
5141 nzb = (h-> top_cbp>>(6+idx))&0x01;
5143 } else {
5144 if( cat == 4 ) {
5145 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5146 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5147 } else {
5148 assert(cat == 1 || cat == 2);
5149 nza = h->non_zero_count_cache[scan8[idx] - 1];
5150 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5154 if( nza > 0 )
5155 ctx++;
5157 if( nzb > 0 )
5158 ctx += 2;
5160 return ctx + 4 * cat;
5163 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5164 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5165 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5166 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5167 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5170 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5171 static const int significant_coeff_flag_offset[2][6] = {
5172 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5173 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5175 static const int last_coeff_flag_offset[2][6] = {
5176 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5177 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5179 static const int coeff_abs_level_m1_offset[6] = {
5180 227+0, 227+10, 227+20, 227+30, 227+39, 426
5182 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5183 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5184 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5185 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5186 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5187 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5188 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5189 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5190 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5192 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5193 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5194 * map node ctx => cabac ctx for level=1 */
5195 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5196 /* map node ctx => cabac ctx for level>1 */
5197 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5198 static const uint8_t coeff_abs_level_transition[2][8] = {
5199 /* update node ctx after decoding a level=1 */
5200 { 1, 2, 3, 3, 4, 5, 6, 7 },
5201 /* update node ctx after decoding a level>1 */
5202 { 4, 4, 4, 4, 5, 6, 7, 7 }
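/* Example: after two trailing |level|==1 coefficients node_ctx is 2 (the
 * level=1 bin uses context 3); the first |level|>1 moves it to 4, and each
 * further |level|>1 advances it up to 7, so the level=1 bin then uses context
 * 0 and the level>1 escape uses contexts 6..9. The levels themselves are
 * decoded in reverse scan order in the do/while loop below. */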
5205 int index[64];
5207 int av_unused last;
5208 int coeff_count = 0;
5209 int node_ctx = 0;
5211 uint8_t *significant_coeff_ctx_base;
5212 uint8_t *last_coeff_ctx_base;
5213 uint8_t *abs_level_m1_ctx_base;
5215 #ifndef ARCH_X86
5216 #define CABAC_ON_STACK
5217 #endif
5218 #ifdef CABAC_ON_STACK
5219 #define CC &cc
5220 CABACContext cc;
5221 cc.range = h->cabac.range;
5222 cc.low = h->cabac.low;
5223 cc.bytestream= h->cabac.bytestream;
5224 #else
5225 #define CC &h->cabac
5226 #endif
5229 /* cat: 0-> DC 16x16 n = 0
5230 * 1-> AC 16x16 n = luma4x4idx
5231 * 2-> Luma4x4 n = luma4x4idx
5232 * 3-> DC Chroma n = iCbCr
5233 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5234 * 5-> Luma8x8 n = 4 * luma8x8idx
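 *
 * For example, decode_mb_cabac() below decodes the Intra16x16 luma DC block
 * with (cat=0, n=0, max_coeff=16, qmul=NULL), a chroma DC block with (cat=3,
 * n=iCbCr, max_coeff=4, qmul=NULL) and an 8x8 transform block with (cat=5,
 * n=4*luma8x8idx, max_coeff=64).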
5237 /* read coded block flag */
5238 if( is_dc || cat != 5 ) {
5239 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5240 if( !is_dc ) {
5241 if( cat == 4 )
5242 h->non_zero_count_cache[scan8[16+n]] = 0;
5243 else
5244 h->non_zero_count_cache[scan8[n]] = 0;
5247 #ifdef CABAC_ON_STACK
5248 h->cabac.range = cc.range ;
5249 h->cabac.low = cc.low ;
5250 h->cabac.bytestream= cc.bytestream;
5251 #endif
5252 return;
5256 significant_coeff_ctx_base = h->cabac_state
5257 + significant_coeff_flag_offset[MB_FIELD][cat];
5258 last_coeff_ctx_base = h->cabac_state
5259 + last_coeff_flag_offset[MB_FIELD][cat];
5260 abs_level_m1_ctx_base = h->cabac_state
5261 + coeff_abs_level_m1_offset[cat];
5263 if( !is_dc && cat == 5 ) {
5264 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5265 for(last= 0; last < coefs; last++) { \
5266 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5267 if( get_cabac( CC, sig_ctx )) { \
5268 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5269 index[coeff_count++] = last; \
5270 if( get_cabac( CC, last_ctx ) ) { \
5271 last= max_coeff; \
5272 break; \
5276 if( last == max_coeff -1 ) {\
5277 index[coeff_count++] = last;\
5279 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5280 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5281 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5282 } else {
5283 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5284 #else
5285 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5286 } else {
5287 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5288 #endif
5290 assert(coeff_count > 0);
5292 if( is_dc ) {
5293 if( cat == 0 )
5294 h->cbp_table[h->mb_xy] |= 0x100;
5295 else
5296 h->cbp_table[h->mb_xy] |= 0x40 << n;
5297 } else {
5298 if( cat == 5 )
5299 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5300 else if( cat == 4 )
5301 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5302 else {
5303 assert( cat == 1 || cat == 2 );
5304 h->non_zero_count_cache[scan8[n]] = coeff_count;
5308 do {
5309 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5311 int j= scantable[index[--coeff_count]];
5313 if( get_cabac( CC, ctx ) == 0 ) {
5314 node_ctx = coeff_abs_level_transition[0][node_ctx];
5315 if( is_dc ) {
5316 block[j] = get_cabac_bypass_sign( CC, -1);
5317 }else{
5318 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5320 } else {
5321 int coeff_abs = 2;
5322 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5323 node_ctx = coeff_abs_level_transition[1][node_ctx];
5325 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5326 coeff_abs++;
5329 if( coeff_abs >= 15 ) {
5330 int j = 0;
5331 while( get_cabac_bypass( CC ) ) {
5332 j++;
5335 coeff_abs=1;
5336 while( j-- ) {
5337 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5339 coeff_abs+= 14;
5342 if( is_dc ) {
5343 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5344 }else{
5345 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5348 } while( coeff_count );
5349 #ifdef CABAC_ON_STACK
5350 h->cabac.range = cc.range ;
5351 h->cabac.low = cc.low ;
5352 h->cabac.bytestream= cc.bytestream;
5353 #endif
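/* decode_cabac_residual_internal() is av_always_inline with a constant is_dc
 * argument, so the two wrappers below let the compiler emit specialised DC and
 * non-DC versions with the is_dc branches folded away; with CONFIG_SMALL a
 * single shared copy is kept and is_dc is derived from cat at run time. */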
5357 #ifndef CONFIG_SMALL
5358 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5359 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5362 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5363 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5365 #endif
5367 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5368 #ifdef CONFIG_SMALL
5369 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5370 #else
5371 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5372 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5373 #endif
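/* compute_mb_neighbors() fills top_mb_xy and left_mb_xy[0], the neighbours
 * used for CABAC context selection. In MBAFF frames macroblocks are stored in
 * vertical pairs, so when the current pair and a neighbouring pair differ in
 * field/frame coding the neighbour index has to be moved within its pair (the
 * extra mb_stride adjustments); in field pictures the row directly above in
 * the tables belongs to the other field, hence top_mb_xy is moved up one more
 * row. */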
5376 static inline void compute_mb_neighbors(H264Context *h)
5378 MpegEncContext * const s = &h->s;
5379 const int mb_xy = h->mb_xy;
5380 h->top_mb_xy = mb_xy - s->mb_stride;
5381 h->left_mb_xy[0] = mb_xy - 1;
5382 if(FRAME_MBAFF){
5383 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5384 const int top_pair_xy = pair_xy - s->mb_stride;
5385 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5386 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5387 const int curr_mb_frame_flag = !MB_FIELD;
5388 const int bottom = (s->mb_y & 1);
5389 if (bottom
5390 ? !curr_mb_frame_flag // bottom macroblock
5391 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5393 h->top_mb_xy -= s->mb_stride;
5395 if (left_mb_frame_flag != curr_mb_frame_flag) {
5396 h->left_mb_xy[0] = pair_xy - 1;
5398 } else if (FIELD_PICTURE) {
5399 h->top_mb_xy -= s->mb_stride;
5401 return;
5405 * Decodes a macroblock.
5406 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is detected
5408 static int decode_mb_cabac(H264Context *h) {
5409 MpegEncContext * const s = &h->s;
5410 int mb_xy;
5411 int mb_type, partition_count, cbp = 0;
5412 int dct8x8_allowed= h->pps.transform_8x8_mode;
5414 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5416 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5418 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5419 if( h->slice_type_nos != FF_I_TYPE ) {
5420 int skip;
5421 /* a skipped mb needs the aff flag from the following mb */
5422 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5423 predict_field_decoding_flag(h);
5424 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5425 skip = h->next_mb_skipped;
5426 else
5427 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5428 /* read skip flags */
5429 if( skip ) {
5430 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5431 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5432 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5433 if(h->next_mb_skipped)
5434 predict_field_decoding_flag(h);
5435 else
5436 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5439 decode_mb_skip(h);
5441 h->cbp_table[mb_xy] = 0;
5442 h->chroma_pred_mode_table[mb_xy] = 0;
5443 h->last_qscale_diff = 0;
5445 return 0;
5449 if(FRAME_MBAFF){
5450 if( (s->mb_y&1) == 0 )
5451 h->mb_mbaff =
5452 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5455 h->prev_mb_skipped = 0;
5457 compute_mb_neighbors(h);
5458 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5459 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5460 return -1;
5463 if( h->slice_type_nos == FF_B_TYPE ) {
5464 if( mb_type < 23 ){
5465 partition_count= b_mb_type_info[mb_type].partition_count;
5466 mb_type= b_mb_type_info[mb_type].type;
5467 }else{
5468 mb_type -= 23;
5469 goto decode_intra_mb;
5471 } else if( h->slice_type_nos == FF_P_TYPE ) {
5472 if( mb_type < 5) {
5473 partition_count= p_mb_type_info[mb_type].partition_count;
5474 mb_type= p_mb_type_info[mb_type].type;
5475 } else {
5476 mb_type -= 5;
5477 goto decode_intra_mb;
5479 } else {
5480 if(h->slice_type == FF_SI_TYPE && mb_type)
5481 mb_type--;
5482 assert(h->slice_type_nos == FF_I_TYPE);
5483 decode_intra_mb:
5484 partition_count = 0;
5485 cbp= i_mb_type_info[mb_type].cbp;
5486 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5487 mb_type= i_mb_type_info[mb_type].type;
5489 if(MB_FIELD)
5490 mb_type |= MB_TYPE_INTERLACED;
5492 h->slice_table[ mb_xy ]= h->slice_num;
5494 if(IS_INTRA_PCM(mb_type)) {
5495 const uint8_t *ptr;
5497 // We assume these blocks are very rare so we do not optimize them.
5498 // FIXME The two following lines get the bitstream position in the cabac
5499 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5500 ptr= h->cabac.bytestream;
5501 if(h->cabac.low&0x1) ptr--;
5502 if(CABAC_BITS==16){
5503 if(h->cabac.low&0x1FF) ptr--;
5506 // The pixels are stored in the same order as the levels in the h->mb array.
5507 memcpy(h->mb, ptr, 256); ptr+=256;
5508 if(CHROMA){
5509 memcpy(h->mb+128, ptr, 128); ptr+=128;
5512 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5514 // All blocks are present
5515 h->cbp_table[mb_xy] = 0x1ef;
5516 h->chroma_pred_mode_table[mb_xy] = 0;
5517 // In deblocking, the quantizer is 0
5518 s->current_picture.qscale_table[mb_xy]= 0;
5519 // All coeffs are present
5520 memset(h->non_zero_count[mb_xy], 16, 16);
5521 s->current_picture.mb_type[mb_xy]= mb_type;
5522 h->last_qscale_diff = 0;
5523 return 0;
5526 if(MB_MBAFF){
5527 h->ref_count[0] <<= 1;
5528 h->ref_count[1] <<= 1;
5531 fill_caches(h, mb_type, 0);
5533 if( IS_INTRA( mb_type ) ) {
5534 int i, pred_mode;
5535 if( IS_INTRA4x4( mb_type ) ) {
5536 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5537 mb_type |= MB_TYPE_8x8DCT;
5538 for( i = 0; i < 16; i+=4 ) {
5539 int pred = pred_intra_mode( h, i );
5540 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5541 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5543 } else {
5544 for( i = 0; i < 16; i++ ) {
5545 int pred = pred_intra_mode( h, i );
5546 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5548 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5551 write_back_intra_pred_mode(h);
5552 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5553 } else {
5554 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5555 if( h->intra16x16_pred_mode < 0 ) return -1;
5557 if(CHROMA){
5558 h->chroma_pred_mode_table[mb_xy] =
5559 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5561 pred_mode= check_intra_pred_mode( h, pred_mode );
5562 if( pred_mode < 0 ) return -1;
5563 h->chroma_pred_mode= pred_mode;
5565 } else if( partition_count == 4 ) {
5566 int i, j, sub_partition_count[4], list, ref[2][4];
5568 if( h->slice_type_nos == FF_B_TYPE ) {
5569 for( i = 0; i < 4; i++ ) {
5570 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5571 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5572 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5574 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5575 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5576 pred_direct_motion(h, &mb_type);
5577 h->ref_cache[0][scan8[4]] =
5578 h->ref_cache[1][scan8[4]] =
5579 h->ref_cache[0][scan8[12]] =
5580 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5581 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5582 for( i = 0; i < 4; i++ )
5583 if( IS_DIRECT(h->sub_mb_type[i]) )
5584 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5587 } else {
5588 for( i = 0; i < 4; i++ ) {
5589 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5590 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5591 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5595 for( list = 0; list < h->list_count; list++ ) {
5596 for( i = 0; i < 4; i++ ) {
5597 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5598 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5599 if( h->ref_count[list] > 1 )
5600 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5601 else
5602 ref[list][i] = 0;
5603 } else {
5604 ref[list][i] = -1;
5606 h->ref_cache[list][ scan8[4*i]+1 ]=
5607 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5611 if(dct8x8_allowed)
5612 dct8x8_allowed = get_dct8x8_allowed(h);
5614 for(list=0; list<h->list_count; list++){
5615 for(i=0; i<4; i++){
5616 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5617 if(IS_DIRECT(h->sub_mb_type[i])){
5618 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5619 continue;
5622 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5623 const int sub_mb_type= h->sub_mb_type[i];
5624 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5625 for(j=0; j<sub_partition_count[i]; j++){
5626 int mpx, mpy;
5627 int mx, my;
5628 const int index= 4*i + block_width*j;
5629 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5630 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5631 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5633 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5634 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5635 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5637 if(IS_SUB_8X8(sub_mb_type)){
5638 mv_cache[ 1 ][0]=
5639 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5640 mv_cache[ 1 ][1]=
5641 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5643 mvd_cache[ 1 ][0]=
5644 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5645 mvd_cache[ 1 ][1]=
5646 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5647 }else if(IS_SUB_8X4(sub_mb_type)){
5648 mv_cache[ 1 ][0]= mx;
5649 mv_cache[ 1 ][1]= my;
5651 mvd_cache[ 1 ][0]= mx - mpx;
5652 mvd_cache[ 1 ][1]= my - mpy;
5653 }else if(IS_SUB_4X8(sub_mb_type)){
5654 mv_cache[ 8 ][0]= mx;
5655 mv_cache[ 8 ][1]= my;
5657 mvd_cache[ 8 ][0]= mx - mpx;
5658 mvd_cache[ 8 ][1]= my - mpy;
5660 mv_cache[ 0 ][0]= mx;
5661 mv_cache[ 0 ][1]= my;
5663 mvd_cache[ 0 ][0]= mx - mpx;
5664 mvd_cache[ 0 ][1]= my - mpy;
5666 }else{
5667 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5668 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5669 p[0] = p[1] = p[8] = p[9] = 0;
5670 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5674 } else if( IS_DIRECT(mb_type) ) {
5675 pred_direct_motion(h, &mb_type);
5676 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5677 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5678 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5679 } else {
5680 int list, mx, my, i, mpx, mpy;
5681 if(IS_16X16(mb_type)){
5682 for(list=0; list<h->list_count; list++){
5683 if(IS_DIR(mb_type, 0, list)){
5684 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5685 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5686 }else
5687 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5689 for(list=0; list<h->list_count; list++){
5690 if(IS_DIR(mb_type, 0, list)){
5691 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5693 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5694 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5695 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5697 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5698 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5699 }else
5700 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5703 else if(IS_16X8(mb_type)){
5704 for(list=0; list<h->list_count; list++){
5705 for(i=0; i<2; i++){
5706 if(IS_DIR(mb_type, i, list)){
5707 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5708 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5709 }else
5710 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5713 for(list=0; list<h->list_count; list++){
5714 for(i=0; i<2; i++){
5715 if(IS_DIR(mb_type, i, list)){
5716 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5717 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5718 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5719 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5721 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5722 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5723 }else{
5724 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5725 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5729 }else{
5730 assert(IS_8X16(mb_type));
5731 for(list=0; list<h->list_count; list++){
5732 for(i=0; i<2; i++){
5733 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5734 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5735 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5736 }else
5737 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5740 for(list=0; list<h->list_count; list++){
5741 for(i=0; i<2; i++){
5742 if(IS_DIR(mb_type, i, list)){
5743 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5744 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5745 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5747 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5748 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5749 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5750 }else{
5751 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5752 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5759 if( IS_INTER( mb_type ) ) {
5760 h->chroma_pred_mode_table[mb_xy] = 0;
5761 write_back_motion( h, mb_type );
5764 if( !IS_INTRA16x16( mb_type ) ) {
5765 cbp = decode_cabac_mb_cbp_luma( h );
5766 if(CHROMA)
5767 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5770 h->cbp_table[mb_xy] = h->cbp = cbp;
5772 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5773 if( decode_cabac_mb_transform_size( h ) )
5774 mb_type |= MB_TYPE_8x8DCT;
5776 s->current_picture.mb_type[mb_xy]= mb_type;
5778 if( cbp || IS_INTRA16x16( mb_type ) ) {
5779 const uint8_t *scan, *scan8x8, *dc_scan;
5780 const uint32_t *qmul;
5781 int dqp;
5783 if(IS_INTERLACED(mb_type)){
5784 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5785 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5786 dc_scan= luma_dc_field_scan;
5787 }else{
5788 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5789 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5790 dc_scan= luma_dc_zigzag_scan;
5793 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5794 if( dqp == INT_MIN ){
5795 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5796 return -1;
5798 s->qscale += dqp;
5799 if(((unsigned)s->qscale) > 51){
5800 if(s->qscale<0) s->qscale+= 52;
5801 else s->qscale-= 52;
5803 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5804 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5806 if( IS_INTRA16x16( mb_type ) ) {
5807 int i;
5808 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5809 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5811 if( cbp&15 ) {
5812 qmul = h->dequant4_coeff[0][s->qscale];
5813 for( i = 0; i < 16; i++ ) {
5814 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5815 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5817 } else {
5818 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5820 } else {
5821 int i8x8, i4x4;
5822 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5823 if( cbp & (1<<i8x8) ) {
5824 if( IS_8x8DCT(mb_type) ) {
5825 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5826 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5827 } else {
5828 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5829 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5830 const int index = 4*i8x8 + i4x4;
5831 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5832 //START_TIMER
5833 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5834 //STOP_TIMER("decode_residual")
5837 } else {
5838 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5839 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5844 if( cbp&0x30 ){
5845 int c;
5846 for( c = 0; c < 2; c++ ) {
5847 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5848 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5852 if( cbp&0x20 ) {
5853 int c, i;
5854 for( c = 0; c < 2; c++ ) {
5855 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5856 for( i = 0; i < 4; i++ ) {
5857 const int index = 16 + 4 * c + i;
5858 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5859 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5862 } else {
5863 uint8_t * const nnz= &h->non_zero_count_cache[0];
5864 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5865 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5867 } else {
5868 uint8_t * const nnz= &h->non_zero_count_cache[0];
5869 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5870 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5871 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5872 h->last_qscale_diff = 0;
5875 s->current_picture.qscale_table[mb_xy]= s->qscale;
5876 write_back_non_zero_count(h);
5878 if(MB_MBAFF){
5879 h->ref_count[0] >>= 1;
5880 h->ref_count[1] >>= 1;
5883 return 0;
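/* Deblocking helpers: for bS < 4 the normal filter (clipped to +/-tc) is done
 * by the dsputil h264 loop filter functions, while for luma bS == 4 edges the
 * strong intra filter is applied inline with the p0'/p1'/p2' and q0'/q1'/q2'
 * formulas of the standard; chroma bS == 4 edges go through the
 * *_loop_filter_chroma_intra dsputil functions. */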
5887 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5888 int i, d;
5889 const int index_a = qp + h->slice_alpha_c0_offset;
5890 const int alpha = (alpha_table+52)[index_a];
5891 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5893 if( bS[0] < 4 ) {
5894 int8_t tc[4];
5895 for(i=0; i<4; i++)
5896 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5897 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5898 } else {
5899 /* 16px edge length, because bS=4 is triggered by being at
5900 * the edge of an intra MB, so all 4 bS are the same */
5901 for( d = 0; d < 16; d++ ) {
5902 const int p0 = pix[-1];
5903 const int p1 = pix[-2];
5904 const int p2 = pix[-3];
5906 const int q0 = pix[0];
5907 const int q1 = pix[1];
5908 const int q2 = pix[2];
5910 if( FFABS( p0 - q0 ) < alpha &&
5911 FFABS( p1 - p0 ) < beta &&
5912 FFABS( q1 - q0 ) < beta ) {
5914 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5915 if( FFABS( p2 - p0 ) < beta)
5917 const int p3 = pix[-4];
5918 /* p0', p1', p2' */
5919 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5920 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5921 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5922 } else {
5923 /* p0' */
5924 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5926 if( FFABS( q2 - q0 ) < beta)
5928 const int q3 = pix[3];
5929 /* q0', q1', q2' */
5930 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5931 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5932 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5933 } else {
5934 /* q0' */
5935 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5937 }else{
5938 /* p0', q0' */
5939 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5940 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5942 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
5944 pix += stride;
5948 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5949 int i;
5950 const int index_a = qp + h->slice_alpha_c0_offset;
5951 const int alpha = (alpha_table+52)[index_a];
5952 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5954 if( bS[0] < 4 ) {
5955 int8_t tc[4];
5956 for(i=0; i<4; i++)
5957 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5958 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5959 } else {
5960 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5964 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5965 int i;
5966 for( i = 0; i < 16; i++, pix += stride) {
5967 int index_a;
5968 int alpha;
5969 int beta;
5971 int qp_index;
5972 int bS_index = (i >> 1);
5973 if (!MB_FIELD) {
5974 bS_index &= ~1;
5975 bS_index |= (i & 1);
5978 if( bS[bS_index] == 0 ) {
5979 continue;
5982 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5983 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5984 alpha = (alpha_table+52)[index_a];
5985 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5987 if( bS[bS_index] < 4 ) {
5988 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5989 const int p0 = pix[-1];
5990 const int p1 = pix[-2];
5991 const int p2 = pix[-3];
5992 const int q0 = pix[0];
5993 const int q1 = pix[1];
5994 const int q2 = pix[2];
5996 if( FFABS( p0 - q0 ) < alpha &&
5997 FFABS( p1 - p0 ) < beta &&
5998 FFABS( q1 - q0 ) < beta ) {
5999 int tc = tc0;
6000 int i_delta;
6002 if( FFABS( p2 - p0 ) < beta ) {
6003 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6004 tc++;
6006 if( FFABS( q2 - q0 ) < beta ) {
6007 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6008 tc++;
6011 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6012 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6013 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6014 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6016 }else{
6017 const int p0 = pix[-1];
6018 const int p1 = pix[-2];
6019 const int p2 = pix[-3];
6021 const int q0 = pix[0];
6022 const int q1 = pix[1];
6023 const int q2 = pix[2];
6025 if( FFABS( p0 - q0 ) < alpha &&
6026 FFABS( p1 - p0 ) < beta &&
6027 FFABS( q1 - q0 ) < beta ) {
6029 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6030 if( FFABS( p2 - p0 ) < beta)
6032 const int p3 = pix[-4];
6033 /* p0', p1', p2' */
6034 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6035 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6036 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6037 } else {
6038 /* p0' */
6039 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6041 if( FFABS( q2 - q0 ) < beta)
6043 const int q3 = pix[3];
6044 /* q0', q1', q2' */
6045 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6046 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6047 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6048 } else {
6049 /* q0' */
6050 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6052 }else{
6053 /* p0', q0' */
6054 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6055 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6057 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6062 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6063 int i;
6064 for( i = 0; i < 8; i++, pix += stride) {
6065 int index_a;
6066 int alpha;
6067 int beta;
6069 int qp_index;
6070 int bS_index = i;
6072 if( bS[bS_index] == 0 ) {
6073 continue;
6076 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6077 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6078 alpha = (alpha_table+52)[index_a];
6079 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6081 if( bS[bS_index] < 4 ) {
6082 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6083 const int p0 = pix[-1];
6084 const int p1 = pix[-2];
6085 const int q0 = pix[0];
6086 const int q1 = pix[1];
6088 if( FFABS( p0 - q0 ) < alpha &&
6089 FFABS( p1 - p0 ) < beta &&
6090 FFABS( q1 - q0 ) < beta ) {
6091 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6093 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6094 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6095 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6097 }else{
6098 const int p0 = pix[-1];
6099 const int p1 = pix[-2];
6100 const int q0 = pix[0];
6101 const int q1 = pix[1];
6103 if( FFABS( p0 - q0 ) < alpha &&
6104 FFABS( p1 - p0 ) < beta &&
6105 FFABS( q1 - q0 ) < beta ) {
6107 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6108 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6109 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6115 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6116 int i, d;
6117 const int index_a = qp + h->slice_alpha_c0_offset;
6118 const int alpha = (alpha_table+52)[index_a];
6119 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6120 const int pix_next = stride;
6122 if( bS[0] < 4 ) {
6123 int8_t tc[4];
6124 for(i=0; i<4; i++)
6125 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6126 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6127 } else {
6128 /* 16px edge length, see filter_mb_edgev */
6129 for( d = 0; d < 16; d++ ) {
6130 const int p0 = pix[-1*pix_next];
6131 const int p1 = pix[-2*pix_next];
6132 const int p2 = pix[-3*pix_next];
6133 const int q0 = pix[0];
6134 const int q1 = pix[1*pix_next];
6135 const int q2 = pix[2*pix_next];
6137 if( FFABS( p0 - q0 ) < alpha &&
6138 FFABS( p1 - p0 ) < beta &&
6139 FFABS( q1 - q0 ) < beta ) {
6141 const int p3 = pix[-4*pix_next];
6142 const int q3 = pix[ 3*pix_next];
6144 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6145 if( FFABS( p2 - p0 ) < beta) {
6146 /* p0', p1', p2' */
6147 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6148 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6149 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6150 } else {
6151 /* p0' */
6152 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6154 if( FFABS( q2 - q0 ) < beta) {
6155 /* q0', q1', q2' */
6156 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6157 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6158 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6159 } else {
6160 /* q0' */
6161 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6163 }else{
6164 /* p0', q0' */
6165 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6166 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6168 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6170 pix++;
6175 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6176 int i;
6177 const int index_a = qp + h->slice_alpha_c0_offset;
6178 const int alpha = (alpha_table+52)[index_a];
6179 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6181 if( bS[0] < 4 ) {
6182 int8_t tc[4];
6183 for(i=0; i<4; i++)
6184 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6185 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6186 } else {
6187 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6191 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6192 MpegEncContext * const s = &h->s;
6193 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6194 int mb_xy, mb_type;
6195 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6197 mb_xy = h->mb_xy;
6199 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6200 1 || // always true: the optimized path below is currently never taken
6201 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6202 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6203 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6204 return;
6206 assert(!FRAME_MBAFF);
6208 mb_type = s->current_picture.mb_type[mb_xy];
6209 qp = s->current_picture.qscale_table[mb_xy];
6210 qp0 = s->current_picture.qscale_table[mb_xy-1];
6211 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6212 qpc = get_chroma_qp( h, 0, qp );
6213 qpc0 = get_chroma_qp( h, 0, qp0 );
6214 qpc1 = get_chroma_qp( h, 0, qp1 );
6215 qp0 = (qp + qp0 + 1) >> 1;
6216 qp1 = (qp + qp1 + 1) >> 1;
6217 qpc0 = (qpc + qpc0 + 1) >> 1;
6218 qpc1 = (qpc + qpc1 + 1) >> 1;
6219 qp_thresh = 15 - h->slice_alpha_c0_offset;
6220 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6221 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6222 return;
6224 if( IS_INTRA(mb_type) ) {
6225 int16_t bS4[4] = {4,4,4,4};
6226 int16_t bS3[4] = {3,3,3,3};
6227 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6228 if( IS_8x8DCT(mb_type) ) {
6229 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6230 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6231 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6232 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6233 } else {
6234 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6235 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6236 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6237 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6238 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6239 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6240 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6241 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6243 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6244 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6245 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6246 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6247 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6248 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6249 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6250 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6251 return;
6252 } else {
6253 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6254 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6255 int edges;
6256 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6257 edges = 4;
6258 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6259 } else {
6260 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6261 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6262 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6263 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6264 ? 3 : 0;
6265 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6266 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6267 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6268 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6270 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6271 bSv[0][0] = 0x0004000400040004ULL;
6272 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6273 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6275 #define FILTER(hv,dir,edge)\
6276 if(bSv[dir][edge]) {\
6277 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6278 if(!(edge&1)) {\
6279 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6280 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6283 if( edges == 1 ) {
6284 FILTER(v,0,0);
6285 FILTER(h,1,0);
6286 } else if( IS_8x8DCT(mb_type) ) {
6287 FILTER(v,0,0);
6288 FILTER(v,0,2);
6289 FILTER(h,1,0);
6290 FILTER(h,1,2);
6291 } else {
6292 FILTER(v,0,0);
6293 FILTER(v,0,1);
6294 FILTER(v,0,2);
6295 FILTER(v,0,3);
6296 FILTER(h,1,0);
6297 FILTER(h,1,1);
6298 FILTER(h,1,2);
6299 FILTER(h,1,3);
6301 #undef FILTER
6305 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6306 MpegEncContext * const s = &h->s;
6307 const int mb_xy= mb_x + mb_y*s->mb_stride;
6308 const int mb_type = s->current_picture.mb_type[mb_xy];
6309 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6310 int first_vertical_edge_done = 0;
6311 int dir;
6313 //for sufficiently low qp, filtering wouldn't do anything
6314 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
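// (alpha is 0 for indexA < 16, so if the averaged QP of every edge plus
// slice_alpha_c0_offset, including the worst-case chroma_qp_index_offset,
// stays at or below 15, |p0-q0| < alpha can never hold and no sample is
// modified, whatever bS is)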
6315 if(!FRAME_MBAFF){
6316 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6317 int qp = s->current_picture.qscale_table[mb_xy];
6318 if(qp <= qp_thresh
6319 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6320 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6321 return;
6325 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6326 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6327 int top_type, left_type[2];
6328 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6329 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6330 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6332 if(IS_8x8DCT(top_type)){
6333 h->non_zero_count_cache[4+8*0]=
6334 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6335 h->non_zero_count_cache[6+8*0]=
6336 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6338 if(IS_8x8DCT(left_type[0])){
6339 h->non_zero_count_cache[3+8*1]=
6340 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6342 if(IS_8x8DCT(left_type[1])){
6343 h->non_zero_count_cache[3+8*3]=
6344 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6347 if(IS_8x8DCT(mb_type)){
6348 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6349 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6351 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6352 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6354 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6355 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6357 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6358 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6362 if (FRAME_MBAFF
6363 // left mb is in picture
6364 && h->slice_table[mb_xy-1] != 0xFFFF
6365 // and current and left pair do not have the same interlaced type
6366 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6367 // and left mb is in the same slice if deblocking_filter == 2
6368 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6369 /* First vertical edge is different in MBAFF frames
6370 * There are 8 different bS to compute and 2 different Qp
6372 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6373 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6374 int16_t bS[8];
6375 int qp[2];
6376 int bqp[2];
6377 int rqp[2];
6378 int mb_qp, mbn0_qp, mbn1_qp;
6379 int i;
6380 first_vertical_edge_done = 1;
6382 if( IS_INTRA(mb_type) )
6383 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6384 else {
6385 for( i = 0; i < 8; i++ ) {
6386 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6388 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6389 bS[i] = 4;
6390 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6391 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6392 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6394 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6395 bS[i] = 2;
6396 else
6397 bS[i] = 1;
6401 mb_qp = s->current_picture.qscale_table[mb_xy];
6402 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6403 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6404 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6405 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6406 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6407 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6408 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6409 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6410 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6411 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6412 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6413 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6415 /* Filter edge */
6416 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6417 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6418 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6419 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6420 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6422 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6423 for( dir = 0; dir < 2; dir++ )
6425 int edge;
6426 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6427 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6428 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6429 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6430 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6432 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6433 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6434 // how often to recheck mv-based bS when iterating between edges
6435 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6436 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6437 // how often to recheck mv-based bS when iterating along each edge
6438 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6440 if (first_vertical_edge_done) {
6441 start = 1;
6442 first_vertical_edge_done = 0;
6445 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6446 start = 1;
6448 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6449 && !IS_INTERLACED(mb_type)
6450 && IS_INTERLACED(mbm_type)
6452 // This is a special case in the standard where the filtering must
6453 // be done twice (once for each field) even if we are in a
6454 // frame macroblock.
6456 static const int nnz_idx[4] = {4,5,6,3};
6457 unsigned int tmp_linesize = 2 * linesize;
6458 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6459 int mbn_xy = mb_xy - 2 * s->mb_stride;
6460 int qp;
6461 int i, j;
6462 int16_t bS[4];
6464 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6465 if( IS_INTRA(mb_type) ||
6466 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6467 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6468 } else {
6469 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6470 for( i = 0; i < 4; i++ ) {
6471 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6472 mbn_nnz[nnz_idx[i]] != 0 )
6473 bS[i] = 2;
6474 else
6475 bS[i] = 1;
6478 // Do not use s->qscale as the luma quantizer because it does not
6479 // have the same value in IPCM macroblocks.
6480 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6481 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6482 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6483 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6484 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6485 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6486 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6487 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6490 start = 1;
6493 /* Calculate bS */
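/* bS per 4-sample segment: 4 or 3 if either MB is intra (4 on the external
 * edge, unless it is a horizontal edge where at least one of the two MBs is
 * field coded, which gets 3), 2 if either side has non-zero coefficients,
 * 1 if the two sides use different reference frames or their MVs differ by
 * >= 4 quarter-pel units horizontally or >= mvy_limit vertically, and 0
 * otherwise; segments with bS == 0 are left unfiltered. */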
6494 for( edge = start; edge < edges; edge++ ) {
6495 /* mbn_xy: neighbor macroblock */
6496 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6497 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6498 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6499 int16_t bS[4];
6500 int qp;
6502 if( (edge&1) && IS_8x8DCT(mb_type) )
6503 continue;
6505 if( IS_INTRA(mb_type) ||
6506 IS_INTRA(mbn_type) ) {
6507 int value;
6508 if (edge == 0) {
6509 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6510 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6512 value = 4;
6513 } else {
6514 value = 3;
6516 } else {
6517 value = 3;
6519 bS[0] = bS[1] = bS[2] = bS[3] = value;
6520 } else {
6521 int i, l;
6522 int mv_done;
6524 if( edge & mask_edge ) {
6525 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6526 mv_done = 1;
6528 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6529 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6530 mv_done = 1;
6532 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6533 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6534 int bn_idx= b_idx - (dir ? 8:1);
6535 int v = 0;
6537 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6538 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6539 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6540 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6543 if(h->slice_type_nos == FF_B_TYPE && v){
6544 v=0;
6545 for( l = 0; !v && l < 2; l++ ) {
6546 int ln= 1-l;
6547 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6548 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6549 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6553 bS[0] = bS[1] = bS[2] = bS[3] = v;
6554 mv_done = 1;
6556 else
6557 mv_done = 0;
6559 for( i = 0; i < 4; i++ ) {
6560 int x = dir == 0 ? edge : i;
6561 int y = dir == 0 ? i : edge;
6562 int b_idx= 8 + 4 + x + 8*y;
6563 int bn_idx= b_idx - (dir ? 8:1);
6565 if( h->non_zero_count_cache[b_idx] != 0 ||
6566 h->non_zero_count_cache[bn_idx] != 0 ) {
6567 bS[i] = 2;
6569 else if(!mv_done)
6571 bS[i] = 0;
6572 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6573 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6574 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6575 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6576 bS[i] = 1;
6577 break;
6581 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6582 bS[i] = 0;
6583 for( l = 0; l < 2; l++ ) {
6584 int ln= 1-l;
6585 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6586 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6587 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6588 bS[i] = 1;
6589 break;
6596 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6597 continue;
6600 /* Filter edge */
6601 // Do not use s->qscale as the luma quantizer because it does not
6602 // have the same value in IPCM macroblocks.
6603 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6604 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6605 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6606 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6607 if( dir == 0 ) {
6608 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6609 if( (edge&1) == 0 ) {
6610 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6611 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6612 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6613 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6615 } else {
6616 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6617 if( (edge&1) == 0 ) {
6618 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6619 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6620 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6621 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6628 static int decode_slice(struct AVCodecContext *avctx, void *arg){
6629 H264Context *h = *(void**)arg;
6630 MpegEncContext * const s = &h->s;
6631 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6633 s->mb_skip_run= -1;
6635 if( h->pps.cabac ) {
6636 int i;
6638 /* realign */
6639 align_get_bits( &s->gb );
6641 /* init cabac */
6642 ff_init_cabac_states( &h->cabac);
6643 ff_init_cabac_decoder( &h->cabac,
6644 s->gb.buffer + get_bits_count(&s->gb)/8,
6645 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6646 /* calculate pre-state */
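/* Each of the 460 context variables is initialised from an (m,n) pair of the
 * selected init table: pre = clip(((m*qp)>>4)+n, 1, 126); values <= 63 mean
 * MPS=0 with state index 63-pre, values >= 64 mean MPS=1 with state index
 * pre-64, and state and MPS are packed into one byte as 2*state+MPS below. */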
6647 for( i= 0; i < 460; i++ ) {
6648 int pre;
6649 if( h->slice_type_nos == FF_I_TYPE )
6650 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6651 else
6652 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6654 if( pre <= 63 )
6655 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6656 else
6657 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6660 for(;;){
6661 //START_TIMER
6662 int ret = decode_mb_cabac(h);
6663 int eos;
6664 //STOP_TIMER("decode_mb_cabac")
6666 if(ret>=0) hl_decode_mb(h);
6668 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6669 s->mb_y++;
6671 if(ret>=0) ret = decode_mb_cabac(h);
6673 if(ret>=0) hl_decode_mb(h);
6674 s->mb_y--;
6676 eos = get_cabac_terminate( &h->cabac );
6678 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6679 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6680 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6681 return -1;
6684 if( ++s->mb_x >= s->mb_width ) {
6685 s->mb_x = 0;
6686 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6687 ++s->mb_y;
6688 if(FIELD_OR_MBAFF_PICTURE) {
6689 ++s->mb_y;
6693 if( eos || s->mb_y >= s->mb_height ) {
6694 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6695 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6696 return 0;
6700 } else {
6701 for(;;){
6702 int ret = decode_mb_cavlc(h);
6704 if(ret>=0) hl_decode_mb(h);
6706 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6707 s->mb_y++;
6708 ret = decode_mb_cavlc(h);
6710 if(ret>=0) hl_decode_mb(h);
6711 s->mb_y--;
6714 if(ret<0){
6715 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6716 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6718 return -1;
6721 if(++s->mb_x >= s->mb_width){
6722 s->mb_x=0;
6723 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6724 ++s->mb_y;
6725 if(FIELD_OR_MBAFF_PICTURE) {
6726 ++s->mb_y;
6728 if(s->mb_y >= s->mb_height){
6729 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6731 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6732 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6734 return 0;
6735 }else{
6736 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6738 return -1;
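6739 // CAVLC has no explicit end-of-slice flag: the slice ends once the bit reader
6740 // reaches the end of the RBSP and no mb_skip_run is still pending.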
6743 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6744 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6745 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6746 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6748 return 0;
6749 }else{
6750 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6752 return -1;
6758 #if 0
6759 for(;s->mb_y < s->mb_height; s->mb_y++){
6760 for(;s->mb_x < s->mb_width; s->mb_x++){
6761 int ret= decode_mb(h);
6763 hl_decode_mb(h);
6765 if(ret<0){
6766 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6767 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6769 return -1;
6772 if(++s->mb_x >= s->mb_width){
6773 s->mb_x=0;
6774 if(++s->mb_y >= s->mb_height){
6775 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6776 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6778 return 0;
6779 }else{
6780 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6782 return -1;
6787 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6788 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6789 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6791 return 0;
6792 }else{
6793 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6795 return -1;
6799 s->mb_x=0;
6800 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6802 #endif
6803 return -1; //not reached
6806 static int decode_picture_timing(H264Context *h){
6807 MpegEncContext * const s = &h->s;
6808 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6809 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6810 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6812 if(h->sps.pic_struct_present_flag){
6813 unsigned int i, num_clock_ts;
6814 h->sei_pic_struct = get_bits(&s->gb, 4);
6816 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6817 return -1;
6819 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6821 for (i = 0 ; i < num_clock_ts ; i++){
6822 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6823 unsigned int full_timestamp_flag;
6824 skip_bits(&s->gb, 2); /* ct_type */
6825 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6826 skip_bits(&s->gb, 5); /* counting_type */
6827 full_timestamp_flag = get_bits(&s->gb, 1);
6828 skip_bits(&s->gb, 1); /* discontinuity_flag */
6829 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6830 skip_bits(&s->gb, 8); /* n_frames */
6831 if(full_timestamp_flag){
6832 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6833 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6834 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6835 }else{
6836 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6837 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6838 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6839 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6840 if(get_bits(&s->gb, 1)) /* hours_flag */
6841 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6845 if(h->sps.time_offset_length > 0)
6846 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6850 return 0;
6853 static int decode_unregistered_user_data(H264Context *h, int size){
6854 MpegEncContext * const s = &h->s;
6855 uint8_t user_data[16+256];
6856 int e, build, i;
6858 if(size<16)
6859 return -1;
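6860 // The payload starts with a 16-byte UUID; x264 places a version string right after it.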
6861 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6862 user_data[i]= get_bits(&s->gb, 8);
6865 user_data[i]= 0;
6866 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6867 if(e==1 && build>=0)
6868 h->x264_build= build;
6870 if(s->avctx->debug & FF_DEBUG_BUGS)
6871 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6873 for(; i<size; i++)
6874 skip_bits(&s->gb, 8);
6876 return 0;
6879 static int decode_sei(H264Context *h){
6880 MpegEncContext * const s = &h->s;
6882 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6883 int size, type;
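6884 // SEI payload type and size are each coded as a run of 0xFF bytes plus a final byte, summed up.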
6885 type=0;
6887 type+= show_bits(&s->gb, 8);
6888 }while(get_bits(&s->gb, 8) == 255);
6890 size=0;
6892 size+= show_bits(&s->gb, 8);
6893 }while(get_bits(&s->gb, 8) == 255);
6895 switch(type){
6896 case 1: // Picture timing SEI
6897 if(decode_picture_timing(h) < 0)
6898 return -1;
6899 break;
6900 case 5:
6901 if(decode_unregistered_user_data(h, size) < 0)
6902 return -1;
6903 break;
6904 default:
6905 skip_bits(&s->gb, 8*size);
6908 //FIXME check bits here
6909 align_get_bits(&s->gb);
6912 return 0;
6915 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6916 MpegEncContext * const s = &h->s;
6917 int cpb_count, i;
6918 cpb_count = get_ue_golomb(&s->gb) + 1;
6919 get_bits(&s->gb, 4); /* bit_rate_scale */
6920 get_bits(&s->gb, 4); /* cpb_size_scale */
6921 for(i=0; i<cpb_count; i++){
6922 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6923 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6924 get_bits1(&s->gb); /* cbr_flag */
6926 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6927 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6928 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6929 sps->time_offset_length = get_bits(&s->gb, 5);
6932 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6933 MpegEncContext * const s = &h->s;
6934 int aspect_ratio_info_present_flag;
6935 unsigned int aspect_ratio_idc;
6937 aspect_ratio_info_present_flag= get_bits1(&s->gb);
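6938 // aspect_ratio_idc 255 (EXTENDED_SAR) carries an explicit num/den pair; lower values index a fixed table.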
6939 if( aspect_ratio_info_present_flag ) {
6940 aspect_ratio_idc= get_bits(&s->gb, 8);
6941 if( aspect_ratio_idc == EXTENDED_SAR ) {
6942 sps->sar.num= get_bits(&s->gb, 16);
6943 sps->sar.den= get_bits(&s->gb, 16);
6944 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6945 sps->sar= pixel_aspect[aspect_ratio_idc];
6946 }else{
6947 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6948 return -1;
6950 }else{
6951 sps->sar.num=
6952 sps->sar.den= 0;
6954 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6956 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6957 get_bits1(&s->gb); /* overscan_appropriate_flag */
6960 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6961 get_bits(&s->gb, 3); /* video_format */
6962 get_bits1(&s->gb); /* video_full_range_flag */
6963 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6964 get_bits(&s->gb, 8); /* colour_primaries */
6965 get_bits(&s->gb, 8); /* transfer_characteristics */
6966 get_bits(&s->gb, 8); /* matrix_coefficients */
6970 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6971 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6972 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6975 sps->timing_info_present_flag = get_bits1(&s->gb);
6976 if(sps->timing_info_present_flag){
6977 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6978 sps->time_scale = get_bits_long(&s->gb, 32);
6979 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6982 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6983 if(sps->nal_hrd_parameters_present_flag)
6984 decode_hrd_parameters(h, sps);
6985 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6986 if(sps->vcl_hrd_parameters_present_flag)
6987 decode_hrd_parameters(h, sps);
6988 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
6989 get_bits1(&s->gb); /* low_delay_hrd_flag */
6990 sps->pic_struct_present_flag = get_bits1(&s->gb);
6992 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6993 if(sps->bitstream_restriction_flag){
6994 unsigned int num_reorder_frames;
6995 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6996 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6997 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6998 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6999 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7000 num_reorder_frames= get_ue_golomb(&s->gb);
7001 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7003 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7004 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7005 return -1;
7008 sps->num_reorder_frames= num_reorder_frames;
7011 return 0;
7014 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7015 const uint8_t *jvt_list, const uint8_t *fallback_list){
7016 MpegEncContext * const s = &h->s;
7017 int i, last = 8, next = 8;
7018 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7019 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7020 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7021 else
7022 for(i=0;i<size;i++){
7023 if(next)
7024 next = (last + get_se_golomb(&s->gb)) & 0xff;
7025 if(!i && !next){ /* matrix not written, we use the preset one */
7026 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7027 break;
7029 last = factors[scan[i]] = next ? next : last;
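7030 // Each level is a signed delta from the previous one; once the running value hits zero,
7031 // the last value is repeated for the rest of the list (or, at index 0, the JVT default matrix is used).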
7033 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7034 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7035 MpegEncContext * const s = &h->s;
7036 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7037 const uint8_t *fallback[4] = {
7038 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7039 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7040 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7041 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7043 if(get_bits1(&s->gb)){
7044 sps->scaling_matrix_present |= is_sps;
7045 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7046 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7047 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7048 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7049 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7050 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7051 if(is_sps || pps->transform_8x8_mode){
7052 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7053 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7059 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7061 static void *
7062 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7063 const size_t size, const char *name)
7065 if(id>=max) {
7066 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7067 return NULL;
7070 if(!vec[id]) {
7071 vec[id] = av_mallocz(size);
7072 if(vec[id] == NULL)
7073 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
7075 return vec[id];
7078 static inline int decode_seq_parameter_set(H264Context *h){
7079 MpegEncContext * const s = &h->s;
7080 int profile_idc, level_idc;
7081 unsigned int sps_id, tmp, mb_width, mb_height;
7082 int i;
7083 SPS *sps;
7085 profile_idc= get_bits(&s->gb, 8);
7086 get_bits1(&s->gb); //constraint_set0_flag
7087 get_bits1(&s->gb); //constraint_set1_flag
7088 get_bits1(&s->gb); //constraint_set2_flag
7089 get_bits1(&s->gb); //constraint_set3_flag
7090 get_bits(&s->gb, 4); // reserved
7091 level_idc= get_bits(&s->gb, 8);
7092 sps_id= get_ue_golomb(&s->gb);
7094 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7095 if(sps == NULL)
7096 return -1;
7098 sps->profile_idc= profile_idc;
7099 sps->level_idc= level_idc;
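7100 // Start from flat (all 16) scaling matrices; high-profile SPS data below may override them.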
7101 memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
7102 memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
7103 sps->scaling_matrix_present = 0;
7105 if(sps->profile_idc >= 100){ //high profile
7106 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7107 if(sps->chroma_format_idc == 3)
7108 get_bits1(&s->gb); //residual_color_transform_flag
7109 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7110 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7111 sps->transform_bypass = get_bits1(&s->gb);
7112 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7113 }else{
7114 sps->chroma_format_idc= 1;
7117 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7118 sps->poc_type= get_ue_golomb(&s->gb);
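7119 // pic_order_cnt_type: 0 = POC LSBs sent explicitly, 1 = derived from frame_num plus signalled offsets, 2 = follows decoding order.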
7120 if(sps->poc_type == 0){ //FIXME #define
7121 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7122 } else if(sps->poc_type == 1){//FIXME #define
7123 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7124 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7125 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7126 tmp= get_ue_golomb(&s->gb);
7128 if(tmp >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
7129 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7130 return -1;
7132 sps->poc_cycle_length= tmp;
7134 for(i=0; i<sps->poc_cycle_length; i++)
7135 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7136 }else if(sps->poc_type != 2){
7137 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7138 return -1;
7141 tmp= get_ue_golomb(&s->gb);
7142 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7143 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7144 return -1;
7146 sps->ref_frame_count= tmp;
7147 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7148 mb_width= get_ue_golomb(&s->gb) + 1;
7149 mb_height= get_ue_golomb(&s->gb) + 1;
7150 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7151 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7152 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7153 return -1;
7155 sps->mb_width = mb_width;
7156 sps->mb_height= mb_height;
7158 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7159 if(!sps->frame_mbs_only_flag)
7160 sps->mb_aff= get_bits1(&s->gb);
7161 else
7162 sps->mb_aff= 0;
7164 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7166 #ifndef ALLOW_INTERLACE
7167 if(sps->mb_aff)
7168 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7169 #endif
7170 sps->crop= get_bits1(&s->gb);
7171 if(sps->crop){
7172 sps->crop_left = get_ue_golomb(&s->gb);
7173 sps->crop_right = get_ue_golomb(&s->gb);
7174 sps->crop_top = get_ue_golomb(&s->gb);
7175 sps->crop_bottom= get_ue_golomb(&s->gb);
7176 if(sps->crop_left || sps->crop_top){
7177 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7179 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7180 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7182 }else{
7183 sps->crop_left =
7184 sps->crop_right =
7185 sps->crop_top =
7186 sps->crop_bottom= 0;
7189 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7190 if( sps->vui_parameters_present_flag )
7191 decode_vui_parameters(h, sps);
7193 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7194 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7195 sps_id, sps->profile_idc, sps->level_idc,
7196 sps->poc_type,
7197 sps->ref_frame_count,
7198 sps->mb_width, sps->mb_height,
7199 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7200 sps->direct_8x8_inference_flag ? "8B8" : "",
7201 sps->crop_left, sps->crop_right,
7202 sps->crop_top, sps->crop_bottom,
7203 sps->vui_parameters_present_flag ? "VUI" : "",
7204 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
7207 return 0;
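7208 /* Precompute the chroma QP for every luma QP (0..51), folding in the
7209  * per-PPS chroma_qp_index_offset passed as 'index'. */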
7210 static void
7211 build_qp_table(PPS *pps, int t, int index)
7213 int i;
7214 for(i = 0; i < 52; i++)
7215 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
7218 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7219 MpegEncContext * const s = &h->s;
7220 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7221 PPS *pps;
7223 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7224 if(pps == NULL)
7225 return -1;
7227 tmp= get_ue_golomb(&s->gb);
7228 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7229 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7230 return -1;
7232 pps->sps_id= tmp;
7234 pps->cabac= get_bits1(&s->gb);
7235 pps->pic_order_present= get_bits1(&s->gb);
7236 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7237 if(pps->slice_group_count > 1 ){
7238 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7239 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7240 switch(pps->mb_slice_group_map_type){
7241 case 0:
7242 #if 0
7243 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7244 | run_length[ i ] |1 |ue(v) |
7245 #endif
7246 break;
7247 case 2:
7248 #if 0
7249 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7250 |{ | | |
7251 | top_left_mb[ i ] |1 |ue(v) |
7252 | bottom_right_mb[ i ] |1 |ue(v) |
7253 | } | | |
7254 #endif
7255 break;
7256 case 3:
7257 case 4:
7258 case 5:
7259 #if 0
7260 | slice_group_change_direction_flag |1 |u(1) |
7261 | slice_group_change_rate_minus1 |1 |ue(v) |
7262 #endif
7263 break;
7264 case 6:
7265 #if 0
7266 | slice_group_id_cnt_minus1 |1 |ue(v) |
7267 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7268 |) | | |
7269 | slice_group_id[ i ] |1 |u(v) |
7270 #endif
7271 break;
7274 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7275 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7276 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7277 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7278 pps->ref_count[0]= pps->ref_count[1]= 1;
7279 return -1;
7282 pps->weighted_pred= get_bits1(&s->gb);
7283 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7284 pps->init_qp= get_se_golomb(&s->gb) + 26;
7285 pps->init_qs= get_se_golomb(&s->gb) + 26;
7286 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7287 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7288 pps->constrained_intra_pred= get_bits1(&s->gb);
7289 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7291 pps->transform_8x8_mode= 0;
7292 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7293 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7294 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7296 if(get_bits_count(&s->gb) < bit_length){
7297 pps->transform_8x8_mode= get_bits1(&s->gb);
7298 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7299 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7300 } else {
7301 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7304 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7305 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7306 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7307 h->pps.chroma_qp_diff= 1;
7309 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7310 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7311 pps_id, pps->sps_id,
7312 pps->cabac ? "CABAC" : "CAVLC",
7313 pps->slice_group_count,
7314 pps->ref_count[0], pps->ref_count[1],
7315 pps->weighted_pred ? "weighted" : "",
7316 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7317 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7318 pps->constrained_intra_pred ? "CONSTR" : "",
7319 pps->redundant_pic_cnt_present ? "REDU" : "",
7320 pps->transform_8x8_mode ? "8x8DCT" : ""
7324 return 0;
7328 * Call decode_slice() for each context.
7330 * @param h h264 master context
7331 * @param context_count number of contexts to execute
7333 static void execute_decode_slices(H264Context *h, int context_count){
7334 MpegEncContext * const s = &h->s;
7335 AVCodecContext * const avctx= s->avctx;
7336 H264Context *hx;
7337 int i;
7339 if(context_count == 1) {
7340 decode_slice(avctx, &h);
7341 } else {
7342 for(i = 1; i < context_count; i++) {
7343 hx = h->thread_context[i];
7344 hx->s.error_recognition = avctx->error_recognition;
7345 hx->s.error_count = 0;
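7346 // Let the codec's thread pool run decode_slice() once per slice context.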
7348 avctx->execute(avctx, (void *)decode_slice,
7349 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7351 /* pull back stuff from slices to master context */
7352 hx = h->thread_context[context_count - 1];
7353 s->mb_x = hx->s.mb_x;
7354 s->mb_y = hx->s.mb_y;
7355 s->dropable = hx->s.dropable;
7356 s->picture_structure = hx->s.picture_structure;
7357 for(i = 1; i < context_count; i++)
7358 h->s.error_count += h->thread_context[i]->s.error_count;
7363 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7364 MpegEncContext * const s = &h->s;
7365 AVCodecContext * const avctx= s->avctx;
7366 int buf_index=0;
7367 H264Context *hx; ///< thread context
7368 int context_count = 0;
7370 h->max_contexts = avctx->thread_count;
7371 #if 0
7372 int i;
7373 for(i=0; i<50; i++){
7374 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7376 #endif
7377 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7378 h->current_slice = 0;
7379 if (!s->first_field)
7380 s->current_picture_ptr= NULL;
7383 for(;;){
7384 int consumed;
7385 int dst_length;
7386 int bit_length;
7387 const uint8_t *ptr;
7388 int i, nalsize = 0;
7389 int err;
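7390 // AVC (length-prefixed) input carries an explicit NAL size; Annex B input is scanned for start codes below.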
7391 if(h->is_avc) {
7392 if(buf_index >= buf_size) break;
7393 nalsize = 0;
7394 for(i = 0; i < h->nal_length_size; i++)
7395 nalsize = (nalsize << 8) | buf[buf_index++];
7396 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7397 if(nalsize == 1){
7398 buf_index++;
7399 continue;
7400 }else{
7401 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7402 break;
7405 } else {
7406 // start code prefix search
7407 for(; buf_index + 3 < buf_size; buf_index++){
7408 // This should always succeed in the first iteration.
7409 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7410 break;
7413 if(buf_index+3 >= buf_size) break;
7415 buf_index+=3;
7418 hx = h->thread_context[context_count];
7420 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7421 if (ptr==NULL || dst_length < 0){
7422 return -1;
7424 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7425 dst_length--;
7426 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7428 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7429 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7432 if (h->is_avc && (nalsize != consumed)){
7433 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7434 consumed= nalsize;
7437 buf_index += consumed;
7439 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7440 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7441 continue;
7443 again:
7444 err = 0;
7445 switch(hx->nal_unit_type){
7446 case NAL_IDR_SLICE:
7447 if (h->nal_unit_type != NAL_IDR_SLICE) {
7448 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7449 return -1;
7451 idr(h); //FIXME ensure we don't lose some frames if there is reordering
7452 case NAL_SLICE:
7453 init_get_bits(&hx->s.gb, ptr, bit_length);
7454 hx->intra_gb_ptr=
7455 hx->inter_gb_ptr= &hx->s.gb;
7456 hx->s.data_partitioning = 0;
7458 if((err = decode_slice_header(hx, h)))
7459 break;
7461 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7462 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7463 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7464 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7465 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7466 && avctx->skip_frame < AVDISCARD_ALL)
7467 context_count++;
7468 break;
7469 case NAL_DPA:
7470 init_get_bits(&hx->s.gb, ptr, bit_length);
7471 hx->intra_gb_ptr=
7472 hx->inter_gb_ptr= NULL;
7473 hx->s.data_partitioning = 1;
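7474 // Data partitioning: DPA holds the slice header and motion data, DPB intra residuals, DPC inter residuals.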
7475 err = decode_slice_header(hx, h);
7476 break;
7477 case NAL_DPB:
7478 init_get_bits(&hx->intra_gb, ptr, bit_length);
7479 hx->intra_gb_ptr= &hx->intra_gb;
7480 break;
7481 case NAL_DPC:
7482 init_get_bits(&hx->inter_gb, ptr, bit_length);
7483 hx->inter_gb_ptr= &hx->inter_gb;
7485 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7486 && s->context_initialized
7487 && s->hurry_up < 5
7488 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7489 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7490 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7491 && avctx->skip_frame < AVDISCARD_ALL)
7492 context_count++;
7493 break;
7494 case NAL_SEI:
7495 init_get_bits(&s->gb, ptr, bit_length);
7496 decode_sei(h);
7497 break;
7498 case NAL_SPS:
7499 init_get_bits(&s->gb, ptr, bit_length);
7500 decode_seq_parameter_set(h);
7502 if(s->flags& CODEC_FLAG_LOW_DELAY)
7503 s->low_delay=1;
7505 if(avctx->has_b_frames < 2)
7506 avctx->has_b_frames= !s->low_delay;
7507 break;
7508 case NAL_PPS:
7509 init_get_bits(&s->gb, ptr, bit_length);
7511 decode_picture_parameter_set(h, bit_length);
7513 break;
7514 case NAL_AUD:
7515 case NAL_END_SEQUENCE:
7516 case NAL_END_STREAM:
7517 case NAL_FILLER_DATA:
7518 case NAL_SPS_EXT:
7519 case NAL_AUXILIARY_SLICE:
7520 break;
7521 default:
7522 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7525 if(context_count == h->max_contexts) {
7526 execute_decode_slices(h, context_count);
7527 context_count = 0;
7530 if (err < 0)
7531 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7532 else if(err == 1) {
7533 /* Slice could not be decoded in parallel mode, copy down
7534 * NAL unit stuff to context 0 and restart. Note that
7535 * rbsp_buffer is not transferred, but since we no longer
7536 * run in parallel mode this should not be an issue. */
7537 h->nal_unit_type = hx->nal_unit_type;
7538 h->nal_ref_idc = hx->nal_ref_idc;
7539 hx = h;
7540 goto again;
7543 if(context_count)
7544 execute_decode_slices(h, context_count);
7545 return buf_index;
7549 * returns the number of bytes consumed for building the current frame
7551 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7552 if(pos==0) pos=1; //avoid infinite loops (I doubt that is needed, but ...)
7553 if(pos+10>buf_size) pos=buf_size; // oops ;)
7555 return pos;
7558 static int decode_frame(AVCodecContext *avctx,
7559 void *data, int *data_size,
7560 const uint8_t *buf, int buf_size)
7562 H264Context *h = avctx->priv_data;
7563 MpegEncContext *s = &h->s;
7564 AVFrame *pict = data;
7565 int buf_index;
7567 s->flags= avctx->flags;
7568 s->flags2= avctx->flags2;
7570 /* end of stream, output what is still in the buffers */
7571 if (buf_size == 0) {
7572 Picture *out;
7573 int i, out_idx;
7575 //FIXME factorize this with the output code below
7576 out = h->delayed_pic[0];
7577 out_idx = 0;
7578 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7579 if(h->delayed_pic[i]->poc < out->poc){
7580 out = h->delayed_pic[i];
7581 out_idx = i;
7584 for(i=out_idx; h->delayed_pic[i]; i++)
7585 h->delayed_pic[i] = h->delayed_pic[i+1];
7587 if(out){
7588 *data_size = sizeof(AVFrame);
7589 *pict= *(AVFrame*)out;
7592 return 0;
7595 if(h->is_avc && !h->got_avcC) {
7596 int i, cnt, nalsize;
7597 unsigned char *p = avctx->extradata;
7598 if(avctx->extradata_size < 7) {
7599 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7600 return -1;
7602 if(*p != 1) {
7603 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7604 return -1;
7606 /* The SPS and PPS in the avcC always have their length coded with 2 bytes,
7607 so put a fake nal_length_size = 2 while parsing them */
7608 h->nal_length_size = 2;
7609 // Decode sps from avcC
7610 cnt = *(p+5) & 0x1f; // Number of sps
7611 p += 6;
7612 for (i = 0; i < cnt; i++) {
7613 nalsize = AV_RB16(p) + 2;
7614 if(decode_nal_units(h, p, nalsize) < 0) {
7615 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7616 return -1;
7618 p += nalsize;
7620 // Decode pps from avcC
7621 cnt = *(p++); // Number of pps
7622 for (i = 0; i < cnt; i++) {
7623 nalsize = AV_RB16(p) + 2;
7624 if(decode_nal_units(h, p, nalsize) != nalsize) {
7625 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7626 return -1;
7628 p += nalsize;
7630 // Now store the right nal_length_size, which will be used to parse all other NALs
7631 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7632 // Do not reparse avcC
7633 h->got_avcC = 1;
7636 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7637 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7638 return -1;
7639 h->got_avcC = 1;
7642 buf_index=decode_nal_units(h, buf, buf_size);
7643 if(buf_index < 0)
7644 return -1;
7646 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7647 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7648 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7649 return -1;
7652 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7653 Picture *out = s->current_picture_ptr;
7654 Picture *cur = s->current_picture_ptr;
7655 int i, pics, cross_idr, out_of_order, out_idx;
7657 s->mb_y= 0;
7659 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7660 s->current_picture_ptr->pict_type= s->pict_type;
7662 if(!s->dropable) {
7663 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7664 h->prev_poc_msb= h->poc_msb;
7665 h->prev_poc_lsb= h->poc_lsb;
7667 h->prev_frame_num_offset= h->frame_num_offset;
7668 h->prev_frame_num= h->frame_num;
7671 * FIXME: The error handling code does not seem to support interlaced
7672 * content when slices span multiple rows.
7673 * The ff_er_add_slice calls don't work right for bottom
7674 * fields; they cause massive erroneous error concealment.
7675 * Error marking covers both fields (top and bottom).
7676 * This causes a mismatched s->error_count
7677 * and a bad error table. Further, the error count goes to
7678 * INT_MAX when called for the bottom field, because mb_y is
7679 * one past the end (the caller's fault) and resync_mb_y != 0
7680 * causes problems for the first MB line, too.
7682 if (!FIELD_PICTURE)
7683 ff_er_frame_end(s);
7685 MPV_frame_end(s);
7687 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7688 /* Wait for second field. */
7689 *data_size = 0;
7691 } else {
7692 cur->repeat_pict = 0;
7694 /* Signal interlacing information externally. */
7695 /* Prefer picture timing SEI information, when present, over the decoding process actually used. */
7696 if(h->sps.pic_struct_present_flag){
7697 switch (h->sei_pic_struct)
7699 case SEI_PIC_STRUCT_FRAME:
7700 cur->interlaced_frame = 0;
7701 break;
7702 case SEI_PIC_STRUCT_TOP_FIELD:
7703 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7704 case SEI_PIC_STRUCT_TOP_BOTTOM:
7705 case SEI_PIC_STRUCT_BOTTOM_TOP:
7706 cur->interlaced_frame = 1;
7707 break;
7708 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7709 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7710 // Signal the possibility of telecined film externally (pic_struct 5,6)
7711 // From these hints, let the application decide whether to apply deinterlacing.
7712 cur->repeat_pict = 1;
7713 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7714 break;
7715 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7717 // Force progressive here, as doubling an interlaced frame is a bad idea.
7717 cur->interlaced_frame = 0;
7718 cur->repeat_pict = 2;
7719 break;
7720 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7721 cur->interlaced_frame = 0;
7722 cur->repeat_pict = 4;
7723 break;
7725 }else{
7726 /* Derive the interlacing flag from the decoding process actually used. */
7727 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7730 if (cur->field_poc[0] != cur->field_poc[1]){
7731 /* Derive top_field_first from field pocs. */
7732 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7733 }else{
7734 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7735 /* Use picture timing SEI information. Even if it comes from a past frame, it is better than nothing. */
7736 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7737 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7738 cur->top_field_first = 1;
7739 else
7740 cur->top_field_first = 0;
7741 }else{
7742 /* Most likely progressive */
7743 cur->top_field_first = 0;
7747 //FIXME do something with unavailable reference frames
7749 /* Sort B-frames into display order */
7751 if(h->sps.bitstream_restriction_flag
7752 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7753 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7754 s->low_delay = 0;
7757 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7758 && !h->sps.bitstream_restriction_flag){
7759 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7760 s->low_delay= 0;
7763 pics = 0;
7764 while(h->delayed_pic[pics]) pics++;
7766 assert(pics <= MAX_DELAYED_PIC_COUNT);
7768 h->delayed_pic[pics++] = cur;
7769 if(cur->reference == 0)
7770 cur->reference = DELAYED_PIC_REF;
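7771 // Pick the delayed picture with the smallest POC as the output candidate, stopping at an IDR/keyframe boundary.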
7772 out = h->delayed_pic[0];
7773 out_idx = 0;
7774 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7775 if(h->delayed_pic[i]->poc < out->poc){
7776 out = h->delayed_pic[i];
7777 out_idx = i;
7779 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7781 out_of_order = !cross_idr && out->poc < h->outputed_poc;
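7782 // The candidate is out of order if its POC steps backwards from the last output and no IDR lies in between.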
7783 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7785 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7786 || (s->low_delay &&
7787 ((!cross_idr && out->poc > h->outputed_poc + 2)
7788 || cur->pict_type == FF_B_TYPE)))
7790 s->low_delay = 0;
7791 s->avctx->has_b_frames++;
7794 if(out_of_order || pics > s->avctx->has_b_frames){
7795 out->reference &= ~DELAYED_PIC_REF;
7796 for(i=out_idx; h->delayed_pic[i]; i++)
7797 h->delayed_pic[i] = h->delayed_pic[i+1];
7799 if(!out_of_order && pics > s->avctx->has_b_frames){
7800 *data_size = sizeof(AVFrame);
7802 h->outputed_poc = out->poc;
7803 *pict= *(AVFrame*)out;
7804 }else{
7805 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7810 assert(pict->data[0] || !*data_size);
7811 ff_print_debug_info(s, pict);
7812 //printf("out %d\n", (int)pict->data[0]);
7813 #if 0 //?
7815 /* Return the Picture timestamp as the frame number */
7816 /* we subtract 1 because it is added in utils.c */
7817 avctx->frame_number = s->picture_number - 1;
7818 #endif
7819 return get_consumed_bytes(s, buf_index, buf_size);
7821 #if 0
7822 static inline void fill_mb_avail(H264Context *h){
7823 MpegEncContext * const s = &h->s;
7824 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7826 if(s->mb_y){
7827 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7828 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7829 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7830 }else{
7831 h->mb_avail[0]=
7832 h->mb_avail[1]=
7833 h->mb_avail[2]= 0;
7835 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7836 h->mb_avail[4]= 1; //FIXME move out
7837 h->mb_avail[5]= 0; //FIXME move out
7839 #endif
7841 #ifdef TEST
7842 #undef printf
7843 #undef random
7844 #define COUNT 8000
7845 #define SIZE (COUNT*40)
7846 int main(void){
7847 int i;
7848 uint8_t temp[SIZE];
7849 PutBitContext pb;
7850 GetBitContext gb;
7851 // int int_temp[10000];
7852 DSPContext dsp;
7853 AVCodecContext avctx;
7855 dsputil_init(&dsp, &avctx);
7857 init_put_bits(&pb, temp, SIZE);
7858 printf("testing unsigned exp golomb\n");
7859 for(i=0; i<COUNT; i++){
7860 START_TIMER
7861 set_ue_golomb(&pb, i);
7862 STOP_TIMER("set_ue_golomb");
7864 flush_put_bits(&pb);
7866 init_get_bits(&gb, temp, 8*SIZE);
7867 for(i=0; i<COUNT; i++){
7868 int j, s;
7870 s= show_bits(&gb, 24);
7872 START_TIMER
7873 j= get_ue_golomb(&gb);
7874 if(j != i){
7875 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7876 // return -1;
7878 STOP_TIMER("get_ue_golomb");
7882 init_put_bits(&pb, temp, SIZE);
7883 printf("testing signed exp golomb\n");
7884 for(i=0; i<COUNT; i++){
7885 START_TIMER
7886 set_se_golomb(&pb, i - COUNT/2);
7887 STOP_TIMER("set_se_golomb");
7889 flush_put_bits(&pb);
7891 init_get_bits(&gb, temp, 8*SIZE);
7892 for(i=0; i<COUNT; i++){
7893 int j, s;
7895 s= show_bits(&gb, 24);
7897 START_TIMER
7898 j= get_se_golomb(&gb);
7899 if(j != i - COUNT/2){
7900 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7901 // return -1;
7903 STOP_TIMER("get_se_golomb");
7906 #if 0
7907 printf("testing 4x4 (I)DCT\n");
7909 DCTELEM block[16];
7910 uint8_t src[16], ref[16];
7911 uint64_t error= 0, max_error=0;
7913 for(i=0; i<COUNT; i++){
7914 int j;
7915 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7916 for(j=0; j<16; j++){
7917 ref[j]= random()%255;
7918 src[j]= random()%255;
7921 h264_diff_dct_c(block, src, ref, 4);
7923 //normalize
7924 for(j=0; j<16; j++){
7925 // printf("%d ", block[j]);
7926 block[j]= block[j]*4;
7927 if(j&1) block[j]= (block[j]*4 + 2)/5;
7928 if(j&4) block[j]= (block[j]*4 + 2)/5;
7930 // printf("\n");
7932 s->dsp.h264_idct_add(ref, block, 4);
7933 /* for(j=0; j<16; j++){
7934 printf("%d ", ref[j]);
7936 printf("\n");*/
7938 for(j=0; j<16; j++){
7939 int diff= FFABS(src[j] - ref[j]);
7941 error+= diff*diff;
7942 max_error= FFMAX(max_error, diff);
7945 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7946 printf("testing quantizer\n");
7947 for(qp=0; qp<52; qp++){
7948 for(i=0; i<16; i++)
7949 src1_block[i]= src2_block[i]= random()%255;
7952 printf("Testing NAL layer\n");
7954 uint8_t bitstream[COUNT];
7955 uint8_t nal[COUNT*2];
7956 H264Context h;
7957 memset(&h, 0, sizeof(H264Context));
7959 for(i=0; i<COUNT; i++){
7960 int zeros= i;
7961 int nal_length;
7962 int consumed;
7963 int out_length;
7964 uint8_t *out;
7965 int j;
7967 for(j=0; j<COUNT; j++){
7968 bitstream[j]= (random() % 255) + 1;
7971 for(j=0; j<zeros; j++){
7972 int pos= random() % COUNT;
7973 while(bitstream[pos] == 0){
7974 pos++;
7975 pos %= COUNT;
7977 bitstream[pos]=0;
7980 START_TIMER
7982 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7983 if(nal_length<0){
7984 printf("encoding failed\n");
7985 return -1;
7988 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7990 STOP_TIMER("NAL")
7992 if(out_length != COUNT){
7993 printf("incorrect length %d %d\n", out_length, COUNT);
7994 return -1;
7997 if(consumed != nal_length){
7998 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7999 return -1;
8002 if(memcmp(bitstream, out, COUNT)){
8003 printf("mismatch\n");
8004 return -1;
8007 #endif
8009 printf("Testing RBSP\n");
8012 return 0;
8014 #endif /* TEST */
8017 static av_cold int decode_end(AVCodecContext *avctx)
8019 H264Context *h = avctx->priv_data;
8020 MpegEncContext *s = &h->s;
8021 int i;
8023 av_freep(&h->rbsp_buffer[0]);
8024 av_freep(&h->rbsp_buffer[1]);
8025 free_tables(h); //FIXME cleanup init stuff perhaps
8027 for(i = 0; i < MAX_SPS_COUNT; i++)
8028 av_freep(h->sps_buffers + i);
8030 for(i = 0; i < MAX_PPS_COUNT; i++)
8031 av_freep(h->pps_buffers + i);
8033 MPV_common_end(s);
8035 // memset(h, 0, sizeof(H264Context));
8037 return 0;
8041 AVCodec h264_decoder = {
8042 "h264",
8043 CODEC_TYPE_VIDEO,
8044 CODEC_ID_H264,
8045 sizeof(H264Context),
8046 decode_init,
8047 NULL,
8048 decode_end,
8049 decode_frame,
8050 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8051 .flush= flush_dpb,
8052 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8055 #include "svq3.c"