1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 /**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h"
33 #include "h264_parser.h"
34 #include "golomb.h"
35 #include "rectangle.h"
37 #include "cabac.h"
39 //#undef NDEBUG
40 #include <assert.h>
42 /**
43 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
46 #define DELAYED_PIC_REF 4
48 static VLC coeff_token_vlc[4];
49 static VLC chroma_dc_coeff_token_vlc;
51 static VLC total_zeros_vlc[15];
52 static VLC chroma_dc_total_zeros_vlc[3];
54 static VLC run_vlc[6];
55 static VLC run7_vlc;
57 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
58 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
59 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
60 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
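/* pack16to32() packs two signed 16-bit values (typically the x and y component
 * of one motion vector) into a single 32-bit word, so a whole mv can be copied
 * or compared with one 32-bit access; the endianness check keeps the packed
 * word bit-identical to the in-memory int16_t[2] layout. */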
62 static av_always_inline uint32_t pack16to32(int a, int b){
63 #ifdef WORDS_BIGENDIAN
64 return (b&0xFFFF) + (a<<16);
65 #else
66 return (a&0xFFFF) + (b<<16);
#endif
}
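/* Lookup tables: ff_rem6[q] == q % 6 and ff_div6[q] == q / 6 for q in 0..51,
 * so the quantizer can be split into a dequant table index (qp%6) and a shift
 * (qp/6) without a runtime division. */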
70 const uint8_t ff_rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
74 const uint8_t ff_div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
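/* fill_caches() gathers everything the per-block code needs from the
 * neighbouring macroblocks (intra pred modes, non-zero counts, mvs, reference
 * indices, mvd and direct flags) into small per-MB cache arrays addressed via
 * scan8[], so the rest of the decoder does not have to special-case macroblock
 * edges. */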
79 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
80 MpegEncContext * const s = &h->s;
81 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
82 int topleft_xy, top_xy, topright_xy, left_xy[2];
83 int topleft_type, top_type, topright_type, left_type[2];
84 int left_block[8];
85 int topleft_partition= -1;
86 int i;
88 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
90 //FIXME deblocking could skip the intra and nnz parts.
91 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
92 return;
94 /* Wow, what a mess, why didn't they simplify the interlacing & intra
95 * stuff, I can't imagine that these complex rules are worth it. */
97 topleft_xy = top_xy - 1;
98 topright_xy= top_xy + 1;
99 left_xy[1] = left_xy[0] = mb_xy-1;
100 left_block[0]= 0;
101 left_block[1]= 1;
102 left_block[2]= 2;
103 left_block[3]= 3;
104 left_block[4]= 7;
105 left_block[5]= 10;
106 left_block[6]= 8;
107 left_block[7]= 11;
108 if(FRAME_MBAFF){
109 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
110 const int top_pair_xy = pair_xy - s->mb_stride;
111 const int topleft_pair_xy = top_pair_xy - 1;
112 const int topright_pair_xy = top_pair_xy + 1;
113 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
114 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
115 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
116 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
117 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
118 const int bottom = (s->mb_y & 1);
119 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
120 if (bottom
121 ? !curr_mb_frame_flag // bottom macroblock
122 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
124 top_xy -= s->mb_stride;
126 if (bottom
127 ? !curr_mb_frame_flag // bottom macroblock
128 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
130 topleft_xy -= s->mb_stride;
131 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
132 topleft_xy += s->mb_stride;
133 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
134 topleft_partition = 0;
136 if (bottom
137 ? !curr_mb_frame_flag // bottom macroblock
138 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
140 topright_xy -= s->mb_stride;
142 if (left_mb_frame_flag != curr_mb_frame_flag) {
143 left_xy[1] = left_xy[0] = pair_xy - 1;
144 if (curr_mb_frame_flag) {
145 if (bottom) {
146 left_block[0]= 2;
147 left_block[1]= 2;
148 left_block[2]= 3;
149 left_block[3]= 3;
150 left_block[4]= 8;
151 left_block[5]= 11;
152 left_block[6]= 8;
153 left_block[7]= 11;
154 } else {
155 left_block[0]= 0;
156 left_block[1]= 0;
157 left_block[2]= 1;
158 left_block[3]= 1;
159 left_block[4]= 7;
160 left_block[5]= 10;
161 left_block[6]= 7;
162 left_block[7]= 10;
164 } else {
165 left_xy[1] += s->mb_stride;
166 //left_block[0]= 0;
167 left_block[1]= 2;
168 left_block[2]= 0;
169 left_block[3]= 2;
170 //left_block[4]= 7;
171 left_block[5]= 10;
172 left_block[6]= 7;
173 left_block[7]= 10;
178 h->top_mb_xy = top_xy;
179 h->left_mb_xy[0] = left_xy[0];
180 h->left_mb_xy[1] = left_xy[1];
181 if(for_deblock){
182 topleft_type = 0;
183 topright_type = 0;
184 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
185 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
186 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
188 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
189 int list;
190 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
191 for(i=0; i<16; i++)
192 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
193 for(list=0; list<h->list_count; list++){
194 if(USES_LIST(mb_type,list)){
195 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
196 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
197 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
198 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
199 dst[0] = src[0];
200 dst[1] = src[1];
201 dst[2] = src[2];
202 dst[3] = src[3];
204 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
205 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
206 ref += h->b8_stride;
207 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
209 }else{
210 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
211 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
215 }else{
216 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
217 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
218 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
219 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
220 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
223 if(IS_INTRA(mb_type)){
224 h->topleft_samples_available=
225 h->top_samples_available=
226 h->left_samples_available= 0xFFFF;
227 h->topright_samples_available= 0xEEEA;
229 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
230 h->topleft_samples_available= 0xB3FF;
231 h->top_samples_available= 0x33FF;
232 h->topright_samples_available= 0x26EA;
234 for(i=0; i<2; i++){
235 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
236 h->topleft_samples_available&= 0xDF5F;
237 h->left_samples_available&= 0x5F5F;
241 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
242 h->topleft_samples_available&= 0x7FFF;
244 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
245 h->topright_samples_available&= 0xFBFF;
247 if(IS_INTRA4x4(mb_type)){
248 if(IS_INTRA4x4(top_type)){
249 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
250 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
251 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
252 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
253 }else{
254 int pred;
255 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
256 pred= -1;
257 else{
258 pred= 2;
260 h->intra4x4_pred_mode_cache[4+8*0]=
261 h->intra4x4_pred_mode_cache[5+8*0]=
262 h->intra4x4_pred_mode_cache[6+8*0]=
263 h->intra4x4_pred_mode_cache[7+8*0]= pred;
265 for(i=0; i<2; i++){
266 if(IS_INTRA4x4(left_type[i])){
267 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
268 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
269 }else{
270 int pred;
271 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
272 pred= -1;
273 else{
274 pred= 2;
276 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
277 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let's hope this is just a typo in the spec)
293 if(top_type){
294 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
295 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
296 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
297 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
299 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
300 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
302 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
303 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
305 }else{
306 h->non_zero_count_cache[4+8*0]=
307 h->non_zero_count_cache[5+8*0]=
308 h->non_zero_count_cache[6+8*0]=
309 h->non_zero_count_cache[7+8*0]=
311 h->non_zero_count_cache[1+8*0]=
312 h->non_zero_count_cache[2+8*0]=
314 h->non_zero_count_cache[1+8*3]=
315 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
319 for (i=0; i<2; i++) {
320 if(left_type[i]){
321 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
322 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
323 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
324 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
325 }else{
326 h->non_zero_count_cache[3+8*1 + 2*8*i]=
327 h->non_zero_count_cache[3+8*2 + 2*8*i]=
328 h->non_zero_count_cache[0+8*1 + 8*i]=
329 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
333 if( h->pps.cabac ) {
334 // top_cbp
335 if(top_type) {
336 h->top_cbp = h->cbp_table[top_xy];
337 } else if(IS_INTRA(mb_type)) {
338 h->top_cbp = 0x1C0;
339 } else {
340 h->top_cbp = 0;
342 // left_cbp
343 if (left_type[0]) {
344 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
345 } else if(IS_INTRA(mb_type)) {
346 h->left_cbp = 0x1C0;
347 } else {
348 h->left_cbp = 0;
350 if (left_type[0]) {
351 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
353 if (left_type[1]) {
354 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
358 #if 1
359 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
360 int list;
361 for(list=0; list<h->list_count; list++){
362 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
363 /*if(!h->mv_cache_clean[list]){
364 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
365 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
366 h->mv_cache_clean[list]= 1;
368 continue;
370 h->mv_cache_clean[list]= 0;
372 if(USES_LIST(top_type, list)){
373 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
374 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
375 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
376 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
377 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
379 h->ref_cache[list][scan8[0] + 0 - 1*8]=
380 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
381 h->ref_cache[list][scan8[0] + 2 - 1*8]=
382 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
383 }else{
384 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
385 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
386 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
388 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
391 for(i=0; i<2; i++){
392 int cache_idx = scan8[0] - 1 + i*2*8;
393 if(USES_LIST(left_type[i], list)){
394 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
395 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
396 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
397 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
398 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
399 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
400 }else{
401 *(uint32_t*)h->mv_cache [list][cache_idx ]=
402 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
403 h->ref_cache[list][cache_idx ]=
404 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
408 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
409 continue;
411 if(USES_LIST(topleft_type, list)){
412 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
413 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
414 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
415 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
416 }else{
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
421 if(USES_LIST(topright_type, list)){
422 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
423 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
424 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
425 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
426 }else{
427 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
431 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
432 continue;
434 h->ref_cache[list][scan8[5 ]+1] =
435 h->ref_cache[list][scan8[7 ]+1] =
436 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
437 h->ref_cache[list][scan8[4 ]] =
438 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
439 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
440 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
441 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
442 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
443 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
445 if( h->pps.cabac ) {
/* XXX yuck, load mvd */
447 if(USES_LIST(top_type, list)){
448 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
449 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
450 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
451 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
453 }else{
454 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
456 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
459 if(USES_LIST(left_type[0], list)){
460 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
461 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
462 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
463 }else{
464 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
465 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
467 if(USES_LIST(left_type[1], list)){
468 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
471 }else{
472 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
473 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
475 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
476 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
477 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
478 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
479 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
481 if(h->slice_type == FF_B_TYPE){
482 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
484 if(IS_DIRECT(top_type)){
485 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
486 }else if(IS_8X8(top_type)){
487 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
488 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
489 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
490 }else{
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
494 if(IS_DIRECT(left_type[0]))
495 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
496 else if(IS_8X8(left_type[0]))
497 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
498 else
499 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
501 if(IS_DIRECT(left_type[1]))
502 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
503 else if(IS_8X8(left_type[1]))
504 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
505 else
506 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
510 if(FRAME_MBAFF){
511 #define MAP_MVS\
512 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
513 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
514 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
515 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
516 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
518 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
519 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
520 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
521 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
522 if(MB_FIELD){
523 #define MAP_F2F(idx, mb_type)\
524 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
525 h->ref_cache[list][idx] <<= 1;\
526 h->mv_cache[list][idx][1] /= 2;\
527 h->mvd_cache[list][idx][1] /= 2;\
529 MAP_MVS
530 #undef MAP_F2F
531 }else{
532 #define MAP_F2F(idx, mb_type)\
533 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
534 h->ref_cache[list][idx] >>= 1;\
535 h->mv_cache[list][idx][1] <<= 1;\
536 h->mvd_cache[list][idx][1] <<= 1;\
538 MAP_MVS
539 #undef MAP_F2F
544 #endif
546 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
549 static inline void write_back_intra_pred_mode(H264Context *h){
550 MpegEncContext * const s = &h->s;
551 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
553 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
554 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
555 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
556 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
557 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
558 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
559 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
565 static inline int check_intra4x4_pred_mode(H264Context *h){
566 MpegEncContext * const s = &h->s;
567 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
568 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
569 int i;
571 if(!(h->top_samples_available&0x8000)){
572 for(i=0; i<4; i++){
573 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
574 if(status<0){
575 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
576 return -1;
577 } else if(status){
578 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
583 if(!(h->left_samples_available&0x8000)){
584 for(i=0; i<4; i++){
585 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
586 if(status<0){
587 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
588 return -1;
589 } else if(status){
590 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
595 return 0;
596 } //FIXME cleanup like next
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
601 static inline int check_intra_pred_mode(H264Context *h, int mode){
602 MpegEncContext * const s = &h->s;
603 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
604 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
606 if(mode > 6U) {
607 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
608 return -1;
611 if(!(h->top_samples_available&0x8000)){
612 mode= top[ mode ];
613 if(mode<0){
614 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
615 return -1;
619 if(!(h->left_samples_available&0x8000)){
620 mode= left[ mode ];
621 if(mode<0){
622 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
623 return -1;
627 return mode;
/**
 * gets the predicted intra4x4 prediction mode.
 */
633 static inline int pred_intra_mode(H264Context *h, int n){
634 const int index8= scan8[n];
635 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
636 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
637 const int min= FFMIN(left, top);
639 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
641 if(min<0) return DC_PRED;
642 else return min;
645 static inline void write_back_non_zero_count(H264Context *h){
646 MpegEncContext * const s = &h->s;
647 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
649 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
650 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
651 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
652 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
653 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
654 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
655 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
657 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
658 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
659 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
661 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
662 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
663 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
665 if(FRAME_MBAFF){
666 // store all luma nnzs, for deblocking
667 int v = 0, i;
668 for(i=0; i<16; i++)
669 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
670 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
/**
 * gets the predicted number of non zero coefficients.
 * @param n block index
 */
678 static inline int pred_non_zero_count(H264Context *h, int n){
679 const int index8= scan8[n];
680 const int left= h->non_zero_count_cache[index8 - 1];
681 const int top = h->non_zero_count_cache[index8 - 8];
682 int i= left + top;
684 if(i<64) i= (i+1)>>1;
686 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
688 return i&31;
691 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
692 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
693 MpegEncContext *s = &h->s;
695 /* there is no consistent mapping of mvs to neighboring locations that will
696 * make mbaff happy, so we can't move all this logic to fill_caches */
697 if(FRAME_MBAFF){
698 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
699 const int16_t *mv;
700 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
701 *C = h->mv_cache[list][scan8[0]-2];
703 if(!MB_FIELD
704 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
705 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
706 if(IS_INTERLACED(mb_types[topright_xy])){
707 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
708 const int x4 = X4, y4 = Y4;\
709 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
710 if(!USES_LIST(mb_type,list))\
711 return LIST_NOT_USED;\
712 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
713 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
714 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
715 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
717 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
720 if(topright_ref == PART_NOT_AVAILABLE
721 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
722 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
723 if(!MB_FIELD
724 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
725 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
727 if(MB_FIELD
728 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
729 && i >= scan8[0]+8){
730 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
731 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
734 #undef SET_DIAG_MV
737 if(topright_ref != PART_NOT_AVAILABLE){
738 *C= h->mv_cache[list][ i - 8 + part_width ];
739 return topright_ref;
740 }else{
741 tprintf(s->avctx, "topright MV not available\n");
743 *C= h->mv_cache[list][ i - 8 - 1 ];
744 return h->ref_cache[list][ i - 8 - 1 ];
/**
 * gets the predicted MV.
750 * @param n the block index
751 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
752 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
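/* Note: this is the usual median predictor. When more than one of the left (A),
 * top (B) and diagonal (C) neighbours uses the requested reference, the
 * component-wise median of their mvs is taken; with exactly one match that
 * neighbour's mv is copied directly. */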
755 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
756 const int index8= scan8[n];
757 const int top_ref= h->ref_cache[list][ index8 - 8 ];
758 const int left_ref= h->ref_cache[list][ index8 - 1 ];
759 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
760 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
761 const int16_t * C;
762 int diagonal_ref, match_count;
764 assert(part_width==1 || part_width==2 || part_width==4);
766 /* mv_cache
767 B . . A T T T T
768 U . . L . . , .
769 U . . L . . . .
770 U . . L . . , .
. . . L . . . .
*/
774 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
775 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
776 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
777 if(match_count > 1){ //most common
778 *mx= mid_pred(A[0], B[0], C[0]);
779 *my= mid_pred(A[1], B[1], C[1]);
780 }else if(match_count==1){
781 if(left_ref==ref){
782 *mx= A[0];
783 *my= A[1];
784 }else if(top_ref==ref){
785 *mx= B[0];
786 *my= B[1];
787 }else{
788 *mx= C[0];
789 *my= C[1];
791 }else{
792 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
793 *mx= A[0];
794 *my= A[1];
795 }else{
796 *mx= mid_pred(A[0], B[0], C[0]);
797 *my= mid_pred(A[1], B[1], C[1]);
801 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
/**
 * gets the directionally predicted 16x8 MV.
806 * @param n the block index
807 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
810 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
811 if(n==0){
812 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
813 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
815 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
817 if(top_ref == ref){
818 *mx= B[0];
819 *my= B[1];
820 return;
822 }else{
823 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
824 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
826 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
828 if(left_ref == ref){
829 *mx= A[0];
830 *my= A[1];
831 return;
835 //RARE
836 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
841 * @param n the block index
842 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
845 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
846 if(n==0){
847 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
848 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
850 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
852 if(left_ref == ref){
853 *mx= A[0];
854 *my= A[1];
855 return;
857 }else{
858 const int16_t * C;
859 int diagonal_ref;
861 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
863 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
865 if(diagonal_ref == ref){
866 *mx= C[0];
867 *my= C[1];
868 return;
872 //RARE
pred_motion(h, n, 2, list, ref, mx, my);
}
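/* P_Skip mv prediction (below): the mv is forced to (0,0) when the top or left
 * block is unavailable, or when either of them uses reference 0 with a zero mv;
 * otherwise the normal 16x16 median prediction is applied. */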
876 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
877 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
878 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
880 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
882 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
883 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
884 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
886 *mx = *my = 0;
887 return;
890 pred_motion(h, 0, 4, 0, 0, mx, my);
return;
}
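/* Temporal direct mode (below): the distance scale factor is derived from the
 * POC distances td = poc(ref1) - poc(ref0) and tb = poc(cur) - poc(ref0), both
 * clipped to [-128,127], as tx = (16384 + |td|/2) / td and
 * dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023). */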
895 static inline void direct_dist_scale_factor(H264Context * const h){
896 const int poc = h->s.current_picture_ptr->poc;
897 const int poc1 = h->ref_list[1][0].poc;
898 int i;
899 for(i=0; i<h->ref_count[0]; i++){
900 int poc0 = h->ref_list[0][i].poc;
901 int td = av_clip(poc1 - poc0, -128, 127);
902 if(td == 0 /* FIXME || pic0 is a long-term ref */){
903 h->dist_scale_factor[i] = 256;
904 }else{
905 int tb = av_clip(poc - poc0, -128, 127);
906 int tx = (16384 + (FFABS(td) >> 1)) / td;
907 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
910 if(FRAME_MBAFF){
911 for(i=0; i<h->ref_count[0]; i++){
912 h->dist_scale_factor_field[2*i] =
913 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
917 static inline void direct_ref_list_init(H264Context * const h){
918 MpegEncContext * const s = &h->s;
919 Picture * const ref1 = &h->ref_list[1][0];
920 Picture * const cur = s->current_picture_ptr;
921 int list, i, j;
922 if(cur->pict_type == FF_I_TYPE)
923 cur->ref_count[0] = 0;
924 if(cur->pict_type != FF_B_TYPE)
925 cur->ref_count[1] = 0;
926 for(list=0; list<2; list++){
927 cur->ref_count[list] = h->ref_count[list];
928 for(j=0; j<h->ref_count[list]; j++)
929 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
931 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
932 return;
933 for(list=0; list<2; list++){
934 for(i=0; i<ref1->ref_count[list]; i++){
935 const int poc = ref1->ref_poc[list][i];
936 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
937 for(j=0; j<h->ref_count[list]; j++)
938 if(h->ref_list[list][j].poc == poc){
939 h->map_col_to_list0[list][i] = j;
940 break;
944 if(FRAME_MBAFF){
945 for(list=0; list<2; list++){
946 for(i=0; i<ref1->ref_count[list]; i++){
947 j = h->map_col_to_list0[list][i];
948 h->map_col_to_list0_field[list][2*i] = 2*j;
949 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
955 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
956 MpegEncContext * const s = &h->s;
957 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
958 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
959 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
960 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
961 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
962 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
963 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
964 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
965 const int is_b8x8 = IS_8X8(*mb_type);
966 unsigned int sub_mb_type;
967 int i8, i4;
969 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
970 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
971 /* FIXME save sub mb types from previous frames (or derive from MVs)
972 * so we know exactly what block size to use */
973 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
974 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
975 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
976 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
977 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
978 }else{
979 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
980 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
982 if(!is_b8x8)
983 *mb_type |= MB_TYPE_DIRECT2;
984 if(MB_FIELD)
985 *mb_type |= MB_TYPE_INTERLACED;
987 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
989 if(h->direct_spatial_mv_pred){
990 int ref[2];
991 int mv[2][2];
992 int list;
994 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
996 /* ref = min(neighbors) */
997 for(list=0; list<2; list++){
998 int refa = h->ref_cache[list][scan8[0] - 1];
999 int refb = h->ref_cache[list][scan8[0] - 8];
1000 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1001 if(refc == -2)
1002 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1003 ref[list] = refa;
1004 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1005 ref[list] = refb;
1006 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1007 ref[list] = refc;
1008 if(ref[list] < 0)
1009 ref[list] = -1;
1012 if(ref[0] < 0 && ref[1] < 0){
1013 ref[0] = ref[1] = 0;
1014 mv[0][0] = mv[0][1] =
1015 mv[1][0] = mv[1][1] = 0;
1016 }else{
1017 for(list=0; list<2; list++){
1018 if(ref[list] >= 0)
1019 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1020 else
1021 mv[list][0] = mv[list][1] = 0;
1025 if(ref[1] < 0){
1026 if(!is_b8x8)
1027 *mb_type &= ~MB_TYPE_L1;
1028 sub_mb_type &= ~MB_TYPE_L1;
1029 }else if(ref[0] < 0){
1030 if(!is_b8x8)
1031 *mb_type &= ~MB_TYPE_L0;
1032 sub_mb_type &= ~MB_TYPE_L0;
1035 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1036 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1037 int mb_types_col[2];
1038 int b8_stride = h->b8_stride;
1039 int b4_stride = h->b_stride;
1041 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1043 if(IS_INTERLACED(*mb_type)){
1044 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1045 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1046 if(s->mb_y&1){
1047 l1ref0 -= 2*b8_stride;
1048 l1ref1 -= 2*b8_stride;
1049 l1mv0 -= 4*b4_stride;
1050 l1mv1 -= 4*b4_stride;
1052 b8_stride *= 3;
1053 b4_stride *= 6;
1054 }else{
1055 int cur_poc = s->current_picture_ptr->poc;
1056 int *col_poc = h->ref_list[1]->field_poc;
1057 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1058 int dy = 2*col_parity - (s->mb_y&1);
1059 mb_types_col[0] =
1060 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1061 l1ref0 += dy*b8_stride;
1062 l1ref1 += dy*b8_stride;
1063 l1mv0 += 2*dy*b4_stride;
1064 l1mv1 += 2*dy*b4_stride;
1065 b8_stride = 0;
1068 for(i8=0; i8<4; i8++){
1069 int x8 = i8&1;
1070 int y8 = i8>>1;
1071 int xy8 = x8+y8*b8_stride;
1072 int xy4 = 3*x8+y8*b4_stride;
1073 int a=0, b=0;
1075 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1076 continue;
1077 h->sub_mb_type[i8] = sub_mb_type;
1079 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1080 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1081 if(!IS_INTRA(mb_types_col[y8])
1082 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1083 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1084 if(ref[0] > 0)
1085 a= pack16to32(mv[0][0],mv[0][1]);
1086 if(ref[1] > 0)
1087 b= pack16to32(mv[1][0],mv[1][1]);
1088 }else{
1089 a= pack16to32(mv[0][0],mv[0][1]);
1090 b= pack16to32(mv[1][0],mv[1][1]);
1092 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1093 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1095 }else if(IS_16X16(*mb_type)){
1096 int a=0, b=0;
1098 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1099 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1100 if(!IS_INTRA(mb_type_col)
1101 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1102 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1103 && (h->x264_build>33 || !h->x264_build)))){
1104 if(ref[0] > 0)
1105 a= pack16to32(mv[0][0],mv[0][1]);
1106 if(ref[1] > 0)
1107 b= pack16to32(mv[1][0],mv[1][1]);
1108 }else{
1109 a= pack16to32(mv[0][0],mv[0][1]);
1110 b= pack16to32(mv[1][0],mv[1][1]);
1112 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1113 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1114 }else{
1115 for(i8=0; i8<4; i8++){
1116 const int x8 = i8&1;
1117 const int y8 = i8>>1;
1119 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1120 continue;
1121 h->sub_mb_type[i8] = sub_mb_type;
1123 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1124 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1125 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1126 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1128 /* col_zero_flag */
1129 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1130 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1131 && (h->x264_build>33 || !h->x264_build)))){
1132 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1133 if(IS_SUB_8X8(sub_mb_type)){
1134 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1135 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1136 if(ref[0] == 0)
1137 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1138 if(ref[1] == 0)
1139 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1141 }else
1142 for(i4=0; i4<4; i4++){
1143 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1144 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1145 if(ref[0] == 0)
1146 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1147 if(ref[1] == 0)
1148 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1154 }else{ /* direct temporal mv pred */
1155 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1156 const int *dist_scale_factor = h->dist_scale_factor;
1158 if(FRAME_MBAFF){
1159 if(IS_INTERLACED(*mb_type)){
1160 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1161 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1162 dist_scale_factor = h->dist_scale_factor_field;
1164 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1165 /* FIXME assumes direct_8x8_inference == 1 */
1166 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1167 int mb_types_col[2];
1168 int y_shift;
1170 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1171 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1172 | (*mb_type & MB_TYPE_INTERLACED);
1173 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1175 if(IS_INTERLACED(*mb_type)){
1176 /* frame to field scaling */
1177 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1178 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1179 if(s->mb_y&1){
1180 l1ref0 -= 2*h->b8_stride;
1181 l1ref1 -= 2*h->b8_stride;
1182 l1mv0 -= 4*h->b_stride;
1183 l1mv1 -= 4*h->b_stride;
1185 y_shift = 0;
1187 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1188 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1189 && !is_b8x8)
1190 *mb_type |= MB_TYPE_16x8;
1191 else
1192 *mb_type |= MB_TYPE_8x8;
1193 }else{
1194 /* field to frame scaling */
1195 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1196 * but in MBAFF, top and bottom POC are equal */
1197 int dy = (s->mb_y&1) ? 1 : 2;
1198 mb_types_col[0] =
1199 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1200 l1ref0 += dy*h->b8_stride;
1201 l1ref1 += dy*h->b8_stride;
1202 l1mv0 += 2*dy*h->b_stride;
1203 l1mv1 += 2*dy*h->b_stride;
1204 y_shift = 2;
1206 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1207 && !is_b8x8)
1208 *mb_type |= MB_TYPE_16x16;
1209 else
1210 *mb_type |= MB_TYPE_8x8;
1213 for(i8=0; i8<4; i8++){
1214 const int x8 = i8&1;
1215 const int y8 = i8>>1;
1216 int ref0, scale;
1217 const int16_t (*l1mv)[2]= l1mv0;
1219 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1220 continue;
1221 h->sub_mb_type[i8] = sub_mb_type;
1223 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1224 if(IS_INTRA(mb_types_col[y8])){
1225 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1226 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1227 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1228 continue;
1231 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1232 if(ref0 >= 0)
1233 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1234 else{
1235 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1236 l1mv= l1mv1;
1238 scale = dist_scale_factor[ref0];
1239 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1242 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1243 int my_col = (mv_col[1]<<y_shift)/2;
1244 int mx = (scale * mv_col[0] + 128) >> 8;
1245 int my = (scale * my_col + 128) >> 8;
1246 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1247 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1250 return;
1254 /* one-to-one mv scaling */
1256 if(IS_16X16(*mb_type)){
1257 int ref, mv0, mv1;
1259 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1260 if(IS_INTRA(mb_type_col)){
1261 ref=mv0=mv1=0;
1262 }else{
1263 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1264 : map_col_to_list0[1][l1ref1[0]];
1265 const int scale = dist_scale_factor[ref0];
1266 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1267 int mv_l0[2];
1268 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1269 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1270 ref= ref0;
1271 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1272 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1274 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1275 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1276 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1277 }else{
1278 for(i8=0; i8<4; i8++){
1279 const int x8 = i8&1;
1280 const int y8 = i8>>1;
1281 int ref0, scale;
1282 const int16_t (*l1mv)[2]= l1mv0;
1284 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1285 continue;
1286 h->sub_mb_type[i8] = sub_mb_type;
1287 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1288 if(IS_INTRA(mb_type_col)){
1289 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1290 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1291 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 continue;
1295 ref0 = l1ref0[x8 + y8*h->b8_stride];
1296 if(ref0 >= 0)
1297 ref0 = map_col_to_list0[0][ref0];
1298 else{
1299 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1300 l1mv= l1mv1;
1302 scale = dist_scale_factor[ref0];
1304 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1305 if(IS_SUB_8X8(sub_mb_type)){
1306 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1307 int mx = (scale * mv_col[0] + 128) >> 8;
1308 int my = (scale * mv_col[1] + 128) >> 8;
1309 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1310 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1311 }else
1312 for(i4=0; i4<4; i4++){
1313 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1314 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1315 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1316 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1317 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1318 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1325 static inline void write_back_motion(H264Context *h, int mb_type){
1326 MpegEncContext * const s = &h->s;
1327 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1328 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1329 int list;
1331 if(!USES_LIST(mb_type, 0))
1332 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1334 for(list=0; list<h->list_count; list++){
1335 int y;
1336 if(!USES_LIST(mb_type, list))
1337 continue;
1339 for(y=0; y<4; y++){
1340 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1341 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1343 if( h->pps.cabac ) {
1344 if(IS_SKIP(mb_type))
1345 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1346 else
1347 for(y=0; y<4; y++){
1348 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1349 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1354 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1355 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1356 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1357 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1358 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1362 if(h->slice_type == FF_B_TYPE && h->pps.cabac){
1363 if(IS_8X8(mb_type)){
1364 uint8_t *direct_table = &h->direct_table[b8_xy];
1365 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1366 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1367 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
/**
 * Decodes a network abstraction layer unit.
1374 * @param consumed is the number of bytes used as input
1375 * @param length is the length of the array
1376 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
 * @returns decoded bytes, might be src+1 if no escapes
 */
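/* Note on the unescaping below: inside the NAL payload the sequence
 * 0x00 0x00 0x03 carries an emulation prevention byte, so the 0x03 is dropped
 * and the two zero bytes are kept; 0x00 0x00 followed by 0x00, 0x01 or 0x02 is
 * treated as the start of the next NAL unit and terminates this one. */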
1379 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1380 int i, si, di;
1381 uint8_t *dst;
1382 int bufidx;
1384 // src[0]&0x80; //forbidden bit
1385 h->nal_ref_idc= src[0]>>5;
1386 h->nal_unit_type= src[0]&0x1F;
1388 src++; length--;
1389 #if 0
1390 for(i=0; i<length; i++)
1391 printf("%2X ", src[i]);
1392 #endif
1393 for(i=0; i+1<length; i+=2){
1394 if(src[i]) continue;
1395 if(i>0 && src[i-1]==0) i--;
1396 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1397 if(src[i+2]!=3){
1398 /* startcode, so we must be past the end */
1399 length=i;
1401 break;
1405 if(i>=length-1){ //no escaped 0
1406 *dst_length= length;
1407 *consumed= length+1; //+1 for the header
1408 return src;
1411 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1412 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1413 dst= h->rbsp_buffer[bufidx];
1415 if (dst == NULL){
1416 return NULL;
1419 //printf("decoding esc\n");
1420 si=di=0;
1421 while(si<length){
1422 //remove escapes (very rare 1:2^22)
1423 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1424 if(src[si+2]==3){ //escape
1425 dst[di++]= 0;
1426 dst[di++]= 0;
1427 si+=3;
1428 continue;
1429 }else //next start code
1430 break;
1433 dst[di++]= src[si++];
1436 *dst_length= di;
1437 *consumed= si + 1;//+1 for the header
1438 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1439 return dst;
/**
 * identifies the exact end of the bitstream
 * @return the length of the trailing, or 0 if damaged
 */
1446 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1447 int v= *src;
1448 int r;
1450 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1452 for(r=1; r<9; r++){
1453 if(v&1) return r;
1454 v>>=1;
1456 return 0;
/**
 * idct transforms the 16 dc values and dequantizes them.
 * @param qp quantization parameter
 */
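/* The two loops below are a 4x4 butterfly (Hadamard-type) transform over the
 * 16 luma DC coefficients, rows then columns; each result is then scaled by
 * qmul with rounding ((x*qmul + 128) >> 8) and written back to the DC position
 * of its 4x4 block, which is what the x_offset/y_offset tables encode. */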
1463 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1464 #define stride 16
1465 int i;
1466 int temp[16]; //FIXME check if this is a good idea
1467 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1468 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1470 //memset(block, 64, 2*256);
1471 //return;
1472 for(i=0; i<4; i++){
1473 const int offset= y_offset[i];
1474 const int z0= block[offset+stride*0] + block[offset+stride*4];
1475 const int z1= block[offset+stride*0] - block[offset+stride*4];
1476 const int z2= block[offset+stride*1] - block[offset+stride*5];
1477 const int z3= block[offset+stride*1] + block[offset+stride*5];
1479 temp[4*i+0]= z0+z3;
1480 temp[4*i+1]= z1+z2;
1481 temp[4*i+2]= z1-z2;
1482 temp[4*i+3]= z0-z3;
1485 for(i=0; i<4; i++){
1486 const int offset= x_offset[i];
1487 const int z0= temp[4*0+i] + temp[4*2+i];
1488 const int z1= temp[4*0+i] - temp[4*2+i];
1489 const int z2= temp[4*1+i] - temp[4*3+i];
1490 const int z3= temp[4*1+i] + temp[4*3+i];
block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1493 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1494 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1495 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1499 #if 0
/**
 * dct transforms the 16 dc values.
 * @param qp quantization parameter ??? FIXME
 */
1504 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1505 // const int qmul= dequant_coeff[qp][0];
1506 int i;
1507 int temp[16]; //FIXME check if this is a good idea
1508 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1509 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1511 for(i=0; i<4; i++){
1512 const int offset= y_offset[i];
1513 const int z0= block[offset+stride*0] + block[offset+stride*4];
1514 const int z1= block[offset+stride*0] - block[offset+stride*4];
1515 const int z2= block[offset+stride*1] - block[offset+stride*5];
1516 const int z3= block[offset+stride*1] + block[offset+stride*5];
1518 temp[4*i+0]= z0+z3;
1519 temp[4*i+1]= z1+z2;
1520 temp[4*i+2]= z1-z2;
1521 temp[4*i+3]= z0-z3;
1524 for(i=0; i<4; i++){
1525 const int offset= x_offset[i];
1526 const int z0= temp[4*0+i] + temp[4*2+i];
1527 const int z1= temp[4*0+i] - temp[4*2+i];
1528 const int z2= temp[4*1+i] - temp[4*3+i];
1529 const int z3= temp[4*1+i] + temp[4*3+i];
1531 block[stride*0 +offset]= (z0 + z3)>>1;
1532 block[stride*2 +offset]= (z1 + z2)>>1;
1533 block[stride*8 +offset]= (z1 - z2)>>1;
1534 block[stride*10+offset]= (z0 - z3)>>1;
1537 #endif
1539 #undef xStride
1540 #undef stride
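/* chroma_dc_dequant_idct_c() below applies a 2x2 butterfly transform to the
 * four chroma DC coefficients and then scales them by qmul with a >>7. */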
1542 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1543 const int stride= 16*2;
1544 const int xStride= 16;
1545 int a,b,c,d,e;
1547 a= block[stride*0 + xStride*0];
1548 b= block[stride*0 + xStride*1];
1549 c= block[stride*1 + xStride*0];
1550 d= block[stride*1 + xStride*1];
1552 e= a-b;
1553 a= a+b;
1554 b= c-d;
1555 c= c+d;
1557 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1558 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1559 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1560 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1563 #if 0
1564 static void chroma_dc_dct_c(DCTELEM *block){
1565 const int stride= 16*2;
1566 const int xStride= 16;
1567 int a,b,c,d,e;
1569 a= block[stride*0 + xStride*0];
1570 b= block[stride*0 + xStride*1];
1571 c= block[stride*1 + xStride*0];
1572 d= block[stride*1 + xStride*1];
1574 e= a-b;
1575 a= a+b;
1576 b= c-d;
1577 c= c+d;
1579 block[stride*0 + xStride*0]= (a+c);
1580 block[stride*0 + xStride*1]= (e+b);
1581 block[stride*1 + xStride*0]= (a-c);
1582 block[stride*1 + xStride*1]= (e-b);
1584 #endif
/**
 * gets the chroma qp.
 */
1589 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1590 return h->pps.chroma_qp_table[t][qscale & 0xff];
1593 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1594 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
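/* quantize_c() below is a dead-zone quantizer: the rounding bias is roughly
 * 1/3 of a quantization step for intra and 1/6 for inter blocks, and the
 * threshold1/threshold2 pair lets a single unsigned comparison skip levels
 * that would quantize to zero; the separate_dc branches use adjusted shifts
 * for the DC coefficient (see the overflow comment in the code). */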
1595 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1596 int i;
1597 const int * const quant_table= quant_coeff[qscale];
1598 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1599 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1600 const unsigned int threshold2= (threshold1<<1);
1601 int last_non_zero;
1603 if(separate_dc){
1604 if(qscale<=18){
1605 //avoid overflows
1606 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1607 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1608 const unsigned int dc_threshold2= (dc_threshold1<<1);
1610 int level= block[0]*quant_coeff[qscale+18][0];
1611 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1612 if(level>0){
1613 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1614 block[0]= level;
1615 }else{
1616 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1617 block[0]= -level;
1619 // last_non_zero = i;
1620 }else{
1621 block[0]=0;
1623 }else{
1624 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1625 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1626 const unsigned int dc_threshold2= (dc_threshold1<<1);
1628 int level= block[0]*quant_table[0];
1629 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1630 if(level>0){
1631 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1632 block[0]= level;
1633 }else{
1634 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1635 block[0]= -level;
1637 // last_non_zero = i;
1638 }else{
1639 block[0]=0;
1642 last_non_zero= 0;
1643 i=1;
1644 }else{
1645 last_non_zero= -1;
1646 i=0;
1649 for(; i<16; i++){
1650 const int j= scantable[i];
1651 int level= block[j]*quant_table[j];
1653 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1654 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1655 if(((unsigned)(level+threshold1))>threshold2){
1656 if(level>0){
1657 level= (bias + level)>>QUANT_SHIFT;
1658 block[j]= level;
1659 }else{
1660 level= (bias - level)>>QUANT_SHIFT;
1661 block[j]= -level;
1663 last_non_zero = i;
1664 }else{
1665 block[j]=0;
1669 return last_non_zero;
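/**
 * Motion compensation for one prediction direction of one partition:
 * fetches the luma and chroma samples of block n from the given reference
 * list, using ff_emulated_edge_mc() when the motion vector points outside
 * the reference picture.
 */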
1672 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1673 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1674 int src_x_offset, int src_y_offset,
1675 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1676 MpegEncContext * const s = &h->s;
1677 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1678 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1679 const int luma_xy= (mx&3) + ((my&3)<<2);
1680 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1681 uint8_t * src_cb, * src_cr;
1682 int extra_width= h->emu_edge_width;
1683 int extra_height= h->emu_edge_height;
1684 int emu=0;
1685 const int full_mx= mx>>2;
1686 const int full_my= my>>2;
1687 const int pic_width = 16*s->mb_width;
1688 const int pic_height = 16*s->mb_height >> MB_FIELD;
1690 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
1691 return;
1693 if(mx&7) extra_width -= 3;
1694 if(my&7) extra_height -= 3;
1696 if( full_mx < 0-extra_width
1697 || full_my < 0-extra_height
1698 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1699 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1700 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1701 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1702 emu=1;
1705 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1706 if(!square){
1707 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1710 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1712 if(MB_FIELD){
1713 // chroma offset when predicting from a field of opposite parity
1714 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1715 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1717 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1718 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1720 if(emu){
1721 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1722 src_cb= s->edge_emu_buffer;
1724 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1726 if(emu){
1727 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1728 src_cr= s->edge_emu_buffer;
1730 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
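/**
 * Unweighted motion compensation for one partition: the list0 prediction
 * is written with the put functions; if the block is also list1 predicted,
 * the second prediction is combined in with the avg functions.
 */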
1733 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1734 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1735 int x_offset, int y_offset,
1736 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1737 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1738 int list0, int list1){
1739 MpegEncContext * const s = &h->s;
1740 qpel_mc_func *qpix_op= qpix_put;
1741 h264_chroma_mc_func chroma_op= chroma_put;
1743 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1744 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1745 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1746 x_offset += 8*s->mb_x;
1747 y_offset += 8*(s->mb_y >> MB_FIELD);
1749 if(list0){
1750 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1751 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1752 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1753 qpix_op, chroma_op);
1755 qpix_op= qpix_avg;
1756 chroma_op= chroma_avg;
1759 if(list1){
1760 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1761 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1762 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1763 qpix_op, chroma_op);
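/**
 * Weighted motion compensation for one partition. Bi-predicted blocks are
 * predicted into a scratch buffer and merged with the biweight functions
 * (implicit weights when use_weight==2, explicit ones otherwise), while
 * uni-predicted blocks are weighted in place.
 */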
1767 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1768 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1769 int x_offset, int y_offset,
1770 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1771 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1772 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1773 int list0, int list1){
1774 MpegEncContext * const s = &h->s;
1776 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1777 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1778 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1779 x_offset += 8*s->mb_x;
1780 y_offset += 8*(s->mb_y >> MB_FIELD);
1782 if(list0 && list1){
1783 /* don't optimize for luma-only case, since B-frames usually
1784 * use implicit weights => chroma too. */
1785 uint8_t *tmp_cb = s->obmc_scratchpad;
1786 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1787 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1788 int refn0 = h->ref_cache[0][ scan8[n] ];
1789 int refn1 = h->ref_cache[1][ scan8[n] ];
1791 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1792 dest_y, dest_cb, dest_cr,
1793 x_offset, y_offset, qpix_put, chroma_put);
1794 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1795 tmp_y, tmp_cb, tmp_cr,
1796 x_offset, y_offset, qpix_put, chroma_put);
1798 if(h->use_weight == 2){
1799 int weight0 = h->implicit_weight[refn0][refn1];
1800 int weight1 = 64 - weight0;
1801 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1802 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1803 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1804 }else{
1805 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1806 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1807 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1808 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1809 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1810 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1811 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1812 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1813 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1815 }else{
1816 int list = list1 ? 1 : 0;
1817 int refn = h->ref_cache[list][ scan8[n] ];
1818 Picture *ref= &h->ref_list[list][refn];
1819 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1820 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1821 qpix_put, chroma_put);
1823 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1824 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1825 if(h->use_weight_chroma){
1826 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1827 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1828 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1829 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
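/**
 * Dispatches one partition to mc_part_weighted() or mc_part_std(),
 * depending on whether explicit weighting or a non-trivial implicit
 * weight is in use.
 */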
1834 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1835 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1836 int x_offset, int y_offset,
1837 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1838 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1839 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1840 int list0, int list1){
1841 if((h->use_weight==2 && list0 && list1
1842 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1843 || h->use_weight==1)
1844 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1845 x_offset, y_offset, qpix_put, chroma_put,
1846 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1847 else
1848 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1849 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1852 static inline void prefetch_motion(H264Context *h, int list){
1853 /* fetch pixels for estimated mv 4 macroblocks ahead
1854 * optimized for 64-byte cache lines */
1855 MpegEncContext * const s = &h->s;
1856 const int refn = h->ref_cache[list][scan8[0]];
1857 if(refn >= 0){
1858 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1859 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1860 uint8_t **src= h->ref_list[list][refn].data;
1861 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1862 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1863 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1864 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
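/**
 * Inter prediction for a whole macroblock: splits it into 16x16, 16x8,
 * 8x16 or 8x8 (and sub-8x8) partitions according to mb_type/sub_mb_type
 * and hands each partition to mc_part().
 */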
1868 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1869 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1870 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1871 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1872 MpegEncContext * const s = &h->s;
1873 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1874 const int mb_type= s->current_picture.mb_type[mb_xy];
1876 assert(IS_INTER(mb_type));
1878 prefetch_motion(h, 0);
1880 if(IS_16X16(mb_type)){
1881 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1882 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1883 &weight_op[0], &weight_avg[0],
1884 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1885 }else if(IS_16X8(mb_type)){
1886 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1887 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1888 &weight_op[1], &weight_avg[1],
1889 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1890 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1891 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1892 &weight_op[1], &weight_avg[1],
1893 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1894 }else if(IS_8X16(mb_type)){
1895 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1896 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1897 &weight_op[2], &weight_avg[2],
1898 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1899 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1900 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1901 &weight_op[2], &weight_avg[2],
1902 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1903 }else{
1904 int i;
1906 assert(IS_8X8(mb_type));
1908 for(i=0; i<4; i++){
1909 const int sub_mb_type= h->sub_mb_type[i];
1910 const int n= 4*i;
1911 int x_offset= (i&1)<<2;
1912 int y_offset= (i&2)<<1;
1914 if(IS_SUB_8X8(sub_mb_type)){
1915 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1916 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1917 &weight_op[3], &weight_avg[3],
1918 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1919 }else if(IS_SUB_8X4(sub_mb_type)){
1920 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1921 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1922 &weight_op[4], &weight_avg[4],
1923 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1924 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1925 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1926 &weight_op[4], &weight_avg[4],
1927 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1928 }else if(IS_SUB_4X8(sub_mb_type)){
1929 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1930 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1931 &weight_op[5], &weight_avg[5],
1932 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1933 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1934 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1935 &weight_op[5], &weight_avg[5],
1936 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1937 }else{
1938 int j;
1939 assert(IS_SUB_4X4(sub_mb_type));
1940 for(j=0; j<4; j++){
1941 int sub_x_offset= x_offset + 2*(j&1);
1942 int sub_y_offset= y_offset + (j&2);
1943 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1944 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1945 &weight_op[6], &weight_avg[6],
1946 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1952 prefetch_motion(h, 1);
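/**
 * Initializes the CAVLC VLC tables (coeff_token, total_zeros and run
 * codes); only done once, on the first call.
 */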
1955 static av_cold void decode_init_vlc(void){
1956 static int done = 0;
1958 if (!done) {
1959 int i;
1960 done = 1;
1962 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1963 &chroma_dc_coeff_token_len [0], 1, 1,
1964 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1966 for(i=0; i<4; i++){
1967 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1968 &coeff_token_len [i][0], 1, 1,
1969 &coeff_token_bits[i][0], 1, 1, 1);
1972 for(i=0; i<3; i++){
1973 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1974 &chroma_dc_total_zeros_len [i][0], 1, 1,
1975 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1977 for(i=0; i<15; i++){
1978 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1979 &total_zeros_len [i][0], 1, 1,
1980 &total_zeros_bits[i][0], 1, 1, 1);
1983 for(i=0; i<6; i++){
1984 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1985 &run_len [i][0], 1, 1,
1986 &run_bits[i][0], 1, 1, 1);
1988 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1989 &run_len [6][0], 1, 1,
1990 &run_bits[6][0], 1, 1, 1);
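/**
 * Frees the per-stream tables, the SPS/PPS buffers and the per-thread
 * border/scratchpad buffers.
 */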
1994 static void free_tables(H264Context *h){
1995 int i;
1996 H264Context *hx;
1997 av_freep(&h->intra4x4_pred_mode);
1998 av_freep(&h->chroma_pred_mode_table);
1999 av_freep(&h->cbp_table);
2000 av_freep(&h->mvd_table[0]);
2001 av_freep(&h->mvd_table[1]);
2002 av_freep(&h->direct_table);
2003 av_freep(&h->non_zero_count);
2004 av_freep(&h->slice_table_base);
2005 h->slice_table= NULL;
2007 av_freep(&h->mb2b_xy);
2008 av_freep(&h->mb2b8_xy);
2010 for(i = 0; i < MAX_SPS_COUNT; i++)
2011 av_freep(h->sps_buffers + i);
2013 for(i = 0; i < MAX_PPS_COUNT; i++)
2014 av_freep(h->pps_buffers + i);
2016 for(i = 0; i < h->s.avctx->thread_count; i++) {
2017 hx = h->thread_context[i];
2018 if(!hx) continue;
2019 av_freep(&hx->top_borders[1]);
2020 av_freep(&hx->top_borders[0]);
2021 av_freep(&hx->s.obmc_scratchpad);
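/**
 * Builds the 8x8 dequantization tables for all 52 QP values from the
 * scaling matrices in the PPS; identical matrices share a single table.
 */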
2025 static void init_dequant8_coeff_table(H264Context *h){
2026 int i,q,x;
2027 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2028 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2029 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2031 for(i=0; i<2; i++ ){
2032 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2033 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2034 break;
2037 for(q=0; q<52; q++){
2038 int shift = ff_div6[q];
2039 int idx = ff_rem6[q];
2040 for(x=0; x<64; x++)
2041 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2042 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2043 h->pps.scaling_matrix8[i][x]) << shift;
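/**
 * Builds the 4x4 dequantization tables for the six scaling list indices;
 * indices with identical scaling matrices share a single table.
 */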
2048 static void init_dequant4_coeff_table(H264Context *h){
2049 int i,j,q,x;
2050 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2051 for(i=0; i<6; i++ ){
2052 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2053 for(j=0; j<i; j++){
2054 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2055 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2056 break;
2059 if(j<i)
2060 continue;
2062 for(q=0; q<52; q++){
2063 int shift = ff_div6[q] + 2;
2064 int idx = ff_rem6[q];
2065 for(x=0; x<16; x++)
2066 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2067 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2068 h->pps.scaling_matrix4[i][x]) << shift;
2073 static void init_dequant_tables(H264Context *h){
2074 int i,x;
2075 init_dequant4_coeff_table(h);
2076 if(h->pps.transform_8x8_mode)
2077 init_dequant8_coeff_table(h);
2078 if(h->sps.transform_bypass){
2079 for(i=0; i<6; i++)
2080 for(x=0; x<16; x++)
2081 h->dequant4_coeff[i][0][x] = 1<<6;
2082 if(h->pps.transform_8x8_mode)
2083 for(i=0; i<2; i++)
2084 for(x=0; x<64; x++)
2085 h->dequant8_coeff[i][0][x] = 1<<6;
2091 * allocates tables.
2092 * needs width/height
2094 static int alloc_tables(H264Context *h){
2095 MpegEncContext * const s = &h->s;
2096 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2097 int x,y;
2099 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2101 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2102 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2105 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2106 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2107 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2108 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2110 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2111 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2113 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2114 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2115 for(y=0; y<s->mb_height; y++){
2116 for(x=0; x<s->mb_width; x++){
2117 const int mb_xy= x + y*s->mb_stride;
2118 const int b_xy = 4*x + 4*y*h->b_stride;
2119 const int b8_xy= 2*x + 2*y*h->b8_stride;
2121 h->mb2b_xy [mb_xy]= b_xy;
2122 h->mb2b8_xy[mb_xy]= b8_xy;
2126 s->obmc_scratchpad = NULL;
2128 if(!h->dequant4_coeff[0])
2129 init_dequant_tables(h);
2131 return 0;
2132 fail:
2133 free_tables(h);
2134 return -1;
2138 * Mimic alloc_tables(), but for every context thread.
2140 static void clone_tables(H264Context *dst, H264Context *src){
2141 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2142 dst->non_zero_count = src->non_zero_count;
2143 dst->slice_table = src->slice_table;
2144 dst->cbp_table = src->cbp_table;
2145 dst->mb2b_xy = src->mb2b_xy;
2146 dst->mb2b8_xy = src->mb2b8_xy;
2147 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2148 dst->mvd_table[0] = src->mvd_table[0];
2149 dst->mvd_table[1] = src->mvd_table[1];
2150 dst->direct_table = src->direct_table;
2152 dst->s.obmc_scratchpad = NULL;
2153 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2157 * Init context
2158 * Allocate buffers which are not shared amongst multiple threads.
2160 static int context_init(H264Context *h){
2161 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2162 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2164 return 0;
2165 fail:
2166 return -1; // free_tables will clean up for us
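/**
 * Initialization that does not depend on the bitstream: picture
 * dimensions, intra prediction functions and flat (all 16) default
 * scaling matrices.
 */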
2169 static av_cold void common_init(H264Context *h){
2170 MpegEncContext * const s = &h->s;
2172 s->width = s->avctx->width;
2173 s->height = s->avctx->height;
2174 s->codec_id= s->avctx->codec->id;
2176 ff_h264_pred_init(&h->hpc, s->codec_id);
2178 h->dequant_coeff_pps= -1;
2179 s->unrestricted_mv=1;
2180 s->decode=1; //FIXME
2182 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2183 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
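/**
 * AVCodec init callback: sets the decoder defaults, builds the VLC tables
 * and detects AVC (avcC extradata) versus Annex B input.
 */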
2186 static av_cold int decode_init(AVCodecContext *avctx){
2187 H264Context *h= avctx->priv_data;
2188 MpegEncContext * const s = &h->s;
2190 MPV_decode_defaults(s);
2192 s->avctx = avctx;
2193 common_init(h);
2195 s->out_format = FMT_H264;
2196 s->workaround_bugs= avctx->workaround_bugs;
2198 // set defaults
2199 // s->decode_mb= ff_h263_decode_mb;
2200 s->quarter_sample = 1;
2201 s->low_delay= 1;
2202 avctx->pix_fmt= PIX_FMT_YUV420P;
2204 decode_init_vlc();
2206 if(avctx->extradata_size > 0 && avctx->extradata &&
2207 *(char *)avctx->extradata == 1){
2208 h->is_avc = 1;
2209 h->got_avcC = 0;
2210 } else {
2211 h->is_avc = 0;
2214 h->thread_context[0] = h;
2215 return 0;
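/**
 * Starts decoding a new frame: obtains the target picture through
 * MPV_frame_start(), precomputes the per-block destination offsets and
 * allocates the per-thread bi-prediction scratch buffers.
 */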
2218 static int frame_start(H264Context *h){
2219 MpegEncContext * const s = &h->s;
2220 int i;
2222 if(MPV_frame_start(s, s->avctx) < 0)
2223 return -1;
2224 ff_er_frame_start(s);
2226 * MPV_frame_start uses pict_type to derive key_frame.
2227 * This is incorrect for H.264; IDR markings must be used.
2228 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2229 * See decode_nal_units().
2231 s->current_picture_ptr->key_frame= 0;
2233 assert(s->linesize && s->uvlinesize);
2235 for(i=0; i<16; i++){
2236 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2237 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2239 for(i=0; i<4; i++){
2240 h->block_offset[16+i]=
2241 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2242 h->block_offset[24+16+i]=
2243 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2246 /* can't be in alloc_tables because linesize isn't known there.
2247 * FIXME: redo bipred weight to not require extra buffer? */
2248 for(i = 0; i < s->avctx->thread_count; i++)
2249 if(!h->thread_context[i]->s.obmc_scratchpad)
2250 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2252 /* some macroblocks will be accessed before they're available */
2253 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2254 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2256 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2257 return 0;
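/**
 * Saves the unfiltered right column and bottom row of the macroblock
 * before deblocking; xchg_mb_border() later swaps them back in so that
 * intra prediction of the neighbouring macroblocks sees unfiltered
 * samples.
 */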
2260 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2261 MpegEncContext * const s = &h->s;
2262 int i;
2264 src_y -= linesize;
2265 src_cb -= uvlinesize;
2266 src_cr -= uvlinesize;
2268 // There are two lines saved, the line above the top macroblock of a pair,
2269 // and the line above the bottom macroblock
2270 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2271 for(i=1; i<17; i++){
2272 h->left_border[i]= src_y[15+i* linesize];
2275 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2276 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2278 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2279 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2280 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2281 for(i=1; i<9; i++){
2282 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2283 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2285 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2286 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
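/**
 * Exchanges the top/left neighbouring samples of the current macroblock
 * with the saved unfiltered border samples (xchg=1 before intra
 * prediction, xchg=0 to restore the picture afterwards); slice boundaries
 * are respected when deblocking_filter==2.
 */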
2290 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2291 MpegEncContext * const s = &h->s;
2292 int temp8, i;
2293 uint64_t temp64;
2294 int deblock_left;
2295 int deblock_top;
2296 int mb_xy;
2298 if(h->deblocking_filter == 2) {
2299 mb_xy = s->mb_x + s->mb_y*s->mb_stride;
2300 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2301 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2302 } else {
2303 deblock_left = (s->mb_x > 0);
2304 deblock_top = (s->mb_y > 0);
2307 src_y -= linesize + 1;
2308 src_cb -= uvlinesize + 1;
2309 src_cr -= uvlinesize + 1;
2311 #define XCHG(a,b,t,xchg)\
2312 t= a;\
2313 if(xchg)\
2314 a= b;\
2315 b= t;
2317 if(deblock_left){
2318 for(i = !deblock_top; i<17; i++){
2319 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2323 if(deblock_top){
2324 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2325 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2326 if(s->mb_x+1 < s->mb_width){
2327 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2331 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2332 if(deblock_left){
2333 for(i = !deblock_top; i<9; i++){
2334 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2335 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2338 if(deblock_top){
2339 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2340 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
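/**
 * MBAFF variant of backup_mb_border(): saves the borders of a whole
 * macroblock pair (two top rows and a 34-sample left column).
 */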
2345 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2346 MpegEncContext * const s = &h->s;
2347 int i;
2349 src_y -= 2 * linesize;
2350 src_cb -= 2 * uvlinesize;
2351 src_cr -= 2 * uvlinesize;
2353 // There are two lines saved, the line above the top macroblock of a pair,
2354 // and the line above the bottom macroblock
2355 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2356 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2357 for(i=2; i<34; i++){
2358 h->left_border[i]= src_y[15+i* linesize];
2361 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2362 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2363 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2364 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2366 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2367 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2368 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2369 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2370 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2371 for(i=2; i<18; i++){
2372 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2373 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2375 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2376 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2377 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2378 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
2382 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2383 MpegEncContext * const s = &h->s;
2384 int temp8, i;
2385 uint64_t temp64;
2386 int deblock_left = (s->mb_x > 0);
2387 int deblock_top = (s->mb_y > 1);
2389 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2391 src_y -= 2 * linesize + 1;
2392 src_cb -= 2 * uvlinesize + 1;
2393 src_cr -= 2 * uvlinesize + 1;
2395 #define XCHG(a,b,t,xchg)\
2396 t= a;\
2397 if(xchg)\
2398 a= b;\
2399 b= t;
2401 if(deblock_left){
2402 for(i = (!deblock_top)<<1; i<34; i++){
2403 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2407 if(deblock_top){
2408 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2409 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2410 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2411 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2412 if(s->mb_x+1 < s->mb_width){
2413 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2414 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2418 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2419 if(deblock_left){
2420 for(i = (!deblock_top) << 1; i<18; i++){
2421 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2422 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2425 if(deblock_top){
2426 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2427 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2428 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2429 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
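/**
 * Reconstructs one macroblock: intra prediction or inter motion
 * compensation, addition of the transformed residual, and deblocking.
 * The 'simple' parameter selects a fast path without MBAFF, gray, PCM
 * and SVQ3 handling.
 */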
2434 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2435 MpegEncContext * const s = &h->s;
2436 const int mb_x= s->mb_x;
2437 const int mb_y= s->mb_y;
2438 const int mb_xy= mb_x + mb_y*s->mb_stride;
2439 const int mb_type= s->current_picture.mb_type[mb_xy];
2440 uint8_t *dest_y, *dest_cb, *dest_cr;
2441 int linesize, uvlinesize /*dct_offset*/;
2442 int i;
2443 int *block_offset = &h->block_offset[0];
2444 const unsigned int bottom = mb_y & 1;
2445 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2446 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2447 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2449 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2450 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2451 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2453 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2454 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2456 if (!simple && MB_FIELD) {
2457 linesize = h->mb_linesize = s->linesize * 2;
2458 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2459 block_offset = &h->block_offset[24];
2460 if(mb_y&1){ //FIXME move out of this func?
2461 dest_y -= s->linesize*15;
2462 dest_cb-= s->uvlinesize*7;
2463 dest_cr-= s->uvlinesize*7;
2465 if(FRAME_MBAFF) {
2466 int list;
2467 for(list=0; list<h->list_count; list++){
2468 if(!USES_LIST(mb_type, list))
2469 continue;
2470 if(IS_16X16(mb_type)){
2471 int8_t *ref = &h->ref_cache[list][scan8[0]];
2472 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2473 }else{
2474 for(i=0; i<16; i+=4){
2475 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2476 int ref = h->ref_cache[list][scan8[i]];
2477 if(ref >= 0)
2478 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2483 } else {
2484 linesize = h->mb_linesize = s->linesize;
2485 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2486 // dct_offset = s->linesize * 16;
2489 if(transform_bypass){
2490 idct_dc_add =
2491 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2492 }else if(IS_8x8DCT(mb_type)){
2493 idct_dc_add = s->dsp.h264_idct8_dc_add;
2494 idct_add = s->dsp.h264_idct8_add;
2495 }else{
2496 idct_dc_add = s->dsp.h264_idct_dc_add;
2497 idct_add = s->dsp.h264_idct_add;
2500 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2501 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2502 int mbt_y = mb_y&~1;
2503 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2504 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2505 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2506 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
2509 if (!simple && IS_INTRA_PCM(mb_type)) {
2510 unsigned int x, y;
2512 // The pixels are stored in the h->mb array in the same order as the levels;
2513 // copy them to the output in the correct order.
2514 for(i=0; i<16; i++) {
2515 for (y=0; y<4; y++) {
2516 for (x=0; x<4; x++) {
2517 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2521 for(i=16; i<16+4; i++) {
2522 for (y=0; y<4; y++) {
2523 for (x=0; x<4; x++) {
2524 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2528 for(i=20; i<20+4; i++) {
2529 for (y=0; y<4; y++) {
2530 for (x=0; x<4; x++) {
2531 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2535 } else {
2536 if(IS_INTRA(mb_type)){
2537 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2538 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2540 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2541 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2542 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2545 if(IS_INTRA4x4(mb_type)){
2546 if(simple || !s->encoding){
2547 if(IS_8x8DCT(mb_type)){
2548 for(i=0; i<16; i+=4){
2549 uint8_t * const ptr= dest_y + block_offset[i];
2550 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2551 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2552 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2553 (h->topright_samples_available<<i)&0x4000, linesize);
2554 if(nnz){
2555 if(nnz == 1 && h->mb[i*16])
2556 idct_dc_add(ptr, h->mb + i*16, linesize);
2557 else
2558 idct_add(ptr, h->mb + i*16, linesize);
2561 }else
2562 for(i=0; i<16; i++){
2563 uint8_t * const ptr= dest_y + block_offset[i];
2564 uint8_t *topright;
2565 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2566 int nnz, tr;
2568 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2569 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2570 assert(mb_y || linesize <= block_offset[i]);
2571 if(!topright_avail){
2572 tr= ptr[3 - linesize]*0x01010101;
2573 topright= (uint8_t*) &tr;
2574 }else
2575 topright= ptr + 4 - linesize;
2576 }else
2577 topright= NULL;
2579 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2580 nnz = h->non_zero_count_cache[ scan8[i] ];
2581 if(nnz){
2582 if(is_h264){
2583 if(nnz == 1 && h->mb[i*16])
2584 idct_dc_add(ptr, h->mb + i*16, linesize);
2585 else
2586 idct_add(ptr, h->mb + i*16, linesize);
2587 }else
2588 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2592 }else{
2593 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2594 if(is_h264){
2595 if(!transform_bypass)
2596 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2597 }else
2598 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2600 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2601 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2602 }else if(is_h264){
2603 hl_motion(h, dest_y, dest_cb, dest_cr,
2604 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2605 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2606 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2610 if(!IS_INTRA4x4(mb_type)){
2611 if(is_h264){
2612 if(IS_INTRA16x16(mb_type)){
2613 for(i=0; i<16; i++){
2614 if(h->non_zero_count_cache[ scan8[i] ])
2615 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2616 else if(h->mb[i*16])
2617 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2619 }else{
2620 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2621 for(i=0; i<16; i+=di){
2622 int nnz = h->non_zero_count_cache[ scan8[i] ];
2623 if(nnz){
2624 if(nnz==1 && h->mb[i*16])
2625 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2626 else
2627 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2631 }else{
2632 for(i=0; i<16; i++){
2633 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2634 uint8_t * const ptr= dest_y + block_offset[i];
2635 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2641 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2642 uint8_t *dest[2] = {dest_cb, dest_cr};
2643 if(transform_bypass){
2644 idct_add = idct_dc_add = s->dsp.add_pixels4;
2645 }else{
2646 idct_add = s->dsp.h264_idct_add;
2647 idct_dc_add = s->dsp.h264_idct_dc_add;
2648 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2649 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2651 if(is_h264){
2652 for(i=16; i<16+8; i++){
2653 if(h->non_zero_count_cache[ scan8[i] ])
2654 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2655 else if(h->mb[i*16])
2656 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2658 }else{
2659 for(i=16; i<16+8; i++){
2660 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2661 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2662 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2668 if(h->deblocking_filter) {
2669 if (!simple && FRAME_MBAFF) {
2670 //FIXME try deblocking one mb at a time?
2671 // the reduction in loading/storing mvs and such might outweigh the extra backup/xchg_border
2672 const int mb_y = s->mb_y - 1;
2673 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2674 const int mb_xy= mb_x + mb_y*s->mb_stride;
2675 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2676 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2677 if (!bottom) return;
2678 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2679 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2680 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2682 if(IS_INTRA(mb_type_top | mb_type_bottom))
2683 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2685 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2686 // deblock a pair
2687 // top
2688 s->mb_y--;
2689 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2690 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2691 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2692 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2693 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2694 // bottom
2695 s->mb_y++;
2696 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2697 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2698 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2699 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2700 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2701 } else {
2702 tprintf(h->s.avctx, "call filter_mb\n");
2703 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2704 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2705 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2711 * Process a macroblock; this variant omits the checks needed only for expensive, uncommon cases.
2713 static void hl_decode_mb_simple(H264Context *h){
2714 hl_decode_mb_internal(h, 1);
2718 * Process a macroblock; this handles edge cases, such as interlacing.
2720 static void av_noinline hl_decode_mb_complex(H264Context *h){
2721 hl_decode_mb_internal(h, 0);
2724 static void hl_decode_mb(H264Context *h){
2725 MpegEncContext * const s = &h->s;
2726 const int mb_x= s->mb_x;
2727 const int mb_y= s->mb_y;
2728 const int mb_xy= mb_x + mb_y*s->mb_stride;
2729 const int mb_type= s->current_picture.mb_type[mb_xy];
2730 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2732 if(!s->decode)
2733 return;
2735 if (is_complex)
2736 hl_decode_mb_complex(h);
2737 else hl_decode_mb_simple(h);
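/**
 * Turns a frame Picture into a field Picture of the given parity by
 * doubling the linesizes and, for the bottom field, advancing the data
 * pointers by one line.
 */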
2740 static void pic_as_field(Picture *pic, const int parity){
2741 int i;
2742 for (i = 0; i < 4; ++i) {
2743 if (parity == PICT_BOTTOM_FIELD)
2744 pic->data[i] += pic->linesize[i];
2745 pic->reference = parity;
2746 pic->linesize[i] *= 2;
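/**
 * Copies src to dest as a single field of the given parity, if src is
 * referenced with that parity, and renumbers its pic_id to a field
 * picture number.
 * @return 1 if a field was copied, 0 otherwise
 */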
2750 static int split_field_copy(Picture *dest, Picture *src,
2751 int parity, int id_add){
2752 int match = !!(src->reference & parity);
2754 if (match) {
2755 *dest = *src;
2756 pic_as_field(dest, parity);
2757 dest->pic_id *= 2;
2758 dest->pic_id += id_add;
2761 return match;
2765 * Split one reference list into field parts, interleaving by parity
2766 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2767 * set to look at the actual start of data for that field.
2769 * @param dest output list
2770 * @param dest_len maximum number of fields to put in dest
2771 * @param src the source reference list containing fields and/or field pairs
2772 * (aka short_ref/long_ref, or
2773 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2774 * @param src_len number of Picture's in source (pairs and unmatched fields)
2775 * @param parity the parity of the picture being decoded/needing
2776 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2777 * @return number of fields placed in dest
2779 static int split_field_half_ref_list(Picture *dest, int dest_len,
2780 Picture *src, int src_len, int parity){
2781 int same_parity = 1;
2782 int same_i = 0;
2783 int opp_i = 0;
2784 int out_i;
2785 int field_output;
2787 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2788 if (same_parity && same_i < src_len) {
2789 field_output = split_field_copy(dest + out_i, src + same_i,
2790 parity, 1);
2791 same_parity = !field_output;
2792 same_i++;
2794 } else if (opp_i < src_len) {
2795 field_output = split_field_copy(dest + out_i, src + opp_i,
2796 PICT_FRAME - parity, 0);
2797 same_parity = field_output;
2798 opp_i++;
2800 } else {
2801 break;
2805 return out_i;
2809 * Split the reference frame list into a reference field list.
2810 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2811 * The input list contains both reference field pairs and
2812 * unmatched reference fields; it is ordered as spec describes
2813 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2814 * unmatched fields are also present. Conceptually this is equivalent
2815 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2817 * @param dest output reference list where ordered fields are to be placed
2818 * @param dest_len max number of fields to place at dest
2819 * @param src source reference list, as described above
2820 * @param src_len number of pictures (pairs and unmatched fields) in src
2821 * @param parity parity of field being currently decoded
2822 * (one of PICT_{TOP,BOTTOM}_FIELD)
2823 * @param long_i index into src array that holds first long reference picture,
2824 * or src_len if no long refs present.
2826 static int split_field_ref_list(Picture *dest, int dest_len,
2827 Picture *src, int src_len,
2828 int parity, int long_i){
2830 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2831 dest += i;
2832 dest_len -= i;
2834 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2835 src_len - long_i, parity);
2836 return i;
2840 * fills the default_ref_list.
2842 static int fill_default_ref_list(H264Context *h){
2843 MpegEncContext * const s = &h->s;
2844 int i;
2845 int smallest_poc_greater_than_current = -1;
2846 int structure_sel;
2847 Picture sorted_short_ref[32];
2848 Picture field_entry_list[2][32];
2849 Picture *frame_list[2];
2851 if (FIELD_PICTURE) {
2852 structure_sel = PICT_FRAME;
2853 frame_list[0] = field_entry_list[0];
2854 frame_list[1] = field_entry_list[1];
2855 } else {
2856 structure_sel = 0;
2857 frame_list[0] = h->default_ref_list[0];
2858 frame_list[1] = h->default_ref_list[1];
2861 if(h->slice_type==FF_B_TYPE){
2862 int list;
2863 int len[2];
2864 int short_len[2];
2865 int out_i;
2866 int limit= INT_MIN;
2868 /* sort the frames by poc for B slices */
2869 for(out_i=0; out_i<h->short_ref_count; out_i++){
2870 int best_i=INT_MIN;
2871 int best_poc=INT_MAX;
2873 for(i=0; i<h->short_ref_count; i++){
2874 const int poc= h->short_ref[i]->poc;
2875 if(poc > limit && poc < best_poc){
2876 best_poc= poc;
2877 best_i= i;
2881 assert(best_i != INT_MIN);
2883 limit= best_poc;
2884 sorted_short_ref[out_i]= *h->short_ref[best_i];
2885 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
2886 if (-1 == smallest_poc_greater_than_current) {
2887 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2888 smallest_poc_greater_than_current = out_i;
2893 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2895 // build the two frame lists: short refs ordered around the current poc, then the long refs
2896 for(list=0; list<2; list++){
2897 int index = 0;
2898 int j= -99;
2899 int step= list ? -1 : 1;
2901 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2902 int sel;
2903 while(j<0 || j>= h->short_ref_count){
2904 if(j != -99 && step == (list ? -1 : 1))
2905 return -1;
2906 step = -step;
2907 j= smallest_poc_greater_than_current + (step>>1);
2909 sel = sorted_short_ref[j].reference | structure_sel;
2910 if(sel != PICT_FRAME) continue;
2911 frame_list[list][index ]= sorted_short_ref[j];
2912 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2914 short_len[list] = index;
2916 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2917 int sel;
2918 if(h->long_ref[i] == NULL) continue;
2919 sel = h->long_ref[i]->reference | structure_sel;
2920 if(sel != PICT_FRAME) continue;
2922 frame_list[ list ][index ]= *h->long_ref[i];
2923 frame_list[ list ][index++].pic_id= i;
2925 len[list] = index;
2928 for(list=0; list<2; list++){
2929 if (FIELD_PICTURE)
2930 len[list] = split_field_ref_list(h->default_ref_list[list],
2931 h->ref_count[list],
2932 frame_list[list],
2933 len[list],
2934 s->picture_structure,
2935 short_len[list]);
2937 // swap the first two elements of L1 when L0 and L1 are identical
2938 if(list && len[0] > 1 && len[0] == len[1])
2939 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2940 if(i == len[0]){
2941 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2942 break;
2945 if(len[list] < h->ref_count[ list ])
2946 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
2950 }else{
2951 int index=0;
2952 int short_len;
2953 for(i=0; i<h->short_ref_count; i++){
2954 int sel;
2955 sel = h->short_ref[i]->reference | structure_sel;
2956 if(sel != PICT_FRAME) continue;
2957 frame_list[0][index ]= *h->short_ref[i];
2958 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2960 short_len = index;
2961 for(i = 0; i < 16; i++){
2962 int sel;
2963 if(h->long_ref[i] == NULL) continue;
2964 sel = h->long_ref[i]->reference | structure_sel;
2965 if(sel != PICT_FRAME) continue;
2966 frame_list[0][index ]= *h->long_ref[i];
2967 frame_list[0][index++].pic_id= i;
2970 if (FIELD_PICTURE)
2971 index = split_field_ref_list(h->default_ref_list[0],
2972 h->ref_count[0], frame_list[0],
2973 index, s->picture_structure,
2974 short_len);
2976 if(index < h->ref_count[0])
2977 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2979 #ifdef TRACE
2980 for (i=0; i<h->ref_count[0]; i++) {
2981 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2983 if(h->slice_type==FF_B_TYPE){
2984 for (i=0; i<h->ref_count[1]; i++) {
2985 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2988 #endif
2989 return 0;
2992 static void print_short_term(H264Context *h);
2993 static void print_long_term(H264Context *h);
2996 * Extract structure information about the picture described by pic_num in
2997 * the current decoding context (frame or field). Note that pic_num is
2998 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2999 * @param pic_num picture number for which to extract structure information
3000 * @param structure one of PICT_XXX describing structure of picture
3001 * with pic_num
3002 * @return frame number (short term) or long term index of picture
3003 * described by pic_num
3005 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3006 MpegEncContext * const s = &h->s;
3008 *structure = s->picture_structure;
3009 if(FIELD_PICTURE){
3010 if (!(pic_num & 1))
3011 /* opposite field */
3012 *structure ^= PICT_FRAME;
3013 pic_num >>= 1;
3016 return pic_num;
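/**
 * Parses the ref_pic_list_reordering() syntax and applies the requested
 * reordering to ref_list[0]/ref_list[1], starting from the default lists.
 */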
3019 static int decode_ref_pic_list_reordering(H264Context *h){
3020 MpegEncContext * const s = &h->s;
3021 int list, index, pic_structure;
3023 print_short_term(h);
3024 print_long_term(h);
3025 if(h->slice_type==FF_I_TYPE || h->slice_type==FF_SI_TYPE) return 0; //FIXME move before func
3027 for(list=0; list<h->list_count; list++){
3028 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3030 if(get_bits1(&s->gb)){
3031 int pred= h->curr_pic_num;
3033 for(index=0; ; index++){
3034 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3035 unsigned int pic_id;
3036 int i;
3037 Picture *ref = NULL;
3039 if(reordering_of_pic_nums_idc==3)
3040 break;
3042 if(index >= h->ref_count[list]){
3043 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3044 return -1;
3047 if(reordering_of_pic_nums_idc<3){
3048 if(reordering_of_pic_nums_idc<2){
3049 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3050 int frame_num;
3052 if(abs_diff_pic_num > h->max_pic_num){
3053 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3054 return -1;
3057 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3058 else pred+= abs_diff_pic_num;
3059 pred &= h->max_pic_num - 1;
3061 frame_num = pic_num_extract(h, pred, &pic_structure);
3063 for(i= h->short_ref_count-1; i>=0; i--){
3064 ref = h->short_ref[i];
3065 assert(ref->reference);
3066 assert(!ref->long_ref);
3067 if(ref->data[0] != NULL &&
3068 ref->frame_num == frame_num &&
3069 (ref->reference & pic_structure) &&
3070 ref->long_ref == 0) // ignore non-existing pictures by testing the data[0] pointer
3071 break;
3073 if(i>=0)
3074 ref->pic_id= pred;
3075 }else{
3076 int long_idx;
3077 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3079 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3081 if(long_idx>31){
3082 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3083 return -1;
3085 ref = h->long_ref[long_idx];
3086 assert(!(ref && !ref->reference));
3087 if(ref && (ref->reference & pic_structure)){
3088 ref->pic_id= pic_id;
3089 assert(ref->long_ref);
3090 i=0;
3091 }else{
3092 i=-1;
3096 if (i < 0) {
3097 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3098 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
3099 } else {
3100 for(i=index; i+1<h->ref_count[list]; i++){
3101 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3102 break;
3104 for(; i > index; i--){
3105 h->ref_list[list][i]= h->ref_list[list][i-1];
3107 h->ref_list[list][index]= *ref;
3108 if (FIELD_PICTURE){
3109 pic_as_field(&h->ref_list[list][index], pic_structure);
3112 }else{
3113 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3114 return -1;
3119 for(list=0; list<h->list_count; list++){
3120 for(index= 0; index < h->ref_count[list]; index++){
3121 if(!h->ref_list[list][index].data[0])
3122 h->ref_list[list][index]= s->current_picture;
3126 if(h->slice_type==FF_B_TYPE && !h->direct_spatial_mv_pred)
3127 direct_dist_scale_factor(h);
3128 direct_ref_list_init(h);
3129 return 0;
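/**
 * For MBAFF frames, appends the top and bottom field of every reference
 * frame to the reference lists (at indices 16+2*i and 16+2*i+1) and
 * duplicates the corresponding prediction weights.
 */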
3132 static void fill_mbaff_ref_list(H264Context *h){
3133 int list, i, j;
3134 for(list=0; list<2; list++){ //FIXME try list_count
3135 for(i=0; i<h->ref_count[list]; i++){
3136 Picture *frame = &h->ref_list[list][i];
3137 Picture *field = &h->ref_list[list][16+2*i];
3138 field[0] = *frame;
3139 for(j=0; j<3; j++)
3140 field[0].linesize[j] <<= 1;
3141 field[0].reference = PICT_TOP_FIELD;
3142 field[1] = field[0];
3143 for(j=0; j<3; j++)
3144 field[1].data[j] += frame->linesize[j];
3145 field[1].reference = PICT_BOTTOM_FIELD;
3147 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3148 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3149 for(j=0; j<2; j++){
3150 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3151 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
3155 for(j=0; j<h->ref_count[1]; j++){
3156 for(i=0; i<h->ref_count[0]; i++)
3157 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3158 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3159 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
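/**
 * Parses the pred_weight_table() syntax: explicit luma and chroma weights
 * and offsets for each reference; use_weight/use_weight_chroma are set
 * when any non-default value is present.
 */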
3163 static int pred_weight_table(H264Context *h){
3164 MpegEncContext * const s = &h->s;
3165 int list, i;
3166 int luma_def, chroma_def;
3168 h->use_weight= 0;
3169 h->use_weight_chroma= 0;
3170 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3171 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3172 luma_def = 1<<h->luma_log2_weight_denom;
3173 chroma_def = 1<<h->chroma_log2_weight_denom;
3175 for(list=0; list<2; list++){
3176 for(i=0; i<h->ref_count[list]; i++){
3177 int luma_weight_flag, chroma_weight_flag;
3179 luma_weight_flag= get_bits1(&s->gb);
3180 if(luma_weight_flag){
3181 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3182 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3183 if( h->luma_weight[list][i] != luma_def
3184 || h->luma_offset[list][i] != 0)
3185 h->use_weight= 1;
3186 }else{
3187 h->luma_weight[list][i]= luma_def;
3188 h->luma_offset[list][i]= 0;
3191 chroma_weight_flag= get_bits1(&s->gb);
3192 if(chroma_weight_flag){
3193 int j;
3194 for(j=0; j<2; j++){
3195 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3196 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3197 if( h->chroma_weight[list][i][j] != chroma_def
3198 || h->chroma_offset[list][i][j] != 0)
3199 h->use_weight_chroma= 1;
3201 }else{
3202 int j;
3203 for(j=0; j<2; j++){
3204 h->chroma_weight[list][i][j]= chroma_def;
3205 h->chroma_offset[list][i][j]= 0;
3209 if(h->slice_type != FF_B_TYPE) break;
3211 h->use_weight= h->use_weight || h->use_weight_chroma;
3212 return 0;
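/**
 * Computes the implicit bi-prediction weight table from the picture order
 * counts. For each (ref0, ref1) pair:
 *   td = clip(poc1 - poc0, -128, 127), tb = clip(poc_cur - poc0, -128, 127)
 *   tx = (16384 + |td|/2) / td
 *   dsf = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2
 *   weight0 = 64 - dsf (weight1 = 64 - weight0 at prediction time)
 * Equal weights (32) are used when td==0 or dsf falls outside [-64,128];
 * weighting is disabled entirely for the single-reference, symmetric-poc
 * case.
 */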
3215 static void implicit_weight_table(H264Context *h){
3216 MpegEncContext * const s = &h->s;
3217 int ref0, ref1;
3218 int cur_poc = s->current_picture_ptr->poc;
3220 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3221 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3222 h->use_weight= 0;
3223 h->use_weight_chroma= 0;
3224 return;
3227 h->use_weight= 2;
3228 h->use_weight_chroma= 2;
3229 h->luma_log2_weight_denom= 5;
3230 h->chroma_log2_weight_denom= 5;
3232 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3233 int poc0 = h->ref_list[0][ref0].poc;
3234 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3235 int poc1 = h->ref_list[1][ref1].poc;
3236 int td = av_clip(poc1 - poc0, -128, 127);
3237 if(td){
3238 int tb = av_clip(cur_poc - poc0, -128, 127);
3239 int tx = (16384 + (FFABS(td) >> 1)) / td;
3240 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3241 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3242 h->implicit_weight[ref0][ref1] = 32;
3243 else
3244 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3245 }else
3246 h->implicit_weight[ref0][ref1] = 32;
3252 * Mark a picture as no longer needed for reference. The refmask
3253 * argument allows unreferencing of individual fields or the whole frame.
3254 * If the picture becomes entirely unreferenced, but is being held for
3255 * display purposes, it is marked as such.
3256 * @param refmask mask of fields to unreference; the mask is bitwise
3257 * ANDed with the reference marking of pic
3258 * @return non-zero if pic becomes entirely unreferenced (except possibly
3259 * for display purposes), zero if one of the fields remains in
3260 * reference
3262 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3263 int i;
3264 if (pic->reference &= refmask) {
3265 return 0;
3266 } else {
3267 if(pic == h->delayed_output_pic)
3268 pic->reference=DELAYED_PIC_REF;
3269 else{
3270 for(i = 0; h->delayed_pic[i]; i++)
3271 if(pic == h->delayed_pic[i]){
3272 pic->reference=DELAYED_PIC_REF;
3273 break;
3276 return 1;
3281 * instantaneous decoder refresh.
3283 static void idr(H264Context *h){
3284 int i;
3286 for(i=0; i<16; i++){
3287 if (h->long_ref[i] != NULL) {
3288 unreference_pic(h, h->long_ref[i], 0);
3289 h->long_ref[i]= NULL;
3292 h->long_ref_count=0;
3294 for(i=0; i<h->short_ref_count; i++){
3295 unreference_pic(h, h->short_ref[i], 0);
3296 h->short_ref[i]= NULL;
3298 h->short_ref_count=0;
3301 /* forget old pics after a seek */
3302 static void flush_dpb(AVCodecContext *avctx){
3303 H264Context *h= avctx->priv_data;
3304 int i;
3305 for(i=0; i<16; i++) {
3306 if(h->delayed_pic[i])
3307 h->delayed_pic[i]->reference= 0;
3308 h->delayed_pic[i]= NULL;
3310 if(h->delayed_output_pic)
3311 h->delayed_output_pic->reference= 0;
3312 h->delayed_output_pic= NULL;
3313 idr(h);
3314 if(h->s.current_picture_ptr)
3315 h->s.current_picture_ptr->reference= 0;
3316 h->s.first_field= 0;
3317 ff_mpeg_flush(avctx);
3321 * Find a Picture in the short term reference list by frame number.
3322 * @param frame_num frame number to search for
3323 * @param idx the index into h->short_ref where the returned picture is found;
3324 * undefined if no picture is found.
3325 * @return pointer to the found picture, or NULL if no pic with the provided
3326 * frame number is found
3328 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3329 MpegEncContext * const s = &h->s;
3330 int i;
3332 for(i=0; i<h->short_ref_count; i++){
3333 Picture *pic= h->short_ref[i];
3334 if(s->avctx->debug&FF_DEBUG_MMCO)
3335 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3336 if(pic->frame_num == frame_num) {
3337 *idx = i;
3338 return pic;
3341 return NULL;
3345 * Remove a picture from the short term reference list by its index in
3346 * that list. This does no checking on the provided index; it is assumed
3347 * to be valid. Other list entries are shifted down.
3348 * @param i index into h->short_ref of picture to remove.
3350 static void remove_short_at_index(H264Context *h, int i){
3351 assert(i >= 0 && i < h->short_ref_count);
3352 h->short_ref[i]= NULL;
3353 if (--h->short_ref_count)
3354 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3359 * @return the removed picture or NULL if an error occurs
3361 static Picture * remove_short(H264Context *h, int frame_num){
3362 MpegEncContext * const s = &h->s;
3363 Picture *pic;
3364 int i;
3366 if(s->avctx->debug&FF_DEBUG_MMCO)
3367 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3369 pic = find_short(h, frame_num, &i);
3370 if (pic)
3371 remove_short_at_index(h, i);
3373 return pic;
3377 * Remove a picture from the long term reference list by its index in
3378 * that list. This does no checking on the provided index; it is assumed
3379 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3380 * @param i index into h->long_ref of picture to remove.
3382 static void remove_long_at_index(H264Context *h, int i){
3383 h->long_ref[i]= NULL;
3384 h->long_ref_count--;
3389 * @return the removed picture or NULL if an error occurs
3391 static Picture * remove_long(H264Context *h, int i){
3392 Picture *pic;
3394 pic= h->long_ref[i];
3395 if (pic)
3396 remove_long_at_index(h, i);
3398 return pic;
3402 * print short term list
3404 static void print_short_term(H264Context *h) {
3405 uint32_t i;
3406 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3407 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3408 for(i=0; i<h->short_ref_count; i++){
3409 Picture *pic= h->short_ref[i];
3410 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3416 * print long term list
3418 static void print_long_term(H264Context *h) {
3419 uint32_t i;
3420 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3421 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3422 for(i = 0; i < 16; i++){
3423 Picture *pic= h->long_ref[i];
3424 if (pic) {
3425 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3432 * Executes the reference picture marking (memory management control operations).
3434 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3435 MpegEncContext * const s = &h->s;
3436 int i, j;
3437 int current_ref_assigned=0;
3438 Picture *pic;
3440 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3441 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3443 for(i=0; i<mmco_count; i++){
3444 int structure, frame_num, unref_pic;
3445 if(s->avctx->debug&FF_DEBUG_MMCO)
3446 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3448 switch(mmco[i].opcode){
3449 case MMCO_SHORT2UNUSED:
3450 if(s->avctx->debug&FF_DEBUG_MMCO)
3451 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3452 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3453 pic = find_short(h, frame_num, &j);
3454 if (pic) {
3455 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3456 remove_short_at_index(h, j);
3457 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3458 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
3459 break;
3460 case MMCO_SHORT2LONG:
3461 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3462 h->long_ref[mmco[i].long_arg]->frame_num ==
3463 mmco[i].short_pic_num / 2) {
3464 /* do nothing, we've already moved this field pair. */
3465 } else {
3466 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
3468 pic= remove_long(h, mmco[i].long_arg);
3469 if(pic) unreference_pic(h, pic, 0);
3471 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3472 if (h->long_ref[ mmco[i].long_arg ]){
3473 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3474 h->long_ref_count++;
3477 break;
3478 case MMCO_LONG2UNUSED:
3479 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3480 pic = h->long_ref[j];
3481 if (pic) {
3482 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3483 remove_long_at_index(h, j);
3484 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3485 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3486 break;
3487 case MMCO_LONG:
3488 unref_pic = 1;
3489 if (FIELD_PICTURE && !s->first_field) {
3490 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3491 /* Just mark second field as referenced */
3492 unref_pic = 0;
3493 } else if (s->current_picture_ptr->reference) {
3494 /* First field in pair is in short term list or
3495 * at a different long term index.
3496 * This is not allowed; see 7.4.3, notes 2 and 3.
3497 * Report the problem and keep the pair where it is,
3498 * and mark this field valid.
3500 av_log(h->s.avctx, AV_LOG_ERROR,
3501 "illegal long term reference assignment for second "
3502 "field in complementary field pair (first field is "
3503 "short term or has non-matching long index)\n");
3504 unref_pic = 0;
3508 if (unref_pic) {
3509 pic= remove_long(h, mmco[i].long_arg);
3510 if(pic) unreference_pic(h, pic, 0);
3512 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3513 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3514 h->long_ref_count++;
3517 s->current_picture_ptr->reference |= s->picture_structure;
3518 current_ref_assigned=1;
3519 break;
3520 case MMCO_SET_MAX_LONG:
3521 assert(mmco[i].long_arg <= 16);
3522 // just remove the long term references whose index is greater than the new max
3523 for(j = mmco[i].long_arg; j<16; j++){
3524 pic = remove_long(h, j);
3525 if (pic) unreference_pic(h, pic, 0);
3527 break;
3528 case MMCO_RESET:
3529 while(h->short_ref_count){
3530 pic= remove_short(h, h->short_ref[0]->frame_num);
3531 if(pic) unreference_pic(h, pic, 0);
3533 for(j = 0; j < 16; j++) {
3534 pic= remove_long(h, j);
3535 if(pic) unreference_pic(h, pic, 0);
3537 break;
3538 default: assert(0);
3542 if (!current_ref_assigned && FIELD_PICTURE &&
3543 !s->first_field && s->current_picture_ptr->reference) {
3545 /* Second field of complementary field pair; the first field of
3546 * which is already referenced. If short referenced, it
3547 * should be first entry in short_ref. If not, it must exist
3548 * in long_ref; trying to put it on the short list here is an
3549 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3551 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3552 /* Just mark the second field valid */
3553 s->current_picture_ptr->reference = PICT_FRAME;
3554 } else if (s->current_picture_ptr->long_ref) {
3555 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3556 "assignment for second field "
3557 "in complementary field pair "
3558 "(first field is long term)\n");
3559 } else {
3561 * First field in reference, but not in any sensible place on our
3562 * reference lists. This shouldn't happen unless reference
3563 * handling somewhere else is wrong.
3565 assert(0);
3567 current_ref_assigned = 1;
3570 if(!current_ref_assigned){
3571 pic= remove_short(h, s->current_picture_ptr->frame_num);
3572 if(pic){
3573 unreference_pic(h, pic, 0);
3574 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3577 if(h->short_ref_count)
3578 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3580 h->short_ref[0]= s->current_picture_ptr;
3581 h->short_ref[0]->long_ref=0;
3582 h->short_ref_count++;
3583 s->current_picture_ptr->reference |= s->picture_structure;
3586 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3588 /* We have too many reference frames, probably due to corrupted
3589 * stream. Need to discard one frame. Prevents overrun of the
3590 * short_ref and long_ref buffers.
3592 av_log(h->s.avctx, AV_LOG_ERROR,
3593 "number of reference frames exceeds max (probably "
3594 "corrupt input), discarding one\n");
3596 if (h->long_ref_count) {
3597 for (i = 0; i < 16; ++i)
3598 if (h->long_ref[i])
3599 break;
3601 assert(i < 16);
3602 pic = h->long_ref[i];
3603 remove_long_at_index(h, i);
3604 } else {
3605 pic = h->short_ref[h->short_ref_count - 1];
3606 remove_short_at_index(h, h->short_ref_count - 1);
3608 unreference_pic(h, pic, 0);
3611 print_short_term(h);
3612 print_long_term(h);
3613 return 0;
3616 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3617 MpegEncContext * const s = &h->s;
3618 int i;
3620 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3621 s->broken_link= get_bits1(gb) -1;
3622 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3623 if(h->mmco[0].long_arg == -1)
3624 h->mmco_index= 0;
3625 else{
3626 h->mmco[0].opcode= MMCO_LONG;
3627 h->mmco_index= 1;
3629 }else{
3630 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3631 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3632 MMCOOpcode opcode= get_ue_golomb(gb);
3634 h->mmco[i].opcode= opcode;
3635 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3636 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3637 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3638 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3639 return -1;
3642 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3643 unsigned int long_arg= get_ue_golomb(gb);
3644 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3645 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3646 return -1;
3648 h->mmco[i].long_arg= long_arg;
3651 if(opcode > (unsigned)MMCO_LONG){
3652 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3653 return -1;
3655 if(opcode == MMCO_END)
3656 break;
3658 h->mmco_index= i;
3659 }else{
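/* adaptive_ref_pic_marking_mode_flag is 0: use sliding window marking. Once
 * the reference buffer is full, synthesize an MMCO that unreferences the
 * oldest short term picture (two MMCOs for field coding, one per field); the
 * second field of an already referenced pair does not need to evict anything. */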
3660 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3662 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3663 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3664 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3665 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3666 h->mmco_index= 1;
3667 if (FIELD_PICTURE) {
3668 h->mmco[0].short_pic_num *= 2;
3669 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3670 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3671 h->mmco_index= 2;
3673 }else
3674 h->mmco_index= 0;
3678 return 0;
3681 static int init_poc(H264Context *h){
3682 MpegEncContext * const s = &h->s;
3683 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3684 int field_poc[2];
3686 if(h->nal_unit_type == NAL_IDR_SLICE){
3687 h->frame_num_offset= 0;
3688 }else{
3689 if(h->frame_num < h->prev_frame_num)
3690 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3691 else
3692 h->frame_num_offset= h->prev_frame_num_offset;
3695 if(h->sps.poc_type==0){
3696 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3698 if(h->nal_unit_type == NAL_IDR_SLICE){
3699 h->prev_poc_msb=
3700 h->prev_poc_lsb= 0;
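/* POC type 0: recover the MSB from LSB wraparound. If poc_lsb jumped down (or
 * up) by about half the LSB range or more relative to the previous picture,
 * the MSB must have advanced (or receded) by one period, as in the spec's
 * picture order count decoding process. */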
3703 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3704 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3705 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3706 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3707 else
3708 h->poc_msb = h->prev_poc_msb;
3709 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3710 field_poc[0] =
3711 field_poc[1] = h->poc_msb + h->poc_lsb;
3712 if(s->picture_structure == PICT_FRAME)
3713 field_poc[1] += h->delta_poc_bottom;
3714 }else if(h->sps.poc_type==1){
3715 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3716 int i;
3718 if(h->sps.poc_cycle_length != 0)
3719 abs_frame_num = h->frame_num_offset + h->frame_num;
3720 else
3721 abs_frame_num = 0;
3723 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3724 abs_frame_num--;
3726 expected_delta_per_poc_cycle = 0;
3727 for(i=0; i < h->sps.poc_cycle_length; i++)
3728 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3730 if(abs_frame_num > 0){
3731 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3732 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3734 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3735 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3736 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3737 } else
3738 expectedpoc = 0;
3740 if(h->nal_ref_idc == 0)
3741 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3743 field_poc[0] = expectedpoc + h->delta_poc[0];
3744 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3746 if(s->picture_structure == PICT_FRAME)
3747 field_poc[1] += h->delta_poc[1];
3748 }else{
3749 int poc;
3750 if(h->nal_unit_type == NAL_IDR_SLICE){
3751 poc= 0;
3752 }else{
3753 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3754 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
3756 field_poc[0]= poc;
3757 field_poc[1]= poc;
3760 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3761 s->current_picture_ptr->field_poc[0]= field_poc[0];
3762 s->current_picture_ptr->poc = field_poc[0];
3764 if(s->picture_structure != PICT_TOP_FIELD) {
3765 s->current_picture_ptr->field_poc[1]= field_poc[1];
3766 s->current_picture_ptr->poc = field_poc[1];
3768 if(!FIELD_PICTURE || !s->first_field) {
3769 Picture *cur = s->current_picture_ptr;
3770 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3773 return 0;
3778 * initialize scan tables
3780 static void init_scan_tables(H264Context *h){
3781 MpegEncContext * const s = &h->s;
3782 int i;
3783 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME a little ugly
3784 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3785 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3786 }else{
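/* The optimized (non-C) IDCTs presumably operate on a transposed coefficient
 * block, so transpose the scan tables to match: T() swaps the row and column
 * of each 4x4 (and, below, 8x8) scan position. */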
3787 for(i=0; i<16; i++){
3788 #define T(x) (x>>2) | ((x<<2) & 0xF)
3789 h->zigzag_scan[i] = T(zigzag_scan[i]);
3790 h-> field_scan[i] = T( field_scan[i]);
3791 #undef T
3794 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3795 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3796 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3797 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3798 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3799 }else{
3800 for(i=0; i<64; i++){
3801 #define T(x) (x>>3) | ((x&7)<<3)
3802 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3803 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3804 h->field_scan8x8[i] = T(field_scan8x8[i]);
3805 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3806 #undef T
3809 if(h->sps.transform_bypass){ //FIXME same ugliness
3810 h->zigzag_scan_q0 = zigzag_scan;
3811 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3812 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3813 h->field_scan_q0 = field_scan;
3814 h->field_scan8x8_q0 = field_scan8x8;
3815 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3816 }else{
3817 h->zigzag_scan_q0 = h->zigzag_scan;
3818 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3819 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3820 h->field_scan_q0 = h->field_scan;
3821 h->field_scan8x8_q0 = h->field_scan8x8;
3822 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3827 * Replicates H264 "master" context to thread contexts.
3829 static void clone_slice(H264Context *dst, H264Context *src)
3831 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3832 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3833 dst->s.current_picture = src->s.current_picture;
3834 dst->s.linesize = src->s.linesize;
3835 dst->s.uvlinesize = src->s.uvlinesize;
3836 dst->s.first_field = src->s.first_field;
3838 dst->prev_poc_msb = src->prev_poc_msb;
3839 dst->prev_poc_lsb = src->prev_poc_lsb;
3840 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3841 dst->prev_frame_num = src->prev_frame_num;
3842 dst->short_ref_count = src->short_ref_count;
3844 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3845 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3846 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3847 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3849 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3850 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3854 * decodes a slice header.
3855 * this will also call MPV_common_init() and frame_start() as needed
3857 * @param h h264context
3858 * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
3860 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3862 static int decode_slice_header(H264Context *h, H264Context *h0){
3863 MpegEncContext * const s = &h->s;
3864 MpegEncContext * const s0 = &h0->s;
3865 unsigned int first_mb_in_slice;
3866 unsigned int pps_id;
3867 int num_ref_idx_active_override_flag;
3868 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3869 unsigned int slice_type, tmp, i;
3870 int default_ref_list_done = 0;
3871 int last_pic_structure;
3873 s->dropable= h->nal_ref_idc == 0;
3875 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3876 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3877 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3878 }else{
3879 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3880 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3883 first_mb_in_slice= get_ue_golomb(&s->gb);
3885 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3886 h0->current_slice = 0;
3887 if (!s0->first_field)
3888 s->current_picture_ptr= NULL;
3891 slice_type= get_ue_golomb(&s->gb);
3892 if(slice_type > 9){
3893 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", slice_type, s->mb_x, s->mb_y);
3894 return -1;
3896 if(slice_type > 4){
3897 slice_type -= 5;
3898 h->slice_type_fixed=1;
3899 }else
3900 h->slice_type_fixed=0;
3902 slice_type= slice_type_map[ slice_type ];
3903 if (slice_type == FF_I_TYPE
3904 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3905 default_ref_list_done = 1;
3907 h->slice_type= slice_type;
3909 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3910 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3911 av_log(h->s.avctx, AV_LOG_ERROR,
3912 "B picture before any references, skipping\n");
3913 return -1;
3916 pps_id= get_ue_golomb(&s->gb);
3917 if(pps_id>=MAX_PPS_COUNT){
3918 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3919 return -1;
3921 if(!h0->pps_buffers[pps_id]) {
3922 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3923 return -1;
3925 h->pps= *h0->pps_buffers[pps_id];
3927 if(!h0->sps_buffers[h->pps.sps_id]) {
3928 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3929 return -1;
3931 h->sps = *h0->sps_buffers[h->pps.sps_id];
3933 if(h == h0 && h->dequant_coeff_pps != pps_id){
3934 h->dequant_coeff_pps = pps_id;
3935 init_dequant_tables(h);
3938 s->mb_width= h->sps.mb_width;
3939 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3941 h->b_stride= s->mb_width*4;
3942 h->b8_stride= s->mb_width*2;
3944 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
3945 if(h->sps.frame_mbs_only_flag)
3946 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
3947 else
3948 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
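/* SPS frame cropping offsets are in chroma sample units, i.e. for the 4:2:0
 * content handled here 2 luma pixels horizontally and 2 (frame coded) or 4
 * (field coded) luma pixels vertically, hence the factors above. */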
3950 if (s->context_initialized
3951 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3952 if(h != h0)
3953 return -1; // width / height changed during parallelized decoding
3954 free_tables(h);
3955 MPV_common_end(s);
3957 if (!s->context_initialized) {
3958 if(h != h0)
3959 return -1; // we can't (re-)initialize context during parallel decoding
3960 if (MPV_common_init(s) < 0)
3961 return -1;
3962 s->first_field = 0;
3964 init_scan_tables(h);
3965 alloc_tables(h);
3967 for(i = 1; i < s->avctx->thread_count; i++) {
3968 H264Context *c;
3969 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3970 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3971 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3972 c->sps = h->sps;
3973 c->pps = h->pps;
3974 init_scan_tables(c);
3975 clone_tables(c, h);
3978 for(i = 0; i < s->avctx->thread_count; i++)
3979 if(context_init(h->thread_context[i]) < 0)
3980 return -1;
3982 s->avctx->width = s->width;
3983 s->avctx->height = s->height;
3984 s->avctx->sample_aspect_ratio= h->sps.sar;
3985 if(!s->avctx->sample_aspect_ratio.den)
3986 s->avctx->sample_aspect_ratio.den = 1;
3988 if(h->sps.timing_info_present_flag){
3989 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3990 if(h->x264_build > 0 && h->x264_build < 44)
3991 s->avctx->time_base.den *= 2;
3992 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3993 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3997 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3999 h->mb_mbaff = 0;
4000 h->mb_aff_frame = 0;
4001 last_pic_structure = s0->picture_structure;
4002 if(h->sps.frame_mbs_only_flag){
4003 s->picture_structure= PICT_FRAME;
4004 }else{
4005 if(get_bits1(&s->gb)) { //field_pic_flag
4006 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4007 } else {
4008 s->picture_structure= PICT_FRAME;
4009 h->mb_aff_frame = h->sps.mb_aff;
4013 if(h0->current_slice == 0){
4014 /* See if we have a decoded first field looking for a pair... */
4015 if (s0->first_field) {
4016 assert(s0->current_picture_ptr);
4017 assert(s0->current_picture_ptr->data[0]);
4018 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4020 /* figure out if we have a complementary field pair */
4021 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4023 * Previous field is unmatched. Don't display it, but let it
4024 * remain for reference if marked as such.
4026 s0->current_picture_ptr = NULL;
4027 s0->first_field = FIELD_PICTURE;
4029 } else {
4030 if (h->nal_ref_idc &&
4031 s0->current_picture_ptr->reference &&
4032 s0->current_picture_ptr->frame_num != h->frame_num) {
4034 * This and the previous field were reference fields, but had
4035 * different frame_nums. Consider this field the first in a new
4036 * pair. Throw away the previous field except for reference
4037 * purposes.
4039 s0->first_field = 1;
4040 s0->current_picture_ptr = NULL;
4042 } else {
4043 /* Second field in complementary pair */
4044 s0->first_field = 0;
4048 } else {
4049 /* Frame or first field in a potentially complementary pair */
4050 assert(!s0->current_picture_ptr);
4051 s0->first_field = FIELD_PICTURE;
4054 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4055 s0->first_field = 0;
4056 return -1;
4059 if(h != h0)
4060 clone_slice(h, h0);
4062 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4064 assert(s->mb_num == s->mb_width * s->mb_height);
4065 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4066 first_mb_in_slice >= s->mb_num){
4067 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4068 return -1;
4070 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4071 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4072 if (s->picture_structure == PICT_BOTTOM_FIELD)
4073 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4074 assert(s->mb_y < s->mb_height);
4076 if(s->picture_structure==PICT_FRAME){
4077 h->curr_pic_num= h->frame_num;
4078 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4079 }else{
4080 h->curr_pic_num= 2*h->frame_num + 1;
4081 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4084 if(h->nal_unit_type == NAL_IDR_SLICE){
4085 get_ue_golomb(&s->gb); /* idr_pic_id */
4088 if(h->sps.poc_type==0){
4089 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4091 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4092 h->delta_poc_bottom= get_se_golomb(&s->gb);
4096 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4097 h->delta_poc[0]= get_se_golomb(&s->gb);
4099 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4100 h->delta_poc[1]= get_se_golomb(&s->gb);
4103 init_poc(h);
4105 if(h->pps.redundant_pic_cnt_present){
4106 h->redundant_pic_count= get_ue_golomb(&s->gb);
4109 //set defaults, might be overridden a few lines later
4110 h->ref_count[0]= h->pps.ref_count[0];
4111 h->ref_count[1]= h->pps.ref_count[1];
4113 if(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE || h->slice_type == FF_B_TYPE){
4114 if(h->slice_type == FF_B_TYPE){
4115 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4117 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4119 if(num_ref_idx_active_override_flag){
4120 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4121 if(h->slice_type==FF_B_TYPE)
4122 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4124 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4125 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4126 h->ref_count[0]= h->ref_count[1]= 1;
4127 return -1;
4130 if(h->slice_type == FF_B_TYPE)
4131 h->list_count= 2;
4132 else
4133 h->list_count= 1;
4134 }else
4135 h->list_count= 0;
4137 if(!default_ref_list_done){
4138 fill_default_ref_list(h);
4141 if(decode_ref_pic_list_reordering(h) < 0)
4142 return -1;
4144 if( (h->pps.weighted_pred && (h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE ))
4145 || (h->pps.weighted_bipred_idc==1 && h->slice_type==FF_B_TYPE ) )
4146 pred_weight_table(h);
4147 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==FF_B_TYPE)
4148 implicit_weight_table(h);
4149 else
4150 h->use_weight = 0;
4152 if(h->nal_ref_idc)
4153 decode_ref_pic_marking(h0, &s->gb);
4155 if(FRAME_MBAFF)
4156 fill_mbaff_ref_list(h);
4158 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE && h->pps.cabac ){
4159 tmp = get_ue_golomb(&s->gb);
4160 if(tmp > 2){
4161 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4162 return -1;
4164 h->cabac_init_idc= tmp;
4167 h->last_qscale_diff = 0;
4168 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4169 if(tmp>51){
4170 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4171 return -1;
4173 s->qscale= tmp;
4174 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4175 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4176 //FIXME qscale / qp ... stuff
4177 if(h->slice_type == FF_SP_TYPE){
4178 get_bits1(&s->gb); /* sp_for_switch_flag */
4180 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4181 get_se_golomb(&s->gb); /* slice_qs_delta */
4184 h->deblocking_filter = 1;
4185 h->slice_alpha_c0_offset = 0;
4186 h->slice_beta_offset = 0;
4187 if( h->pps.deblocking_filter_parameters_present ) {
4188 tmp= get_ue_golomb(&s->gb);
4189 if(tmp > 2){
4190 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4191 return -1;
4193 h->deblocking_filter= tmp;
4194 if(h->deblocking_filter < 2)
4195 h->deblocking_filter^= 1; // 1<->0
4197 if( h->deblocking_filter ) {
4198 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4199 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4203 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4204 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != FF_I_TYPE)
4205 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == FF_B_TYPE)
4206 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4207 h->deblocking_filter= 0;
4209 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4210 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4211 /* Cheat slightly for speed:
4212 Do not bother to deblock across slices. */
4213 h->deblocking_filter = 2;
4214 } else {
4215 h0->max_contexts = 1;
4216 if(!h0->single_decode_warning) {
4217 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4218 h0->single_decode_warning = 1;
4220 if(h != h0)
4221 return 1; // deblocking switched inside frame
4225 #if 0 //FMO
4226 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4227 slice_group_change_cycle= get_bits(&s->gb, ?);
4228 #endif
4230 h0->last_slice_type = slice_type;
4231 h->slice_num = ++h0->current_slice;
4233 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4234 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4236 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4237 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4238 h->slice_num,
4239 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4240 first_mb_in_slice,
4241 av_get_pict_type_char(h->slice_type),
4242 pps_id, h->frame_num,
4243 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4244 h->ref_count[0], h->ref_count[1],
4245 s->qscale,
4246 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4247 h->use_weight,
4248 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
4252 return 0;
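/**
 * Read a CAVLC level_prefix: the number of leading zero bits before the first
 * set bit, extracted here from the bit reader cache in one step rather than
 * bit by bit.
 */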
4258 static inline int get_level_prefix(GetBitContext *gb){
4259 unsigned int buf;
4260 int log;
4262 OPEN_READER(re, gb);
4263 UPDATE_CACHE(re, gb);
4264 buf=GET_CACHE(re, gb);
4266 log= 32 - av_log2(buf);
4267 #ifdef TRACE
4268 print_bin(buf>>(32-log), log);
4269 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4270 #endif
4272 LAST_SKIP_BITS(re, gb, log);
4273 CLOSE_READER(re, gb);
4275 return log-1;
4278 static inline int get_dct8x8_allowed(H264Context *h){
4279 int i;
4280 for(i=0; i<4; i++){
4281 if(!IS_SUB_8X8(h->sub_mb_type[i])
4282 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4283 return 0;
4285 return 1;
4289 * decodes a residual block.
4290 * @param n block index
4291 * @param scantable scantable
4292 * @param max_coeff number of coefficients in the block
4293 * @return <0 if an error occurred
4295 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4296 MpegEncContext * const s = &h->s;
4297 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4298 int level[16];
4299 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4301 //FIXME put trailing_ones into the context
4303 if(n == CHROMA_DC_BLOCK_INDEX){
4304 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4305 total_coeff= coeff_token>>2;
4306 }else{
4307 if(n == LUMA_DC_BLOCK_INDEX){
4308 total_coeff= pred_non_zero_count(h, 0);
4309 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4310 total_coeff= coeff_token>>2;
4311 }else{
4312 total_coeff= pred_non_zero_count(h, n);
4313 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4314 total_coeff= coeff_token>>2;
4315 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4319 //FIXME set last_non_zero?
4321 if(total_coeff==0)
4322 return 0;
4323 if(total_coeff > (unsigned)max_coeff) {
4324 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4325 return -1;
4328 trailing_ones= coeff_token&3;
4329 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4330 assert(total_coeff<=16);
4332 for(i=0; i<trailing_ones; i++){
4333 level[i]= 1 - 2*get_bits1(gb);
4336 if(i<total_coeff) {
4337 int level_code, mask;
4338 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4339 int prefix= get_level_prefix(gb);
4341 //first coefficient has suffix_length equal to 0 or 1
4342 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4343 if(suffix_length)
4344 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4345 else
4346 level_code= (prefix<<suffix_length); //part
4347 }else if(prefix==14){
4348 if(suffix_length)
4349 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4350 else
4351 level_code= prefix + get_bits(gb, 4); //part
4352 }else if(prefix==15){
4353 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4354 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much) sense
4355 }else{
4356 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4357 return -1;
4360 if(trailing_ones < 3) level_code += 2;
4362 suffix_length = 1;
4363 if(level_code > 5)
4364 suffix_length++;
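/* level_code follows the spec's signed level mapping: even codes are positive
 * levels, odd codes negative. The mask trick below applies the sign without a
 * branch. */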
4365 mask= -(level_code&1);
4366 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4367 i++;
4369 //remaining coefficients have suffix_length > 0
4370 for(;i<total_coeff;i++) {
4371 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4372 prefix = get_level_prefix(gb);
4373 if(prefix<15){
4374 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4375 }else if(prefix==15){
4376 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4377 }else{
4378 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4379 return -1;
4381 mask= -(level_code&1);
4382 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4383 if(level_code > suffix_limit[suffix_length])
4384 suffix_length++;
4388 if(total_coeff == max_coeff)
4389 zeros_left=0;
4390 else{
4391 if(n == CHROMA_DC_BLOCK_INDEX)
4392 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4393 else
4394 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
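/* Coefficients are placed back to front: start at the highest scan position
 * implied by total_zeros, then step down by 1 + run_before for each further
 * level, so trailing zeros never need to be written explicitly. */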
4397 coeff_num = zeros_left + total_coeff - 1;
4398 j = scantable[coeff_num];
4399 if(n > 24){
4400 block[j] = level[0];
4401 for(i=1;i<total_coeff;i++) {
4402 if(zeros_left <= 0)
4403 run_before = 0;
4404 else if(zeros_left < 7){
4405 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4406 }else{
4407 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4409 zeros_left -= run_before;
4410 coeff_num -= 1 + run_before;
4411 j= scantable[ coeff_num ];
4413 block[j]= level[i];
4415 }else{
4416 block[j] = (level[0] * qmul[j] + 32)>>6;
4417 for(i=1;i<total_coeff;i++) {
4418 if(zeros_left <= 0)
4419 run_before = 0;
4420 else if(zeros_left < 7){
4421 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4422 }else{
4423 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4425 zeros_left -= run_before;
4426 coeff_num -= 1 + run_before;
4427 j= scantable[ coeff_num ];
4429 block[j]= (level[i] * qmul[j] + 32)>>6;
4433 if(zeros_left<0){
4434 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4435 return -1;
4438 return 0;
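/**
 * Guess mb_field_decoding_flag for a skipped MBAFF macroblock pair, for which
 * the flag is not coded: copy it from the left neighbour if it belongs to the
 * same slice, otherwise from the top neighbour, otherwise assume frame coding,
 * matching the spec's inference rule.
 */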
4441 static void predict_field_decoding_flag(H264Context *h){
4442 MpegEncContext * const s = &h->s;
4443 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4444 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4445 ? s->current_picture.mb_type[mb_xy-1]
4446 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4447 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4448 : 0;
4449 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4453 * decodes a P_SKIP or B_SKIP macroblock
4455 static void decode_mb_skip(H264Context *h){
4456 MpegEncContext * const s = &h->s;
4457 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4458 int mb_type=0;
4460 memset(h->non_zero_count[mb_xy], 0, 16);
4461 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4463 if(MB_FIELD)
4464 mb_type|= MB_TYPE_INTERLACED;
4466 if( h->slice_type == FF_B_TYPE )
4468 // just for fill_caches. pred_direct_motion will set the real mb_type
4469 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4471 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4472 pred_direct_motion(h, &mb_type);
4473 mb_type|= MB_TYPE_SKIP;
4475 else
4477 int mx, my;
4478 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4480 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4481 pred_pskip_motion(h, &mx, &my);
4482 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4483 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4486 write_back_motion(h, mb_type);
4487 s->current_picture.mb_type[mb_xy]= mb_type;
4488 s->current_picture.qscale_table[mb_xy]= s->qscale;
4489 h->slice_table[ mb_xy ]= h->slice_num;
4490 h->prev_mb_skipped= 1;
4494 * decodes a macroblock
4495 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4497 static int decode_mb_cavlc(H264Context *h){
4498 MpegEncContext * const s = &h->s;
4499 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4500 int partition_count;
4501 unsigned int mb_type, cbp;
4502 int dct8x8_allowed= h->pps.transform_8x8_mode;
4504 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
4506 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4507 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4508 down the code */
4509 if(h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE){
4510 if(s->mb_skip_run==-1)
4511 s->mb_skip_run= get_ue_golomb(&s->gb);
4513 if (s->mb_skip_run--) {
4514 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4515 if(s->mb_skip_run==0)
4516 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4517 else
4518 predict_field_decoding_flag(h);
4520 decode_mb_skip(h);
4521 return 0;
4524 if(FRAME_MBAFF){
4525 if( (s->mb_y&1) == 0 )
4526 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4527 }else
4528 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4530 h->prev_mb_skipped= 0;
4532 mb_type= get_ue_golomb(&s->gb);
4533 if(h->slice_type == FF_B_TYPE){
4534 if(mb_type < 23){
4535 partition_count= b_mb_type_info[mb_type].partition_count;
4536 mb_type= b_mb_type_info[mb_type].type;
4537 }else{
4538 mb_type -= 23;
4539 goto decode_intra_mb;
4541 }else if(h->slice_type == FF_P_TYPE /*|| h->slice_type == FF_SP_TYPE */){
4542 if(mb_type < 5){
4543 partition_count= p_mb_type_info[mb_type].partition_count;
4544 mb_type= p_mb_type_info[mb_type].type;
4545 }else{
4546 mb_type -= 5;
4547 goto decode_intra_mb;
4549 }else{
4550 assert(h->slice_type == FF_I_TYPE);
4551 decode_intra_mb:
4552 if(mb_type > 25){
4553 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4554 return -1;
4556 partition_count=0;
4557 cbp= i_mb_type_info[mb_type].cbp;
4558 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4559 mb_type= i_mb_type_info[mb_type].type;
4562 if(MB_FIELD)
4563 mb_type |= MB_TYPE_INTERLACED;
4565 h->slice_table[ mb_xy ]= h->slice_num;
4567 if(IS_INTRA_PCM(mb_type)){
4568 unsigned int x, y;
4570 // We assume these blocks are very rare so we do not optimize them.
4571 align_get_bits(&s->gb);
4573 // The pixels are stored in the same order as levels in h->mb array.
4574 for(y=0; y<16; y++){
4575 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4576 for(x=0; x<16; x++){
4577 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4578 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4581 for(y=0; y<8; y++){
4582 const int index= 256 + 4*(y&3) + 32*(y>>2);
4583 for(x=0; x<8; x++){
4584 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4585 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4588 for(y=0; y<8; y++){
4589 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4590 for(x=0; x<8; x++){
4591 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4592 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4596 // In deblocking, the quantizer is 0
4597 s->current_picture.qscale_table[mb_xy]= 0;
4598 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4599 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4600 // All coeffs are present
4601 memset(h->non_zero_count[mb_xy], 16, 16);
4603 s->current_picture.mb_type[mb_xy]= mb_type;
4604 return 0;
4607 if(MB_MBAFF){
4608 h->ref_count[0] <<= 1;
4609 h->ref_count[1] <<= 1;
4612 fill_caches(h, mb_type, 0);
4614 //mb_pred
4615 if(IS_INTRA(mb_type)){
4616 int pred_mode;
4617 // init_top_left_availability(h);
4618 if(IS_INTRA4x4(mb_type)){
4619 int i;
4620 int di = 1;
4621 if(dct8x8_allowed && get_bits1(&s->gb)){
4622 mb_type |= MB_TYPE_8x8DCT;
4623 di = 4;
4626 // fill_intra4x4_pred_table(h);
4627 for(i=0; i<16; i+=di){
4628 int mode= pred_intra_mode(h, i);
4630 if(!get_bits1(&s->gb)){
4631 const int rem_mode= get_bits(&s->gb, 3);
4632 mode = rem_mode + (rem_mode >= mode);
4635 if(di==4)
4636 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4637 else
4638 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4640 write_back_intra_pred_mode(h);
4641 if( check_intra4x4_pred_mode(h) < 0)
4642 return -1;
4643 }else{
4644 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4645 if(h->intra16x16_pred_mode < 0)
4646 return -1;
4649 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4650 if(pred_mode < 0)
4651 return -1;
4652 h->chroma_pred_mode= pred_mode;
4653 }else if(partition_count==4){
4654 int i, j, sub_partition_count[4], list, ref[2][4];
4656 if(h->slice_type == FF_B_TYPE){
4657 for(i=0; i<4; i++){
4658 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4659 if(h->sub_mb_type[i] >=13){
4660 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4661 return -1;
4663 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4664 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4666 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4667 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4668 pred_direct_motion(h, &mb_type);
4669 h->ref_cache[0][scan8[4]] =
4670 h->ref_cache[1][scan8[4]] =
4671 h->ref_cache[0][scan8[12]] =
4672 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4674 }else{
4675 assert(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE); //FIXME SP correct ?
4676 for(i=0; i<4; i++){
4677 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4678 if(h->sub_mb_type[i] >=4){
4679 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4680 return -1;
4682 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4683 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4687 for(list=0; list<h->list_count; list++){
4688 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4689 for(i=0; i<4; i++){
4690 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4691 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4692 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4693 if(tmp>=ref_count){
4694 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4695 return -1;
4697 ref[list][i]= tmp;
4698 }else{
4699 //FIXME
4700 ref[list][i] = -1;
4705 if(dct8x8_allowed)
4706 dct8x8_allowed = get_dct8x8_allowed(h);
4708 for(list=0; list<h->list_count; list++){
4709 for(i=0; i<4; i++){
4710 if(IS_DIRECT(h->sub_mb_type[i])) {
4711 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4712 continue;
4714 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4715 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4717 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4718 const int sub_mb_type= h->sub_mb_type[i];
4719 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4720 for(j=0; j<sub_partition_count[i]; j++){
4721 int mx, my;
4722 const int index= 4*i + block_width*j;
4723 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4724 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4725 mx += get_se_golomb(&s->gb);
4726 my += get_se_golomb(&s->gb);
4727 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4729 if(IS_SUB_8X8(sub_mb_type)){
4730 mv_cache[ 1 ][0]=
4731 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4732 mv_cache[ 1 ][1]=
4733 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4734 }else if(IS_SUB_8X4(sub_mb_type)){
4735 mv_cache[ 1 ][0]= mx;
4736 mv_cache[ 1 ][1]= my;
4737 }else if(IS_SUB_4X8(sub_mb_type)){
4738 mv_cache[ 8 ][0]= mx;
4739 mv_cache[ 8 ][1]= my;
4741 mv_cache[ 0 ][0]= mx;
4742 mv_cache[ 0 ][1]= my;
4744 }else{
4745 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4746 p[0] = p[1]=
4747 p[8] = p[9]= 0;
4751 }else if(IS_DIRECT(mb_type)){
4752 pred_direct_motion(h, &mb_type);
4753 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4754 }else{
4755 int list, mx, my, i;
4756 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4757 if(IS_16X16(mb_type)){
4758 for(list=0; list<h->list_count; list++){
4759 unsigned int val;
4760 if(IS_DIR(mb_type, 0, list)){
4761 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4762 if(val >= h->ref_count[list]){
4763 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4764 return -1;
4766 }else
4767 val= LIST_NOT_USED&0xFF;
4768 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4770 for(list=0; list<h->list_count; list++){
4771 unsigned int val;
4772 if(IS_DIR(mb_type, 0, list)){
4773 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4774 mx += get_se_golomb(&s->gb);
4775 my += get_se_golomb(&s->gb);
4776 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4778 val= pack16to32(mx,my);
4779 }else
4780 val=0;
4781 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4784 else if(IS_16X8(mb_type)){
4785 for(list=0; list<h->list_count; list++){
4786 for(i=0; i<2; i++){
4787 unsigned int val;
4788 if(IS_DIR(mb_type, i, list)){
4789 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4790 if(val >= h->ref_count[list]){
4791 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4792 return -1;
4794 }else
4795 val= LIST_NOT_USED&0xFF;
4796 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4799 for(list=0; list<h->list_count; list++){
4800 for(i=0; i<2; i++){
4801 unsigned int val;
4802 if(IS_DIR(mb_type, i, list)){
4803 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4804 mx += get_se_golomb(&s->gb);
4805 my += get_se_golomb(&s->gb);
4806 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4808 val= pack16to32(mx,my);
4809 }else
4810 val=0;
4811 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4814 }else{
4815 assert(IS_8X16(mb_type));
4816 for(list=0; list<h->list_count; list++){
4817 for(i=0; i<2; i++){
4818 unsigned int val;
4819 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4820 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4821 if(val >= h->ref_count[list]){
4822 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4823 return -1;
4825 }else
4826 val= LIST_NOT_USED&0xFF;
4827 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4830 for(list=0; list<h->list_count; list++){
4831 for(i=0; i<2; i++){
4832 unsigned int val;
4833 if(IS_DIR(mb_type, i, list)){
4834 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4835 mx += get_se_golomb(&s->gb);
4836 my += get_se_golomb(&s->gb);
4837 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4839 val= pack16to32(mx,my);
4840 }else
4841 val=0;
4842 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4848 if(IS_INTER(mb_type))
4849 write_back_motion(h, mb_type);
4851 if(!IS_INTRA16x16(mb_type)){
4852 cbp= get_ue_golomb(&s->gb);
4853 if(cbp > 47){
4854 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4855 return -1;
4858 if(IS_INTRA4x4(mb_type))
4859 cbp= golomb_to_intra4x4_cbp[cbp];
4860 else
4861 cbp= golomb_to_inter_cbp[cbp];
4863 h->cbp = cbp;
4865 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4866 if(get_bits1(&s->gb))
4867 mb_type |= MB_TYPE_8x8DCT;
4869 s->current_picture.mb_type[mb_xy]= mb_type;
4871 if(cbp || IS_INTRA16x16(mb_type)){
4872 int i8x8, i4x4, chroma_idx;
4873 int dquant;
4874 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4875 const uint8_t *scan, *scan8x8, *dc_scan;
4877 // fill_non_zero_count_cache(h);
4879 if(IS_INTERLACED(mb_type)){
4880 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4881 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4882 dc_scan= luma_dc_field_scan;
4883 }else{
4884 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4885 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4886 dc_scan= luma_dc_zigzag_scan;
4889 dquant= get_se_golomb(&s->gb);
4891 if( dquant > 25 || dquant < -26 ){
4892 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4893 return -1;
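/* mb_qp_delta is cumulative across macroblocks; QP wraps modulo 52 so it
 * always stays in the 0..51 range. */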
4896 s->qscale += dquant;
4897 if(((unsigned)s->qscale) > 51){
4898 if(s->qscale<0) s->qscale+= 52;
4899 else s->qscale-= 52;
4902 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4903 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4904 if(IS_INTRA16x16(mb_type)){
4905 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4906 return -1; //FIXME continue if partitioned and others return -1 too
4909 assert((cbp&15) == 0 || (cbp&15) == 15);
4911 if(cbp&15){
4912 for(i8x8=0; i8x8<4; i8x8++){
4913 for(i4x4=0; i4x4<4; i4x4++){
4914 const int index= i4x4 + 4*i8x8;
4915 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4916 return -1;
4920 }else{
4921 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4923 }else{
4924 for(i8x8=0; i8x8<4; i8x8++){
4925 if(cbp & (1<<i8x8)){
4926 if(IS_8x8DCT(mb_type)){
4927 DCTELEM *buf = &h->mb[64*i8x8];
4928 uint8_t *nnz;
4929 for(i4x4=0; i4x4<4; i4x4++){
4930 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4931 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4932 return -1;
4934 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4935 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4936 }else{
4937 for(i4x4=0; i4x4<4; i4x4++){
4938 const int index= i4x4 + 4*i8x8;
4940 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4941 return -1;
4945 }else{
4946 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4947 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4952 if(cbp&0x30){
4953 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4954 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4955 return -1;
4959 if(cbp&0x20){
4960 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4961 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4962 for(i4x4=0; i4x4<4; i4x4++){
4963 const int index= 16 + 4*chroma_idx + i4x4;
4964 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4965 return -1;
4969 }else{
4970 uint8_t * const nnz= &h->non_zero_count_cache[0];
4971 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4972 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4974 }else{
4975 uint8_t * const nnz= &h->non_zero_count_cache[0];
4976 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4977 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4978 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4980 s->current_picture.qscale_table[mb_xy]= s->qscale;
4981 write_back_non_zero_count(h);
4983 if(MB_MBAFF){
4984 h->ref_count[0] >>= 1;
4985 h->ref_count[1] >>= 1;
4988 return 0;
4991 static int decode_cabac_field_decoding_flag(H264Context *h) {
4992 MpegEncContext * const s = &h->s;
4993 const int mb_x = s->mb_x;
4994 const int mb_y = s->mb_y & ~1;
4995 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4996 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4998 unsigned int ctx = 0;
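/* The CABAC context increment for mb_field_decoding_flag is the number of
 * neighbouring macroblock pairs (left, above) in the same slice that are
 * already field coded. */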
5000 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5001 ctx += 1;
5003 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5004 ctx += 1;
5007 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
5010 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5011 uint8_t *state= &h->cabac_state[ctx_base];
5012 int mb_type;
5014 if(intra_slice){
5015 MpegEncContext * const s = &h->s;
5016 const int mba_xy = h->left_mb_xy[0];
5017 const int mbb_xy = h->top_mb_xy;
5018 int ctx=0;
5019 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5020 ctx++;
5021 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5022 ctx++;
5023 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5024 return 0; /* I4x4 */
5025 state += 2;
5026 }else{
5027 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5028 return 0; /* I4x4 */
5031 if( get_cabac_terminate( &h->cabac ) )
5032 return 25; /* PCM */
5034 mb_type = 1; /* I16x16 */
5035 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5036 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5037 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5038 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5039 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
5040 return mb_type;
5043 static int decode_cabac_mb_type( H264Context *h ) {
5044 MpegEncContext * const s = &h->s;
5046 if( h->slice_type == FF_I_TYPE ) {
5047 return decode_cabac_intra_mb_type(h, 3, 1);
5048 } else if( h->slice_type == FF_P_TYPE ) {
5049 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5050 /* P-type */
5051 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5052 /* P_L0_D16x16, P_8x8 */
5053 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5054 } else {
5055 /* P_L0_D8x16, P_L0_D16x8 */
5056 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5058 } else {
5059 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5061 } else if( h->slice_type == FF_B_TYPE ) {
5062 const int mba_xy = h->left_mb_xy[0];
5063 const int mbb_xy = h->top_mb_xy;
5064 int ctx = 0;
5065 int bits;
5067 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5068 ctx++;
5069 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5070 ctx++;
5072 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5073 return 0; /* B_Direct_16x16 */
5075 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5076 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5079 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5080 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5081 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5082 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5083 if( bits < 8 )
5084 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5085 else if( bits == 13 ) {
5086 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5087 } else if( bits == 14 )
5088 return 11; /* B_L1_L0_8x16 */
5089 else if( bits == 15 )
5090 return 22; /* B_8x8 */
5092 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5093 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5094 } else {
5095 /* TODO SI/SP frames? */
5096 return -1;
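/* mb_skip_flag: context 0..2 from whether the left and top neighbours are
 * non-skipped macroblocks of the same slice, with an offset of 13 for B
 * slices. In MBAFF frames the neighbour addresses are first adjusted to the
 * matching field/frame macroblock. */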
5100 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5101 MpegEncContext * const s = &h->s;
5102 int mba_xy, mbb_xy;
5103 int ctx = 0;
5105 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5106 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5107 mba_xy = mb_xy - 1;
5108 if( (mb_y&1)
5109 && h->slice_table[mba_xy] == h->slice_num
5110 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5111 mba_xy += s->mb_stride;
5112 if( MB_FIELD ){
5113 mbb_xy = mb_xy - s->mb_stride;
5114 if( !(mb_y&1)
5115 && h->slice_table[mbb_xy] == h->slice_num
5116 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5117 mbb_xy -= s->mb_stride;
5118 }else
5119 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5120 }else{
5121 int mb_xy = mb_x + mb_y*s->mb_stride;
5122 mba_xy = mb_xy - 1;
5123 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5126 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5127 ctx++;
5128 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5129 ctx++;
5131 if( h->slice_type == FF_B_TYPE )
5132 ctx += 13;
5133 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
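/* Intra 4x4 prediction mode: one bin selects the predicted (most probable)
 * mode; otherwise three fixed-length bins give rem_intra4x4_pred_mode, and
 * values >= the predicted mode are shifted up by one. */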
5136 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5137 int mode = 0;
5139 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5140 return pred_mode;
5142 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5143 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5144 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5146 if( mode >= pred_mode )
5147 return mode + 1;
5148 else
5149 return mode;
5152 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5153 const int mba_xy = h->left_mb_xy[0];
5154 const int mbb_xy = h->top_mb_xy;
5156 int ctx = 0;
5158 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5159 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5160 ctx++;
5162 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5163 ctx++;
5165 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5166 return 0;
5168 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5169 return 1;
5170 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5171 return 2;
5172 else
5173 return 3;
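/* cbp_luma: one bin per 8x8 luma block; each context is derived from whether
 * the corresponding left/top neighbouring 8x8 block (or an already decoded
 * bit of this cbp) contained coefficients. */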
5176 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5177 int cbp_b, cbp_a, ctx, cbp = 0;
5179 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5180 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5182 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5183 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5184 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5185 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5186 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5187 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5188 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5189 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5190 return cbp;
5192 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5193 int ctx;
5194 int cbp_a, cbp_b;
5196 cbp_a = (h->left_cbp>>4)&0x03;
5197 cbp_b = (h-> top_cbp>>4)&0x03;
5199 ctx = 0;
5200 if( cbp_a > 0 ) ctx++;
5201 if( cbp_b > 0 ) ctx += 2;
5202 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5203 return 0;
5205 ctx = 4;
5206 if( cbp_a == 2 ) ctx++;
5207 if( cbp_b == 2 ) ctx += 2;
5208 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
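/* mb_qp_delta is unary binarized; the decoded count k maps to +(k+1)/2 for
 * odd k and -(k+1)/2 for even k (1 -> +1, 2 -> -1, 3 -> +2, ...). */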
5210 static int decode_cabac_mb_dqp( H264Context *h) {
5211 int ctx = 0;
5212 int val = 0;
5214 if( h->last_qscale_diff != 0 )
5215 ctx++;
5217 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5218 if( ctx < 2 )
5219 ctx = 2;
5220 else
5221 ctx = 3;
5222 val++;
5223 if(val > 102) //prevent infinite loop
5224 return INT_MIN;
5227 if( val&0x01 )
5228 return (val + 1)/2;
5229 else
5230 return -(val + 1)/2;
5232 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5233 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5234 return 0; /* 8x8 */
5235 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5236 return 1; /* 8x4 */
5237 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5238 return 2; /* 4x8 */
5239 return 3; /* 4x4 */
5241 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5242 int type;
5243 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5244 return 0; /* B_Direct_8x8 */
5245 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5246 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5247 type = 3;
5248 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5249 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5250 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5251 type += 4;
5253 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5254 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5255 return type;
5258 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5259 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
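/* ref_idx is unary binarized: the first bin's context (0..3) depends on
 * whether the left and top blocks use a reference index > 0 (direct-predicted
 * neighbours are ignored in B slices); further bins use contexts 4 and 5. */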
5262 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5263 int refa = h->ref_cache[list][scan8[n] - 1];
5264 int refb = h->ref_cache[list][scan8[n] - 8];
5265 int ref = 0;
5266 int ctx = 0;
5268 if( h->slice_type == FF_B_TYPE) {
5269 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5270 ctx++;
5271 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5272 ctx += 2;
5273 } else {
5274 if( refa > 0 )
5275 ctx++;
5276 if( refb > 0 )
5277 ctx += 2;
5280 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5281 ref++;
5282 if( ctx < 4 )
5283 ctx = 4;
5284 else
5285 ctx = 5;
5286 if(ref >= 32 /*h->ref_list[list]*/){
5287 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5288 return 0; //FIXME we should return -1 and check the return everywhere
5291 return ref;
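/* mvd components use UEG3 binarization: a context-coded truncated unary
 * prefix (the first context chosen from the sum of the neighbouring |mvd|),
 * then for large values a 3rd-order Exp-Golomb suffix decoded in bypass mode,
 * and finally a bypass-coded sign. */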
5294 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5295 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5296 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5297 int ctxbase = (l == 0) ? 40 : 47;
5298 int ctx, mvd;
5300 if( amvd < 3 )
5301 ctx = 0;
5302 else if( amvd > 32 )
5303 ctx = 2;
5304 else
5305 ctx = 1;
5307 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5308 return 0;
5310 mvd= 1;
5311 ctx= 3;
5312 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5313 mvd++;
5314 if( ctx < 6 )
5315 ctx++;
5318 if( mvd >= 9 ) {
5319 int k = 3;
5320 while( get_cabac_bypass( &h->cabac ) ) {
5321 mvd += 1 << k;
5322 k++;
5323 if(k>24){
5324 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5325 return INT_MIN;
5328 while( k-- ) {
5329 if( get_cabac_bypass( &h->cabac ) )
5330 mvd += 1 << k;
5333 return get_cabac_bypass_sign( &h->cabac, -mvd );
5336 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5337 int nza, nzb;
5338 int ctx = 0;
5340 if( cat == 0 ) {
5341 nza = h->left_cbp&0x100;
5342 nzb = h-> top_cbp&0x100;
5343 } else if( cat == 1 || cat == 2 ) {
5344 nza = h->non_zero_count_cache[scan8[idx] - 1];
5345 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5346 } else if( cat == 3 ) {
5347 nza = (h->left_cbp>>(6+idx))&0x01;
5348 nzb = (h-> top_cbp>>(6+idx))&0x01;
5349 } else {
5350 assert(cat == 4);
5351 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5352 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5355 if( nza > 0 )
5356 ctx++;
5358 if( nzb > 0 )
5359 ctx += 2;
5361 return ctx + 4 * cat;
5364 DECLARE_ASM_CONST(1, const uint8_t, last_coeff_flag_offset_8x8[63]) = {
5365 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5366 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5367 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5368 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
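/* Decodes the residual of one block: coded_block_flag (skipped for 8x8 luma),
 * the significance / last-significant-coefficient maps, then the levels in
 * reverse scan order: a context-coded abs_level_minus1 (contexts depend on how
 * many decoded levels were equal to / greater than 1), an Exp-Golomb bypass
 * escape for magnitudes >= 15, a bypass sign, and optional dequantization by
 * qmul with (x*qmul[j] + 32) >> 6 rounding. */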
5371 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5372 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
5373 static const int significant_coeff_flag_offset[2][6] = {
5374 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5375 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5377 static const int last_coeff_flag_offset[2][6] = {
5378 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5379 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5381 static const int coeff_abs_level_m1_offset[6] = {
5382 227+0, 227+10, 227+20, 227+30, 227+39, 426
5384 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5385 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5386 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5387 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5388 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5389 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5390 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5391 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5392 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5395 int index[64];
5397 int av_unused last;
5398 int coeff_count = 0;
5400 int abslevel1 = 1;
5401 int abslevelgt1 = 0;
5403 uint8_t *significant_coeff_ctx_base;
5404 uint8_t *last_coeff_ctx_base;
5405 uint8_t *abs_level_m1_ctx_base;
5407 #ifndef ARCH_X86
5408 #define CABAC_ON_STACK
5409 #endif
5410 #ifdef CABAC_ON_STACK
5411 #define CC &cc
5412 CABACContext cc;
5413 cc.range = h->cabac.range;
5414 cc.low = h->cabac.low;
5415 cc.bytestream= h->cabac.bytestream;
5416 #else
5417 #define CC &h->cabac
5418 #endif
5421 /* cat: 0-> DC 16x16 n = 0
5422 * 1-> AC 16x16 n = luma4x4idx
5423 * 2-> Luma4x4 n = luma4x4idx
5424 * 3-> DC Chroma n = iCbCr
5425 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5426 * 5-> Luma8x8 n = 4 * luma8x8idx
5429 /* read coded block flag */
5430 if( cat != 5 ) {
5431 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5432 if( cat == 1 || cat == 2 )
5433 h->non_zero_count_cache[scan8[n]] = 0;
5434 else if( cat == 4 )
5435 h->non_zero_count_cache[scan8[16+n]] = 0;
5436 #ifdef CABAC_ON_STACK
5437 h->cabac.range = cc.range ;
5438 h->cabac.low = cc.low ;
5439 h->cabac.bytestream= cc.bytestream;
5440 #endif
5441 return;
5445 significant_coeff_ctx_base = h->cabac_state
5446 + significant_coeff_flag_offset[MB_FIELD][cat];
5447 last_coeff_ctx_base = h->cabac_state
5448 + last_coeff_flag_offset[MB_FIELD][cat];
5449 abs_level_m1_ctx_base = h->cabac_state
5450 + coeff_abs_level_m1_offset[cat];
5452 if( cat == 5 ) {
5453 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5454 for(last= 0; last < coefs; last++) { \
5455 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5456 if( get_cabac( CC, sig_ctx )) { \
5457 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5458 index[coeff_count++] = last; \
5459 if( get_cabac( CC, last_ctx ) ) { \
5460 last= max_coeff; \
5461 break; \
5465 if( last == max_coeff -1 ) {\
5466 index[coeff_count++] = last;\
5468 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5469 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5470 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5471 } else {
5472 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5473 #else
5474 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5475 } else {
5476 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5477 #endif
5479 assert(coeff_count > 0);
5481 if( cat == 0 )
5482 h->cbp_table[mb_xy] |= 0x100;
5483 else if( cat == 1 || cat == 2 )
5484 h->non_zero_count_cache[scan8[n]] = coeff_count;
5485 else if( cat == 3 )
5486 h->cbp_table[mb_xy] |= 0x40 << n;
5487 else if( cat == 4 )
5488 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5489 else {
5490 assert( cat == 5 );
5491 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5494 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5495 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
5496 int j= scantable[index[coeff_count]];
5498 if( get_cabac( CC, ctx ) == 0 ) {
5499 if( !qmul ) {
5500 block[j] = get_cabac_bypass_sign( CC, -1);
5501 }else{
5502 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5505 abslevel1++;
5506 } else {
5507 int coeff_abs = 2;
5508 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
5509 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5510 coeff_abs++;
5513 if( coeff_abs >= 15 ) {
5514 int j = 0;
5515 while( get_cabac_bypass( CC ) ) {
5516 j++;
5519 coeff_abs=1;
5520 while( j-- ) {
5521 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5523 coeff_abs+= 14;
5526 if( !qmul ) {
5527 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5528 else block[j] = coeff_abs;
5529 }else{
5530 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5531 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
5534 abslevelgt1++;
5537 #ifdef CABAC_ON_STACK
5538 h->cabac.range = cc.range ;
5539 h->cabac.low = cc.low ;
5540 h->cabac.bytestream= cc.bytestream;
5541 #endif
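/* Computes the addresses of the top and left neighbouring macroblocks used
 * for CABAC context selection, adjusting them in MBAFF frames when the
 * neighbouring pair's field/frame coding differs from the current
 * macroblock's. */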
5545 static inline void compute_mb_neighbors(H264Context *h)
5547 MpegEncContext * const s = &h->s;
5548 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5549 h->top_mb_xy = mb_xy - s->mb_stride;
5550 h->left_mb_xy[0] = mb_xy - 1;
5551 if(FRAME_MBAFF){
5552 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5553 const int top_pair_xy = pair_xy - s->mb_stride;
5554 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5555 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5556 const int curr_mb_frame_flag = !MB_FIELD;
5557 const int bottom = (s->mb_y & 1);
5558 if (bottom
5559 ? !curr_mb_frame_flag // bottom macroblock
5560 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5562 h->top_mb_xy -= s->mb_stride;
5564 if (left_mb_frame_flag != curr_mb_frame_flag) {
5565 h->left_mb_xy[0] = pair_xy - 1;
5567 } else if (FIELD_PICTURE) {
5568 h->top_mb_xy -= s->mb_stride;
5570 return;
5574 * decodes a macroblock
5575 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is detected
5577 static int decode_mb_cabac(H264Context *h) {
5578 MpegEncContext * const s = &h->s;
5579 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5580 int mb_type, partition_count, cbp = 0;
5581 int dct8x8_allowed= h->pps.transform_8x8_mode;
5583 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5585 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5586 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE ) {
5587 int skip;
5588 /* a skipped mb needs the aff flag from the following mb */
5589 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5590 predict_field_decoding_flag(h);
5591 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5592 skip = h->next_mb_skipped;
5593 else
5594 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5595 /* read skip flags */
5596 if( skip ) {
5597 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5598 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5599 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5600 if(h->next_mb_skipped)
5601 predict_field_decoding_flag(h);
5602 else
5603 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5606 decode_mb_skip(h);
5608 h->cbp_table[mb_xy] = 0;
5609 h->chroma_pred_mode_table[mb_xy] = 0;
5610 h->last_qscale_diff = 0;
5612 return 0;
5616 if(FRAME_MBAFF){
5617 if( (s->mb_y&1) == 0 )
5618 h->mb_mbaff =
5619 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5620 }else
5621 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5623 h->prev_mb_skipped = 0;
5625 compute_mb_neighbors(h);
5626 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5627 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5628 return -1;
5631 if( h->slice_type == FF_B_TYPE ) {
5632 if( mb_type < 23 ){
5633 partition_count= b_mb_type_info[mb_type].partition_count;
5634 mb_type= b_mb_type_info[mb_type].type;
5635 }else{
5636 mb_type -= 23;
5637 goto decode_intra_mb;
5639 } else if( h->slice_type == FF_P_TYPE ) {
5640 if( mb_type < 5) {
5641 partition_count= p_mb_type_info[mb_type].partition_count;
5642 mb_type= p_mb_type_info[mb_type].type;
5643 } else {
5644 mb_type -= 5;
5645 goto decode_intra_mb;
5647 } else {
5648 assert(h->slice_type == FF_I_TYPE);
5649 decode_intra_mb:
5650 partition_count = 0;
5651 cbp= i_mb_type_info[mb_type].cbp;
5652 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5653 mb_type= i_mb_type_info[mb_type].type;
5655 if(MB_FIELD)
5656 mb_type |= MB_TYPE_INTERLACED;
5658 h->slice_table[ mb_xy ]= h->slice_num;
5660 if(IS_INTRA_PCM(mb_type)) {
5661 const uint8_t *ptr;
5662 unsigned int x, y;
5664 // We assume these blocks are very rare, so we do not optimize them.
5665 // FIXME The following two lines get the bitstream position in the cabac
5666 // decoder; this should probably be done by a function in cabac.h (or cabac.c).
5667 ptr= h->cabac.bytestream;
5668 if(h->cabac.low&0x1) ptr--;
5669 if(CABAC_BITS==16){
5670 if(h->cabac.low&0x1FF) ptr--;
5673 // The pixels are stored in the same order as levels in h->mb array.
5674 for(y=0; y<16; y++){
5675 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5676 for(x=0; x<16; x++){
5677 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5678 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5681 for(y=0; y<8; y++){
5682 const int index= 256 + 4*(y&3) + 32*(y>>2);
5683 for(x=0; x<8; x++){
5684 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5685 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5688 for(y=0; y<8; y++){
5689 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5690 for(x=0; x<8; x++){
5691 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5692 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5696 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5698 // All blocks are present
5699 h->cbp_table[mb_xy] = 0x1ef;
5700 h->chroma_pred_mode_table[mb_xy] = 0;
5701 // In deblocking, the quantizer is 0
5702 s->current_picture.qscale_table[mb_xy]= 0;
5703 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5704 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5705 // All coeffs are present
5706 memset(h->non_zero_count[mb_xy], 16, 16);
5707 s->current_picture.mb_type[mb_xy]= mb_type;
5708 return 0;
5711 if(MB_MBAFF){
5712 h->ref_count[0] <<= 1;
5713 h->ref_count[1] <<= 1;
5716 fill_caches(h, mb_type, 0);
5718 if( IS_INTRA( mb_type ) ) {
5719 int i, pred_mode;
5720 if( IS_INTRA4x4( mb_type ) ) {
5721 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5722 mb_type |= MB_TYPE_8x8DCT;
5723 for( i = 0; i < 16; i+=4 ) {
5724 int pred = pred_intra_mode( h, i );
5725 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5726 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5728 } else {
5729 for( i = 0; i < 16; i++ ) {
5730 int pred = pred_intra_mode( h, i );
5731 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5733 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5736 write_back_intra_pred_mode(h);
5737 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5738 } else {
5739 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5740 if( h->intra16x16_pred_mode < 0 ) return -1;
5742 h->chroma_pred_mode_table[mb_xy] =
5743 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5745 pred_mode= check_intra_pred_mode( h, pred_mode );
5746 if( pred_mode < 0 ) return -1;
5747 h->chroma_pred_mode= pred_mode;
5748 } else if( partition_count == 4 ) {
5749 int i, j, sub_partition_count[4], list, ref[2][4];
5751 if( h->slice_type == FF_B_TYPE ) {
5752 for( i = 0; i < 4; i++ ) {
5753 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5754 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5755 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5757 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5758 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5759 pred_direct_motion(h, &mb_type);
5760 h->ref_cache[0][scan8[4]] =
5761 h->ref_cache[1][scan8[4]] =
5762 h->ref_cache[0][scan8[12]] =
5763 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5764 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5765 for( i = 0; i < 4; i++ )
5766 if( IS_DIRECT(h->sub_mb_type[i]) )
5767 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5770 } else {
5771 for( i = 0; i < 4; i++ ) {
5772 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5773 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5774 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5778 for( list = 0; list < h->list_count; list++ ) {
5779 for( i = 0; i < 4; i++ ) {
5780 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5781 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5782 if( h->ref_count[list] > 1 )
5783 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5784 else
5785 ref[list][i] = 0;
5786 } else {
5787 ref[list][i] = -1;
5789 h->ref_cache[list][ scan8[4*i]+1 ]=
5790 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5794 if(dct8x8_allowed)
5795 dct8x8_allowed = get_dct8x8_allowed(h);
5797 for(list=0; list<h->list_count; list++){
5798 for(i=0; i<4; i++){
5799 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5800 if(IS_DIRECT(h->sub_mb_type[i])){
5801 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5802 continue;
5805 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5806 const int sub_mb_type= h->sub_mb_type[i];
5807 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5808 for(j=0; j<sub_partition_count[i]; j++){
5809 int mpx, mpy;
5810 int mx, my;
5811 const int index= 4*i + block_width*j;
5812 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5813 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5814 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5816 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5817 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5818 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5820 if(IS_SUB_8X8(sub_mb_type)){
5821 mv_cache[ 1 ][0]=
5822 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5823 mv_cache[ 1 ][1]=
5824 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5826 mvd_cache[ 1 ][0]=
5827 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5828 mvd_cache[ 1 ][1]=
5829 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5830 }else if(IS_SUB_8X4(sub_mb_type)){
5831 mv_cache[ 1 ][0]= mx;
5832 mv_cache[ 1 ][1]= my;
5834 mvd_cache[ 1 ][0]= mx - mpx;
5835 mvd_cache[ 1 ][1]= my - mpy;
5836 }else if(IS_SUB_4X8(sub_mb_type)){
5837 mv_cache[ 8 ][0]= mx;
5838 mv_cache[ 8 ][1]= my;
5840 mvd_cache[ 8 ][0]= mx - mpx;
5841 mvd_cache[ 8 ][1]= my - mpy;
5843 mv_cache[ 0 ][0]= mx;
5844 mv_cache[ 0 ][1]= my;
5846 mvd_cache[ 0 ][0]= mx - mpx;
5847 mvd_cache[ 0 ][1]= my - mpy;
5849 }else{
5850 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5851 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5852 p[0] = p[1] = p[8] = p[9] = 0;
5853 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5857 } else if( IS_DIRECT(mb_type) ) {
5858 pred_direct_motion(h, &mb_type);
5859 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5860 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5861 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5862 } else {
5863 int list, mx, my, i, mpx, mpy;
5864 if(IS_16X16(mb_type)){
5865 for(list=0; list<h->list_count; list++){
5866 if(IS_DIR(mb_type, 0, list)){
5867 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5868 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5869 }else
5870 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize this and the other fill_rects below too
5872 for(list=0; list<h->list_count; list++){
5873 if(IS_DIR(mb_type, 0, list)){
5874 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5876 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5877 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5878 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5880 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5881 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5882 }else
5883 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5886 else if(IS_16X8(mb_type)){
5887 for(list=0; list<h->list_count; list++){
5888 for(i=0; i<2; i++){
5889 if(IS_DIR(mb_type, i, list)){
5890 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5891 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5892 }else
5893 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5896 for(list=0; list<h->list_count; list++){
5897 for(i=0; i<2; i++){
5898 if(IS_DIR(mb_type, i, list)){
5899 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5900 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5901 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5902 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5904 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5905 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5906 }else{
5907 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5908 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5912 }else{
5913 assert(IS_8X16(mb_type));
5914 for(list=0; list<h->list_count; list++){
5915 for(i=0; i<2; i++){
5916 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5917 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5918 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5919 }else
5920 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5923 for(list=0; list<h->list_count; list++){
5924 for(i=0; i<2; i++){
5925 if(IS_DIR(mb_type, i, list)){
5926 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5927 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5928 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5930 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5931 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5932 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5933 }else{
5934 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5935 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5942 if( IS_INTER( mb_type ) ) {
5943 h->chroma_pred_mode_table[mb_xy] = 0;
5944 write_back_motion( h, mb_type );
5947 if( !IS_INTRA16x16( mb_type ) ) {
5948 cbp = decode_cabac_mb_cbp_luma( h );
5949 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5952 h->cbp_table[mb_xy] = h->cbp = cbp;
5954 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5955 if( decode_cabac_mb_transform_size( h ) )
5956 mb_type |= MB_TYPE_8x8DCT;
5958 s->current_picture.mb_type[mb_xy]= mb_type;
5960 if( cbp || IS_INTRA16x16( mb_type ) ) {
5961 const uint8_t *scan, *scan8x8, *dc_scan;
5962 const uint32_t *qmul;
5963 int dqp;
5965 if(IS_INTERLACED(mb_type)){
5966 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5967 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5968 dc_scan= luma_dc_field_scan;
5969 }else{
5970 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5971 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5972 dc_scan= luma_dc_zigzag_scan;
5975 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5976 if( dqp == INT_MIN ){
5977 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5978 return -1;
5980 s->qscale += dqp;
5981 if(((unsigned)s->qscale) > 51){
5982 if(s->qscale<0) s->qscale+= 52;
5983 else s->qscale-= 52;
5985 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5986 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5988 if( IS_INTRA16x16( mb_type ) ) {
5989 int i;
5990 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5991 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5993 if( cbp&15 ) {
5994 qmul = h->dequant4_coeff[0][s->qscale];
5995 for( i = 0; i < 16; i++ ) {
5996 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5997 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5999 } else {
6000 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6002 } else {
6003 int i8x8, i4x4;
6004 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6005 if( cbp & (1<<i8x8) ) {
6006 if( IS_8x8DCT(mb_type) ) {
6007 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6008 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6009 } else {
6010 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6011 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6012 const int index = 4*i8x8 + i4x4;
6013 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6014 //START_TIMER
6015 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6016 //STOP_TIMER("decode_residual")
6019 } else {
6020 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6021 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6026 if( cbp&0x30 ){
6027 int c;
6028 for( c = 0; c < 2; c++ ) {
6029 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6030 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6034 if( cbp&0x20 ) {
6035 int c, i;
6036 for( c = 0; c < 2; c++ ) {
6037 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6038 for( i = 0; i < 4; i++ ) {
6039 const int index = 16 + 4 * c + i;
6040 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6041 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6044 } else {
6045 uint8_t * const nnz= &h->non_zero_count_cache[0];
6046 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6047 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6049 } else {
6050 uint8_t * const nnz= &h->non_zero_count_cache[0];
6051 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6052 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6053 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6054 h->last_qscale_diff = 0;
6057 s->current_picture.qscale_table[mb_xy]= s->qscale;
6058 write_back_non_zero_count(h);
6060 if(MB_MBAFF){
6061 h->ref_count[0] >>= 1;
6062 h->ref_count[1] >>= 1;
6065 return 0;
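/* Deblocks one vertical luma edge. For bS < 4 the normal, tc0-clipped filter
 * is applied through the dsp hook; bS == 4 (intra macroblock edge) uses the
 * strong filter inline. The alpha/beta thresholds are derived from qp and the
 * slice alpha_c0/beta offsets. */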
6069 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6070 int i, d;
6071 const int index_a = qp + h->slice_alpha_c0_offset;
6072 const int alpha = (alpha_table+52)[index_a];
6073 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6075 if( bS[0] < 4 ) {
6076 int8_t tc[4];
6077 for(i=0; i<4; i++)
6078 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6079 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6080 } else {
6081 /* 16px edge length, because bS=4 is triggered by being at
6082 * the edge of an intra MB, so all 4 bS are the same */
6083 for( d = 0; d < 16; d++ ) {
6084 const int p0 = pix[-1];
6085 const int p1 = pix[-2];
6086 const int p2 = pix[-3];
6088 const int q0 = pix[0];
6089 const int q1 = pix[1];
6090 const int q2 = pix[2];
6092 if( FFABS( p0 - q0 ) < alpha &&
6093 FFABS( p1 - p0 ) < beta &&
6094 FFABS( q1 - q0 ) < beta ) {
6096 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6097 if( FFABS( p2 - p0 ) < beta)
6099 const int p3 = pix[-4];
6100 /* p0', p1', p2' */
6101 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6102 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6103 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6104 } else {
6105 /* p0' */
6106 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6108 if( FFABS( q2 - q0 ) < beta)
6110 const int q3 = pix[3];
6111 /* q0', q1', q2' */
6112 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6113 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6114 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6115 } else {
6116 /* q0' */
6117 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6119 }else{
6120 /* p0', q0' */
6121 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6122 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6124 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
6126 pix += stride;
6130 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6131 int i;
6132 const int index_a = qp + h->slice_alpha_c0_offset;
6133 const int alpha = (alpha_table+52)[index_a];
6134 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6136 if( bS[0] < 4 ) {
6137 int8_t tc[4];
6138 for(i=0; i<4; i++)
6139 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6140 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6141 } else {
6142 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
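/* MBAFF left-edge filtering: when the current and left macroblock pairs use
 * different field/frame coding, bS and qp can change per row, so the edge is
 * filtered one row at a time instead of via the 4-row dsp functions. */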
6146 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6147 int i;
6148 for( i = 0; i < 16; i++, pix += stride) {
6149 int index_a;
6150 int alpha;
6151 int beta;
6153 int qp_index;
6154 int bS_index = (i >> 1);
6155 if (!MB_FIELD) {
6156 bS_index &= ~1;
6157 bS_index |= (i & 1);
6160 if( bS[bS_index] == 0 ) {
6161 continue;
6164 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6165 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6166 alpha = (alpha_table+52)[index_a];
6167 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6169 if( bS[bS_index] < 4 ) {
6170 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6171 const int p0 = pix[-1];
6172 const int p1 = pix[-2];
6173 const int p2 = pix[-3];
6174 const int q0 = pix[0];
6175 const int q1 = pix[1];
6176 const int q2 = pix[2];
6178 if( FFABS( p0 - q0 ) < alpha &&
6179 FFABS( p1 - p0 ) < beta &&
6180 FFABS( q1 - q0 ) < beta ) {
6181 int tc = tc0;
6182 int i_delta;
6184 if( FFABS( p2 - p0 ) < beta ) {
6185 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6186 tc++;
6188 if( FFABS( q2 - q0 ) < beta ) {
6189 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6190 tc++;
6193 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6194 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6195 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6196 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6198 }else{
6199 const int p0 = pix[-1];
6200 const int p1 = pix[-2];
6201 const int p2 = pix[-3];
6203 const int q0 = pix[0];
6204 const int q1 = pix[1];
6205 const int q2 = pix[2];
6207 if( FFABS( p0 - q0 ) < alpha &&
6208 FFABS( p1 - p0 ) < beta &&
6209 FFABS( q1 - q0 ) < beta ) {
6211 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6212 if( FFABS( p2 - p0 ) < beta)
6214 const int p3 = pix[-4];
6215 /* p0', p1', p2' */
6216 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6217 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6218 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6219 } else {
6220 /* p0' */
6221 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6223 if( FFABS( q2 - q0 ) < beta)
6225 const int q3 = pix[3];
6226 /* q0', q1', q2' */
6227 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6228 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6229 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6230 } else {
6231 /* q0' */
6232 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6234 }else{
6235 /* p0', q0' */
6236 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6237 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6239 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6244 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6245 int i;
6246 for( i = 0; i < 8; i++, pix += stride) {
6247 int index_a;
6248 int alpha;
6249 int beta;
6251 int qp_index;
6252 int bS_index = i;
6254 if( bS[bS_index] == 0 ) {
6255 continue;
6258 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6259 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6260 alpha = (alpha_table+52)[index_a];
6261 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6263 if( bS[bS_index] < 4 ) {
6264 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6265 const int p0 = pix[-1];
6266 const int p1 = pix[-2];
6267 const int q0 = pix[0];
6268 const int q1 = pix[1];
6270 if( FFABS( p0 - q0 ) < alpha &&
6271 FFABS( p1 - p0 ) < beta &&
6272 FFABS( q1 - q0 ) < beta ) {
6273 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6275 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6276 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6277 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6279 }else{
6280 const int p0 = pix[-1];
6281 const int p1 = pix[-2];
6282 const int q0 = pix[0];
6283 const int q1 = pix[1];
6285 if( FFABS( p0 - q0 ) < alpha &&
6286 FFABS( p1 - p0 ) < beta &&
6287 FFABS( q1 - q0 ) < beta ) {
6289 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6290 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6291 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6297 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6298 int i, d;
6299 const int index_a = qp + h->slice_alpha_c0_offset;
6300 const int alpha = (alpha_table+52)[index_a];
6301 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6302 const int pix_next = stride;
6304 if( bS[0] < 4 ) {
6305 int8_t tc[4];
6306 for(i=0; i<4; i++)
6307 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6308 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6309 } else {
6310 /* 16px edge length, see filter_mb_edgev */
6311 for( d = 0; d < 16; d++ ) {
6312 const int p0 = pix[-1*pix_next];
6313 const int p1 = pix[-2*pix_next];
6314 const int p2 = pix[-3*pix_next];
6315 const int q0 = pix[0];
6316 const int q1 = pix[1*pix_next];
6317 const int q2 = pix[2*pix_next];
6319 if( FFABS( p0 - q0 ) < alpha &&
6320 FFABS( p1 - p0 ) < beta &&
6321 FFABS( q1 - q0 ) < beta ) {
6323 const int p3 = pix[-4*pix_next];
6324 const int q3 = pix[ 3*pix_next];
6326 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6327 if( FFABS( p2 - p0 ) < beta) {
6328 /* p0', p1', p2' */
6329 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6330 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6331 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6332 } else {
6333 /* p0' */
6334 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6336 if( FFABS( q2 - q0 ) < beta) {
6337 /* q0', q1', q2' */
6338 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6339 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6340 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6341 } else {
6342 /* q0' */
6343 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6345 }else{
6346 /* p0', q0' */
6347 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6348 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6350 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6352 pix++;
6357 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6358 int i;
6359 const int index_a = qp + h->slice_alpha_c0_offset;
6360 const int alpha = (alpha_table+52)[index_a];
6361 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6363 if( bS[0] < 4 ) {
6364 int8_t tc[4];
6365 for(i=0; i<4; i++)
6366 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6367 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6368 } else {
6369 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
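/* Fast deblocking path for non-MBAFF macroblocks with equal chroma qp
 * offsets: it returns early when all involved qp values are below the
 * threshold where the filter cannot trigger, derives bS for inter macroblocks
 * with dsp.h264_loop_filter_strength, and falls back to filter_mb() for the
 * remaining cases (first row/column, per-slice filtering, ...). */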
6373 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6374 MpegEncContext * const s = &h->s;
6375 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6376 int mb_xy, mb_type;
6377 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6379 mb_xy = mb_x + mb_y*s->mb_stride;
6381 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6382 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6383 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6384 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6385 return;
6387 assert(!FRAME_MBAFF);
6389 mb_type = s->current_picture.mb_type[mb_xy];
6390 qp = s->current_picture.qscale_table[mb_xy];
6391 qp0 = s->current_picture.qscale_table[mb_xy-1];
6392 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6393 qpc = get_chroma_qp( h, 0, qp );
6394 qpc0 = get_chroma_qp( h, 0, qp0 );
6395 qpc1 = get_chroma_qp( h, 0, qp1 );
6396 qp0 = (qp + qp0 + 1) >> 1;
6397 qp1 = (qp + qp1 + 1) >> 1;
6398 qpc0 = (qpc + qpc0 + 1) >> 1;
6399 qpc1 = (qpc + qpc1 + 1) >> 1;
6400 qp_thresh = 15 - h->slice_alpha_c0_offset;
6401 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6402 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6403 return;
6405 if( IS_INTRA(mb_type) ) {
6406 int16_t bS4[4] = {4,4,4,4};
6407 int16_t bS3[4] = {3,3,3,3};
6408 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6409 if( IS_8x8DCT(mb_type) ) {
6410 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6411 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6412 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6413 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6414 } else {
6415 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6416 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6417 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6418 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6419 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6420 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6421 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6422 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6424 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6425 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6426 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6427 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6428 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6429 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6430 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6431 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6432 return;
6433 } else {
6434 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6435 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6436 int edges;
6437 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6438 edges = 4;
6439 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6440 } else {
6441 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6442 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6443 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6444 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6445 ? 3 : 0;
6446 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6447 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6448 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6449 (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 );
6451 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6452 bSv[0][0] = 0x0004000400040004ULL;
6453 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6454 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6456 #define FILTER(hv,dir,edge)\
6457 if(bSv[dir][edge]) {\
6458 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6459 if(!(edge&1)) {\
6460 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6461 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6464 if( edges == 1 ) {
6465 FILTER(v,0,0);
6466 FILTER(h,1,0);
6467 } else if( IS_8x8DCT(mb_type) ) {
6468 FILTER(v,0,0);
6469 FILTER(v,0,2);
6470 FILTER(h,1,0);
6471 FILTER(h,1,2);
6472 } else {
6473 FILTER(v,0,0);
6474 FILTER(v,0,1);
6475 FILTER(v,0,2);
6476 FILTER(v,0,3);
6477 FILTER(h,1,0);
6478 FILTER(h,1,1);
6479 FILTER(h,1,2);
6480 FILTER(h,1,3);
6482 #undef FILTER
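/* Full per-macroblock deblocking: handles the MBAFF left edge (8 bS values,
 * 2 qp pairs), the doubled horizontal filtering of a frame macroblock above a
 * field-coded pair, and the generic bS derivation from intra/nnz/reference/mv
 * differences for every internal and border edge. */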
6486 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6487 MpegEncContext * const s = &h->s;
6488 const int mb_xy= mb_x + mb_y*s->mb_stride;
6489 const int mb_type = s->current_picture.mb_type[mb_xy];
6490 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6491 int first_vertical_edge_done = 0;
6492 int dir;
6493 /* FIXME: A given frame may occupy more than one position in
6494 * the reference list. So ref2frm should be populated with
6495 * frame numbers, not indices. */
6496 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6497 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6499 //for sufficiently low qp, filtering would not change anything
6500 //this is a conservative estimate: we could also check beta_offset and a more accurate chroma_qp
6501 if(!FRAME_MBAFF){
6502 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
6503 int qp = s->current_picture.qscale_table[mb_xy];
6504 if(qp <= qp_thresh
6505 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6506 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6507 return;
6511 if (FRAME_MBAFF
6512 // left mb is in picture
6513 && h->slice_table[mb_xy-1] != 255
6514 // and current and left pair do not have the same interlaced type
6515 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6516 // and left mb is in the same slice if deblocking_filter == 2
6517 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6518 /* The first vertical edge is different in MBAFF frames:
6519 * there are 8 different bS to compute and 2 different Qp values. */
6521 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6522 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6523 int16_t bS[8];
6524 int qp[2];
6525 int bqp[2];
6526 int rqp[2];
6527 int mb_qp, mbn0_qp, mbn1_qp;
6528 int i;
6529 first_vertical_edge_done = 1;
6531 if( IS_INTRA(mb_type) )
6532 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6533 else {
6534 for( i = 0; i < 8; i++ ) {
6535 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6537 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6538 bS[i] = 4;
6539 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6540 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6541 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
6542 bS[i] = 2;
6543 else
6544 bS[i] = 1;
6548 mb_qp = s->current_picture.qscale_table[mb_xy];
6549 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6550 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6551 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6552 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6553 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6554 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6555 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6556 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6557 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6558 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6559 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6560 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6562 /* Filter edge */
6563 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6564 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6565 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6566 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6567 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6569 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6570 for( dir = 0; dir < 2; dir++ )
6572 int edge;
6573 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6574 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6575 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6577 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6578 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6579 // how often to recheck mv-based bS when iterating between edges
6580 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6581 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6582 // how often to recheck mv-based bS when iterating along each edge
6583 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6585 if (first_vertical_edge_done) {
6586 start = 1;
6587 first_vertical_edge_done = 0;
6590 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6591 start = 1;
6593 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6594 && !IS_INTERLACED(mb_type)
6595 && IS_INTERLACED(mbm_type)
6597 // This is a special case in the standard where the filtering must
6598 // be done twice (once for each field) even if we are in a
6599 // frame macroblock.
6601 static const int nnz_idx[4] = {4,5,6,3};
6602 unsigned int tmp_linesize = 2 * linesize;
6603 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6604 int mbn_xy = mb_xy - 2 * s->mb_stride;
6605 int qp;
6606 int i, j;
6607 int16_t bS[4];
6609 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6610 if( IS_INTRA(mb_type) ||
6611 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6612 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6613 } else {
6614 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6615 for( i = 0; i < 4; i++ ) {
6616 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6617 mbn_nnz[nnz_idx[i]] != 0 )
6618 bS[i] = 2;
6619 else
6620 bS[i] = 1;
6623 // Do not use s->qscale as the luma quantizer because it does not have
6624 // the same value in IPCM macroblocks.
6625 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6626 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6627 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6628 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6629 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6630 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6631 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6632 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6635 start = 1;
6638 /* Calculate bS */
6639 for( edge = start; edge < edges; edge++ ) {
6640 /* mbn_xy: neighbor macroblock */
6641 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6642 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6643 int16_t bS[4];
6644 int qp;
6646 if( (edge&1) && IS_8x8DCT(mb_type) )
6647 continue;
6649 if( IS_INTRA(mb_type) ||
6650 IS_INTRA(mbn_type) ) {
6651 int value;
6652 if (edge == 0) {
6653 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6654 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6656 value = 4;
6657 } else {
6658 value = 3;
6660 } else {
6661 value = 3;
6663 bS[0] = bS[1] = bS[2] = bS[3] = value;
6664 } else {
6665 int i, l;
6666 int mv_done;
6668 if( edge & mask_edge ) {
6669 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6670 mv_done = 1;
6672 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6673 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6674 mv_done = 1;
6676 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6677 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6678 int bn_idx= b_idx - (dir ? 8:1);
6679 int v = 0;
6680 for( l = 0; !v && l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6681 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6682 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6683 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6685 bS[0] = bS[1] = bS[2] = bS[3] = v;
6686 mv_done = 1;
6688 else
6689 mv_done = 0;
6691 for( i = 0; i < 4; i++ ) {
6692 int x = dir == 0 ? edge : i;
6693 int y = dir == 0 ? i : edge;
6694 int b_idx= 8 + 4 + x + 8*y;
6695 int bn_idx= b_idx - (dir ? 8:1);
6697 if( h->non_zero_count_cache[b_idx] != 0 ||
6698 h->non_zero_count_cache[bn_idx] != 0 ) {
6699 bS[i] = 2;
6701 else if(!mv_done)
6703 bS[i] = 0;
6704 for( l = 0; l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6705 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6706 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6707 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6708 bS[i] = 1;
6709 break;
6715 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6716 continue;
6719 /* Filter edge */
6720 // Do not use s->qscale as the luma quantizer because it does not have
6721 // the same value in IPCM macroblocks.
6722 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6723 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6724 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6725 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6726 if( dir == 0 ) {
6727 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6728 if( (edge&1) == 0 ) {
6729 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6730 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6731 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6732 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6734 } else {
6735 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6736 if( (edge&1) == 0 ) {
6737 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6738 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6739 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6740 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6747 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6748 MpegEncContext * const s = &h->s;
6749 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6751 s->mb_skip_run= -1;
6753 if( h->pps.cabac ) {
6754 int i;
6756 /* realign */
6757 align_get_bits( &s->gb );
6759 /* init cabac */
6760 ff_init_cabac_states( &h->cabac);
6761 ff_init_cabac_decoder( &h->cabac,
6762 s->gb.buffer + get_bits_count(&s->gb)/8,
6763 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6764 /* calculate pre-state */
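/* This follows the CABAC context initialization of the standard:
 * preCtxState = clip(((m * SliceQP) >> 4) + n, 1, 126), with (m, n) taken
 * from the I or P/B init tables selected by cabac_init_idc. The state byte
 * below packs pStateIdx and valMPS as (pStateIdx << 1) | MPS, i.e.
 * 2*(63 - pre) for an MPS of 0 and 2*(pre - 64) + 1 for an MPS of 1. */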
6765 for( i= 0; i < 460; i++ ) {
6766 int pre;
6767 if( h->slice_type == FF_I_TYPE )
6768 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6769 else
6770 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6772 if( pre <= 63 )
6773 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6774 else
6775 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
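/* Main CABAC macroblock loop: decode one macroblock (or, with MBAFF, the
 * bottom macroblock of the pair as well), then read end_of_slice_flag via
 * get_cabac_terminate(); decoding stops on that flag, on reaching the end
 * of the picture, or on a bitstream error. */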
6778 for(;;){
6779 //START_TIMER
6780 int ret = decode_mb_cabac(h);
6781 int eos;
6782 //STOP_TIMER("decode_mb_cabac")
6784 if(ret>=0) hl_decode_mb(h);
6786 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6787 s->mb_y++;
6789 if(ret>=0) ret = decode_mb_cabac(h);
6791 if(ret>=0) hl_decode_mb(h);
6792 s->mb_y--;
6794 eos = get_cabac_terminate( &h->cabac );
6796 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6797 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6798 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6799 return -1;
6802 if( ++s->mb_x >= s->mb_width ) {
6803 s->mb_x = 0;
6804 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6805 ++s->mb_y;
6806 if(FIELD_OR_MBAFF_PICTURE) {
6807 ++s->mb_y;
6811 if( eos || s->mb_y >= s->mb_height ) {
6812 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6813 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6814 return 0;
6818 } else {
6819 for(;;){
6820 int ret = decode_mb_cavlc(h);
6822 if(ret>=0) hl_decode_mb(h);
6824 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6825 s->mb_y++;
6826 ret = decode_mb_cavlc(h);
6828 if(ret>=0) hl_decode_mb(h);
6829 s->mb_y--;
6832 if(ret<0){
6833 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6834 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6836 return -1;
6839 if(++s->mb_x >= s->mb_width){
6840 s->mb_x=0;
6841 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6842 ++s->mb_y;
6843 if(FIELD_OR_MBAFF_PICTURE) {
6844 ++s->mb_y;
6846 if(s->mb_y >= s->mb_height){
6847 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6849 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6850 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6852 return 0;
6853 }else{
6854 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6856 return -1;
6861 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6862 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6863 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6864 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6866 return 0;
6867 }else{
6868 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6870 return -1;
6876 #if 0
6877 for(;s->mb_y < s->mb_height; s->mb_y++){
6878 for(;s->mb_x < s->mb_width; s->mb_x++){
6879 int ret= decode_mb(h);
6881 hl_decode_mb(h);
6883 if(ret<0){
6884 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6885 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6887 return -1;
6890 if(++s->mb_x >= s->mb_width){
6891 s->mb_x=0;
6892 if(++s->mb_y >= s->mb_height){
6893 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6894 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6896 return 0;
6897 }else{
6898 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6900 return -1;
6905 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6906 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6907 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6909 return 0;
6910 }else{
6911 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6913 return -1;
6917 s->mb_x=0;
6918 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6920 #endif
6921 return -1; //not reached
6924 static int decode_unregistered_user_data(H264Context *h, int size){
6925 MpegEncContext * const s = &h->s;
6926 uint8_t user_data[16+256];
6927 int e, build, i;
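/* SEI type 5 (user data unregistered) starts with a 16-byte UUID; the text
 * after it is scanned for the x264 version banner so that h->x264_build can
 * be used elsewhere in the decoder to work around known bugs of old x264
 * versions. */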
6929 if(size<16)
6930 return -1;
6932 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6933 user_data[i]= get_bits(&s->gb, 8);
6936 user_data[i]= 0;
6937 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6938 if(e==1 && build>=0)
6939 h->x264_build= build;
6941 if(s->avctx->debug & FF_DEBUG_BUGS)
6942 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6944 for(; i<size; i++)
6945 skip_bits(&s->gb, 8);
6947 return 0;
6950 static int decode_sei(H264Context *h){
6951 MpegEncContext * const s = &h->s;
6953 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6954 int size, type;
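/* payloadType and payloadSize use the usual SEI byte-run coding: any number
 * of 0xFF continuation bytes followed by one final byte, all of which are
 * summed up by the loops below. */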
6956 type=0;
6957 do{
6958 type+= show_bits(&s->gb, 8);
6959 }while(get_bits(&s->gb, 8) == 255);
6961 size=0;
6962 do{
6963 size+= show_bits(&s->gb, 8);
6964 }while(get_bits(&s->gb, 8) == 255);
6966 switch(type){
6967 case 5:
6968 if(decode_unregistered_user_data(h, size) < 0)
6969 return -1;
6970 break;
6971 default:
6972 skip_bits(&s->gb, 8*size);
6975 //FIXME check bits here
6976 align_get_bits(&s->gb);
6979 return 0;
6982 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6983 MpegEncContext * const s = &h->s;
6984 int cpb_count, i;
6985 cpb_count = get_ue_golomb(&s->gb) + 1;
6986 get_bits(&s->gb, 4); /* bit_rate_scale */
6987 get_bits(&s->gb, 4); /* cpb_size_scale */
6988 for(i=0; i<cpb_count; i++){
6989 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6990 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6991 get_bits1(&s->gb); /* cbr_flag */
6993 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6994 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6995 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6996 get_bits(&s->gb, 5); /* time_offset_length */
6999 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7000 MpegEncContext * const s = &h->s;
7001 int aspect_ratio_info_present_flag;
7002 unsigned int aspect_ratio_idc;
7003 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7005 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7007 if( aspect_ratio_info_present_flag ) {
7008 aspect_ratio_idc= get_bits(&s->gb, 8);
7009 if( aspect_ratio_idc == EXTENDED_SAR ) {
7010 sps->sar.num= get_bits(&s->gb, 16);
7011 sps->sar.den= get_bits(&s->gb, 16);
7012 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7013 sps->sar= pixel_aspect[aspect_ratio_idc];
7014 }else{
7015 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7016 return -1;
7018 }else{
7019 sps->sar.num=
7020 sps->sar.den= 0;
7022 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7024 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7025 get_bits1(&s->gb); /* overscan_appropriate_flag */
7028 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7029 get_bits(&s->gb, 3); /* video_format */
7030 get_bits1(&s->gb); /* video_full_range_flag */
7031 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7032 get_bits(&s->gb, 8); /* colour_primaries */
7033 get_bits(&s->gb, 8); /* transfer_characteristics */
7034 get_bits(&s->gb, 8); /* matrix_coefficients */
7038 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7039 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7040 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7043 sps->timing_info_present_flag = get_bits1(&s->gb);
7044 if(sps->timing_info_present_flag){
7045 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7046 sps->time_scale = get_bits_long(&s->gb, 32);
7047 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7050 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7051 if(nal_hrd_parameters_present_flag)
7052 decode_hrd_parameters(h, sps);
7053 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7054 if(vcl_hrd_parameters_present_flag)
7055 decode_hrd_parameters(h, sps);
7056 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7057 get_bits1(&s->gb); /* low_delay_hrd_flag */
7058 get_bits1(&s->gb); /* pic_struct_present_flag */
7060 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7061 if(sps->bitstream_restriction_flag){
7062 unsigned int num_reorder_frames;
7063 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7064 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7065 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7066 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7067 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7068 num_reorder_frames= get_ue_golomb(&s->gb);
7069 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7071 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7072 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7073 return -1;
7076 sps->num_reorder_frames= num_reorder_frames;
7079 return 0;
7082 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7083 const uint8_t *jvt_list, const uint8_t *fallback_list){
7084 MpegEncContext * const s = &h->s;
7085 int i, last = 8, next = 8;
7086 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
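/* Scaling lists are delta-coded along the zig-zag scan: each coefficient is
 * last + delta (mod 256). A delta giving zero on the very first entry
 * selects the built-in JVT default matrix; once a decoded value reaches
 * zero, the remaining entries simply repeat the previous value. */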
7087 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7088 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7089 else
7090 for(i=0;i<size;i++){
7091 if(next)
7092 next = (last + get_se_golomb(&s->gb)) & 0xff;
7093 if(!i && !next){ /* matrix not written, we use the preset one */
7094 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7095 break;
7097 last = factors[scan[i]] = next ? next : last;
7101 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7102 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7103 MpegEncContext * const s = &h->s;
7104 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7105 const uint8_t *fallback[4] = {
7106 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7107 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7108 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7109 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7111 if(get_bits1(&s->gb)){
7112 sps->scaling_matrix_present |= is_sps;
7113 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7114 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7115 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7116 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7117 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7118 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7119 if(is_sps || pps->transform_8x8_mode){
7120 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7121 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7123 } else if(fallback_sps) {
7124 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7125 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7130 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7132 static void *
7133 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7134 const size_t size, const char *name)
7136 if(id>=max) {
7137 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7138 return NULL;
7141 if(!vec[id]) {
7142 vec[id] = av_mallocz(size);
7143 if(vec[id] == NULL)
7144 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
7146 return vec[id];
7149 static inline int decode_seq_parameter_set(H264Context *h){
7150 MpegEncContext * const s = &h->s;
7151 int profile_idc, level_idc;
7152 unsigned int sps_id, tmp, mb_width, mb_height;
7153 int i;
7154 SPS *sps;
7156 profile_idc= get_bits(&s->gb, 8);
7157 get_bits1(&s->gb); //constraint_set0_flag
7158 get_bits1(&s->gb); //constraint_set1_flag
7159 get_bits1(&s->gb); //constraint_set2_flag
7160 get_bits1(&s->gb); //constraint_set3_flag
7161 get_bits(&s->gb, 4); // reserved
7162 level_idc= get_bits(&s->gb, 8);
7163 sps_id= get_ue_golomb(&s->gb);
7165 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7166 if(sps == NULL)
7167 return -1;
7169 sps->profile_idc= profile_idc;
7170 sps->level_idc= level_idc;
7172 if(sps->profile_idc >= 100){ //high profile
7173 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7174 get_bits1(&s->gb); //residual_color_transform_flag
7175 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7176 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7177 sps->transform_bypass = get_bits1(&s->gb);
7178 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7179 }else
7180 sps->scaling_matrix_present = 0;
7182 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7183 sps->poc_type= get_ue_golomb(&s->gb);
7185 if(sps->poc_type == 0){ //FIXME #define
7186 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7187 } else if(sps->poc_type == 1){//FIXME #define
7188 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7189 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7190 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7191 tmp= get_ue_golomb(&s->gb);
7193 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7194 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7195 return -1;
7197 sps->poc_cycle_length= tmp;
7199 for(i=0; i<sps->poc_cycle_length; i++)
7200 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7201 }else if(sps->poc_type != 2){
7202 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7203 return -1;
7206 tmp= get_ue_golomb(&s->gb);
7207 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7208 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7209 return -1;
7211 sps->ref_frame_count= tmp;
7212 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7213 mb_width= get_ue_golomb(&s->gb) + 1;
7214 mb_height= get_ue_golomb(&s->gb) + 1;
7215 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7216 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7217 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7218 return -1;
7220 sps->mb_width = mb_width;
7221 sps->mb_height= mb_height;
7223 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7224 if(!sps->frame_mbs_only_flag)
7225 sps->mb_aff= get_bits1(&s->gb);
7226 else
7227 sps->mb_aff= 0;
7229 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7231 #ifndef ALLOW_INTERLACE
7232 if(sps->mb_aff)
7233 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7234 #endif
7235 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7236 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7238 sps->crop= get_bits1(&s->gb);
7239 if(sps->crop){
7240 sps->crop_left = get_ue_golomb(&s->gb);
7241 sps->crop_right = get_ue_golomb(&s->gb);
7242 sps->crop_top = get_ue_golomb(&s->gb);
7243 sps->crop_bottom= get_ue_golomb(&s->gb);
7244 if(sps->crop_left || sps->crop_top){
7245 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7247 }else{
7248 sps->crop_left =
7249 sps->crop_right =
7250 sps->crop_top =
7251 sps->crop_bottom= 0;
7254 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7255 if( sps->vui_parameters_present_flag )
7256 decode_vui_parameters(h, sps);
7258 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7259 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7260 sps_id, sps->profile_idc, sps->level_idc,
7261 sps->poc_type,
7262 sps->ref_frame_count,
7263 sps->mb_width, sps->mb_height,
7264 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7265 sps->direct_8x8_inference_flag ? "8B8" : "",
7266 sps->crop_left, sps->crop_right,
7267 sps->crop_top, sps->crop_bottom,
7268 sps->vui_parameters_present_flag ? "VUI" : ""
7271 return 0;
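/**
 * Builds a lookup table mapping luma QP to chroma QP for one
 * chroma_qp_index_offset: the chroma QP is the chroma_qp[] entry for the
 * offset-adjusted luma QP, clipped to the 0..51 range.
 */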
7274 static void
7275 build_qp_table(PPS *pps, int t, int index)
7277 int i;
7278 for(i = 0; i < 255; i++)
7279 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
7282 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7283 MpegEncContext * const s = &h->s;
7284 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7285 PPS *pps;
7287 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7288 if(pps == NULL)
7289 return -1;
7291 tmp= get_ue_golomb(&s->gb);
7292 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7293 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7294 return -1;
7296 pps->sps_id= tmp;
7298 pps->cabac= get_bits1(&s->gb);
7299 pps->pic_order_present= get_bits1(&s->gb);
7300 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7301 if(pps->slice_group_count > 1 ){
7302 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7303 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7304 switch(pps->mb_slice_group_map_type){
7305 case 0:
7306 #if 0
7307 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7308 | run_length[ i ] |1 |ue(v) |
7309 #endif
7310 break;
7311 case 2:
7312 #if 0
7313 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7314 |{ | | |
7315 | top_left_mb[ i ] |1 |ue(v) |
7316 | bottom_right_mb[ i ] |1 |ue(v) |
7317 | } | | |
7318 #endif
7319 break;
7320 case 3:
7321 case 4:
7322 case 5:
7323 #if 0
7324 | slice_group_change_direction_flag |1 |u(1) |
7325 | slice_group_change_rate_minus1 |1 |ue(v) |
7326 #endif
7327 break;
7328 case 6:
7329 #if 0
7330 | slice_group_id_cnt_minus1 |1 |ue(v) |
7331 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7332 |) | | |
7333 | slice_group_id[ i ] |1 |u(v) |
7334 #endif
7335 break;
7338 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7339 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7340 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7341 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7342 pps->ref_count[0]= pps->ref_count[1]= 1;
7343 return -1;
7346 pps->weighted_pred= get_bits1(&s->gb);
7347 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7348 pps->init_qp= get_se_golomb(&s->gb) + 26;
7349 pps->init_qs= get_se_golomb(&s->gb) + 26;
7350 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7351 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7352 pps->constrained_intra_pred= get_bits1(&s->gb);
7353 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7355 pps->transform_8x8_mode= 0;
7356 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
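// A weight of 16 is the neutral scaling factor, so these memsets leave the
// PPS with flat (unscaled) matrices until the bitstream overrides them below.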
7357 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7358 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7360 if(get_bits_count(&s->gb) < bit_length){
7361 pps->transform_8x8_mode= get_bits1(&s->gb);
7362 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7363 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7364 } else {
7365 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7368 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7369 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7370 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7371 h->pps.chroma_qp_diff= 1;
7372 } else
7373 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7375 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7376 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7377 pps_id, pps->sps_id,
7378 pps->cabac ? "CABAC" : "CAVLC",
7379 pps->slice_group_count,
7380 pps->ref_count[0], pps->ref_count[1],
7381 pps->weighted_pred ? "weighted" : "",
7382 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7383 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7384 pps->constrained_intra_pred ? "CONSTR" : "",
7385 pps->redundant_pic_cnt_present ? "REDU" : "",
7386 pps->transform_8x8_mode ? "8x8DCT" : ""
7390 return 0;
7394 * Call decode_slice() for each context.
7396 * @param h h264 master context
7397 * @param context_count number of contexts to execute
7399 static void execute_decode_slices(H264Context *h, int context_count){
7400 MpegEncContext * const s = &h->s;
7401 AVCodecContext * const avctx= s->avctx;
7402 H264Context *hx;
7403 int i;
7405 if(context_count == 1) {
7406 decode_slice(avctx, h);
7407 } else {
7408 for(i = 1; i < context_count; i++) {
7409 hx = h->thread_context[i];
7410 hx->s.error_resilience = avctx->error_resilience;
7411 hx->s.error_count = 0;
7414 avctx->execute(avctx, (void *)decode_slice,
7415 (void **)h->thread_context, NULL, context_count);
7417 /* pull back stuff from slices to master context */
7418 hx = h->thread_context[context_count - 1];
7419 s->mb_x = hx->s.mb_x;
7420 s->mb_y = hx->s.mb_y;
7421 s->dropable = hx->s.dropable;
7422 s->picture_structure = hx->s.picture_structure;
7423 for(i = 1; i < context_count; i++)
7424 h->s.error_count += h->thread_context[i]->s.error_count;
7429 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7430 MpegEncContext * const s = &h->s;
7431 AVCodecContext * const avctx= s->avctx;
7432 int buf_index=0;
7433 H264Context *hx; ///< thread context
7434 int context_count = 0;
7436 h->max_contexts = avctx->thread_count;
7437 #if 0
7438 int i;
7439 for(i=0; i<50; i++){
7440 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7442 #endif
7443 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7444 h->current_slice = 0;
7445 if (!s->first_field)
7446 s->current_picture_ptr= NULL;
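/* Walk the buffer one NAL unit at a time: AVC-style streams carry an
 * explicit length prefix of nal_length_size bytes, while Annex B streams are
 * split on 00 00 01 start codes. decode_nal() then strips the emulation
 * prevention bytes, the units are dispatched on nal_unit_type below, and
 * slices are batched into up to max_contexts thread contexts before
 * execute_decode_slices() runs them. */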
7449 for(;;){
7450 int consumed;
7451 int dst_length;
7452 int bit_length;
7453 const uint8_t *ptr;
7454 int i, nalsize = 0;
7455 int err;
7457 if(h->is_avc) {
7458 if(buf_index >= buf_size) break;
7459 nalsize = 0;
7460 for(i = 0; i < h->nal_length_size; i++)
7461 nalsize = (nalsize << 8) | buf[buf_index++];
7462 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7463 if(nalsize == 1){
7464 buf_index++;
7465 continue;
7466 }else{
7467 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7468 break;
7471 } else {
7472 // start code prefix search
7473 for(; buf_index + 3 < buf_size; buf_index++){
7474 // This should always succeed in the first iteration.
7475 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7476 break;
7479 if(buf_index+3 >= buf_size) break;
7481 buf_index+=3;
7484 hx = h->thread_context[context_count];
7486 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7487 if (ptr==NULL || dst_length < 0){
7488 return -1;
7490 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7491 dst_length--;
7492 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7494 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7495 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7498 if (h->is_avc && (nalsize != consumed)){
7499 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7500 consumed= nalsize;
7503 buf_index += consumed;
7505 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7506 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7507 continue;
7509 again:
7510 err = 0;
7511 switch(hx->nal_unit_type){
7512 case NAL_IDR_SLICE:
7513 if (h->nal_unit_type != NAL_IDR_SLICE) {
7514 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7515 return -1;
7517 idr(h); //FIXME ensure we don't lose some frames if there is reordering
7518 case NAL_SLICE:
7519 init_get_bits(&hx->s.gb, ptr, bit_length);
7520 hx->intra_gb_ptr=
7521 hx->inter_gb_ptr= &hx->s.gb;
7522 hx->s.data_partitioning = 0;
7524 if((err = decode_slice_header(hx, h)))
7525 break;
7527 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7528 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7529 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7530 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
7531 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7532 && avctx->skip_frame < AVDISCARD_ALL)
7533 context_count++;
7534 break;
7535 case NAL_DPA:
7536 init_get_bits(&hx->s.gb, ptr, bit_length);
7537 hx->intra_gb_ptr=
7538 hx->inter_gb_ptr= NULL;
7539 hx->s.data_partitioning = 1;
7541 err = decode_slice_header(hx, h);
7542 break;
7543 case NAL_DPB:
7544 init_get_bits(&hx->intra_gb, ptr, bit_length);
7545 hx->intra_gb_ptr= &hx->intra_gb;
7546 break;
7547 case NAL_DPC:
7548 init_get_bits(&hx->inter_gb, ptr, bit_length);
7549 hx->inter_gb_ptr= &hx->inter_gb;
7551 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7552 && s->context_initialized
7553 && s->hurry_up < 5
7554 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7555 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
7556 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7557 && avctx->skip_frame < AVDISCARD_ALL)
7558 context_count++;
7559 break;
7560 case NAL_SEI:
7561 init_get_bits(&s->gb, ptr, bit_length);
7562 decode_sei(h);
7563 break;
7564 case NAL_SPS:
7565 init_get_bits(&s->gb, ptr, bit_length);
7566 decode_seq_parameter_set(h);
7568 if(s->flags& CODEC_FLAG_LOW_DELAY)
7569 s->low_delay=1;
7571 if(avctx->has_b_frames < 2)
7572 avctx->has_b_frames= !s->low_delay;
7573 break;
7574 case NAL_PPS:
7575 init_get_bits(&s->gb, ptr, bit_length);
7577 decode_picture_parameter_set(h, bit_length);
7579 break;
7580 case NAL_AUD:
7581 case NAL_END_SEQUENCE:
7582 case NAL_END_STREAM:
7583 case NAL_FILLER_DATA:
7584 case NAL_SPS_EXT:
7585 case NAL_AUXILIARY_SLICE:
7586 break;
7587 default:
7588 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7591 if(context_count == h->max_contexts) {
7592 execute_decode_slices(h, context_count);
7593 context_count = 0;
7596 if (err < 0)
7597 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7598 else if(err == 1) {
7599 /* Slice could not be decoded in parallel mode, copy down
7600 * NAL unit stuff to context 0 and restart. Note that
7601 * rbsp_buffer is not transferred, but since we no longer
7602 * run in parallel mode this should not be an issue. */
7603 h->nal_unit_type = hx->nal_unit_type;
7604 h->nal_ref_idc = hx->nal_ref_idc;
7605 hx = h;
7606 goto again;
7609 if(context_count)
7610 execute_decode_slices(h, context_count);
7611 return buf_index;
7615 * returns the number of bytes consumed for building the current frame
7617 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7618 if(s->flags&CODEC_FLAG_TRUNCATED){
7619 pos -= s->parse_context.last_index;
7620 if(pos<0) pos=0; // FIXME remove (unneeded?)
7622 return pos;
7623 }else{
7624 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7625 if(pos+10>buf_size) pos=buf_size; // oops ;)
7627 return pos;
7631 static int decode_frame(AVCodecContext *avctx,
7632 void *data, int *data_size,
7633 const uint8_t *buf, int buf_size)
7635 H264Context *h = avctx->priv_data;
7636 MpegEncContext *s = &h->s;
7637 AVFrame *pict = data;
7638 int buf_index;
7640 s->flags= avctx->flags;
7641 s->flags2= avctx->flags2;
7643 /* no supplementary picture */
7644 if (buf_size == 0) {
7645 Picture *out;
7646 int i, out_idx;
7648 //FIXME factorize this with the output code below
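/* buf_size == 0 means the caller is draining the decoder, so return the
 * pending picture with the smallest POC (without looking past the next
 * keyframe) from the delayed_pic list instead of parsing anything. */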
7649 out = h->delayed_pic[0];
7650 out_idx = 0;
7651 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7652 if(h->delayed_pic[i]->poc < out->poc){
7653 out = h->delayed_pic[i];
7654 out_idx = i;
7657 for(i=out_idx; h->delayed_pic[i]; i++)
7658 h->delayed_pic[i] = h->delayed_pic[i+1];
7660 if(out){
7661 *data_size = sizeof(AVFrame);
7662 *pict= *(AVFrame*)out;
7665 return 0;
7668 if(s->flags&CODEC_FLAG_TRUNCATED){
7669 int next= ff_h264_find_frame_end(h, buf, buf_size);
7671 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7672 return buf_size;
7673 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7676 if(h->is_avc && !h->got_avcC) {
7677 int i, cnt, nalsize;
7678 unsigned char *p = avctx->extradata;
7679 if(avctx->extradata_size < 7) {
7680 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7681 return -1;
7683 if(*p != 1) {
7684 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7685 return -1;
7687 /* sps and pps in the avcC always have length coded with 2 bytes,
7688 so put a fake nal_length_size = 2 while parsing them */
7689 h->nal_length_size = 2;
7690 // Decode sps from avcC
7691 cnt = *(p+5) & 0x1f; // Number of sps
7692 p += 6;
7693 for (i = 0; i < cnt; i++) {
7694 nalsize = AV_RB16(p) + 2;
7695 if(decode_nal_units(h, p, nalsize) < 0) {
7696 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7697 return -1;
7699 p += nalsize;
7701 // Decode pps from avcC
7702 cnt = *(p++); // Number of pps
7703 for (i = 0; i < cnt; i++) {
7704 nalsize = AV_RB16(p) + 2;
7705 if(decode_nal_units(h, p, nalsize) != nalsize) {
7706 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7707 return -1;
7709 p += nalsize;
7711 // Now store the right NAL length size, which will be used to parse all other NALs
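// The low two bits of byte 4 of the avcC box hold lengthSizeMinusOne,
// so the per-NAL length prefix is between 1 and 4 bytes.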
7712 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7713 // Do not reparse avcC
7714 h->got_avcC = 1;
7717 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7718 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7719 return -1;
7722 buf_index=decode_nal_units(h, buf, buf_size);
7723 if(buf_index < 0)
7724 return -1;
7726 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7727 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7728 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7729 return -1;
7732 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7733 Picture *out = s->current_picture_ptr;
7734 Picture *cur = s->current_picture_ptr;
7735 Picture *prev = h->delayed_output_pic;
7736 int i, pics, cross_idr, out_of_order, out_idx;
7738 s->mb_y= 0;
7740 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7741 s->current_picture_ptr->pict_type= s->pict_type;
7743 h->prev_frame_num_offset= h->frame_num_offset;
7744 h->prev_frame_num= h->frame_num;
7745 if(!s->dropable) {
7746 h->prev_poc_msb= h->poc_msb;
7747 h->prev_poc_lsb= h->poc_lsb;
7748 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7752 * FIXME: Error handling code does not seem to support interlaced
7753 * video when slices span multiple rows.
7754 * The ff_er_add_slice calls don't work right for bottom
7755 * fields; they cause massive erroneous error concealment.
7756 * Error marking covers both fields (top and bottom).
7757 * This causes a mismatched s->error_count
7758 * and a bad error table. Further, the error count goes to
7759 * INT_MAX when called for the bottom field, because mb_y is
7760 * past the end by one (the caller's fault) and resync_mb_y != 0
7761 * causes problems for the first MB line, too.
7763 if (!FIELD_PICTURE)
7764 ff_er_frame_end(s);
7766 MPV_frame_end(s);
7768 if (s->first_field) {
7769 /* Wait for second field. */
7770 *data_size = 0;
7772 } else {
7773 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7774 /* Derive top_field_first from field pocs. */
7775 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7777 //FIXME do something with unavailable reference frames
7779 #if 0 //decode order
7780 *data_size = sizeof(AVFrame);
7781 #else
7782 /* Sort B-frames into display order */
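/* Decoded pictures are parked in delayed_pic[] and the lowest-POC entry
 * (without looking past a keyframe) becomes the output candidate. If that
 * candidate would still be out of display order, output is held back for
 * this call and, when the stream signals no reorder bound, has_b_frames is
 * bumped so that more pictures get buffered from then on. */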
7784 if(h->sps.bitstream_restriction_flag
7785 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7786 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7787 s->low_delay = 0;
7790 pics = 0;
7791 while(h->delayed_pic[pics]) pics++;
7793 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7795 h->delayed_pic[pics++] = cur;
7796 if(cur->reference == 0)
7797 cur->reference = DELAYED_PIC_REF;
7799 cross_idr = 0;
7800 for(i=0; h->delayed_pic[i]; i++)
7801 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
7802 cross_idr = 1;
7804 out = h->delayed_pic[0];
7805 out_idx = 0;
7806 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7807 if(h->delayed_pic[i]->poc < out->poc){
7808 out = h->delayed_pic[i];
7809 out_idx = i;
7812 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7813 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7814 { }
7815 else if(prev && pics <= s->avctx->has_b_frames)
7816 out = prev;
7817 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7818 || (s->low_delay &&
7819 ((!cross_idr && prev && out->poc > prev->poc + 2)
7820 || cur->pict_type == FF_B_TYPE)))
7822 s->low_delay = 0;
7823 s->avctx->has_b_frames++;
7824 out = prev;
7826 else if(out_of_order)
7827 out = prev;
7829 if(out_of_order || pics > s->avctx->has_b_frames){
7830 for(i=out_idx; h->delayed_pic[i]; i++)
7831 h->delayed_pic[i] = h->delayed_pic[i+1];
7834 if(prev == out)
7835 *data_size = 0;
7836 else
7837 *data_size = sizeof(AVFrame);
7838 if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7839 prev->reference = 0;
7840 h->delayed_output_pic = out;
7841 #endif
7843 if(out)
7844 *pict= *(AVFrame*)out;
7845 else
7846 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7850 assert(pict->data[0] || !*data_size);
7851 ff_print_debug_info(s, pict);
7852 //printf("out %d\n", (int)pict->data[0]);
7853 #if 0 //?
7855 /* Return the Picture timestamp as the frame number */
7856 /* we subtract 1 because it is added in utils.c */
7857 avctx->frame_number = s->picture_number - 1;
7858 #endif
7859 return get_consumed_bytes(s, buf_index, buf_size);
7861 #if 0
7862 static inline void fill_mb_avail(H264Context *h){
7863 MpegEncContext * const s = &h->s;
7864 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7866 if(s->mb_y){
7867 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7868 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7869 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7870 }else{
7871 h->mb_avail[0]=
7872 h->mb_avail[1]=
7873 h->mb_avail[2]= 0;
7875 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7876 h->mb_avail[4]= 1; //FIXME move out
7877 h->mb_avail[5]= 0; //FIXME move out
7879 #endif
7881 #ifdef TEST
7882 #undef printf
7883 #undef random
7884 #define COUNT 8000
7885 #define SIZE (COUNT*40)
7886 int main(void){
7887 int i;
7888 uint8_t temp[SIZE];
7889 PutBitContext pb;
7890 GetBitContext gb;
7891 // int int_temp[10000];
7892 DSPContext dsp;
7893 AVCodecContext avctx;
7895 dsputil_init(&dsp, &avctx);
7897 init_put_bits(&pb, temp, SIZE);
7898 printf("testing unsigned exp golomb\n");
7899 for(i=0; i<COUNT; i++){
7900 START_TIMER
7901 set_ue_golomb(&pb, i);
7902 STOP_TIMER("set_ue_golomb");
7904 flush_put_bits(&pb);
7906 init_get_bits(&gb, temp, 8*SIZE);
7907 for(i=0; i<COUNT; i++){
7908 int j, s;
7910 s= show_bits(&gb, 24);
7912 START_TIMER
7913 j= get_ue_golomb(&gb);
7914 if(j != i){
7915 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7916 // return -1;
7918 STOP_TIMER("get_ue_golomb");
7922 init_put_bits(&pb, temp, SIZE);
7923 printf("testing signed exp golomb\n");
7924 for(i=0; i<COUNT; i++){
7925 START_TIMER
7926 set_se_golomb(&pb, i - COUNT/2);
7927 STOP_TIMER("set_se_golomb");
7929 flush_put_bits(&pb);
7931 init_get_bits(&gb, temp, 8*SIZE);
7932 for(i=0; i<COUNT; i++){
7933 int j, s;
7935 s= show_bits(&gb, 24);
7937 START_TIMER
7938 j= get_se_golomb(&gb);
7939 if(j != i - COUNT/2){
7940 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7941 // return -1;
7943 STOP_TIMER("get_se_golomb");
7946 #if 0
7947 printf("testing 4x4 (I)DCT\n");
7949 DCTELEM block[16];
7950 uint8_t src[16], ref[16];
7951 uint64_t error= 0, max_error=0;
7953 for(i=0; i<COUNT; i++){
7954 int j;
7955 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7956 for(j=0; j<16; j++){
7957 ref[j]= random()%255;
7958 src[j]= random()%255;
7961 h264_diff_dct_c(block, src, ref, 4);
7963 //normalize
7964 for(j=0; j<16; j++){
7965 // printf("%d ", block[j]);
7966 block[j]= block[j]*4;
7967 if(j&1) block[j]= (block[j]*4 + 2)/5;
7968 if(j&4) block[j]= (block[j]*4 + 2)/5;
7970 // printf("\n");
7972 s->dsp.h264_idct_add(ref, block, 4);
7973 /* for(j=0; j<16; j++){
7974 printf("%d ", ref[j]);
7976 printf("\n");*/
7978 for(j=0; j<16; j++){
7979 int diff= FFABS(src[j] - ref[j]);
7981 error+= diff*diff;
7982 max_error= FFMAX(max_error, diff);
7985 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7986 printf("testing quantizer\n");
7987 for(qp=0; qp<52; qp++){
7988 for(i=0; i<16; i++)
7989 src1_block[i]= src2_block[i]= random()%255;
7992 printf("Testing NAL layer\n");
7994 uint8_t bitstream[COUNT];
7995 uint8_t nal[COUNT*2];
7996 H264Context h;
7997 memset(&h, 0, sizeof(H264Context));
7999 for(i=0; i<COUNT; i++){
8000 int zeros= i;
8001 int nal_length;
8002 int consumed;
8003 int out_length;
8004 uint8_t *out;
8005 int j;
8007 for(j=0; j<COUNT; j++){
8008 bitstream[j]= (random() % 255) + 1;
8011 for(j=0; j<zeros; j++){
8012 int pos= random() % COUNT;
8013 while(bitstream[pos] == 0){
8014 pos++;
8015 pos %= COUNT;
8017 bitstream[pos]=0;
8020 START_TIMER
8022 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8023 if(nal_length<0){
8024 printf("encoding failed\n");
8025 return -1;
8028 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8030 STOP_TIMER("NAL")
8032 if(out_length != COUNT){
8033 printf("incorrect length %d %d\n", out_length, COUNT);
8034 return -1;
8037 if(consumed != nal_length){
8038 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8039 return -1;
8042 if(memcmp(bitstream, out, COUNT)){
8043 printf("mismatch\n");
8044 return -1;
8047 #endif
8049 printf("Testing RBSP\n");
8052 return 0;
8054 #endif /* TEST */
8057 static av_cold int decode_end(AVCodecContext *avctx)
8059 H264Context *h = avctx->priv_data;
8060 MpegEncContext *s = &h->s;
8062 av_freep(&h->rbsp_buffer[0]);
8063 av_freep(&h->rbsp_buffer[1]);
8064 free_tables(h); //FIXME cleanup init stuff perhaps
8065 MPV_common_end(s);
8067 // memset(h, 0, sizeof(H264Context));
8069 return 0;
8073 AVCodec h264_decoder = {
8074 "h264",
8075 CODEC_TYPE_VIDEO,
8076 CODEC_ID_H264,
8077 sizeof(H264Context),
8078 decode_init,
8079 NULL,
8080 decode_end,
8081 decode_frame,
8082 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8083 .flush= flush_dpb,
8086 #include "svq3.c"