Merge branch 'mirror' into vdpau
[FFMpeg-mirror/ffmpeg-vdpau.git] / libavcodec / h264.c
blob856813ec47ea60087c0412cc775854496b337779
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h"
33 #include "h264_parser.h"
34 #include "golomb.h"
35 #include "rectangle.h"
37 #include "cabac.h"
38 #ifdef ARCH_X86
39 #include "i386/h264_i386.h"
40 #endif
42 //#undef NDEBUG
43 #include <assert.h>
45 /**
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC coeff_token VLCs: four code tables plus their static storage.
 * NOTE(review): presumably one table per nC context class — confirm in init. */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* coeff_token VLC for chroma DC blocks (separate, smaller table). */
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* total_zeros VLCs (15 variants) and the chroma DC total_zeros VLCs. */
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* run_before VLCs: six small tables plus one larger table (run7). */
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
71 static VLC run7_vlc;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* VDPAU hardware decode hooks, defined in the vdpau integration code. */
75 extern int ff_VDPAU_h264_set_reference_frames(H264Context *h);
76 extern int ff_VDPAU_h264_picture_complete(H264Context *h, const uint8_t *buf, int buf_size);
77 extern void ff_VDPAU_h264_set_reference_frames_count(H264Context *h);
/* Forward declarations for functions defined later in this file. */
79 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
80 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
81 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
82 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
83 static Picture * remove_long(H264Context *h, int i, int ref_mask);
85 static av_always_inline uint32_t pack16to32(int a, int b){
86 #ifdef WORDS_BIGENDIAN
87 return (b&0xFFFF) + (a<<16);
88 #else
89 return (a&0xFFFF) + (b<<16);
90 #endif
/* Quantizer decomposition lookup tables for the H.264 QP range 0..51:
 * rem6[q] == q % 6 and div6[q] == q / 6 (avoids runtime div/mod).
 * Fix: the closing "};" of both initializers was lost in the mangled
 * paste, leaving the declarations unterminated — restored here. */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
/* Left-neighbour sub-block index tables. Row 0 is the default mapping;
 * rows 1-3 are the alternative mappings selected in fill_caches() when
 * MBAFF field/frame coding of the current and left MB pair differ.
 * Fix: the closing "};" was lost in the mangled paste — restored. */
static const int left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};
/* Candidate pixel-format lists advertised for VDPAU hardware decoding,
 * one list per H.264 profile, each terminated by PIX_FMT_NONE. */
108 static const enum PixelFormat pixfmt_vdpau_h264_baseline_420[] = {
109 PIX_FMT_VDPAU_H264_BASELINE,
110 PIX_FMT_NONE};
111 static const enum PixelFormat pixfmt_vdpau_h264_main_420[] = {
112 PIX_FMT_VDPAU_H264_MAIN,
113 PIX_FMT_NONE};
114 static const enum PixelFormat pixfmt_vdpau_h264_high_420[] = {
115 PIX_FMT_VDPAU_H264_HIGH,
116 PIX_FMT_NONE};
/**
 * Fills the per-macroblock neighbour caches (intra pred modes, non-zero
 * counts, cbp, motion vectors, reference indices, mvd and direct flags)
 * from the frame-wide tables, applying the MBAFF/interlacing neighbour
 * derivation rules.
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when called from the loop filter; only the
 *                    subset of caches the deblocker reads is filled
 * NOTE(review): several closing-brace lines of this function were lost
 * when the file was pasted; code lines are preserved untouched.
 */
118 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
119 MpegEncContext * const s = &h->s;
120 const int mb_xy= h->mb_xy;
121 int topleft_xy, top_xy, topright_xy, left_xy[2];
122 int topleft_type, top_type, topright_type, left_type[2];
123 int * left_block;
124 int topleft_partition= -1;
125 int i;
/* top neighbour: one mb row up; two rows when decoding a field picture */
127 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
129 //FIXME deblocking could skip the intra and nnz parts.
/* deblock fast path: same slice above and no MBAFF -> caches still valid */
130 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
131 return;
133 /* Wow, what a mess, why didn't they simplify the interlacing & intra
134 * stuff, I can't imagine that these complex rules are worth it. */
135 topleft_xy = top_xy - 1;
137 topright_xy= top_xy + 1;
138 left_xy[1] = left_xy[0] = mb_xy-1;
139 left_block = left_block_options[0];
/* MBAFF: neighbour addresses depend on the field/frame coding of each
 * macroblock pair; remap top/left/topleft/topright accordingly. */
140 if(FRAME_MBAFF){
141 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
142 const int top_pair_xy = pair_xy - s->mb_stride;
143 const int topleft_pair_xy = top_pair_xy - 1;
144 const int topright_pair_xy = top_pair_xy + 1;
145 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
146 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
147 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
148 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
149 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
150 const int bottom = (s->mb_y & 1);
151 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
152 if (bottom
153 ? !curr_mb_frame_flag // bottom macroblock
154 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
156 top_xy -= s->mb_stride;
158 if (bottom
159 ? !curr_mb_frame_flag // bottom macroblock
160 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
162 topleft_xy -= s->mb_stride;
163 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
164 topleft_xy += s->mb_stride;
165 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
166 topleft_partition = 0;
168 if (bottom
169 ? !curr_mb_frame_flag // bottom macroblock
170 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
172 topright_xy -= s->mb_stride;
174 if (left_mb_frame_flag != curr_mb_frame_flag) {
175 left_xy[1] = left_xy[0] = pair_xy - 1;
176 if (curr_mb_frame_flag) {
177 if (bottom) {
178 left_block = left_block_options[1];
179 } else {
180 left_block= left_block_options[2];
182 } else {
183 left_xy[1] += s->mb_stride;
184 left_block = left_block_options[3];
/* publish resolved neighbour addresses for later use */
189 h->top_mb_xy = top_xy;
190 h->left_mb_xy[0] = left_xy[0];
191 h->left_mb_xy[1] = left_xy[1];
/* deblocking needs neighbour mb types regardless of slice membership */
192 if(for_deblock){
193 topleft_type = 0;
194 topright_type = 0;
195 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
196 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
197 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF deblock: restore ref_cache values that MC rescaled */
199 if(MB_MBAFF && !IS_INTRA(mb_type)){
200 int list;
201 for(list=0; list<h->list_count; list++){
202 //These values where changed for ease of performing MC, we need to change them back
203 //FIXME maybe we can make MC and loop filter use the same values or prevent
204 //the MC code from changing ref_cache and rather use a temporary array.
205 if(USES_LIST(mb_type,list)){
206 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
207 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
209 ref += h->b8_stride;
210 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
211 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
215 }else{
/* normal decode: neighbours count only when in the same slice */
216 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
217 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
218 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
219 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
220 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* intra prediction: compute per-4x4-block sample availability bitmasks */
222 if(IS_INTRA(mb_type)){
223 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
224 h->topleft_samples_available=
225 h->top_samples_available=
226 h->left_samples_available= 0xFFFF;
227 h->topright_samples_available= 0xEEEA;
229 if(!(top_type & type_mask)){
230 h->topleft_samples_available= 0xB3FF;
231 h->top_samples_available= 0x33FF;
232 h->topright_samples_available= 0x26EA;
234 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
235 if(IS_INTERLACED(mb_type)){
236 if(!(left_type[0] & type_mask)){
237 h->topleft_samples_available&= 0xDFFF;
238 h->left_samples_available&= 0x5FFF;
240 if(!(left_type[1] & type_mask)){
241 h->topleft_samples_available&= 0xFF5F;
242 h->left_samples_available&= 0xFF5F;
244 }else{
245 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
246 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
247 assert(left_xy[0] == left_xy[1]);
248 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
249 h->topleft_samples_available&= 0xDF5F;
250 h->left_samples_available&= 0x5F5F;
253 }else{
254 if(!(left_type[0] & type_mask)){
255 h->topleft_samples_available&= 0xDF5F;
256 h->left_samples_available&= 0x5F5F;
260 if(!(topleft_type & type_mask))
261 h->topleft_samples_available&= 0x7FFF;
263 if(!(topright_type & type_mask))
264 h->topright_samples_available&= 0xFBFF;
/* intra4x4: seed the prediction-mode cache from top and left neighbours */
266 if(IS_INTRA4x4(mb_type)){
267 if(IS_INTRA4x4(top_type)){
268 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
269 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
270 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
271 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
272 }else{
273 int pred;
274 if(!(top_type & type_mask))
275 pred= -1;
276 else{
277 pred= 2;
279 h->intra4x4_pred_mode_cache[4+8*0]=
280 h->intra4x4_pred_mode_cache[5+8*0]=
281 h->intra4x4_pred_mode_cache[6+8*0]=
282 h->intra4x4_pred_mode_cache[7+8*0]= pred;
284 for(i=0; i<2; i++){
285 if(IS_INTRA4x4(left_type[i])){
286 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
287 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
288 }else{
289 int pred;
290 if(!(left_type[i] & type_mask))
291 pred= -1;
292 else{
293 pred= 2;
295 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
296 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* diagram of the nnz/pred cache layout (T = from top, L = from left) */
305 0 . T T. T T T T
306 1 L . .L . . . .
307 2 L . .L . . . .
308 3 . T TL . . . .
309 4 L . .L . . . .
310 5 L . .. . . . .
312 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* non_zero_count cache: top row then the two left columns */
313 if(top_type){
314 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
315 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
316 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
317 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
319 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
320 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
322 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
323 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
325 }else{
326 h->non_zero_count_cache[4+8*0]=
327 h->non_zero_count_cache[5+8*0]=
328 h->non_zero_count_cache[6+8*0]=
329 h->non_zero_count_cache[7+8*0]=
331 h->non_zero_count_cache[1+8*0]=
332 h->non_zero_count_cache[2+8*0]=
334 h->non_zero_count_cache[1+8*3]=
335 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
339 for (i=0; i<2; i++) {
340 if(left_type[i]){
341 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
342 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
343 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
344 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
345 }else{
346 h->non_zero_count_cache[3+8*1 + 2*8*i]=
347 h->non_zero_count_cache[3+8*2 + 2*8*i]=
348 h->non_zero_count_cache[0+8*1 + 8*i]=
349 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CABAC context: coded-block-pattern bits of the top/left neighbours */
353 if( h->pps.cabac ) {
354 // top_cbp
355 if(top_type) {
356 h->top_cbp = h->cbp_table[top_xy];
357 } else if(IS_INTRA(mb_type)) {
358 h->top_cbp = 0x1C0;
359 } else {
360 h->top_cbp = 0;
362 // left_cbp
363 if (left_type[0]) {
364 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
365 } else if(IS_INTRA(mb_type)) {
366 h->left_cbp = 0x1C0;
367 } else {
368 h->left_cbp = 0;
370 if (left_type[0]) {
371 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
373 if (left_type[1]) {
374 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
378 #if 1
/* inter: fill motion vector and reference index caches per list */
379 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
380 int list;
381 for(list=0; list<h->list_count; list++){
382 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
383 /*if(!h->mv_cache_clean[list]){
384 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
385 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
386 h->mv_cache_clean[list]= 1;
388 continue;
390 h->mv_cache_clean[list]= 0;
392 if(USES_LIST(top_type, list)){
393 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
394 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
395 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
396 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
397 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
398 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
399 h->ref_cache[list][scan8[0] + 0 - 1*8]=
400 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
401 h->ref_cache[list][scan8[0] + 2 - 1*8]=
402 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
403 }else{
404 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
405 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
406 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
407 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
408 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
411 for(i=0; i<2; i++){
412 int cache_idx = scan8[0] - 1 + i*2*8;
413 if(USES_LIST(left_type[i], list)){
414 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
415 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
416 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
417 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
418 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
419 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
420 }else{
421 *(uint32_t*)h->mv_cache [list][cache_idx ]=
422 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
423 h->ref_cache[list][cache_idx ]=
424 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* topleft/topright are only needed for MV prediction, not deblocking */
428 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
429 continue;
431 if(USES_LIST(topleft_type, list)){
432 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
433 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
434 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
435 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
436 }else{
437 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
438 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
441 if(USES_LIST(topright_type, list)){
442 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
443 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
444 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
445 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
446 }else{
447 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
448 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
451 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
452 continue;
/* mark the padding cache cells unavailable / zero */
454 h->ref_cache[list][scan8[5 ]+1] =
455 h->ref_cache[list][scan8[7 ]+1] =
456 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
457 h->ref_cache[list][scan8[4 ]] =
458 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
459 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
460 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
461 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
462 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
463 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC: also fill the motion-vector-difference cache */
465 if( h->pps.cabac ) {
466 /* XXX beurk, Load mvd */
467 if(USES_LIST(top_type, list)){
468 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
469 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
470 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
471 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
472 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
473 }else{
474 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
475 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
476 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
477 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
479 if(USES_LIST(left_type[0], list)){
480 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
481 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
482 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
483 }else{
484 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
485 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
487 if(USES_LIST(left_type[1], list)){
488 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
489 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
490 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
491 }else{
492 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
493 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
495 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
496 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
497 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
498 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
499 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: fill the direct-mode flag cache from the neighbours */
501 if(h->slice_type_nos == FF_B_TYPE){
502 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
504 if(IS_DIRECT(top_type)){
505 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
506 }else if(IS_8X8(top_type)){
507 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
508 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
509 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
510 }else{
511 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
514 if(IS_DIRECT(left_type[0]))
515 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
516 else if(IS_8X8(left_type[0]))
517 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
518 else
519 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
521 if(IS_DIRECT(left_type[1]))
522 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
523 else if(IS_8X8(left_type[1]))
524 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
525 else
526 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: rescale cached refs/MVs when a neighbour's field/frame coding
 * differs from the current macroblock's */
530 if(FRAME_MBAFF){
531 #define MAP_MVS\
532 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
533 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
534 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
535 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
536 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
537 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
538 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
539 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
540 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
541 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
542 if(MB_FIELD){
543 #define MAP_F2F(idx, mb_type)\
544 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
545 h->ref_cache[list][idx] <<= 1;\
546 h->mv_cache[list][idx][1] /= 2;\
547 h->mvd_cache[list][idx][1] /= 2;\
549 MAP_MVS
550 #undef MAP_F2F
551 }else{
552 #define MAP_F2F(idx, mb_type)\
553 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
554 h->ref_cache[list][idx] >>= 1;\
555 h->mv_cache[list][idx][1] <<= 1;\
556 h->mvd_cache[list][idx][1] <<= 1;\
558 MAP_MVS
559 #undef MAP_F2F
564 #endif
566 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
569 static inline void write_back_intra_pred_mode(H264Context *h){
570 const int mb_xy= h->mb_xy;
572 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
573 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
574 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
575 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
576 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
577 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
578 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
/* Validates the cached intra4x4 prediction modes against neighbour sample
 * availability, replacing DC modes with the appropriate edge variant.
 * Returns 0 on success, -1 when a mode requires an unavailable neighbour. */
582 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
584 static inline int check_intra4x4_pred_mode(H264Context *h){
585 MpegEncContext * const s = &h->s;
/* mode -> replacement when top/left is missing; -1 flags an illegal mode */
586 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
587 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
588 int i;
590 if(!(h->top_samples_available&0x8000)){
591 for(i=0; i<4; i++){
592 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
593 if(status<0){
594 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
595 return -1;
596 } else if(status){
597 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* check each of the four left-edge blocks against its availability bit */
602 if((h->left_samples_available&0x8888)!=0x8888){
603 static const int mask[4]={0x8000,0x2000,0x80,0x20};
604 for(i=0; i<4; i++){
605 if(!(h->left_samples_available&mask[i])){
606 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
607 if(status<0){
608 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
609 return -1;
610 } else if(status){
611 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
617 return 0;
618 } //FIXME cleanup like next
/* Validates a 16x16-luma / chroma intra prediction mode against neighbour
 * availability; returns the (possibly remapped) mode or -1 on error. */
621 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
623 static inline int check_intra_pred_mode(H264Context *h, int mode){
624 MpegEncContext * const s = &h->s;
/* mode -> replacement when the top / left neighbour is missing */
625 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
626 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
/* unsigned compare also rejects negative modes */
628 if(mode > 6U) {
629 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
630 return -1;
633 if(!(h->top_samples_available&0x8000)){
634 mode= top[ mode ];
635 if(mode<0){
636 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
637 return -1;
641 if((h->left_samples_available&0x8080) != 0x8080){
642 mode= left[ mode ];
643 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
644 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
646 if(mode<0){
647 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
648 return -1;
652 return mode;
656 * gets the predicted intra4x4 prediction mode.
658 static inline int pred_intra_mode(H264Context *h, int n){
659 const int index8= scan8[n];
660 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
661 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
662 const int min= FFMIN(left, top);
664 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
666 if(min<0) return DC_PRED;
667 else return min;
670 static inline void write_back_non_zero_count(H264Context *h){
671 const int mb_xy= h->mb_xy;
673 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
674 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
675 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
676 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
677 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
678 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
679 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
681 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
682 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
683 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
685 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
686 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
687 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
691 * gets the predicted number of non-zero coefficients.
692 * @param n block index
694 static inline int pred_non_zero_count(H264Context *h, int n){
695 const int index8= scan8[n];
696 const int left= h->non_zero_count_cache[index8 - 1];
697 const int top = h->non_zero_count_cache[index8 - 8];
698 int i= left + top;
700 if(i<64) i= (i+1)>>1;
702 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
704 return i&31;
/* Selects the diagonal neighbour MV for motion vector prediction: points
 * *C at the top-right candidate and returns its reference index, falling
 * back to the top-left when top-right is unavailable. Under MBAFF the MV
 * must be fetched from the frame tables and rescaled, hence the macro. */
707 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
708 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
709 MpegEncContext *s = &h->s;
711 /* there is no consistent mapping of mvs to neighboring locations that will
712 * make mbaff happy, so we can't move all this logic to fill_caches */
713 if(FRAME_MBAFF){
714 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
715 const int16_t *mv;
/* default: point *C at a zeroed scratch cell of the MV cache */
716 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
717 *C = h->mv_cache[list][scan8[0]-2];
719 if(!MB_FIELD
720 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
721 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
722 if(IS_INTERLACED(mb_types[topright_xy])){
/* NOTE: the macro can return from the enclosing function; MV_OP/REF_OP
 * rescale the vertical MV and reference index between field and frame */
723 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
724 const int x4 = X4, y4 = Y4;\
725 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
726 if(!USES_LIST(mb_type,list))\
727 return LIST_NOT_USED;\
728 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
729 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
730 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
731 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
733 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
736 if(topright_ref == PART_NOT_AVAILABLE
737 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
738 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
739 if(!MB_FIELD
740 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
741 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
743 if(MB_FIELD
744 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
745 && i >= scan8[0]+8){
746 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
747 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
750 #undef SET_DIAG_MV
/* non-MBAFF path: take top-right from the cache, else fall back to top-left */
753 if(topright_ref != PART_NOT_AVAILABLE){
754 *C= h->mv_cache[list][ i - 8 + part_width ];
755 return topright_ref;
756 }else{
757 tprintf(s->avctx, "topright MV not available\n");
759 *C= h->mv_cache[list][ i - 8 - 1 ];
760 return h->ref_cache[list][ i - 8 - 1 ];
765 * gets the predicted MV.
766 * @param n the block index
767 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
768 * @param mx the x component of the predicted motion vector
769 * @param my the y component of the predicted motion vector
/* Median MV prediction from left (A), top (B) and diagonal (C) neighbours,
 * with the single-match and all-unavailable special cases of the spec. */
771 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
772 const int index8= scan8[n];
773 const int top_ref= h->ref_cache[list][ index8 - 8 ];
774 const int left_ref= h->ref_cache[list][ index8 - 1 ];
775 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
776 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
777 const int16_t * C;
778 int diagonal_ref, match_count;
780 assert(part_width==1 || part_width==2 || part_width==4);
782 /* mv_cache
783 B . . A T T T T
784 U . . L . . , .
785 U . . L . . . .
786 U . . L . . , .
787 . . . L . . . .
790 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* count how many neighbours use the same reference picture */
791 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
792 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
793 if(match_count > 1){ //most common
794 *mx= mid_pred(A[0], B[0], C[0]);
795 *my= mid_pred(A[1], B[1], C[1]);
796 }else if(match_count==1){
/* exactly one neighbour matches: take its MV directly */
797 if(left_ref==ref){
798 *mx= A[0];
799 *my= A[1];
800 }else if(top_ref==ref){
801 *mx= B[0];
802 *my= B[1];
803 }else{
804 *mx= C[0];
805 *my= C[1];
807 }else{
/* no match: only-left-available shortcut, otherwise plain median */
808 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
809 *mx= A[0];
810 *my= A[1];
811 }else{
812 *mx= mid_pred(A[0], B[0], C[0]);
813 *my= mid_pred(A[1], B[1], C[1]);
817 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
821 * gets the directionally predicted 16x8 MV.
822 * @param n the block index
823 * @param mx the x component of the predicted motion vector
824 * @param my the y component of the predicted motion vector
/* 16x8 partitions: top half prefers the top neighbour's MV, bottom half
 * the left neighbour's, falling back to the generic median predictor. */
826 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
827 if(n==0){
828 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
829 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
831 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
833 if(top_ref == ref){
834 *mx= B[0];
835 *my= B[1];
836 return;
838 }else{
839 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
840 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
842 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
844 if(left_ref == ref){
845 *mx= A[0];
846 *my= A[1];
847 return;
851 //RARE
852 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
 * @param n the block index (0 = left 8x16 partition, else right)
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    if(n==0){
        /* left partition: take the MV of the block to its left (A) if it uses the same reference */
        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

        if(left_ref == ref){
            *mx= A[0];
            *my= A[1];
            return;
        }
    }else{
        /* right partition: take the top-right diagonal neighbor (C) if it uses the same reference */
        const int16_t * C;
        int diagonal_ref;

        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);

        if(diagonal_ref == ref){
            *mx= C[0];
            *my= C[1];
            return;
        }
    }

    //RARE: directional candidate mismatched, fall back to the generic median prediction
    pred_motion(h, n, 2, list, ref, mx, my);
}
/**
 * Predicts the motion vector for a P-Skip macroblock.
 * The MV is zero when the top or left neighbor is unavailable, or when
 * either neighbor uses reference 0 with a zero MV; otherwise the normal
 * 16x16 median prediction with reference 0 is used.
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
       || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
       || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){

        *mx = *my = 0;
        return;
    }

    pred_motion(h, 0, 4, 0, 0, mx, my);

    return;
}
911 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
912 int poc0 = h->ref_list[0][i].poc;
913 int td = av_clip(poc1 - poc0, -128, 127);
914 if(td == 0 || h->ref_list[0][i].long_ref){
915 return 256;
916 }else{
917 int tb = av_clip(poc - poc0, -128, 127);
918 int tx = (16384 + (FFABS(td) >> 1)) / td;
919 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fills the frame and per-field distance scale factor tables used by
 * temporal direct mode.
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    MpegEncContext * const s = &h->s;
    const int poc  = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
    const int poc1 = h->ref_list[1][0].poc;
    int i, field;

    /* per-field factors; note the loop deliberately shadows poc/poc1 with
     * the field POCs of the current picture and of ref_list[1][0] */
    for(field=0; field<2; field++){
        const int poc  = h->s.current_picture_ptr->field_poc[field];
        const int poc1 = h->ref_list[1][0].field_poc[field];
        for(i=0; i < 2*h->ref_count[0]; i++)
            h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
    }

    /* frame-level factors, using the outer (current structure) POCs */
    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
    }
}
/**
 * Builds the map from the colocated picture's (ref_list[1][0]) reference
 * indices to the current list's indices, by matching the packed
 * 4*frame_num + reference values; used by temporal direct mode.
 * @param colfield which parity of the colocated picture's reference lists to read
 * @param mbafi nonzero when building the MBAFF field map (entries 16..16+2*ref_count)
 */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    int start= mbafi ? 16                      : 0;
    int end  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];

            if     (!interl)
                poc |= 3;      // progressive: mark as "both fields" so the packed values compare equal
            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
                poc= (poc&~3) + rfield + 1;

            /* search the current list for the reference with the same packed poc */
            for(j=start; j<end; j++){
                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                    int cur_ref= mbafi ? (j-16)^field : j;
                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                    if(rfield == field)
                        map[list][old_ref] = cur_ref;
                    break;
                }
            }
        }
    }
}
/**
 * Initializes per-picture data needed for B-frame direct prediction:
 * stores the packed reference POCs/counts on the current picture and,
 * for temporal direct mode, builds the colocated-to-list0 index maps.
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    int list, j, field;
    int sidx= (s->picture_structure&1)^1;     // structure index of the current picture
    int ref1sidx= (ref1->reference&1)^1;      // structure index of the colocated picture

    /* record this picture's reference lists as packed 4*frame_num+reference values */
    for(list=0; list<2; list++){
        cur->ref_count[sidx][list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
    }

    /* a frame picture fills both field slots identically */
    if(s->picture_structure == PICT_FRAME){
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
    }

    cur->mbaff= FRAME_MBAFF;

    /* the colocated maps below are only needed for temporal direct B slices */
    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
        return;

    for(list=0; list<2; list++){
        fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
        for(field=0; field<2; field++)
            fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
    }
}
/**
 * Derives the motion vectors and reference indices of a B_Direct
 * macroblock (or the direct 8x8 sub-blocks when *mb_type is 8x8),
 * filling mv_cache/ref_cache/sub_mb_type, using either spatial or
 * temporal direct prediction depending on h->direct_spatial_mv_pred.
 * Also rewrites *mb_type to the effective partition type.
 */
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    int b8_stride = h->b8_stride;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;
    int i8, i4;

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)

    /* locate the colocated macroblock in ref_list[1][0] and choose the
     * partitioning, handling all frame/field (MBAFF) combinations */
    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
        if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
            /* current MB is a frame MB but the colocated is a field MB:
             * pick the colocated field closest in POC */
            int cur_poc = s->current_picture_ptr->poc;
            int *col_poc = h->ref_list[1]->field_poc;
            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
            b8_stride = 0;
        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
            mb_xy += s->mb_stride*fieldoff;
        }
        goto single_col;
    }else{                                               // AFL/AFR/FR/FL -> AFR/FR
        if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
            /* field MB over a frame colocated picture: read a 16x32 area (two MBs) */
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
            b8_stride *= 3;
            b4_stride *= 6;
            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
                && !is_b8x8){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }
        }else{                                           //     AFR/FR    -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
                /* FIXME save sub mb types from previous frames (or derive from MVs)
                 * so we know exactly what block size to use */
                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }
        }
    }

    /* pointers into the colocated picture's MVs and reference indices */
    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
    if(!b8_stride){
        /* frame MB over field colocated: select the right half of the colocated MB pair */
        if(s->mb_y&1){
            l1ref0 += h->b8_stride;
            l1ref1 += h->b8_stride;
            l1mv0  +=  2*b4_stride;
            l1mv1  +=  2*b4_stride;
        }
    }

    if(h->direct_spatial_mv_pred){
        int ref[2];
        int mv[2][2];
        int list;

        /* FIXME interlacing + spatial direct uses wrong colocated block positions */

        /* ref = min(neighbors); the unsigned casts make negative (unavailable)
         * references compare larger than any valid index */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
            if(refc == PART_NOT_AVAILABLE)
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
            if(ref[list] < 0)
                ref[list] = -1;
        }

        if(ref[0] < 0 && ref[1] < 0){
            /* no neighbor references at all: predict ref 0 with zero MVs */
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
        }else{
            for(list=0; list<2; list++){
                if(ref[list] >= 0)
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                else
                    mv[list][0] = mv[list][1] = 0;
            }
        }

        /* drop the unused prediction list from the (sub) macroblock type */
        if(ref[1] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L1;
            sub_mb_type &= ~MB_TYPE_L1;
        }else if(ref[0] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L0;
            sub_mb_type &= ~MB_TYPE_L0;
        }

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* frame/field mismatch with the colocated MB: per-8x8 colocated lookup */
            for(i8=0; i8<4; i8++){
                int x8 = i8&1;
                int y8 = i8>>1;
                int xy8 = x8+y8*b8_stride;
                int xy4 = 3*x8+y8*b4_stride;
                int a=0, b=0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                /* col_zero_flag: colocated block is (near) static -> zero out
                 * the MV of each list whose predicted reference is > 0 */
                if(!IS_INTRA(mb_type_col[y8])
                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
                    if(ref[0] > 0)
                        a= pack16to32(mv[0][0],mv[0][1]);
                    if(ref[1] > 0)
                        b= pack16to32(mv[1][0],mv[1][1]);
                }else{
                    a= pack16to32(mv[0][0],mv[0][1]);
                    b= pack16to32(mv[1][0],mv[1][1]);
                }
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
            }
        }else if(IS_16X16(*mb_type)){
            int a=0, b=0;

            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
            if(!IS_INTRA(mb_type_col[0])
               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       && (h->x264_build>33 || !h->x264_build)))){  // workaround for an old x264 bug
                if(ref[0] > 0)
                    a= pack16to32(mv[0][0],mv[0][1]);
                if(ref[1] > 0)
                    b= pack16to32(mv[1][0],mv[1][1]);
            }else{
                a= pack16to32(mv[0][0],mv[0][1]);
                b= pack16to32(mv[1][0],mv[1][1]);
            }
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);

                /* col_zero_flag */
                if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
                                                 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
                                                     && (h->x264_build>33 || !h->x264_build)))){
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
                    if(IS_SUB_8X8(sub_mb_type)){
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                            if(ref[1] == 0)
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                        }
                    }else
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                            if(ref[1] == 0)
                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
                        }
                    }
                }
            }
        }
    }else{ /* direct temporal mv pred */
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
        const int *dist_scale_factor = h->dist_scale_factor;
        int ref_offset= 0;

        /* MBAFF field MBs use the per-field maps/scale factors */
        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
            map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
            map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
            dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
        }
        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
            ref_offset += 16;   // index into the MBAFF half of the colocated map

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* FIXME assumes direct_8x8_inference == 1 */
            int y_shift  = 2*!IS_INTERLACED(*mb_type);  // vertical MV scaling between frame and field

            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[y8])){
                    /* intra colocated block: ref 0 with zero MVs */
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8*b8_stride];
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                }
                scale = dist_scale_factor[ref0];
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);

                {
                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
                    int my_col = (mv_col[1]<<y_shift)/2;
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * my_col + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
                }
            }
            return;
        }

        /* one-to-one mv scaling */

        if(IS_16X16(*mb_type)){
            int ref, mv0, mv1;

            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if(IS_INTRA(mb_type_col[0])){
                ref=mv0=mv1=0;
            }else{
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                int mv_l0[2];
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                ref= ref0;
                mv0= pack16to32(mv_l0[0],mv_l0[1]);
                /* L1 MV = L0 MV - colocated MV (temporal direct) */
                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
            }
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[0])){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                }
                scale = dist_scale_factor[ref0];

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                if(IS_SUB_8X8(sub_mb_type)){
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
                }else
                for(i4=0; i4<4; i4++){
                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
                }
            }
        }
    }
}
/**
 * Writes the per-macroblock motion data back from the decode caches
 * (mv_cache/ref_cache/mvd_cache) into the picture-wide tables
 * (motion_val, ref_index, mvd_table, direct_table).
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;    // 4x4-block coordinates
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;   // 8x8-block coordinates
    int list;

    if(!USES_LIST(mb_type, 0))
        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);

    for(list=0; list<h->list_count; list++){
        int y;
        if(!USES_LIST(mb_type, list))
            continue;

        /* copy the 4x4 MVs, two packed MVs (one uint64_t) per row half */
        for(y=0; y<4; y++){
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        }
        if( h->pps.cabac ) {
            /* CABAC also needs the MV differences for context modelling */
            if(IS_SKIP(mb_type))
                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
            else
                for(y=0; y<4; y++){
                    *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
                    *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
                }
        }

        {
            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
        }
    }

    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
        if(IS_8X8(mb_type)){
            uint8_t *direct_table = &h->direct_table[b8_xy];
            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
        }
    }
}
/**
 * Decodes a network abstraction layer unit.
 * Parses the NAL header and removes the 00 00 03 emulation-prevention
 * escapes from the payload.
 * @param consumed is the number of bytes used as input
 * @param length is the length of the array
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
 * @returns decoded bytes, might be src+1 if no escapes
 */
static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;              //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;
#if 0
    for(i=0; i<length; i++)
        printf("%2X ", src[i]);
#endif
    /* fast scan for the first 00 00 (0x|1|2|3) sequence; zeros can only
     * start at even or odd positions so stepping by 2 is safe here */
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    /* NOTE(review): the buffer is sized exactly `length`, with no read
     * padding for the bit reader — confirm downstream readers never
     * over-read past dst_length */
    h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    si=di=0;
    while(si<length){
        //remove escapes (very rare 1:2^22)
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                break;
        }

        dst[di++]= src[si++];
    }

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}
1454 * identifies the exact end of the bitstream
1455 * @return the length of the trailing, or 0 if damaged
1457 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1458 int v= *src;
1459 int r;
1461 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1463 for(r=1; r<9; r++){
1464 if(v&1) return r;
1465 v>>=1;
1467 return 0;
/**
 * IDCT transforms the 16 dc values and dequantizes them.
 * The DC coefficients sit at positions {0,4}x{0,4} of each 4x4 block in
 * the 16x16 coefficient array (hence the x_offset/y_offset tables).
 * @param qp quantization parameter
 */
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
#define stride 16
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

//memset(block, 64, 2*256);
//return;
    /* horizontal pass (butterflies over the 4 DC columns of each row) */
    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    /* vertical pass + dequantization */
    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
    }
}
#if 0
/**
 * DCT transforms the 16 dc values.
 * NOTE: dead code, disabled with #if 0 (encoder-side forward transform).
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
1550 #undef xStride
1551 #undef stride
1553 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1554 const int stride= 16*2;
1555 const int xStride= 16;
1556 int a,b,c,d,e;
1558 a= block[stride*0 + xStride*0];
1559 b= block[stride*0 + xStride*1];
1560 c= block[stride*1 + xStride*0];
1561 d= block[stride*1 + xStride*1];
1563 e= a-b;
1564 a= a+b;
1565 b= c-d;
1566 c= c+d;
1568 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1569 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1570 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1571 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
#if 0
/* NOTE: dead code, disabled with #if 0 (encoder-side forward 2x2 chroma DC transform). */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
/**
 * gets the chroma qp.
 * @param t index into the per-component chroma QP table (presumably 0=Cb, 1=Cr — confirm against callers)
 * @param qscale the luma QP to map
 */
static inline int get_chroma_qp(H264Context *h, int t, int qscale){
    return h->pps.chroma_qp_table[t][qscale];
}
//FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Quantizes a 4x4 coefficient block in place (encoder path).
 * @param scantable zigzag scan order used to track the last nonzero coefficient
 * @param intra selects the larger (1/3) rounding bias vs. inter (1/6)
 * @param separate_dc if set, the DC coefficient is quantized with its own
 *        bias/shift (two variants depending on qscale to avoid overflow)
 * @return index (in scan order) of the last nonzero coefficient, or -1/0
 */
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
    int i;
    const int * const quant_table= quant_coeff[qscale];
    const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
    const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
    const unsigned int threshold2= (threshold1<<1);
    int last_non_zero;

    if(separate_dc){
        if(qscale<=18){
            //avoid overflows
            const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
            const unsigned int dc_threshold2= (dc_threshold1<<1);

            int level= block[0]*quant_coeff[qscale+18][0];
            /* single unsigned compare tests both |level| > threshold cases */
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
                if(level>0){
                    level= (dc_bias + level)>>(QUANT_SHIFT-2);
                    block[0]= level;
                }else{
                    level= (dc_bias - level)>>(QUANT_SHIFT-2);
                    block[0]= -level;
                }
//                last_non_zero = i;
            }else{
                block[0]=0;
            }
        }else{
            const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
            const unsigned int dc_threshold2= (dc_threshold1<<1);

            int level= block[0]*quant_table[0];
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
                if(level>0){
                    level= (dc_bias + level)>>(QUANT_SHIFT+1);
                    block[0]= level;
                }else{
                    level= (dc_bias - level)>>(QUANT_SHIFT+1);
                    block[0]= -level;
                }
//                last_non_zero = i;
            }else{
                block[0]=0;
            }
        }
        last_non_zero= 0;
        i=1;            // DC already handled, AC loop starts at 1
    }else{
        last_non_zero= -1;
        i=0;
    }

    /* quantize the (remaining) coefficients in scan order */
    for(; i<16; i++){
        const int j= scantable[i];
        int level= block[j]*quant_table[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QUANT_SHIFT;
                block[j]= level;
            }else{
                level= (bias - level)>>QUANT_SHIFT;
                block[j]= -level;
            }
            last_non_zero = i;
        }else{
            block[j]=0;
        }
    }

    return last_non_zero;
}
/**
 * Performs luma + chroma motion compensation for one partition in one
 * direction (one reference list), including edge emulation when the MV
 * points (partly) outside the reference picture.
 * @param square if zero, a second qpix_op call at dest+delta covers the
 *        non-square half of the partition
 * @param delta offset (in bytes) of the second luma half for non-square parts
 * @param list the prediction list (0 or 1) whose cached MV is used
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;  // quarter-pel position
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);      // selects the qpel interpolation function
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
        return;

    /* fractional positions need 3 extra filter-tap pixels on each side */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
/**
 * Standard (non-weighted) motion compensation of one partition:
 * list0 prediction uses the "put" functions; if list1 is also used, its
 * prediction is blended in with the "avg" functions (simple bi-average).
 * @param x_offset,y_offset position of the partition inside the MB, in 8-pel units
 */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* advance destinations to the partition, then convert the offsets to
     * picture coordinates for the source fetch */
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);

        /* the second list averages onto the first prediction */
        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);
    }
}
/**
 * Weighted motion compensation for one partition.
 * Handles both explicit weighting (use_weight==1) and implicit bipred
 * weighting (use_weight==2); unidirectional partitions are predicted in
 * place and then weighted, bidirectional ones are predicted into the
 * scratchpad and blended with the biweight operators.
 */
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;

    /* position the destination pointers on the sub-partition and express the
     * offsets in frame coordinates (chroma units) */
    dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
    dest_cb += x_offset + y_offset*h->mb_uvlinesize;
    dest_cr += x_offset + y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
        uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* list0 prediction goes directly to dest, list1 to the scratchpad */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put);

        if(h->use_weight == 2){
            /* implicit weighting: the two weights sum to 64, offset is 0,
             * log2 denom is fixed at 5 */
            int weight0 = h->implicit_weight[refn0][refn1];
            int weight1 = 64 - weight0;
            luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
        }else{
            /* explicit weighting: per-list weights/offsets from the slice header */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
        }
    }else{
        /* single-list prediction: predict in place, then weight in place */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put);

        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
        }
    }
}
1845 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1846 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1847 int x_offset, int y_offset,
1848 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1849 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1850 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1851 int list0, int list1){
1852 if((h->use_weight==2 && list0 && list1
1853 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1854 || h->use_weight==1)
1855 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1856 x_offset, y_offset, qpix_put, chroma_put,
1857 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1858 else
1859 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1860 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1863 static inline void prefetch_motion(H264Context *h, int list){
1864 /* fetch pixels for estimated mv 4 macroblocks ahead
1865 * optimized for 64byte cache lines */
1866 MpegEncContext * const s = &h->s;
1867 const int refn = h->ref_cache[list][scan8[0]];
1868 if(refn >= 0){
1869 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1870 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1871 uint8_t **src= h->ref_list[list][refn].data;
1872 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1873 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1874 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1875 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Perform all inter prediction for one macroblock, dispatching per
 * partition shape (16x16, 16x8, 8x16 or 8x8 with sub-partitions) to
 * mc_part() with the matching qpel/chroma/weight operator sizes.
 */
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    prefetch_motion(h, 0);

    if(IS_16X16(mb_type)){
        /* one 16x16 partition */
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                &weight_op[0], &weight_avg[0],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    }else if(IS_16X8(mb_type)){
        /* two 16x8 partitions, stacked vertically */
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else if(IS_8X16(mb_type)){
        /* two 8x16 partitions, side by side */
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[2], &weight_avg[2],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    }else{
        int i;

        assert(IS_8X8(mb_type));

        /* four 8x8 sub-macroblocks, each possibly subdivided further */
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[3], &weight_avg[3],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[4], &weight_avg[4],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[5], &weight_avg[5],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[6], &weight_avg[6],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                }
            }
        }
    }

    prefetch_motion(h, 1);
}
1966 static av_cold void decode_init_vlc(void){
1967 static int done = 0;
1969 if (!done) {
1970 int i;
1971 int offset;
1972 done = 1;
1974 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1975 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1976 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1977 &chroma_dc_coeff_token_len [0], 1, 1,
1978 &chroma_dc_coeff_token_bits[0], 1, 1,
1979 INIT_VLC_USE_NEW_STATIC);
1981 offset = 0;
1982 for(i=0; i<4; i++){
1983 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1984 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1985 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1986 &coeff_token_len [i][0], 1, 1,
1987 &coeff_token_bits[i][0], 1, 1,
1988 INIT_VLC_USE_NEW_STATIC);
1989 offset += coeff_token_vlc_tables_size[i];
1992 * This is a one time safety check to make sure that
1993 * the packed static coeff_token_vlc table sizes
1994 * were initialized correctly.
1996 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1998 for(i=0; i<3; i++){
1999 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
2000 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
2001 init_vlc(&chroma_dc_total_zeros_vlc[i],
2002 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
2003 &chroma_dc_total_zeros_len [i][0], 1, 1,
2004 &chroma_dc_total_zeros_bits[i][0], 1, 1,
2005 INIT_VLC_USE_NEW_STATIC);
2007 for(i=0; i<15; i++){
2008 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
2009 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
2010 init_vlc(&total_zeros_vlc[i],
2011 TOTAL_ZEROS_VLC_BITS, 16,
2012 &total_zeros_len [i][0], 1, 1,
2013 &total_zeros_bits[i][0], 1, 1,
2014 INIT_VLC_USE_NEW_STATIC);
2017 for(i=0; i<6; i++){
2018 run_vlc[i].table = run_vlc_tables[i];
2019 run_vlc[i].table_allocated = run_vlc_tables_size;
2020 init_vlc(&run_vlc[i],
2021 RUN_VLC_BITS, 7,
2022 &run_len [i][0], 1, 1,
2023 &run_bits[i][0], 1, 1,
2024 INIT_VLC_USE_NEW_STATIC);
2026 run7_vlc.table = run7_vlc_table,
2027 run7_vlc.table_allocated = run7_vlc_table_size;
2028 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2029 &run_len [6][0], 1, 1,
2030 &run_bits[6][0], 1, 1,
2031 INIT_VLC_USE_NEW_STATIC);
/**
 * Free all per-picture tables allocated by alloc_tables(), plus the
 * per-thread buffers allocated by context_init() (top_borders) and
 * frame_start() (obmc_scratchpad). Safe to call on partially allocated
 * state: av_freep() handles NULL and clears each pointer.
 */
static void free_tables(H264Context *h){
    int i;
    H264Context *hx;
    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL;   // points into slice_table_base, now freed

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2b8_xy);

    /* per-thread buffers live in each thread context, not the master */
    for(i = 0; i < h->s.avctx->thread_count; i++) {
        hx = h->thread_context[i];
        if(!hx) continue;
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->s.obmc_scratchpad);
    }
}
/**
 * Build the 8x8 dequantization tables (one per intra/inter list, qp 0..51)
 * from the PPS scaling matrices. Identical scaling matrices share one buffer.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,q,x;
    // whether the active IDCT implementation wants transposed coefficients
    const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
    h->dequant8_coeff[0] = h->dequant8_buffer[0];
    h->dequant8_coeff[1] = h->dequant8_buffer[1];

    for(i=0; i<2; i++ ){
        // if both 8x8 scaling matrices are identical, alias list 1 to list 0
        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
            h->dequant8_coeff[1] = h->dequant8_buffer[0];
            break;
        }

        for(q=0; q<52; q++){
            int shift = div6[q];    // qp/6 gives the scaling power of two
            int idx = rem6[q];      // qp%6 selects the base coefficient row
            for(x=0; x<64; x++)
                h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
/**
 * Build the 4x4 dequantization tables (six lists, qp 0..51) from the PPS
 * scaling matrices. Lists with identical scaling matrices share one buffer.
 */
static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    // whether the active IDCT implementation wants transposed coefficients
    const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        // reuse the buffer of any earlier list with an identical scaling matrix
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;   // aliased above, nothing to compute

        for(q=0; q<52; q++){
            int shift = div6[q] + 2;    // qp/6 plus the 4x4 normalization
            int idx = rem6[q];          // qp%6 selects the base coefficient row
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}
/**
 * Build all dequant tables: 4x4 always, 8x8 only when the PPS enables the
 * 8x8 transform. In transform-bypass mode, qp==0 entries are forced to the
 * unit scale (1<<6 in the fixed-point convention used by these tables) so
 * that bypassed blocks pass through unchanged.
 */
static void init_dequant_tables(H264Context *h){
    int i,x;
    init_dequant4_coeff_table(h);
    if(h->pps.transform_8x8_mode)
        init_dequant8_coeff_table(h);
    if(h->sps.transform_bypass){
        for(i=0; i<6; i++)
            for(x=0; x<16; x++)
                h->dequant4_coeff[i][0][x] = 1<<6;
        if(h->pps.transform_8x8_mode)
            for(i=0; i<2; i++)
                for(x=0; x<64; x++)
                    h->dequant8_coeff[i][0][x] = 1<<6;
    }
}
/**
 * allocates tables.
 * needs width/height
 *
 * On failure all partial allocations are released via free_tables()
 * (CHECKED_ALLOCZ jumps to the fail: label) and -1 is returned.
 */
static int alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* one extra MB row as guard space for edge macroblocks */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    int x,y;

    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))

    CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
    CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))

    CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
    CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
    CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));

    /* -1 marks "no slice"; slice_table points past the guard rows/column */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    /* precomputed MB index -> motion-vector / 8x8-block index maps */
    CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;
            const int b8_xy= 2*x + 2*y*h->b8_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            h->mb2b8_xy[mb_xy]= b8_xy;
        }
    }

    /* allocated lazily in frame_start() once linesize is known */
    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h);
    return -1;
}
/**
 * Mimic alloc_tables(), but for every context thread.
 * The big per-picture tables are shared (aliased) with the master context;
 * only the obmc scratchpad stays per-thread (allocated lazily in
 * frame_start()) and the prediction function pointers are re-initialized.
 */
static void clone_tables(H264Context *dst, H264Context *src){
    dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
    dst->non_zero_count = src->non_zero_count;
    dst->slice_table = src->slice_table;
    dst->cbp_table = src->cbp_table;
    dst->mb2b_xy = src->mb2b_xy;
    dst->mb2b8_xy = src->mb2b8_xy;
    dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
    dst->mvd_table[0] = src->mvd_table[0];
    dst->mvd_table[1] = src->mvd_table[1];
    dst->direct_table = src->direct_table;

    dst->s.obmc_scratchpad = NULL;
    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
}
/**
 * Init context
 * Allocate buffers which are not shared amongst multiple threads.
 */
static int context_init(H264Context *h){
    /* one row of saved border samples per MB: 16 luma + 8 cb + 8 cr bytes;
     * two buffers for the two field parities */
    CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
    CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))

    return 0;
fail:
    return -1; // free_tables will clean up for us
}
/**
 * Initialization shared by all entry points: mirrors the user-supplied
 * dimensions and codec id into the MpegEncContext, sets up the prediction
 * function pointers, and installs flat (all-16) scaling matrices until a
 * real SPS/PPS provides them.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    ff_h264_pred_init(&h->hpc, s->codec_id);

    h->dequant_coeff_pps= -1;   // no PPS has provided dequant coefficients yet
    s->unrestricted_mv=1;
    s->decode=1; //FIXME

    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
2221 static av_cold int decode_init(AVCodecContext *avctx){
2222 H264Context *h= avctx->priv_data;
2223 MpegEncContext * const s = &h->s;
2225 MPV_decode_defaults(s);
2227 s->avctx = avctx;
2228 common_init(h);
2230 s->out_format = FMT_H264;
2231 s->workaround_bugs= avctx->workaround_bugs;
2233 // set defaults
2234 // s->decode_mb= ff_h263_decode_mb;
2235 s->quarter_sample = 1;
2236 s->low_delay= 1;
2238 // Set in decode_postinit() once initial parsing is complete
2239 avctx->pix_fmt = PIX_FMT_NONE;
2241 decode_init_vlc();
2243 if(avctx->extradata_size > 0 && avctx->extradata &&
2244 *(char *)avctx->extradata == 1){
2245 h->is_avc = 1;
2246 h->got_avcC = 0;
2247 } else {
2248 h->is_avc = 0;
2251 h->thread_context[0] = h;
2252 h->outputed_poc = INT_MIN;
2253 h->prev_poc_msb= 1<<16;
2254 return 0;
2257 static int decode_postinit(H264Context *h, SPS *sps){
2258 AVCodecContext * const avctx= h->s.avctx;
2260 if (avctx->pix_fmt != PIX_FMT_NONE){
2261 return 0;
2264 if (avctx->vdpau_acceleration) {
2265 if(h->s.chroma_format >= 2) {
2266 return -2;
2268 if (sps->profile_idc == 66) {
2269 avctx->pix_fmt = avctx->get_format(avctx, pixfmt_vdpau_h264_baseline_420);
2270 } else if (sps->profile_idc == 77) {
2271 avctx->pix_fmt = avctx->get_format(avctx, pixfmt_vdpau_h264_main_420);
2272 } else if (sps->profile_idc == 100) {
2273 avctx->pix_fmt = avctx->get_format(avctx, pixfmt_vdpau_h264_high_420);
2274 } else {
2275 return -2;
2277 } else if (avctx->codec_id == CODEC_ID_SVQ3) {
2278 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2279 } else {
2280 avctx->pix_fmt= PIX_FMT_YUV420P;
2283 return 0;
/**
 * Start decoding a new frame: allocate/recycle the picture via
 * MPV_frame_start(), precompute the per-block destination offsets for
 * frame and field (x2 linesize) rendering, and lazily allocate the
 * per-thread bipred scratchpad (which needs linesize, unknown at
 * alloc_tables() time). Returns 0 on success, -1 on failure.
 */
static int frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;

#ifdef HAVE_VDPAU
    ff_VDPAU_h264_set_reference_frames_count(h);
#endif

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;

    assert(s->linesize && s->uvlinesize);

    /* block_offset[0..23]: frame rendering; [24..47]: field (doubled stride) */
    for(i=0; i<16; i++){
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    for(i=0; i<4; i++){
        h->block_offset[16+i]=
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+16+i]=
        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < s->avctx->thread_count; i++)
        if(!h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);

    /* some macroblocks will be accessed before they're available */
    if(FRAME_MBAFF || s->avctx->thread_count > 1)
        memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;
    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
/**
 * Save the bottom row and right column of the just-decoded macroblock into
 * top_borders[]/left_border so the deblocking filter of neighbouring MBs
 * can still see the unfiltered samples. The MBAFF cases adjust offsets,
 * step and which of the two saved border lines is written.
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
    MpegEncContext * const s = &h->s;
    int i;
    int step = 1;
    int offset = 1;
    int uvoffset= 1;
    int top_idx = 1;
    int skiplast= 0;

    /* step back one row so index i addresses row i-1 of the MB */
    src_y -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            /* bottom MB of a pair */
            offset = MB_MBAFF ? 1 : 17;
            uvoffset= MB_MBAFF ? 1 : 9;
            if(!MB_MBAFF){
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
                }
            }
        }else{
            /* top MB of a pair */
            if(!MB_MBAFF){
                h->left_border[0]= h->top_borders[0][s->mb_x][15];
                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
                    h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
                }
                skiplast= 1;
            }
            offset =
            uvoffset=
            top_idx = MB_MBAFF ? 0 : 1;
        }
        step= MB_MBAFF ? 2 : 1;
    }

    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
    for(i=1; i<17 - skiplast; i++){
        h->left_border[offset+i*step]= src_y[15+i* linesize];
    }

    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);

    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        /* chroma borders: cb at +34, cr at +34+18 in left_border;
         * +16 / +24 in top_borders */
        h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
        h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
        for(i=1; i<9 - skiplast; i++){
            h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
            h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
        }
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
    }
}
/**
 * Swap (xchg != 0) or copy the saved unfiltered border samples with the
 * current macroblock edges, so intra prediction sees unfiltered neighbours
 * while the deblocked picture is preserved. Called once before and once
 * after intra prediction with opposite xchg values.
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
    MpegEncContext * const s = &h->s;
    int temp8, i;
    uint64_t temp64;
    int deblock_left;
    int deblock_top;
    int mb_xy;
    int step = 1;
    int offset = 1;
    int uvoffset= 1;
    int top_idx = 1;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            /* bottom MB of a pair */
            offset = MB_MBAFF ? 1 : 17;
            uvoffset= MB_MBAFF ? 1 : 9;
        }else{
            offset =
            uvoffset=
            top_idx = MB_MBAFF ? 0 : 1;
        }
        step= MB_MBAFF ? 2 : 1;
    }

    /* deblocking_filter==2 restricts filtering to slice-internal edges */
    if(h->deblocking_filter == 2) {
        mb_xy = h->mb_xy;
        deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
        deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
    } else {
        deblock_left = (s->mb_x > 0);
        deblock_top = (s->mb_y > !!MB_FIELD);
    }

    /* step back to the row above and the column left of the MB */
    src_y -= linesize + 1;
    src_cb -= uvlinesize + 1;
    src_cr -= uvlinesize + 1;

/* swap a and b through t when xchg is set, otherwise only copy a into b */
#define XCHG(a,b,t,xchg)\
t= a;\
if(xchg)\
    a= b;\
b= t;

    if(deblock_left){
        for(i = !deblock_top; i<16; i++){
            XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
        }
        XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
    }

    if(deblock_top){
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
        }
    }

    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(deblock_left){
            for(i = !deblock_top; i<8; i++){
                XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
                XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
            }
            XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
            XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
        }
        if(deblock_top){
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
        }
    }
}
/**
 * Fully decode one macroblock: prediction (intra or inter), residual
 * addition, and deblocking-filter bookkeeping. Compiled twice: with
 * simple=1 (fast path, all uncommon-case branches constant-folded away)
 * and simple=0 (handles MBAFF/field, PCM, gray, SVQ3).
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);

    dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
    dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
    dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;

    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);

    if (!simple && MB_FIELD) {
        /* field macroblock: double the strides, use the field block offsets */
        linesize = h->mb_linesize = s->linesize * 2;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[24];
        if(mb_y&1){ //FIXME move out of this function?
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
        if(FRAME_MBAFF) {
            int list;
            /* retag ref indices with the field parity for the loop filter */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize = h->mb_linesize = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    /* select the residual-add functions for this MB's transform mode */
    if(transform_bypass){
        idct_dc_add =
        idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
    }else if(IS_8x8DCT(mb_type)){
        idct_dc_add = s->dsp.h264_idct8_dc_add;
        idct_add = s->dsp.h264_idct8_add;
    }else{
        idct_dc_add = s->dsp.h264_idct_dc_add;
        idct_add = s->dsp.h264_idct_add;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* raw samples were stored in h->mb by the parser; just copy them out */
        for (i=0; i<16; i++) {
            memcpy(dest_y + i* linesize, h->mb + i*8, 16);
        }
        for (i=0; i<8; i++) {
            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* expose unfiltered neighbour samples to intra prediction */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);

            if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
            }

            if(IS_INTRA4x4(mb_type)){
                if(simple || !s->encoding){
                    if(IS_8x8DCT(mb_type)){
                        /* 8x8 intra: predict and add residual per 8x8 block */
                        for(i=0; i<16; i+=4){
                            uint8_t * const ptr= dest_y + block_offset[i];
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                            const int nnz = h->non_zero_count_cache[ scan8[i] ];
                            h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                                   (h->topright_samples_available<<i)&0x4000, linesize);
                            if(nnz){
                                if(nnz == 1 && h->mb[i*16])
                                    idct_dc_add(ptr, h->mb + i*16, linesize);
                                else
                                    idct_add(ptr, h->mb + i*16, linesize);
                            }
                        }
                    }else
                    for(i=0; i<16; i++){
                        uint8_t * const ptr= dest_y + block_offset[i];
                        uint8_t *topright;
                        const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                        int nnz, tr;

                        /* only these two modes read top-right samples; when
                         * unavailable, replicate the rightmost top sample */
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                tr= ptr[3 - linesize]*0x01010101;
                                topright= (uint8_t*) &tr;
                            }else
                                topright= ptr + 4 - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i] ];
                        if(nnz){
                            if(is_h264){
                                if(nnz == 1 && h->mb[i*16])
                                    idct_dc_add(ptr, h->mb + i*16, linesize);
                                else
                                    idct_add(ptr, h->mb + i*16, linesize);
                            }else
                                svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                        }
                    }
                }
            }else{
                /* intra 16x16: one full-MB prediction plus a DC transform */
                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
                if(is_h264){
                    if(!transform_bypass)
                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
                }else
                    svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
            }
            if(h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
        }else if(is_h264){
            hl_motion(h, dest_y, dest_cb, dest_cr,
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
        }

        /* add the luma residual (intra4x4 already added it above) */
        if(!IS_INTRA4x4(mb_type)){
            if(is_h264){
                if(IS_INTRA16x16(mb_type)){
                    for(i=0; i<16; i++){
                        if(h->non_zero_count_cache[ scan8[i] ])
                            idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                        else if(h->mb[i*16])
                            idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                    }
                }else{
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    for(i=0; i<16; i+=di){
                        int nnz = h->non_zero_count_cache[ scan8[i] ];
                        if(nnz){
                            if(nnz==1 && h->mb[i*16])
                                idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                            else
                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
                        }
                    }
                }
            }else{
                for(i=0; i<16; i++){
                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                        uint8_t * const ptr= dest_y + block_offset[i];
                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                    }
                }
            }
        }

        /* add the chroma residual */
        if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *dest[2] = {dest_cb, dest_cr};
            if(transform_bypass){
                idct_add = idct_dc_add = s->dsp.add_pixels4;
            }else{
                idct_add = s->dsp.h264_idct_add;
                idct_dc_add = s->dsp.h264_idct_dc_add;
                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
            }
            if(is_h264){
                for(i=16; i<16+8; i++){
                    if(h->non_zero_count_cache[ scan8[i] ])
                        idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                    else if(h->mb[i*16])
                        idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
                }
            }else{
                for(i=16; i<16+8; i++){
                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                        uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
                        svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
                    }
                }
            }
        }
    }
    if(h->deblocking_filter) {
        /* save unfiltered borders, then filter this MB */
        backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
        fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
        h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
        h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
        if (!simple && FRAME_MBAFF) {
            filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
        } else {
            filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
        }
    }
}
/* Fast path: plain H.264 frame macroblocks with none of the uncommon
 * features; selected by hl_decode_mb() below via the simple=1 template. */
2702 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2704 static void hl_decode_mb_simple(H264Context *h){
2705 hl_decode_mb_internal(h, 1);
2709 * Process a macroblock; this handles edge cases, such as interlacing.
/* Slow path: same worker as hl_decode_mb_simple() but with simple=0 so all
 * uncommon-case checks are compiled in; av_noinline keeps this cold body
 * from being merged into the hot caller. */
2711 static void av_noinline hl_decode_mb_complex(H264Context *h){
2712 hl_decode_mb_internal(h, 0);
/* Dispatch macroblock reconstruction to the simple or complex decoder.
 * The complex path is taken for MBAFF/field MBs, intra-PCM, non-H.264
 * codec ids (SVQ3), gray-only decoding, encoding, or size-optimized builds. */
2715 static void hl_decode_mb(H264Context *h){
2716 MpegEncContext * const s = &h->s;
2717 const int mb_xy= h->mb_xy;
2718 const int mb_type= s->current_picture.mb_type[mb_xy];
2719 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2720 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
/* In encoder builds, skip reconstruction entirely unless s->decode is set. */
2722 if(ENABLE_H264_ENCODER && !s->decode)
2723 return;
2725 if (is_complex)
2726 hl_decode_mb_complex(h);
2727 else hl_decode_mb_simple(h);
/* Turn a frame Picture in place into a view of one of its fields:
 * for the bottom field the data pointers are advanced by one line,
 * the linesizes are doubled so field rows are addressed with frame
 * strides, reference marking is set to the field parity, and poc is
 * taken from the matching field_poc entry. */
2730 static void pic_as_field(Picture *pic, const int parity){
2731 int i;
2732 for (i = 0; i < 4; ++i) {
2733 if (parity == PICT_BOTTOM_FIELD)
2734 pic->data[i] += pic->linesize[i];
2735 pic->reference = parity;
2736 pic->linesize[i] *= 2;
2738 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/* Copy *src to *dest if src is referenced with the requested parity.
 * For field parities (parity != PICT_FRAME) the copy is converted with
 * pic_as_field() and pic_id is rescaled to a field pic_num (2*id + id_add).
 * @return 1 if a copy was made, 0 otherwise */
2741 static int split_field_copy(Picture *dest, Picture *src,
2742 int parity, int id_add){
2743 int match = !!(src->reference & parity);
2745 if (match) {
2746 *dest = *src;
2747 if(parity != PICT_FRAME){
2748 pic_as_field(dest, parity);
2749 dest->pic_id *= 2;
2750 dest->pic_id += id_add;
2754 return match;
/* Build (part of) a default reference list from 'in' (len entries, entries
 * may be NULL), alternating pictures whose reference marking matches 'sel'
 * with those matching the opposite parity (sel^3). pic_id is set to the
 * long-term index (is_long) or frame_num before each copy.
 * @return number of entries written to def */
2757 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2758 int i[2]={0};
2759 int index=0;
/* i[0]/i[1] independently scan for the next same-parity / opposite-parity
 * candidate; both advance past entries that do not match. */
2761 while(i[0]<len || i[1]<len){
2762 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2763 i[0]++;
2764 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2765 i[1]++;
2766 if(i[0] < len){
2767 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2768 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2770 if(i[1] < len){
2771 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2772 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2776 return index;
/* Append pictures from src[0..len) to 'sorted', ordered by POC relative to
 * 'limit': with dir==0 the POCs greater than limit in ascending order, with
 * dir==1 the POCs less than or equal to limit in descending order
 * (repeated best-candidate selection, no duplicate output since limit is
 * tightened to the last emitted POC each round).
 * @return number of pictures appended */
2779 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2780 int i, best_poc;
2781 int out_i= 0;
2783 for(;;){
2784 best_poc= dir ? INT_MIN : INT_MAX;
2786 for(i=0; i<len; i++){
2787 const int poc= src[i]->poc;
2788 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2789 best_poc= poc;
2790 sorted[out_i]= src[i];
/* No candidate left on this side of the limit -> done. */
2793 if(best_poc == (dir ? INT_MIN : INT_MAX))
2794 break;
2795 limit= sorted[out_i++]->poc - dir;
2797 return out_i;
2801 * fills the default_ref_list.
/* Builds h->default_ref_list[0] (and [1] for B slices): B lists are formed
 * from short-term refs sorted around the current POC, other slice types keep
 * short-term decoding order; long-term refs are appended afterwards. */
2803 static int fill_default_ref_list(H264Context *h){
2804 MpegEncContext * const s = &h->s;
2805 int i, len;
2807 if(h->slice_type_nos==FF_B_TYPE){
2808 Picture *sorted[32];
2809 int cur_poc, list;
2810 int lens[2];
/* For field pictures compare against the current field's POC. */
2812 if(FIELD_PICTURE)
2813 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2814 else
2815 cur_poc= s->current_picture_ptr->poc;
2817 for(list= 0; list<2; list++){
/* L0: past refs first, then future; L1: the reverse (dir flipped by list). */
2818 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2819 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2820 assert(len<=32);
2821 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2822 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2823 assert(len<=32);
2825 if(len < h->ref_count[list])
2826 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2827 lens[list]= len;
/* If list1 came out identical to list0, swap list1's first two entries. */
2830 if(lens[0] == lens[1] && lens[1] > 1){
/* NOTE(review): the element compare runs before the i<lens[0] bound check;
 * safer order would be i<lens[0] && ... — confirm against upstream fix. */
2831 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2832 if(i == lens[0])
2833 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2835 }else{
2836 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2837 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2838 assert(len <= 32);
2839 if(len < h->ref_count[0])
2840 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2842 #ifdef TRACE
2843 for (i=0; i<h->ref_count[0]; i++) {
2844 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2846 if(h->slice_type_nos==FF_B_TYPE){
2847 for (i=0; i<h->ref_count[1]; i++) {
2848 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2851 #endif
2852 return 0;
/* Forward declarations: MMCO debug helpers defined later in this file. */
2855 static void print_short_term(H264Context *h);
2856 static void print_long_term(H264Context *h);
2859 * Extract structure information about the picture described by pic_num in
2860 * the current decoding context (frame or field). Note that pic_num is
2861 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2862 * @param pic_num picture number for which to extract structure information
2863 * @param structure one of PICT_XXX describing structure of picture
2864 * with pic_num
2865 * @return frame number (short term) or long term index of picture
2866 * described by pic_num
2868 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2869 MpegEncContext * const s = &h->s;
2871 *structure = s->picture_structure;
2872 if(FIELD_PICTURE){
/* In field decoding the low bit of pic_num is the parity flag: even values
 * denote the opposite-parity field; the remaining bits are the number/index. */
2873 if (!(pic_num & 1))
2874 /* opposite field */
2875 *structure ^= PICT_FRAME;
2876 pic_num >>= 1;
2879 return pic_num;
/* Parse the ref_pic_list_reordering() syntax and apply it: start from the
 * default lists, then for each reordering command locate the addressed
 * short-term (idc 0/1) or long-term (idc 2) picture and move it to the
 * current index, shifting the intervening entries down.
 * @return 0 on success, -1 on bitstream error */
2882 static int decode_ref_pic_list_reordering(H264Context *h){
2883 MpegEncContext * const s = &h->s;
2884 int list, index, pic_structure;
2886 print_short_term(h);
2887 print_long_term(h);
2889 for(list=0; list<h->list_count; list++){
2890 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
2892 if(get_bits1(&s->gb)){
2893 int pred= h->curr_pic_num;
2895 for(index=0; ; index++){
2896 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2897 unsigned int pic_id;
2898 int i;
2899 Picture *ref = NULL;
/* idc 3 terminates the reordering command list. */
2901 if(reordering_of_pic_nums_idc==3)
2902 break;
2904 if(index >= h->ref_count[list]){
2905 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2906 return -1;
2909 if(reordering_of_pic_nums_idc<3){
2910 if(reordering_of_pic_nums_idc<2){
/* Short-term: idc 0 subtracts, idc 1 adds abs_diff_pic_num_minus1+1
 * to the running predicted pic_num (modulo max_pic_num). */
2911 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2912 int frame_num;
2914 if(abs_diff_pic_num > h->max_pic_num){
2915 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2916 return -1;
2919 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2920 else pred+= abs_diff_pic_num;
2921 pred &= h->max_pic_num - 1;
2923 frame_num = pic_num_extract(h, pred, &pic_structure);
/* Search the short-term list back to front; i<0 means not found. */
2925 for(i= h->short_ref_count-1; i>=0; i--){
2926 ref = h->short_ref[i];
2927 assert(ref->reference);
2928 assert(!ref->long_ref);
2930 ref->frame_num == frame_num &&
2931 (ref->reference & pic_structure)
2933 break;
2935 if(i>=0)
2936 ref->pic_id= pred;
2937 }else{
/* Long-term: look up by long_term_pic_idx. */
2938 int long_idx;
2939 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2941 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2943 if(long_idx>31){
2944 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2945 return -1;
2947 ref = h->long_ref[long_idx];
2948 assert(!(ref && !ref->reference));
2949 if(ref && (ref->reference & pic_structure)){
2950 ref->pic_id= pic_id;
2951 assert(ref->long_ref);
2952 i=0;
2953 }else{
2954 i=-1;
2958 if (i < 0) {
2959 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2960 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2961 } else {
/* Find the old position of ref (or the list end), shift entries down
 * by one, then place ref at the current index. */
2962 for(i=index; i+1<h->ref_count[list]; i++){
2963 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2964 break;
2966 for(; i > index; i--){
2967 h->ref_list[list][i]= h->ref_list[list][i-1];
2969 h->ref_list[list][index]= *ref;
2970 if (FIELD_PICTURE){
2971 pic_as_field(&h->ref_list[list][index], pic_structure);
2974 }else{
2975 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2976 return -1;
/* Paper over any holes left in the lists so later code has valid data. */
2981 for(list=0; list<h->list_count; list++){
2982 for(index= 0; index < h->ref_count[list]; index++){
2983 if(!h->ref_list[list][index].data[0]){
2984 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2985 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2990 return 0;
/* For MBAFF decoding, derive per-field reference entries: for each frame in
 * ref_list, store a top-field and a bottom-field view at indices 16+2*i and
 * 16+2*i+1 (doubled linesize, offset data for the bottom field), and
 * replicate the corresponding prediction weights/offsets. */
2993 static void fill_mbaff_ref_list(H264Context *h){
2994 int list, i, j;
2995 for(list=0; list<2; list++){ //FIXME try list_count
2996 for(i=0; i<h->ref_count[list]; i++){
2997 Picture *frame = &h->ref_list[list][i];
2998 Picture *field = &h->ref_list[list][16+2*i];
2999 field[0] = *frame;
3000 for(j=0; j<3; j++)
3001 field[0].linesize[j] <<= 1;
3002 field[0].reference = PICT_TOP_FIELD;
3003 field[0].poc= field[0].field_poc[0];
3004 field[1] = field[0];
3005 for(j=0; j<3; j++)
3006 field[1].data[j] += frame->linesize[j];
3007 field[1].reference = PICT_BOTTOM_FIELD;
3008 field[1].poc= field[1].field_poc[1];
/* Both fields inherit the frame's explicit weights/offsets. */
3010 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3011 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3012 for(j=0; j<2; j++){
3013 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3014 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* Implicit weights are replicated along both list axes. */
3018 for(j=0; j<h->ref_count[1]; j++){
3019 for(i=0; i<h->ref_count[0]; i++)
3020 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3021 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3022 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse the pred_weight_table() slice-header syntax (explicit weighted
 * prediction): per list and reference, optional luma and chroma weights and
 * offsets; absent entries fall back to the default weight (1<<denom) and
 * offset 0. Sets h->use_weight / h->use_weight_chroma when any entry
 * differs from the defaults.
 * @return 0 (no error paths) */
3026 static int pred_weight_table(H264Context *h){
3027 MpegEncContext * const s = &h->s;
3028 int list, i;
3029 int luma_def, chroma_def;
3031 h->use_weight= 0;
3032 h->use_weight_chroma= 0;
3033 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3034 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3035 luma_def = 1<<h->luma_log2_weight_denom;
3036 chroma_def = 1<<h->chroma_log2_weight_denom;
3038 for(list=0; list<2; list++){
3039 for(i=0; i<h->ref_count[list]; i++){
3040 int luma_weight_flag, chroma_weight_flag;
3042 luma_weight_flag= get_bits1(&s->gb);
3043 if(luma_weight_flag){
3044 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3045 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3046 if( h->luma_weight[list][i] != luma_def
3047 || h->luma_offset[list][i] != 0)
3048 h->use_weight= 1;
3049 }else{
3050 h->luma_weight[list][i]= luma_def;
3051 h->luma_offset[list][i]= 0;
/* Chroma entries only exist in the bitstream for chroma formats. */
3054 if(CHROMA){
3055 chroma_weight_flag= get_bits1(&s->gb);
3056 if(chroma_weight_flag){
3057 int j;
3058 for(j=0; j<2; j++){
3059 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3060 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3061 if( h->chroma_weight[list][i][j] != chroma_def
3062 || h->chroma_offset[list][i][j] != 0)
3063 h->use_weight_chroma= 1;
3065 }else{
3066 int j;
3067 for(j=0; j<2; j++){
3068 h->chroma_weight[list][i][j]= chroma_def;
3069 h->chroma_offset[list][i][j]= 0;
/* Only B slices carry a list-1 weight table. */
3074 if(h->slice_type_nos != FF_B_TYPE) break;
3076 h->use_weight= h->use_weight || h->use_weight_chroma;
3077 return 0;
/* Derive the implicit (POC-distance based) bi-prediction weight table for
 * every (ref0, ref1) pair; use_weight==2 marks implicit mode. Falls back to
 * unweighted (use_weight=0) in the symmetric single-reference case. */
3080 static void implicit_weight_table(H264Context *h){
3081 MpegEncContext * const s = &h->s;
3082 int ref0, ref1;
3083 int cur_poc = s->current_picture_ptr->poc;
3085 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3086 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3087 h->use_weight= 0;
3088 h->use_weight_chroma= 0;
3089 return;
3092 h->use_weight= 2;
3093 h->use_weight_chroma= 2;
3094 h->luma_log2_weight_denom= 5;
3095 h->chroma_log2_weight_denom= 5;
3097 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3098 int poc0 = h->ref_list[0][ref0].poc;
3099 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3100 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/dist_scale_factor follow the spec's fixed-point derivation;
 * out-of-range scale factors fall back to the equal weight 32. */
3101 int td = av_clip(poc1 - poc0, -128, 127);
3102 if(td){
3103 int tb = av_clip(cur_poc - poc0, -128, 127);
3104 int tx = (16384 + (FFABS(td) >> 1)) / td;
3105 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3106 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3107 h->implicit_weight[ref0][ref1] = 32;
3108 else
3109 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3110 }else
3111 h->implicit_weight[ref0][ref1] = 32;
3117 * Mark a picture as no longer needed for reference. The refmask
3118 * argument allows unreferencing of individual fields or the whole frame.
3119 * If the picture becomes entirely unreferenced, but is being held for
3120 * display purposes, it is marked as such.
3121 * @param refmask mask of fields to unreference; the mask is bitwise
3122 * anded with the reference marking of pic
3123 * @return non-zero if pic becomes entirely unreferenced (except possibly
3124 * for display purposes) zero if one of the fields remains in
3125 * reference
3127 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3128 int i;
3129 if (pic->reference &= refmask) {
3130 return 0;
3131 } else {
/* Fully unreferenced: if it is still queued for output, keep its buffer
 * alive by re-marking it with the special DELAYED_PIC_REF value. */
3132 for(i = 0; h->delayed_pic[i]; i++)
3133 if(pic == h->delayed_pic[i]){
3134 pic->reference=DELAYED_PIC_REF;
3135 break;
3137 return 1;
3142 * instantaneous decoder refresh.
/* Clear all reference state for an IDR: drop every long-term and short-term
 * reference and reset the frame_num/POC prediction history. */
3144 static void idr(H264Context *h){
3145 int i;
3147 for(i=0; i<16; i++){
3148 remove_long(h, i, 0);
3150 assert(h->long_ref_count==0);
3152 for(i=0; i<h->short_ref_count; i++){
3153 unreference_pic(h, h->short_ref[i], 0);
3154 h->short_ref[i]= NULL;
3156 h->short_ref_count=0;
3157 h->prev_frame_num= 0;
3158 h->prev_frame_num_offset= 0;
3159 h->prev_poc_msb=
3160 h->prev_poc_lsb= 0;
3163 /* forget old pics after a seek */
/* AVCodecContext.flush callback: drop the delayed-output queue, clear all
 * references via idr(), unreference the in-progress picture and flush the
 * underlying MPEG context. */
3164 static void flush_dpb(AVCodecContext *avctx){
3165 H264Context *h= avctx->priv_data;
3166 int i;
3167 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3168 if(h->delayed_pic[i])
3169 h->delayed_pic[i]->reference= 0;
3170 h->delayed_pic[i]= NULL;
3172 h->outputed_poc= INT_MIN;
3173 idr(h);
3174 if(h->s.current_picture_ptr)
3175 h->s.current_picture_ptr->reference= 0;
3176 h->s.first_field= 0;
3177 ff_mpeg_flush(avctx);
3181 * Find a Picture in the short term reference list by frame number.
3182 * @param frame_num frame number to search for
3183 * @param idx the index into h->short_ref where returned picture is found
3184 * undefined if no picture found.
3185 * @return pointer to the found picture, or NULL if no pic with the provided
3186 * frame number is found
3188 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3189 MpegEncContext * const s = &h->s;
3190 int i;
/* Linear scan; the list is small (at most the SPS reference-frame count). */
3192 for(i=0; i<h->short_ref_count; i++){
3193 Picture *pic= h->short_ref[i];
3194 if(s->avctx->debug&FF_DEBUG_MMCO)
3195 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3196 if(pic->frame_num == frame_num) {
3197 *idx = i;
3198 return pic;
3201 return NULL;
3205 * Remove a picture from the short term reference list by its index in
3206 * that list. This does no checking on the provided index; it is assumed
3207 * to be valid. Other list entries are shifted down.
3208 * @param i index into h->short_ref of picture to remove.
3210 static void remove_short_at_index(H264Context *h, int i){
3211 assert(i >= 0 && i < h->short_ref_count);
3212 h->short_ref[i]= NULL;
/* Close the gap; the memmove is skipped when the list becomes empty. */
3213 if (--h->short_ref_count)
3214 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
/* Find the short-term reference with the given frame_num, unreference the
 * fields selected by ref_mask, and drop it from the list if it became
 * entirely unreferenced. */
3219 * @return the removed picture or NULL if an error occurs
3221 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3222 MpegEncContext * const s = &h->s;
3223 Picture *pic;
3224 int i;
3226 if(s->avctx->debug&FF_DEBUG_MMCO)
3227 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3229 pic = find_short(h, frame_num, &i);
3230 if (pic){
3231 if(unreference_pic(h, pic, ref_mask))
3232 remove_short_at_index(h, i);
3235 return pic;
3239 * Remove a picture from the long term reference list by its index in
3240 * that list.
3241 * @return the removed picture or NULL if an error occurs
3243 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3244 Picture *pic;
3246 pic= h->long_ref[i];
3247 if (pic){
/* Only clear the slot when both fields became unreferenced. */
3248 if(unreference_pic(h, pic, ref_mask)){
3249 assert(h->long_ref[i]->long_ref == 1);
3250 h->long_ref[i]->long_ref= 0;
3251 h->long_ref[i]= NULL;
3252 h->long_ref_count--;
3256 return pic;
3260 * print short term list
/* Debug-only dump of the short-term reference list (FF_DEBUG_MMCO). */
3262 static void print_short_term(H264Context *h) {
3263 uint32_t i;
3264 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3265 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3266 for(i=0; i<h->short_ref_count; i++){
3267 Picture *pic= h->short_ref[i];
3268 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3274 * print long term list
/* Debug-only dump of all 16 long-term reference slots (FF_DEBUG_MMCO);
 * empty slots are skipped. */
3276 static void print_long_term(H264Context *h) {
3277 uint32_t i;
3278 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3279 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3280 for(i = 0; i < 16; i++){
3281 Picture *pic= h->long_ref[i];
3282 if (pic) {
3283 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3290 * Executes the reference picture marking (memory management control operations).
/* Applies the parsed MMCO list to the short/long-term reference sets, then
 * handles the implicit marking of the current picture (or its second field)
 * and enforces the SPS reference-frame count limit.
 * @return 0 */
3292 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3293 MpegEncContext * const s = &h->s;
3294 int i, j;
3295 int current_ref_assigned=0;
3296 Picture *pic;
3298 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3299 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3301 for(i=0; i<mmco_count; i++){
3302 int structure, frame_num;
3303 if(s->avctx->debug&FF_DEBUG_MMCO)
3304 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* Both short-term opcodes need the addressed short-term picture; a miss
 * is only tolerated for SHORT2LONG when the move was already done. */
3306 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3307 || mmco[i].opcode == MMCO_SHORT2LONG){
3308 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3309 pic = find_short(h, frame_num, &j);
3310 if(!pic){
3311 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3312 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3313 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3314 continue;
3318 switch(mmco[i].opcode){
3319 case MMCO_SHORT2UNUSED:
3320 if(s->avctx->debug&FF_DEBUG_MMCO)
3321 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3322 remove_short(h, frame_num, structure ^ PICT_FRAME);
3323 break;
3324 case MMCO_SHORT2LONG:
/* Evict any other occupant of the long slot, then move pic into it. */
3325 if (h->long_ref[mmco[i].long_arg] != pic)
3326 remove_long(h, mmco[i].long_arg, 0);
3328 remove_short_at_index(h, j);
3329 h->long_ref[ mmco[i].long_arg ]= pic;
3330 if (h->long_ref[ mmco[i].long_arg ]){
3331 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3332 h->long_ref_count++;
3334 break;
3335 case MMCO_LONG2UNUSED:
3336 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3337 pic = h->long_ref[j];
3338 if (pic) {
3339 remove_long(h, j, structure ^ PICT_FRAME);
3340 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3341 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3342 break;
3343 case MMCO_LONG:
3344 // Comment below left from previous code as it is an interresting note.
3345 /* First field in pair is in short term list or
3346 * at a different long term index.
3347 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3348 * Report the problem and keep the pair where it is,
3349 * and mark this field valid.
3352 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3353 remove_long(h, mmco[i].long_arg, 0);
3355 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3356 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3357 h->long_ref_count++;
3360 s->current_picture_ptr->reference |= s->picture_structure;
3361 current_ref_assigned=1;
3362 break;
3363 case MMCO_SET_MAX_LONG:
3364 assert(mmco[i].long_arg <= 16);
3365 // just remove the long term which index is greater than new max
3366 for(j = mmco[i].long_arg; j<16; j++){
3367 remove_long(h, j, 0);
3369 break;
3370 case MMCO_RESET:
3371 while(h->short_ref_count){
3372 remove_short(h, h->short_ref[0]->frame_num, 0);
3374 for(j = 0; j < 16; j++) {
3375 remove_long(h, j, 0);
3377 s->current_picture_ptr->poc=
3378 s->current_picture_ptr->field_poc[0]=
3379 s->current_picture_ptr->field_poc[1]=
3380 h->poc_lsb=
3381 h->poc_msb=
3382 h->frame_num=
3383 s->current_picture_ptr->frame_num= 0;
3384 break;
3385 default: assert(0);
/* No MMCO made the current picture a long-term ref: mark it (or its
 * second field) as a short-term reference, the default behaviour. */
3389 if (!current_ref_assigned) {
3390 /* Second field of complementary field pair; the first field of
3391 * which is already referenced. If short referenced, it
3392 * should be first entry in short_ref. If not, it must exist
3393 * in long_ref; trying to put it on the short list here is an
3394 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3396 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3397 /* Just mark the second field valid */
3398 s->current_picture_ptr->reference = PICT_FRAME;
3399 } else if (s->current_picture_ptr->long_ref) {
3400 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3401 "assignment for second field "
3402 "in complementary field pair "
3403 "(first field is long term)\n");
3404 } else {
3405 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3406 if(pic){
3407 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3410 if(h->short_ref_count)
3411 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3413 h->short_ref[0]= s->current_picture_ptr;
3414 h->short_ref_count++;
3415 s->current_picture_ptr->reference |= s->picture_structure;
3419 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3421 /* We have too many reference frames, probably due to corrupted
3422 * stream. Need to discard one frame. Prevents overrun of the
3423 * short_ref and long_ref buffers.
3425 av_log(h->s.avctx, AV_LOG_ERROR,
3426 "number of reference frames exceeds max (probably "
3427 "corrupt input), discarding one\n");
3429 if (h->long_ref_count && !h->short_ref_count) {
3430 for (i = 0; i < 16; ++i)
3431 if (h->long_ref[i])
3432 break;
3434 assert(i < 16);
3435 remove_long(h, i, 0);
3436 } else {
3437 pic = h->short_ref[h->short_ref_count - 1];
3438 remove_short(h, pic->frame_num, 0);
3442 print_short_term(h);
3443 print_long_term(h);
3444 return 0;
/* Parse dec_ref_pic_marking() from the slice header into h->mmco[] /
 * h->mmco_index. For IDR slices this is the no_output/long_term flags; for
 * other slices either an explicit MMCO list or, in sliding-window mode, a
 * synthesized SHORT2UNUSED for the oldest short-term ref when full.
 * @return 0 on success, -1 on bitstream error */
3447 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3448 MpegEncContext * const s = &h->s;
3449 int i;
3451 h->mmco_index= 0;
3452 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3453 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag: make the IDR picture long-term index 0. */
3454 if(get_bits1(gb)){
3455 h->mmco[0].opcode= MMCO_LONG;
3456 h->mmco[0].long_arg= 0;
3457 h->mmco_index= 1;
3459 }else{
3460 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3461 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3462 MMCOOpcode opcode= get_ue_golomb(gb);
3464 h->mmco[i].opcode= opcode;
3465 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3466 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3467 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3468 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3469 return -1;
3472 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3473 unsigned int long_arg= get_ue_golomb(gb);
3474 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3475 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3476 return -1;
3478 h->mmco[i].long_arg= long_arg;
3481 if(opcode > (unsigned)MMCO_LONG){
3482 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3483 return -1;
3485 if(opcode == MMCO_END)
3486 break;
3488 h->mmco_index= i;
3489 }else{
/* Sliding window: when the reference buffer is full, emit an implicit
 * SHORT2UNUSED for the oldest short-term reference (both fields when
 * field-coded), unless the second field of the current pair is pending. */
3490 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3492 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3493 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3494 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3495 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3496 h->mmco_index= 1;
3497 if (FIELD_PICTURE) {
3498 h->mmco[0].short_pic_num *= 2;
3499 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3500 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3501 h->mmco_index= 2;
3507 return 0;
/* Compute the picture order count of the current picture from the parsed
 * slice-header values, implementing the three POC derivation modes:
 * type 0 (poc_lsb/msb wrap tracking), type 1 (expected-delta cycles) and
 * type 2 (POC follows frame_num). Stores the per-field POCs and the frame
 * poc (min of the two fields).
 * @return 0 */
3510 static int init_poc(H264Context *h){
3511 MpegEncContext * const s = &h->s;
3512 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3513 int field_poc[2];
3514 Picture *cur = s->current_picture_ptr;
3516 h->frame_num_offset= h->prev_frame_num_offset;
3517 if(h->frame_num < h->prev_frame_num)
3518 h->frame_num_offset += max_frame_num;
3520 if(h->sps.poc_type==0){
3521 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* Detect poc_lsb wrap-around in either direction to update poc_msb. */
3523 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3524 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3525 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3526 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3527 else
3528 h->poc_msb = h->prev_poc_msb;
3529 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3530 field_poc[0] =
3531 field_poc[1] = h->poc_msb + h->poc_lsb;
3532 if(s->picture_structure == PICT_FRAME)
3533 field_poc[1] += h->delta_poc_bottom;
3534 }else if(h->sps.poc_type==1){
3535 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3536 int i;
3538 if(h->sps.poc_cycle_length != 0)
3539 abs_frame_num = h->frame_num_offset + h->frame_num;
3540 else
3541 abs_frame_num = 0;
3543 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3544 abs_frame_num--;
3546 expected_delta_per_poc_cycle = 0;
3547 for(i=0; i < h->sps.poc_cycle_length; i++)
3548 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3550 if(abs_frame_num > 0){
3551 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3552 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3554 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3555 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3556 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3557 } else
3558 expectedpoc = 0;
3560 if(h->nal_ref_idc == 0)
3561 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3563 field_poc[0] = expectedpoc + h->delta_poc[0];
3564 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3566 if(s->picture_structure == PICT_FRAME)
3567 field_poc[1] += h->delta_poc[1];
3568 }else{
/* poc_type 2: POC is derived directly from frame_num; non-reference
 * pictures get an odd value one less than the following reference. */
3569 int poc= 2*(h->frame_num_offset + h->frame_num);
3571 if(!h->nal_ref_idc)
3572 poc--;
3574 field_poc[0]= poc;
3575 field_poc[1]= poc;
/* Only store the field POC(s) belonging to the current structure. */
3578 if(s->picture_structure != PICT_BOTTOM_FIELD)
3579 s->current_picture_ptr->field_poc[0]= field_poc[0];
3580 if(s->picture_structure != PICT_TOP_FIELD)
3581 s->current_picture_ptr->field_poc[1]= field_poc[1];
3582 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3584 return 0;
3589 * initialize scan tables
/* Builds the zigzag/field scan tables used for coefficient decoding. When
 * the (SIMD) IDCT uses a permuted coefficient layout, the scans are
 * transposed with the T() macros to match; the _q0 variants used at qscale 0
 * with transform_bypass keep the unpermuted spec order. */
3591 static void init_scan_tables(H264Context *h){
3592 MpegEncContext * const s = &h->s;
3593 int i;
3594 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3595 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3596 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3597 }else{
3598 for(i=0; i<16; i++){
3599 #define T(x) (x>>2) | ((x<<2) & 0xF)
3600 h->zigzag_scan[i] = T(zigzag_scan[i]);
3601 h-> field_scan[i] = T( field_scan[i]);
3602 #undef T
3605 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3606 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3607 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3608 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3609 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3610 }else{
3611 for(i=0; i<64; i++){
3612 #define T(x) (x>>3) | ((x&7)<<3)
3613 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3614 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3615 h->field_scan8x8[i] = T(field_scan8x8[i]);
3616 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3617 #undef T
3620 if(h->sps.transform_bypass){ //FIXME same ugly
3621 h->zigzag_scan_q0 = zigzag_scan;
3622 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3623 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3624 h->field_scan_q0 = field_scan;
3625 h->field_scan8x8_q0 = field_scan8x8;
3626 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3627 }else{
3628 h->zigzag_scan_q0 = h->zigzag_scan;
3629 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3630 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3631 h->field_scan_q0 = h->field_scan;
3632 h->field_scan8x8_q0 = h->field_scan8x8;
3633 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3638 * Replicates H264 "master" context to thread contexts.
/* Shallow-copies the per-picture state a slice-thread worker needs from the
 * master context: current picture pointers, strides, POC/frame_num history,
 * reference lists and dequant tables. */
3640 static void clone_slice(H264Context *dst, H264Context *src)
3642 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3643 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3644 dst->s.current_picture = src->s.current_picture;
3645 dst->s.linesize = src->s.linesize;
3646 dst->s.uvlinesize = src->s.uvlinesize;
3647 dst->s.first_field = src->s.first_field;
3649 dst->prev_poc_msb = src->prev_poc_msb;
3650 dst->prev_poc_lsb = src->prev_poc_lsb;
3651 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3652 dst->prev_frame_num = src->prev_frame_num;
3653 dst->short_ref_count = src->short_ref_count;
3655 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3656 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3657 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3658 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3660 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3661 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3665 * decodes a slice header.
3666 * This will also call MPV_common_init() and frame_start() as needed.
3668 * @param h h264context
3669 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3671 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
static int decode_slice_header(H264Context *h, H264Context *h0){
    MpegEncContext * const s = &h->s;
    MpegEncContext * const s0 = &h0->s;
    unsigned int first_mb_in_slice;
    unsigned int pps_id;
    int num_ref_idx_active_override_flag;
    unsigned int slice_type, tmp, i, j;
    int default_ref_list_done = 0;
    int last_pic_structure;

    /* A slice with nal_ref_idc == 0 is never used as a reference, so the
     * frame it belongs to may be dropped without corrupting later output. */
    s->dropable= h->nal_ref_idc == 0;

    /* With CODEC_FLAG2_FAST, non-reference frames use the cheaper 2-tap
     * qpel interpolation; errors cannot propagate since nothing refs them. */
    if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
        s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
        s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
    }else{
        s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
        s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
    }

    first_mb_in_slice= get_ue_golomb(&s->gb);

    /* In CHUNKS mode the caller feeds partial frames; a slice starting at
     * macroblock 0 marks the beginning of a new picture. */
    if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
        h0->current_slice = 0;
        if (!s0->first_field)
            s->current_picture_ptr= NULL;
    }

    slice_type= get_ue_golomb(&s->gb);
    if(slice_type > 9){
        av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
        return -1;
    }
    /* Types 5..9 mean "same as 0..4 and fixed for the whole picture". */
    if(slice_type > 4){
        slice_type -= 5;
        h->slice_type_fixed=1;
    }else
        h->slice_type_fixed=0;

    slice_type= golomb_to_pict_type[ slice_type ];
    /* I slices and repeats of the previous slice type can reuse the
     * already-built default reference list. */
    if (slice_type == FF_I_TYPE
        || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
        default_ref_list_done = 1;
    }
    h->slice_type= slice_type;
    h->slice_type_nos= slice_type & 3; /* strips the SI/SP distinction */

    s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
    if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
        av_log(h->s.avctx, AV_LOG_ERROR,
               "B picture before any references, skipping\n");
        return -1;
    }

    pps_id= get_ue_golomb(&s->gb);
    if(pps_id>=MAX_PPS_COUNT){
        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
        return -1;
    }
    if(!h0->pps_buffers[pps_id]) {
        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
        return -1;
    }
    /* Copy (not alias) the parameter sets: slice threads each hold their own. */
    h->pps= *h0->pps_buffers[pps_id];

    if(!h0->sps_buffers[h->pps.sps_id]) {
        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
        return -1;
    }
    h->sps = *h0->sps_buffers[h->pps.sps_id];

    /* Dequant tables depend on the PPS scaling lists; rebuild on PPS change
     * (only on the master context, clones share via clone_tables()). */
    if(h == h0 && h->dequant_coeff_pps != pps_id){
        h->dequant_coeff_pps = pps_id;
        init_dequant_tables(h);
    }

    s->mb_width= h->sps.mb_width;
    /* Field coding doubles the mb height count stored in the SPS. */
    s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);

    h->b_stride=  s->mb_width*4;
    h->b8_stride= s->mb_width*2;

    /* Apply SPS cropping (clamped; crop values are in 2-sample units,
     * doubled vertically for field-coded streams). */
    s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
    if(h->sps.frame_mbs_only_flag)
        s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
    else
        s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);

    if (s->context_initialized
        && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
        if(h != h0)
            return -1;   // width / height changed during parallelized decoding
        free_tables(h);
        MPV_common_end(s);
    }
    if (!s->context_initialized) {
        if(h != h0)
            return -1;  // we cant (re-)initialize context during parallel decoding
        if (MPV_common_init(s) < 0)
            return -1;
        s->first_field = 0;

        init_scan_tables(h);
        alloc_tables(h);

        /* Set up one H264Context per slice thread, cloning the master's
         * MpegEncContext and sharing its large tables. */
        for(i = 1; i < s->avctx->thread_count; i++) {
            H264Context *c;
            c = h->thread_context[i] = av_malloc(sizeof(H264Context));
            memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
            memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
            c->sps = h->sps;
            c->pps = h->pps;
            init_scan_tables(c);
            clone_tables(c, h);
        }

        for(i = 0; i < s->avctx->thread_count; i++)
            if(context_init(h->thread_context[i]) < 0)
                return -1;

        s->avctx->width = s->width;
        s->avctx->height = s->height;
        s->avctx->sample_aspect_ratio= h->sps.sar;
        if(!s->avctx->sample_aspect_ratio.den)
            s->avctx->sample_aspect_ratio.den = 1;

        if(h->sps.timing_info_present_flag){
            /* num_units_in_tick is per field, hence the factor of 2. */
            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
            /* Old x264 builds (< 44) wrote a non-conforming time_scale. */
            if(h->x264_build > 0 && h->x264_build < 44)
                s->avctx->time_base.den *= 2;
            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
        }
    }

    h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);

    h->mb_mbaff = 0;
    h->mb_aff_frame = 0;
    last_pic_structure = s0->picture_structure;
    if(h->sps.frame_mbs_only_flag){
        s->picture_structure= PICT_FRAME;
    }else{
        if(get_bits1(&s->gb)) { //field_pic_flag
            s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
        } else {
            s->picture_structure= PICT_FRAME;
            h->mb_aff_frame = h->sps.mb_aff;
        }
    }
    h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;

    if(h0->current_slice == 0){
        /* Conceal gaps in frame_num by emitting dummy frames so the
         * reference machinery stays consistent. */
        while(h->frame_num !=  h->prev_frame_num &&
              h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
            av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
            frame_start(h);
            h->prev_frame_num++;
            h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
            s->current_picture_ptr->frame_num= h->prev_frame_num;
            execute_ref_pic_marking(h, NULL, 0);
        }

        /* See if we have a decoded first field looking for a pair... */
        if (s0->first_field) {
            assert(s0->current_picture_ptr);
            assert(s0->current_picture_ptr->data[0]);
            assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);

            /* figure out if we have a complementary field pair */
            if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
                /*
                 * Previous field is unmatched. Don't display it, but let it
                 * remain for reference if marked as such.
                 */
                s0->current_picture_ptr = NULL;
                s0->first_field = FIELD_PICTURE;

            } else {
                if (h->nal_ref_idc &&
                        s0->current_picture_ptr->reference &&
                        s0->current_picture_ptr->frame_num != h->frame_num) {
                    /*
                     * This and previous field were reference, but had
                     * different frame_nums. Consider this field first in
                     * pair. Throw away previous field except for reference
                     * purposes.
                     */
                    s0->first_field = 1;
                    s0->current_picture_ptr = NULL;

                } else {
                    /* Second field in complementary pair */
                    s0->first_field = 0;
                }
            }

        } else {
            /* Frame or first field in a potentially complementary pair */
            assert(!s0->current_picture_ptr);
            s0->first_field = FIELD_PICTURE;
        }

        if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
            s0->first_field = 0;
            return -1;
        }
    }
    if(h != h0)
        clone_slice(h, h0);

    s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup

    assert(s->mb_num == s->mb_width * s->mb_height);
    if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
       first_mb_in_slice                    >= s->mb_num){
        av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
        return -1;
    }
    s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
    s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
    if (s->picture_structure == PICT_BOTTOM_FIELD)
        s->resync_mb_y = s->mb_y = s->mb_y + 1;
    assert(s->mb_y < s->mb_height);

    /* Field pictures get picture numbers twice as dense (spec 8.2.4.1). */
    if(s->picture_structure==PICT_FRAME){
        h->curr_pic_num=   h->frame_num;
        h->max_pic_num= 1<< h->sps.log2_max_frame_num;
    }else{
        h->curr_pic_num= 2*h->frame_num + 1;
        h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
    }

    if(h->nal_unit_type == NAL_IDR_SLICE){
        get_ue_golomb(&s->gb); /* idr_pic_id */
    }

    if(h->sps.poc_type==0){
        h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);

        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
            h->delta_poc_bottom= get_se_golomb(&s->gb);
        }
    }

    if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
        h->delta_poc[0]= get_se_golomb(&s->gb);

        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
            h->delta_poc[1]= get_se_golomb(&s->gb);
    }

    init_poc(h);

    if(h->pps.redundant_pic_cnt_present){
        h->redundant_pic_count= get_ue_golomb(&s->gb);
    }

    //set defaults, might be overridden a few lines later
    h->ref_count[0]= h->pps.ref_count[0];
    h->ref_count[1]= h->pps.ref_count[1];

    if(h->slice_type_nos != FF_I_TYPE){
        if(h->slice_type_nos == FF_B_TYPE){
            h->direct_spatial_mv_pred= get_bits1(&s->gb);
        }
        num_ref_idx_active_override_flag= get_bits1(&s->gb);

        if(num_ref_idx_active_override_flag){
            h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
            if(h->slice_type_nos==FF_B_TYPE)
                h->ref_count[1]= get_ue_golomb(&s->gb) + 1;

            /* 32 = 16 frames * 2 fields, the maximum list size. */
            if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
                av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
                h->ref_count[0]= h->ref_count[1]= 1;
                return -1;
            }
        }
        if(h->slice_type_nos == FF_B_TYPE)
            h->list_count= 2;
        else
            h->list_count= 1;
    }else
        h->list_count= 0;

    if(!default_ref_list_done){
        fill_default_ref_list(h);
    }

    if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
        return -1;

    /* Mirror list heads into the MpegEncContext pictures for legacy code. */
    if(h->slice_type_nos!=FF_I_TYPE){
        s->last_picture_ptr= &h->ref_list[0][0];
        ff_copy_picture(&s->last_picture, s->last_picture_ptr);
    }
    if(h->slice_type_nos==FF_B_TYPE){
        s->next_picture_ptr= &h->ref_list[1][0];
        ff_copy_picture(&s->next_picture, s->next_picture_ptr);
    }

    if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
       ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
        pred_weight_table(h);
    else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
        implicit_weight_table(h);
    else
        h->use_weight = 0;

    if(h->nal_ref_idc)
        decode_ref_pic_marking(h0, &s->gb);

    if(FRAME_MBAFF)
        fill_mbaff_ref_list(h);

    if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
        direct_dist_scale_factor(h);
    direct_ref_list_init(h);

    if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
        tmp = get_ue_golomb(&s->gb);
        if(tmp > 2){
            av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
            return -1;
        }
        h->cabac_init_idc= tmp;
    }

    h->last_qscale_diff = 0;
    tmp = h->pps.init_qp + get_se_golomb(&s->gb);
    if(tmp>51){
        av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
        return -1;
    }
    s->qscale= tmp;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
    //FIXME qscale / qp ... stuff
    if(h->slice_type == FF_SP_TYPE){
        get_bits1(&s->gb); /* sp_for_switch_flag */
    }
    if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
        get_se_golomb(&s->gb); /* slice_qs_delta */
    }

    h->deblocking_filter = 1;
    h->slice_alpha_c0_offset = 0;
    h->slice_beta_offset = 0;
    if( h->pps.deblocking_filter_parameters_present ) {
        tmp= get_ue_golomb(&s->gb);
        if(tmp > 2){
            av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
            return -1;
        }
        h->deblocking_filter= tmp;
        /* Bitstream idc 0 means "filter on", 1 means "off"; swap so that
         * h->deblocking_filter==1 means enabled. */
        if(h->deblocking_filter < 2)
            h->deblocking_filter^= 1; // 1<->0

        if( h->deblocking_filter ) {
            h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
            h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
        }
    }

    /* Honor the user's skip_loop_filter discard level. */
    if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
       ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
        h->deblocking_filter= 0;

    /* Type-1 deblocking crosses slice boundaries, which breaks slice-parallel
     * decoding: either cheat (FAST) or fall back to sequential decoding. */
    if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
        if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
            /* Cheat slightly for speed:
               Do not bother to deblock across slices. */
            h->deblocking_filter = 2;
        } else {
            h0->max_contexts = 1;
            if(!h0->single_decode_warning) {
                av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
                h0->single_decode_warning = 1;
            }
            if(h != h0)
                return 1; // deblocking switched inside frame
        }
    }

#if 0 //FMO
    if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
        slice_group_change_cycle= get_bits(&s->gb, ?);
#endif

    h0->last_slice_type = slice_type;
    h->slice_num = ++h0->current_slice;
    /* NOTE(review): overflow is only reported, not rejected; the ref2frm
     * index below masks with MAX_SLICES-1 so it cannot write out of bounds. */
    if(h->slice_num >= MAX_SLICES){
        av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
    }

    /* Precompute per-slice ref-index -> frame-number mapping (entries 0/1
     * and 18/19 are -1 sentinels for "no reference"). */
    for(j=0; j<2; j++){
        int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
        ref2frm[0]=
        ref2frm[1]= -1;
        for(i=0; i<16; i++)
            ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
                          +(h->ref_list[j][i].reference&3);
        ref2frm[18+0]=
        ref2frm[18+1]= -1;
        for(i=16; i<48; i++)
            ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
                          +(h->ref_list[j][i].reference&3);
    }

    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
    h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
               h->slice_num,
               (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
               first_mb_in_slice,
               av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
               pps_id, h->frame_num,
               s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
               h->ref_count[0], h->ref_count[1],
               s->qscale,
               h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
               h->use_weight,
               h->use_weight==1 && h->use_weight_chroma ? "c" : "",
               h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
               );
    }

    return 0;
}
/**
 * Reads a CAVLC level_prefix: the number of leading zero bits before the
 * first 1 bit (unary code). Returns that count; consumes count+1 bits.
 */
static inline int get_level_prefix(GetBitContext *gb){
    unsigned int buf;
    int log;

    OPEN_READER(re, gb);
    UPDATE_CACHE(re, gb);
    buf=GET_CACHE(re, gb);

    /* Position of the first set bit from the MSB: leading zeros + 1. */
    log= 32 - av_log2(buf);
#ifdef TRACE
    print_bin(buf>>(32-log), log);
    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
#endif

    /* Skip the zeros and the terminating 1 bit. */
    LAST_SKIP_BITS(re, gb, log);
    CLOSE_READER(re, gb);

    return log-1;
}
4131 static inline int get_dct8x8_allowed(H264Context *h){
4132 int i;
4133 for(i=0; i<4; i++){
4134 if(!IS_SUB_8X8(h->sub_mb_type[i])
4135 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4136 return 0;
4138 return 1;
4142 * decodes a residual block.
4143 * @param n block index
4144 * @param scantable scantable
4145 * @param max_coeff number of coefficients in the block
4146 * @return <0 if an error occurred
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
    MpegEncContext * const s = &h->s;
    /* Maps the predicted non-zero count (0..16) to one of the four
     * coeff_token VLC tables. */
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
    int level[16];
    int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;

    //FIXME put trailing_onex into the context

    /* coeff_token encodes (total_coeff << 2) | trailing_ones; the table
     * choice depends on the predicted count from neighboring blocks. */
    if(n == CHROMA_DC_BLOCK_INDEX){
        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
        total_coeff= coeff_token>>2;
    }else{
        if(n == LUMA_DC_BLOCK_INDEX){
            total_coeff= pred_non_zero_count(h, 0);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }else{
            total_coeff= pred_non_zero_count(h, n);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
        }
    }

    //FIXME set last_non_zero?

    if(total_coeff==0)
        return 0;
    if(total_coeff > (unsigned)max_coeff) {
        av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
        return -1;
    }

    trailing_ones= coeff_token&3;
    tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
    assert(total_coeff<=16);

    /* Trailing ones are coded as single sign bits (+1 or -1). */
    for(i=0; i<trailing_ones; i++){
        level[i]= 1 - 2*get_bits1(gb);
    }

    if(i<total_coeff) {
        int level_code, mask;
        int suffix_length = total_coeff > 10 && trailing_ones < 3;
        int prefix= get_level_prefix(gb);

        //first coefficient has suffix_length equal to 0 or 1
        if(prefix<14){ //FIXME try to build a large unified VLC table for all this
            if(suffix_length)
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
            else
                level_code= (prefix<<suffix_length); //part
        }else if(prefix==14){
            if(suffix_length)
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
            else
                level_code= prefix + get_bits(gb, 4); //part
        }else{
            level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
            if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
            if(prefix>=16)
                level_code += (1<<(prefix-3))-4096;
        }

        /* With fewer than 3 trailing ones, levels |1| are already covered,
         * so the code space is shifted by 2. */
        if(trailing_ones < 3) level_code += 2;

        suffix_length = 1;
        if(level_code > 5)
            suffix_length++;
        /* Map the unsigned code to a signed level: even -> positive. */
        mask= -(level_code&1);
        level[i]= (((2+level_code)>>1) ^ mask) - mask;
        i++;

        //remaining coefficients have suffix_length > 0
        for(;i<total_coeff;i++) {
            /* Thresholds at which suffix_length grows (spec table 9-6). */
            static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
            prefix = get_level_prefix(gb);
            if(prefix<15){
                level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
            }else{
                level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
                if(prefix>=16)
                    level_code += (1<<(prefix-3))-4096;
            }
            mask= -(level_code&1);
            level[i]= (((2+level_code)>>1) ^ mask) - mask;
            if(level_code > suffix_limit[suffix_length])
                suffix_length++;
        }
    }

    /* total_zeros: how many zero coefficients precede the last non-zero one. */
    if(total_coeff == max_coeff)
        zeros_left=0;
    else{
        if(n == CHROMA_DC_BLOCK_INDEX)
            zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
        else
            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
    }

    /* Scatter levels back to front; run_before gives the zero-run between
     * consecutive coefficients. n > 24 are DC blocks, stored without
     * dequantization (qmul applied to the AC path below). */
    coeff_num = zeros_left + total_coeff - 1;
    j = scantable[coeff_num];
    if(n > 24){
        block[j] = level[0];
        for(i=1;i<total_coeff;i++) {
            if(zeros_left <= 0)
                run_before = 0;
            else if(zeros_left < 7){
                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
            }else{
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
            }
            zeros_left -= run_before;
            coeff_num -= 1 + run_before;
            j= scantable[ coeff_num ];

            block[j]= level[i];
        }
    }else{
        block[j] = (level[0] * qmul[j] + 32)>>6;
        for(i=1;i<total_coeff;i++) {
            if(zeros_left <= 0)
                run_before = 0;
            else if(zeros_left < 7){
                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
            }else{
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
            }
            zeros_left -= run_before;
            coeff_num -= 1 + run_before;
            j= scantable[ coeff_num ];

            block[j]= (level[i] * qmul[j] + 32)>>6;
        }
    }

    /* A negative remainder means the run_before values were inconsistent
     * with total_zeros — corrupted bitstream. */
    if(zeros_left<0){
        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    return 0;
}
4292 static void predict_field_decoding_flag(H264Context *h){
4293 MpegEncContext * const s = &h->s;
4294 const int mb_xy= h->mb_xy;
4295 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4296 ? s->current_picture.mb_type[mb_xy-1]
4297 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4298 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4299 : 0;
4300 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4304 * decodes a P_SKIP or B_SKIP macroblock
static void decode_mb_skip(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int mb_type=0;

    /* Skipped macroblocks carry no residual: clear coefficient counts. */
    memset(h->non_zero_count[mb_xy], 0, 16);
    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui

    if(MB_FIELD)
        mb_type|= MB_TYPE_INTERLACED;

    if( h->slice_type_nos == FF_B_TYPE )
    {
        // just for fill_caches. pred_direct_motion will set the real mb_type
        mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;

        /* B_SKIP: motion is derived by direct-mode prediction. */
        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
        pred_direct_motion(h, &mb_type);
        mb_type|= MB_TYPE_SKIP;
    }
    else
    {
        int mx, my;
        /* P_SKIP: one 16x16 partition, ref index 0, median-predicted MV. */
        mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;

        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
        pred_pskip_motion(h, &mx, &my);
        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
        fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
    }

    write_back_motion(h, mb_type);
    s->current_picture.mb_type[mb_xy]= mb_type;
    s->current_picture.qscale_table[mb_xy]= s->qscale;
    h->slice_table[ mb_xy ]= h->slice_num;
    h->prev_mb_skipped= 1;
}
4345 * decodes a macroblock
4346 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4348 static int decode_mb_cavlc(H264Context *h){
4349 MpegEncContext * const s = &h->s;
4350 int mb_xy;
4351 int partition_count;
4352 unsigned int mb_type, cbp;
4353 int dct8x8_allowed= h->pps.transform_8x8_mode;
4355 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4357 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4359 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4360 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4361 down the code */
4362 if(h->slice_type_nos != FF_I_TYPE){
4363 if(s->mb_skip_run==-1)
4364 s->mb_skip_run= get_ue_golomb(&s->gb);
4366 if (s->mb_skip_run--) {
4367 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4368 if(s->mb_skip_run==0)
4369 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4370 else
4371 predict_field_decoding_flag(h);
4373 decode_mb_skip(h);
4374 return 0;
4377 if(FRAME_MBAFF){
4378 if( (s->mb_y&1) == 0 )
4379 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4382 h->prev_mb_skipped= 0;
4384 mb_type= get_ue_golomb(&s->gb);
4385 if(h->slice_type_nos == FF_B_TYPE){
4386 if(mb_type < 23){
4387 partition_count= b_mb_type_info[mb_type].partition_count;
4388 mb_type= b_mb_type_info[mb_type].type;
4389 }else{
4390 mb_type -= 23;
4391 goto decode_intra_mb;
4393 }else if(h->slice_type_nos == FF_P_TYPE){
4394 if(mb_type < 5){
4395 partition_count= p_mb_type_info[mb_type].partition_count;
4396 mb_type= p_mb_type_info[mb_type].type;
4397 }else{
4398 mb_type -= 5;
4399 goto decode_intra_mb;
4401 }else{
4402 assert(h->slice_type_nos == FF_I_TYPE);
4403 if(h->slice_type == FF_SI_TYPE && mb_type)
4404 mb_type--;
4405 decode_intra_mb:
4406 if(mb_type > 25){
4407 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4408 return -1;
4410 partition_count=0;
4411 cbp= i_mb_type_info[mb_type].cbp;
4412 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4413 mb_type= i_mb_type_info[mb_type].type;
4416 if(MB_FIELD)
4417 mb_type |= MB_TYPE_INTERLACED;
4419 h->slice_table[ mb_xy ]= h->slice_num;
4421 if(IS_INTRA_PCM(mb_type)){
4422 unsigned int x;
4424 // We assume these blocks are very rare so we do not optimize it.
4425 align_get_bits(&s->gb);
4427 // The pixels are stored in the same order as levels in h->mb array.
4428 for(x=0; x < (CHROMA ? 384 : 256); x++){
4429 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4432 // In deblocking, the quantizer is 0
4433 s->current_picture.qscale_table[mb_xy]= 0;
4434 // All coeffs are present
4435 memset(h->non_zero_count[mb_xy], 16, 16);
4437 s->current_picture.mb_type[mb_xy]= mb_type;
4438 return 0;
4441 if(MB_MBAFF){
4442 h->ref_count[0] <<= 1;
4443 h->ref_count[1] <<= 1;
4446 fill_caches(h, mb_type, 0);
4448 //mb_pred
4449 if(IS_INTRA(mb_type)){
4450 int pred_mode;
4451 // init_top_left_availability(h);
4452 if(IS_INTRA4x4(mb_type)){
4453 int i;
4454 int di = 1;
4455 if(dct8x8_allowed && get_bits1(&s->gb)){
4456 mb_type |= MB_TYPE_8x8DCT;
4457 di = 4;
4460 // fill_intra4x4_pred_table(h);
4461 for(i=0; i<16; i+=di){
4462 int mode= pred_intra_mode(h, i);
4464 if(!get_bits1(&s->gb)){
4465 const int rem_mode= get_bits(&s->gb, 3);
4466 mode = rem_mode + (rem_mode >= mode);
4469 if(di==4)
4470 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4471 else
4472 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4474 write_back_intra_pred_mode(h);
4475 if( check_intra4x4_pred_mode(h) < 0)
4476 return -1;
4477 }else{
4478 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4479 if(h->intra16x16_pred_mode < 0)
4480 return -1;
4482 if(CHROMA){
4483 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4484 if(pred_mode < 0)
4485 return -1;
4486 h->chroma_pred_mode= pred_mode;
4488 }else if(partition_count==4){
4489 int i, j, sub_partition_count[4], list, ref[2][4];
4491 if(h->slice_type_nos == FF_B_TYPE){
4492 for(i=0; i<4; i++){
4493 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4494 if(h->sub_mb_type[i] >=13){
4495 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4496 return -1;
4498 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4499 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4501 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4502 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4503 pred_direct_motion(h, &mb_type);
4504 h->ref_cache[0][scan8[4]] =
4505 h->ref_cache[1][scan8[4]] =
4506 h->ref_cache[0][scan8[12]] =
4507 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4509 }else{
4510 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4511 for(i=0; i<4; i++){
4512 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4513 if(h->sub_mb_type[i] >=4){
4514 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4515 return -1;
4517 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4518 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4522 for(list=0; list<h->list_count; list++){
4523 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4524 for(i=0; i<4; i++){
4525 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4526 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4527 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4528 if(tmp>=ref_count){
4529 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4530 return -1;
4532 ref[list][i]= tmp;
4533 }else{
4534 //FIXME
4535 ref[list][i] = -1;
4540 if(dct8x8_allowed)
4541 dct8x8_allowed = get_dct8x8_allowed(h);
4543 for(list=0; list<h->list_count; list++){
4544 for(i=0; i<4; i++){
4545 if(IS_DIRECT(h->sub_mb_type[i])) {
4546 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4547 continue;
4549 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4550 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4552 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4553 const int sub_mb_type= h->sub_mb_type[i];
4554 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4555 for(j=0; j<sub_partition_count[i]; j++){
4556 int mx, my;
4557 const int index= 4*i + block_width*j;
4558 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4559 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4560 mx += get_se_golomb(&s->gb);
4561 my += get_se_golomb(&s->gb);
4562 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4564 if(IS_SUB_8X8(sub_mb_type)){
4565 mv_cache[ 1 ][0]=
4566 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4567 mv_cache[ 1 ][1]=
4568 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4569 }else if(IS_SUB_8X4(sub_mb_type)){
4570 mv_cache[ 1 ][0]= mx;
4571 mv_cache[ 1 ][1]= my;
4572 }else if(IS_SUB_4X8(sub_mb_type)){
4573 mv_cache[ 8 ][0]= mx;
4574 mv_cache[ 8 ][1]= my;
4576 mv_cache[ 0 ][0]= mx;
4577 mv_cache[ 0 ][1]= my;
4579 }else{
4580 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4581 p[0] = p[1]=
4582 p[8] = p[9]= 0;
4586 }else if(IS_DIRECT(mb_type)){
4587 pred_direct_motion(h, &mb_type);
4588 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4589 }else{
4590 int list, mx, my, i;
4591 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4592 if(IS_16X16(mb_type)){
4593 for(list=0; list<h->list_count; list++){
4594 unsigned int val;
4595 if(IS_DIR(mb_type, 0, list)){
4596 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4597 if(val >= h->ref_count[list]){
4598 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4599 return -1;
4601 }else
4602 val= LIST_NOT_USED&0xFF;
4603 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4605 for(list=0; list<h->list_count; list++){
4606 unsigned int val;
4607 if(IS_DIR(mb_type, 0, list)){
4608 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4609 mx += get_se_golomb(&s->gb);
4610 my += get_se_golomb(&s->gb);
4611 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4613 val= pack16to32(mx,my);
4614 }else
4615 val=0;
4616 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4619 else if(IS_16X8(mb_type)){
4620 for(list=0; list<h->list_count; list++){
4621 for(i=0; i<2; i++){
4622 unsigned int val;
4623 if(IS_DIR(mb_type, i, list)){
4624 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4625 if(val >= h->ref_count[list]){
4626 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4627 return -1;
4629 }else
4630 val= LIST_NOT_USED&0xFF;
4631 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4634 for(list=0; list<h->list_count; list++){
4635 for(i=0; i<2; i++){
4636 unsigned int val;
4637 if(IS_DIR(mb_type, i, list)){
4638 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4639 mx += get_se_golomb(&s->gb);
4640 my += get_se_golomb(&s->gb);
4641 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4643 val= pack16to32(mx,my);
4644 }else
4645 val=0;
4646 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4649 }else{
4650 assert(IS_8X16(mb_type));
4651 for(list=0; list<h->list_count; list++){
4652 for(i=0; i<2; i++){
4653 unsigned int val;
4654 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4655 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4656 if(val >= h->ref_count[list]){
4657 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4658 return -1;
4660 }else
4661 val= LIST_NOT_USED&0xFF;
4662 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4665 for(list=0; list<h->list_count; list++){
4666 for(i=0; i<2; i++){
4667 unsigned int val;
4668 if(IS_DIR(mb_type, i, list)){
4669 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4670 mx += get_se_golomb(&s->gb);
4671 my += get_se_golomb(&s->gb);
4672 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4674 val= pack16to32(mx,my);
4675 }else
4676 val=0;
4677 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4683 if(IS_INTER(mb_type))
4684 write_back_motion(h, mb_type);
4686 if(!IS_INTRA16x16(mb_type)){
4687 cbp= get_ue_golomb(&s->gb);
4688 if(cbp > 47){
4689 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4690 return -1;
4693 if(CHROMA){
4694 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4695 else cbp= golomb_to_inter_cbp [cbp];
4696 }else{
4697 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4698 else cbp= golomb_to_inter_cbp_gray[cbp];
4701 h->cbp = cbp;
4703 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4704 if(get_bits1(&s->gb)){
4705 mb_type |= MB_TYPE_8x8DCT;
4706 h->cbp_table[mb_xy]= cbp;
4709 s->current_picture.mb_type[mb_xy]= mb_type;
4711 if(cbp || IS_INTRA16x16(mb_type)){
4712 int i8x8, i4x4, chroma_idx;
4713 int dquant;
4714 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4715 const uint8_t *scan, *scan8x8, *dc_scan;
4717 // fill_non_zero_count_cache(h);
4719 if(IS_INTERLACED(mb_type)){
4720 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4721 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4722 dc_scan= luma_dc_field_scan;
4723 }else{
4724 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4725 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4726 dc_scan= luma_dc_zigzag_scan;
4729 dquant= get_se_golomb(&s->gb);
4731 if( dquant > 25 || dquant < -26 ){
4732 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4733 return -1;
4736 s->qscale += dquant;
4737 if(((unsigned)s->qscale) > 51){
4738 if(s->qscale<0) s->qscale+= 52;
4739 else s->qscale-= 52;
4742 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4743 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4744 if(IS_INTRA16x16(mb_type)){
4745 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4746 return -1; //FIXME continue if partitioned and other return -1 too
4749 assert((cbp&15) == 0 || (cbp&15) == 15);
4751 if(cbp&15){
4752 for(i8x8=0; i8x8<4; i8x8++){
4753 for(i4x4=0; i4x4<4; i4x4++){
4754 const int index= i4x4 + 4*i8x8;
4755 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4756 return -1;
4760 }else{
4761 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4763 }else{
4764 for(i8x8=0; i8x8<4; i8x8++){
4765 if(cbp & (1<<i8x8)){
4766 if(IS_8x8DCT(mb_type)){
4767 DCTELEM *buf = &h->mb[64*i8x8];
4768 uint8_t *nnz;
4769 for(i4x4=0; i4x4<4; i4x4++){
4770 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4771 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4772 return -1;
4774 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4775 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4776 }else{
4777 for(i4x4=0; i4x4<4; i4x4++){
4778 const int index= i4x4 + 4*i8x8;
4780 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4781 return -1;
4785 }else{
4786 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4787 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4792 if(cbp&0x30){
4793 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4794 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4795 return -1;
4799 if(cbp&0x20){
4800 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4801 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4802 for(i4x4=0; i4x4<4; i4x4++){
4803 const int index= 16 + 4*chroma_idx + i4x4;
4804 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4805 return -1;
4809 }else{
4810 uint8_t * const nnz= &h->non_zero_count_cache[0];
4811 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4812 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4814 }else{
4815 uint8_t * const nnz= &h->non_zero_count_cache[0];
4816 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4817 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4818 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4820 s->current_picture.qscale_table[mb_xy]= s->qscale;
4821 write_back_non_zero_count(h);
4823 if(MB_MBAFF){
4824 h->ref_count[0] >>= 1;
4825 h->ref_count[1] >>= 1;
4828 return 0;
4831 static int decode_cabac_field_decoding_flag(H264Context *h) {
4832 MpegEncContext * const s = &h->s;
4833 const int mb_x = s->mb_x;
4834 const int mb_y = s->mb_y & ~1;
4835 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4836 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4838 unsigned int ctx = 0;
4840 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4841 ctx += 1;
4843 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4844 ctx += 1;
4847 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4850 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4851 uint8_t *state= &h->cabac_state[ctx_base];
4852 int mb_type;
4854 if(intra_slice){
4855 MpegEncContext * const s = &h->s;
4856 const int mba_xy = h->left_mb_xy[0];
4857 const int mbb_xy = h->top_mb_xy;
4858 int ctx=0;
4859 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4860 ctx++;
4861 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4862 ctx++;
4863 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4864 return 0; /* I4x4 */
4865 state += 2;
4866 }else{
4867 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4868 return 0; /* I4x4 */
4871 if( get_cabac_terminate( &h->cabac ) )
4872 return 25; /* PCM */
4874 mb_type = 1; /* I16x16 */
4875 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4876 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4877 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4878 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4879 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4880 return mb_type;
4883 static int decode_cabac_mb_type( H264Context *h ) {
4884 MpegEncContext * const s = &h->s;
4886 if( h->slice_type_nos == FF_I_TYPE ) {
4887 return decode_cabac_intra_mb_type(h, 3, 1);
4888 } else if( h->slice_type_nos == FF_P_TYPE ) {
4889 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4890 /* P-type */
4891 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4892 /* P_L0_D16x16, P_8x8 */
4893 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4894 } else {
4895 /* P_L0_D8x16, P_L0_D16x8 */
4896 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4898 } else {
4899 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4901 } else if( h->slice_type_nos == FF_B_TYPE ) {
4902 const int mba_xy = h->left_mb_xy[0];
4903 const int mbb_xy = h->top_mb_xy;
4904 int ctx = 0;
4905 int bits;
4907 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4908 ctx++;
4909 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4910 ctx++;
4912 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4913 return 0; /* B_Direct_16x16 */
4915 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4916 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4919 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4920 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4921 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4922 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4923 if( bits < 8 )
4924 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4925 else if( bits == 13 ) {
4926 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4927 } else if( bits == 14 )
4928 return 11; /* B_L1_L0_8x16 */
4929 else if( bits == 15 )
4930 return 22; /* B_8x8 */
4932 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4933 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4934 } else {
4935 /* TODO SI/SP frames? */
4936 return -1;
/**
 * Decodes the mb_skip_flag for the macroblock at (mb_x, mb_y) with CABAC.
 *
 * The context (0..2, +13 in B slices) counts the left and top neighbours
 * that are in the same slice and are NOT skipped.  In MBAFF mode the
 * neighbour addresses must be derived from the MB-pair geometry because
 * the skip flag can be read before the field/frame flag of the current
 * pair is known.
 *
 * @return the decoded skip flag (0 or 1)
 */
static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
    MpegEncContext * const s = &h->s;
    int mba_xy, mbb_xy;
    int ctx = 0;

    if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
        /* address of the top MB of the current pair */
        int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
        mba_xy = mb_xy - 1;
        /* for the bottom MB of a pair, the left neighbour is the bottom MB
         * of the left pair when its field/frame coding matches ours */
        if( (mb_y&1)
            && h->slice_table[mba_xy] == h->slice_num
            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
            mba_xy += s->mb_stride;
        if( MB_FIELD ){
            mbb_xy = mb_xy - s->mb_stride;
            /* top MB of a field pair: step up one more row when the pair
             * above is itself field-coded */
            if( !(mb_y&1)
                && h->slice_table[mbb_xy] == h->slice_num
                && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
                mbb_xy -= s->mb_stride;
        }else
            mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
    }else{
        int mb_xy = h->mb_xy;
        mba_xy = mb_xy - 1;
        /* in field pictures the vertical neighbour is two rows up */
        mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
    }

    /* non-skipped same-slice neighbours raise the context */
    if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
        ctx++;
    if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
        ctx++;

    if( h->slice_type_nos == FF_B_TYPE )
        ctx += 13;
    return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
}
4976 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4977 int mode = 0;
4979 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4980 return pred_mode;
4982 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4983 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4984 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4986 if( mode >= pred_mode )
4987 return mode + 1;
4988 else
4989 return mode;
4992 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4993 const int mba_xy = h->left_mb_xy[0];
4994 const int mbb_xy = h->top_mb_xy;
4996 int ctx = 0;
4998 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4999 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5000 ctx++;
5002 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5003 ctx++;
5005 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5006 return 0;
5008 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5009 return 1;
5010 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5011 return 2;
5012 else
5013 return 3;
5016 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5017 int cbp_b, cbp_a, ctx, cbp = 0;
5019 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5020 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5022 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5023 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5024 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5025 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5026 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5027 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5028 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5029 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5030 return cbp;
5032 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5033 int ctx;
5034 int cbp_a, cbp_b;
5036 cbp_a = (h->left_cbp>>4)&0x03;
5037 cbp_b = (h-> top_cbp>>4)&0x03;
5039 ctx = 0;
5040 if( cbp_a > 0 ) ctx++;
5041 if( cbp_b > 0 ) ctx += 2;
5042 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5043 return 0;
5045 ctx = 4;
5046 if( cbp_a == 2 ) ctx++;
5047 if( cbp_b == 2 ) ctx += 2;
5048 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5050 static int decode_cabac_mb_dqp( H264Context *h) {
5051 int ctx = 0;
5052 int val = 0;
5054 if( h->last_qscale_diff != 0 )
5055 ctx++;
5057 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5058 if( ctx < 2 )
5059 ctx = 2;
5060 else
5061 ctx = 3;
5062 val++;
5063 if(val > 102) //prevent infinite loop
5064 return INT_MIN;
5067 if( val&0x01 )
5068 return (val + 1)/2;
5069 else
5070 return -(val + 1)/2;
5072 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5073 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5074 return 0; /* 8x8 */
5075 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5076 return 1; /* 8x4 */
5077 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5078 return 2; /* 4x8 */
5079 return 3; /* 4x4 */
5081 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5082 int type;
5083 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5084 return 0; /* B_Direct_8x8 */
5085 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5086 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5087 type = 3;
5088 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5089 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5090 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5091 type += 4;
5093 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5094 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5095 return type;
/**
 * Decodes transform_size_8x8_flag with CABAC.
 * The context (0..2) is the number of neighbouring macroblocks already
 * using the 8x8 transform, precomputed in h->neighbor_transform_size.
 */
static inline int decode_cabac_mb_transform_size( H264Context *h ) {
    return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
}
5102 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5103 int refa = h->ref_cache[list][scan8[n] - 1];
5104 int refb = h->ref_cache[list][scan8[n] - 8];
5105 int ref = 0;
5106 int ctx = 0;
5108 if( h->slice_type_nos == FF_B_TYPE) {
5109 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5110 ctx++;
5111 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5112 ctx += 2;
5113 } else {
5114 if( refa > 0 )
5115 ctx++;
5116 if( refb > 0 )
5117 ctx += 2;
5120 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5121 ref++;
5122 if( ctx < 4 )
5123 ctx = 4;
5124 else
5125 ctx = 5;
5126 if(ref >= 32 /*h->ref_list[list]*/){
5127 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5128 return 0; //FIXME we should return -1 and check the return everywhere
5131 return ref;
/**
 * Decodes one motion vector difference component with CABAC.
 *
 * Binarization: a truncated unary prefix (up to 8 continuation bins, with
 * context transitions 3..6) followed, for magnitudes >= 9, by an
 * Exp-Golomb-style bypass suffix starting at k=3, then a bypass sign bin.
 *
 * @param list reference list (0 or 1)
 * @param n    block index (scan8 order)
 * @param l    component: 0 = x, 1 = y
 * @return the signed mvd component, or INT_MIN on a corrupt bitstream
 */
static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
    /* first-bin context from the magnitude of the neighbouring mvds */
    int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
               abs( h->mvd_cache[list][scan8[n] - 8][l] );
    int ctxbase = (l == 0) ? 40 : 47;   /* separate context sets for x and y */
    int ctx, mvd;

    if( amvd < 3 )
        ctx = 0;
    else if( amvd > 32 )
        ctx = 2;
    else
        ctx = 1;

    if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
        return 0;

    /* unary prefix: count continuation bins up to magnitude 9 */
    mvd= 1;
    ctx= 3;
    while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
        mvd++;
        if( ctx < 6 )
            ctx++;
    }

    if( mvd >= 9 ) {
        /* bypass-coded exponential suffix: read the exponent, then k
         * mantissa bits */
        int k = 3;
        while( get_cabac_bypass( &h->cabac ) ) {
            mvd += 1 << k;
            k++;
            if(k>24){
                av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
                return INT_MIN;
            }
        }
        while( k-- ) {
            if( get_cabac_bypass( &h->cabac ) )
                mvd += 1 << k;
        }
    }
    /* bypass sign bin; returns +mvd or -mvd */
    return get_cabac_bypass_sign( &h->cabac, -mvd );
}
/**
 * Computes the coded_block_flag context for one residual block.
 *
 * The context depends on whether the corresponding left (nza) and top (nzb)
 * neighbour blocks contain nonzero coefficients.  DC blocks probe the
 * neighbour cbp bits cached in left_cbp/top_cbp (bit 8 = luma DC,
 * bits 6+idx = chroma DC); AC/4x4 blocks probe the non_zero_count_cache.
 *
 * @param cat   block category (0 luma DC, 1/2 luma AC/4x4, 3/4 chroma DC/AC)
 * @param idx   block index within the category
 * @param is_dc nonzero for the DC categories
 * @return ctx + 4*cat, the offset into the coded_block_flag context models
 */
static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
    int nza, nzb;
    int ctx = 0;

    if( is_dc ) {
        if( cat == 0 ) {
            /* luma DC: bit 8 of the neighbour cbp */
            nza = h->left_cbp&0x100;
            nzb = h-> top_cbp&0x100;
        } else {
            /* chroma DC: bits 6/7 of the neighbour cbp, selected by idx */
            nza = (h->left_cbp>>(6+idx))&0x01;
            nzb = (h-> top_cbp>>(6+idx))&0x01;
        }
    } else {
        if( cat == 4 ) {
            /* chroma AC blocks live at scan8[16..] in the nnz cache */
            nza = h->non_zero_count_cache[scan8[16+idx] - 1];
            nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
        } else {
            assert(cat == 1 || cat == 2);
            nza = h->non_zero_count_cache[scan8[idx] - 1];
            nzb = h->non_zero_count_cache[scan8[idx] - 8];
        }
    }

    if( nza > 0 )
        ctx++;

    if( nzb > 0 )
        ctx += 2;

    return ctx + 4 * cat;
}
/* Context offsets for the last_significant_coeff_flag of 8x8 blocks:
 * the 63 coefficient positions are grouped into 9 contexts (0..8).
 * Declared with DECLARE_ASM_CONST so the x86 asm significance decoder can
 * reference it directly. */
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
/**
 * Decodes one block of residual coefficients with CABAC.
 *
 * Three stages: coded_block_flag (skipped for 8x8 luma, which signals
 * presence via cbp), a significance map giving the positions of nonzero
 * coefficients, and the levels (magnitude + sign) decoded in reverse scan
 * order.  Decoded levels are dequantized with qmul and written to block[]
 * at positions given by scantable; the nnz caches / cbp_table are updated.
 *
 * is_dc is a compile-time constant in the non-CONFIG_SMALL build (see the
 * _dc/_nondc wrappers), letting the compiler strip the dead branches.
 */
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
    /* context model offsets, indexed by [MB_FIELD][cat] */
    static const int significant_coeff_flag_offset[2][6] = {
      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
    };
    static const int last_coeff_flag_offset[2][6] = {
      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
    };
    static const int coeff_abs_level_m1_offset[6] = {
        227+0, 227+10, 227+20, 227+30, 227+39, 426
    };
    /* per-position significance contexts for 8x8 blocks, [field][pos] */
    static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
      { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
        4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
        7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
       12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
      { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
        6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
        9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
        9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
    };
    /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
     * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
     * map node ctx => cabac ctx for level=1 */
    static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
    /* map node ctx => cabac ctx for level>1 */
    static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
    static const uint8_t coeff_abs_level_transition[2][8] = {
    /* update node ctx after decoding a level=1 */
        { 1, 2, 3, 3, 4, 5, 6, 7 },
    /* update node ctx after decoding a level>1 */
        { 4, 4, 4, 4, 5, 6, 7, 7 }
    };

    int index[64];              /* scan positions of the significant coeffs */

    int av_unused last;
    int coeff_count = 0;
    int node_ctx = 0;

    uint8_t *significant_coeff_ctx_base;
    uint8_t *last_coeff_ctx_base;
    uint8_t *abs_level_m1_ctx_base;

/* On non-x86, work on a local copy of the CABAC state so the compiler can
 * keep it in registers; copied back before every return. */
#ifndef ARCH_X86
#define CABAC_ON_STACK
#endif
#ifdef CABAC_ON_STACK
#define CC &cc
    CABACContext cc;
    cc.range     = h->cabac.range;
    cc.low       = h->cabac.low;
    cc.bytestream= h->cabac.bytestream;
#else
#define CC &h->cabac
#endif


    /* cat: 0-> DC 16x16  n = 0
     *      1-> AC 16x16  n = luma4x4idx
     *      2-> Luma4x4   n = luma4x4idx
     *      3-> DC Chroma n = iCbCr
     *      4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
     *      5-> Luma8x8   n = 4 * luma8x8idx
     */

    /* read coded block flag */
    if( is_dc || cat != 5 ) {
        if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
            /* no coefficients: clear the nnz cache entry and bail out */
            if( !is_dc ) {
                if( cat == 4 )
                    h->non_zero_count_cache[scan8[16+n]] = 0;
                else
                    h->non_zero_count_cache[scan8[n]] = 0;
            }

#ifdef CABAC_ON_STACK
            h->cabac.range     = cc.range     ;
            h->cabac.low       = cc.low       ;
            h->cabac.bytestream= cc.bytestream;
#endif
            return;
        }
    }

    significant_coeff_ctx_base = h->cabac_state
        + significant_coeff_flag_offset[MB_FIELD][cat];
    last_coeff_ctx_base = h->cabac_state
        + last_coeff_flag_offset[MB_FIELD][cat];
    abs_level_m1_ctx_base = h->cabac_state
        + coeff_abs_level_m1_offset[cat];

    /* significance map: either the hand-written x86 decoders or the generic
     * DECODE_SIGNIFICANCE macro; the `} else {` lines pair up differently in
     * the two preprocessed variants. */
    if( !is_dc && cat == 5 ) {
#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
        for(last= 0; last < coefs; last++) { \
            uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
            if( get_cabac( CC, sig_ctx )) { \
                uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
                index[coeff_count++] = last; \
                if( get_cabac( CC, last_ctx ) ) { \
                    last= max_coeff; \
                    break; \
                } \
            } \
        } \
        if( last == max_coeff -1 ) {\
            index[coeff_count++] = last;\
        }
        const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
#if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
    } else {
        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
#else
        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
    } else {
        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
#endif
    }
    assert(coeff_count > 0);

    /* record which blocks are coded: cbp_table bits for DC, nnz cache for AC */
    if( is_dc ) {
        if( cat == 0 )
            h->cbp_table[h->mb_xy] |= 0x100;
        else
            h->cbp_table[h->mb_xy] |= 0x40 << n;
    } else {
        if( cat == 5 )
            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
        else if( cat == 4 )
            h->non_zero_count_cache[scan8[16+n]] = coeff_count;
        else {
            assert( cat == 1 || cat == 2 );
            h->non_zero_count_cache[scan8[n]] = coeff_count;
        }
    }

    /* levels, decoded from the last significant coefficient backwards */
    do {
        uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;

        int j= scantable[index[--coeff_count]];

        if( get_cabac( CC, ctx ) == 0 ) {
            /* |level| == 1: only the sign remains */
            node_ctx = coeff_abs_level_transition[0][node_ctx];
            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( CC, -1);
            }else{
                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
            }
        } else {
            int coeff_abs = 2;
            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
            node_ctx = coeff_abs_level_transition[1][node_ctx];

            /* truncated unary magnitude up to 14 */
            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
                coeff_abs++;
            }

            if( coeff_abs >= 15 ) {
                /* bypass-coded Exp-Golomb escape; note this j intentionally
                 * shadows the scan index above as a bit counter */
                int j = 0;
                while( get_cabac_bypass( CC ) ) {
                    j++;
                }

                coeff_abs=1;
                while( j-- ) {
                    coeff_abs += coeff_abs + get_cabac_bypass( CC );
                }
                coeff_abs+= 14;
            }

            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
            }else{
                block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
            }
        }
    } while( coeff_count );
#ifdef CABAC_ON_STACK
            h->cabac.range     = cc.range     ;
            h->cabac.low       = cc.low       ;
            h->cabac.bytestream= cc.bytestream;
#endif

}
#ifndef CONFIG_SMALL
/* Large builds instantiate two specializations of
 * decode_cabac_residual_internal so the compiler resolves the is_dc
 * branches at compile time. */
static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
}

static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
}
#endif
/* Entry point for residual decoding: dispatches to the DC or non-DC
 * specialization (categories 0 and 3 are the DC blocks).  CONFIG_SMALL
 * builds call the shared internal directly to keep code size down. */
static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
#ifdef CONFIG_SMALL
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
#else
    if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
    else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
#endif
}
/**
 * Computes the addresses of the top and left neighbour macroblocks
 * (h->top_mb_xy, h->left_mb_xy[0]) for the current MB.
 *
 * In MBAFF mode the neighbours depend on the frame/field coding of the
 * current and adjacent MB pairs; in plain field pictures the vertical
 * neighbour is one extra row up.
 */
static inline void compute_mb_neighbors(H264Context *h)
{
    MpegEncContext * const s = &h->s;
    const int mb_xy = h->mb_xy;
    h->top_mb_xy     = mb_xy - s->mb_stride;
    h->left_mb_xy[0] = mb_xy - 1;
    if(FRAME_MBAFF){
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_frame_flag = !MB_FIELD;
        const int bottom = (s->mb_y & 1);
        /* step one more row up when the vertical neighbour is the top MB
         * of its pair rather than the bottom one */
        if (bottom
                ? !curr_mb_frame_flag // bottom macroblock
                : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
           ) {
            h->top_mb_xy -= s->mb_stride;
        }
        /* when left pair and current pair differ in frame/field coding,
         * the left neighbour is the top MB of the left pair */
        if (left_mb_frame_flag != curr_mb_frame_flag) {
            h->left_mb_xy[0] = pair_xy - 1;
        }
    } else if (FIELD_PICTURE) {
        h->top_mb_xy -= s->mb_stride;
    }
    return;
}
5450 * decodes a macroblock
5451 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5453 static int decode_mb_cabac(H264Context *h) {
5454 MpegEncContext * const s = &h->s;
5455 int mb_xy;
5456 int mb_type, partition_count, cbp = 0;
5457 int dct8x8_allowed= h->pps.transform_8x8_mode;
5459 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5461 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5463 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5464 if( h->slice_type_nos != FF_I_TYPE ) {
5465 int skip;
5466 /* a skipped mb needs the aff flag from the following mb */
5467 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5468 predict_field_decoding_flag(h);
5469 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5470 skip = h->next_mb_skipped;
5471 else
5472 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5473 /* read skip flags */
5474 if( skip ) {
5475 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5476 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5477 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5478 if(h->next_mb_skipped)
5479 predict_field_decoding_flag(h);
5480 else
5481 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5484 decode_mb_skip(h);
5486 h->cbp_table[mb_xy] = 0;
5487 h->chroma_pred_mode_table[mb_xy] = 0;
5488 h->last_qscale_diff = 0;
5490 return 0;
5494 if(FRAME_MBAFF){
5495 if( (s->mb_y&1) == 0 )
5496 h->mb_mbaff =
5497 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5500 h->prev_mb_skipped = 0;
5502 compute_mb_neighbors(h);
5503 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5504 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5505 return -1;
5508 if( h->slice_type_nos == FF_B_TYPE ) {
5509 if( mb_type < 23 ){
5510 partition_count= b_mb_type_info[mb_type].partition_count;
5511 mb_type= b_mb_type_info[mb_type].type;
5512 }else{
5513 mb_type -= 23;
5514 goto decode_intra_mb;
5516 } else if( h->slice_type_nos == FF_P_TYPE ) {
5517 if( mb_type < 5) {
5518 partition_count= p_mb_type_info[mb_type].partition_count;
5519 mb_type= p_mb_type_info[mb_type].type;
5520 } else {
5521 mb_type -= 5;
5522 goto decode_intra_mb;
5524 } else {
5525 if(h->slice_type == FF_SI_TYPE && mb_type)
5526 mb_type--;
5527 assert(h->slice_type_nos == FF_I_TYPE);
5528 decode_intra_mb:
5529 partition_count = 0;
5530 cbp= i_mb_type_info[mb_type].cbp;
5531 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5532 mb_type= i_mb_type_info[mb_type].type;
5534 if(MB_FIELD)
5535 mb_type |= MB_TYPE_INTERLACED;
5537 h->slice_table[ mb_xy ]= h->slice_num;
5539 if(IS_INTRA_PCM(mb_type)) {
5540 const uint8_t *ptr;
5542 // We assume these blocks are very rare so we do not optimize it.
5543 // FIXME The two following lines get the bitstream position in the cabac
5544 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5545 ptr= h->cabac.bytestream;
5546 if(h->cabac.low&0x1) ptr--;
5547 if(CABAC_BITS==16){
5548 if(h->cabac.low&0x1FF) ptr--;
5551 // The pixels are stored in the same order as levels in h->mb array.
5552 memcpy(h->mb, ptr, 256); ptr+=256;
5553 if(CHROMA){
5554 memcpy(h->mb+128, ptr, 128); ptr+=128;
5557 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5559 // All blocks are present
5560 h->cbp_table[mb_xy] = 0x1ef;
5561 h->chroma_pred_mode_table[mb_xy] = 0;
5562 // In deblocking, the quantizer is 0
5563 s->current_picture.qscale_table[mb_xy]= 0;
5564 // All coeffs are present
5565 memset(h->non_zero_count[mb_xy], 16, 16);
5566 s->current_picture.mb_type[mb_xy]= mb_type;
5567 h->last_qscale_diff = 0;
5568 return 0;
5571 if(MB_MBAFF){
5572 h->ref_count[0] <<= 1;
5573 h->ref_count[1] <<= 1;
5576 fill_caches(h, mb_type, 0);
5578 if( IS_INTRA( mb_type ) ) {
5579 int i, pred_mode;
5580 if( IS_INTRA4x4( mb_type ) ) {
5581 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5582 mb_type |= MB_TYPE_8x8DCT;
5583 for( i = 0; i < 16; i+=4 ) {
5584 int pred = pred_intra_mode( h, i );
5585 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5586 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5588 } else {
5589 for( i = 0; i < 16; i++ ) {
5590 int pred = pred_intra_mode( h, i );
5591 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5593 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5596 write_back_intra_pred_mode(h);
5597 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5598 } else {
5599 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5600 if( h->intra16x16_pred_mode < 0 ) return -1;
5602 if(CHROMA){
5603 h->chroma_pred_mode_table[mb_xy] =
5604 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5606 pred_mode= check_intra_pred_mode( h, pred_mode );
5607 if( pred_mode < 0 ) return -1;
5608 h->chroma_pred_mode= pred_mode;
5610 } else if( partition_count == 4 ) {
5611 int i, j, sub_partition_count[4], list, ref[2][4];
5613 if( h->slice_type_nos == FF_B_TYPE ) {
5614 for( i = 0; i < 4; i++ ) {
5615 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5616 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5617 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5619 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5620 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5621 pred_direct_motion(h, &mb_type);
5622 h->ref_cache[0][scan8[4]] =
5623 h->ref_cache[1][scan8[4]] =
5624 h->ref_cache[0][scan8[12]] =
5625 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5626 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5627 for( i = 0; i < 4; i++ )
5628 if( IS_DIRECT(h->sub_mb_type[i]) )
5629 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5632 } else {
5633 for( i = 0; i < 4; i++ ) {
5634 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5635 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5636 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5640 for( list = 0; list < h->list_count; list++ ) {
5641 for( i = 0; i < 4; i++ ) {
5642 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5643 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5644 if( h->ref_count[list] > 1 )
5645 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5646 else
5647 ref[list][i] = 0;
5648 } else {
5649 ref[list][i] = -1;
5651 h->ref_cache[list][ scan8[4*i]+1 ]=
5652 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5656 if(dct8x8_allowed)
5657 dct8x8_allowed = get_dct8x8_allowed(h);
5659 for(list=0; list<h->list_count; list++){
5660 for(i=0; i<4; i++){
5661 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5662 if(IS_DIRECT(h->sub_mb_type[i])){
5663 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5664 continue;
5667 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5668 const int sub_mb_type= h->sub_mb_type[i];
5669 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5670 for(j=0; j<sub_partition_count[i]; j++){
5671 int mpx, mpy;
5672 int mx, my;
5673 const int index= 4*i + block_width*j;
5674 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5675 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5676 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5678 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5679 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5680 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5682 if(IS_SUB_8X8(sub_mb_type)){
5683 mv_cache[ 1 ][0]=
5684 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5685 mv_cache[ 1 ][1]=
5686 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5688 mvd_cache[ 1 ][0]=
5689 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5690 mvd_cache[ 1 ][1]=
5691 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5692 }else if(IS_SUB_8X4(sub_mb_type)){
5693 mv_cache[ 1 ][0]= mx;
5694 mv_cache[ 1 ][1]= my;
5696 mvd_cache[ 1 ][0]= mx - mpx;
5697 mvd_cache[ 1 ][1]= my - mpy;
5698 }else if(IS_SUB_4X8(sub_mb_type)){
5699 mv_cache[ 8 ][0]= mx;
5700 mv_cache[ 8 ][1]= my;
5702 mvd_cache[ 8 ][0]= mx - mpx;
5703 mvd_cache[ 8 ][1]= my - mpy;
5705 mv_cache[ 0 ][0]= mx;
5706 mv_cache[ 0 ][1]= my;
5708 mvd_cache[ 0 ][0]= mx - mpx;
5709 mvd_cache[ 0 ][1]= my - mpy;
5711 }else{
5712 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5713 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5714 p[0] = p[1] = p[8] = p[9] = 0;
5715 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5719 } else if( IS_DIRECT(mb_type) ) {
5720 pred_direct_motion(h, &mb_type);
5721 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5722 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5723 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5724 } else {
5725 int list, mx, my, i, mpx, mpy;
5726 if(IS_16X16(mb_type)){
5727 for(list=0; list<h->list_count; list++){
5728 if(IS_DIR(mb_type, 0, list)){
5729 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5730 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5731 }else
5732 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5734 for(list=0; list<h->list_count; list++){
5735 if(IS_DIR(mb_type, 0, list)){
5736 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5738 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5739 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5740 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5742 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5743 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5744 }else
5745 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5748 else if(IS_16X8(mb_type)){
5749 for(list=0; list<h->list_count; list++){
5750 for(i=0; i<2; i++){
5751 if(IS_DIR(mb_type, i, list)){
5752 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5753 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5754 }else
5755 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5758 for(list=0; list<h->list_count; list++){
5759 for(i=0; i<2; i++){
5760 if(IS_DIR(mb_type, i, list)){
5761 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5762 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5763 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5764 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5766 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5767 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5768 }else{
5769 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5770 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5774 }else{
5775 assert(IS_8X16(mb_type));
5776 for(list=0; list<h->list_count; list++){
5777 for(i=0; i<2; i++){
5778 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5779 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5780 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5781 }else
5782 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5785 for(list=0; list<h->list_count; list++){
5786 for(i=0; i<2; i++){
5787 if(IS_DIR(mb_type, i, list)){
5788 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5789 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5790 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5792 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5793 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5794 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5795 }else{
5796 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5797 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5804 if( IS_INTER( mb_type ) ) {
5805 h->chroma_pred_mode_table[mb_xy] = 0;
5806 write_back_motion( h, mb_type );
5809 if( !IS_INTRA16x16( mb_type ) ) {
5810 cbp = decode_cabac_mb_cbp_luma( h );
5811 if(CHROMA)
5812 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5815 h->cbp_table[mb_xy] = h->cbp = cbp;
5817 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5818 if( decode_cabac_mb_transform_size( h ) )
5819 mb_type |= MB_TYPE_8x8DCT;
5821 s->current_picture.mb_type[mb_xy]= mb_type;
5823 if( cbp || IS_INTRA16x16( mb_type ) ) {
5824 const uint8_t *scan, *scan8x8, *dc_scan;
5825 const uint32_t *qmul;
5826 int dqp;
5828 if(IS_INTERLACED(mb_type)){
5829 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5830 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5831 dc_scan= luma_dc_field_scan;
5832 }else{
5833 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5834 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5835 dc_scan= luma_dc_zigzag_scan;
5838 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5839 if( dqp == INT_MIN ){
5840 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5841 return -1;
5843 s->qscale += dqp;
5844 if(((unsigned)s->qscale) > 51){
5845 if(s->qscale<0) s->qscale+= 52;
5846 else s->qscale-= 52;
5848 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5849 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5851 if( IS_INTRA16x16( mb_type ) ) {
5852 int i;
5853 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5854 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5856 if( cbp&15 ) {
5857 qmul = h->dequant4_coeff[0][s->qscale];
5858 for( i = 0; i < 16; i++ ) {
5859 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5860 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5862 } else {
5863 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5865 } else {
5866 int i8x8, i4x4;
5867 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5868 if( cbp & (1<<i8x8) ) {
5869 if( IS_8x8DCT(mb_type) ) {
5870 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5871 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5872 } else {
5873 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5874 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5875 const int index = 4*i8x8 + i4x4;
5876 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5877 //START_TIMER
5878 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5879 //STOP_TIMER("decode_residual")
5882 } else {
5883 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5884 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5889 if( cbp&0x30 ){
5890 int c;
5891 for( c = 0; c < 2; c++ ) {
5892 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5893 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5897 if( cbp&0x20 ) {
5898 int c, i;
5899 for( c = 0; c < 2; c++ ) {
5900 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5901 for( i = 0; i < 4; i++ ) {
5902 const int index = 16 + 4 * c + i;
5903 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5904 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5907 } else {
5908 uint8_t * const nnz= &h->non_zero_count_cache[0];
5909 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5910 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5912 } else {
5913 uint8_t * const nnz= &h->non_zero_count_cache[0];
5914 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5915 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5916 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5917 h->last_qscale_diff = 0;
5920 s->current_picture.qscale_table[mb_xy]= s->qscale;
5921 write_back_non_zero_count(h);
5923 if(MB_MBAFF){
5924 h->ref_count[0] >>= 1;
5925 h->ref_count[1] >>= 1;
5928 return 0;
5932 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5933 int i, d;
5934 const int index_a = qp + h->slice_alpha_c0_offset;
5935 const int alpha = (alpha_table+52)[index_a];
5936 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5938 if( bS[0] < 4 ) {
5939 int8_t tc[4];
5940 for(i=0; i<4; i++)
5941 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5942 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5943 } else {
5944 /* 16px edge length, because bS=4 is triggered by being at
5945 * the edge of an intra MB, so all 4 bS are the same */
5946 for( d = 0; d < 16; d++ ) {
5947 const int p0 = pix[-1];
5948 const int p1 = pix[-2];
5949 const int p2 = pix[-3];
5951 const int q0 = pix[0];
5952 const int q1 = pix[1];
5953 const int q2 = pix[2];
5955 if( FFABS( p0 - q0 ) < alpha &&
5956 FFABS( p1 - p0 ) < beta &&
5957 FFABS( q1 - q0 ) < beta ) {
5959 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5960 if( FFABS( p2 - p0 ) < beta)
5962 const int p3 = pix[-4];
5963 /* p0', p1', p2' */
5964 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5965 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5966 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5967 } else {
5968 /* p0' */
5969 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5971 if( FFABS( q2 - q0 ) < beta)
5973 const int q3 = pix[3];
5974 /* q0', q1', q2' */
5975 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5976 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5977 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5978 } else {
5979 /* q0' */
5980 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5982 }else{
5983 /* p0', q0' */
5984 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5985 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5987 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
5989 pix += stride;
5993 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5994 int i;
5995 const int index_a = qp + h->slice_alpha_c0_offset;
5996 const int alpha = (alpha_table+52)[index_a];
5997 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5999 if( bS[0] < 4 ) {
6000 int8_t tc[4];
6001 for(i=0; i<4; i++)
6002 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6003 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6004 } else {
6005 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Deblock the left (vertical) luma edge in MBAFF mode.  The 16 rows of the
 * edge may border either macroblock of the left pair, so there are 8
 * boundary strengths and 2 qp values instead of the usual 4 and 1; each row
 * selects its own bS/qp before applying the standard H.264 normal (bS<4) or
 * strong (bS==4) filter. */
6009 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6010 int i;
6011 for( i = 0; i < 16; i++, pix += stride) {
6012 int index_a;
6013 int alpha;
6014 int beta;
6016 int qp_index;
/* Pick the bS entry for this row: pairs of rows share one entry; in frame
 * (non-field) mode the low bit of i selects between the two field entries.
 * NOTE(review): the exact packing of bS[8] is defined by the caller
 * (filter_mb) -- confirm against it when changing this mapping. */
6017 int bS_index = (i >> 1);
6018 if (!MB_FIELD) {
6019 bS_index &= ~1;
6020 bS_index |= (i & 1);
/* bS == 0: no filtering for this row. */
6023 if( bS[bS_index] == 0 ) {
6024 continue;
/* Choose which of the two neighbour qps applies: field MBs split the edge
 * into halves (rows 0-7 / 8-15), frame MBs alternate row by row. */
6027 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6028 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6029 alpha = (alpha_table+52)[index_a];
6030 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filtering (bS 1..3): delta clipped to +-tc. */
6032 if( bS[bS_index] < 4 ) {
6033 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6034 const int p0 = pix[-1];
6035 const int p1 = pix[-2];
6036 const int p2 = pix[-3];
6037 const int q0 = pix[0];
6038 const int q1 = pix[1];
6039 const int q2 = pix[2];
/* Only filter where the edge looks like a blocking artifact. */
6041 if( FFABS( p0 - q0 ) < alpha &&
6042 FFABS( p1 - p0 ) < beta &&
6043 FFABS( q1 - q0 ) < beta ) {
6044 int tc = tc0;
6045 int i_delta;
/* Each p1'/q1' adjustment widens the p0/q0 clip range by one. */
6047 if( FFABS( p2 - p0 ) < beta ) {
6048 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6049 tc++;
6051 if( FFABS( q2 - q0 ) < beta ) {
6052 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6053 tc++;
6056 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6057 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6058 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6059 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filtering (bS == 4, intra edge). */
6061 }else{
6062 const int p0 = pix[-1];
6063 const int p1 = pix[-2];
6064 const int p2 = pix[-3];
6066 const int q0 = pix[0];
6067 const int q1 = pix[1];
6068 const int q2 = pix[2];
6070 if( FFABS( p0 - q0 ) < alpha &&
6071 FFABS( p1 - p0 ) < beta &&
6072 FFABS( q1 - q0 ) < beta ) {
/* Extra closeness test selects between the 3-tap and strong 5-tap filters. */
6074 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6075 if( FFABS( p2 - p0 ) < beta)
6077 const int p3 = pix[-4];
6078 /* p0', p1', p2' */
6079 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6080 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6081 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6082 } else {
6083 /* p0' */
6084 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6086 if( FFABS( q2 - q0 ) < beta)
6088 const int q3 = pix[3];
6089 /* q0', q1', q2' */
6090 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6091 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6092 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6093 } else {
6094 /* q0' */
6095 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6097 }else{
6098 /* p0', q0' */
6099 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6100 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6102 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6107 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6108 int i;
6109 for( i = 0; i < 8; i++, pix += stride) {
6110 int index_a;
6111 int alpha;
6112 int beta;
6114 int qp_index;
6115 int bS_index = i;
6117 if( bS[bS_index] == 0 ) {
6118 continue;
6121 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6122 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6123 alpha = (alpha_table+52)[index_a];
6124 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6126 if( bS[bS_index] < 4 ) {
6127 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6128 const int p0 = pix[-1];
6129 const int p1 = pix[-2];
6130 const int q0 = pix[0];
6131 const int q1 = pix[1];
6133 if( FFABS( p0 - q0 ) < alpha &&
6134 FFABS( p1 - p0 ) < beta &&
6135 FFABS( q1 - q0 ) < beta ) {
6136 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6138 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6139 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6140 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6142 }else{
6143 const int p0 = pix[-1];
6144 const int p1 = pix[-2];
6145 const int q0 = pix[0];
6146 const int q1 = pix[1];
6148 if( FFABS( p0 - q0 ) < alpha &&
6149 FFABS( p1 - p0 ) < beta &&
6150 FFABS( q1 - q0 ) < beta ) {
6152 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6153 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6154 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6160 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6161 int i, d;
6162 const int index_a = qp + h->slice_alpha_c0_offset;
6163 const int alpha = (alpha_table+52)[index_a];
6164 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6165 const int pix_next = stride;
6167 if( bS[0] < 4 ) {
6168 int8_t tc[4];
6169 for(i=0; i<4; i++)
6170 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6171 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6172 } else {
6173 /* 16px edge length, see filter_mb_edgev */
6174 for( d = 0; d < 16; d++ ) {
6175 const int p0 = pix[-1*pix_next];
6176 const int p1 = pix[-2*pix_next];
6177 const int p2 = pix[-3*pix_next];
6178 const int q0 = pix[0];
6179 const int q1 = pix[1*pix_next];
6180 const int q2 = pix[2*pix_next];
6182 if( FFABS( p0 - q0 ) < alpha &&
6183 FFABS( p1 - p0 ) < beta &&
6184 FFABS( q1 - q0 ) < beta ) {
6186 const int p3 = pix[-4*pix_next];
6187 const int q3 = pix[ 3*pix_next];
6189 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6190 if( FFABS( p2 - p0 ) < beta) {
6191 /* p0', p1', p2' */
6192 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6193 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6194 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6195 } else {
6196 /* p0' */
6197 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6199 if( FFABS( q2 - q0 ) < beta) {
6200 /* q0', q1', q2' */
6201 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6202 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6203 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6204 } else {
6205 /* q0' */
6206 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6208 }else{
6209 /* p0', q0' */
6210 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6211 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6213 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6215 pix++;
6220 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6221 int i;
6222 const int index_a = qp + h->slice_alpha_c0_offset;
6223 const int alpha = (alpha_table+52)[index_a];
6224 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6226 if( bS[0] < 4 ) {
6227 int8_t tc[4];
6228 for(i=0; i<4; i++)
6229 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6230 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6231 } else {
6232 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock: for simple frame MBs it computes
 * the boundary strengths with the DSP helper and applies the edge filters
 * directly, falling back to the generic filter_mb() for the cases it cannot
 * handle. */
6236 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6237 MpegEncContext * const s = &h->s;
/* In a bottom-field picture the first MB row is row 1, not row 0. */
6238 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6239 int mb_xy, mb_type;
6240 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6242 mb_xy = h->mb_xy;
/* Fall back to the generic filter_mb() for picture-border MBs, missing DSP
 * helper, per-plane chroma qp offsets, or cross-slice edges when
 * deblocking_filter==2.
 * NOTE(review): the bare "1 ||" below makes this condition always true, so
 * the fast path further down is currently never taken -- presumably a
 * temporary disable; confirm intent before relying on the code below. */
6244 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6245 1 ||
6246 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6247 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6248 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6249 return;
6251 assert(!FRAME_MBAFF);
6253 mb_type = s->current_picture.mb_type[mb_xy];
/* Gather this MB's qp and the left/top neighbours', then average across each
 * shared edge as the loop filter requires. */
6254 qp = s->current_picture.qscale_table[mb_xy];
6255 qp0 = s->current_picture.qscale_table[mb_xy-1];
6256 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6257 qpc = get_chroma_qp( h, 0, qp );
6258 qpc0 = get_chroma_qp( h, 0, qp0 );
6259 qpc1 = get_chroma_qp( h, 0, qp1 );
6260 qp0 = (qp + qp0 + 1) >> 1;
6261 qp1 = (qp + qp1 + 1) >> 1;
6262 qpc0 = (qpc + qpc0 + 1) >> 1;
6263 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this qp the filter cannot change any pixel, so skip the MB early. */
6264 qp_thresh = 15 - h->slice_alpha_c0_offset;
6265 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6266 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6267 return;
/* Intra MB: boundary strengths are constant (4 on outer edges, 3 inside;
 * the top MB edge drops to 3 in field pictures). */
6269 if( IS_INTRA(mb_type) ) {
6270 int16_t bS4[4] = {4,4,4,4};
6271 int16_t bS3[4] = {3,3,3,3};
6272 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* With the 8x8 transform only luma edges 0 and 2 exist. */
6273 if( IS_8x8DCT(mb_type) ) {
6274 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6275 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6276 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6277 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6278 } else {
6279 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6280 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6281 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6282 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6283 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6284 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6285 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6286 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma always has only edges 0 and 2. */
6288 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6289 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6290 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6291 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6292 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6293 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6294 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6295 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6296 return;
6297 } else {
/* Inter MB: compute bS per edge with the DSP helper; bSv views the
 * int16_t bS[2][4][4] rows as packed 64-bit words for fast tests/stores. */
6298 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6299 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6300 int edges;
6301 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6302 edges = 4;
6303 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6304 } else {
/* mask_edge*: how often the mv-based bS must be rechecked, derived from
 * the partition shapes of this MB and the left neighbour. */
6305 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6306 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6307 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6308 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6309 ? 3 : 0;
6310 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6311 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6312 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6313 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* Edges shared with an intra neighbour are forced to bS 4 (3 for the
 * horizontal edge in field pictures). */
6315 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6316 bSv[0][0] = 0x0004000400040004ULL;
6317 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6318 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): apply one luma edge filter (dir 0 = vertical,
 * 1 = horizontal) and, for even edges, the co-located chroma edges. */
6320 #define FILTER(hv,dir,edge)\
6321 if(bSv[dir][edge]) {\
6322 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6323 if(!(edge&1)) {\
6324 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6325 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6328 if( edges == 1 ) {
6329 FILTER(v,0,0);
6330 FILTER(h,1,0);
6331 } else if( IS_8x8DCT(mb_type) ) {
6332 FILTER(v,0,0);
6333 FILTER(v,0,2);
6334 FILTER(h,1,0);
6335 FILTER(h,1,2);
6336 } else {
6337 FILTER(v,0,0);
6338 FILTER(v,0,1);
6339 FILTER(v,0,2);
6340 FILTER(v,0,3);
6341 FILTER(h,1,0);
6342 FILTER(h,1,1);
6343 FILTER(h,1,2);
6344 FILTER(h,1,3);
6346 #undef FILTER
6350 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6351 MpegEncContext * const s = &h->s;
6352 const int mb_xy= mb_x + mb_y*s->mb_stride;
6353 const int mb_type = s->current_picture.mb_type[mb_xy];
6354 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6355 int first_vertical_edge_done = 0;
6356 int dir;
6358 //for sufficiently low qp, filtering wouldn't do anything
6359 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6360 if(!FRAME_MBAFF){
6361 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6362 int qp = s->current_picture.qscale_table[mb_xy];
6363 if(qp <= qp_thresh
6364 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6365 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6366 return;
6370 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6371 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6372 int top_type, left_type[2];
6373 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6374 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6375 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6377 if(IS_8x8DCT(top_type)){
6378 h->non_zero_count_cache[4+8*0]=
6379 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6380 h->non_zero_count_cache[6+8*0]=
6381 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6383 if(IS_8x8DCT(left_type[0])){
6384 h->non_zero_count_cache[3+8*1]=
6385 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6387 if(IS_8x8DCT(left_type[1])){
6388 h->non_zero_count_cache[3+8*3]=
6389 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6392 if(IS_8x8DCT(mb_type)){
6393 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6394 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6396 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6397 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6399 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6400 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6402 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6403 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6407 if (FRAME_MBAFF
6408 // left mb is in picture
6409 && h->slice_table[mb_xy-1] != 0xFFFF
6410 // and current and left pair do not have the same interlaced type
6411 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6412 // and left mb is in the same slice if deblocking_filter == 2
6413 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6414 /* First vertical edge is different in MBAFF frames
6415 * There are 8 different bS to compute and 2 different Qp
6417 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6418 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6419 int16_t bS[8];
6420 int qp[2];
6421 int bqp[2];
6422 int rqp[2];
6423 int mb_qp, mbn0_qp, mbn1_qp;
6424 int i;
6425 first_vertical_edge_done = 1;
6427 if( IS_INTRA(mb_type) )
6428 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6429 else {
6430 for( i = 0; i < 8; i++ ) {
6431 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6433 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6434 bS[i] = 4;
6435 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6436 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6437 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6439 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6440 bS[i] = 2;
6441 else
6442 bS[i] = 1;
6446 mb_qp = s->current_picture.qscale_table[mb_xy];
6447 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6448 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6449 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6450 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6451 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6452 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6453 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6454 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6455 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6456 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6457 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6458 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6460 /* Filter edge */
6461 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6462 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6463 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6464 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6465 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6467 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6468 for( dir = 0; dir < 2; dir++ )
6470 int edge;
6471 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6472 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6473 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6474 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
6475 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
6477 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6478 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6479 // how often to recheck mv-based bS when iterating between edges
6480 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6481 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6482 // how often to recheck mv-based bS when iterating along each edge
6483 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6485 if (first_vertical_edge_done) {
6486 start = 1;
6487 first_vertical_edge_done = 0;
6490 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6491 start = 1;
6493 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6494 && !IS_INTERLACED(mb_type)
6495 && IS_INTERLACED(mbm_type)
6497 // This is a special case in the norm where the filtering must
6498 // be done twice (one each of the field) even if we are in a
6499 // frame macroblock.
6501 static const int nnz_idx[4] = {4,5,6,3};
6502 unsigned int tmp_linesize = 2 * linesize;
6503 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6504 int mbn_xy = mb_xy - 2 * s->mb_stride;
6505 int qp;
6506 int i, j;
6507 int16_t bS[4];
6509 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6510 if( IS_INTRA(mb_type) ||
6511 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6512 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6513 } else {
6514 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6515 for( i = 0; i < 4; i++ ) {
6516 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6517 mbn_nnz[nnz_idx[i]] != 0 )
6518 bS[i] = 2;
6519 else
6520 bS[i] = 1;
6523 // Do not use s->qscale as luma quantizer because it has not the same
6524 // value in IPCM macroblocks.
6525 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6526 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6527 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6528 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6529 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6530 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6531 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6532 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6535 start = 1;
6538 /* Calculate bS */
6539 for( edge = start; edge < edges; edge++ ) {
6540 /* mbn_xy: neighbor macroblock */
6541 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6542 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6543 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6544 int16_t bS[4];
6545 int qp;
6547 if( (edge&1) && IS_8x8DCT(mb_type) )
6548 continue;
6550 if( IS_INTRA(mb_type) ||
6551 IS_INTRA(mbn_type) ) {
6552 int value;
6553 if (edge == 0) {
6554 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6555 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6557 value = 4;
6558 } else {
6559 value = 3;
6561 } else {
6562 value = 3;
6564 bS[0] = bS[1] = bS[2] = bS[3] = value;
6565 } else {
6566 int i, l;
6567 int mv_done;
6569 if( edge & mask_edge ) {
6570 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6571 mv_done = 1;
6573 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6574 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6575 mv_done = 1;
6577 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6578 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6579 int bn_idx= b_idx - (dir ? 8:1);
6580 int v = 0;
6582 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6583 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6584 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6585 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6588 if(h->slice_type_nos == FF_B_TYPE && v){
6589 v=0;
6590 for( l = 0; !v && l < 2; l++ ) {
6591 int ln= 1-l;
6592 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6593 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6594 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6598 bS[0] = bS[1] = bS[2] = bS[3] = v;
6599 mv_done = 1;
6601 else
6602 mv_done = 0;
6604 for( i = 0; i < 4; i++ ) {
6605 int x = dir == 0 ? edge : i;
6606 int y = dir == 0 ? i : edge;
6607 int b_idx= 8 + 4 + x + 8*y;
6608 int bn_idx= b_idx - (dir ? 8:1);
6610 if( h->non_zero_count_cache[b_idx] != 0 ||
6611 h->non_zero_count_cache[bn_idx] != 0 ) {
6612 bS[i] = 2;
6614 else if(!mv_done)
6616 bS[i] = 0;
6617 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6618 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6619 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6620 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6621 bS[i] = 1;
6622 break;
6626 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6627 bS[i] = 0;
6628 for( l = 0; l < 2; l++ ) {
6629 int ln= 1-l;
6630 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6631 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6632 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6633 bS[i] = 1;
6634 break;
6641 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6642 continue;
6645 /* Filter edge */
6646 // Do not use s->qscale as luma quantizer because it has not the same
6647 // value in IPCM macroblocks.
6648 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6649 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6650 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6651 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6652 if( dir == 0 ) {
6653 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6654 if( (edge&1) == 0 ) {
6655 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6656 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6657 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6658 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6660 } else {
6661 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6662 if( (edge&1) == 0 ) {
6663 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6664 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6665 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6666 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Decode all macroblocks of the current slice, using either the CABAC or the
 * CAVLC entropy path depending on the active PPS (h->pps.cabac).
 *
 * Worker entry point: @p arg points to an H264Context* (one slice/thread
 * context), as dispatched by execute_decode_slices().
 *
 * @return 0 when the slice decoded to a clean end, -1 on error.
 *
 * NOTE(review): in this extract, lines consisting only of closing braces
 * appear to have been lost; the statement sequence below is otherwise
 * unchanged from the original.
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg){
    H264Context *h = *(void**)arg;
    MpegEncContext * const s = &h->s;
    /* with data partitioning only AC end/error events are reported to the
       error-resilience code */
    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;

    s->mb_skip_run= -1;

    if( h->pps.cabac ) {
        int i;

        /* realign */
        align_get_bits( &s->gb );

        /* init cabac */
        ff_init_cabac_states( &h->cabac);
        ff_init_cabac_decoder( &h->cabac,
                               s->gb.buffer + get_bits_count(&s->gb)/8,
                               ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
        /* calculate pre-state: derive each context's qp-dependent init value
           and pack it into the engine's (state, MPS) representation */
        for( i= 0; i < 460; i++ ) {
            int pre;
            if( h->slice_type_nos == FF_I_TYPE )
                pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
            else
                pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );

            if( pre <= 63 )
                h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
            else
                h->cabac_state[i] = 2 * ( pre - 64 ) + 1;

        /* CABAC macroblock loop */
        for(;;){
            //START_TIMER
            int ret = decode_mb_cabac(h);
            int eos;
            //STOP_TIMER("decode_mb_cabac")

            if(ret>=0) hl_decode_mb(h);

            /* MBAFF: the pair's bottom macroblock is decoded right away */
            if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;

                if(ret>=0) ret = decode_mb_cabac(h);

                if(ret>=0) hl_decode_mb(h);
                s->mb_y--;

            eos = get_cabac_terminate( &h->cabac );

            /* bytestream may legally overrun by up to 2 bytes; more means corruption */
            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
                return -1;

            /* advance to next macroblock position, wrapping rows */
            if( ++s->mb_x >= s->mb_width ) {
                s->mb_x = 0;
                ff_draw_horiz_band(s, 16*s->mb_y, 16);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;

            if( eos || s->mb_y >= s->mb_height ) {
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                return 0;

    } else {
        /* CAVLC macroblock loop */
        for(;;){
            int ret = decode_mb_cavlc(h);

            if(ret>=0) hl_decode_mb(h);

            if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;
                ret = decode_mb_cavlc(h);

                if(ret>=0) hl_decode_mb(h);
                s->mb_y--;

            if(ret<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                return -1;

            if(++s->mb_x >= s->mb_width){
                s->mb_x=0;
                ff_draw_horiz_band(s, 16*s->mb_y, 16);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;

                if(s->mb_y >= s->mb_height){
                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                    /* only an exact end-of-bitstream counts as success */
                    if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;

            /* end of bitstream reached while no skip run is pending */
            if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;

#if 0 /* dead reference implementation of a simpler decode loop */
    for(;s->mb_y < s->mb_height; s->mb_y++){
        for(;s->mb_x < s->mb_width; s->mb_x++){
            int ret= decode_mb(h);

            hl_decode_mb(h);

            if(ret<0){
                av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                return -1;

            if(++s->mb_x >= s->mb_width){
                s->mb_x=0;
                if(++s->mb_y >= s->mb_height){
                    if(get_bits_count(s->gb) == s->gb.size_in_bits){
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return 0;
                    }else{
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                        return -1;

            /* NOTE(review): the '?' characters in the next line are extraction
               mojibake in this dead code; presumably it read
               get_bits_count(s->gb) >= s->gb.size_in_bits — confirm against
               the repository copy. */
            if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
                if(get_bits_count(s->gb) == s->gb.size_in_bits){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                    return -1;

        s->mb_x=0;
        ff_draw_horiz_band(s, 16*s->mb_y, 16);
#endif
    return -1; //not reached
6851 static int decode_picture_timing(H264Context *h){
6852 MpegEncContext * const s = &h->s;
6853 if(h->sps.nal_hrd_parameters_present_flag || h->sps.vcl_hrd_parameters_present_flag){
6854 skip_bits(&s->gb, h->sps.cpb_removal_delay_length); /* cpb_removal_delay */
6855 skip_bits(&s->gb, h->sps.dpb_output_delay_length); /* dpb_output_delay */
6857 if(h->sps.pic_struct_present_flag){
6858 unsigned int i, num_clock_ts;
6859 h->sei_pic_struct = get_bits(&s->gb, 4);
6861 if (h->sei_pic_struct > SEI_PIC_STRUCT_FRAME_TRIPLING)
6862 return -1;
6864 num_clock_ts = sei_num_clock_ts_table[h->sei_pic_struct];
6866 for (i = 0 ; i < num_clock_ts ; i++){
6867 if(get_bits(&s->gb, 1)){ /* clock_timestamp_flag */
6868 unsigned int full_timestamp_flag;
6869 skip_bits(&s->gb, 2); /* ct_type */
6870 skip_bits(&s->gb, 1); /* nuit_field_based_flag */
6871 skip_bits(&s->gb, 5); /* counting_type */
6872 full_timestamp_flag = get_bits(&s->gb, 1);
6873 skip_bits(&s->gb, 1); /* discontinuity_flag */
6874 skip_bits(&s->gb, 1); /* cnt_dropped_flag */
6875 skip_bits(&s->gb, 8); /* n_frames */
6876 if(full_timestamp_flag){
6877 skip_bits(&s->gb, 6); /* seconds_value 0..59 */
6878 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6879 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6880 }else{
6881 if(get_bits(&s->gb, 1)){ /* seconds_flag */
6882 skip_bits(&s->gb, 6); /* seconds_value range 0..59 */
6883 if(get_bits(&s->gb, 1)){ /* minutes_flag */
6884 skip_bits(&s->gb, 6); /* minutes_value 0..59 */
6885 if(get_bits(&s->gb, 1)) /* hours_flag */
6886 skip_bits(&s->gb, 5); /* hours_value 0..23 */
6890 if(h->sps.time_offset_length > 0)
6891 skip_bits(&s->gb, h->sps.time_offset_length); /* time_offset */
6895 return 0;
6898 static int decode_unregistered_user_data(H264Context *h, int size){
6899 MpegEncContext * const s = &h->s;
6900 uint8_t user_data[16+256];
6901 int e, build, i;
6903 if(size<16)
6904 return -1;
6906 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6907 user_data[i]= get_bits(&s->gb, 8);
6910 user_data[i]= 0;
6911 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6912 if(e==1 && build>=0)
6913 h->x264_build= build;
6915 if(s->avctx->debug & FF_DEBUG_BUGS)
6916 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6918 for(; i<size; i++)
6919 skip_bits(&s->gb, 8);
6921 return 0;
6924 static int decode_sei(H264Context *h){
6925 MpegEncContext * const s = &h->s;
6927 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6928 int size, type;
6930 type=0;
6932 type+= show_bits(&s->gb, 8);
6933 }while(get_bits(&s->gb, 8) == 255);
6935 size=0;
6937 size+= show_bits(&s->gb, 8);
6938 }while(get_bits(&s->gb, 8) == 255);
6940 switch(type){
6941 case 1: // Picture timing SEI
6942 if(decode_picture_timing(h) < 0)
6943 return -1;
6944 break;
6945 case 5:
6946 if(decode_unregistered_user_data(h, size) < 0)
6947 return -1;
6948 break;
6949 default:
6950 skip_bits(&s->gb, 8*size);
6953 //FIXME check bits here
6954 align_get_bits(&s->gb);
6957 return 0;
6960 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6961 MpegEncContext * const s = &h->s;
6962 int cpb_count, i;
6963 cpb_count = get_ue_golomb(&s->gb) + 1;
6964 get_bits(&s->gb, 4); /* bit_rate_scale */
6965 get_bits(&s->gb, 4); /* cpb_size_scale */
6966 for(i=0; i<cpb_count; i++){
6967 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6968 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6969 get_bits1(&s->gb); /* cbr_flag */
6971 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6972 sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
6973 sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
6974 sps->time_offset_length = get_bits(&s->gb, 5);
6977 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6978 MpegEncContext * const s = &h->s;
6979 int aspect_ratio_info_present_flag;
6980 unsigned int aspect_ratio_idc;
6982 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6984 if( aspect_ratio_info_present_flag ) {
6985 aspect_ratio_idc= get_bits(&s->gb, 8);
6986 if( aspect_ratio_idc == EXTENDED_SAR ) {
6987 sps->sar.num= get_bits(&s->gb, 16);
6988 sps->sar.den= get_bits(&s->gb, 16);
6989 }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
6990 sps->sar= pixel_aspect[aspect_ratio_idc];
6991 }else{
6992 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6993 return -1;
6995 }else{
6996 sps->sar.num=
6997 sps->sar.den= 0;
6999 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7001 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7002 get_bits1(&s->gb); /* overscan_appropriate_flag */
7005 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7006 get_bits(&s->gb, 3); /* video_format */
7007 get_bits1(&s->gb); /* video_full_range_flag */
7008 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7009 get_bits(&s->gb, 8); /* colour_primaries */
7010 get_bits(&s->gb, 8); /* transfer_characteristics */
7011 get_bits(&s->gb, 8); /* matrix_coefficients */
7015 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7016 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7017 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7020 sps->timing_info_present_flag = get_bits1(&s->gb);
7021 if(sps->timing_info_present_flag){
7022 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7023 sps->time_scale = get_bits_long(&s->gb, 32);
7024 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7027 sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7028 if(sps->nal_hrd_parameters_present_flag)
7029 decode_hrd_parameters(h, sps);
7030 sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7031 if(sps->vcl_hrd_parameters_present_flag)
7032 decode_hrd_parameters(h, sps);
7033 if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
7034 get_bits1(&s->gb); /* low_delay_hrd_flag */
7035 sps->pic_struct_present_flag = get_bits1(&s->gb);
7037 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7038 if(sps->bitstream_restriction_flag){
7039 unsigned int num_reorder_frames;
7040 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7041 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7042 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7043 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7044 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7045 num_reorder_frames= get_ue_golomb(&s->gb);
7046 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7048 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7049 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7050 return -1;
7053 sps->num_reorder_frames= num_reorder_frames;
7056 return 0;
7059 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7060 const uint8_t *jvt_list, const uint8_t *fallback_list){
7061 MpegEncContext * const s = &h->s;
7062 int i, last = 8, next = 8;
7063 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7064 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7065 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7066 else
7067 for(i=0;i<size;i++){
7068 if(next)
7069 next = (last + get_se_golomb(&s->gb)) & 0xff;
7070 if(!i && !next){ /* matrix not written, we use the preset one */
7071 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7072 break;
7074 last = factors[scan[i]] = next ? next : last;
7078 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7079 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7080 MpegEncContext * const s = &h->s;
7081 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7082 const uint8_t *fallback[4] = {
7083 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7084 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7085 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7086 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7088 if(get_bits1(&s->gb)){
7089 sps->scaling_matrix_present |= is_sps;
7090 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7091 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7092 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7093 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7094 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7095 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7096 if(is_sps || pps->transform_8x8_mode){
7097 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7098 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7104 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7106 static void *
7107 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7108 const size_t size, const char *name)
7110 if(id>=max) {
7111 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7112 return NULL;
7115 if(!vec[id]) {
7116 vec[id] = av_mallocz(size);
7117 if(vec[id] == NULL)
7118 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
7120 return vec[id];
/**
 * Decode a sequence parameter set (SPS) NAL unit into h->sps_buffers[sps_id].
 *
 * @return 0 on success, -1 on any syntax/range error.
 *
 * NOTE(review): in this extract, closing-brace-only lines (and the closing
 * ");" of the debug av_log near the end) appear to have been lost; the
 * statement sequence below is otherwise unchanged.
 */
static inline int decode_seq_parameter_set(H264Context *h){
    MpegEncContext * const s = &h->s;
    int profile_idc, level_idc;
    unsigned int sps_id, tmp, mb_width, mb_height;
    int i;
    SPS *sps;

    profile_idc= get_bits(&s->gb, 8);
    get_bits1(&s->gb);   //constraint_set0_flag
    get_bits1(&s->gb);   //constraint_set1_flag
    get_bits1(&s->gb);   //constraint_set2_flag
    get_bits1(&s->gb);   //constraint_set3_flag
    get_bits(&s->gb, 4); // reserved
    level_idc= get_bits(&s->gb, 8);
    sps_id= get_ue_golomb(&s->gb);

    sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
    if(sps == NULL)
        return -1;

    sps->profile_idc= profile_idc;
    sps->level_idc= level_idc;

    /* flat default scaling matrices (all 16) until the stream overrides them */
    memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
    memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
    sps->scaling_matrix_present = 0;

    if(sps->profile_idc >= 100){ //high profile
        sps->chroma_format_idc= get_ue_golomb(&s->gb);
        if(sps->chroma_format_idc == 3)
            get_bits1(&s->gb);  //residual_color_transform_flag
        get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
        get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
        sps->transform_bypass = get_bits1(&s->gb);
        decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
    }else{
        sps->chroma_format_idc= 1;  /* baseline/main: 4:2:0 implied */

    sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
    sps->poc_type= get_ue_golomb(&s->gb);

    if(sps->poc_type == 0){ //FIXME #define
        sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
    } else if(sps->poc_type == 1){//FIXME #define
        sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
        sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
        sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
        tmp= get_ue_golomb(&s->gb);

        /* bound the cycle length by the fixed offset_for_ref_frame array */
        if(tmp >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
            av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
            return -1;
        sps->poc_cycle_length= tmp;

        for(i=0; i<sps->poc_cycle_length; i++)
            sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
    }else if(sps->poc_type != 2){
        av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
        return -1;

    tmp= get_ue_golomb(&s->gb);
    if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
        av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
        return -1;
    sps->ref_frame_count= tmp;
    sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
    mb_width= get_ue_golomb(&s->gb) + 1;
    mb_height= get_ue_golomb(&s->gb) + 1;
    if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
       avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
        av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
        return -1;
    sps->mb_width = mb_width;
    sps->mb_height= mb_height;

    sps->frame_mbs_only_flag= get_bits1(&s->gb);
    if(!sps->frame_mbs_only_flag)
        sps->mb_aff= get_bits1(&s->gb);
    else
        sps->mb_aff= 0;

    sps->direct_8x8_inference_flag= get_bits1(&s->gb);

#ifndef ALLOW_INTERLACE
    if(sps->mb_aff)
        av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
#endif
    sps->crop= get_bits1(&s->gb);
    if(sps->crop){
        sps->crop_left  = get_ue_golomb(&s->gb);
        sps->crop_right = get_ue_golomb(&s->gb);
        sps->crop_top   = get_ue_golomb(&s->gb);
        sps->crop_bottom= get_ue_golomb(&s->gb);
        if(sps->crop_left || sps->crop_top){
            av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
        if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
            av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
    }else{
        sps->crop_left  =
        sps->crop_right =
        sps->crop_top   =
        sps->crop_bottom= 0;

    sps->vui_parameters_present_flag= get_bits1(&s->gb);
    if( sps->vui_parameters_present_flag )
        decode_vui_parameters(h, sps);

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
               sps_id, sps->profile_idc, sps->level_idc,
               sps->poc_type,
               sps->ref_frame_count,
               sps->mb_width, sps->mb_height,
               sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
               sps->direct_8x8_inference_flag ? "8B8" : "",
               sps->crop_left, sps->crop_right,
               sps->crop_top, sps->crop_bottom,
               sps->vui_parameters_present_flag ? "VUI" : "",
               ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]

    /* vdpau branch: set up hardware decoding state for this SPS
       NOTE(review): decode_postinit() is not visible in this chunk — confirm
       its contract against the rest of the file. */
    if (decode_postinit(h, sps) < 0)
        return -1;

    return 0;
7259 static void
7260 build_qp_table(PPS *pps, int t, int index)
7262 int i;
7263 for(i = 0; i < 52; i++)
7264 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Decode a picture parameter set (PPS) NAL unit into h->pps_buffers[pps_id].
 *
 * @param bit_length size of the RBSP in bits, used to detect the optional
 *                   trailing high-profile fields (8x8 transform, scaling
 *                   matrices, second chroma qp offset).
 * @return 0 on success, -1 on any syntax/range error.
 *
 * NOTE(review): in this extract, closing-brace-only lines appear to have
 * been lost; the statement sequence below is otherwise unchanged.
 */
static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
    MpegEncContext * const s = &h->s;
    unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
    PPS *pps;

    pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
    if(pps == NULL)
        return -1;

    /* the referenced SPS must already have been decoded */
    tmp= get_ue_golomb(&s->gb);
    if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
        av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
        return -1;
    pps->sps_id= tmp;

    pps->cabac= get_bits1(&s->gb);
    pps->pic_order_present= get_bits1(&s->gb);
    pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
    if(pps->slice_group_count > 1 ){
        /* FMO: map type is parsed but the per-type payloads are not (dead
           spec excerpts below); decoding such streams is unsupported */
        pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
        av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
        switch(pps->mb_slice_group_map_type){
        case 0:
#if 0
|   for( i = 0; i <= num_slice_groups_minus1; i++ ) |   |      |
|    run_length[ i ]                                |1  |ue(v) |
#endif
            break;
        case 2:
#if 0
|   for( i = 0; i < num_slice_groups_minus1; i++ )  |   |      |
|{                                                  |   |      |
|    top_left_mb[ i ]                               |1  |ue(v) |
|    bottom_right_mb[ i ]                           |1  |ue(v) |
|   }                                               |   |      |
#endif
            break;
        case 3:
        case 4:
        case 5:
#if 0
|   slice_group_change_direction_flag               |1  |u(1)  |
|   slice_group_change_rate_minus1                  |1  |ue(v) |
#endif
            break;
        case 6:
#if 0
|   slice_group_id_cnt_minus1                       |1  |ue(v) |
|   for( i = 0; i <= slice_group_id_cnt_minus1; i++ |   |      |
|)                                                  |   |      |
|    slice_group_id[ i ]                            |1  |u(v)  |
#endif
            break;

    pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
    pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
    if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
        av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
        pps->ref_count[0]= pps->ref_count[1]= 1;
        return -1;

    pps->weighted_pred= get_bits1(&s->gb);
    pps->weighted_bipred_idc= get_bits(&s->gb, 2);
    pps->init_qp= get_se_golomb(&s->gb) + 26;
    pps->init_qs= get_se_golomb(&s->gb) + 26;
    pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
    pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
    pps->constrained_intra_pred= get_bits1(&s->gb);
    pps->redundant_pic_cnt_present = get_bits1(&s->gb);

    pps->transform_8x8_mode= 0;
    h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
    memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
    memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));

    /* optional trailing fields, present only if bits remain in the RBSP */
    if(get_bits_count(&s->gb) < bit_length){
        pps->transform_8x8_mode= get_bits1(&s->gb);
        decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
        pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
    } else {
        pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];

    build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
    build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
    /* NOTE(review): this writes the *active* PPS copy (h->pps), not the pps
       just parsed — looks suspicious; confirm this is intentional. */
    if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
        h->pps.chroma_qp_diff= 1;

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
               pps_id, pps->sps_id,
               pps->cabac ? "CABAC" : "CAVLC",
               pps->slice_group_count,
               pps->ref_count[0], pps->ref_count[1],
               pps->weighted_pred ? "weighted" : "",
               pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
               pps->deblocking_filter_parameters_present ? "LPAR" : "",
               pps->constrained_intra_pred ? "CONSTR" : "",
               pps->redundant_pic_cnt_present ? "REDU" : "",
               pps->transform_8x8_mode ? "8x8DCT" : ""

    return 0;
7377 * Call decode_slice() for each context.
7379 * @param h h264 master context
7380 * @param context_count number of contexts to execute
7382 static void execute_decode_slices(H264Context *h, int context_count){
7383 MpegEncContext * const s = &h->s;
7384 AVCodecContext * const avctx= s->avctx;
7385 H264Context *hx;
7386 int i;
7388 if(avctx->vdpau_acceleration) {
7389 return;
7390 } else
7391 if(context_count == 1) {
7392 decode_slice(avctx, &h);
7393 } else {
7394 for(i = 1; i < context_count; i++) {
7395 hx = h->thread_context[i];
7396 hx->s.error_recognition = avctx->error_recognition;
7397 hx->s.error_count = 0;
7400 avctx->execute(avctx, (void *)decode_slice,
7401 (void **)h->thread_context, NULL, context_count, sizeof(void*));
7403 /* pull back stuff from slices to master context */
7404 hx = h->thread_context[context_count - 1];
7405 s->mb_x = hx->s.mb_x;
7406 s->mb_y = hx->s.mb_y;
7407 s->dropable = hx->s.dropable;
7408 s->picture_structure = hx->s.picture_structure;
7409 for(i = 1; i < context_count; i++)
7410 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units (Annex B start codes or AVC length
 * prefixes), unescape each one and dispatch it to the appropriate decoder
 * (slice, SPS, PPS, SEI, ...). Slice contexts are batched and executed via
 * execute_decode_slices().
 *
 * @return number of bytes consumed, or -1 on error.
 *
 * NOTE(review): in this extract, closing-brace-only lines appear to have
 * been lost; the statement sequence below is otherwise unchanged.
 */
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    int buf_index=0;
    H264Context *hx; ///< thread context
    int context_count = 0;

    h->max_contexts = avctx->thread_count;
#if 0
    int i;
    for(i=0; i<50; i++){
        av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
#endif
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
        h->current_slice = 0;
        if (!s->first_field)
            s->current_picture_ptr= NULL;

    for(;;){
        int consumed;
        int dst_length;
        int bit_length;
        const uint8_t *ptr;
        int i, nalsize = 0;
        int err;

        if(h->is_avc) {
            /* AVC/mp4 framing: big-endian length prefix of nal_length_size bytes */
            if(buf_index >= buf_size) break;
            nalsize = 0;
            for(i = 0; i < h->nal_length_size; i++)
                nalsize = (nalsize << 8) | buf[buf_index++];
            if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
                if(nalsize == 1){
                    buf_index++;
                    continue;
                }else{
                    av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
                    break;
        } else {
            // start code prefix search
            for(; buf_index + 3 < buf_size; buf_index++){
                // This should always succeed in the first iteration.
                if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
                    break;

            if(buf_index+3 >= buf_size) break;

            buf_index+=3;

        hx = h->thread_context[context_count];

        ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
        if (ptr==NULL || dst_length < 0){
            return -1;
        /* strip trailing zero bytes
           NOTE(review): ptr[dst_length - 1] is read before dst_length > 0 is
           checked — out-of-bounds read when dst_length == 0; the conditions
           should probably be swapped. */
        while(ptr[dst_length - 1] == 0 && dst_length > 0)
            dst_length--;
        bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));

        if(s->avctx->debug&FF_DEBUG_STARTCODE){
            av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);

        if (h->is_avc && (nalsize != consumed)){
            av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
            consumed= nalsize;

        buf_index += consumed;

        /* drop non-reference NALs when hurrying / skipping */
        if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
           ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
            continue;

      again:
        err = 0;
        switch(hx->nal_unit_type){
        case NAL_IDR_SLICE:
            if (h->nal_unit_type != NAL_IDR_SLICE) {
                av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
                return -1;
            idr(h); //FIXME ensure we don't loose some frames if there is reordering
            /* fallthrough: an IDR slice is parsed like a regular slice */
        case NAL_SLICE:
            init_get_bits(&hx->s.gb, ptr, bit_length);
            hx->intra_gb_ptr=
            hx->inter_gb_ptr= &hx->s.gb;
            hx->s.data_partitioning = 0;

            if((err = decode_slice_header(hx, h)))
               break;

            s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
            /* queue the slice for decoding unless skip settings discard it */
            if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
               && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
               && avctx->skip_frame < AVDISCARD_ALL)
                context_count++;
            break;
        case NAL_DPA:
            /* data partition A: slice header + partitioned bitstreams follow */
            init_get_bits(&hx->s.gb, ptr, bit_length);
            hx->intra_gb_ptr=
            hx->inter_gb_ptr= NULL;
            hx->s.data_partitioning = 1;

            err = decode_slice_header(hx, h);
            break;
        case NAL_DPB:
            init_get_bits(&hx->intra_gb, ptr, bit_length);
            hx->intra_gb_ptr= &hx->intra_gb;
            break;
        case NAL_DPC:
            init_get_bits(&hx->inter_gb, ptr, bit_length);
            hx->inter_gb_ptr= &hx->inter_gb;

            /* partition C completes the slice; queue it if A was seen */
            if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
               && s->context_initialized
               && s->hurry_up < 5
               && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
               && avctx->skip_frame < AVDISCARD_ALL)
                context_count++;
            break;
        case NAL_SEI:
            init_get_bits(&s->gb, ptr, bit_length);
            decode_sei(h);
            break;
        case NAL_SPS:
            init_get_bits(&s->gb, ptr, bit_length);
            decode_seq_parameter_set(h);

            if(s->flags& CODEC_FLAG_LOW_DELAY)
                s->low_delay=1;

            if(avctx->has_b_frames < 2)
                avctx->has_b_frames= !s->low_delay;
            break;
        case NAL_PPS:
            init_get_bits(&s->gb, ptr, bit_length);

            decode_picture_parameter_set(h, bit_length);

            break;
        case NAL_AUD:
        case NAL_END_SEQUENCE:
        case NAL_END_STREAM:
        case NAL_FILLER_DATA:
        case NAL_SPS_EXT:
        case NAL_AUXILIARY_SLICE:
            break;
        default:
            av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);

        if(context_count == h->max_contexts) {
            execute_decode_slices(h, context_count);
            context_count = 0;

        if (err < 0)
            av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
        else if(err == 1) {
            /* Slice could not be decoded in parallel mode, copy down
             * NAL unit stuff to context 0 and restart. Note that
             * rbsp_buffer is not transferred, but since we no longer
             * run in parallel mode this should not be an issue. */
            h->nal_unit_type = hx->nal_unit_type;
            h->nal_ref_idc   = hx->nal_ref_idc;
            hx = h;
            goto again;

    if(context_count)
        execute_decode_slices(h, context_count);
    return buf_index;
7601 * returns the number of bytes consumed for building the current frame
7603 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7604 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7605 if(pos+10>buf_size) pos=buf_size; // oops ;)
7607 return pos;
/**
 * Decoder entry point: parses the NAL units in buf and, once a complete
 * frame (or both fields of a field pair) has been decoded, returns the
 * next picture in display order through *pict / *data_size.
 * Returns the number of consumed bytes, or -1 on error.
 *
 * NOTE(review): this chunk is a scrape with embedded original line
 * numbers and missing '}' lines; the code is left byte-identical here
 * and only comments were added. Nesting of some branches is inferred
 * from the upstream file — confirm against a clean checkout.
 */
7610 static int decode_frame(AVCodecContext *avctx,
7611 void *data, int *data_size,
7612 const uint8_t *buf, int buf_size)
7614 H264Context *h = avctx->priv_data;
7615 MpegEncContext *s = &h->s;
7616 AVFrame *pict = data;
7617 int buf_index;
7619 s->flags= avctx->flags;
7620 s->flags2= avctx->flags2;
7622 /* end of stream, output what is still in the buffers */
7623 if (buf_size == 0) {
7624 Picture *out;
7625 int i, out_idx;
7627 //FIXME factorize this with the output code below
/* Pick the delayed picture with the smallest poc; the scan stops at a
 * picture with poc==0 or a key frame (reordering boundary). */
7628 out = h->delayed_pic[0];
7629 out_idx = 0;
7630 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7631 if(h->delayed_pic[i]->poc < out->poc){
7632 out = h->delayed_pic[i];
7633 out_idx = i;
/* Remove the chosen picture from the delay buffer. */
7636 for(i=out_idx; h->delayed_pic[i]; i++)
7637 h->delayed_pic[i] = h->delayed_pic[i+1];
7639 if(out){
7640 *data_size = sizeof(AVFrame);
7641 *pict= *(AVFrame*)out;
7644 return 0;
/* First call for an AVC stream: parse the SPS/PPS sets carried in the
 * avcC extradata before touching the payload itself. */
7647 if(h->is_avc && !h->got_avcC) {
7648 int i, cnt, nalsize;
7649 unsigned char *p = avctx->extradata;
7650 if(avctx->extradata_size < 7) {
7651 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7652 return -1;
7654 if(*p != 1) {
7655 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7656 return -1;
7658 /* sps and pps in the avcC always have length coded with 2 bytes,
7659 so put a fake nal_length_size = 2 while parsing them */
7660 h->nal_length_size = 2;
7661 // Decode sps from avcC
7662 cnt = *(p+5) & 0x1f; // Number of sps
7663 p += 6;
7664 for (i = 0; i < cnt; i++) {
7665 nalsize = AV_RB16(p) + 2;
7666 if(decode_nal_units(h, p, nalsize) < 0) {
7667 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7668 return -1;
7670 p += nalsize;
7672 // Decode pps from avcC
7673 cnt = *(p++); // Number of pps
7674 for (i = 0; i < cnt; i++) {
7675 nalsize = AV_RB16(p) + 2;
7676 if(decode_nal_units(h, p, nalsize) != nalsize) {
7677 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7678 return -1;
7680 p += nalsize;
7682 // Now store right nal length size, that will be use to parse all other nals
7683 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7684 // Do not reparse avcC
7685 h->got_avcC = 1;
/* Annex-B style extradata: run it once through the regular NAL decoder. */
7688 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7689 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7690 return -1;
7691 h->got_avcC = 1;
7694 buf_index=decode_nal_units(h, buf, buf_size);
7695 if(buf_index < 0)
7696 return -1;
/* Without CODEC_FLAG2_CHUNKS a packet must have produced a picture. */
7698 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7699 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7700 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7701 return -1;
/* The picture is complete (non-chunked mode, or the last MB row was
 * reached in chunked mode): finish the frame and select output. */
7704 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7705 Picture *out = s->current_picture_ptr;
7706 Picture *cur = s->current_picture_ptr;
7707 int i, pics, cross_idr, out_of_order, out_idx;
7709 s->mb_y= 0;
7711 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7712 s->current_picture_ptr->pict_type= s->pict_type;
7714 #ifdef HAVE_VDPAU
7715 if (avctx->vdpau_acceleration) {
7716 ff_VDPAU_h264_set_reference_frames(h);
7718 #endif
/* Reference-picture marking and POC history only advance for pictures
 * that are not droppable. */
7720 if(!s->dropable) {
7721 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7722 h->prev_poc_msb= h->poc_msb;
7723 h->prev_poc_lsb= h->poc_lsb;
7725 h->prev_frame_num_offset= h->frame_num_offset;
7726 h->prev_frame_num= h->frame_num;
7728 #ifdef HAVE_VDPAU
7729 if (avctx->vdpau_acceleration) {
7730 ff_VDPAU_h264_picture_complete(h, buf, buf_size);
7732 #endif
7735 * FIXME: Error handling code does not seem to support interlaced
7736 * when slices span multiple rows
7737 * The ff_er_add_slice calls don't work right for bottom
7738 * fields; they cause massive erroneous error concealing
7739 * Error marking covers both fields (top and bottom).
7740 * This causes a mismatched s->error_count
7741 * and a bad error table. Further, the error count goes to
7742 * INT_MAX when called for bottom field, because mb_y is
7743 * past end by one (callers fault) and resync_mb_y != 0
7744 * causes problems for the first MB line, too.
7746 #ifdef HAVE_VDPAU
7747 if (!avctx->vdpau_acceleration)
7748 #endif
7749 if (!FIELD_PICTURE)
7750 ff_er_frame_end(s);
7752 MPV_frame_end(s);
/* field_poc stays INT_MAX until the matching field arrives. */
7754 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7755 /* Wait for second field. */
7756 *data_size = 0;
7758 } else {
7759 cur->repeat_pict = 0;
7761 /* Signal interlacing information externally. */
7762 /* Prioritize picture timing SEI information over used decoding process if it exists. */
7763 if(h->sps.pic_struct_present_flag){
7764 switch (h->sei_pic_struct)
7766 case SEI_PIC_STRUCT_FRAME:
7767 cur->interlaced_frame = 0;
7768 break;
7769 case SEI_PIC_STRUCT_TOP_FIELD:
7770 case SEI_PIC_STRUCT_BOTTOM_FIELD:
7771 case SEI_PIC_STRUCT_TOP_BOTTOM:
7772 case SEI_PIC_STRUCT_BOTTOM_TOP:
7773 cur->interlaced_frame = 1;
7774 break;
7775 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
7776 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
7777 // Signal the possibility of telecined film externally (pic_struct 5,6)
7778 // From these hints, let the applications decide if they apply deinterlacing.
7779 cur->repeat_pict = 1;
7780 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7781 break;
7782 case SEI_PIC_STRUCT_FRAME_DOUBLING:
7783 // Force progressive here, as doubling interlaced frame is a bad idea.
7784 cur->interlaced_frame = 0;
7785 cur->repeat_pict = 2;
7786 break;
7787 case SEI_PIC_STRUCT_FRAME_TRIPLING:
7788 cur->interlaced_frame = 0;
7789 cur->repeat_pict = 4;
7790 break;
7792 }else{
7793 /* Derive interlacing flag from used decoding process. */
7794 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7797 if (cur->field_poc[0] != cur->field_poc[1]){
7798 /* Derive top_field_first from field pocs. */
7799 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7800 }else{
7801 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
7802 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
7803 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
7804 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
7805 cur->top_field_first = 1;
7806 else
7807 cur->top_field_first = 0;
7808 }else{
7809 /* Most likely progressive */
7810 cur->top_field_first = 0;
7814 //FIXME do something with unavailable reference frames
7816 /* Sort B-frames into display order */
7818 if(h->sps.bitstream_restriction_flag
7819 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7820 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7821 s->low_delay = 0;
/* In strict mode, without bitstream restrictions, assume the worst
 * case reorder depth. */
7824 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7825 && !h->sps.bitstream_restriction_flag){
7826 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7827 s->low_delay= 0;
/* Append the current picture to the delay buffer and pin it so the
 * DPB cannot recycle it while it awaits output. */
7830 pics = 0;
7831 while(h->delayed_pic[pics]) pics++;
7833 assert(pics <= MAX_DELAYED_PIC_COUNT);
7835 h->delayed_pic[pics++] = cur;
7836 if(cur->reference == 0)
7837 cur->reference = DELAYED_PIC_REF;
/* Pick the lowest-poc candidate, as in the flush path above. */
7839 out = h->delayed_pic[0];
7840 out_idx = 0;
7841 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7842 if(h->delayed_pic[i]->poc < out->poc){
7843 out = h->delayed_pic[i];
7844 out_idx = i;
7846 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7848 out_of_order = !cross_idr && out->poc < h->outputed_poc;
/* Grow has_b_frames when pictures keep arriving out of order and the
 * stream declared no reorder bound. */
7850 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7852 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7853 || (s->low_delay &&
7854 ((!cross_idr && out->poc > h->outputed_poc + 2)
7855 || cur->pict_type == FF_B_TYPE)))
7857 s->low_delay = 0;
7858 s->avctx->has_b_frames++;
/* Emit the chosen picture once the delay buffer exceeds the reorder
 * depth (or out-of-order output forces it out). */
7861 if(out_of_order || pics > s->avctx->has_b_frames){
7862 out->reference &= ~DELAYED_PIC_REF;
7863 for(i=out_idx; h->delayed_pic[i]; i++)
7864 h->delayed_pic[i] = h->delayed_pic[i+1];
7866 if(!out_of_order && pics > s->avctx->has_b_frames){
7867 *data_size = sizeof(AVFrame);
7869 h->outputed_poc = out->poc;
7870 *pict= *(AVFrame*)out;
7871 }else{
7872 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7877 assert(pict->data[0] || !*data_size);
7878 ff_print_debug_info(s, pict);
7879 //printf("out %d\n", (int)pict->data[0]);
7880 #if 0 //?
7882 /* Return the Picture timestamp as the frame number */
7883 /* we subtract 1 because it is added on utils.c */
7884 avctx->frame_number = s->picture_number - 1;
7885 #endif
7886 return get_consumed_bytes(s, buf_index, buf_size);
7888 #if 0
/* Dead code (compiled out with #if 0): fills h->mb_avail[] with the
 * availability of the neighbouring macroblocks of the current MB,
 * where "available" means the neighbour exists and belongs to the same
 * slice (slice_table entry equals h->slice_num).
 * Indices 0..2 are the top-left/top/top-right row, 3 is the left
 * neighbour; 4 and 5 are hard-coded (see FIXMEs). */
7889 static inline void fill_mb_avail(H264Context *h){
7890 MpegEncContext * const s = &h->s;
7891 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7893 if(s->mb_y){
7894 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7895 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7896 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7897 }else{
// First MB row: no neighbours above.
7898 h->mb_avail[0]=
7899 h->mb_avail[1]=
7900 h->mb_avail[2]= 0;
7902 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7903 h->mb_avail[4]= 1; //FIXME move out
7904 h->mb_avail[5]= 0; //FIXME move out
7906 #endif
7908 #ifdef TEST
7909 #undef printf
7910 #undef random
7911 #define COUNT 8000
7912 #define SIZE (COUNT*40)
/**
 * Standalone self-test, built only when TEST is defined.
 * Round-trips COUNT unsigned and signed exp-Golomb codes through a
 * PutBitContext/GetBitContext pair, timing each call and reporting any
 * read/write mismatch. Further DCT/quantizer/NAL tests below are
 * disabled with #if 0.
 */
7913 int main(void){
7914 int i;
7915 uint8_t temp[SIZE];
7916 PutBitContext pb;
7917 GetBitContext gb;
7918 // int int_temp[10000];
7919 DSPContext dsp;
7920 AVCodecContext avctx;
7922 dsputil_init(&dsp, &avctx);
/* Write COUNT unsigned exp-Golomb codes... */
7924 init_put_bits(&pb, temp, SIZE);
7925 printf("testing unsigned exp golomb\n");
7926 for(i=0; i<COUNT; i++){
7927 START_TIMER
7928 set_ue_golomb(&pb, i);
7929 STOP_TIMER("set_ue_golomb");
7931 flush_put_bits(&pb);
/* ...then read them back and verify each value. */
7933 init_get_bits(&gb, temp, 8*SIZE);
7934 for(i=0; i<COUNT; i++){
7935 int j, s;
7937 s= show_bits(&gb, 24);
7939 START_TIMER
7940 j= get_ue_golomb(&gb);
7941 if(j != i){
7942 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7943 // return -1;
7945 STOP_TIMER("get_ue_golomb");
/* Same round-trip for signed exp-Golomb, centred around zero. */
7949 init_put_bits(&pb, temp, SIZE);
7950 printf("testing signed exp golomb\n");
7951 for(i=0; i<COUNT; i++){
7952 START_TIMER
7953 set_se_golomb(&pb, i - COUNT/2);
7954 STOP_TIMER("set_se_golomb");
7956 flush_put_bits(&pb);
7958 init_get_bits(&gb, temp, 8*SIZE);
7959 for(i=0; i<COUNT; i++){
7960 int j, s;
7962 s= show_bits(&gb, 24);
7964 START_TIMER
7965 j= get_se_golomb(&gb);
7966 if(j != i - COUNT/2){
7967 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7968 // return -1;
7970 STOP_TIMER("get_se_golomb");
/* Disabled: 4x4 (I)DCT accuracy, quantizer sweep and NAL
 * escape/unescape round-trip tests. */
7973 #if 0
7974 printf("testing 4x4 (I)DCT\n");
7976 DCTELEM block[16];
7977 uint8_t src[16], ref[16];
7978 uint64_t error= 0, max_error=0;
7980 for(i=0; i<COUNT; i++){
7981 int j;
7982 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7983 for(j=0; j<16; j++){
7984 ref[j]= random()%255;
7985 src[j]= random()%255;
7988 h264_diff_dct_c(block, src, ref, 4);
7990 //normalize
7991 for(j=0; j<16; j++){
7992 // printf("%d ", block[j]);
7993 block[j]= block[j]*4;
7994 if(j&1) block[j]= (block[j]*4 + 2)/5;
7995 if(j&4) block[j]= (block[j]*4 + 2)/5;
7997 // printf("\n");
7999 s->dsp.h264_idct_add(ref, block, 4);
8000 /* for(j=0; j<16; j++){
8001 printf("%d ", ref[j]);
8003 printf("\n");*/
8005 for(j=0; j<16; j++){
8006 int diff= FFABS(src[j] - ref[j]);
8008 error+= diff*diff;
8009 max_error= FFMAX(max_error, diff);
8012 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8013 printf("testing quantizer\n");
8014 for(qp=0; qp<52; qp++){
8015 for(i=0; i<16; i++)
8016 src1_block[i]= src2_block[i]= random()%255;
8019 printf("Testing NAL layer\n");
8021 uint8_t bitstream[COUNT];
8022 uint8_t nal[COUNT*2];
8023 H264Context h;
8024 memset(&h, 0, sizeof(H264Context));
8026 for(i=0; i<COUNT; i++){
8027 int zeros= i;
8028 int nal_length;
8029 int consumed;
8030 int out_length;
8031 uint8_t *out;
8032 int j;
8034 for(j=0; j<COUNT; j++){
8035 bitstream[j]= (random() % 255) + 1;
8038 for(j=0; j<zeros; j++){
8039 int pos= random() % COUNT;
8040 while(bitstream[pos] == 0){
8041 pos++;
8042 pos %= COUNT;
8044 bitstream[pos]=0;
8047 START_TIMER
8049 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8050 if(nal_length<0){
8051 printf("encoding failed\n");
8052 return -1;
8055 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8057 STOP_TIMER("NAL")
8059 if(out_length != COUNT){
8060 printf("incorrect length %d %d\n", out_length, COUNT);
8061 return -1;
8064 if(consumed != nal_length){
8065 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8066 return -1;
8069 if(memcmp(bitstream, out, COUNT)){
8070 printf("mismatch\n");
8071 return -1;
8074 #endif
8076 printf("Testing RBSP\n");
8079 return 0;
8081 #endif /* TEST */
8084 static av_cold int decode_end(AVCodecContext *avctx)
8086 H264Context *h = avctx->priv_data;
8087 MpegEncContext *s = &h->s;
8088 int i;
8090 av_freep(&h->rbsp_buffer[0]);
8091 av_freep(&h->rbsp_buffer[1]);
8092 free_tables(h); //FIXME cleanup init stuff perhaps
8094 for(i = 0; i < MAX_SPS_COUNT; i++)
8095 av_freep(h->sps_buffers + i);
8097 for(i = 0; i < MAX_PPS_COUNT; i++)
8098 av_freep(h->pps_buffers + i);
8100 MPV_common_end(s);
8102 // memset(h, 0, sizeof(H264Context));
8104 return 0;
8108 AVCodec h264_decoder = {
8109 "h264",
8110 CODEC_TYPE_VIDEO,
8111 CODEC_ID_H264,
8112 sizeof(H264Context),
8113 decode_init,
8114 NULL,
8115 decode_end,
8116 decode_frame,
8117 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
8118 .flush= flush_dpb,
8119 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
8122 #ifdef HAVE_VDPAU
8123 static av_cold int h264_vdpau_decode_init(AVCodecContext *avctx){
8124 if( avctx->thread_count > 1)
8125 return -1;
8126 if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) )
8127 return -1;
8128 if( !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD) ){
8129 dprintf(avctx, "h264.c: VDPAU decoder does not set SLICE_FLAG_ALLOW_FIELD\n");
8131 decode_init(avctx);
8133 avctx->vdpau_acceleration = 1;
8135 return 0;
8138 AVCodec h264_vdpau_decoder = {
8139 "h264_vdpau",
8140 CODEC_TYPE_VIDEO,
8141 CODEC_ID_H264_VDPAU,
8142 sizeof(H264Context),
8143 h264_vdpau_decode_init,
8144 NULL,
8145 decode_end,
8146 decode_frame,
8147 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
8148 .flush= flush_dpb,
8149 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
8151 #endif
8153 #include "svq3.c"