Mark formats requiring external libs with an 'E' in the format support tables.
[FFMpeg-mirror/ffmpeg-vdpau.git] / libavcodec / h264.c
blobecd4fc742ad0adc6534af521e3d9bfb2980e53d4
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h"
33 #include "h264_parser.h"
34 #include "golomb.h"
35 #include "rectangle.h"
37 #include "cabac.h"
38 #ifdef ARCH_X86
39 #include "i386/h264_i386.h"
40 #endif
42 //#undef NDEBUG
43 #include <assert.h>
45 /**
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC coeff_token VLCs; the four tables correspond to the four
 * context ranges of the spec, with statically allocated backing storage
 * (sizes listed in coeff_token_vlc_tables_size). */
51 static VLC coeff_token_vlc[4];
52 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
/* coeff_token VLC used for the chroma DC blocks. */
55 static VLC chroma_dc_coeff_token_vlc;
56 static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57 static const int chroma_dc_coeff_token_vlc_table_size = 256;
/* total_zeros VLCs, one per possible total_coeff value (1..15). */
59 static VLC total_zeros_vlc[15];
60 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61 static const int total_zeros_vlc_tables_size = 512;
/* total_zeros VLCs for chroma DC (1..3 coefficients). */
63 static VLC chroma_dc_total_zeros_vlc[3];
64 static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65 static const int chroma_dc_total_zeros_vlc_tables_size = 8;
/* run_before VLCs for small zeros_left values; run7_vlc covers the
 * remaining (larger) case. */
67 static VLC run_vlc[6];
68 static VLC_TYPE run_vlc_tables[6][8][2];
69 static const int run_vlc_tables_size = 8;
71 static VLC run7_vlc;
72 static VLC_TYPE run7_vlc_table[96][2];
73 static const int run7_vlc_table_size = 96;
/* forward declarations for functions defined later in this file */
75 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 static Picture * remove_long(H264Context *h, int i, int ref_mask);
81 static av_always_inline uint32_t pack16to32(int a, int b){
82 #ifdef WORDS_BIGENDIAN
83 return (b&0xFFFF) + (a<<16);
84 #else
85 return (a&0xFFFF) + (b<<16);
86 #endif
/* qp%6 lookup for all legal luma qp values (0..51), used by dequant. */
89 static const uint8_t rem6[52]={
90 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* qp/6 lookup for all legal luma qp values (0..51), used by dequant. */
93 static const uint8_t div6[52]={
94 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Per-row left-neighbour block index scans used by fill_caches(); the
 * non-zero variants remap the left blocks for the MBAFF cases where the
 * left neighbour pair is coded in the other field/frame mode. */
97 static const int left_block_options[4][8]={
98 {0,1,2,3,7,10,8,11},
99 {2,2,3,3,8,11,8,11},
100 {0,0,1,1,7,10,7,10},
101 {0,2,0,2,7,10,7,10}
/**
 * Fills the per-macroblock neighbour caches (intra prediction modes,
 * non-zero coefficient counts, motion vectors, reference indices, mvds,
 * direct flags and cbp) from the neighbouring macroblocks, handling
 * MBAFF remapping and slice boundaries.
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when only the subset of data needed by the
 *                    loop filter has to be filled
 */
104 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
105 MpegEncContext * const s = &h->s;
106 const int mb_xy= h->mb_xy;
107 int topleft_xy, top_xy, topright_xy, left_xy[2];
108 int topleft_type, top_type, topright_type, left_type[2];
109 int * left_block;
110 int topleft_partition= -1;
111 int i;
113 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
115 //FIXME deblocking could skip the intra and nnz parts.
116 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
117 return;
119 /* Wow, what a mess, why didn't they simplify the interlacing & intra
120 * stuff, I can't imagine that these complex rules are worth it. */
122 topleft_xy = top_xy - 1;
123 topright_xy= top_xy + 1;
124 left_xy[1] = left_xy[0] = mb_xy-1;
125 left_block = left_block_options[0];
/* MBAFF: a neighbour of a field/frame MB may be the other macroblock of
 * a vertical pair, so the neighbour indices and the left-block scan have
 * to be remapped depending on the field/frame flags of each pair. */
126 if(FRAME_MBAFF){
127 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
128 const int top_pair_xy = pair_xy - s->mb_stride;
129 const int topleft_pair_xy = top_pair_xy - 1;
130 const int topright_pair_xy = top_pair_xy + 1;
131 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
132 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
133 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
134 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
135 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
136 const int bottom = (s->mb_y & 1);
137 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
138 if (bottom
139 ? !curr_mb_frame_flag // bottom macroblock
140 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
142 top_xy -= s->mb_stride;
144 if (bottom
145 ? !curr_mb_frame_flag // bottom macroblock
146 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
148 topleft_xy -= s->mb_stride;
149 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
150 topleft_xy += s->mb_stride;
151 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
152 topleft_partition = 0;
154 if (bottom
155 ? !curr_mb_frame_flag // bottom macroblock
156 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
158 topright_xy -= s->mb_stride;
160 if (left_mb_frame_flag != curr_mb_frame_flag) {
161 left_xy[1] = left_xy[0] = pair_xy - 1;
162 if (curr_mb_frame_flag) {
163 if (bottom) {
164 left_block = left_block_options[1];
165 } else {
166 left_block= left_block_options[2];
168 } else {
169 left_xy[1] += s->mb_stride;
170 left_block = left_block_options[3];
175 h->top_mb_xy = top_xy;
176 h->left_mb_xy[0] = left_xy[0];
177 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: neighbour types only matter when the neighbour lies
 * inside the picture (slice_table < 255); for MBAFF the ref values that
 * were rescaled for MC are restored below. */
178 if(for_deblock){
179 topleft_type = 0;
180 topright_type = 0;
181 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
182 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
183 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
185 if(MB_MBAFF && !IS_INTRA(mb_type)){
186 int list;
187 for(list=0; list<h->list_count; list++){
188 //These values where changed for ease of performing MC, we need to change them back
189 //FIXME maybe we can make MC and loop filter use the same values or prevent
190 //the MC code from changing ref_cache and rather use a temporary array.
191 if(USES_LIST(mb_type,list)){
192 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
193 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
194 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
195 ref += h->b8_stride;
196 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
197 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
201 }else{
202 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
203 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
204 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
205 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
206 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction: derive which neighbouring sample edges are
 * available, honouring constrained_intra_pred via type_mask. */
208 if(IS_INTRA(mb_type)){
209 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
210 h->topleft_samples_available=
211 h->top_samples_available=
212 h->left_samples_available= 0xFFFF;
213 h->topright_samples_available= 0xEEEA;
215 if(!(top_type & type_mask)){
216 h->topleft_samples_available= 0xB3FF;
217 h->top_samples_available= 0x33FF;
218 h->topright_samples_available= 0x26EA;
220 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
221 if(IS_INTERLACED(mb_type)){
222 if(!(left_type[0] & type_mask)){
223 h->topleft_samples_available&= 0xDFFF;
224 h->left_samples_available&= 0x5FFF;
226 if(!(left_type[1] & type_mask)){
227 h->topleft_samples_available&= 0xFF5F;
228 h->left_samples_available&= 0xFF5F;
230 }else{
231 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
232 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
233 assert(left_xy[0] == left_xy[1]);
234 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
235 h->topleft_samples_available&= 0xDF5F;
236 h->left_samples_available&= 0x5F5F;
239 }else{
240 if(!(left_type[0] & type_mask)){
241 h->topleft_samples_available&= 0xDF5F;
242 h->left_samples_available&= 0x5F5F;
246 if(!(topleft_type & type_mask))
247 h->topleft_samples_available&= 0x7FFF;
249 if(!(topright_type & type_mask))
250 h->topright_samples_available&= 0xFBFF;
/* Cache the intra4x4 prediction modes of the top/left neighbour rows;
 * -1 marks an unavailable neighbour, 2 (DC) is substituted when the
 * neighbour exists but is not intra4x4-coded. */
252 if(IS_INTRA4x4(mb_type)){
253 if(IS_INTRA4x4(top_type)){
254 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
255 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
256 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
257 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
258 }else{
259 int pred;
260 if(!(top_type & type_mask))
261 pred= -1;
262 else{
263 pred= 2;
265 h->intra4x4_pred_mode_cache[4+8*0]=
266 h->intra4x4_pred_mode_cache[5+8*0]=
267 h->intra4x4_pred_mode_cache[6+8*0]=
268 h->intra4x4_pred_mode_cache[7+8*0]= pred;
270 for(i=0; i<2; i++){
271 if(IS_INTRA4x4(left_type[i])){
272 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
273 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
274 }else{
275 int pred;
276 if(!(left_type[i] & type_mask))
277 pred= -1;
278 else{
279 pred= 2;
281 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
282 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
291 0 . T T. T T T T
292 1 L . .L . . . .
293 2 L . .L . . . .
294 3 . T TL . . . .
295 4 L . .L . . . .
296 5 L . .. . . . .
298 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* non_zero_count cache: copy the neighbours' nnz values; a missing
 * neighbour is marked 64, except for CABAC inter MBs where it counts
 * as 0. */
299 if(top_type){
300 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
301 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
302 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
303 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
305 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
306 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
308 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
309 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
311 }else{
312 h->non_zero_count_cache[4+8*0]=
313 h->non_zero_count_cache[5+8*0]=
314 h->non_zero_count_cache[6+8*0]=
315 h->non_zero_count_cache[7+8*0]=
317 h->non_zero_count_cache[1+8*0]=
318 h->non_zero_count_cache[2+8*0]=
320 h->non_zero_count_cache[1+8*3]=
321 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
325 for (i=0; i<2; i++) {
326 if(left_type[i]){
327 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
328 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
329 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
330 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
331 }else{
332 h->non_zero_count_cache[3+8*1 + 2*8*i]=
333 h->non_zero_count_cache[3+8*2 + 2*8*i]=
334 h->non_zero_count_cache[0+8*1 + 8*i]=
335 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CABAC context modelling needs the neighbours' coded block patterns. */
339 if( h->pps.cabac ) {
340 // top_cbp
341 if(top_type) {
342 h->top_cbp = h->cbp_table[top_xy];
343 } else if(IS_INTRA(mb_type)) {
344 h->top_cbp = 0x1C0;
345 } else {
346 h->top_cbp = 0;
348 // left_cbp
349 if (left_type[0]) {
350 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
351 } else if(IS_INTRA(mb_type)) {
352 h->left_cbp = 0x1C0;
353 } else {
354 h->left_cbp = 0;
356 if (left_type[0]) {
357 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
359 if (left_type[1]) {
360 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
364 #if 1
/* Inter prediction: fill the motion vector and reference index caches
 * from all available neighbours, per list. */
365 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
366 int list;
367 for(list=0; list<h->list_count; list++){
368 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
369 /*if(!h->mv_cache_clean[list]){
370 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
371 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
372 h->mv_cache_clean[list]= 1;
374 continue;
376 h->mv_cache_clean[list]= 0;
378 if(USES_LIST(top_type, list)){
379 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
380 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
382 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
383 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
384 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
385 h->ref_cache[list][scan8[0] + 0 - 1*8]=
386 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
387 h->ref_cache[list][scan8[0] + 2 - 1*8]=
388 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
389 }else{
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
391 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
392 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
393 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
394 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
397 for(i=0; i<2; i++){
398 int cache_idx = scan8[0] - 1 + i*2*8;
399 if(USES_LIST(left_type[i], list)){
400 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
401 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
402 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
403 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
404 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
405 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
406 }else{
407 *(uint32_t*)h->mv_cache [list][cache_idx ]=
408 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
409 h->ref_cache[list][cache_idx ]=
410 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
414 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
415 continue;
417 if(USES_LIST(topleft_type, list)){
418 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
419 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
422 }else{
423 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
424 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
427 if(USES_LIST(topright_type, list)){
428 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
429 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
430 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
432 }else{
433 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
434 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
437 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
438 continue;
440 h->ref_cache[list][scan8[5 ]+1] =
441 h->ref_cache[list][scan8[7 ]+1] =
442 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
443 h->ref_cache[list][scan8[4 ]] =
444 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
445 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
446 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
447 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
448 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
449 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
/* CABAC also needs the neighbours' motion vector differences. */
451 if( h->pps.cabac ) {
452 /* XXX beurk, Load mvd */
453 if(USES_LIST(top_type, list)){
454 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
456 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
457 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
458 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
459 }else{
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
461 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
462 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
463 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
465 if(USES_LIST(left_type[0], list)){
466 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
467 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
468 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
469 }else{
470 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
471 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
473 if(USES_LIST(left_type[1], list)){
474 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
475 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
476 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
477 }else{
478 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
479 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
481 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
482 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
483 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
484 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
485 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache the neighbours' direct-mode flags. */
487 if(h->slice_type_nos == FF_B_TYPE){
488 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
490 if(IS_DIRECT(top_type)){
491 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
492 }else if(IS_8X8(top_type)){
493 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
494 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
495 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
496 }else{
497 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
500 if(IS_DIRECT(left_type[0]))
501 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
502 else if(IS_8X8(left_type[0]))
503 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
504 else
505 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
507 if(IS_DIRECT(left_type[1]))
508 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
509 else if(IS_8X8(left_type[1]))
510 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
511 else
512 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: rescale cached refs/MVs where a neighbour's field/frame coding
 * differs from the current macroblock's (field <-> frame MV mapping). */
516 if(FRAME_MBAFF){
517 #define MAP_MVS\
518 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
519 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
521 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
522 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
523 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
524 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
525 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
526 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
527 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
528 if(MB_FIELD){
529 #define MAP_F2F(idx, mb_type)\
530 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
531 h->ref_cache[list][idx] <<= 1;\
532 h->mv_cache[list][idx][1] /= 2;\
533 h->mvd_cache[list][idx][1] /= 2;\
535 MAP_MVS
536 #undef MAP_F2F
537 }else{
538 #define MAP_F2F(idx, mb_type)\
539 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
540 h->ref_cache[list][idx] >>= 1;\
541 h->mv_cache[list][idx][1] <<= 1;\
542 h->mvd_cache[list][idx][1] <<= 1;\
544 MAP_MVS
545 #undef MAP_F2F
550 #endif
552 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
555 static inline void write_back_intra_pred_mode(H264Context *h){
556 const int mb_xy= h->mb_xy;
558 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
559 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
560 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
561 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
562 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
563 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
564 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
568 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
570 static inline int check_intra4x4_pred_mode(H264Context *h){
571 MpegEncContext * const s = &h->s;
572 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
573 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
574 int i;
576 if(!(h->top_samples_available&0x8000)){
577 for(i=0; i<4; i++){
578 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
579 if(status<0){
580 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
581 return -1;
582 } else if(status){
583 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
588 if((h->left_samples_available&0x8888)!=0x8888){
589 static const int mask[4]={0x8000,0x2000,0x80,0x20};
590 for(i=0; i<4; i++){
591 if(!(h->left_samples_available&mask[i])){
592 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
593 if(status<0){
594 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
595 return -1;
596 } else if(status){
597 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
603 return 0;
604 } //FIXME cleanup like next
607 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
609 static inline int check_intra_pred_mode(H264Context *h, int mode){
610 MpegEncContext * const s = &h->s;
611 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
612 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
614 if(mode > 6U) {
615 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
616 return -1;
619 if(!(h->top_samples_available&0x8000)){
620 mode= top[ mode ];
621 if(mode<0){
622 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
623 return -1;
627 if((h->left_samples_available&0x8080) != 0x8080){
628 mode= left[ mode ];
629 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
630 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
632 if(mode<0){
633 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
634 return -1;
638 return mode;
642 * gets the predicted intra4x4 prediction mode.
644 static inline int pred_intra_mode(H264Context *h, int n){
645 const int index8= scan8[n];
646 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
647 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
648 const int min= FFMIN(left, top);
650 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
652 if(min<0) return DC_PRED;
653 else return min;
656 static inline void write_back_non_zero_count(H264Context *h){
657 const int mb_xy= h->mb_xy;
659 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
660 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
661 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
662 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
663 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
664 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
665 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
667 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
668 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
669 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
671 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
672 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
673 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
677 * gets the predicted number of non-zero coefficients.
678 * @param n block index
680 static inline int pred_non_zero_count(H264Context *h, int n){
681 const int index8= scan8[n];
682 const int left= h->non_zero_count_cache[index8 - 1];
683 const int top = h->non_zero_count_cache[index8 - 8];
684 int i= left + top;
686 if(i<64) i= (i+1)>>1;
688 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
690 return i&31;
/**
 * Determines the block used for diagonal MV prediction: normally the
 * top-right neighbour, falling back to the top-left one when it is
 * unavailable. Points *C at the chosen motion vector and returns its
 * reference index. In MBAFF the MV may come from the other field and is
 * rescaled (via SET_DIAG_MV) before use.
 * @param i cache index of the current block (scan8-based)
 * @param part_width partition width in 4x4 units
 */
693 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
694 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
695 MpegEncContext *s = &h->s;
697 /* there is no consistent mapping of mvs to neighboring locations that will
698 * make mbaff happy, so we can't move all this logic to fill_caches */
699 if(FRAME_MBAFF){
700 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
701 const int16_t *mv;
702 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
703 *C = h->mv_cache[list][scan8[0]-2];
705 if(!MB_FIELD
706 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
707 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
708 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV fetches an MV from another field pair, scales the vertical
 * component/reference between field and frame units, and returns. */
709 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
710 const int x4 = X4, y4 = Y4;\
711 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
712 if(!USES_LIST(mb_type,list))\
713 return LIST_NOT_USED;\
714 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
715 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
716 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
717 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
719 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
/* top-right neighbour unavailable in a left-edge MBAFF case:
 * try the left neighbour's other-field MV instead */
722 if(topright_ref == PART_NOT_AVAILABLE
723 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
724 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
725 if(!MB_FIELD
726 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
727 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
729 if(MB_FIELD
730 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
731 && i >= scan8[0]+8){
732 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
733 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
736 #undef SET_DIAG_MV
/* non-MBAFF (or no special case hit): top-right if available,
 * otherwise fall back to the top-left neighbour */
739 if(topright_ref != PART_NOT_AVAILABLE){
740 *C= h->mv_cache[list][ i - 8 + part_width ];
741 return topright_ref;
742 }else{
743 tprintf(s->avctx, "topright MV not available\n");
745 *C= h->mv_cache[list][ i - 8 - 1 ];
746 return h->ref_cache[list][ i - 8 - 1 ];
751 * gets the predicted MV.
752 * @param n the block index
753 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
754 * @param mx the x component of the predicted motion vector
755 * @param my the y component of the predicted motion vector
757 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
758 const int index8= scan8[n];
759 const int top_ref= h->ref_cache[list][ index8 - 8 ];
760 const int left_ref= h->ref_cache[list][ index8 - 1 ];
761 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
762 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
763 const int16_t * C;
764 int diagonal_ref, match_count;
766 assert(part_width==1 || part_width==2 || part_width==4);
768 /* mv_cache
769 B . . A T T T T
770 U . . L . . , .
771 U . . L . . . .
772 U . . L . . , .
773 . . . L . . . .
776 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
777 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
778 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
779 if(match_count > 1){ //most common
780 *mx= mid_pred(A[0], B[0], C[0]);
781 *my= mid_pred(A[1], B[1], C[1]);
782 }else if(match_count==1){
783 if(left_ref==ref){
784 *mx= A[0];
785 *my= A[1];
786 }else if(top_ref==ref){
787 *mx= B[0];
788 *my= B[1];
789 }else{
790 *mx= C[0];
791 *my= C[1];
793 }else{
794 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
795 *mx= A[0];
796 *my= A[1];
797 }else{
798 *mx= mid_pred(A[0], B[0], C[0]);
799 *my= mid_pred(A[1], B[1], C[1]);
803 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
807 * gets the directionally predicted 16x8 MV.
808 * @param n the block index
809 * @param mx the x component of the predicted motion vector
810 * @param my the y component of the predicted motion vector
812 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
813 if(n==0){
814 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
815 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
817 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
819 if(top_ref == ref){
820 *mx= B[0];
821 *my= B[1];
822 return;
824 }else{
825 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
826 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
828 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
830 if(left_ref == ref){
831 *mx= A[0];
832 *my= A[1];
833 return;
837 //RARE
838 pred_motion(h, n, 4, list, ref, mx, my);
/**
 * gets the directionally predicted 8x16 MV.
 * @param n the block index
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
    if(n==0){
        /* left 8x16 partition: try the A (left) neighbour first */
        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);

        /* use the neighbour MV directly only when it references the same picture */
        if(left_ref == ref){
            *mx= A[0];
            *my= A[1];
            return;
        }
    }else{
        /* right 8x16 partition: try the C (diagonal) neighbour first */
        const int16_t * C;
        int diagonal_ref;

        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);

        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);

        if(diagonal_ref == ref){
            *mx= C[0];
            *my= C[1];
            return;
        }
    }

    //RARE: directional neighbour unusable, fall back to median prediction
    pred_motion(h, n, 2, list, ref, mx, my);
}
/**
 * Predicts the motion vector for a P-skip macroblock (list 0, ref 0).
 * The MV is forced to (0,0) when either the top or left neighbour is
 * unavailable, or when one of them is a zero MV with ref index 0;
 * otherwise the regular median prediction is used.
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    /* the uint32_t load compares both MV components against zero at once */
    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
       || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
       || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){

        *mx = *my = 0;
        return;
    }

    pred_motion(h, 0, 4, 0, 0, mx, my);

    return;
}
897 static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
898 int poc0 = h->ref_list[0][i].poc;
899 int td = av_clip(poc1 - poc0, -128, 127);
900 if(td == 0 || h->ref_list[0][i].long_ref){
901 return 256;
902 }else{
903 int tb = av_clip(poc - poc0, -128, 127);
904 int tx = (16384 + (FFABS(td) >> 1)) / td;
905 return av_clip((tb*tx + 32) >> 6, -1024, 1023);
/**
 * Fills the temporal-direct distance scale factor tables for the current slice:
 * h->dist_scale_factor_field[] (per-field entries) and h->dist_scale_factor[]
 * (frame-level entries).
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    MpegEncContext * const s = &h->s;
    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
    const int poc1 = h->ref_list[1][0].poc;
    int i, field;
    /* per-field table: the inner poc/poc1 intentionally shadow the frame-level
     * values declared above; field refs live at offset 16 in ref_list[0] */
    for(field=0; field<2; field++){
        const int poc  = h->s.current_picture_ptr->field_poc[field];
        const int poc1 = h->ref_list[1][0].field_poc[field];
        for(i=0; i < 2*h->ref_count[0]; i++)
            h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, poc, poc1, i+16);
    }

    for(i=0; i<h->ref_count[0]; i++){
        h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
    }
}
/**
 * Builds the mapping from the co-located (list1) picture's reference indexes
 * to the current slice's list0 indexes, used by temporal direct prediction.
 * References are matched by a packed poc key: 4*frame_num + (reference&3).
 * @param map      output table; entries 0..15 are frame-level, 16+ per-field
 * @param colfield which field of the co-located picture to read refs from
 * @param mbafi    nonzero when building the MBAFF per-field table
 */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    int start= mbafi ? 16                      : 0;
    int end  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];

            if     (!interl)
                poc |= 3; /* frame refs carry both field bits set */
            else if( interl && (poc&3) == 3) //FIXME store all MBAFF references so this isnt needed
                poc= (poc&~3) + rfield + 1;

            /* linear search for the current-list entry with the same poc key */
            for(j=start; j<end; j++){
                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                    int cur_ref= mbafi ? (j-16)^field : j;
                    map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                    if(rfield == field)
                        map[list][old_ref] = cur_ref;
                    break;
                }
            }
        }
    }
}
/**
 * Records the current reference lists in the current picture (packed poc keys
 * and counts, per field/frame side) and, for temporal-direct B slices, builds
 * the col-to-list0 mapping tables via fill_colmap().
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    int list, j, field;
    int sidx= (s->picture_structure&1)^1;   /* side index for the current structure */
    int ref1sidx= (ref1->reference&1)^1;    /* side index of the co-located picture */

    for(list=0; list<2; list++){
        cur->ref_count[sidx][list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++)
            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
    }

    /* a frame picture covers both field sides: duplicate into the other slot */
    if(s->picture_structure == PICT_FRAME){
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc [1], cur->ref_poc [0], sizeof(cur->ref_poc [0]));
    }

    cur->mbaff= FRAME_MBAFF;

    /* the colmaps are only needed for temporal direct in B slices */
    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
        return;

    for(list=0; list<2; list++){
        fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
        for(field=0; field<2; field++)
            fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
    }
}
/**
 * Derives motion vectors and reference indexes for a B-direct macroblock
 * (spatial or temporal direct mode per h->direct_spatial_mv_pred), filling
 * h->mv_cache / h->ref_cache / h->sub_mb_type and updating *mb_type.
 * The co-located macroblock is read from h->ref_list[1][0]; the various
 * interlace combinations (frame/field/MBAFF, abbreviated AFR/AFL/FR/FL in
 * the comments below) change where and with what stride it is sampled.
 */
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    int b8_stride = h->b8_stride;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;
    int i8, i4;

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)

    /* ---- locate the co-located macroblock and choose sub_mb_type ---- */
    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
        if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
            /* pick the col field whose poc is closer to the current poc */
            int cur_poc = s->current_picture_ptr->poc;
            int *col_poc = h->ref_list[1]->field_poc;
            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
            b8_stride = 0;
        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
            mb_xy += s->mb_stride*fieldoff;
        }
        goto single_col;
    }else{                                               // AFL/AFR/FR/FL -> AFR/FR
        if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
            /* one field MB maps onto a vertical pair of frame MBs */
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
            b8_stride *= 3;
            b4_stride *= 6;
            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
                && !is_b8x8){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }
        }else{                                           //     AFR/FR    -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
                /* FIXME save sub mb types from previous frames (or derive from MVs)
                 * so we know exactly what block size to use */
                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
            }else{
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
            }
        }
    }

    /* pointers into the co-located picture's MV and ref-index planes */
    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
    if(!b8_stride){ /* frame MB reading from a field col picture */
        if(s->mb_y&1){
            l1ref0 += h->b8_stride;
            l1ref1 += h->b8_stride;
            l1mv0  +=  2*b4_stride;
            l1mv1  +=  2*b4_stride;
        }
    }

    if(h->direct_spatial_mv_pred){
        int ref[2];
        int mv[2][2];
        int list;

        /* FIXME interlacing + spatial direct uses wrong colocated block positions */

        /* ref = min(neighbors) */
        for(list=0; list<2; list++){
            int refa = h->ref_cache[list][scan8[0] - 1];
            int refb = h->ref_cache[list][scan8[0] - 8];
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
            if(refc == PART_NOT_AVAILABLE)
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
            /* unsigned compare makes negative (unavailable) refs largest */
            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
            if(ref[list] < 0)
                ref[list] = -1;
        }

        if(ref[0] < 0 && ref[1] < 0){
            ref[0] = ref[1] = 0;
            mv[0][0] = mv[0][1] =
            mv[1][0] = mv[1][1] = 0;
        }else{
            for(list=0; list<2; list++){
                if(ref[list] >= 0)
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
                else
                    mv[list][0] = mv[list][1] = 0;
            }
        }

        /* drop the unused list from the (sub) mb type */
        if(ref[1] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L1;
            sub_mb_type &= ~MB_TYPE_L1;
        }else if(ref[0] < 0){
            if(!is_b8x8)
                *mb_type &= ~MB_TYPE_L0;
            sub_mb_type &= ~MB_TYPE_L0;
        }

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* field/frame mismatch: sample one col MV per 8x8 block */
            for(i8=0; i8<4; i8++){
                int x8 = i8&1;
                int y8 = i8>>1;
                int xy8 = x8+y8*b8_stride;
                int xy4 = 3*x8+y8*b4_stride;
                int a=0, b=0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                /* col_zero_flag: near-zero col MV with ref 0 zeroes the MV per list */
                if(!IS_INTRA(mb_type_col[y8])
                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
                    if(ref[0] > 0)
                        a= pack16to32(mv[0][0],mv[0][1]);
                    if(ref[1] > 0)
                        b= pack16to32(mv[1][0],mv[1][1]);
                }else{
                    a= pack16to32(mv[0][0],mv[0][1]);
                    b= pack16to32(mv[1][0],mv[1][1]);
                }
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
            }
        }else if(IS_16X16(*mb_type)){
            int a=0, b=0;

            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
            /* x264_build check works around a pre-build-34 x264 bug */
            if(!IS_INTRA(mb_type_col[0])
               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
                       && (h->x264_build>33 || !h->x264_build)))){
                if(ref[0] > 0)
                    a= pack16to32(mv[0][0],mv[0][1]);
                if(ref[1] > 0)
                    b= pack16to32(mv[1][0],mv[1][1]);
            }else{
                a= pack16to32(mv[0][0],mv[0][1]);
                b= pack16to32(mv[1][0],mv[1][1]);
            }
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);

                /* col_zero_flag */
                if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
                                                 || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
                                                     && (h->x264_build>33 || !h->x264_build)))){
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
                    if(IS_SUB_8X8(sub_mb_type)){
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                            if(ref[1] == 0)
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                        }
                    }else
                    for(i4=0; i4<4; i4++){
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
                            if(ref[0] == 0)
                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
                            if(ref[1] == 0)
                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
                        }
                    }
                }
            }
        }
    }else{ /* direct temporal mv pred */
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
        const int *dist_scale_factor = h->dist_scale_factor;
        int ref_offset= 0;

        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
            /* use the per-field tables for field MBs in an MBAFF frame */
            map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
            map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
            dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
        }
        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
            ref_offset += 16; /* field refs of the col picture start at index 16 */

        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            /* FIXME assumes direct_8x8_inference == 1 */
            int y_shift  = 2*!IS_INTERLACED(*mb_type);

            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[y8])){
                    /* intra col block: zero MVs and ref 0 */
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8*b8_stride];
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                }
                scale = dist_scale_factor[ref0];
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);

                {
                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
                    int my_col = (mv_col[1]<<y_shift)/2; /* rescale vertical MV across field/frame */
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * my_col + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
                }
            }
            return;
        }

        /* one-to-one mv scaling */

        if(IS_16X16(*mb_type)){
            int ref, mv0, mv1;

            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if(IS_INTRA(mb_type_col[0])){
                ref=mv0=mv1=0;
            }else{
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                int mv_l0[2];
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                ref= ref0;
                mv0= pack16to32(mv_l0[0],mv_l0[1]);
                /* list1 MV is the scaled MV minus the col MV (H.264 8.4.1.2.3) */
                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
            }
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
        }else{
            for(i8=0; i8<4; i8++){
                const int x8 = i8&1;
                const int y8 = i8>>1;
                int ref0, scale;
                const int16_t (*l1mv)[2]= l1mv0;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
                if(IS_INTRA(mb_type_col[0])){
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
                if(ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0];
                else{
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
                    l1mv= l1mv1;
                }
                scale = dist_scale_factor[ref0];

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                if(IS_SUB_8X8(sub_mb_type)){
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
                    int mx = (scale * mv_col[0] + 128) >> 8;
                    int my = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
                }else
                for(i4=0; i4<4; i4++){
                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
                }
            }
        }
    }
}
/**
 * Copies the per-macroblock MV/ref caches back into the picture-wide tables
 * (motion_val, ref_index, and for CABAC also mvd_table/direct_table).
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;   /* 4x4-block coords */
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;  /* 8x8-block coords */
    int list;

    if(!USES_LIST(mb_type, 0))
        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);

    for(list=0; list<h->list_count; list++){
        int y;
        if(!USES_LIST(mb_type, list))
            continue;

        /* two uint64_t stores copy a whole row of four 4x4-block MVs */
        for(y=0; y<4; y++){
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        }
        if( h->pps.cabac ) {
            /* MV differences are needed as CABAC context for later MBs */
            if(IS_SKIP(mb_type))
                fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
            else
                for(y=0; y<4; y++){
                    *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
                    *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
                }
        }

        {
            int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
            ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
            ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
            ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
            ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
        }
    }

    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
        if(IS_8X8(mb_type)){
            uint8_t *direct_table = &h->direct_table[b8_xy];
            direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
            direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
            direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
        }
    }
}
1370 * Decodes a network abstraction layer unit.
1371 * @param consumed is the number of bytes used as input
1372 * @param length is the length of the array
1373 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1374 * @returns decoded bytes, might be src+1 if no escapes
1376 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1377 int i, si, di;
1378 uint8_t *dst;
1379 int bufidx;
1381 // src[0]&0x80; //forbidden bit
1382 h->nal_ref_idc= src[0]>>5;
1383 h->nal_unit_type= src[0]&0x1F;
1385 src++; length--;
1386 #if 0
1387 for(i=0; i<length; i++)
1388 printf("%2X ", src[i]);
1389 #endif
1390 for(i=0; i+1<length; i+=2){
1391 if(src[i]) continue;
1392 if(i>0 && src[i-1]==0) i--;
1393 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1394 if(src[i+2]!=3){
1395 /* startcode, so we must be past the end */
1396 length=i;
1398 break;
1402 if(i>=length-1){ //no escaped 0
1403 *dst_length= length;
1404 *consumed= length+1; //+1 for the header
1405 return src;
1408 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1409 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1410 dst= h->rbsp_buffer[bufidx];
1412 if (dst == NULL){
1413 return NULL;
1416 //printf("decoding esc\n");
1417 si=di=0;
1418 while(si<length){
1419 //remove escapes (very rare 1:2^22)
1420 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1421 if(src[si+2]==3){ //escape
1422 dst[di++]= 0;
1423 dst[di++]= 0;
1424 si+=3;
1425 continue;
1426 }else //next start code
1427 break;
1430 dst[di++]= src[si++];
1433 *dst_length= di;
1434 *consumed= si + 1;//+1 for the header
1435 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1436 return dst;
1440 * identifies the exact end of the bitstream
1441 * @return the length of the trailing, or 0 if damaged
1443 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1444 int v= *src;
1445 int r;
1447 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1449 for(r=1; r<9; r++){
1450 if(v&1) return r;
1451 v>>=1;
1453 return 0;
/**
 * IDCT transforms the 16 dc values and dequantizes them.
 * The DC coefficients sit at the (0,0) position of each 4x4 sub-block
 * inside the 16x16 luma coefficient array, hence the offset tables.
 * @param qp quantization parameter
 */
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
#define stride 16
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    //memset(block, 64, 2*256);
    //return;
    /* first pass: vertical 4-point Hadamard-style butterflies */
    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    /* second pass: horizontal butterflies, then dequantize with rounding */
    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
    }
}
#if 0
/* NOTE(review): disabled forward-DCT counterpart of the dequant/IDCT above;
 * presumably encoder-side code — kept for reference only. */
/**
 * DCT transforms the 16 dc values.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
1536 #undef xStride
1537 #undef stride
1539 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1540 const int stride= 16*2;
1541 const int xStride= 16;
1542 int a,b,c,d,e;
1544 a= block[stride*0 + xStride*0];
1545 b= block[stride*0 + xStride*1];
1546 c= block[stride*1 + xStride*0];
1547 d= block[stride*1 + xStride*1];
1549 e= a-b;
1550 a= a+b;
1551 b= c-d;
1552 c= c+d;
1554 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1555 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1556 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1557 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
#if 0
/* NOTE(review): disabled forward 2x2 Hadamard of the chroma DCs; presumably
 * encoder-side counterpart of chroma_dc_dequant_idct_c — kept for reference. */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
/**
 * gets the chroma qp.
 * Simple lookup in the per-PPS chroma QP table; t selects which of the two
 * chroma QP index offsets (Cb/Cr) to use.
 */
static inline int get_chroma_qp(H264Context *h, int t, int qscale){
    return h->pps.chroma_qp_table[t][qscale];
}
//FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/**
 * Quantizes a 4x4 block of coefficients in scan order.
 * The single unsigned comparison (level+threshold1) > threshold2 replaces a
 * pair of signed comparisons against +/-threshold (dead-zone quantizer).
 * @param separate_dc if set, block[0] is quantized with DC-specific bias
 *                    and shift (different tables above/below qscale 18 to
 *                    avoid overflow), and the scan starts at 1
 * @return index of the last non-zero coefficient, or -1/0 if none
 */
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
    int i;
    const int * const quant_table= quant_coeff[qscale];
    const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
    const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
    const unsigned int threshold2= (threshold1<<1);
    int last_non_zero;

    if(separate_dc){
        if(qscale<=18){
            //avoid overflows
            const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
            const unsigned int dc_threshold2= (dc_threshold1<<1);

            int level= block[0]*quant_coeff[qscale+18][0];
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
                if(level>0){
                    level= (dc_bias + level)>>(QUANT_SHIFT-2);
                    block[0]= level;
                }else{
                    level= (dc_bias - level)>>(QUANT_SHIFT-2);
                    block[0]= -level;
                }
//                last_non_zero = i;
            }else{
                block[0]=0;
            }
        }else{
            const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
            const unsigned int dc_threshold2= (dc_threshold1<<1);

            int level= block[0]*quant_table[0];
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
                if(level>0){
                    level= (dc_bias + level)>>(QUANT_SHIFT+1);
                    block[0]= level;
                }else{
                    level= (dc_bias - level)>>(QUANT_SHIFT+1);
                    block[0]= -level;
                }
//                last_non_zero = i;
            }else{
                block[0]=0;
            }
        }
        last_non_zero= 0;
        i=1;
    }else{
        last_non_zero= -1;
        i=0;
    }

    for(; i<16; i++){
        const int j= scantable[i];
        int level= block[j]*quant_table[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QUANT_SHIFT;
                block[j]= level;
            }else{
                /* negate before shifting so the arithmetic shift rounds
                 * toward zero for negative levels too */
                level= (bias - level)>>QUANT_SHIFT;
                block[j]= -level;
            }
            last_non_zero = i;
        }else{
            block[j]=0;
        }
    }

    return last_non_zero;
}
/**
 * Performs the luma and chroma motion compensation for one partition and one
 * prediction direction, with edge emulation when the (sub-pel expanded) source
 * area reaches outside the reference picture.
 * @param square  nonzero when one qpix_op call covers the whole partition;
 *                otherwise a second call at +delta completes it
 * @param list    prediction list the MV/ref was taken from (0 or 1)
 */
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int src_x_offset, int src_y_offset,
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2); /* quarter-pel phase selects the qpel function */
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;

    if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
        return;

    /* sub-pel interpolation reads 3 extra pixels around the block */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
}
/**
 * Standard (non-weighted) motion compensation for one partition: list0
 * prediction is written with the put functions, then an optional list1
 * prediction is averaged in with the avg functions for bi-prediction.
 */
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                           int x_offset, int y_offset,
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
                           int list0, int list1){
    MpegEncContext * const s = &h->s;
    qpel_mc_func *qpix_op=  qpix_put;
    h264_chroma_mc_func chroma_op= chroma_put;

    /* x/y_offset are in chroma-sized (8-pel) units; luma uses twice that */
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0){
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);

        /* second direction averages onto the first */
        qpix_op=  qpix_avg;
        chroma_op= chroma_avg;
    }

    if(list1){
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
                           qpix_op, chroma_op);
    }
}
1764 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1765 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1766 int x_offset, int y_offset,
1767 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1768 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1769 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1770 int list0, int list1){
1771 MpegEncContext * const s = &h->s;
1773 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1774 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1775 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1776 x_offset += 8*s->mb_x;
1777 y_offset += 8*(s->mb_y >> MB_FIELD);
1779 if(list0 && list1){
1780 /* don't optimize for luma-only case, since B-frames usually
1781 * use implicit weights => chroma too. */
1782 uint8_t *tmp_cb = s->obmc_scratchpad;
1783 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1784 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1785 int refn0 = h->ref_cache[0][ scan8[n] ];
1786 int refn1 = h->ref_cache[1][ scan8[n] ];
1788 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1789 dest_y, dest_cb, dest_cr,
1790 x_offset, y_offset, qpix_put, chroma_put);
1791 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1792 tmp_y, tmp_cb, tmp_cr,
1793 x_offset, y_offset, qpix_put, chroma_put);
1795 if(h->use_weight == 2){
1796 int weight0 = h->implicit_weight[refn0][refn1];
1797 int weight1 = 64 - weight0;
1798 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1799 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1800 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1801 }else{
1802 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1803 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1804 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1805 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1806 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1807 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1808 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1809 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1810 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1812 }else{
1813 int list = list1 ? 1 : 0;
1814 int refn = h->ref_cache[list][ scan8[n] ];
1815 Picture *ref= &h->ref_list[list][refn];
1816 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1817 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1818 qpix_put, chroma_put);
1820 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1821 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1822 if(h->use_weight_chroma){
1823 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1824 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1825 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1826 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1831 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1832 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1833 int x_offset, int y_offset,
1834 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1835 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1836 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1837 int list0, int list1){
1838 if((h->use_weight==2 && list0 && list1
1839 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1840 || h->use_weight==1)
1841 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1842 x_offset, y_offset, qpix_put, chroma_put,
1843 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1844 else
1845 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1849 static inline void prefetch_motion(H264Context *h, int list){
1850 /* fetch pixels for estimated mv 4 macroblocks ahead
1851 * optimized for 64byte cache lines */
1852 MpegEncContext * const s = &h->s;
1853 const int refn = h->ref_cache[list][scan8[0]];
1854 if(refn >= 0){
1855 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1856 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1857 uint8_t **src= h->ref_list[list][refn].data;
1858 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1859 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1860 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1861 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1865 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1866 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1867 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1868 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1869 MpegEncContext * const s = &h->s;
1870 const int mb_xy= h->mb_xy;
1871 const int mb_type= s->current_picture.mb_type[mb_xy];
1873 assert(IS_INTER(mb_type));
1875 prefetch_motion(h, 0);
1877 if(IS_16X16(mb_type)){
1878 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1879 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1880 &weight_op[0], &weight_avg[0],
1881 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1882 }else if(IS_16X8(mb_type)){
1883 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1884 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1885 &weight_op[1], &weight_avg[1],
1886 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1887 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1891 }else if(IS_8X16(mb_type)){
1892 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1893 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1894 &weight_op[2], &weight_avg[2],
1895 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1896 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1900 }else{
1901 int i;
1903 assert(IS_8X8(mb_type));
1905 for(i=0; i<4; i++){
1906 const int sub_mb_type= h->sub_mb_type[i];
1907 const int n= 4*i;
1908 int x_offset= (i&1)<<2;
1909 int y_offset= (i&2)<<1;
1911 if(IS_SUB_8X8(sub_mb_type)){
1912 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1913 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1914 &weight_op[3], &weight_avg[3],
1915 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1916 }else if(IS_SUB_8X4(sub_mb_type)){
1917 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1918 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1919 &weight_op[4], &weight_avg[4],
1920 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1921 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 }else if(IS_SUB_4X8(sub_mb_type)){
1926 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1927 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1928 &weight_op[5], &weight_avg[5],
1929 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1930 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1934 }else{
1935 int j;
1936 assert(IS_SUB_4X4(sub_mb_type));
1937 for(j=0; j<4; j++){
1938 int sub_x_offset= x_offset + 2*(j&1);
1939 int sub_y_offset= y_offset + (j&2);
1940 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1941 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1942 &weight_op[6], &weight_avg[6],
1943 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1949 prefetch_motion(h, 1);
1952 static av_cold void decode_init_vlc(void){
1953 static int done = 0;
1955 if (!done) {
1956 int i;
1957 int offset;
1958 done = 1;
1960 chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1961 chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1962 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1963 &chroma_dc_coeff_token_len [0], 1, 1,
1964 &chroma_dc_coeff_token_bits[0], 1, 1,
1965 INIT_VLC_USE_NEW_STATIC);
1967 offset = 0;
1968 for(i=0; i<4; i++){
1969 coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1970 coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1971 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1972 &coeff_token_len [i][0], 1, 1,
1973 &coeff_token_bits[i][0], 1, 1,
1974 INIT_VLC_USE_NEW_STATIC);
1975 offset += coeff_token_vlc_tables_size[i];
1978 * This is a one time safety check to make sure that
1979 * the packed static coeff_token_vlc table sizes
1980 * were initialized correctly.
1982 assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1984 for(i=0; i<3; i++){
1985 chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1986 chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1987 init_vlc(&chroma_dc_total_zeros_vlc[i],
1988 CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1989 &chroma_dc_total_zeros_len [i][0], 1, 1,
1990 &chroma_dc_total_zeros_bits[i][0], 1, 1,
1991 INIT_VLC_USE_NEW_STATIC);
1993 for(i=0; i<15; i++){
1994 total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1995 total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1996 init_vlc(&total_zeros_vlc[i],
1997 TOTAL_ZEROS_VLC_BITS, 16,
1998 &total_zeros_len [i][0], 1, 1,
1999 &total_zeros_bits[i][0], 1, 1,
2000 INIT_VLC_USE_NEW_STATIC);
2003 for(i=0; i<6; i++){
2004 run_vlc[i].table = run_vlc_tables[i];
2005 run_vlc[i].table_allocated = run_vlc_tables_size;
2006 init_vlc(&run_vlc[i],
2007 RUN_VLC_BITS, 7,
2008 &run_len [i][0], 1, 1,
2009 &run_bits[i][0], 1, 1,
2010 INIT_VLC_USE_NEW_STATIC);
2012 run7_vlc.table = run7_vlc_table,
2013 run7_vlc.table_allocated = run7_vlc_table_size;
2014 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
2015 &run_len [6][0], 1, 1,
2016 &run_bits[6][0], 1, 1,
2017 INIT_VLC_USE_NEW_STATIC);
2021 static void free_tables(H264Context *h){
2022 int i;
2023 H264Context *hx;
2024 av_freep(&h->intra4x4_pred_mode);
2025 av_freep(&h->chroma_pred_mode_table);
2026 av_freep(&h->cbp_table);
2027 av_freep(&h->mvd_table[0]);
2028 av_freep(&h->mvd_table[1]);
2029 av_freep(&h->direct_table);
2030 av_freep(&h->non_zero_count);
2031 av_freep(&h->slice_table_base);
2032 h->slice_table= NULL;
2034 av_freep(&h->mb2b_xy);
2035 av_freep(&h->mb2b8_xy);
2037 for(i = 0; i < h->s.avctx->thread_count; i++) {
2038 hx = h->thread_context[i];
2039 if(!hx) continue;
2040 av_freep(&hx->top_borders[1]);
2041 av_freep(&hx->top_borders[0]);
2042 av_freep(&hx->s.obmc_scratchpad);
2046 static void init_dequant8_coeff_table(H264Context *h){
2047 int i,q,x;
2048 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2049 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2050 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2052 for(i=0; i<2; i++ ){
2053 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2054 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2055 break;
2058 for(q=0; q<52; q++){
2059 int shift = div6[q];
2060 int idx = rem6[q];
2061 for(x=0; x<64; x++)
2062 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2063 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2064 h->pps.scaling_matrix8[i][x]) << shift;
2069 static void init_dequant4_coeff_table(H264Context *h){
2070 int i,j,q,x;
2071 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2072 for(i=0; i<6; i++ ){
2073 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2074 for(j=0; j<i; j++){
2075 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2076 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2077 break;
2080 if(j<i)
2081 continue;
2083 for(q=0; q<52; q++){
2084 int shift = div6[q] + 2;
2085 int idx = rem6[q];
2086 for(x=0; x<16; x++)
2087 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2088 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2089 h->pps.scaling_matrix4[i][x]) << shift;
2094 static void init_dequant_tables(H264Context *h){
2095 int i,x;
2096 init_dequant4_coeff_table(h);
2097 if(h->pps.transform_8x8_mode)
2098 init_dequant8_coeff_table(h);
2099 if(h->sps.transform_bypass){
2100 for(i=0; i<6; i++)
2101 for(x=0; x<16; x++)
2102 h->dequant4_coeff[i][0][x] = 1<<6;
2103 if(h->pps.transform_8x8_mode)
2104 for(i=0; i<2; i++)
2105 for(x=0; x<64; x++)
2106 h->dequant8_coeff[i][0][x] = 1<<6;
2112 * allocates tables.
2113 * needs width/height
2115 static int alloc_tables(H264Context *h){
2116 MpegEncContext * const s = &h->s;
2117 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2118 int x,y;
2120 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2122 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2123 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2124 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2126 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2127 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2128 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2129 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2131 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2132 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2134 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2135 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2136 for(y=0; y<s->mb_height; y++){
2137 for(x=0; x<s->mb_width; x++){
2138 const int mb_xy= x + y*s->mb_stride;
2139 const int b_xy = 4*x + 4*y*h->b_stride;
2140 const int b8_xy= 2*x + 2*y*h->b8_stride;
2142 h->mb2b_xy [mb_xy]= b_xy;
2143 h->mb2b8_xy[mb_xy]= b8_xy;
2147 s->obmc_scratchpad = NULL;
2149 if(!h->dequant4_coeff[0])
2150 init_dequant_tables(h);
2152 return 0;
2153 fail:
2154 free_tables(h);
2155 return -1;
2159 * Mimic alloc_tables(), but for every context thread.
2161 static void clone_tables(H264Context *dst, H264Context *src){
2162 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2163 dst->non_zero_count = src->non_zero_count;
2164 dst->slice_table = src->slice_table;
2165 dst->cbp_table = src->cbp_table;
2166 dst->mb2b_xy = src->mb2b_xy;
2167 dst->mb2b8_xy = src->mb2b8_xy;
2168 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2169 dst->mvd_table[0] = src->mvd_table[0];
2170 dst->mvd_table[1] = src->mvd_table[1];
2171 dst->direct_table = src->direct_table;
2173 dst->s.obmc_scratchpad = NULL;
2174 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2178 * Init context
2179 * Allocate buffers which are not shared amongst multiple threads.
2181 static int context_init(H264Context *h){
2182 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2183 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2185 return 0;
2186 fail:
2187 return -1; // free_tables will clean up for us
2190 static av_cold void common_init(H264Context *h){
2191 MpegEncContext * const s = &h->s;
2193 s->width = s->avctx->width;
2194 s->height = s->avctx->height;
2195 s->codec_id= s->avctx->codec->id;
2197 ff_h264_pred_init(&h->hpc, s->codec_id);
2199 h->dequant_coeff_pps= -1;
2200 s->unrestricted_mv=1;
2201 s->decode=1; //FIXME
2203 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2204 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2207 static av_cold int decode_init(AVCodecContext *avctx){
2208 H264Context *h= avctx->priv_data;
2209 MpegEncContext * const s = &h->s;
2211 MPV_decode_defaults(s);
2213 s->avctx = avctx;
2214 common_init(h);
2216 s->out_format = FMT_H264;
2217 s->workaround_bugs= avctx->workaround_bugs;
2219 // set defaults
2220 // s->decode_mb= ff_h263_decode_mb;
2221 s->quarter_sample = 1;
2222 s->low_delay= 1;
2224 if(avctx->codec_id == CODEC_ID_SVQ3)
2225 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2226 else
2227 avctx->pix_fmt= PIX_FMT_YUV420P;
2229 decode_init_vlc();
2231 if(avctx->extradata_size > 0 && avctx->extradata &&
2232 *(char *)avctx->extradata == 1){
2233 h->is_avc = 1;
2234 h->got_avcC = 0;
2235 } else {
2236 h->is_avc = 0;
2239 h->thread_context[0] = h;
2240 h->outputed_poc = INT_MIN;
2241 h->prev_poc_msb= 1<<16;
2242 return 0;
2245 static int frame_start(H264Context *h){
2246 MpegEncContext * const s = &h->s;
2247 int i;
2249 if(MPV_frame_start(s, s->avctx) < 0)
2250 return -1;
2251 ff_er_frame_start(s);
2253 * MPV_frame_start uses pict_type to derive key_frame.
2254 * This is incorrect for H.264; IDR markings must be used.
2255 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2256 * See decode_nal_units().
2258 s->current_picture_ptr->key_frame= 0;
2260 assert(s->linesize && s->uvlinesize);
2262 for(i=0; i<16; i++){
2263 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2264 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2266 for(i=0; i<4; i++){
2267 h->block_offset[16+i]=
2268 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2269 h->block_offset[24+16+i]=
2270 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2273 /* can't be in alloc_tables because linesize isn't known there.
2274 * FIXME: redo bipred weight to not require extra buffer? */
2275 for(i = 0; i < s->avctx->thread_count; i++)
2276 if(!h->thread_context[i]->s.obmc_scratchpad)
2277 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2279 /* some macroblocks will be accessed before they're available */
2280 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2281 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2283 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2285 // We mark the current picture as non-reference after allocating it, so
2286 // that if we break out due to an error it can be released automatically
2287 // in the next MPV_frame_start().
2288 // SVQ3 as well as most other codecs have only last/next/current and thus
2289 // get released even with set reference, besides SVQ3 and others do not
2290 // mark frames as reference later "naturally".
2291 if(s->codec_id != CODEC_ID_SVQ3)
2292 s->current_picture_ptr->reference= 0;
2294 s->current_picture_ptr->field_poc[0]=
2295 s->current_picture_ptr->field_poc[1]= INT_MAX;
2296 assert(s->current_picture_ptr->long_ref==0);
2298 return 0;
2301 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2302 MpegEncContext * const s = &h->s;
2303 int i;
2304 int step = 1;
2305 int offset = 1;
2306 int uvoffset= 1;
2307 int top_idx = 1;
2308 int skiplast= 0;
2310 src_y -= linesize;
2311 src_cb -= uvlinesize;
2312 src_cr -= uvlinesize;
2314 if(!simple && FRAME_MBAFF){
2315 if(s->mb_y&1){
2316 offset = MB_MBAFF ? 1 : 17;
2317 uvoffset= MB_MBAFF ? 1 : 9;
2318 if(!MB_MBAFF){
2319 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize);
2320 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2321 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2322 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2323 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2326 }else{
2327 if(!MB_MBAFF){
2328 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2329 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2330 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7 ];
2331 h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2333 skiplast= 1;
2335 offset =
2336 uvoffset=
2337 top_idx = MB_MBAFF ? 0 : 1;
2339 step= MB_MBAFF ? 2 : 1;
2342 // There are two lines saved, the line above the the top macroblock of a pair,
2343 // and the line above the bottom macroblock
2344 h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2345 for(i=1; i<17 - skiplast; i++){
2346 h->left_border[offset+i*step]= src_y[15+i* linesize];
2349 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2350 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2352 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2353 h->left_border[uvoffset+34 ]= h->top_borders[top_idx][s->mb_x][16+7];
2354 h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2355 for(i=1; i<9 - skiplast; i++){
2356 h->left_border[uvoffset+34 +i*step]= src_cb[7+i*uvlinesize];
2357 h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2359 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2360 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2364 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2365 MpegEncContext * const s = &h->s;
2366 int temp8, i;
2367 uint64_t temp64;
2368 int deblock_left;
2369 int deblock_top;
2370 int mb_xy;
2371 int step = 1;
2372 int offset = 1;
2373 int uvoffset= 1;
2374 int top_idx = 1;
2376 if(!simple && FRAME_MBAFF){
2377 if(s->mb_y&1){
2378 offset = MB_MBAFF ? 1 : 17;
2379 uvoffset= MB_MBAFF ? 1 : 9;
2380 }else{
2381 offset =
2382 uvoffset=
2383 top_idx = MB_MBAFF ? 0 : 1;
2385 step= MB_MBAFF ? 2 : 1;
2388 if(h->deblocking_filter == 2) {
2389 mb_xy = h->mb_xy;
2390 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2391 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2392 } else {
2393 deblock_left = (s->mb_x > 0);
2394 deblock_top = (s->mb_y > !!MB_FIELD);
2397 src_y -= linesize + 1;
2398 src_cb -= uvlinesize + 1;
2399 src_cr -= uvlinesize + 1;
2401 #define XCHG(a,b,t,xchg)\
2402 t= a;\
2403 if(xchg)\
2404 a= b;\
2405 b= t;
2407 if(deblock_left){
2408 for(i = !deblock_top; i<16; i++){
2409 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, xchg);
2411 XCHG(h->left_border[offset+i*step], src_y [i* linesize], temp8, 1);
2414 if(deblock_top){
2415 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2416 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2417 if(s->mb_x+1 < s->mb_width){
2418 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2422 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2423 if(deblock_left){
2424 for(i = !deblock_top; i<8; i++){
2425 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, xchg);
2426 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2428 XCHG(h->left_border[uvoffset+34 +i*step], src_cb[i*uvlinesize], temp8, 1);
2429 XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2431 if(deblock_top){
2432 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2433 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2438 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2439 MpegEncContext * const s = &h->s;
2440 const int mb_x= s->mb_x;
2441 const int mb_y= s->mb_y;
2442 const int mb_xy= h->mb_xy;
2443 const int mb_type= s->current_picture.mb_type[mb_xy];
2444 uint8_t *dest_y, *dest_cb, *dest_cr;
2445 int linesize, uvlinesize /*dct_offset*/;
2446 int i;
2447 int *block_offset = &h->block_offset[0];
2448 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2449 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2450 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2452 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2453 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2454 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2456 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2457 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2459 if (!simple && MB_FIELD) {
2460 linesize = h->mb_linesize = s->linesize * 2;
2461 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2462 block_offset = &h->block_offset[24];
2463 if(mb_y&1){ //FIXME move out of this function?
2464 dest_y -= s->linesize*15;
2465 dest_cb-= s->uvlinesize*7;
2466 dest_cr-= s->uvlinesize*7;
2468 if(FRAME_MBAFF) {
2469 int list;
2470 for(list=0; list<h->list_count; list++){
2471 if(!USES_LIST(mb_type, list))
2472 continue;
2473 if(IS_16X16(mb_type)){
2474 int8_t *ref = &h->ref_cache[list][scan8[0]];
2475 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2476 }else{
2477 for(i=0; i<16; i+=4){
2478 int ref = h->ref_cache[list][scan8[i]];
2479 if(ref >= 0)
2480 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2485 } else {
2486 linesize = h->mb_linesize = s->linesize;
2487 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2488 // dct_offset = s->linesize * 16;
2491 if(transform_bypass){
2492 idct_dc_add =
2493 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2494 }else if(IS_8x8DCT(mb_type)){
2495 idct_dc_add = s->dsp.h264_idct8_dc_add;
2496 idct_add = s->dsp.h264_idct8_add;
2497 }else{
2498 idct_dc_add = s->dsp.h264_idct_dc_add;
2499 idct_add = s->dsp.h264_idct_add;
2502 if (!simple && IS_INTRA_PCM(mb_type)) {
2503 for (i=0; i<16; i++) {
2504 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2506 for (i=0; i<8; i++) {
2507 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2508 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2510 } else {
2511 if(IS_INTRA(mb_type)){
2512 if(h->deblocking_filter)
2513 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2515 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2516 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2517 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2520 if(IS_INTRA4x4(mb_type)){
2521 if(simple || !s->encoding){
2522 if(IS_8x8DCT(mb_type)){
2523 for(i=0; i<16; i+=4){
2524 uint8_t * const ptr= dest_y + block_offset[i];
2525 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2526 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2527 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2528 (h->topright_samples_available<<i)&0x4000, linesize);
2529 if(nnz){
2530 if(nnz == 1 && h->mb[i*16])
2531 idct_dc_add(ptr, h->mb + i*16, linesize);
2532 else
2533 idct_add(ptr, h->mb + i*16, linesize);
2536 }else
2537 for(i=0; i<16; i++){
2538 uint8_t * const ptr= dest_y + block_offset[i];
2539 uint8_t *topright;
2540 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2541 int nnz, tr;
2543 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2544 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2545 assert(mb_y || linesize <= block_offset[i]);
2546 if(!topright_avail){
2547 tr= ptr[3 - linesize]*0x01010101;
2548 topright= (uint8_t*) &tr;
2549 }else
2550 topright= ptr + 4 - linesize;
2551 }else
2552 topright= NULL;
2554 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2555 nnz = h->non_zero_count_cache[ scan8[i] ];
2556 if(nnz){
2557 if(is_h264){
2558 if(nnz == 1 && h->mb[i*16])
2559 idct_dc_add(ptr, h->mb + i*16, linesize);
2560 else
2561 idct_add(ptr, h->mb + i*16, linesize);
2562 }else
2563 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2567 }else{
2568 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2569 if(is_h264){
2570 if(!transform_bypass)
2571 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2572 }else
2573 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2575 if(h->deblocking_filter)
2576 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2577 }else if(is_h264){
2578 hl_motion(h, dest_y, dest_cb, dest_cr,
2579 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2580 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2581 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2585 if(!IS_INTRA4x4(mb_type)){
2586 if(is_h264){
2587 if(IS_INTRA16x16(mb_type)){
2588 for(i=0; i<16; i++){
2589 if(h->non_zero_count_cache[ scan8[i] ])
2590 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2591 else if(h->mb[i*16])
2592 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2594 }else{
2595 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2596 for(i=0; i<16; i+=di){
2597 int nnz = h->non_zero_count_cache[ scan8[i] ];
2598 if(nnz){
2599 if(nnz==1 && h->mb[i*16])
2600 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2601 else
2602 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2606 }else{
2607 for(i=0; i<16; i++){
2608 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2609 uint8_t * const ptr= dest_y + block_offset[i];
2610 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2616 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2617 uint8_t *dest[2] = {dest_cb, dest_cr};
2618 if(transform_bypass){
2619 idct_add = idct_dc_add = s->dsp.add_pixels4;
2620 }else{
2621 idct_add = s->dsp.h264_idct_add;
2622 idct_dc_add = s->dsp.h264_idct_dc_add;
2623 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2624 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2626 if(is_h264){
2627 for(i=16; i<16+8; i++){
2628 if(h->non_zero_count_cache[ scan8[i] ])
2629 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2630 else if(h->mb[i*16])
2631 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2633 }else{
2634 for(i=16; i<16+8; i++){
2635 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2636 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2637 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2643 if(h->deblocking_filter) {
2644 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2645 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2646 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2647 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2648 if (!simple && FRAME_MBAFF) {
2649 filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2650 } else {
2651 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2657 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2659 static void hl_decode_mb_simple(H264Context *h){
2660 hl_decode_mb_internal(h, 1);
2664 * Process a macroblock; this handles edge cases, such as interlacing.
2666 static void av_noinline hl_decode_mb_complex(H264Context *h){
2667 hl_decode_mb_internal(h, 0);
2670 static void hl_decode_mb(H264Context *h){
2671 MpegEncContext * const s = &h->s;
2672 const int mb_xy= h->mb_xy;
2673 const int mb_type= s->current_picture.mb_type[mb_xy];
2674 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2675 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2677 if(ENABLE_H264_ENCODER && !s->decode)
2678 return;
2680 if (is_complex)
2681 hl_decode_mb_complex(h);
2682 else hl_decode_mb_simple(h);
2685 static void pic_as_field(Picture *pic, const int parity){
2686 int i;
2687 for (i = 0; i < 4; ++i) {
2688 if (parity == PICT_BOTTOM_FIELD)
2689 pic->data[i] += pic->linesize[i];
2690 pic->reference = parity;
2691 pic->linesize[i] *= 2;
2693 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
2696 static int split_field_copy(Picture *dest, Picture *src,
2697 int parity, int id_add){
2698 int match = !!(src->reference & parity);
2700 if (match) {
2701 *dest = *src;
2702 if(parity != PICT_FRAME){
2703 pic_as_field(dest, parity);
2704 dest->pic_id *= 2;
2705 dest->pic_id += id_add;
2709 return match;
/* Build one default reference list into 'def' from the candidate array 'in'.
 * Entries whose reference marking matches 'sel' are interleaved with entries
 * matching the opposite field parity (sel^3).  'is_long' selects whether
 * pic_id is set to the long-term index or to frame_num.
 * Returns the number of entries written to 'def'. */
2712 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
/* i[0]/i[1]: independent cursors over 'in' for the two parities */
2713 int i[2]={0};
2714 int index=0;
2716 while(i[0]<len || i[1]<len){
/* advance each cursor to the next candidate referenced with its parity */
2717 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2718 i[0]++;
2719 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2720 i[1]++;
2721 if(i[0] < len){
/* pic_id: long-term index for long refs, frame_num for short refs */
2722 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2723 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2725 if(i[1] < len){
2726 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2727 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
2731 return index;
/* Append to 'sorted' the pictures from 'src' whose POC lies beyond 'limit'
 * in direction 'dir' (dir=0: ascending POCs above limit; dir!=0: descending
 * POCs below limit), ordered nearest-first.  Implemented as repeated
 * linear selection scans.  Returns the number of pictures appended. */
2734 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2735 int i, best_poc;
2736 int out_i= 0;
2738 for(;;){
/* sentinel: no candidate found yet */
2739 best_poc= dir ? INT_MIN : INT_MAX;
2741 for(i=0; i<len; i++){
2742 const int poc= src[i]->poc;
/* XOR with dir flips both comparisons for the descending direction */
2743 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2744 best_poc= poc;
2745 sorted[out_i]= src[i];
2748 if(best_poc == (dir ? INT_MIN : INT_MAX))
2749 break;
/* tighten the limit past the picture just selected */
2750 limit= sorted[out_i++]->poc - dir;
2752 return out_i;
2756 * fills the default_ref_list.
2758 static int fill_default_ref_list(H264Context *h){
2759 MpegEncContext * const s = &h->s;
2760 int i, len;
2762 if(h->slice_type_nos==FF_B_TYPE){
2763 Picture *sorted[32];
2764 int cur_poc, list;
2765 int lens[2];
2767 if(FIELD_PICTURE)
2768 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2769 else
2770 cur_poc= s->current_picture_ptr->poc;
2772 for(list= 0; list<2; list++){
2773 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2774 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2775 assert(len<=32);
2776 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2777 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2778 assert(len<=32);
2780 if(len < h->ref_count[list])
2781 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
2782 lens[list]= len;
2785 if(lens[0] == lens[1] && lens[1] > 1){
2786 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2787 if(i == lens[0])
2788 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2790 }else{
2791 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2792 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2793 assert(len <= 32);
2794 if(len < h->ref_count[0])
2795 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2797 #ifdef TRACE
2798 for (i=0; i<h->ref_count[0]; i++) {
2799 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2801 if(h->slice_type_nos==FF_B_TYPE){
2802 for (i=0; i<h->ref_count[1]; i++) {
2803 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2806 #endif
2807 return 0;
2810 static void print_short_term(H264Context *h);
2811 static void print_long_term(H264Context *h);
2814 * Extract structure information about the picture described by pic_num in
2815 * the current decoding context (frame or field). Note that pic_num is
2816 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2817 * @param pic_num picture number for which to extract structure information
2818 * @param structure one of PICT_XXX describing structure of picture
2819 * with pic_num
2820 * @return frame number (short term) or long term index of picture
2821 * described by pic_num
2823 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2824 MpegEncContext * const s = &h->s;
2826 *structure = s->picture_structure;
2827 if(FIELD_PICTURE){
2828 if (!(pic_num & 1))
2829 /* opposite field */
2830 *structure ^= PICT_FRAME;
2831 pic_num >>= 1;
2834 return pic_num;
2837 static int decode_ref_pic_list_reordering(H264Context *h){
2838 MpegEncContext * const s = &h->s;
2839 int list, index, pic_structure;
2841 print_short_term(h);
2842 print_long_term(h);
2844 for(list=0; list<h->list_count; list++){
2845 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2847 if(get_bits1(&s->gb)){
2848 int pred= h->curr_pic_num;
2850 for(index=0; ; index++){
2851 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2852 unsigned int pic_id;
2853 int i;
2854 Picture *ref = NULL;
2856 if(reordering_of_pic_nums_idc==3)
2857 break;
2859 if(index >= h->ref_count[list]){
2860 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2861 return -1;
2864 if(reordering_of_pic_nums_idc<3){
2865 if(reordering_of_pic_nums_idc<2){
2866 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2867 int frame_num;
2869 if(abs_diff_pic_num > h->max_pic_num){
2870 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2871 return -1;
2874 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2875 else pred+= abs_diff_pic_num;
2876 pred &= h->max_pic_num - 1;
2878 frame_num = pic_num_extract(h, pred, &pic_structure);
2880 for(i= h->short_ref_count-1; i>=0; i--){
2881 ref = h->short_ref[i];
2882 assert(ref->reference);
2883 assert(!ref->long_ref);
2885 ref->frame_num == frame_num &&
2886 (ref->reference & pic_structure)
2888 break;
2890 if(i>=0)
2891 ref->pic_id= pred;
2892 }else{
2893 int long_idx;
2894 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2896 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2898 if(long_idx>31){
2899 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2900 return -1;
2902 ref = h->long_ref[long_idx];
2903 assert(!(ref && !ref->reference));
2904 if(ref && (ref->reference & pic_structure)){
2905 ref->pic_id= pic_id;
2906 assert(ref->long_ref);
2907 i=0;
2908 }else{
2909 i=-1;
2913 if (i < 0) {
2914 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2915 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
2916 } else {
2917 for(i=index; i+1<h->ref_count[list]; i++){
2918 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2919 break;
2921 for(; i > index; i--){
2922 h->ref_list[list][i]= h->ref_list[list][i-1];
2924 h->ref_list[list][index]= *ref;
2925 if (FIELD_PICTURE){
2926 pic_as_field(&h->ref_list[list][index], pic_structure);
2929 }else{
2930 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
2931 return -1;
2936 for(list=0; list<h->list_count; list++){
2937 for(index= 0; index < h->ref_count[list]; index++){
2938 if(!h->ref_list[list][index].data[0]){
2939 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2940 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2945 return 0;
/* For MBAFF decoding, derive the per-field reference lists: for each frame
 * reference at ref_list[list][i], create top/bottom field variants at
 * indices 16+2*i and 16+2*i+1, and replicate the explicit and implicit
 * weighted-prediction parameters to the same indices. */
2948 static void fill_mbaff_ref_list(H264Context *h){
2949 int list, i, j;
2950 for(list=0; list<2; list++){ //FIXME try list_count
2951 for(i=0; i<h->ref_count[list]; i++){
2952 Picture *frame = &h->ref_list[list][i];
/* field[0]/field[1] = top/bottom field entries derived from 'frame' */
2953 Picture *field = &h->ref_list[list][16+2*i];
2954 field[0] = *frame;
2955 for(j=0; j<3; j++)
2956 field[0].linesize[j] <<= 1;
2957 field[0].reference = PICT_TOP_FIELD;
2958 field[0].poc= field[0].field_poc[0];
2959 field[1] = field[0];
/* bottom field: offset plane pointers by one (frame) line */
2960 for(j=0; j<3; j++)
2961 field[1].data[j] += frame->linesize[j];
2962 field[1].reference = PICT_BOTTOM_FIELD;
2963 field[1].poc= field[1].field_poc[1];
/* both field entries reuse the frame's explicit weights/offsets */
2965 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
2966 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
2967 for(j=0; j<2; j++){
2968 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
2969 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* replicate implicit weights along both table dimensions */
2973 for(j=0; j<h->ref_count[1]; j++){
2974 for(i=0; i<h->ref_count[0]; i++)
2975 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
2976 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
2977 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/* Parse pred_weight_table() (explicit weighted prediction) from the slice
 * header: luma/chroma log2 weight denominators, then per-reference
 * weight/offset pairs for each list.  Sets h->use_weight and
 * h->use_weight_chroma when any value differs from the defaults.
 * Returns 0. */
2981 static int pred_weight_table(H264Context *h){
2982 MpegEncContext * const s = &h->s;
2983 int list, i;
2984 int luma_def, chroma_def;
2986 h->use_weight= 0;
2987 h->use_weight_chroma= 0;
2988 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
2989 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* default weight is 1.0 in the fixed-point scale given by the denominators */
2990 luma_def = 1<<h->luma_log2_weight_denom;
2991 chroma_def = 1<<h->chroma_log2_weight_denom;
2993 for(list=0; list<2; list++){
2994 for(i=0; i<h->ref_count[list]; i++){
2995 int luma_weight_flag, chroma_weight_flag;
2997 luma_weight_flag= get_bits1(&s->gb);
2998 if(luma_weight_flag){
2999 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3000 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* weighting only takes effect if some value differs from the default */
3001 if( h->luma_weight[list][i] != luma_def
3002 || h->luma_offset[list][i] != 0)
3003 h->use_weight= 1;
3004 }else{
3005 h->luma_weight[list][i]= luma_def;
3006 h->luma_offset[list][i]= 0;
/* chroma weights are only coded for non-monochrome streams */
3009 if(CHROMA){
3010 chroma_weight_flag= get_bits1(&s->gb);
3011 if(chroma_weight_flag){
3012 int j;
3013 for(j=0; j<2; j++){
3014 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3015 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3016 if( h->chroma_weight[list][i][j] != chroma_def
3017 || h->chroma_offset[list][i][j] != 0)
3018 h->use_weight_chroma= 1;
3020 }else{
3021 int j;
3022 for(j=0; j<2; j++){
3023 h->chroma_weight[list][i][j]= chroma_def;
3024 h->chroma_offset[list][i][j]= 0;
/* list 1 is only present for B slices */
3029 if(h->slice_type_nos != FF_B_TYPE) break;
3031 h->use_weight= h->use_weight || h->use_weight_chroma;
3032 return 0;
/* Compute implicit weighted-prediction weights for B slices from the POC
 * distances between the current picture and each (ref0, ref1) pair.
 * Degenerate distances fall back to equal 32/32 weighting; when the current
 * POC is exactly centered between the single L0 and L1 refs, weighting is
 * disabled entirely. */
3035 static void implicit_weight_table(H264Context *h){
3036 MpegEncContext * const s = &h->s;
3037 int ref0, ref1;
3038 int cur_poc = s->current_picture_ptr->poc;
3040 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3041 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3042 h->use_weight= 0;
3043 h->use_weight_chroma= 0;
3044 return;
/* use_weight==2 selects the implicit mode; denominator fixed at 2^5 */
3047 h->use_weight= 2;
3048 h->use_weight_chroma= 2;
3049 h->luma_log2_weight_denom= 5;
3050 h->chroma_log2_weight_denom= 5;
3052 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3053 int poc0 = h->ref_list[0][ref0].poc;
3054 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3055 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb: clipped POC distances; tx: fixed-point reciprocal of td */
3056 int td = av_clip(poc1 - poc0, -128, 127);
3057 if(td){
3058 int tb = av_clip(cur_poc - poc0, -128, 127);
3059 int tx = (16384 + (FFABS(td) >> 1)) / td;
3060 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
/* out-of-range scale factors fall back to equal weighting */
3061 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3062 h->implicit_weight[ref0][ref1] = 32;
3063 else
3064 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3065 }else
3066 h->implicit_weight[ref0][ref1] = 32;
3072 * Mark a picture as no longer needed for reference. The refmask
3073 * argument allows unreferencing of individual fields or the whole frame.
3074 * If the picture becomes entirely unreferenced, but is being held for
3075 * display purposes, it is marked as such.
3076 * @param refmask mask of fields to unreference; the mask is bitwise
3077 * anded with the reference marking of pic
3078 * @return non-zero if pic becomes entirely unreferenced (except possibly
3079 * for display purposes) zero if one of the fields remains in
3080 * reference
3082 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3083 int i;
3084 if (pic->reference &= refmask) {
3085 return 0;
3086 } else {
3087 for(i = 0; h->delayed_pic[i]; i++)
3088 if(pic == h->delayed_pic[i]){
3089 pic->reference=DELAYED_PIC_REF;
3090 break;
3092 return 1;
3097 * instantaneous decoder refresh.
3099 static void idr(H264Context *h){
3100 int i;
3102 for(i=0; i<16; i++){
3103 remove_long(h, i, 0);
3105 assert(h->long_ref_count==0);
3107 for(i=0; i<h->short_ref_count; i++){
3108 unreference_pic(h, h->short_ref[i], 0);
3109 h->short_ref[i]= NULL;
3111 h->short_ref_count=0;
3112 h->prev_frame_num= 0;
3113 h->prev_frame_num_offset= 0;
3114 h->prev_poc_msb=
3115 h->prev_poc_lsb= 0;
3118 /* forget old pics after a seek */
3119 static void flush_dpb(AVCodecContext *avctx){
3120 H264Context *h= avctx->priv_data;
3121 int i;
3122 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3123 if(h->delayed_pic[i])
3124 h->delayed_pic[i]->reference= 0;
3125 h->delayed_pic[i]= NULL;
3127 h->outputed_poc= INT_MIN;
3128 idr(h);
3129 if(h->s.current_picture_ptr)
3130 h->s.current_picture_ptr->reference= 0;
3131 h->s.first_field= 0;
3132 ff_mpeg_flush(avctx);
3136 * Find a Picture in the short term reference list by frame number.
3137 * @param frame_num frame number to search for
3138 * @param idx the index into h->short_ref where returned picture is found
3139 * undefined if no picture found.
3140 * @return pointer to the found picture, or NULL if no pic with the provided
3141 * frame number is found
3143 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3144 MpegEncContext * const s = &h->s;
3145 int i;
3147 for(i=0; i<h->short_ref_count; i++){
3148 Picture *pic= h->short_ref[i];
3149 if(s->avctx->debug&FF_DEBUG_MMCO)
3150 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3151 if(pic->frame_num == frame_num) {
3152 *idx = i;
3153 return pic;
3156 return NULL;
3160 * Remove a picture from the short term reference list by its index in
3161 * that list. This does no checking on the provided index; it is assumed
3162 * to be valid. Other list entries are shifted down.
3163 * @param i index into h->short_ref of picture to remove.
3165 static void remove_short_at_index(H264Context *h, int i){
3166 assert(i >= 0 && i < h->short_ref_count);
3167 h->short_ref[i]= NULL;
3168 if (--h->short_ref_count)
3169 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3174 * @return the removed picture or NULL if an error occurs
3176 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3177 MpegEncContext * const s = &h->s;
3178 Picture *pic;
3179 int i;
3181 if(s->avctx->debug&FF_DEBUG_MMCO)
3182 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3184 pic = find_short(h, frame_num, &i);
3185 if (pic){
3186 if(unreference_pic(h, pic, ref_mask))
3187 remove_short_at_index(h, i);
3190 return pic;
3194 * Remove a picture from the long term reference list by its index in
3195 * that list.
3196 * @return the removed picture or NULL if an error occurs
3198 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3199 Picture *pic;
3201 pic= h->long_ref[i];
3202 if (pic){
3203 if(unreference_pic(h, pic, ref_mask)){
3204 assert(h->long_ref[i]->long_ref == 1);
3205 h->long_ref[i]->long_ref= 0;
3206 h->long_ref[i]= NULL;
3207 h->long_ref_count--;
3211 return pic;
3215 * print short term list
3217 static void print_short_term(H264Context *h) {
3218 uint32_t i;
3219 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3220 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3221 for(i=0; i<h->short_ref_count; i++){
3222 Picture *pic= h->short_ref[i];
3223 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3229 * print long term list
3231 static void print_long_term(H264Context *h) {
3232 uint32_t i;
3233 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3234 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3235 for(i = 0; i < 16; i++){
3236 Picture *pic= h->long_ref[i];
3237 if (pic) {
3238 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/* Execute the decoded MMCO commands against the DPB, then apply the
 * implicit rules: mark the current picture as a reference (sliding into
 * short_ref unless an MMCO already assigned it) and enforce the
 * sps.ref_frame_count limit.  Returns 0. */
3245 * Executes the reference picture marking (memory management control operations).
3247 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3248 MpegEncContext * const s = &h->s;
3249 int i, j;
/* set when an MMCO already made the current picture a reference */
3250 int current_ref_assigned=0;
3251 Picture *pic;
3253 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3254 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3256 for(i=0; i<mmco_count; i++){
3257 int structure, frame_num;
3258 if(s->avctx->debug&FF_DEBUG_MMCO)
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* opcodes addressing a short-term picture: resolve it up front */
3261 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3262 || mmco[i].opcode == MMCO_SHORT2LONG){
3263 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3264 pic = find_short(h, frame_num, &j);
3265 if(!pic){
/* not an error if the picture was already moved to the same long slot */
3266 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3267 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3268 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3269 continue;
3273 switch(mmco[i].opcode){
3274 case MMCO_SHORT2UNUSED:
3275 if(s->avctx->debug&FF_DEBUG_MMCO)
3276 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3277 remove_short(h, frame_num, structure ^ PICT_FRAME);
3278 break;
3279 case MMCO_SHORT2LONG:
/* evict any different picture occupying the target long slot first */
3280 if (h->long_ref[mmco[i].long_arg] != pic)
3281 remove_long(h, mmco[i].long_arg, 0);
3283 remove_short_at_index(h, j);
3284 h->long_ref[ mmco[i].long_arg ]= pic;
3285 if (h->long_ref[ mmco[i].long_arg ]){
3286 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3287 h->long_ref_count++;
3289 break;
3290 case MMCO_LONG2UNUSED:
3291 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3292 pic = h->long_ref[j];
3293 if (pic) {
3294 remove_long(h, j, structure ^ PICT_FRAME);
3295 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3296 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3297 break;
3298 case MMCO_LONG:
3299 // Comment below left from previous code as it is an interresting note.
3300 /* First field in pair is in short term list or
3301 * at a different long term index.
3302 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3303 * Report the problem and keep the pair where it is,
3304 * and mark this field valid.
/* store the current picture at the requested long-term index */
3307 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3308 remove_long(h, mmco[i].long_arg, 0);
3310 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3311 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3312 h->long_ref_count++;
3315 s->current_picture_ptr->reference |= s->picture_structure;
3316 current_ref_assigned=1;
3317 break;
3318 case MMCO_SET_MAX_LONG:
3319 assert(mmco[i].long_arg <= 16);
3320 // just remove the long term which index is greater than new max
3321 for(j = mmco[i].long_arg; j<16; j++){
3322 remove_long(h, j, 0);
3324 break;
3325 case MMCO_RESET:
/* full DPB reset: drop every reference and restart frame/POC counting */
3326 while(h->short_ref_count){
3327 remove_short(h, h->short_ref[0]->frame_num, 0);
3329 for(j = 0; j < 16; j++) {
3330 remove_long(h, j, 0);
3332 s->current_picture_ptr->poc=
3333 s->current_picture_ptr->field_poc[0]=
3334 s->current_picture_ptr->field_poc[1]=
3335 h->poc_lsb=
3336 h->poc_msb=
3337 h->frame_num=
3338 s->current_picture_ptr->frame_num= 0;
3339 break;
3340 default: assert(0);
3344 if (!current_ref_assigned) {
3345 /* Second field of complementary field pair; the first field of
3346 * which is already referenced. If short referenced, it
3347 * should be first entry in short_ref. If not, it must exist
3348 * in long_ref; trying to put it on the short list here is an
3349 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3351 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3352 /* Just mark the second field valid */
3353 s->current_picture_ptr->reference = PICT_FRAME;
3354 } else if (s->current_picture_ptr->long_ref) {
3355 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3356 "assignment for second field "
3357 "in complementary field pair "
3358 "(first field is long term)\n");
3359 } else {
/* normal case: push the current picture onto the short-term list */
3360 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3361 if(pic){
3362 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3365 if(h->short_ref_count)
3366 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3368 h->short_ref[0]= s->current_picture_ptr;
3369 h->short_ref_count++;
3370 s->current_picture_ptr->reference |= s->picture_structure;
/* enforce the DPB reference-frame budget from the SPS */
3374 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3376 /* We have too many reference frames, probably due to corrupted
3377 * stream. Need to discard one frame. Prevents overrun of the
3378 * short_ref and long_ref buffers.
3380 av_log(h->s.avctx, AV_LOG_ERROR,
3381 "number of reference frames exceeds max (probably "
3382 "corrupt input), discarding one\n");
3384 if (h->long_ref_count && !h->short_ref_count) {
3385 for (i = 0; i < 16; ++i)
3386 if (h->long_ref[i])
3387 break;
3389 assert(i < 16);
3390 remove_long(h, i, 0);
3391 } else {
/* discard the oldest short-term reference */
3392 pic = h->short_ref[h->short_ref_count - 1];
3393 remove_short(h, pic->frame_num, 0);
3397 print_short_term(h);
3398 print_long_term(h);
3399 return 0;
/* Parse dec_ref_pic_marking() from the slice header into h->mmco[] and
 * h->mmco_index.  For IDR slices only the long_term_reference_flag is
 * possible; otherwise either the explicit adaptive MMCO command list is
 * read, or a sliding-window removal of the oldest short-term reference is
 * synthesized when the DPB is full.  Returns 0, or -1 on bitstream error. */
3402 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3403 MpegEncContext * const s = &h->s;
3404 int i;
3406 h->mmco_index= 0;
3407 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3408 s->broken_link= get_bits1(gb) -1;
/* long_term_reference_flag: keep the IDR picture as a long-term ref */
3409 if(get_bits1(gb)){
3410 h->mmco[0].opcode= MMCO_LONG;
3411 h->mmco[0].long_arg= 0;
3412 h->mmco_index= 1;
3414 }else{
3415 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3416 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3417 MMCOOpcode opcode= get_ue_golomb(gb);
3419 h->mmco[i].opcode= opcode;
3420 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute pic_num, modulo max_pic_num */
3421 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3422 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3423 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3424 return -1;
3427 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3428 unsigned int long_arg= get_ue_golomb(gb);
/* field pictures may address up to 32 (2*16) long-term entries */
3429 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3430 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3431 return -1;
3433 h->mmco[i].long_arg= long_arg;
3436 if(opcode > (unsigned)MMCO_LONG){
3437 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3438 return -1;
3440 if(opcode == MMCO_END)
3441 break;
3443 h->mmco_index= i;
3444 }else{
/* sliding window: synthesize removal of the oldest short-term ref(s) */
3445 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3447 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3448 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3449 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3450 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3451 h->mmco_index= 1;
/* for field pictures both fields of the frame must be unreferenced */
3452 if (FIELD_PICTURE) {
3453 h->mmco[0].short_pic_num *= 2;
3454 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3455 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3456 h->mmco_index= 2;
3462 return 0;
/* Compute the picture order count (POC) of the current picture for each of
 * the three sps.poc_type modes, storing both field POCs and the picture POC
 * (the smaller of the two fields for frames).  Returns 0. */
3465 static int init_poc(H264Context *h){
3466 MpegEncContext * const s = &h->s;
3467 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3468 int field_poc[2];
3469 Picture *cur = s->current_picture_ptr;
/* frame_num wrapped -> advance the offset by one period */
3471 h->frame_num_offset= h->prev_frame_num_offset;
3472 if(h->frame_num < h->prev_frame_num)
3473 h->frame_num_offset += max_frame_num;
3475 if(h->sps.poc_type==0){
/* type 0: explicit poc_lsb in the bitstream; derive the wrapping MSB */
3476 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3478 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3479 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3480 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3481 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3482 else
3483 h->poc_msb = h->prev_poc_msb;
3484 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3485 field_poc[0] =
3486 field_poc[1] = h->poc_msb + h->poc_lsb;
3487 if(s->picture_structure == PICT_FRAME)
3488 field_poc[1] += h->delta_poc_bottom;
3489 }else if(h->sps.poc_type==1){
/* type 1: POC derived from frame_num and the SPS offset cycle */
3490 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3491 int i;
3493 if(h->sps.poc_cycle_length != 0)
3494 abs_frame_num = h->frame_num_offset + h->frame_num;
3495 else
3496 abs_frame_num = 0;
/* non-reference pictures sit between the cycle positions */
3498 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3499 abs_frame_num--;
3501 expected_delta_per_poc_cycle = 0;
3502 for(i=0; i < h->sps.poc_cycle_length; i++)
3503 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3505 if(abs_frame_num > 0){
3506 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3507 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3509 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3510 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3511 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3512 } else
3513 expectedpoc = 0;
3515 if(h->nal_ref_idc == 0)
3516 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3518 field_poc[0] = expectedpoc + h->delta_poc[0];
3519 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3521 if(s->picture_structure == PICT_FRAME)
3522 field_poc[1] += h->delta_poc[1];
3523 }else{
/* type 2: POC follows decoding order (2*frame_num, -1 for non-refs) */
3524 int poc= 2*(h->frame_num_offset + h->frame_num);
3526 if(!h->nal_ref_idc)
3527 poc--;
3529 field_poc[0]= poc;
3530 field_poc[1]= poc;
/* only store the POC of fields actually present in this picture */
3533 if(s->picture_structure != PICT_BOTTOM_FIELD)
3534 s->current_picture_ptr->field_poc[0]= field_poc[0];
3535 if(s->picture_structure != PICT_TOP_FIELD)
3536 s->current_picture_ptr->field_poc[1]= field_poc[1];
3537 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3539 return 0;
/* Set up the per-context zigzag/field scan tables.  When a SIMD idct with a
 * permuted coefficient layout is in use, the scan tables are permuted to
 * match; the _q0 variants always use the unpermuted tables for lossless
 * (transform-bypass) blocks. */
3544 * initialize scan tables
3546 static void init_scan_tables(H264Context *h){
3547 MpegEncContext * const s = &h->s;
3548 int i;
/* C idct uses the natural coefficient order: copy the tables verbatim */
3549 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3550 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3551 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3552 }else{
3553 for(i=0; i<16; i++){
/* permutation matching the optimized 4x4 idct coefficient layout */
3554 #define T(x) (x>>2) | ((x<<2) & 0xF)
3555 h->zigzag_scan[i] = T(zigzag_scan[i]);
3556 h-> field_scan[i] = T( field_scan[i]);
3557 #undef T
3560 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3561 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3562 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3563 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3564 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3565 }else{
3566 for(i=0; i<64; i++){
/* permutation matching the optimized 8x8 idct coefficient layout */
3567 #define T(x) (x>>3) | ((x&7)<<3)
3568 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3569 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3570 h->field_scan8x8[i] = T(field_scan8x8[i]);
3571 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3572 #undef T
/* bypass blocks skip the idct, so they always use unpermuted scans */
3575 if(h->sps.transform_bypass){ //FIXME same ugly
3576 h->zigzag_scan_q0 = zigzag_scan;
3577 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3578 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3579 h->field_scan_q0 = field_scan;
3580 h->field_scan8x8_q0 = field_scan8x8;
3581 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3582 }else{
3583 h->zigzag_scan_q0 = h->zigzag_scan;
3584 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3585 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3586 h->field_scan_q0 = h->field_scan;
3587 h->field_scan8x8_q0 = h->field_scan8x8;
3588 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3593 * Replicates H264 "master" context to thread contexts.
3595 static void clone_slice(H264Context *dst, H264Context *src)
3597 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3598 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3599 dst->s.current_picture = src->s.current_picture;
3600 dst->s.linesize = src->s.linesize;
3601 dst->s.uvlinesize = src->s.uvlinesize;
3602 dst->s.first_field = src->s.first_field;
3604 dst->prev_poc_msb = src->prev_poc_msb;
3605 dst->prev_poc_lsb = src->prev_poc_lsb;
3606 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3607 dst->prev_frame_num = src->prev_frame_num;
3608 dst->short_ref_count = src->short_ref_count;
3610 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3611 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3612 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3613 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3615 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3616 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3620 * decodes a slice header.
3621 * This will also call MPV_common_init() and frame_start() as needed.
3623 * @param h h264context
3624 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3626 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3628 static int decode_slice_header(H264Context *h, H264Context *h0){
3629 MpegEncContext * const s = &h->s;
3630 MpegEncContext * const s0 = &h0->s;
3631 unsigned int first_mb_in_slice;
3632 unsigned int pps_id;
3633 int num_ref_idx_active_override_flag;
3634 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3635 unsigned int slice_type, tmp, i, j;
3636 int default_ref_list_done = 0;
3637 int last_pic_structure;
3639 s->dropable= h->nal_ref_idc == 0;
3641 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3642 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3643 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3644 }else{
3645 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3646 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3649 first_mb_in_slice= get_ue_golomb(&s->gb);
3651 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3652 h0->current_slice = 0;
3653 if (!s0->first_field)
3654 s->current_picture_ptr= NULL;
3657 slice_type= get_ue_golomb(&s->gb);
3658 if(slice_type > 9){
3659 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3660 return -1;
3662 if(slice_type > 4){
3663 slice_type -= 5;
3664 h->slice_type_fixed=1;
3665 }else
3666 h->slice_type_fixed=0;
3668 slice_type= slice_type_map[ slice_type ];
3669 if (slice_type == FF_I_TYPE
3670 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3671 default_ref_list_done = 1;
3673 h->slice_type= slice_type;
3674 h->slice_type_nos= slice_type & 3;
3676 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3677 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3678 av_log(h->s.avctx, AV_LOG_ERROR,
3679 "B picture before any references, skipping\n");
3680 return -1;
3683 pps_id= get_ue_golomb(&s->gb);
3684 if(pps_id>=MAX_PPS_COUNT){
3685 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3686 return -1;
3688 if(!h0->pps_buffers[pps_id]) {
3689 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3690 return -1;
3692 h->pps= *h0->pps_buffers[pps_id];
3694 if(!h0->sps_buffers[h->pps.sps_id]) {
3695 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3696 return -1;
3698 h->sps = *h0->sps_buffers[h->pps.sps_id];
3700 if(h == h0 && h->dequant_coeff_pps != pps_id){
3701 h->dequant_coeff_pps = pps_id;
3702 init_dequant_tables(h);
3705 s->mb_width= h->sps.mb_width;
3706 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3708 h->b_stride= s->mb_width*4;
3709 h->b8_stride= s->mb_width*2;
3711 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3712 if(h->sps.frame_mbs_only_flag)
3713 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3714 else
3715 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3717 if (s->context_initialized
3718 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3719 if(h != h0)
3720 return -1; // width / height changed during parallelized decoding
3721 free_tables(h);
3722 MPV_common_end(s);
3724 if (!s->context_initialized) {
3725 if(h != h0)
3726 return -1; // we cant (re-)initialize context during parallel decoding
3727 if (MPV_common_init(s) < 0)
3728 return -1;
3729 s->first_field = 0;
3731 init_scan_tables(h);
3732 alloc_tables(h);
3734 for(i = 1; i < s->avctx->thread_count; i++) {
3735 H264Context *c;
3736 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3737 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3738 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3739 c->sps = h->sps;
3740 c->pps = h->pps;
3741 init_scan_tables(c);
3742 clone_tables(c, h);
3745 for(i = 0; i < s->avctx->thread_count; i++)
3746 if(context_init(h->thread_context[i]) < 0)
3747 return -1;
3749 s->avctx->width = s->width;
3750 s->avctx->height = s->height;
3751 s->avctx->sample_aspect_ratio= h->sps.sar;
3752 if(!s->avctx->sample_aspect_ratio.den)
3753 s->avctx->sample_aspect_ratio.den = 1;
3755 if(h->sps.timing_info_present_flag){
3756 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3757 if(h->x264_build > 0 && h->x264_build < 44)
3758 s->avctx->time_base.den *= 2;
3759 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3760 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3764 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3766 h->mb_mbaff = 0;
3767 h->mb_aff_frame = 0;
3768 last_pic_structure = s0->picture_structure;
3769 if(h->sps.frame_mbs_only_flag){
3770 s->picture_structure= PICT_FRAME;
3771 }else{
3772 if(get_bits1(&s->gb)) { //field_pic_flag
3773 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3774 } else {
3775 s->picture_structure= PICT_FRAME;
3776 h->mb_aff_frame = h->sps.mb_aff;
3779 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3781 if(h0->current_slice == 0){
3782 while(h->frame_num != h->prev_frame_num &&
3783 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3784 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3785 frame_start(h);
3786 h->prev_frame_num++;
3787 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3788 s->current_picture_ptr->frame_num= h->prev_frame_num;
3789 execute_ref_pic_marking(h, NULL, 0);
3792 /* See if we have a decoded first field looking for a pair... */
3793 if (s0->first_field) {
3794 assert(s0->current_picture_ptr);
3795 assert(s0->current_picture_ptr->data[0]);
3796 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3798 /* figure out if we have a complementary field pair */
3799 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3801 * Previous field is unmatched. Don't display it, but let it
3802 * remain for reference if marked as such.
3804 s0->current_picture_ptr = NULL;
3805 s0->first_field = FIELD_PICTURE;
3807 } else {
3808 if (h->nal_ref_idc &&
3809 s0->current_picture_ptr->reference &&
3810 s0->current_picture_ptr->frame_num != h->frame_num) {
3812 * This and previous field were reference, but had
3813 * different frame_nums. Consider this field first in
3814 * pair. Throw away previous field except for reference
3815 * purposes.
3817 s0->first_field = 1;
3818 s0->current_picture_ptr = NULL;
3820 } else {
3821 /* Second field in complementary pair */
3822 s0->first_field = 0;
3826 } else {
3827 /* Frame or first field in a potentially complementary pair */
3828 assert(!s0->current_picture_ptr);
3829 s0->first_field = FIELD_PICTURE;
3832 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3833 s0->first_field = 0;
3834 return -1;
3837 if(h != h0)
3838 clone_slice(h, h0);
3840 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3842 assert(s->mb_num == s->mb_width * s->mb_height);
3843 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3844 first_mb_in_slice >= s->mb_num){
3845 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3846 return -1;
3848 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3849 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3850 if (s->picture_structure == PICT_BOTTOM_FIELD)
3851 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3852 assert(s->mb_y < s->mb_height);
3854 if(s->picture_structure==PICT_FRAME){
3855 h->curr_pic_num= h->frame_num;
3856 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3857 }else{
3858 h->curr_pic_num= 2*h->frame_num + 1;
3859 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3862 if(h->nal_unit_type == NAL_IDR_SLICE){
3863 get_ue_golomb(&s->gb); /* idr_pic_id */
3866 if(h->sps.poc_type==0){
3867 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3869 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3870 h->delta_poc_bottom= get_se_golomb(&s->gb);
3874 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3875 h->delta_poc[0]= get_se_golomb(&s->gb);
3877 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3878 h->delta_poc[1]= get_se_golomb(&s->gb);
3881 init_poc(h);
3883 if(h->pps.redundant_pic_cnt_present){
3884 h->redundant_pic_count= get_ue_golomb(&s->gb);
3887 //set defaults, might be overridden a few lines later
3888 h->ref_count[0]= h->pps.ref_count[0];
3889 h->ref_count[1]= h->pps.ref_count[1];
3891 if(h->slice_type_nos != FF_I_TYPE){
3892 if(h->slice_type_nos == FF_B_TYPE){
3893 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3895 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3897 if(num_ref_idx_active_override_flag){
3898 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3899 if(h->slice_type_nos==FF_B_TYPE)
3900 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3902 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3903 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3904 h->ref_count[0]= h->ref_count[1]= 1;
3905 return -1;
3908 if(h->slice_type_nos == FF_B_TYPE)
3909 h->list_count= 2;
3910 else
3911 h->list_count= 1;
3912 }else
3913 h->list_count= 0;
3915 if(!default_ref_list_done){
3916 fill_default_ref_list(h);
3919 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3920 return -1;
3922 if(h->slice_type_nos!=FF_I_TYPE){
3923 s->last_picture_ptr= &h->ref_list[0][0];
3924 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
3926 if(h->slice_type_nos==FF_B_TYPE){
3927 s->next_picture_ptr= &h->ref_list[1][0];
3928 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
3931 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3932 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3933 pred_weight_table(h);
3934 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3935 implicit_weight_table(h);
3936 else
3937 h->use_weight = 0;
3939 if(h->nal_ref_idc)
3940 decode_ref_pic_marking(h0, &s->gb);
3942 if(FRAME_MBAFF)
3943 fill_mbaff_ref_list(h);
3945 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
3946 direct_dist_scale_factor(h);
3947 direct_ref_list_init(h);
3949 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3950 tmp = get_ue_golomb(&s->gb);
3951 if(tmp > 2){
3952 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3953 return -1;
3955 h->cabac_init_idc= tmp;
3958 h->last_qscale_diff = 0;
3959 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3960 if(tmp>51){
3961 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3962 return -1;
3964 s->qscale= tmp;
3965 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3966 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3967 //FIXME qscale / qp ... stuff
3968 if(h->slice_type == FF_SP_TYPE){
3969 get_bits1(&s->gb); /* sp_for_switch_flag */
3971 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3972 get_se_golomb(&s->gb); /* slice_qs_delta */
3975 h->deblocking_filter = 1;
3976 h->slice_alpha_c0_offset = 0;
3977 h->slice_beta_offset = 0;
3978 if( h->pps.deblocking_filter_parameters_present ) {
3979 tmp= get_ue_golomb(&s->gb);
3980 if(tmp > 2){
3981 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
3982 return -1;
3984 h->deblocking_filter= tmp;
3985 if(h->deblocking_filter < 2)
3986 h->deblocking_filter^= 1; // 1<->0
3988 if( h->deblocking_filter ) {
3989 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3990 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3994 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
3995 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
3996 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
3997 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
3998 h->deblocking_filter= 0;
4000 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4001 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4002 /* Cheat slightly for speed:
4003 Do not bother to deblock across slices. */
4004 h->deblocking_filter = 2;
4005 } else {
4006 h0->max_contexts = 1;
4007 if(!h0->single_decode_warning) {
4008 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4009 h0->single_decode_warning = 1;
4011 if(h != h0)
4012 return 1; // deblocking switched inside frame
4016 #if 0 //FMO
4017 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4018 slice_group_change_cycle= get_bits(&s->gb, ?);
4019 #endif
4021 h0->last_slice_type = slice_type;
4022 h->slice_num = ++h0->current_slice;
4024 for(j=0; j<2; j++){
4025 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4026 ref2frm[0]=
4027 ref2frm[1]= -1;
4028 for(i=0; i<16; i++)
4029 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4030 +(h->ref_list[j][i].reference&3);
4031 ref2frm[18+0]=
4032 ref2frm[18+1]= -1;
4033 for(i=16; i<48; i++)
4034 ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
4035 +(h->ref_list[j][i].reference&3);
4038 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4039 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4041 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4042 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4043 h->slice_num,
4044 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4045 first_mb_in_slice,
4046 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
4047 pps_id, h->frame_num,
4048 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4049 h->ref_count[0], h->ref_count[1],
4050 s->qscale,
4051 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4052 h->use_weight,
4053 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4054 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4058 return 0;
/**
 * Reads the CAVLC level_prefix: a unary code, i.e. the number of leading
 * zero bits before the next 1 bit in the bitstream.
 * The whole unary code (zeros plus the terminating 1) is consumed.
 * @return the number of leading zeros (may be 0)
 */
static inline int get_level_prefix(GetBitContext *gb){
    unsigned int buf;
    int log;

    OPEN_READER(re, gb);
    UPDATE_CACHE(re, gb);
    buf=GET_CACHE(re, gb);

    // position of the first set bit from the MSB side; the unary code
    // including its terminating 1 is (log) bits long
    log= 32 - av_log2(buf);
#ifdef TRACE
    print_bin(buf>>(32-log), log);
    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
#endif

    LAST_SKIP_BITS(re, gb, log);
    CLOSE_READER(re, gb);

    return log-1;
}
4084 static inline int get_dct8x8_allowed(H264Context *h){
4085 int i;
4086 for(i=0; i<4; i++){
4087 if(!IS_SUB_8X8(h->sub_mb_type[i])
4088 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4089 return 0;
4091 return 1;
/**
 * Decodes one CAVLC residual block (ITU-T H.264, subclause 9.2).
 * @param gb bit reader to parse from (may differ from s->gb when
 *           slice data partitioning is in use)
 * @param block output coefficient array, filled in scan order via scantable
 * @param n block index (special values: LUMA_DC_BLOCK_INDEX,
 *          CHROMA_DC_BLOCK_INDEX select the DC-only code paths)
 * @param scantable zigzag/field scan used to place coefficients
 * @param qmul dequantization table, or NULL for the chroma DC block
 *             (n > 24), which is left unquantized here
 * @param max_coeff number of coefficients in the block (4, 15 or 16)
 * @return 0 on success, <0 if an error occurred
 */
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
    MpegEncContext * const s = &h->s;
    // maps the predicted non-zero count to one of the four coeff_token VLC tables
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
    int level[16];
    int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;

    //FIXME put trailing_onex into the context

    // --- coeff_token: packs total_coeff (>>2) and trailing_ones (&3) ---
    if(n == CHROMA_DC_BLOCK_INDEX){
        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
        total_coeff= coeff_token>>2;
    }else{
        if(n == LUMA_DC_BLOCK_INDEX){
            total_coeff= pred_non_zero_count(h, 0);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
        }else{
            total_coeff= pred_non_zero_count(h, n);
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
            total_coeff= coeff_token>>2;
            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
        }
    }

    //FIXME set last_non_zero?
    if(total_coeff==0)
        return 0;
    if(total_coeff > (unsigned)max_coeff) {
        av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
        return -1;
    }

    trailing_ones= coeff_token&3;
    tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
    assert(total_coeff<=16);

    // --- levels: trailing +/-1s are coded as single sign bits ---
    for(i=0; i<trailing_ones; i++){
        level[i]= 1 - 2*get_bits1(gb);
    }

    if(i<total_coeff) {
        int level_code, mask;
        int suffix_length = total_coeff > 10 && trailing_ones < 3;
        int prefix= get_level_prefix(gb);

        //first coefficient has suffix_length equal to 0 or 1
        if(prefix<14){ //FIXME try to build a large unified VLC table for all this
            if(suffix_length)
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
            else
                level_code= (prefix<<suffix_length); //part
        }else if(prefix==14){
            if(suffix_length)
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
            else
                level_code= prefix + get_bits(gb, 4); //part
        }else{
            level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
            if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
            if(prefix>=16)
                level_code += (1<<(prefix-3))-4096;
        }

        if(trailing_ones < 3) level_code += 2;

        suffix_length = 1;
        if(level_code > 5)
            suffix_length++;
        // level_code even -> positive level, odd -> negative
        mask= -(level_code&1);
        level[i]= (((2+level_code)>>1) ^ mask) - mask;
        i++;

        //remaining coefficients have suffix_length > 0
        for(;i<total_coeff;i++) {
            static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
            prefix = get_level_prefix(gb);
            if(prefix<15){
                level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
            }else{
                level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
                if(prefix>=16)
                    level_code += (1<<(prefix-3))-4096;
            }
            mask= -(level_code&1);
            level[i]= (((2+level_code)>>1) ^ mask) - mask;
            // adaptively grow the suffix length as levels get larger
            if(level_code > suffix_limit[suffix_length])
                suffix_length++;
        }
    }

    // --- total_zeros: number of zeros between the last coeff and block start ---
    if(total_coeff == max_coeff)
        zeros_left=0;
    else{
        if(n == CHROMA_DC_BLOCK_INDEX)
            zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
        else
            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
    }

    // --- place coefficients from last to first, reading run_before between them ---
    coeff_num = zeros_left + total_coeff - 1;
    j = scantable[coeff_num];
    if(n > 24){
        // chroma DC: stored unquantized (qmul is NULL for this path)
        block[j] = level[0];
        for(i=1;i<total_coeff;i++) {
            if(zeros_left <= 0)
                run_before = 0;
            else if(zeros_left < 7){
                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
            }else{
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
            }
            zeros_left -= run_before;
            coeff_num -= 1 + run_before;
            j= scantable[ coeff_num ];

            block[j]= level[i];
        }
    }else{
        // regular blocks: dequantize while storing
        block[j] = (level[0] * qmul[j] + 32)>>6;
        for(i=1;i<total_coeff;i++) {
            if(zeros_left <= 0)
                run_before = 0;
            else if(zeros_left < 7){
                run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
            }else{
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
            }
            zeros_left -= run_before;
            coeff_num -= 1 + run_before;
            j= scantable[ coeff_num ];

            block[j]= (level[i] * qmul[j] + 32)>>6;
        }
    }

    if(zeros_left<0){
        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    return 0;
}
4245 static void predict_field_decoding_flag(H264Context *h){
4246 MpegEncContext * const s = &h->s;
4247 const int mb_xy= h->mb_xy;
4248 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4249 ? s->current_picture.mb_type[mb_xy-1]
4250 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4251 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4252 : 0;
4253 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
/**
 * Decodes a P_SKIP or B_SKIP macroblock: no residual, motion is fully
 * predicted from neighbours (P skip) or derived by direct prediction (B skip).
 */
static void decode_mb_skip(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int mb_type=0;

    // skipped MBs carry no coefficients
    memset(h->non_zero_count[mb_xy], 0, 16);
    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui

    if(MB_FIELD)
        mb_type|= MB_TYPE_INTERLACED;

    if( h->slice_type_nos == FF_B_TYPE )
    {
        // just for fill_caches. pred_direct_motion will set the real mb_type
        mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;

        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
        pred_direct_motion(h, &mb_type);
        mb_type|= MB_TYPE_SKIP;
    }
    else
    {
        int mx, my;
        mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;

        fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
        // P skip: ref index 0 with the predicted motion vector
        pred_pskip_motion(h, &mx, &my);
        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
        fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
    }

    write_back_motion(h, mb_type);
    s->current_picture.mb_type[mb_xy]= mb_type;
    s->current_picture.qscale_table[mb_xy]= s->qscale;
    h->slice_table[ mb_xy ]= h->slice_num;
    h->prev_mb_skipped= 1;
}
/**
 * Decodes one macroblock from a CAVLC-coded slice: skip handling, mb_type,
 * intra prediction modes or motion data, CBP and the residual blocks.
 * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 */
static int decode_mb_cavlc(H264Context *h){
    MpegEncContext * const s = &h->s;
    int mb_xy;
    int partition_count;
    unsigned int mb_type, cbp;
    int dct8x8_allowed= h->pps.transform_8x8_mode;

    mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;

    s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?

    tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
                down the code */
    // --- P/B skip run handling ---
    if(h->slice_type_nos != FF_I_TYPE){
        if(s->mb_skip_run==-1)
            s->mb_skip_run= get_ue_golomb(&s->gb);

        if (s->mb_skip_run--) {
            if(FRAME_MBAFF && (s->mb_y&1) == 0){
                // field flag is only coded for the last skipped MB of a pair;
                // earlier ones must predict it from neighbours
                if(s->mb_skip_run==0)
                    h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
                else
                    predict_field_decoding_flag(h);
            }
            decode_mb_skip(h);
            return 0;
        }
    }
    if(FRAME_MBAFF){
        if( (s->mb_y&1) == 0 )
            h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
    }

    h->prev_mb_skipped= 0;

    // --- mb_type: mapped through per-slice-type tables; B and P types that
    // exceed their table fall through to the shared intra range ---
    mb_type= get_ue_golomb(&s->gb);
    if(h->slice_type_nos == FF_B_TYPE){
        if(mb_type < 23){
            partition_count= b_mb_type_info[mb_type].partition_count;
            mb_type= b_mb_type_info[mb_type].type;
        }else{
            mb_type -= 23;
            goto decode_intra_mb;
        }
    }else if(h->slice_type_nos == FF_P_TYPE){
        if(mb_type < 5){
            partition_count= p_mb_type_info[mb_type].partition_count;
            mb_type= p_mb_type_info[mb_type].type;
        }else{
            mb_type -= 5;
            goto decode_intra_mb;
        }
    }else{
       assert(h->slice_type_nos == FF_I_TYPE);
        if(h->slice_type == FF_SI_TYPE && mb_type)
            mb_type--;
decode_intra_mb:
        if(mb_type > 25){
            av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
            return -1;
        }
        partition_count=0;
        cbp= i_mb_type_info[mb_type].cbp;
        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
        mb_type= i_mb_type_info[mb_type].type;
    }

    if(MB_FIELD)
        mb_type |= MB_TYPE_INTERLACED;

    h->slice_table[ mb_xy ]= h->slice_num;

    // --- I_PCM: raw samples, no prediction or residual ---
    if(IS_INTRA_PCM(mb_type)){
        unsigned int x;

        // We assume these blocks are very rare so we do not optimize it.
        align_get_bits(&s->gb);

        // The pixels are stored in the same order as levels in h->mb array.
        for(x=0; x < (CHROMA ? 384 : 256); x++){
            ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
        }

        // In deblocking, the quantizer is 0
        s->current_picture.qscale_table[mb_xy]= 0;
        // All coeffs are present
        memset(h->non_zero_count[mb_xy], 16, 16);

        s->current_picture.mb_type[mb_xy]= mb_type;
        return 0;
    }

    // in MBAFF frames the ref counts are doubled while decoding a pair
    // (restored at the end of this function)
    if(MB_MBAFF){
        h->ref_count[0] <<= 1;
        h->ref_count[1] <<= 1;
    }

    fill_caches(h, mb_type, 0);

    //mb_pred
    if(IS_INTRA(mb_type)){
        int pred_mode;
//            init_top_left_availability(h);
        if(IS_INTRA4x4(mb_type)){
            int i;
            int di = 1;
            if(dct8x8_allowed && get_bits1(&s->gb)){
                mb_type |= MB_TYPE_8x8DCT;
                di = 4; // 8x8 transform: one pred mode covers four 4x4 blocks
            }

//                fill_intra4x4_pred_table(h);
            for(i=0; i<16; i+=di){
                int mode= pred_intra_mode(h, i);

                if(!get_bits1(&s->gb)){
                    const int rem_mode= get_bits(&s->gb, 3);
                    // rem_mode skips over the predicted mode
                    mode = rem_mode + (rem_mode >= mode);
                }

                if(di==4)
                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
                else
                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
            }
            write_back_intra_pred_mode(h);
            if( check_intra4x4_pred_mode(h) < 0)
                return -1;
        }else{
            h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
            if(h->intra16x16_pred_mode < 0)
                return -1;
        }
        if(CHROMA){
            pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
            if(pred_mode < 0)
                return -1;
            h->chroma_pred_mode= pred_mode;
        }
    }else if(partition_count==4){
        // --- 8x8 sub-macroblock partitions ---
        int i, j, sub_partition_count[4], list, ref[2][4];

        if(h->slice_type_nos == FF_B_TYPE){
            for(i=0; i<4; i++){
                h->sub_mb_type[i]= get_ue_golomb(&s->gb);
                if(h->sub_mb_type[i] >=13){
                    av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
                    return -1;
                }
                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
                h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
            }
            if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
               || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
                pred_direct_motion(h, &mb_type);
                h->ref_cache[0][scan8[4]] =
                h->ref_cache[1][scan8[4]] =
                h->ref_cache[0][scan8[12]] =
                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
            }
        }else{
            assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
            for(i=0; i<4; i++){
                h->sub_mb_type[i]= get_ue_golomb(&s->gb);
                if(h->sub_mb_type[i] >=4){
                    av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
                    return -1;
                }
                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
                h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
            }
        }

        // reference indices for all non-direct sub-blocks
        for(list=0; list<h->list_count; list++){
            int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
            for(i=0; i<4; i++){
                if(IS_DIRECT(h->sub_mb_type[i])) continue;
                if(IS_DIR(h->sub_mb_type[i], 0, list)){
                    unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
                    if(tmp>=ref_count){
                        av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
                        return -1;
                    }
                    ref[list][i]= tmp;
                }else{
                 //FIXME
                    ref[list][i] = -1;
                }
            }
        }

        if(dct8x8_allowed)
            dct8x8_allowed = get_dct8x8_allowed(h);

        // motion vector differences for all non-direct sub-partitions
        for(list=0; list<h->list_count; list++){
            for(i=0; i<4; i++){
                if(IS_DIRECT(h->sub_mb_type[i])) {
                    h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
                    continue;
                }
                h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
                h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];

                if(IS_DIR(h->sub_mb_type[i], 0, list)){
                    const int sub_mb_type= h->sub_mb_type[i];
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
                    for(j=0; j<sub_partition_count[i]; j++){
                        int mx, my;
                        const int index= 4*i + block_width*j;
                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
                        mx += get_se_golomb(&s->gb);
                        my += get_se_golomb(&s->gb);
                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                        // replicate the MV over the cells the partition covers
                        if(IS_SUB_8X8(sub_mb_type)){
                            mv_cache[ 1 ][0]=
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
                            mv_cache[ 1 ][1]=
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
                        }else if(IS_SUB_8X4(sub_mb_type)){
                            mv_cache[ 1 ][0]= mx;
                            mv_cache[ 1 ][1]= my;
                        }else if(IS_SUB_4X8(sub_mb_type)){
                            mv_cache[ 8 ][0]= mx;
                            mv_cache[ 8 ][1]= my;
                        }
                        mv_cache[ 0 ][0]= mx;
                        mv_cache[ 0 ][1]= my;
                    }
                }else{
                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
                    p[0] = p[1]=
                    p[8] = p[9]= 0;
                }
            }
        }
    }else if(IS_DIRECT(mb_type)){
        pred_direct_motion(h, &mb_type);
        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
    }else{
        // --- 16x16, 16x8 or 8x16 inter partitions ---
        int list, mx, my, i;
         //FIXME we should set ref_idx_l? to 0 if we use that later ...
        if(IS_16X16(mb_type)){
            for(list=0; list<h->list_count; list++){
                    unsigned int val;
                    if(IS_DIR(mb_type, 0, list)){
                        val= get_te0_golomb(&s->gb, h->ref_count[list]);
                        if(val >= h->ref_count[list]){
                            av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                            return -1;
                        }
                    }else
                        val= LIST_NOT_USED&0xFF;
                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
            }
            for(list=0; list<h->list_count; list++){
                unsigned int val;
                if(IS_DIR(mb_type, 0, list)){
                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
                    mx += get_se_golomb(&s->gb);
                    my += get_se_golomb(&s->gb);
                    tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                    val= pack16to32(mx,my);
                }else
                    val=0;
                fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
            }
        }
        else if(IS_16X8(mb_type)){
            for(list=0; list<h->list_count; list++){
                    for(i=0; i<2; i++){
                        unsigned int val;
                        if(IS_DIR(mb_type, i, list)){
                            val= get_te0_golomb(&s->gb, h->ref_count[list]);
                            if(val >= h->ref_count[list]){
                                av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                return -1;
                            }
                        }else
                            val= LIST_NOT_USED&0xFF;
                        fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
                    }
            }
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
                        mx += get_se_golomb(&s->gb);
                        my += get_se_golomb(&s->gb);
                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                        val= pack16to32(mx,my);
                    }else
                        val=0;
                    fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
                }
            }
        }else{
            assert(IS_8X16(mb_type));
            for(list=0; list<h->list_count; list++){
                    for(i=0; i<2; i++){
                        unsigned int val;
                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
                            val= get_te0_golomb(&s->gb, h->ref_count[list]);
                            if(val >= h->ref_count[list]){
                                av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                return -1;
                            }
                        }else
                            val= LIST_NOT_USED&0xFF;
                        fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
                    }
            }
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    unsigned int val;
                    if(IS_DIR(mb_type, i, list)){
                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
                        mx += get_se_golomb(&s->gb);
                        my += get_se_golomb(&s->gb);
                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                        val= pack16to32(mx,my);
                    }else
                        val=0;
                    fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
                }
            }
        }
    }

    if(IS_INTER(mb_type))
        write_back_motion(h, mb_type);

    // --- coded block pattern (implicit for Intra_16x16 types) ---
    if(!IS_INTRA16x16(mb_type)){
        cbp= get_ue_golomb(&s->gb);
        if(cbp > 47){
            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
            return -1;
        }

        if(CHROMA){
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
            else                     cbp= golomb_to_inter_cbp   [cbp];
        }else{
            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
            else                     cbp= golomb_to_inter_cbp_gray[cbp];
        }
    }
    h->cbp = cbp;

    if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
        if(get_bits1(&s->gb)){
            mb_type |= MB_TYPE_8x8DCT;
            h->cbp_table[mb_xy]= cbp;
        }
    }
    s->current_picture.mb_type[mb_xy]= mb_type;

    // --- residual decoding ---
    if(cbp || IS_INTRA16x16(mb_type)){
        int i8x8, i4x4, chroma_idx;
        int dquant;
        GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
        const uint8_t *scan, *scan8x8, *dc_scan;

//        fill_non_zero_count_cache(h);

        if(IS_INTERLACED(mb_type)){
            scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
            scan= s->qscale ? h->field_scan : h->field_scan_q0;
            dc_scan= luma_dc_field_scan;
        }else{
            scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
            dc_scan= luma_dc_zigzag_scan;
        }

        dquant= get_se_golomb(&s->gb);

        if( dquant > 25 || dquant < -26 ){
            av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
            return -1;
        }

        // QP wraps around modulo 52
        s->qscale += dquant;
        if(((unsigned)s->qscale) > 51){
            if(s->qscale<0) s->qscale+= 52;
            else            s->qscale-= 52;
        }

        h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
        h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
        if(IS_INTRA16x16(mb_type)){
            // separate luma DC block, then 15-coeff AC blocks
            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
                return -1; //FIXME continue if partitioned and other return -1 too
            }

            assert((cbp&15) == 0 || (cbp&15) == 15);

            if(cbp&15){
                for(i8x8=0; i8x8<4; i8x8++){
                    for(i4x4=0; i4x4<4; i4x4++){
                        const int index= i4x4 + 4*i8x8;
                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
                            return -1;
                        }
                    }
                }
            }else{
                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
            }
        }else{
            for(i8x8=0; i8x8<4; i8x8++){
                if(cbp & (1<<i8x8)){
                    if(IS_8x8DCT(mb_type)){
                        DCTELEM *buf = &h->mb[64*i8x8];
                        uint8_t *nnz;
                        for(i4x4=0; i4x4<4; i4x4++){
                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
                                                h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
                                return -1;
                        }
                        nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
                        nnz[0] += nnz[1] + nnz[8] + nnz[9];
                    }else{
                        for(i4x4=0; i4x4<4; i4x4++){
                            const int index= i4x4 + 4*i8x8;

                            if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
                                return -1;
                            }
                        }
                    }
                }else{
                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
                }
            }
        }

        // chroma DC blocks (cbp bits 4-5)
        if(cbp&0x30){
            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
                    return -1;
                }
        }

        // chroma AC blocks (cbp bit 5)
        if(cbp&0x20){
            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
                const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
                for(i4x4=0; i4x4<4; i4x4++){
                    const int index= 16 + 4*chroma_idx + i4x4;
                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
                        return -1;
                    }
                }
            }
        }else{
            uint8_t * const nnz= &h->non_zero_count_cache[0];
            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
        }
    }else{
        // no residual at all: clear the whole non-zero-count cache
        uint8_t * const nnz= &h->non_zero_count_cache[0];
        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
    }
    s->current_picture.qscale_table[mb_xy]= s->qscale;
    write_back_non_zero_count(h);

    if(MB_MBAFF){
        h->ref_count[0] >>= 1;
        h->ref_count[1] >>= 1;
    }

    return 0;
}
4784 static int decode_cabac_field_decoding_flag(H264Context *h) {
4785 MpegEncContext * const s = &h->s;
4786 const int mb_x = s->mb_x;
4787 const int mb_y = s->mb_y & ~1;
4788 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4789 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4791 unsigned int ctx = 0;
4793 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4794 ctx += 1;
4796 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4797 ctx += 1;
4800 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4803 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4804 uint8_t *state= &h->cabac_state[ctx_base];
4805 int mb_type;
4807 if(intra_slice){
4808 MpegEncContext * const s = &h->s;
4809 const int mba_xy = h->left_mb_xy[0];
4810 const int mbb_xy = h->top_mb_xy;
4811 int ctx=0;
4812 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4813 ctx++;
4814 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4815 ctx++;
4816 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4817 return 0; /* I4x4 */
4818 state += 2;
4819 }else{
4820 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4821 return 0; /* I4x4 */
4824 if( get_cabac_terminate( &h->cabac ) )
4825 return 25; /* PCM */
4827 mb_type = 1; /* I16x16 */
4828 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4829 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4830 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4831 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4832 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4833 return mb_type;
4836 static int decode_cabac_mb_type( H264Context *h ) {
4837 MpegEncContext * const s = &h->s;
4839 if( h->slice_type_nos == FF_I_TYPE ) {
4840 return decode_cabac_intra_mb_type(h, 3, 1);
4841 } else if( h->slice_type_nos == FF_P_TYPE ) {
4842 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4843 /* P-type */
4844 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4845 /* P_L0_D16x16, P_8x8 */
4846 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4847 } else {
4848 /* P_L0_D8x16, P_L0_D16x8 */
4849 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4851 } else {
4852 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4854 } else if( h->slice_type_nos == FF_B_TYPE ) {
4855 const int mba_xy = h->left_mb_xy[0];
4856 const int mbb_xy = h->top_mb_xy;
4857 int ctx = 0;
4858 int bits;
4860 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4861 ctx++;
4862 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4863 ctx++;
4865 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4866 return 0; /* B_Direct_16x16 */
4868 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4869 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4872 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4873 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4874 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4875 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4876 if( bits < 8 )
4877 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4878 else if( bits == 13 ) {
4879 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4880 } else if( bits == 14 )
4881 return 11; /* B_L1_L0_8x16 */
4882 else if( bits == 15 )
4883 return 22; /* B_8x8 */
4885 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4886 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4887 } else {
4888 /* TODO SI/SP frames? */
4889 return -1;
4893 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4894 MpegEncContext * const s = &h->s;
4895 int mba_xy, mbb_xy;
4896 int ctx = 0;
4898 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4899 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4900 mba_xy = mb_xy - 1;
4901 if( (mb_y&1)
4902 && h->slice_table[mba_xy] == h->slice_num
4903 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4904 mba_xy += s->mb_stride;
4905 if( MB_FIELD ){
4906 mbb_xy = mb_xy - s->mb_stride;
4907 if( !(mb_y&1)
4908 && h->slice_table[mbb_xy] == h->slice_num
4909 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4910 mbb_xy -= s->mb_stride;
4911 }else
4912 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4913 }else{
4914 int mb_xy = h->mb_xy;
4915 mba_xy = mb_xy - 1;
4916 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4919 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4920 ctx++;
4921 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4922 ctx++;
4924 if( h->slice_type_nos == FF_B_TYPE )
4925 ctx += 13;
4926 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4929 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4930 int mode = 0;
4932 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4933 return pred_mode;
4935 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4936 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4937 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4939 if( mode >= pred_mode )
4940 return mode + 1;
4941 else
4942 return mode;
4945 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4946 const int mba_xy = h->left_mb_xy[0];
4947 const int mbb_xy = h->top_mb_xy;
4949 int ctx = 0;
4951 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4952 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4953 ctx++;
4955 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4956 ctx++;
4958 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4959 return 0;
4961 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4962 return 1;
4963 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4964 return 2;
4965 else
4966 return 3;
4969 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4970 int cbp_b, cbp_a, ctx, cbp = 0;
4972 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4973 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4975 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4976 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4977 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4978 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4979 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4980 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4981 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
4982 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
4983 return cbp;
4985 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
4986 int ctx;
4987 int cbp_a, cbp_b;
4989 cbp_a = (h->left_cbp>>4)&0x03;
4990 cbp_b = (h-> top_cbp>>4)&0x03;
4992 ctx = 0;
4993 if( cbp_a > 0 ) ctx++;
4994 if( cbp_b > 0 ) ctx += 2;
4995 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
4996 return 0;
4998 ctx = 4;
4999 if( cbp_a == 2 ) ctx++;
5000 if( cbp_b == 2 ) ctx += 2;
5001 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5003 static int decode_cabac_mb_dqp( H264Context *h) {
5004 int ctx = 0;
5005 int val = 0;
5007 if( h->last_qscale_diff != 0 )
5008 ctx++;
5010 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5011 if( ctx < 2 )
5012 ctx = 2;
5013 else
5014 ctx = 3;
5015 val++;
5016 if(val > 102) //prevent infinite loop
5017 return INT_MIN;
5020 if( val&0x01 )
5021 return (val + 1)/2;
5022 else
5023 return -(val + 1)/2;
5025 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5026 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5027 return 0; /* 8x8 */
5028 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5029 return 1; /* 8x4 */
5030 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5031 return 2; /* 4x8 */
5032 return 3; /* 4x4 */
5034 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5035 int type;
5036 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5037 return 0; /* B_Direct_8x8 */
5038 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5039 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5040 type = 3;
5041 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5042 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5043 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5044 type += 4;
5046 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5047 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5048 return type;
5051 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5052 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5055 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5056 int refa = h->ref_cache[list][scan8[n] - 1];
5057 int refb = h->ref_cache[list][scan8[n] - 8];
5058 int ref = 0;
5059 int ctx = 0;
5061 if( h->slice_type_nos == FF_B_TYPE) {
5062 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5063 ctx++;
5064 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5065 ctx += 2;
5066 } else {
5067 if( refa > 0 )
5068 ctx++;
5069 if( refb > 0 )
5070 ctx += 2;
5073 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5074 ref++;
5075 if( ctx < 4 )
5076 ctx = 4;
5077 else
5078 ctx = 5;
5079 if(ref >= 32 /*h->ref_list[list]*/){
5080 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5081 return 0; //FIXME we should return -1 and check the return everywhere
5084 return ref;
5087 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5088 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5089 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5090 int ctxbase = (l == 0) ? 40 : 47;
5091 int ctx, mvd;
5093 if( amvd < 3 )
5094 ctx = 0;
5095 else if( amvd > 32 )
5096 ctx = 2;
5097 else
5098 ctx = 1;
5100 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5101 return 0;
5103 mvd= 1;
5104 ctx= 3;
5105 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5106 mvd++;
5107 if( ctx < 6 )
5108 ctx++;
5111 if( mvd >= 9 ) {
5112 int k = 3;
5113 while( get_cabac_bypass( &h->cabac ) ) {
5114 mvd += 1 << k;
5115 k++;
5116 if(k>24){
5117 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5118 return INT_MIN;
5121 while( k-- ) {
5122 if( get_cabac_bypass( &h->cabac ) )
5123 mvd += 1 << k;
5126 return get_cabac_bypass_sign( &h->cabac, -mvd );
5129 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5130 int nza, nzb;
5131 int ctx = 0;
5133 if( is_dc ) {
5134 if( cat == 0 ) {
5135 nza = h->left_cbp&0x100;
5136 nzb = h-> top_cbp&0x100;
5137 } else {
5138 nza = (h->left_cbp>>(6+idx))&0x01;
5139 nzb = (h-> top_cbp>>(6+idx))&0x01;
5141 } else {
5142 if( cat == 4 ) {
5143 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5144 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5145 } else {
5146 assert(cat == 1 || cat == 2);
5147 nza = h->non_zero_count_cache[scan8[idx] - 1];
5148 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5152 if( nza > 0 )
5153 ctx++;
5155 if( nzb > 0 )
5156 ctx += 2;
5158 return ctx + 4 * cat;
5161 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5162 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5163 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5164 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5165 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decode the residual coefficients of one block with CABAC:
 * coded_block_flag, significance map, then levels in reverse scan order.
 *
 * Always-inlined worker: is_dc is a compile-time constant in the
 * specialized wrappers below, so the is_dc branches fold away.
 *
 * @param block     output coefficient array, indexed through scantable
 * @param cat       coefficient category, see the table further down
 * @param n         block index within the macroblock (meaning depends on cat)
 * @param scantable maps scan position -> coefficient index
 * @param qmul      dequant multipliers; unused on the DC paths
 * @param max_coeff number of coefficients in the block (4/15/16/64)
 * @param is_dc     nonzero for the DC-only categories (0 and 3)
 */
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
    /* base ctx index of significant_coeff_flag, [frame/field][cat] */
    static const int significant_coeff_flag_offset[2][6] = {
      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
    /* base ctx index of last_significant_coeff_flag, [frame/field][cat] */
    static const int last_coeff_flag_offset[2][6] = {
      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
    /* base ctx index of coeff_abs_level_minus1, per cat */
    static const int coeff_abs_level_m1_offset[6] = {
        227+0, 227+10, 227+20, 227+30, 227+39, 426
    /* significance ctx increment per 8x8 scan position, [frame/field] */
    static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
      { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
        4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
        7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
       12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
      { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
        6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
        9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
        9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
    /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
     * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
     * map node ctx => cabac ctx for level=1 */
    static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
    /* map node ctx => cabac ctx for level>1 */
    static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
    static const uint8_t coeff_abs_level_transition[2][8] = {
    /* update node ctx after decoding a level=1 */
        { 1, 2, 3, 3, 4, 5, 6, 7 },
    /* update node ctx after decoding a level>1 */
        { 4, 4, 4, 4, 5, 6, 7, 7 }

    int index[64];              /* scan positions of the significant coefficients */

    int av_unused last;
    int coeff_count = 0;        /* number of significant coefficients found */
    int node_ctx = 0;           /* level-context state machine, see tables above */

    uint8_t *significant_coeff_ctx_base;
    uint8_t *last_coeff_ctx_base;
    uint8_t *abs_level_m1_ctx_base;

#ifndef ARCH_X86
#define CABAC_ON_STACK
#endif
#ifdef CABAC_ON_STACK
#define CC &cc
    /* work on a local copy of the CABAC state so the compiler can keep it
     * in registers; written back before every return */
    CABACContext cc;
    cc.range = h->cabac.range;
    cc.low = h->cabac.low;
    cc.bytestream= h->cabac.bytestream;
#else
#define CC &h->cabac
#endif

    /* cat: 0-> DC 16x16 n = 0
     * 1-> AC 16x16 n = luma4x4idx
     * 2-> Luma4x4 n = luma4x4idx
     * 3-> DC Chroma n = iCbCr
     * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
     * 5-> Luma8x8 n = 4 * luma8x8idx
     */

    /* read coded block flag */
    if( is_dc || cat != 5 ) {  /* 8x8 luma blocks carry no coded_block_flag */
        if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
            /* block is all zero: clear the nnz cache entry and bail out */
            if( !is_dc ) {
                if( cat == 4 )
                    h->non_zero_count_cache[scan8[16+n]] = 0;
                else
                    h->non_zero_count_cache[scan8[n]] = 0;
#ifdef CABAC_ON_STACK
            h->cabac.range = cc.range ;
            h->cabac.low = cc.low ;
            h->cabac.bytestream= cc.bytestream;
#endif
            return;

    significant_coeff_ctx_base = h->cabac_state
        + significant_coeff_flag_offset[MB_FIELD][cat];
    last_coeff_ctx_base = h->cabac_state
        + last_coeff_flag_offset[MB_FIELD][cat];
    abs_level_m1_ctx_base = h->cabac_state
        + coeff_abs_level_m1_offset[cat];

    if( !is_dc && cat == 5 ) {
#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
        for(last= 0; last < coefs; last++) { \
            uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
            if( get_cabac( CC, sig_ctx )) { \
                uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
                index[coeff_count++] = last; \
                if( get_cabac( CC, last_ctx ) ) { \
                    last= max_coeff; \
                    break; \
        if( last == max_coeff -1 ) {\
            index[coeff_count++] = last;\
        const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
#if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
        /* hand-written asm significance decoders on capable x86 */
        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
    } else {
        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
#else
        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
    } else {
        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
#endif
    assert(coeff_count > 0);

    /* record the coded status for deblocking / neighbour contexts */
    if( is_dc ) {
        if( cat == 0 )
            h->cbp_table[h->mb_xy] |= 0x100;
        else
            h->cbp_table[h->mb_xy] |= 0x40 << n;
    } else {
        if( cat == 5 )
            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
        else if( cat == 4 )
            h->non_zero_count_cache[scan8[16+n]] = coeff_count;
        else {
            assert( cat == 1 || cat == 2 );
            h->non_zero_count_cache[scan8[n]] = coeff_count;

    /* decode the levels in reverse scan order */
    do {
        uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;

        int j= scantable[index[--coeff_count]];

        if( get_cabac( CC, ctx ) == 0 ) {
            /* |level| == 1 */
            node_ctx = coeff_abs_level_transition[0][node_ctx];
            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( CC, -1);
            }else{
                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
        } else {
            /* |level| >= 2: truncated unary up to 14, then bypass-coded
             * exp-golomb escape */
            int coeff_abs = 2;
            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
            node_ctx = coeff_abs_level_transition[1][node_ctx];

            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
                coeff_abs++;

            if( coeff_abs >= 15 ) {
                int j = 0;
                while( get_cabac_bypass( CC ) ) {
                    j++;

                coeff_abs=1;
                while( j-- ) {
                    coeff_abs += coeff_abs + get_cabac_bypass( CC );
                coeff_abs+= 14;

            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
            }else{
                block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
    } while( coeff_count );
#ifdef CABAC_ON_STACK
    h->cabac.range = cc.range ;
    h->cabac.low = cc.low ;
    h->cabac.bytestream= cc.bytestream;
#endif
5355 #ifndef CONFIG_SMALL
5356 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5357 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5360 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5361 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5363 #endif
5365 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5366 #ifdef CONFIG_SMALL
5367 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5368 #else
5369 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5370 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
5371 #endif
5374 static inline void compute_mb_neighbors(H264Context *h)
5376 MpegEncContext * const s = &h->s;
5377 const int mb_xy = h->mb_xy;
5378 h->top_mb_xy = mb_xy - s->mb_stride;
5379 h->left_mb_xy[0] = mb_xy - 1;
5380 if(FRAME_MBAFF){
5381 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5382 const int top_pair_xy = pair_xy - s->mb_stride;
5383 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5384 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5385 const int curr_mb_frame_flag = !MB_FIELD;
5386 const int bottom = (s->mb_y & 1);
5387 if (bottom
5388 ? !curr_mb_frame_flag // bottom macroblock
5389 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5391 h->top_mb_xy -= s->mb_stride;
5393 if (left_mb_frame_flag != curr_mb_frame_flag) {
5394 h->left_mb_xy[0] = pair_xy - 1;
5396 } else if (FIELD_PICTURE) {
5397 h->top_mb_xy -= s->mb_stride;
5399 return;
5403 * decodes a macroblock
5404 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5406 static int decode_mb_cabac(H264Context *h) {
5407 MpegEncContext * const s = &h->s;
5408 int mb_xy;
5409 int mb_type, partition_count, cbp = 0;
5410 int dct8x8_allowed= h->pps.transform_8x8_mode;
5412 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5414 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5416 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5417 if( h->slice_type_nos != FF_I_TYPE ) {
5418 int skip;
5419 /* a skipped mb needs the aff flag from the following mb */
5420 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5421 predict_field_decoding_flag(h);
5422 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5423 skip = h->next_mb_skipped;
5424 else
5425 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5426 /* read skip flags */
5427 if( skip ) {
5428 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5429 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5430 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5431 if(h->next_mb_skipped)
5432 predict_field_decoding_flag(h);
5433 else
5434 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5437 decode_mb_skip(h);
5439 h->cbp_table[mb_xy] = 0;
5440 h->chroma_pred_mode_table[mb_xy] = 0;
5441 h->last_qscale_diff = 0;
5443 return 0;
5447 if(FRAME_MBAFF){
5448 if( (s->mb_y&1) == 0 )
5449 h->mb_mbaff =
5450 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5453 h->prev_mb_skipped = 0;
5455 compute_mb_neighbors(h);
5456 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5457 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5458 return -1;
5461 if( h->slice_type_nos == FF_B_TYPE ) {
5462 if( mb_type < 23 ){
5463 partition_count= b_mb_type_info[mb_type].partition_count;
5464 mb_type= b_mb_type_info[mb_type].type;
5465 }else{
5466 mb_type -= 23;
5467 goto decode_intra_mb;
5469 } else if( h->slice_type_nos == FF_P_TYPE ) {
5470 if( mb_type < 5) {
5471 partition_count= p_mb_type_info[mb_type].partition_count;
5472 mb_type= p_mb_type_info[mb_type].type;
5473 } else {
5474 mb_type -= 5;
5475 goto decode_intra_mb;
5477 } else {
5478 if(h->slice_type == FF_SI_TYPE && mb_type)
5479 mb_type--;
5480 assert(h->slice_type_nos == FF_I_TYPE);
5481 decode_intra_mb:
5482 partition_count = 0;
5483 cbp= i_mb_type_info[mb_type].cbp;
5484 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5485 mb_type= i_mb_type_info[mb_type].type;
5487 if(MB_FIELD)
5488 mb_type |= MB_TYPE_INTERLACED;
5490 h->slice_table[ mb_xy ]= h->slice_num;
5492 if(IS_INTRA_PCM(mb_type)) {
5493 const uint8_t *ptr;
5495 // We assume these blocks are very rare so we do not optimize it.
5496 // FIXME The two following lines get the bitstream position in the cabac
5497 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5498 ptr= h->cabac.bytestream;
5499 if(h->cabac.low&0x1) ptr--;
5500 if(CABAC_BITS==16){
5501 if(h->cabac.low&0x1FF) ptr--;
5504 // The pixels are stored in the same order as levels in h->mb array.
5505 memcpy(h->mb, ptr, 256); ptr+=256;
5506 if(CHROMA){
5507 memcpy(h->mb+128, ptr, 128); ptr+=128;
5510 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5512 // All blocks are present
5513 h->cbp_table[mb_xy] = 0x1ef;
5514 h->chroma_pred_mode_table[mb_xy] = 0;
5515 // In deblocking, the quantizer is 0
5516 s->current_picture.qscale_table[mb_xy]= 0;
5517 // All coeffs are present
5518 memset(h->non_zero_count[mb_xy], 16, 16);
5519 s->current_picture.mb_type[mb_xy]= mb_type;
5520 h->last_qscale_diff = 0;
5521 return 0;
5524 if(MB_MBAFF){
5525 h->ref_count[0] <<= 1;
5526 h->ref_count[1] <<= 1;
5529 fill_caches(h, mb_type, 0);
5531 if( IS_INTRA( mb_type ) ) {
5532 int i, pred_mode;
5533 if( IS_INTRA4x4( mb_type ) ) {
5534 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5535 mb_type |= MB_TYPE_8x8DCT;
5536 for( i = 0; i < 16; i+=4 ) {
5537 int pred = pred_intra_mode( h, i );
5538 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5539 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5541 } else {
5542 for( i = 0; i < 16; i++ ) {
5543 int pred = pred_intra_mode( h, i );
5544 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5546 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5549 write_back_intra_pred_mode(h);
5550 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5551 } else {
5552 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5553 if( h->intra16x16_pred_mode < 0 ) return -1;
5555 if(CHROMA){
5556 h->chroma_pred_mode_table[mb_xy] =
5557 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5559 pred_mode= check_intra_pred_mode( h, pred_mode );
5560 if( pred_mode < 0 ) return -1;
5561 h->chroma_pred_mode= pred_mode;
5563 } else if( partition_count == 4 ) {
5564 int i, j, sub_partition_count[4], list, ref[2][4];
5566 if( h->slice_type_nos == FF_B_TYPE ) {
5567 for( i = 0; i < 4; i++ ) {
5568 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5569 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5570 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5572 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5573 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5574 pred_direct_motion(h, &mb_type);
5575 h->ref_cache[0][scan8[4]] =
5576 h->ref_cache[1][scan8[4]] =
5577 h->ref_cache[0][scan8[12]] =
5578 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5579 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5580 for( i = 0; i < 4; i++ )
5581 if( IS_DIRECT(h->sub_mb_type[i]) )
5582 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5585 } else {
5586 for( i = 0; i < 4; i++ ) {
5587 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5588 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5589 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5593 for( list = 0; list < h->list_count; list++ ) {
5594 for( i = 0; i < 4; i++ ) {
5595 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5596 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5597 if( h->ref_count[list] > 1 )
5598 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5599 else
5600 ref[list][i] = 0;
5601 } else {
5602 ref[list][i] = -1;
5604 h->ref_cache[list][ scan8[4*i]+1 ]=
5605 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5609 if(dct8x8_allowed)
5610 dct8x8_allowed = get_dct8x8_allowed(h);
5612 for(list=0; list<h->list_count; list++){
5613 for(i=0; i<4; i++){
5614 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5615 if(IS_DIRECT(h->sub_mb_type[i])){
5616 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5617 continue;
5620 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5621 const int sub_mb_type= h->sub_mb_type[i];
5622 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5623 for(j=0; j<sub_partition_count[i]; j++){
5624 int mpx, mpy;
5625 int mx, my;
5626 const int index= 4*i + block_width*j;
5627 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5628 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5629 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5631 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5632 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5633 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5635 if(IS_SUB_8X8(sub_mb_type)){
5636 mv_cache[ 1 ][0]=
5637 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5638 mv_cache[ 1 ][1]=
5639 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5641 mvd_cache[ 1 ][0]=
5642 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5643 mvd_cache[ 1 ][1]=
5644 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5645 }else if(IS_SUB_8X4(sub_mb_type)){
5646 mv_cache[ 1 ][0]= mx;
5647 mv_cache[ 1 ][1]= my;
5649 mvd_cache[ 1 ][0]= mx - mpx;
5650 mvd_cache[ 1 ][1]= my - mpy;
5651 }else if(IS_SUB_4X8(sub_mb_type)){
5652 mv_cache[ 8 ][0]= mx;
5653 mv_cache[ 8 ][1]= my;
5655 mvd_cache[ 8 ][0]= mx - mpx;
5656 mvd_cache[ 8 ][1]= my - mpy;
5658 mv_cache[ 0 ][0]= mx;
5659 mv_cache[ 0 ][1]= my;
5661 mvd_cache[ 0 ][0]= mx - mpx;
5662 mvd_cache[ 0 ][1]= my - mpy;
5664 }else{
5665 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5666 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5667 p[0] = p[1] = p[8] = p[9] = 0;
5668 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5672 } else if( IS_DIRECT(mb_type) ) {
5673 pred_direct_motion(h, &mb_type);
5674 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5675 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5676 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5677 } else {
5678 int list, mx, my, i, mpx, mpy;
5679 if(IS_16X16(mb_type)){
5680 for(list=0; list<h->list_count; list++){
5681 if(IS_DIR(mb_type, 0, list)){
5682 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5683 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5684 }else
5685 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5687 for(list=0; list<h->list_count; list++){
5688 if(IS_DIR(mb_type, 0, list)){
5689 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5691 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5692 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5693 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5695 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5696 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5697 }else
5698 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5701 else if(IS_16X8(mb_type)){
5702 for(list=0; list<h->list_count; list++){
5703 for(i=0; i<2; i++){
5704 if(IS_DIR(mb_type, i, list)){
5705 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5706 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5707 }else
5708 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5711 for(list=0; list<h->list_count; list++){
5712 for(i=0; i<2; i++){
5713 if(IS_DIR(mb_type, i, list)){
5714 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5715 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5716 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5717 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5719 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5720 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5721 }else{
5722 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5723 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5727 }else{
5728 assert(IS_8X16(mb_type));
5729 for(list=0; list<h->list_count; list++){
5730 for(i=0; i<2; i++){
5731 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5732 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5733 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5734 }else
5735 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5738 for(list=0; list<h->list_count; list++){
5739 for(i=0; i<2; i++){
5740 if(IS_DIR(mb_type, i, list)){
5741 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5742 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5743 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5745 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5746 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5747 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5748 }else{
5749 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5750 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5757 if( IS_INTER( mb_type ) ) {
5758 h->chroma_pred_mode_table[mb_xy] = 0;
5759 write_back_motion( h, mb_type );
5762 if( !IS_INTRA16x16( mb_type ) ) {
5763 cbp = decode_cabac_mb_cbp_luma( h );
5764 if(CHROMA)
5765 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5768 h->cbp_table[mb_xy] = h->cbp = cbp;
5770 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5771 if( decode_cabac_mb_transform_size( h ) )
5772 mb_type |= MB_TYPE_8x8DCT;
5774 s->current_picture.mb_type[mb_xy]= mb_type;
5776 if( cbp || IS_INTRA16x16( mb_type ) ) {
5777 const uint8_t *scan, *scan8x8, *dc_scan;
5778 const uint32_t *qmul;
5779 int dqp;
5781 if(IS_INTERLACED(mb_type)){
5782 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5783 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5784 dc_scan= luma_dc_field_scan;
5785 }else{
5786 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5787 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5788 dc_scan= luma_dc_zigzag_scan;
5791 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5792 if( dqp == INT_MIN ){
5793 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5794 return -1;
5796 s->qscale += dqp;
5797 if(((unsigned)s->qscale) > 51){
5798 if(s->qscale<0) s->qscale+= 52;
5799 else s->qscale-= 52;
5801 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5802 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5804 if( IS_INTRA16x16( mb_type ) ) {
5805 int i;
5806 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5807 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5809 if( cbp&15 ) {
5810 qmul = h->dequant4_coeff[0][s->qscale];
5811 for( i = 0; i < 16; i++ ) {
5812 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5813 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5815 } else {
5816 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5818 } else {
5819 int i8x8, i4x4;
5820 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5821 if( cbp & (1<<i8x8) ) {
5822 if( IS_8x8DCT(mb_type) ) {
5823 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5824 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5825 } else {
5826 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5827 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5828 const int index = 4*i8x8 + i4x4;
5829 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5830 //START_TIMER
5831 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5832 //STOP_TIMER("decode_residual")
5835 } else {
5836 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5837 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5842 if( cbp&0x30 ){
5843 int c;
5844 for( c = 0; c < 2; c++ ) {
5845 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5846 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5850 if( cbp&0x20 ) {
5851 int c, i;
5852 for( c = 0; c < 2; c++ ) {
5853 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5854 for( i = 0; i < 4; i++ ) {
5855 const int index = 16 + 4 * c + i;
5856 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5857 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5860 } else {
5861 uint8_t * const nnz= &h->non_zero_count_cache[0];
5862 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5863 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5865 } else {
5866 uint8_t * const nnz= &h->non_zero_count_cache[0];
5867 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5868 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5869 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5870 h->last_qscale_diff = 0;
5873 s->current_picture.qscale_table[mb_xy]= s->qscale;
5874 write_back_non_zero_count(h);
5876 if(MB_MBAFF){
5877 h->ref_count[0] >>= 1;
5878 h->ref_count[1] >>= 1;
5881 return 0;
5885 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5886 int i, d;
5887 const int index_a = qp + h->slice_alpha_c0_offset;
5888 const int alpha = (alpha_table+52)[index_a];
5889 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5891 if( bS[0] < 4 ) {
5892 int8_t tc[4];
5893 for(i=0; i<4; i++)
5894 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5895 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5896 } else {
5897 /* 16px edge length, because bS=4 is triggered by being at
5898 * the edge of an intra MB, so all 4 bS are the same */
5899 for( d = 0; d < 16; d++ ) {
5900 const int p0 = pix[-1];
5901 const int p1 = pix[-2];
5902 const int p2 = pix[-3];
5904 const int q0 = pix[0];
5905 const int q1 = pix[1];
5906 const int q2 = pix[2];
5908 if( FFABS( p0 - q0 ) < alpha &&
5909 FFABS( p1 - p0 ) < beta &&
5910 FFABS( q1 - q0 ) < beta ) {
5912 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5913 if( FFABS( p2 - p0 ) < beta)
5915 const int p3 = pix[-4];
5916 /* p0', p1', p2' */
5917 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5918 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5919 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5920 } else {
5921 /* p0' */
5922 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5924 if( FFABS( q2 - q0 ) < beta)
5926 const int q3 = pix[3];
5927 /* q0', q1', q2' */
5928 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5929 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5930 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5931 } else {
5932 /* q0' */
5933 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5935 }else{
5936 /* p0', q0' */
5937 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5938 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5940 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
5942 pix += stride;
5946 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5947 int i;
5948 const int index_a = qp + h->slice_alpha_c0_offset;
5949 const int alpha = (alpha_table+52)[index_a];
5950 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5952 if( bS[0] < 4 ) {
5953 int8_t tc[4];
5954 for(i=0; i<4; i++)
5955 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5956 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5957 } else {
5958 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
5962 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5963 int i;
5964 for( i = 0; i < 16; i++, pix += stride) {
5965 int index_a;
5966 int alpha;
5967 int beta;
5969 int qp_index;
5970 int bS_index = (i >> 1);
5971 if (!MB_FIELD) {
5972 bS_index &= ~1;
5973 bS_index |= (i & 1);
5976 if( bS[bS_index] == 0 ) {
5977 continue;
5980 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5981 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
5982 alpha = (alpha_table+52)[index_a];
5983 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
5985 if( bS[bS_index] < 4 ) {
5986 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
5987 const int p0 = pix[-1];
5988 const int p1 = pix[-2];
5989 const int p2 = pix[-3];
5990 const int q0 = pix[0];
5991 const int q1 = pix[1];
5992 const int q2 = pix[2];
5994 if( FFABS( p0 - q0 ) < alpha &&
5995 FFABS( p1 - p0 ) < beta &&
5996 FFABS( q1 - q0 ) < beta ) {
5997 int tc = tc0;
5998 int i_delta;
6000 if( FFABS( p2 - p0 ) < beta ) {
6001 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6002 tc++;
6004 if( FFABS( q2 - q0 ) < beta ) {
6005 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6006 tc++;
6009 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6010 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6011 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6012 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6014 }else{
6015 const int p0 = pix[-1];
6016 const int p1 = pix[-2];
6017 const int p2 = pix[-3];
6019 const int q0 = pix[0];
6020 const int q1 = pix[1];
6021 const int q2 = pix[2];
6023 if( FFABS( p0 - q0 ) < alpha &&
6024 FFABS( p1 - p0 ) < beta &&
6025 FFABS( q1 - q0 ) < beta ) {
6027 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6028 if( FFABS( p2 - p0 ) < beta)
6030 const int p3 = pix[-4];
6031 /* p0', p1', p2' */
6032 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6033 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6034 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6035 } else {
6036 /* p0' */
6037 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6039 if( FFABS( q2 - q0 ) < beta)
6041 const int q3 = pix[3];
6042 /* q0', q1', q2' */
6043 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6044 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6045 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6046 } else {
6047 /* q0' */
6048 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6050 }else{
6051 /* p0', q0' */
6052 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6053 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6055 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6060 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6061 int i;
6062 for( i = 0; i < 8; i++, pix += stride) {
6063 int index_a;
6064 int alpha;
6065 int beta;
6067 int qp_index;
6068 int bS_index = i;
6070 if( bS[bS_index] == 0 ) {
6071 continue;
6074 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6075 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6076 alpha = (alpha_table+52)[index_a];
6077 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6079 if( bS[bS_index] < 4 ) {
6080 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6081 const int p0 = pix[-1];
6082 const int p1 = pix[-2];
6083 const int q0 = pix[0];
6084 const int q1 = pix[1];
6086 if( FFABS( p0 - q0 ) < alpha &&
6087 FFABS( p1 - p0 ) < beta &&
6088 FFABS( q1 - q0 ) < beta ) {
6089 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6091 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6092 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6093 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6095 }else{
6096 const int p0 = pix[-1];
6097 const int p1 = pix[-2];
6098 const int q0 = pix[0];
6099 const int q1 = pix[1];
6101 if( FFABS( p0 - q0 ) < alpha &&
6102 FFABS( p1 - p0 ) < beta &&
6103 FFABS( q1 - q0 ) < beta ) {
6105 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6106 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6107 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6113 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6114 int i, d;
6115 const int index_a = qp + h->slice_alpha_c0_offset;
6116 const int alpha = (alpha_table+52)[index_a];
6117 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6118 const int pix_next = stride;
6120 if( bS[0] < 4 ) {
6121 int8_t tc[4];
6122 for(i=0; i<4; i++)
6123 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6124 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6125 } else {
6126 /* 16px edge length, see filter_mb_edgev */
6127 for( d = 0; d < 16; d++ ) {
6128 const int p0 = pix[-1*pix_next];
6129 const int p1 = pix[-2*pix_next];
6130 const int p2 = pix[-3*pix_next];
6131 const int q0 = pix[0];
6132 const int q1 = pix[1*pix_next];
6133 const int q2 = pix[2*pix_next];
6135 if( FFABS( p0 - q0 ) < alpha &&
6136 FFABS( p1 - p0 ) < beta &&
6137 FFABS( q1 - q0 ) < beta ) {
6139 const int p3 = pix[-4*pix_next];
6140 const int q3 = pix[ 3*pix_next];
6142 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6143 if( FFABS( p2 - p0 ) < beta) {
6144 /* p0', p1', p2' */
6145 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6146 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6147 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6148 } else {
6149 /* p0' */
6150 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6152 if( FFABS( q2 - q0 ) < beta) {
6153 /* q0', q1', q2' */
6154 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6155 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6156 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6157 } else {
6158 /* q0' */
6159 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6161 }else{
6162 /* p0', q0' */
6163 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6164 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6166 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6168 pix++;
6173 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6174 int i;
6175 const int index_a = qp + h->slice_alpha_c0_offset;
6176 const int alpha = (alpha_table+52)[index_a];
6177 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6179 if( bS[0] < 4 ) {
6180 int8_t tc[4];
6181 for(i=0; i<4; i++)
6182 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6183 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6184 } else {
6185 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6189 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6190 MpegEncContext * const s = &h->s;
6191 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6192 int mb_xy, mb_type;
6193 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6195 mb_xy = h->mb_xy;
6197 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6198 1 ||
6199 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6200 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6201 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6202 return;
6204 assert(!FRAME_MBAFF);
6206 mb_type = s->current_picture.mb_type[mb_xy];
6207 qp = s->current_picture.qscale_table[mb_xy];
6208 qp0 = s->current_picture.qscale_table[mb_xy-1];
6209 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6210 qpc = get_chroma_qp( h, 0, qp );
6211 qpc0 = get_chroma_qp( h, 0, qp0 );
6212 qpc1 = get_chroma_qp( h, 0, qp1 );
6213 qp0 = (qp + qp0 + 1) >> 1;
6214 qp1 = (qp + qp1 + 1) >> 1;
6215 qpc0 = (qpc + qpc0 + 1) >> 1;
6216 qpc1 = (qpc + qpc1 + 1) >> 1;
6217 qp_thresh = 15 - h->slice_alpha_c0_offset;
6218 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6219 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6220 return;
6222 if( IS_INTRA(mb_type) ) {
6223 int16_t bS4[4] = {4,4,4,4};
6224 int16_t bS3[4] = {3,3,3,3};
6225 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6226 if( IS_8x8DCT(mb_type) ) {
6227 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6228 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6229 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6230 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6231 } else {
6232 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6233 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6234 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6235 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6236 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6237 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6238 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6239 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6241 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6242 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6243 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6244 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6245 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6246 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6247 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6248 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6249 return;
6250 } else {
6251 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6252 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6253 int edges;
6254 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6255 edges = 4;
6256 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6257 } else {
6258 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6259 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6260 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6261 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6262 ? 3 : 0;
6263 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6264 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6265 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6266 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6268 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6269 bSv[0][0] = 0x0004000400040004ULL;
6270 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6271 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6273 #define FILTER(hv,dir,edge)\
6274 if(bSv[dir][edge]) {\
6275 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6276 if(!(edge&1)) {\
6277 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6278 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6281 if( edges == 1 ) {
6282 FILTER(v,0,0);
6283 FILTER(h,1,0);
6284 } else if( IS_8x8DCT(mb_type) ) {
6285 FILTER(v,0,0);
6286 FILTER(v,0,2);
6287 FILTER(h,1,0);
6288 FILTER(h,1,2);
6289 } else {
6290 FILTER(v,0,0);
6291 FILTER(v,0,1);
6292 FILTER(v,0,2);
6293 FILTER(v,0,3);
6294 FILTER(h,1,0);
6295 FILTER(h,1,1);
6296 FILTER(h,1,2);
6297 FILTER(h,1,3);
6299 #undef FILTER
6303 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6304 MpegEncContext * const s = &h->s;
6305 const int mb_xy= mb_x + mb_y*s->mb_stride;
6306 const int mb_type = s->current_picture.mb_type[mb_xy];
6307 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6308 int first_vertical_edge_done = 0;
6309 int dir;
6311 //for sufficiently low qp, filtering wouldn't do anything
6312 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6313 if(!FRAME_MBAFF){
6314 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6315 int qp = s->current_picture.qscale_table[mb_xy];
6316 if(qp <= qp_thresh
6317 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6318 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6319 return;
6323 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6324 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6325 int top_type, left_type[2];
6326 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6327 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6328 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
6330 if(IS_8x8DCT(top_type)){
6331 h->non_zero_count_cache[4+8*0]=
6332 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6333 h->non_zero_count_cache[6+8*0]=
6334 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6336 if(IS_8x8DCT(left_type[0])){
6337 h->non_zero_count_cache[3+8*1]=
6338 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6340 if(IS_8x8DCT(left_type[1])){
6341 h->non_zero_count_cache[3+8*3]=
6342 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6345 if(IS_8x8DCT(mb_type)){
6346 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6347 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6349 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6350 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6352 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6353 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6355 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6356 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6360 if (FRAME_MBAFF
6361 // left mb is in picture
6362 && h->slice_table[mb_xy-1] != 255
6363 // and current and left pair do not have the same interlaced type
6364 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6365 // and left mb is in the same slice if deblocking_filter == 2
6366 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6367 /* First vertical edge is different in MBAFF frames
6368 * There are 8 different bS to compute and 2 different Qp
6370 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6371 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6372 int16_t bS[8];
6373 int qp[2];
6374 int bqp[2];
6375 int rqp[2];
6376 int mb_qp, mbn0_qp, mbn1_qp;
6377 int i;
6378 first_vertical_edge_done = 1;
6380 if( IS_INTRA(mb_type) )
6381 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6382 else {
6383 for( i = 0; i < 8; i++ ) {
6384 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6386 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6387 bS[i] = 4;
6388 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6389 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
6390 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
6392 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
6393 bS[i] = 2;
6394 else
6395 bS[i] = 1;
6399 mb_qp = s->current_picture.qscale_table[mb_xy];
6400 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6401 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6402 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6403 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6404 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6405 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6406 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6407 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6408 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6409 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6410 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6411 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6413 /* Filter edge */
6414 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6415 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6416 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6417 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6418 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6420 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6421 for( dir = 0; dir < 2; dir++ )
6423 int edge;
6424 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6425 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6426 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &15 ][0] + (MB_MBAFF ? 20 : 2);
6427 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&15 ][0] + (MB_MBAFF ? 20 : 2);
6428 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6430 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6431 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6432 // how often to recheck mv-based bS when iterating between edges
6433 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6434 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6435 // how often to recheck mv-based bS when iterating along each edge
6436 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6438 if (first_vertical_edge_done) {
6439 start = 1;
6440 first_vertical_edge_done = 0;
6443 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6444 start = 1;
6446 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6447 && !IS_INTERLACED(mb_type)
6448 && IS_INTERLACED(mbm_type)
6450 // This is a special case in the norm where the filtering must
6451 // be done twice (one each of the field) even if we are in a
6452 // frame macroblock.
6454 static const int nnz_idx[4] = {4,5,6,3};
6455 unsigned int tmp_linesize = 2 * linesize;
6456 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6457 int mbn_xy = mb_xy - 2 * s->mb_stride;
6458 int qp;
6459 int i, j;
6460 int16_t bS[4];
6462 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6463 if( IS_INTRA(mb_type) ||
6464 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6465 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6466 } else {
6467 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6468 for( i = 0; i < 4; i++ ) {
6469 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6470 mbn_nnz[nnz_idx[i]] != 0 )
6471 bS[i] = 2;
6472 else
6473 bS[i] = 1;
6476 // Do not use s->qscale as luma quantizer because it has not the same
6477 // value in IPCM macroblocks.
6478 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6479 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6480 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6481 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6482 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6483 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6484 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6485 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6488 start = 1;
6491 /* Calculate bS */
6492 for( edge = start; edge < edges; edge++ ) {
6493 /* mbn_xy: neighbor macroblock */
6494 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6495 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6496 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
6497 int16_t bS[4];
6498 int qp;
6500 if( (edge&1) && IS_8x8DCT(mb_type) )
6501 continue;
6503 if( IS_INTRA(mb_type) ||
6504 IS_INTRA(mbn_type) ) {
6505 int value;
6506 if (edge == 0) {
6507 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6508 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6510 value = 4;
6511 } else {
6512 value = 3;
6514 } else {
6515 value = 3;
6517 bS[0] = bS[1] = bS[2] = bS[3] = value;
6518 } else {
6519 int i, l;
6520 int mv_done;
6522 if( edge & mask_edge ) {
6523 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6524 mv_done = 1;
6526 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6527 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6528 mv_done = 1;
6530 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6531 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6532 int bn_idx= b_idx - (dir ? 8:1);
6533 int v = 0;
6535 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6536 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6537 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6538 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6541 if(h->slice_type_nos == FF_B_TYPE && v){
6542 v=0;
6543 for( l = 0; !v && l < 2; l++ ) {
6544 int ln= 1-l;
6545 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6546 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6547 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6551 bS[0] = bS[1] = bS[2] = bS[3] = v;
6552 mv_done = 1;
6554 else
6555 mv_done = 0;
6557 for( i = 0; i < 4; i++ ) {
6558 int x = dir == 0 ? edge : i;
6559 int y = dir == 0 ? i : edge;
6560 int b_idx= 8 + 4 + x + 8*y;
6561 int bn_idx= b_idx - (dir ? 8:1);
6563 if( h->non_zero_count_cache[b_idx] != 0 ||
6564 h->non_zero_count_cache[bn_idx] != 0 ) {
6565 bS[i] = 2;
6567 else if(!mv_done)
6569 bS[i] = 0;
6570 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6571 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
6572 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6573 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6574 bS[i] = 1;
6575 break;
6579 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6580 bS[i] = 0;
6581 for( l = 0; l < 2; l++ ) {
6582 int ln= 1-l;
6583 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
6584 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6585 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6586 bS[i] = 1;
6587 break;
6594 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6595 continue;
6598 /* Filter edge */
6599 // Do not use s->qscale as luma quantizer because it has not the same
6600 // value in IPCM macroblocks.
6601 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6602 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6603 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6604 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6605 if( dir == 0 ) {
6606 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6607 if( (edge&1) == 0 ) {
6608 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6609 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6610 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6611 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6613 } else {
6614 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6615 if( (edge&1) == 0 ) {
6616 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6617 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6618 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6619 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6626 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6627 MpegEncContext * const s = &h->s;
6628 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6630 s->mb_skip_run= -1;
6632 if( h->pps.cabac ) {
6633 int i;
6635 /* realign */
6636 align_get_bits( &s->gb );
6638 /* init cabac */
6639 ff_init_cabac_states( &h->cabac);
6640 ff_init_cabac_decoder( &h->cabac,
6641 s->gb.buffer + get_bits_count(&s->gb)/8,
6642 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6643 /* calculate pre-state */
6644 for( i= 0; i < 460; i++ ) {
6645 int pre;
6646 if( h->slice_type_nos == FF_I_TYPE )
6647 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6648 else
6649 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6651 if( pre <= 63 )
6652 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6653 else
6654 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6657 for(;;){
6658 //START_TIMER
6659 int ret = decode_mb_cabac(h);
6660 int eos;
6661 //STOP_TIMER("decode_mb_cabac")
6663 if(ret>=0) hl_decode_mb(h);
6665 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6666 s->mb_y++;
6668 if(ret>=0) ret = decode_mb_cabac(h);
6670 if(ret>=0) hl_decode_mb(h);
6671 s->mb_y--;
6673 eos = get_cabac_terminate( &h->cabac );
6675 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6676 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6677 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6678 return -1;
6681 if( ++s->mb_x >= s->mb_width ) {
6682 s->mb_x = 0;
6683 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6684 ++s->mb_y;
6685 if(FIELD_OR_MBAFF_PICTURE) {
6686 ++s->mb_y;
6690 if( eos || s->mb_y >= s->mb_height ) {
6691 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6692 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6693 return 0;
6697 } else {
6698 for(;;){
6699 int ret = decode_mb_cavlc(h);
6701 if(ret>=0) hl_decode_mb(h);
6703 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6704 s->mb_y++;
6705 ret = decode_mb_cavlc(h);
6707 if(ret>=0) hl_decode_mb(h);
6708 s->mb_y--;
6711 if(ret<0){
6712 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6713 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6715 return -1;
6718 if(++s->mb_x >= s->mb_width){
6719 s->mb_x=0;
6720 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6721 ++s->mb_y;
6722 if(FIELD_OR_MBAFF_PICTURE) {
6723 ++s->mb_y;
6725 if(s->mb_y >= s->mb_height){
6726 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6728 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6729 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6731 return 0;
6732 }else{
6733 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6735 return -1;
6740 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6741 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6742 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6743 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6745 return 0;
6746 }else{
6747 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6749 return -1;
6755 #if 0
6756 for(;s->mb_y < s->mb_height; s->mb_y++){
6757 for(;s->mb_x < s->mb_width; s->mb_x++){
6758 int ret= decode_mb(h);
6760 hl_decode_mb(h);
6762 if(ret<0){
6763 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6764 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6766 return -1;
6769 if(++s->mb_x >= s->mb_width){
6770 s->mb_x=0;
6771 if(++s->mb_y >= s->mb_height){
6772 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6773 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6775 return 0;
6776 }else{
6777 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6779 return -1;
6784 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6785 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6786 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6788 return 0;
6789 }else{
6790 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6792 return -1;
6796 s->mb_x=0;
6797 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6799 #endif
6800 return -1; //not reached
6803 static int decode_unregistered_user_data(H264Context *h, int size){
6804 MpegEncContext * const s = &h->s;
6805 uint8_t user_data[16+256];
6806 int e, build, i;
6808 if(size<16)
6809 return -1;
6811 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6812 user_data[i]= get_bits(&s->gb, 8);
6815 user_data[i]= 0;
6816 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6817 if(e==1 && build>=0)
6818 h->x264_build= build;
6820 if(s->avctx->debug & FF_DEBUG_BUGS)
6821 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6823 for(; i<size; i++)
6824 skip_bits(&s->gb, 8);
6826 return 0;
6829 static int decode_sei(H264Context *h){
6830 MpegEncContext * const s = &h->s;
6832 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6833 int size, type;
6835 type=0;
6837 type+= show_bits(&s->gb, 8);
6838 }while(get_bits(&s->gb, 8) == 255);
6840 size=0;
6842 size+= show_bits(&s->gb, 8);
6843 }while(get_bits(&s->gb, 8) == 255);
6845 switch(type){
6846 case 5:
6847 if(decode_unregistered_user_data(h, size) < 0)
6848 return -1;
6849 break;
6850 default:
6851 skip_bits(&s->gb, 8*size);
6854 //FIXME check bits here
6855 align_get_bits(&s->gb);
6858 return 0;
6861 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6862 MpegEncContext * const s = &h->s;
6863 int cpb_count, i;
6864 cpb_count = get_ue_golomb(&s->gb) + 1;
6865 get_bits(&s->gb, 4); /* bit_rate_scale */
6866 get_bits(&s->gb, 4); /* cpb_size_scale */
6867 for(i=0; i<cpb_count; i++){
6868 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6869 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6870 get_bits1(&s->gb); /* cbr_flag */
6872 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6873 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6874 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6875 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Decode VUI (Video Usability Information) appended to the SPS
 * (spec Annex E.1.1).
 *
 * Stores the sample aspect ratio, timing info, and the bitstream
 * restriction (num_reorder_frames) in *sps; all other fields are read
 * only to advance the bitstream position.
 *
 * @return 0 on success, -1 on an illegal aspect ratio or
 *         num_reorder_frames value
 */
static inline int decode_vui_parameters(H264Context *h, SPS *sps){
    MpegEncContext * const s = &h->s;
    int aspect_ratio_info_present_flag;
    unsigned int aspect_ratio_idc;
    int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;

    aspect_ratio_info_present_flag= get_bits1(&s->gb);

    if( aspect_ratio_info_present_flag ) {
        aspect_ratio_idc= get_bits(&s->gb, 8);
        if( aspect_ratio_idc == EXTENDED_SAR ) {
            /* explicit sample aspect ratio */
            sps->sar.num= get_bits(&s->gb, 16);
            sps->sar.den= get_bits(&s->gb, 16);
        }else if(aspect_ratio_idc < FF_ARRAY_ELEMS(pixel_aspect)){
            /* one of the predefined aspect ratios */
            sps->sar= pixel_aspect[aspect_ratio_idc];
        }else{
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
            return -1;
    }else{
        /* unknown aspect ratio */
        sps->sar.num=
        sps->sar.den= 0;
//      s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);

    if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
        get_bits1(&s->gb);      /* overscan_appropriate_flag */

    if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
        get_bits(&s->gb, 3);    /* video_format */
        get_bits1(&s->gb);      /* video_full_range_flag */
        if(get_bits1(&s->gb)){  /* colour_description_present_flag */
            get_bits(&s->gb, 8); /* colour_primaries */
            get_bits(&s->gb, 8); /* transfer_characteristics */
            get_bits(&s->gb, 8); /* matrix_coefficients */

    if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */

    sps->timing_info_present_flag = get_bits1(&s->gb);
    if(sps->timing_info_present_flag){
        sps->num_units_in_tick = get_bits_long(&s->gb, 32);
        sps->time_scale = get_bits_long(&s->gb, 32);
        sps->fixed_frame_rate_flag = get_bits1(&s->gb);

    /* HRD parameters are parsed for position only; values are discarded */
    nal_hrd_parameters_present_flag = get_bits1(&s->gb);
    if(nal_hrd_parameters_present_flag)
        decode_hrd_parameters(h, sps);
    vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
    if(vcl_hrd_parameters_present_flag)
        decode_hrd_parameters(h, sps);
    if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
        get_bits1(&s->gb);      /* low_delay_hrd_flag */
    get_bits1(&s->gb);          /* pic_struct_present_flag */

    sps->bitstream_restriction_flag = get_bits1(&s->gb);
    if(sps->bitstream_restriction_flag){
        unsigned int num_reorder_frames;
        get_bits1(&s->gb);      /* motion_vectors_over_pic_boundaries_flag */
        get_ue_golomb(&s->gb);  /* max_bytes_per_pic_denom */
        get_ue_golomb(&s->gb);  /* max_bits_per_mb_denom */
        get_ue_golomb(&s->gb);  /* log2_max_mv_length_horizontal */
        get_ue_golomb(&s->gb);  /* log2_max_mv_length_vertical */
        num_reorder_frames= get_ue_golomb(&s->gb);
        get_ue_golomb(&s->gb);  /*max_dec_frame_buffering*/

        /* delayed_pic[] elsewhere cannot hold more than 16 frames */
        if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
            return -1;
        sps->num_reorder_frames= num_reorder_frames;

    return 0;
/**
 * Decode one quantization scaling list (size 16 for 4x4, 64 for 8x8).
 *
 * If the list is not present in the bitstream, 'fallback_list' is
 * copied (SPS/PPS prediction). If the first delta zeroes the first
 * coefficient, the JVT default list is used instead. Otherwise each
 * coefficient is delta-coded against the previous one in zigzag order;
 * a zero 'next' repeats the last value for the remaining entries.
 */
static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
                                const uint8_t *jvt_list, const uint8_t *fallback_list){
    MpegEncContext * const s = &h->s;
    int i, last = 8, next = 8;
    /* scan order depends on block size */
    const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
    if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
        memcpy(factors, fallback_list, size*sizeof(uint8_t));
    else
        for(i=0;i<size;i++){
            if(next)
                next = (last + get_se_golomb(&s->gb)) & 0xff;   /* delta_scale, wrapped to 8 bits */
            if(!i && !next){ /* matrix not written, we use the preset one */
                memcpy(factors, jvt_list, size*sizeof(uint8_t));
                break;
            }
            /* next==0 means "repeat the last value from here on" */
            last = factors[scan[i]] = next ? next : last;
/**
 * Decode all scaling matrices of an SPS (is_sps=1) or PPS (is_sps=0).
 *
 * Each list falls back either to the SPS lists (when parsing a PPS and
 * the SPS carried matrices) or to the JVT defaults; intra/inter chroma
 * lists additionally fall back to the previously decoded list of the
 * same prediction type. The 8x8 lists exist only in the SPS or when the
 * PPS enables the 8x8 transform.
 */
static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
                                    uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
    MpegEncContext * const s = &h->s;
    /* PPS lists may be predicted from the SPS lists if those were present */
    int fallback_sps = !is_sps && sps->scaling_matrix_present;
    const uint8_t *fallback[4] = {
        fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
        fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
        fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
    if(get_bits1(&s->gb)){  /* seq/pic_scaling_matrix_present_flag */
        sps->scaling_matrix_present |= is_sps;
        decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
        decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
        decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
        decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
        decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
        decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
        if(is_sps || pps->transform_8x8_mode){
            decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
7006 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7008 static void *
7009 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7010 const size_t size, const char *name)
7012 if(id>=max) {
7013 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7014 return NULL;
7017 if(!vec[id]) {
7018 vec[id] = av_mallocz(size);
7019 if(vec[id] == NULL)
7020 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
7022 return vec[id];
/**
 * Decode a sequence parameter set (SPS) NAL unit from s->gb
 * (spec 7.3.2.1) into h->sps_buffers[sps_id].
 *
 * @return 0 on success, -1 on an out-of-range id, illegal POC type,
 *         too many reference frames, or dimension overflow
 */
static inline int decode_seq_parameter_set(H264Context *h){
    MpegEncContext * const s = &h->s;
    int profile_idc, level_idc;
    unsigned int sps_id, tmp, mb_width, mb_height;
    int i;
    SPS *sps;

    profile_idc= get_bits(&s->gb, 8);
    get_bits1(&s->gb);   //constraint_set0_flag
    get_bits1(&s->gb);   //constraint_set1_flag
    get_bits1(&s->gb);   //constraint_set2_flag
    get_bits1(&s->gb);   //constraint_set3_flag
    get_bits(&s->gb, 4); // reserved
    level_idc= get_bits(&s->gb, 8);
    sps_id= get_ue_golomb(&s->gb);

    sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
    if(sps == NULL)
        return -1;

    sps->profile_idc= profile_idc;
    sps->level_idc= level_idc;

    /* default to flat scaling matrices (all coefficients 16) */
    memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
    memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
    sps->scaling_matrix_present = 0;

    if(sps->profile_idc >= 100){ //high profile
        /* NOTE(review): chroma_format_idc is not range-checked here but is
         * later used to index a 4-entry name table below — verify upstream */
        sps->chroma_format_idc= get_ue_golomb(&s->gb);
        if(sps->chroma_format_idc == 3)
            get_bits1(&s->gb);  //residual_color_transform_flag
        get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
        get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
        sps->transform_bypass = get_bits1(&s->gb);
        decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
    }else{
        /* non-high profiles are always 4:2:0 */
        sps->chroma_format_idc= 1;

    sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
    sps->poc_type= get_ue_golomb(&s->gb);

    if(sps->poc_type == 0){ //FIXME #define
        sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
    } else if(sps->poc_type == 1){//FIXME #define
        sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
        sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
        sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
        tmp= get_ue_golomb(&s->gb);

        /* bound the cycle length by the offset table size */
        if(tmp >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
            av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
            return -1;
        sps->poc_cycle_length= tmp;

        for(i=0; i<sps->poc_cycle_length; i++)
            sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
    }else if(sps->poc_type != 2){
        av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
        return -1;

    tmp= get_ue_golomb(&s->gb);
    if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
        av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
        return -1;
    sps->ref_frame_count= tmp;
    sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
    mb_width= get_ue_golomb(&s->gb) + 1;
    mb_height= get_ue_golomb(&s->gb) + 1;
    /* reject dimensions that would overflow 16*mb computations */
    if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
       avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
        av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
        return -1;
    sps->mb_width = mb_width;
    sps->mb_height= mb_height;

    sps->frame_mbs_only_flag= get_bits1(&s->gb);
    if(!sps->frame_mbs_only_flag)
        sps->mb_aff= get_bits1(&s->gb);
    else
        sps->mb_aff= 0;

    sps->direct_8x8_inference_flag= get_bits1(&s->gb);

#ifndef ALLOW_INTERLACE
    if(sps->mb_aff)
        av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
#endif
    sps->crop= get_bits1(&s->gb);
    if(sps->crop){
        sps->crop_left  = get_ue_golomb(&s->gb);
        sps->crop_right = get_ue_golomb(&s->gb);
        sps->crop_top   = get_ue_golomb(&s->gb);
        sps->crop_bottom= get_ue_golomb(&s->gb);
        if(sps->crop_left || sps->crop_top){
            av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
        /* bottom crop limit is halved for interlaced (field) content */
        if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
            av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
    }else{
        sps->crop_left  =
        sps->crop_right =
        sps->crop_top   =
        sps->crop_bottom= 0;

    sps->vui_parameters_present_flag= get_bits1(&s->gb);
    if( sps->vui_parameters_present_flag )
        decode_vui_parameters(h, sps);

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
               sps_id, sps->profile_idc, sps->level_idc,
               sps->poc_type,
               sps->ref_frame_count,
               sps->mb_width, sps->mb_height,
               sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
               sps->direct_8x8_inference_flag ? "8B8" : "",
               sps->crop_left, sps->crop_right,
               sps->crop_top, sps->crop_bottom,
               sps->vui_parameters_present_flag ? "VUI" : "",
               ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
    return 0;
7157 static void
7158 build_qp_table(PPS *pps, int t, int index)
7160 int i;
7161 for(i = 0; i < 52; i++)
7162 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
7165 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7166 MpegEncContext * const s = &h->s;
7167 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7168 PPS *pps;
7170 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7171 if(pps == NULL)
7172 return -1;
7174 tmp= get_ue_golomb(&s->gb);
7175 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7176 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7177 return -1;
7179 pps->sps_id= tmp;
7181 pps->cabac= get_bits1(&s->gb);
7182 pps->pic_order_present= get_bits1(&s->gb);
7183 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7184 if(pps->slice_group_count > 1 ){
7185 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7186 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7187 switch(pps->mb_slice_group_map_type){
7188 case 0:
7189 #if 0
7190 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7191 | run_length[ i ] |1 |ue(v) |
7192 #endif
7193 break;
7194 case 2:
7195 #if 0
7196 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7197 |{ | | |
7198 | top_left_mb[ i ] |1 |ue(v) |
7199 | bottom_right_mb[ i ] |1 |ue(v) |
7200 | } | | |
7201 #endif
7202 break;
7203 case 3:
7204 case 4:
7205 case 5:
7206 #if 0
7207 | slice_group_change_direction_flag |1 |u(1) |
7208 | slice_group_change_rate_minus1 |1 |ue(v) |
7209 #endif
7210 break;
7211 case 6:
7212 #if 0
7213 | slice_group_id_cnt_minus1 |1 |ue(v) |
7214 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7215 |) | | |
7216 | slice_group_id[ i ] |1 |u(v) |
7217 #endif
7218 break;
7221 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7222 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7223 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7224 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7225 pps->ref_count[0]= pps->ref_count[1]= 1;
7226 return -1;
7229 pps->weighted_pred= get_bits1(&s->gb);
7230 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7231 pps->init_qp= get_se_golomb(&s->gb) + 26;
7232 pps->init_qs= get_se_golomb(&s->gb) + 26;
7233 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7234 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7235 pps->constrained_intra_pred= get_bits1(&s->gb);
7236 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7238 pps->transform_8x8_mode= 0;
7239 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7240 memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
7241 memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
7243 if(get_bits_count(&s->gb) < bit_length){
7244 pps->transform_8x8_mode= get_bits1(&s->gb);
7245 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7246 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7247 } else {
7248 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7251 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7252 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7253 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7254 h->pps.chroma_qp_diff= 1;
7256 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7257 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7258 pps_id, pps->sps_id,
7259 pps->cabac ? "CABAC" : "CAVLC",
7260 pps->slice_group_count,
7261 pps->ref_count[0], pps->ref_count[1],
7262 pps->weighted_pred ? "weighted" : "",
7263 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7264 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7265 pps->constrained_intra_pred ? "CONSTR" : "",
7266 pps->redundant_pic_cnt_present ? "REDU" : "",
7267 pps->transform_8x8_mode ? "8x8DCT" : ""
7271 return 0;
/**
 * Call decode_slice() for each context.
 *
 * With a single context the slice is decoded inline; otherwise the
 * buffered slice contexts are run through avctx->execute() and the
 * position/error state of the last context is copied back into the
 * master context.
 *
 * @param h h264 master context
 * @param context_count number of contexts to execute
 */
static void execute_decode_slices(H264Context *h, int context_count){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    H264Context *hx;
    int i;

    if(context_count == 1) {
        decode_slice(avctx, h);
    } else {
        /* propagate error-resilience settings; context 0 is the master
         * and already carries them */
        for(i = 1; i < context_count; i++) {
            hx = h->thread_context[i];
            hx->s.error_recognition = avctx->error_recognition;
            hx->s.error_count = 0;

        avctx->execute(avctx, (void *)decode_slice,
                       (void **)h->thread_context, NULL, context_count);

        /* pull back stuff from slices to master context */
        hx = h->thread_context[context_count - 1];
        s->mb_x = hx->s.mb_x;
        s->mb_y = hx->s.mb_y;
        s->dropable = hx->s.dropable;
        s->picture_structure = hx->s.picture_structure;
        for(i = 1; i < context_count; i++)
            h->s.error_count += h->thread_context[i]->s.error_count;
7310 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7311 MpegEncContext * const s = &h->s;
7312 AVCodecContext * const avctx= s->avctx;
7313 int buf_index=0;
7314 H264Context *hx; ///< thread context
7315 int context_count = 0;
7317 h->max_contexts = avctx->thread_count;
7318 #if 0
7319 int i;
7320 for(i=0; i<50; i++){
7321 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7323 #endif
7324 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7325 h->current_slice = 0;
7326 if (!s->first_field)
7327 s->current_picture_ptr= NULL;
7330 for(;;){
7331 int consumed;
7332 int dst_length;
7333 int bit_length;
7334 const uint8_t *ptr;
7335 int i, nalsize = 0;
7336 int err;
7338 if(h->is_avc) {
7339 if(buf_index >= buf_size) break;
7340 nalsize = 0;
7341 for(i = 0; i < h->nal_length_size; i++)
7342 nalsize = (nalsize << 8) | buf[buf_index++];
7343 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7344 if(nalsize == 1){
7345 buf_index++;
7346 continue;
7347 }else{
7348 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7349 break;
7352 } else {
7353 // start code prefix search
7354 for(; buf_index + 3 < buf_size; buf_index++){
7355 // This should always succeed in the first iteration.
7356 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7357 break;
7360 if(buf_index+3 >= buf_size) break;
7362 buf_index+=3;
7365 hx = h->thread_context[context_count];
7367 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7368 if (ptr==NULL || dst_length < 0){
7369 return -1;
7371 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7372 dst_length--;
7373 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7375 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7376 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7379 if (h->is_avc && (nalsize != consumed)){
7380 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7381 consumed= nalsize;
7384 buf_index += consumed;
7386 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7387 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7388 continue;
7390 again:
7391 err = 0;
7392 switch(hx->nal_unit_type){
7393 case NAL_IDR_SLICE:
7394 if (h->nal_unit_type != NAL_IDR_SLICE) {
7395 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7396 return -1;
7398 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7399 case NAL_SLICE:
7400 init_get_bits(&hx->s.gb, ptr, bit_length);
7401 hx->intra_gb_ptr=
7402 hx->inter_gb_ptr= &hx->s.gb;
7403 hx->s.data_partitioning = 0;
7405 if((err = decode_slice_header(hx, h)))
7406 break;
7408 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7409 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7410 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7411 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7412 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7413 && avctx->skip_frame < AVDISCARD_ALL)
7414 context_count++;
7415 break;
7416 case NAL_DPA:
7417 init_get_bits(&hx->s.gb, ptr, bit_length);
7418 hx->intra_gb_ptr=
7419 hx->inter_gb_ptr= NULL;
7420 hx->s.data_partitioning = 1;
7422 err = decode_slice_header(hx, h);
7423 break;
7424 case NAL_DPB:
7425 init_get_bits(&hx->intra_gb, ptr, bit_length);
7426 hx->intra_gb_ptr= &hx->intra_gb;
7427 break;
7428 case NAL_DPC:
7429 init_get_bits(&hx->inter_gb, ptr, bit_length);
7430 hx->inter_gb_ptr= &hx->inter_gb;
7432 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7433 && s->context_initialized
7434 && s->hurry_up < 5
7435 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7436 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7437 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7438 && avctx->skip_frame < AVDISCARD_ALL)
7439 context_count++;
7440 break;
7441 case NAL_SEI:
7442 init_get_bits(&s->gb, ptr, bit_length);
7443 decode_sei(h);
7444 break;
7445 case NAL_SPS:
7446 init_get_bits(&s->gb, ptr, bit_length);
7447 decode_seq_parameter_set(h);
7449 if(s->flags& CODEC_FLAG_LOW_DELAY)
7450 s->low_delay=1;
7452 if(avctx->has_b_frames < 2)
7453 avctx->has_b_frames= !s->low_delay;
7454 break;
7455 case NAL_PPS:
7456 init_get_bits(&s->gb, ptr, bit_length);
7458 decode_picture_parameter_set(h, bit_length);
7460 break;
7461 case NAL_AUD:
7462 case NAL_END_SEQUENCE:
7463 case NAL_END_STREAM:
7464 case NAL_FILLER_DATA:
7465 case NAL_SPS_EXT:
7466 case NAL_AUXILIARY_SLICE:
7467 break;
7468 default:
7469 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7472 if(context_count == h->max_contexts) {
7473 execute_decode_slices(h, context_count);
7474 context_count = 0;
7477 if (err < 0)
7478 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7479 else if(err == 1) {
7480 /* Slice could not be decoded in parallel mode, copy down
7481 * NAL unit stuff to context 0 and restart. Note that
7482 * rbsp_buffer is not transferred, but since we no longer
7483 * run in parallel mode this should not be an issue. */
7484 h->nal_unit_type = hx->nal_unit_type;
7485 h->nal_ref_idc = hx->nal_ref_idc;
7486 hx = h;
7487 goto again;
7490 if(context_count)
7491 execute_decode_slices(h, context_count);
7492 return buf_index;
7496 * returns the number of bytes consumed for building the current frame
7498 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7499 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7500 if(pos+10>buf_size) pos=buf_size; // oops ;)
7502 return pos;
7505 static int decode_frame(AVCodecContext *avctx,
7506 void *data, int *data_size,
7507 const uint8_t *buf, int buf_size)
7509 H264Context *h = avctx->priv_data;
7510 MpegEncContext *s = &h->s;
7511 AVFrame *pict = data;
7512 int buf_index;
7514 s->flags= avctx->flags;
7515 s->flags2= avctx->flags2;
7517 /* end of stream, output what is still in the buffers */
7518 if (buf_size == 0) {
7519 Picture *out;
7520 int i, out_idx;
7522 //FIXME factorize this with the output code below
7523 out = h->delayed_pic[0];
7524 out_idx = 0;
7525 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7526 if(h->delayed_pic[i]->poc < out->poc){
7527 out = h->delayed_pic[i];
7528 out_idx = i;
7531 for(i=out_idx; h->delayed_pic[i]; i++)
7532 h->delayed_pic[i] = h->delayed_pic[i+1];
7534 if(out){
7535 *data_size = sizeof(AVFrame);
7536 *pict= *(AVFrame*)out;
7539 return 0;
7542 if(h->is_avc && !h->got_avcC) {
7543 int i, cnt, nalsize;
7544 unsigned char *p = avctx->extradata;
7545 if(avctx->extradata_size < 7) {
7546 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7547 return -1;
7549 if(*p != 1) {
7550 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7551 return -1;
7553 /* sps and pps in the avcC always have length coded with 2 bytes,
7554 so put a fake nal_length_size = 2 while parsing them */
7555 h->nal_length_size = 2;
7556 // Decode sps from avcC
7557 cnt = *(p+5) & 0x1f; // Number of sps
7558 p += 6;
7559 for (i = 0; i < cnt; i++) {
7560 nalsize = AV_RB16(p) + 2;
7561 if(decode_nal_units(h, p, nalsize) < 0) {
7562 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7563 return -1;
7565 p += nalsize;
7567 // Decode pps from avcC
7568 cnt = *(p++); // Number of pps
7569 for (i = 0; i < cnt; i++) {
7570 nalsize = AV_RB16(p) + 2;
7571 if(decode_nal_units(h, p, nalsize) != nalsize) {
7572 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7573 return -1;
7575 p += nalsize;
7577 // Now store right nal length size, that will be use to parse all other nals
7578 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7579 // Do not reparse avcC
7580 h->got_avcC = 1;
7583 if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
7584 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7585 return -1;
7586 h->got_avcC = 1;
7589 buf_index=decode_nal_units(h, buf, buf_size);
7590 if(buf_index < 0)
7591 return -1;
7593 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7594 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7595 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7596 return -1;
7599 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7600 Picture *out = s->current_picture_ptr;
7601 Picture *cur = s->current_picture_ptr;
7602 int i, pics, cross_idr, out_of_order, out_idx;
7604 s->mb_y= 0;
7606 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7607 s->current_picture_ptr->pict_type= s->pict_type;
7609 if(!s->dropable) {
7610 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7611 h->prev_poc_msb= h->poc_msb;
7612 h->prev_poc_lsb= h->poc_lsb;
7614 h->prev_frame_num_offset= h->frame_num_offset;
7615 h->prev_frame_num= h->frame_num;
7618 * FIXME: Error handling code does not seem to support interlaced
7619 * when slices span multiple rows
7620 * The ff_er_add_slice calls don't work right for bottom
7621 * fields; they cause massive erroneous error concealing
7622 * Error marking covers both fields (top and bottom).
7623 * This causes a mismatched s->error_count
7624 * and a bad error table. Further, the error count goes to
7625 * INT_MAX when called for bottom field, because mb_y is
7626 * past end by one (callers fault) and resync_mb_y != 0
7627 * causes problems for the first MB line, too.
7629 if (!FIELD_PICTURE)
7630 ff_er_frame_end(s);
7632 MPV_frame_end(s);
7634 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7635 /* Wait for second field. */
7636 *data_size = 0;
7638 } else {
7639 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7640 /* Derive top_field_first from field pocs. */
7641 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7643 //FIXME do something with unavailable reference frames
7645 /* Sort B-frames into display order */
7647 if(h->sps.bitstream_restriction_flag
7648 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7649 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7650 s->low_delay = 0;
7653 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7654 && !h->sps.bitstream_restriction_flag){
7655 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7656 s->low_delay= 0;
7659 pics = 0;
7660 while(h->delayed_pic[pics]) pics++;
7662 assert(pics <= MAX_DELAYED_PIC_COUNT);
7664 h->delayed_pic[pics++] = cur;
7665 if(cur->reference == 0)
7666 cur->reference = DELAYED_PIC_REF;
7668 out = h->delayed_pic[0];
7669 out_idx = 0;
7670 for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++)
7671 if(h->delayed_pic[i]->poc < out->poc){
7672 out = h->delayed_pic[i];
7673 out_idx = i;
7675 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame;
7677 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7679 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7681 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7682 || (s->low_delay &&
7683 ((!cross_idr && out->poc > h->outputed_poc + 2)
7684 || cur->pict_type == FF_B_TYPE)))
7686 s->low_delay = 0;
7687 s->avctx->has_b_frames++;
7690 if(out_of_order || pics > s->avctx->has_b_frames){
7691 out->reference &= ~DELAYED_PIC_REF;
7692 for(i=out_idx; h->delayed_pic[i]; i++)
7693 h->delayed_pic[i] = h->delayed_pic[i+1];
7695 if(!out_of_order && pics > s->avctx->has_b_frames){
7696 *data_size = sizeof(AVFrame);
7698 h->outputed_poc = out->poc;
7699 *pict= *(AVFrame*)out;
7700 }else{
7701 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7706 assert(pict->data[0] || !*data_size);
7707 ff_print_debug_info(s, pict);
7708 //printf("out %d\n", (int)pict->data[0]);
7709 #if 0 //?
7711 /* Return the Picture timestamp as the frame number */
7712 /* we subtract 1 because it is added on utils.c */
7713 avctx->frame_number = s->picture_number - 1;
7714 #endif
7715 return get_consumed_bytes(s, buf_index, buf_size);
7717 #if 0
/* Dead code (never compiled — enclosed in #if 0): fills h->mb_avail[] with
 * the availability of the current macroblock's neighbours. A neighbour is
 * "available" when it lies inside the picture AND belongs to the same slice
 * (its slice_table[] entry equals h->slice_num).
 *   [0] top-left, [1] top, [2] top-right — all unavailable on the first MB row;
 *   [3] left; [4]/[5] are hard-coded constants (see FIXMEs below).
 * NOTE(review): the embedded blob line numbers jump (7720->7722, 7729->7731,
 * 7733->7735), i.e. blank/brace-only lines were lost in extraction, so the
 * closing braces of the if/else and of the function are missing from this
 * listing. */
7718 static inline void fill_mb_avail(H264Context *h){
7719 MpegEncContext * const s = &h->s;
7720 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7722 if(s->mb_y){
7723 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7724 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7725 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7726 }else{
/* first MB row: no neighbours above */
7727 h->mb_avail[0]=
7728 h->mb_avail[1]=
7729 h->mb_avail[2]= 0;
7731 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7732 h->mb_avail[4]= 1; //FIXME move out
7733 h->mb_avail[5]= 0; //FIXME move out
7735 #endif
7737 #ifdef TEST
7738 #undef printf
7739 #undef random
/* Self-test size: COUNT codes are written/read; SIZE bounds the bit buffer. */
7740 #define COUNT 8000
7741 #define SIZE (COUNT*40)
/* Standalone self-test harness (compiled only when TEST is defined):
 * round-trips COUNT unsigned and signed Exp-Golomb codes through a
 * PutBitContext/GetBitContext pair, timing each call with START_TIMER/
 * STOP_TIMER. The DCT, quantizer and NAL-layer tests further down are
 * disabled with #if 0.
 * NOTE(review): the embedded blob line numbers jump in many places
 * (e.g. 7758->7760, 7774->7778), i.e. blank/brace-only lines were lost when
 * this file was extracted, so loop/function closing braces are missing from
 * this listing. */
7742 int main(void){
7743 int i;
7744 uint8_t temp[SIZE];
7745 PutBitContext pb;
7746 GetBitContext gb;
7747 // int int_temp[10000];
7748 DSPContext dsp;
7749 AVCodecContext avctx;
7751 dsputil_init(&dsp, &avctx);
/* --- unsigned Exp-Golomb: encode i = 0..COUNT-1 --- */
7753 init_put_bits(&pb, temp, SIZE);
7754 printf("testing unsigned exp golomb\n");
7755 for(i=0; i<COUNT; i++){
7756 START_TIMER
7757 set_ue_golomb(&pb, i);
7758 STOP_TIMER("set_ue_golomb");
7760 flush_put_bits(&pb);
/* decode the codes back; a mismatch is only printed, not fatal
 * (the return -1 is commented out) */
7762 init_get_bits(&gb, temp, 8*SIZE);
7763 for(i=0; i<COUNT; i++){
7764 int j, s;
7766 s= show_bits(&gb, 24);
7768 START_TIMER
7769 j= get_ue_golomb(&gb);
7770 if(j != i){
7771 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7772 // return -1;
7774 STOP_TIMER("get_ue_golomb");
/* --- signed Exp-Golomb: same round-trip, values centred on 0 --- */
7778 init_put_bits(&pb, temp, SIZE);
7779 printf("testing signed exp golomb\n");
7780 for(i=0; i<COUNT; i++){
7781 START_TIMER
7782 set_se_golomb(&pb, i - COUNT/2);
7783 STOP_TIMER("set_se_golomb");
7785 flush_put_bits(&pb);
7787 init_get_bits(&gb, temp, 8*SIZE);
7788 for(i=0; i<COUNT; i++){
7789 int j, s;
7791 s= show_bits(&gb, 24);
7793 START_TIMER
7794 j= get_se_golomb(&gb);
7795 if(j != i - COUNT/2){
7796 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7797 // return -1;
7799 STOP_TIMER("get_se_golomb");
/* --- everything below here is disabled --- */
7802 #if 0
7803 printf("testing 4x4 (I)DCT\n");
7805 DCTELEM block[16];
7806 uint8_t src[16], ref[16];
7807 uint64_t error= 0, max_error=0;
7809 for(i=0; i<COUNT; i++){
7810 int j;
7811 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7812 for(j=0; j<16; j++){
7813 ref[j]= random()%255;
7814 src[j]= random()%255;
7817 h264_diff_dct_c(block, src, ref, 4);
7819 //normalize
7820 for(j=0; j<16; j++){
7821 // printf("%d ", block[j]);
7822 block[j]= block[j]*4;
7823 if(j&1) block[j]= (block[j]*4 + 2)/5;
7824 if(j&4) block[j]= (block[j]*4 + 2)/5;
7826 // printf("\n");
7828 s->dsp.h264_idct_add(ref, block, 4);
7829 /* for(j=0; j<16; j++){
7830 printf("%d ", ref[j]);
7832 printf("\n");*/
/* accumulate squared error between source and DCT->IDCT round-trip */
7834 for(j=0; j<16; j++){
7835 int diff= FFABS(src[j] - ref[j]);
7837 error+= diff*diff;
7838 max_error= FFMAX(max_error, diff);
7841 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7842 printf("testing quantizer\n");
7843 for(qp=0; qp<52; qp++){
7844 for(i=0; i<16; i++)
7845 src1_block[i]= src2_block[i]= random()%255;
/* NAL escaping round-trip: random payloads with i forced zero bytes are
 * encoded with encode_nal() and must decode back byte-identically */
7848 printf("Testing NAL layer\n");
7850 uint8_t bitstream[COUNT];
7851 uint8_t nal[COUNT*2];
7852 H264Context h;
7853 memset(&h, 0, sizeof(H264Context));
7855 for(i=0; i<COUNT; i++){
7856 int zeros= i;
7857 int nal_length;
7858 int consumed;
7859 int out_length;
7860 uint8_t *out;
7861 int j;
7863 for(j=0; j<COUNT; j++){
7864 bitstream[j]= (random() % 255) + 1;
7867 for(j=0; j<zeros; j++){
7868 int pos= random() % COUNT;
7869 while(bitstream[pos] == 0){
7870 pos++;
7871 pos %= COUNT;
7873 bitstream[pos]=0;
7876 START_TIMER
7878 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7879 if(nal_length<0){
7880 printf("encoding failed\n");
7881 return -1;
7884 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7886 STOP_TIMER("NAL")
7888 if(out_length != COUNT){
7889 printf("incorrect length %d %d\n", out_length, COUNT);
7890 return -1;
7893 if(consumed != nal_length){
7894 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7895 return -1;
7898 if(memcmp(bitstream, out, COUNT)){
7899 printf("mismatch\n");
7900 return -1;
7903 #endif
7905 printf("Testing RBSP\n");
7908 return 0;
7910 #endif /* TEST */
7913 static av_cold int decode_end(AVCodecContext *avctx)
7915 H264Context *h = avctx->priv_data;
7916 MpegEncContext *s = &h->s;
7917 int i;
7919 av_freep(&h->rbsp_buffer[0]);
7920 av_freep(&h->rbsp_buffer[1]);
7921 free_tables(h); //FIXME cleanup init stuff perhaps
7923 for(i = 0; i < MAX_SPS_COUNT; i++)
7924 av_freep(h->sps_buffers + i);
7926 for(i = 0; i < MAX_PPS_COUNT; i++)
7927 av_freep(h->pps_buffers + i);
7929 MPV_common_end(s);
7931 // memset(h, 0, sizeof(H264Context));
7933 return 0;
/* Public registration entry for the H.264 decoder. The leading fields use
 * positional initializers; from the values these are evidently the codec
 * name, media type, codec id, private-context size, and the
 * init/encode/close/decode callbacks followed by the capability flags
 * (NOTE(review): confirm the exact field order against struct AVCodec in
 * avcodec.h). NULL in the encode slot: this entry is decode-only.
 * CODEC_CAP_DELAY is required because frames are reordered and emitted
 * late from the delayed_pic[] buffer; flush_dpb empties it on seek.
 * NOTE(review): the closing "};" of this initializer (blob line 7949) was
 * lost in extraction. */
7937 AVCodec h264_decoder = {
7938 "h264",
7939 CODEC_TYPE_VIDEO,
7940 CODEC_ID_H264,
7941 sizeof(H264Context),
7942 decode_init,
7943 NULL,
7944 decode_end,
7945 decode_frame,
7946 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7947 .flush= flush_dpb,
7948 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
7951 #include "svq3.c"