1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 /**
22 * @file h264.c
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
27 #include "common.h"
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264data.h"
32 #include "golomb.h"
34 #include "cabac.h"
36 //#undef NDEBUG
37 #include <assert.h>
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
57 /* Compiling in interlaced support reduces the speed
58 * of progressive decoding by about 2%. */
59 #define ALLOW_INTERLACE
61 #ifdef ALLOW_INTERLACE
62 #define MB_MBAFF h->mb_mbaff
63 #define MB_FIELD h->mb_field_decoding_flag
64 #define FRAME_MBAFF h->mb_aff_frame
65 #else
66 #define MB_MBAFF 0
67 #define MB_FIELD 0
68 #define FRAME_MBAFF 0
69 #undef IS_INTERLACED
70 #define IS_INTERLACED(mb_type) 0
71 #endif
73 /**
74 * Sequence parameter set
76 typedef struct SPS{
78 int profile_idc;
79 int level_idc;
80 int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
81 int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
82 int poc_type; ///< pic_order_cnt_type
83 int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
84 int delta_pic_order_always_zero_flag;
85 int offset_for_non_ref_pic;
86 int offset_for_top_to_bottom_field;
87 int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
88 int ref_frame_count; ///< num_ref_frames
89 int gaps_in_frame_num_allowed_flag;
90 int mb_width; ///< frame_width_in_mbs_minus1 + 1
91 int mb_height; ///< frame_height_in_mbs_minus1 + 1
92 int frame_mbs_only_flag;
93 int mb_aff; ///<mb_adaptive_frame_field_flag
94 int direct_8x8_inference_flag;
95 int crop; ///< frame_cropping_flag
96 int crop_left; ///< frame_cropping_rect_left_offset
97 int crop_right; ///< frame_cropping_rect_right_offset
98 int crop_top; ///< frame_cropping_rect_top_offset
99 int crop_bottom; ///< frame_cropping_rect_bottom_offset
100 int vui_parameters_present_flag;
101 AVRational sar;
102 int timing_info_present_flag;
103 uint32_t num_units_in_tick;
104 uint32_t time_scale;
105 int fixed_frame_rate_flag;
106 short offset_for_ref_frame[256]; //FIXME dyn alloc?
107 int bitstream_restriction_flag;
108 int num_reorder_frames;
109 int scaling_matrix_present;
110 uint8_t scaling_matrix4[6][16];
111 uint8_t scaling_matrix8[2][64];
112 }SPS;
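/* Illustrative sketch, not part of the original source (helper names are ours):
 * two values a decoder typically derives from the SPS fields stored above,
 * following the doxygen comments on each field (log2_max_frame_num already
 * includes the "+4"). */
static inline int sps_max_frame_num(const SPS *sps){
    return 1 << sps->log2_max_frame_num;   // frame_num wraps modulo this value
}
static inline int sps_coded_luma_width(const SPS *sps){
    return 16 * sps->mb_width;             // coded width in luma samples, before cropping
}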
115 * Picture parameter set
117 typedef struct PPS{
118 int sps_id;
119 int cabac; ///< entropy_coding_mode_flag
120 int pic_order_present; ///< pic_order_present_flag
121 int slice_group_count; ///< num_slice_groups_minus1 + 1
122 int mb_slice_group_map_type;
123 int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
124 int weighted_pred; ///< weighted_pred_flag
125 int weighted_bipred_idc;
126 int init_qp; ///< pic_init_qp_minus26 + 26
127 int init_qs; ///< pic_init_qs_minus26 + 26
128 int chroma_qp_index_offset;
129 int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
130 int constrained_intra_pred; ///< constrained_intra_pred_flag
131 int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
132 int transform_8x8_mode; ///< transform_8x8_mode_flag
133 uint8_t scaling_matrix4[6][16];
134 uint8_t scaling_matrix8[2][64];
135 }PPS;
138 * Memory management control operation opcode.
140 typedef enum MMCOOpcode{
141 MMCO_END=0,
142 MMCO_SHORT2UNUSED,
143 MMCO_LONG2UNUSED,
144 MMCO_SHORT2LONG,
145 MMCO_SET_MAX_LONG,
146 MMCO_RESET,
147 MMCO_LONG,
148 } MMCOOpcode;
151 * Memory management control operation.
153 typedef struct MMCO{
154 MMCOOpcode opcode;
155 int short_frame_num;
156 int long_index;
157 } MMCO;
160 * H264Context
162 typedef struct H264Context{
163 MpegEncContext s;
164 int nal_ref_idc;
165 int nal_unit_type;
166 #define NAL_SLICE 1
167 #define NAL_DPA 2
168 #define NAL_DPB 3
169 #define NAL_DPC 4
170 #define NAL_IDR_SLICE 5
171 #define NAL_SEI 6
172 #define NAL_SPS 7
173 #define NAL_PPS 8
174 #define NAL_AUD 9
175 #define NAL_END_SEQUENCE 10
176 #define NAL_END_STREAM 11
177 #define NAL_FILLER_DATA 12
178 #define NAL_SPS_EXT 13
179 #define NAL_AUXILIARY_SLICE 19
180 uint8_t *rbsp_buffer;
181 unsigned int rbsp_buffer_size;
184 * Used to parse AVC variant of h264
186 int is_avc; ///< this flag is != 0 if codec is avc1
187 int got_avcC; ///< flag used to parse avcC data only once
188 int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
190 int chroma_qp; //QPc
192 int prev_mb_skipped;
193 int next_mb_skipped;
195 //prediction stuff
196 int chroma_pred_mode;
197 int intra16x16_pred_mode;
199 int top_mb_xy;
200 int left_mb_xy[2];
202 int8_t intra4x4_pred_mode_cache[5*8];
203 int8_t (*intra4x4_pred_mode)[8];
204 void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
205 void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
206 void (*pred8x8 [4+3])(uint8_t *src, int stride);
207 void (*pred16x16[4+3])(uint8_t *src, int stride);
208 unsigned int topleft_samples_available;
209 unsigned int top_samples_available;
210 unsigned int topright_samples_available;
211 unsigned int left_samples_available;
212 uint8_t (*top_borders[2])[16+2*8];
213 uint8_t left_border[2*(17+2*9)];
216 * non zero coeff count cache.
217 * is 64 if not available.
219 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
220 uint8_t (*non_zero_count)[16];
223 * Motion vector cache.
225 DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
226 DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
227 #define LIST_NOT_USED -1 //FIXME rename?
228 #define PART_NOT_AVAILABLE -2
231 * is 1 if the specific list MV&references are set to 0,0,-2.
233 int mv_cache_clean[2];
236 * number of neighbors (top and/or left) that used 8x8 dct
238 int neighbor_transform_size;
241 * block_offset[ 0..23] for frame macroblocks
242 * block_offset[24..47] for field macroblocks
244 int block_offset[2*(16+8)];
246 uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
247 uint32_t *mb2b8_xy;
248 int b_stride; //FIXME use s->b4_stride
249 int b8_stride;
251 int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
252 int mb_uvlinesize;
254 int emu_edge_width;
255 int emu_edge_height;
257 int halfpel_flag;
258 int thirdpel_flag;
260 int unknown_svq3_flag;
261 int next_slice_index;
263 SPS sps_buffer[MAX_SPS_COUNT];
264 SPS sps; ///< current sps
266 PPS pps_buffer[MAX_PPS_COUNT];
268 * current pps
270 PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
272 uint32_t dequant4_buffer[6][52][16];
273 uint32_t dequant8_buffer[2][52][64];
274 uint32_t (*dequant4_coeff[6])[16];
275 uint32_t (*dequant8_coeff[2])[64];
276 int dequant_coeff_pps; ///< reinit tables when pps changes
278 int slice_num;
279 uint8_t *slice_table_base;
280 uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
281 int slice_type;
282 int slice_type_fixed;
284 //interlacing specific flags
285 int mb_aff_frame;
286 int mb_field_decoding_flag;
287 int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
289 int sub_mb_type[4];
291 //POC stuff
292 int poc_lsb;
293 int poc_msb;
294 int delta_poc_bottom;
295 int delta_poc[2];
296 int frame_num;
297 int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
298 int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
299 int frame_num_offset; ///< for POC type 2
300 int prev_frame_num_offset; ///< for POC type 2
301 int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
304 * frame_num for frames or 2*frame_num for field pics.
306 int curr_pic_num;
309 * max_frame_num or 2*max_frame_num for field pics.
311 int max_pic_num;
313 //Weighted pred stuff
314 int use_weight;
315 int use_weight_chroma;
316 int luma_log2_weight_denom;
317 int chroma_log2_weight_denom;
318 int luma_weight[2][48];
319 int luma_offset[2][48];
320 int chroma_weight[2][48][2];
321 int chroma_offset[2][48][2];
322 int implicit_weight[48][48];
324 //deblock
325 int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
326 int slice_alpha_c0_offset;
327 int slice_beta_offset;
329 int redundant_pic_count;
331 int direct_spatial_mv_pred;
332 int dist_scale_factor[16];
333 int dist_scale_factor_field[32];
334 int map_col_to_list0[2][16];
335 int map_col_to_list0_field[2][32];
338 * num_ref_idx_l0/1_active_minus1 + 1
340 int ref_count[2]; ///< counts frames or fields, depending on current mb mode
341 Picture *short_ref[32];
342 Picture *long_ref[32];
343 Picture default_ref_list[2][32];
344 Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
345 Picture *delayed_pic[16]; //FIXME size?
346 Picture *delayed_output_pic;
349 * memory management control operations buffer.
351 MMCO mmco[MAX_MMCO_COUNT];
352 int mmco_index;
354 int long_ref_count; ///< number of actual long term references
355 int short_ref_count; ///< number of actual short term references
357 //data partitioning
358 GetBitContext intra_gb;
359 GetBitContext inter_gb;
360 GetBitContext *intra_gb_ptr;
361 GetBitContext *inter_gb_ptr;
363 DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
366 * Cabac
368 CABACContext cabac;
369 uint8_t cabac_state[460];
370 int cabac_init_idc;
372 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
373 uint16_t *cbp_table;
374 int top_cbp;
375 int left_cbp;
376 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
377 uint8_t *chroma_pred_mode_table;
378 int last_qscale_diff;
379 int16_t (*mvd_table[2])[2];
380 DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
381 uint8_t *direct_table;
382 uint8_t direct_cache[5*8];
384 uint8_t zigzag_scan[16];
385 uint8_t zigzag_scan8x8[64];
386 uint8_t zigzag_scan8x8_cavlc[64];
387 uint8_t field_scan[16];
388 uint8_t field_scan8x8[64];
389 uint8_t field_scan8x8_cavlc[64];
390 const uint8_t *zigzag_scan_q0;
391 const uint8_t *zigzag_scan8x8_q0;
392 const uint8_t *zigzag_scan8x8_cavlc_q0;
393 const uint8_t *field_scan_q0;
394 const uint8_t *field_scan8x8_q0;
395 const uint8_t *field_scan8x8_cavlc_q0;
397 int x264_build;
398 }H264Context;
400 static VLC coeff_token_vlc[4];
401 static VLC chroma_dc_coeff_token_vlc;
403 static VLC total_zeros_vlc[15];
404 static VLC chroma_dc_total_zeros_vlc[3];
406 static VLC run_vlc[6];
407 static VLC run7_vlc;
409 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
410 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
411 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
413 static always_inline uint32_t pack16to32(int a, int b){
414 #ifdef WORDS_BIGENDIAN
415 return (b&0xFFFF) + (a<<16);
416 #else
417 return (a&0xFFFF) + (b<<16);
418 #endif
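    /* Example (illustrative, not in the original file): on a little-endian build
     * pack16to32(mx, my) puts mx in the low 16 bits and my in the high 16 bits,
     * so pack16to32(3, -1) == 0xFFFF0003; fill_rectangle() below uses such packed
     * pairs to write a whole block of motion vectors with 32-bit stores. */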
422 * fill a rectangle.
423 * @param h height of the rectangle, should be a constant
424 * @param w width of the rectangle, should be a constant
425 * @param size the size of val (1 or 4), should be a constant
427 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
428 uint8_t *p= (uint8_t*)vp;
429 assert(size==1 || size==4);
430 assert(w<=4);
432 w *= size;
433 stride *= size;
435 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
436 assert((stride&(w-1))==0);
437 if(w==2){
438 const uint16_t v= size==4 ? val : val*0x0101;
439 *(uint16_t*)(p + 0*stride)= v;
440 if(h==1) return;
441 *(uint16_t*)(p + 1*stride)= v;
442 if(h==2) return;
443 *(uint16_t*)(p + 2*stride)=
444 *(uint16_t*)(p + 3*stride)= v;
445 }else if(w==4){
446 const uint32_t v= size==4 ? val : val*0x01010101;
447 *(uint32_t*)(p + 0*stride)= v;
448 if(h==1) return;
449 *(uint32_t*)(p + 1*stride)= v;
450 if(h==2) return;
451 *(uint32_t*)(p + 2*stride)=
452 *(uint32_t*)(p + 3*stride)= v;
453 }else if(w==8){
454 //gcc can't optimize 64bit math on x86_32
455 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
456 const uint64_t v= val*0x0100000001ULL;
457 *(uint64_t*)(p + 0*stride)= v;
458 if(h==1) return;
459 *(uint64_t*)(p + 1*stride)= v;
460 if(h==2) return;
461 *(uint64_t*)(p + 2*stride)=
462 *(uint64_t*)(p + 3*stride)= v;
463 }else if(w==16){
464 const uint64_t v= val*0x0100000001ULL;
465 *(uint64_t*)(p + 0+0*stride)=
466 *(uint64_t*)(p + 8+0*stride)=
467 *(uint64_t*)(p + 0+1*stride)=
468 *(uint64_t*)(p + 8+1*stride)= v;
469 if(h==2) return;
470 *(uint64_t*)(p + 0+2*stride)=
471 *(uint64_t*)(p + 8+2*stride)=
472 *(uint64_t*)(p + 0+3*stride)=
473 *(uint64_t*)(p + 8+3*stride)= v;
474 #else
475 *(uint32_t*)(p + 0+0*stride)=
476 *(uint32_t*)(p + 4+0*stride)= val;
477 if(h==1) return;
478 *(uint32_t*)(p + 0+1*stride)=
479 *(uint32_t*)(p + 4+1*stride)= val;
480 if(h==2) return;
481 *(uint32_t*)(p + 0+2*stride)=
482 *(uint32_t*)(p + 4+2*stride)=
483 *(uint32_t*)(p + 0+3*stride)=
484 *(uint32_t*)(p + 4+3*stride)= val;
485 }else if(w==16){
486 *(uint32_t*)(p + 0+0*stride)=
487 *(uint32_t*)(p + 4+0*stride)=
488 *(uint32_t*)(p + 8+0*stride)=
489 *(uint32_t*)(p +12+0*stride)=
490 *(uint32_t*)(p + 0+1*stride)=
491 *(uint32_t*)(p + 4+1*stride)=
492 *(uint32_t*)(p + 8+1*stride)=
493 *(uint32_t*)(p +12+1*stride)= val;
494 if(h==2) return;
495 *(uint32_t*)(p + 0+2*stride)=
496 *(uint32_t*)(p + 4+2*stride)=
497 *(uint32_t*)(p + 8+2*stride)=
498 *(uint32_t*)(p +12+2*stride)=
499 *(uint32_t*)(p + 0+3*stride)=
500 *(uint32_t*)(p + 4+3*stride)=
501 *(uint32_t*)(p + 8+3*stride)=
502 *(uint32_t*)(p +12+3*stride)= val;
503 #endif
504 }else
505 assert(0);
506 assert(h==4);
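    /* Usage sketch (illustrative, not in the original file): callers below splat a
     * single value over a w x h region of the 8-wide caches, e.g.
     *   fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
     * writes one byte to all 16 entries of a 4x4 area with stride 8, and
     *   fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
     * stores one packed 32-bit motion vector per entry. */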
509 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
510 MpegEncContext * const s = &h->s;
511 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
512 int topleft_xy, top_xy, topright_xy, left_xy[2];
513 int topleft_type, top_type, topright_type, left_type[2];
514 int left_block[8];
515 int i;
517 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
518 // the actual condition is whether we're on the edge of a slice,
519 // and even then the intra and nnz parts are unnecessary.
520 if(for_deblock && h->slice_num == 1 && !FRAME_MBAFF)
521 return;
523 //What a mess; why didn't they simplify the interlacing & intra stuff? It's hard to imagine these complex rules are worth it.
525 top_xy = mb_xy - s->mb_stride;
526 topleft_xy = top_xy - 1;
527 topright_xy= top_xy + 1;
528 left_xy[1] = left_xy[0] = mb_xy-1;
529 left_block[0]= 0;
530 left_block[1]= 1;
531 left_block[2]= 2;
532 left_block[3]= 3;
533 left_block[4]= 7;
534 left_block[5]= 10;
535 left_block[6]= 8;
536 left_block[7]= 11;
537 if(FRAME_MBAFF){
538 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
539 const int top_pair_xy = pair_xy - s->mb_stride;
540 const int topleft_pair_xy = top_pair_xy - 1;
541 const int topright_pair_xy = top_pair_xy + 1;
542 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
543 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
544 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
545 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
546 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
547 const int bottom = (s->mb_y & 1);
548 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
549 if (bottom
550 ? !curr_mb_frame_flag // bottom macroblock
551 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
553 top_xy -= s->mb_stride;
555 if (bottom
556 ? !curr_mb_frame_flag // bottom macroblock
557 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
559 topleft_xy -= s->mb_stride;
561 if (bottom
562 ? !curr_mb_frame_flag // bottom macroblock
563 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
565 topright_xy -= s->mb_stride;
567 if (left_mb_frame_flag != curr_mb_frame_flag) {
568 left_xy[1] = left_xy[0] = pair_xy - 1;
569 if (curr_mb_frame_flag) {
570 if (bottom) {
571 left_block[0]= 2;
572 left_block[1]= 2;
573 left_block[2]= 3;
574 left_block[3]= 3;
575 left_block[4]= 8;
576 left_block[5]= 11;
577 left_block[6]= 8;
578 left_block[7]= 11;
579 } else {
580 left_block[0]= 0;
581 left_block[1]= 0;
582 left_block[2]= 1;
583 left_block[3]= 1;
584 left_block[4]= 7;
585 left_block[5]= 10;
586 left_block[6]= 7;
587 left_block[7]= 10;
589 } else {
590 left_xy[1] += s->mb_stride;
591 //left_block[0]= 0;
592 left_block[1]= 2;
593 left_block[2]= 0;
594 left_block[3]= 2;
595 //left_block[4]= 7;
596 left_block[5]= 10;
597 left_block[6]= 7;
598 left_block[7]= 10;
603 h->top_mb_xy = top_xy;
604 h->left_mb_xy[0] = left_xy[0];
605 h->left_mb_xy[1] = left_xy[1];
606 if(for_deblock){
607 topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
608 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
609 topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
610 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
611 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
613 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
614 int list;
615 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
616 for(i=0; i<16; i++)
617 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
618 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
619 if(USES_LIST(mb_type,list)){
620 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
621 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
622 uint8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
623 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
624 dst[0] = src[0];
625 dst[1] = src[1];
626 dst[2] = src[2];
627 dst[3] = src[3];
629 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
630 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
631 ref += h->b8_stride;
632 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
633 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
634 }else{
635 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
636 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
640 }else{
641 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
642 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
643 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
644 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
645 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
648 if(IS_INTRA(mb_type)){
649 h->topleft_samples_available=
650 h->top_samples_available=
651 h->left_samples_available= 0xFFFF;
652 h->topright_samples_available= 0xEEEA;
654 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
655 h->topleft_samples_available= 0xB3FF;
656 h->top_samples_available= 0x33FF;
657 h->topright_samples_available= 0x26EA;
659 for(i=0; i<2; i++){
660 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
661 h->topleft_samples_available&= 0xDF5F;
662 h->left_samples_available&= 0x5F5F;
666 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
667 h->topleft_samples_available&= 0x7FFF;
669 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
670 h->topright_samples_available&= 0xFBFF;
672 if(IS_INTRA4x4(mb_type)){
673 if(IS_INTRA4x4(top_type)){
674 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
675 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
676 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
677 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
678 }else{
679 int pred;
680 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
681 pred= -1;
682 else{
683 pred= 2;
685 h->intra4x4_pred_mode_cache[4+8*0]=
686 h->intra4x4_pred_mode_cache[5+8*0]=
687 h->intra4x4_pred_mode_cache[6+8*0]=
688 h->intra4x4_pred_mode_cache[7+8*0]= pred;
690 for(i=0; i<2; i++){
691 if(IS_INTRA4x4(left_type[i])){
692 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
693 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
694 }else{
695 int pred;
696 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
697 pred= -1;
698 else{
699 pred= 2;
701 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
702 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
/* Cache layout for the neighbour entries (T = taken from the top MB, L = taken from the left MB):
710 0 . T T. T T T T
711 1 L . .L . . . .
712 2 L . .L . . . .
713 3 . T TL . . . .
714 4 L . .L . . . .
715 5 L . .. . . . .
*/
717 //FIXME constraint_intra_pred & partitioning & nnz (let's hope this is just a typo in the spec)
718 if(top_type){
719 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
720 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
721 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
722 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
724 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
725 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
727 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
728 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
730 }else{
731 h->non_zero_count_cache[4+8*0]=
732 h->non_zero_count_cache[5+8*0]=
733 h->non_zero_count_cache[6+8*0]=
734 h->non_zero_count_cache[7+8*0]=
736 h->non_zero_count_cache[1+8*0]=
737 h->non_zero_count_cache[2+8*0]=
739 h->non_zero_count_cache[1+8*3]=
740 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
744 for (i=0; i<2; i++) {
745 if(left_type[i]){
746 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
747 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
748 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
749 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
750 }else{
751 h->non_zero_count_cache[3+8*1 + 2*8*i]=
752 h->non_zero_count_cache[3+8*2 + 2*8*i]=
753 h->non_zero_count_cache[0+8*1 + 8*i]=
754 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
758 if( h->pps.cabac ) {
759 // top_cbp
760 if(top_type) {
761 h->top_cbp = h->cbp_table[top_xy];
762 } else if(IS_INTRA(mb_type)) {
763 h->top_cbp = 0x1C0;
764 } else {
765 h->top_cbp = 0;
767 // left_cbp
768 if (left_type[0]) {
769 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
770 } else if(IS_INTRA(mb_type)) {
771 h->left_cbp = 0x1C0;
772 } else {
773 h->left_cbp = 0;
775 if (left_type[0]) {
776 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
778 if (left_type[1]) {
779 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
783 #if 1
784 //FIXME direct mb can skip much of this
785 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
786 int list;
787 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
788 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
789 /*if(!h->mv_cache_clean[list]){
790 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
791 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
792 h->mv_cache_clean[list]= 1;
794 continue;
796 h->mv_cache_clean[list]= 0;
798 if(USES_LIST(top_type, list)){
799 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
800 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
801 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
802 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
803 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
804 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
805 h->ref_cache[list][scan8[0] + 0 - 1*8]=
806 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
807 h->ref_cache[list][scan8[0] + 2 - 1*8]=
808 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
809 }else{
810 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
811 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
812 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
813 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
814 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
817 //FIXME unify cleanup or something
818 if(USES_LIST(left_type[0], list)){
819 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
820 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
821 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
822 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
823 h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
824 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
825 }else{
826 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
827 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
828 h->ref_cache[list][scan8[0] - 1 + 0*8]=
829 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
832 if(USES_LIST(left_type[1], list)){
833 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
834 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
835 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
836 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
837 h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
838 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
839 }else{
840 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
841 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
842 h->ref_cache[list][scan8[0] - 1 + 2*8]=
843 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
844 assert((!left_type[0]) == (!left_type[1]));
847 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
848 continue;
850 if(USES_LIST(topleft_type, list)){
851 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
852 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
853 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
854 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
855 }else{
856 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
857 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
860 if(USES_LIST(topright_type, list)){
861 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
862 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
863 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
864 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
865 }else{
866 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
867 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
871 h->ref_cache[list][scan8[5 ]+1] =
872 h->ref_cache[list][scan8[7 ]+1] =
873 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
874 h->ref_cache[list][scan8[4 ]] =
875 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
876 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
877 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
878 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
879 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
880 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
882 if( h->pps.cabac ) {
883 /* XXX ugly: load mvd */
884 if(USES_LIST(top_type, list)){
885 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
886 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
887 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
888 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
889 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
890 }else{
891 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
892 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
893 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
894 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
896 if(USES_LIST(left_type[0], list)){
897 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
898 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
899 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
900 }else{
901 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
902 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
904 if(USES_LIST(left_type[1], list)){
905 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
906 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
907 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
908 }else{
909 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
910 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
912 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
913 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
914 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
915 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
916 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
918 if(h->slice_type == B_TYPE){
919 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
921 if(IS_DIRECT(top_type)){
922 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
923 }else if(IS_8X8(top_type)){
924 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
925 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
926 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
927 }else{
928 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
931 if(IS_DIRECT(left_type[0]))
932 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
933 else if(IS_8X8(left_type[0]))
934 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
935 else
936 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
938 if(IS_DIRECT(left_type[1]))
939 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
940 else if(IS_8X8(left_type[1]))
941 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
942 else
943 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
947 if(FRAME_MBAFF){
948 #define MAP_MVS\
949 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
950 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
951 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
952 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
953 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
954 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
955 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
956 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
957 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
958 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
959 if(MB_FIELD){
960 #define MAP_F2F(idx, mb_type)\
961 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
962 h->ref_cache[list][idx] <<= 1;\
963 h->mv_cache[list][idx][1] /= 2;\
964 h->mvd_cache[list][idx][1] /= 2;\
966 MAP_MVS
967 #undef MAP_F2F
968 }else{
969 #define MAP_F2F(idx, mb_type)\
970 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
971 h->ref_cache[list][idx] >>= 1;\
972 h->mv_cache[list][idx][1] <<= 1;\
973 h->mvd_cache[list][idx][1] <<= 1;\
975 MAP_MVS
976 #undef MAP_F2F
981 #endif
983 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
986 static inline void write_back_intra_pred_mode(H264Context *h){
987 MpegEncContext * const s = &h->s;
988 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
990 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
991 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
992 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
993 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
994 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
995 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
996 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
1000 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
1002 static inline int check_intra4x4_pred_mode(H264Context *h){
1003 MpegEncContext * const s = &h->s;
1004 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
1005 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
1006 int i;
1008 if(!(h->top_samples_available&0x8000)){
1009 for(i=0; i<4; i++){
1010 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
1011 if(status<0){
1012 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1013 return -1;
1014 } else if(status){
1015 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
1020 if(!(h->left_samples_available&0x8000)){
1021 for(i=0; i<4; i++){
1022 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
1023 if(status<0){
1024 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
1025 return -1;
1026 } else if(status){
1027 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
1032 return 0;
1033 } //FIXME cleanup like next
1036 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
1038 static inline int check_intra_pred_mode(H264Context *h, int mode){
1039 MpegEncContext * const s = &h->s;
1040 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
1041 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
1043 if(mode < 0 || mode > 6) {
1044 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
1045 return -1;
1048 if(!(h->top_samples_available&0x8000)){
1049 mode= top[ mode ];
1050 if(mode<0){
1051 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1052 return -1;
1056 if(!(h->left_samples_available&0x8000)){
1057 mode= left[ mode ];
1058 if(mode<0){
1059 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
1060 return -1;
1064 return mode;
1068 * gets the predicted intra4x4 prediction mode.
1070 static inline int pred_intra_mode(H264Context *h, int n){
1071 const int index8= scan8[n];
1072 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1073 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1074 const int min= FFMIN(left, top);
1076 tprintf("mode:%d %d min:%d\n", left ,top, min);
1078 if(min<0) return DC_PRED;
1079 else return min;
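    /* Worked example (illustrative): with left == 1 and top == 2 the prediction is
     * min(1,2) == 1; an unavailable neighbour is cached as -1, so min becomes
     * negative and DC_PRED is returned instead. */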
1082 static inline void write_back_non_zero_count(H264Context *h){
1083 MpegEncContext * const s = &h->s;
1084 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1086 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1087 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1088 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1089 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1090 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1091 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1092 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
1094 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1095 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1096 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
1098 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1099 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1100 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1102 if(FRAME_MBAFF){
1103 // store all luma nnzs, for deblocking
1104 int v = 0, i;
1105 for(i=0; i<16; i++)
1106 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1107 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1112 * gets the predicted number of non zero coefficients.
1113 * @param n block index
1115 static inline int pred_non_zero_count(H264Context *h, int n){
1116 const int index8= scan8[n];
1117 const int left= h->non_zero_count_cache[index8 - 1];
1118 const int top = h->non_zero_count_cache[index8 - 8];
1119 int i= left + top;
1121 if(i<64) i= (i+1)>>1;
1123 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
1125 return i&31;
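    /* Worked example (illustrative): left == 3 and top == 5 give i == 8 < 64, so the
     * prediction is (8+1)>>1 == 4 (rounded mean); an unavailable neighbour is cached
     * as 64, making i >= 64, and i&31 then returns the other neighbour's count. */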
1128 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1129 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1131 /* there is no consistent mapping of mvs to neighboring locations that will
1132 * make mbaff happy, so we can't move all this logic to fill_caches */
1133 if(FRAME_MBAFF){
1134 MpegEncContext *s = &h->s;
1135 const int *mb_types = s->current_picture_ptr->mb_type;
1136 const int16_t *mv;
1137 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1138 *C = h->mv_cache[list][scan8[0]-2];
1140 if(!MB_FIELD
1141 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1142 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1143 if(IS_INTERLACED(mb_types[topright_xy])){
1144 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1145 const int x4 = X4, y4 = Y4;\
1146 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1147 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1148 return LIST_NOT_USED;\
1149 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1150 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1151 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1152 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
1154 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1157 if(topright_ref == PART_NOT_AVAILABLE
1158 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1159 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1160 if(!MB_FIELD
1161 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1162 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1164 if(MB_FIELD
1165 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1166 && i >= scan8[0]+8){
1167 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1168 SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
1171 #undef SET_DIAG_MV
1174 if(topright_ref != PART_NOT_AVAILABLE){
1175 *C= h->mv_cache[list][ i - 8 + part_width ];
1176 return topright_ref;
1177 }else{
1178 tprintf("topright MV not available\n");
1180 *C= h->mv_cache[list][ i - 8 - 1 ];
1181 return h->ref_cache[list][ i - 8 - 1 ];
1186 * gets the predicted MV.
1187 * @param n the block index
1188 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1189 * @param mx the x component of the predicted motion vector
1190 * @param my the y component of the predicted motion vector
1192 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
1193 const int index8= scan8[n];
1194 const int top_ref= h->ref_cache[list][ index8 - 8 ];
1195 const int left_ref= h->ref_cache[list][ index8 - 1 ];
1196 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
1197 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
1198 const int16_t * C;
1199 int diagonal_ref, match_count;
1201 assert(part_width==1 || part_width==2 || part_width==4);
1203 /* mv_cache
1204 B . . A T T T T
1205 U . . L . . , .
1206 U . . L . . . .
1207 U . . L . . , .
1208 . . . L . . . .
*/
1211 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
1212 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
1213 tprintf("pred_motion match_count=%d\n", match_count);
1214 if(match_count > 1){ //most common
1215 *mx= mid_pred(A[0], B[0], C[0]);
1216 *my= mid_pred(A[1], B[1], C[1]);
1217 }else if(match_count==1){
1218 if(left_ref==ref){
1219 *mx= A[0];
1220 *my= A[1];
1221 }else if(top_ref==ref){
1222 *mx= B[0];
1223 *my= B[1];
1224 }else{
1225 *mx= C[0];
1226 *my= C[1];
1228 }else{
1229 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
1230 *mx= A[0];
1231 *my= A[1];
1232 }else{
1233 *mx= mid_pred(A[0], B[0], C[0]);
1234 *my= mid_pred(A[1], B[1], C[1]);
1238 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
1242 * gets the directionally predicted 16x8 MV.
1243 * @param n the block index
1244 * @param mx the x component of the predicted motion vector
1245 * @param my the y component of the predicted motion vector
1247 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1248 if(n==0){
1249 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
1250 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
1252 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
1254 if(top_ref == ref){
1255 *mx= B[0];
1256 *my= B[1];
1257 return;
1259 }else{
1260 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
1261 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
1263 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1265 if(left_ref == ref){
1266 *mx= A[0];
1267 *my= A[1];
1268 return;
1272 //RARE
1273 pred_motion(h, n, 4, list, ref, mx, my);
1277 * gets the directionally predicted 8x16 MV.
1278 * @param n the block index
1279 * @param mx the x component of the predicted motion vector
1280 * @param my the y component of the predicted motion vector
1282 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
1283 if(n==0){
1284 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
1285 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
1287 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
1289 if(left_ref == ref){
1290 *mx= A[0];
1291 *my= A[1];
1292 return;
1294 }else{
1295 const int16_t * C;
1296 int diagonal_ref;
1298 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
1300 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
1302 if(diagonal_ref == ref){
1303 *mx= C[0];
1304 *my= C[1];
1305 return;
1309 //RARE
1310 pred_motion(h, n, 2, list, ref, mx, my);
1313 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
1314 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
1315 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
1317 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
1319 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
1320 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
1321 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
1323 *mx = *my = 0;
1324 return;
1327 pred_motion(h, 0, 4, 0, 0, mx, my);
1329 return;
1332 static inline void direct_dist_scale_factor(H264Context * const h){
1333 const int poc = h->s.current_picture_ptr->poc;
1334 const int poc1 = h->ref_list[1][0].poc;
1335 int i;
1336 for(i=0; i<h->ref_count[0]; i++){
1337 int poc0 = h->ref_list[0][i].poc;
1338 int td = clip(poc1 - poc0, -128, 127);
1339 if(td == 0 /* FIXME || pic0 is a long-term ref */){
1340 h->dist_scale_factor[i] = 256;
1341 }else{
1342 int tb = clip(poc - poc0, -128, 127);
1343 int tx = (16384 + (ABS(td) >> 1)) / td;
1344 h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
1347 if(FRAME_MBAFF){
1348 for(i=0; i<h->ref_count[0]; i++){
1349 h->dist_scale_factor_field[2*i] =
1350 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
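    /* Worked example (illustrative): current poc == 2, poc1 == 4 and poc0 == 0 give
     * td = 4, tb = 2, tx = (16384 + 2) / 4 = 4096 and
     * dist_scale_factor[i] = (2*4096 + 32) >> 6 = 128, i.e. temporal-direct L0
     * vectors are scaled by 128/256 = 1/2, as expected for a picture halfway
     * between its two references. */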
1354 static inline void direct_ref_list_init(H264Context * const h){
1355 MpegEncContext * const s = &h->s;
1356 Picture * const ref1 = &h->ref_list[1][0];
1357 Picture * const cur = s->current_picture_ptr;
1358 int list, i, j;
1359 if(cur->pict_type == I_TYPE)
1360 cur->ref_count[0] = 0;
1361 if(cur->pict_type != B_TYPE)
1362 cur->ref_count[1] = 0;
1363 for(list=0; list<2; list++){
1364 cur->ref_count[list] = h->ref_count[list];
1365 for(j=0; j<h->ref_count[list]; j++)
1366 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
1368 if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
1369 return;
1370 for(list=0; list<2; list++){
1371 for(i=0; i<ref1->ref_count[list]; i++){
1372 const int poc = ref1->ref_poc[list][i];
1373 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
1374 for(j=0; j<h->ref_count[list]; j++)
1375 if(h->ref_list[list][j].poc == poc){
1376 h->map_col_to_list0[list][i] = j;
1377 break;
1381 if(FRAME_MBAFF){
1382 for(list=0; list<2; list++){
1383 for(i=0; i<ref1->ref_count[list]; i++){
1384 j = h->map_col_to_list0[list][i];
1385 h->map_col_to_list0_field[list][2*i] = 2*j;
1386 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
1392 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
1393 MpegEncContext * const s = &h->s;
1394 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
1395 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1396 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1397 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
1398 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
1399 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
1400 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
1401 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
1402 const int is_b8x8 = IS_8X8(*mb_type);
1403 int sub_mb_type;
1404 int i8, i4;
1406 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1407 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1408 /* FIXME save sub mb types from previous frames (or derive from MVs)
1409 * so we know exactly what block size to use */
1410 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1411 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1412 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1413 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1414 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1415 }else{
1416 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1417 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
1419 if(!is_b8x8)
1420 *mb_type |= MB_TYPE_DIRECT2;
1421 if(MB_FIELD)
1422 *mb_type |= MB_TYPE_INTERLACED;
1424 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1426 if(h->direct_spatial_mv_pred){
1427 int ref[2];
1428 int mv[2][2];
1429 int list;
1431 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1433 /* ref = min(neighbors) */
1434 for(list=0; list<2; list++){
1435 int refa = h->ref_cache[list][scan8[0] - 1];
1436 int refb = h->ref_cache[list][scan8[0] - 8];
1437 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1438 if(refc == -2)
1439 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1440 ref[list] = refa;
1441 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1442 ref[list] = refb;
1443 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1444 ref[list] = refc;
1445 if(ref[list] < 0)
1446 ref[list] = -1;
1449 if(ref[0] < 0 && ref[1] < 0){
1450 ref[0] = ref[1] = 0;
1451 mv[0][0] = mv[0][1] =
1452 mv[1][0] = mv[1][1] = 0;
1453 }else{
1454 for(list=0; list<2; list++){
1455 if(ref[list] >= 0)
1456 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1457 else
1458 mv[list][0] = mv[list][1] = 0;
1462 if(ref[1] < 0){
1463 *mb_type &= ~MB_TYPE_P0L1;
1464 sub_mb_type &= ~MB_TYPE_P0L1;
1465 }else if(ref[0] < 0){
1466 *mb_type &= ~MB_TYPE_P0L0;
1467 sub_mb_type &= ~MB_TYPE_P0L0;
1470 if(IS_16X16(*mb_type)){
1471 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1472 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1473 if(!IS_INTRA(mb_type_col)
1474 && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
1475 || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
1476 && (h->x264_build>33 || !h->x264_build)))){
1477 if(ref[0] > 0)
1478 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1479 else
1480 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1481 if(ref[1] > 0)
1482 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1483 else
1484 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1485 }else{
1486 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1487 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1489 }else{
1490 for(i8=0; i8<4; i8++){
1491 const int x8 = i8&1;
1492 const int y8 = i8>>1;
1494 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1495 continue;
1496 h->sub_mb_type[i8] = sub_mb_type;
1498 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1499 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1500 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1501 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1503 /* col_zero_flag */
1504 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1505 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1506 && (h->x264_build>33 || !h->x264_build)))){
1507 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1508 if(IS_SUB_8X8(sub_mb_type)){
1509 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1510 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1511 if(ref[0] == 0)
1512 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1513 if(ref[1] == 0)
1514 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1516 }else
1517 for(i4=0; i4<4; i4++){
1518 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1519 if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
1520 if(ref[0] == 0)
1521 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1522 if(ref[1] == 0)
1523 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1529 }else{ /* direct temporal mv pred */
1530 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1531 const int *dist_scale_factor = h->dist_scale_factor;
1533 if(FRAME_MBAFF){
1534 if(IS_INTERLACED(*mb_type)){
1535 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1536 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1537 dist_scale_factor = h->dist_scale_factor_field;
1539 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1540 /* FIXME assumes direct_8x8_inference == 1 */
1541 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1542 int mb_types_col[2];
1543 int y_shift;
1545 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1546 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1547 | (*mb_type & MB_TYPE_INTERLACED);
1548 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1550 if(IS_INTERLACED(*mb_type)){
1551 /* frame to field scaling */
1552 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1553 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1554 if(s->mb_y&1){
1555 l1ref0 -= 2*h->b8_stride;
1556 l1ref1 -= 2*h->b8_stride;
1557 l1mv0 -= 4*h->b_stride;
1558 l1mv1 -= 4*h->b_stride;
1560 y_shift = 0;
1562 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1563 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1564 && !is_b8x8)
1565 *mb_type |= MB_TYPE_16x8;
1566 else
1567 *mb_type |= MB_TYPE_8x8;
1568 }else{
1569 /* field to frame scaling */
1570 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1571 * but in MBAFF, top and bottom POC are equal */
1572 int dy = (s->mb_y&1) ? 1 : 2;
1573 mb_types_col[0] =
1574 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1575 l1ref0 += dy*h->b8_stride;
1576 l1ref1 += dy*h->b8_stride;
1577 l1mv0 += 2*dy*h->b_stride;
1578 l1mv1 += 2*dy*h->b_stride;
1579 y_shift = 2;
1581 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1582 && !is_b8x8)
1583 *mb_type |= MB_TYPE_16x16;
1584 else
1585 *mb_type |= MB_TYPE_8x8;
1588 for(i8=0; i8<4; i8++){
1589 const int x8 = i8&1;
1590 const int y8 = i8>>1;
1591 int ref0, scale;
1592 const int16_t (*l1mv)[2]= l1mv0;
1594 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1595 continue;
1596 h->sub_mb_type[i8] = sub_mb_type;
1598 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1599 if(IS_INTRA(mb_types_col[y8])){
1600 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1601 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1602 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1603 continue;
1606 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1607 if(ref0 >= 0)
1608 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1609 else{
1610 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1611 l1mv= l1mv1;
1613 scale = dist_scale_factor[ref0];
1614 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1617 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1618 int my_col = (mv_col[1]<<y_shift)/2;
1619 int mx = (scale * mv_col[0] + 128) >> 8;
1620 int my = (scale * my_col + 128) >> 8;
1621 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1622 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1625 return;
1629 /* one-to-one mv scaling */
1631 if(IS_16X16(*mb_type)){
1632 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1633 if(IS_INTRA(mb_type_col)){
1634 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1635 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1636 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1637 }else{
1638 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1639 : map_col_to_list0[1][l1ref1[0]];
1640 const int scale = dist_scale_factor[ref0];
1641 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1642 int mv_l0[2];
1643 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1644 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1645 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1646 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1647 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
1649 }else{
1650 for(i8=0; i8<4; i8++){
1651 const int x8 = i8&1;
1652 const int y8 = i8>>1;
1653 int ref0, scale;
1654 const int16_t (*l1mv)[2]= l1mv0;
1656 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1657 continue;
1658 h->sub_mb_type[i8] = sub_mb_type;
1659 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1660 if(IS_INTRA(mb_type_col)){
1661 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1662 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1663 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1664 continue;
1667 ref0 = l1ref0[x8 + y8*h->b8_stride];
1668 if(ref0 >= 0)
1669 ref0 = map_col_to_list0[0][ref0];
1670 else{
1671 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1672 l1mv= l1mv1;
1674 scale = dist_scale_factor[ref0];
1676 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1677 if(IS_SUB_8X8(sub_mb_type)){
1678 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1679 int mx = (scale * mv_col[0] + 128) >> 8;
1680 int my = (scale * mv_col[1] + 128) >> 8;
1681 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1682 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1683 }else
1684 for(i4=0; i4<4; i4++){
1685 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1686 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1687 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1688 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1689 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1690 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
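/* Temporal direct recap: each list0 MV is the co-located list1 MV scaled by
 * dist_scale_factor (a POC-distance ratio in 1/256 units), i.e.
 *   mv_l0 = (scale * mv_col + 128) >> 8,   mv_l1 = mv_l0 - mv_col,
 * with the reference remapped through map_col_to_list0[]; intra co-located
 * blocks fall back to reference 0 with zero motion. */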
1697 static inline void write_back_motion(H264Context *h, int mb_type){
1698 MpegEncContext * const s = &h->s;
1699 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1700 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1701 int list;
1703 if(!USES_LIST(mb_type, 0))
1704 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1706 for(list=0; list<2; list++){
1707 int y;
1708 if(!USES_LIST(mb_type, list))
1709 continue;
1711 for(y=0; y<4; y++){
1712 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1713 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1715 if( h->pps.cabac ) {
1716 for(y=0; y<4; y++){
1717 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1718 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1723 uint8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1724 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1725 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1726 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1727 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1731 if(h->slice_type == B_TYPE && h->pps.cabac){
1732 if(IS_8X8(mb_type)){
1733 uint8_t *direct_table = &h->direct_table[b8_xy];
1734 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1735 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1736 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1742 * Decodes a network abstraction layer unit.
1743 * @param consumed is the number of bytes used as input
1744 * @param length is the length of the array
1745 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1746 * @returns decoded bytes, might be src+1 if no escapes
1748 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
1749 int i, si, di;
1750 uint8_t *dst;
1752 // src[0]&0x80; //forbidden bit
1753 h->nal_ref_idc= src[0]>>5;
1754 h->nal_unit_type= src[0]&0x1F;
1756 src++; length--;
1757 #if 0
1758 for(i=0; i<length; i++)
1759 printf("%2X ", src[i]);
1760 #endif
1761 for(i=0; i+1<length; i+=2){
1762 if(src[i]) continue;
1763 if(i>0 && src[i-1]==0) i--;
1764 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1765 if(src[i+2]!=3){
1766 /* startcode, so we must be past the end */
1767 length=i;
1769 break;
1773 if(i>=length-1){ //no escaped 0
1774 *dst_length= length;
1775 *consumed= length+1; //+1 for the header
1776 return src;
1779 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
1780 dst= h->rbsp_buffer;
1782 //printf("decoding esc\n");
1783 si=di=0;
1784 while(si<length){
1785 //remove escapes (very rare 1:2^22)
1786 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1787 if(src[si+2]==3){ //escape
1788 dst[di++]= 0;
1789 dst[di++]= 0;
1790 si+=3;
1791 continue;
1792 }else //next start code
1793 break;
1796 dst[di++]= src[si++];
1799 *dst_length= di;
1800 *consumed= si + 1;//+1 for the header
1801 //FIXME store exact number of bits in the getbitcontext (it's needed for decoding)
1802 return dst;
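/* Example of the escape removal above: the encoder inserts an emulation
 * prevention byte so that 00 00 00/01/02/03 never appears inside the payload;
 * the escaped sequence 00 00 03 01 decodes back to 00 00 01, while a plain
 * 00 00 01 in the input is treated as the next start code and ends the NAL. */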
1805 #if 0
1807 * @param src the data which should be escaped
1808 * @param dst the target buffer, dst+1 == src is allowed as a special case
1809 * @param length the length of the src data
1810 * @param dst_length the length of the dst array
1811 * @returns length of escaped data in bytes or -1 if an error occurred
1813 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1814 int i, escape_count, si, di;
1815 uint8_t *temp;
1817 assert(length>=0);
1818 assert(dst_length>0);
1820 dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1822 if(length==0) return 1;
1824 escape_count= 0;
1825 for(i=0; i<length; i+=2){
1826 if(src[i]) continue;
1827 if(i>0 && src[i-1]==0)
1828 i--;
1829 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1830 escape_count++;
1831 i+=2;
1835 if(escape_count==0){
1836 if(dst+1 != src)
1837 memcpy(dst+1, src, length);
1838 return length + 1;
1841 if(length + escape_count + 1> dst_length)
1842 return -1;
1844 //this should be damn rare (hopefully)
1846 h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1847 temp= h->rbsp_buffer;
1848 //printf("encoding esc\n");
1850 si= 0;
1851 di= 0;
1852 while(si < length){
1853 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1854 temp[di++]= 0; si++;
1855 temp[di++]= 0; si++;
1856 temp[di++]= 3;
1857 temp[di++]= src[si++];
1859 else
1860 temp[di++]= src[si++];
1862 memcpy(dst+1, temp, length+escape_count);
1864 assert(di == length+escape_count);
1866 return di + 1;
1870 * write 1,10,100,1000,... for alignment, yes it's exactly the inverse of MPEG-4
1872 static void encode_rbsp_trailing(PutBitContext *pb){
1873 int length;
1874 put_bits(pb, 1, 1);
1875 length= (-put_bits_count(pb))&7;
1876 if(length) put_bits(pb, length, 0);
1878 #endif
1881 * identifies the exact end of the bitstream
1882 * @return the length of the trailing, or 0 if damaged
1884 static int decode_rbsp_trailing(uint8_t *src){
1885 int v= *src;
1886 int r;
1888 tprintf("rbsp trailing %X\n", v);
1890 for(r=1; r<9; r++){
1891 if(v&1) return r;
1892 v>>=1;
1894 return 0;
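/* The loop above scans the last byte from the LSB up for the rbsp_stop_one_bit:
 * a final byte of 0x01 gives 1 trailing bit, 0x80 gives 8, and a zero byte
 * (damaged stream) falls through and returns 0. */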
1898 * idct transforms the 16 dc values and dequantizes them.
1899 * @param qp quantization parameter
1901 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1902 #define stride 16
1903 int i;
1904 int temp[16]; //FIXME check if this is a good idea
1905 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1906 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1908 //memset(block, 64, 2*256);
1909 //return;
1910 for(i=0; i<4; i++){
1911 const int offset= y_offset[i];
1912 const int z0= block[offset+stride*0] + block[offset+stride*4];
1913 const int z1= block[offset+stride*0] - block[offset+stride*4];
1914 const int z2= block[offset+stride*1] - block[offset+stride*5];
1915 const int z3= block[offset+stride*1] + block[offset+stride*5];
1917 temp[4*i+0]= z0+z3;
1918 temp[4*i+1]= z1+z2;
1919 temp[4*i+2]= z1-z2;
1920 temp[4*i+3]= z0-z3;
1923 for(i=0; i<4; i++){
1924 const int offset= x_offset[i];
1925 const int z0= temp[4*0+i] + temp[4*2+i];
1926 const int z1= temp[4*0+i] - temp[4*2+i];
1927 const int z2= temp[4*1+i] - temp[4*3+i];
1928 const int z3= temp[4*1+i] + temp[4*3+i];
1930 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1931 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1932 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1933 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
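/* The two passes above form the 4x4 Hadamard (butterfly) transform of the 16
 * luma DC coefficients; each transformed value is then dequantized with
 * round-to-nearest: (x*qmul + 128) >> 8. */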
1937 #if 0
1939 * dct transforms the 16 dc values.
1940 * @param qp quantization parameter ??? FIXME
1942 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1943 // const int qmul= dequant_coeff[qp][0];
1944 int i;
1945 int temp[16]; //FIXME check if this is a good idea
1946 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1947 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1949 for(i=0; i<4; i++){
1950 const int offset= y_offset[i];
1951 const int z0= block[offset+stride*0] + block[offset+stride*4];
1952 const int z1= block[offset+stride*0] - block[offset+stride*4];
1953 const int z2= block[offset+stride*1] - block[offset+stride*5];
1954 const int z3= block[offset+stride*1] + block[offset+stride*5];
1956 temp[4*i+0]= z0+z3;
1957 temp[4*i+1]= z1+z2;
1958 temp[4*i+2]= z1-z2;
1959 temp[4*i+3]= z0-z3;
1962 for(i=0; i<4; i++){
1963 const int offset= x_offset[i];
1964 const int z0= temp[4*0+i] + temp[4*2+i];
1965 const int z1= temp[4*0+i] - temp[4*2+i];
1966 const int z2= temp[4*1+i] - temp[4*3+i];
1967 const int z3= temp[4*1+i] + temp[4*3+i];
1969 block[stride*0 +offset]= (z0 + z3)>>1;
1970 block[stride*2 +offset]= (z1 + z2)>>1;
1971 block[stride*8 +offset]= (z1 - z2)>>1;
1972 block[stride*10+offset]= (z0 - z3)>>1;
1975 #endif
1977 #undef xStride
1978 #undef stride
1980 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1981 const int stride= 16*2;
1982 const int xStride= 16;
1983 int a,b,c,d,e;
1985 a= block[stride*0 + xStride*0];
1986 b= block[stride*0 + xStride*1];
1987 c= block[stride*1 + xStride*0];
1988 d= block[stride*1 + xStride*1];
1990 e= a-b;
1991 a= a+b;
1992 b= c-d;
1993 c= c+d;
1995 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1996 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1997 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1998 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
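/* 2x2 Hadamard on the four chroma DC coefficients (a,b,c,d), then
 * dequantization of each sum/difference as (x*qmul) >> 7. */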
2001 #if 0
2002 static void chroma_dc_dct_c(DCTELEM *block){
2003 const int stride= 16*2;
2004 const int xStride= 16;
2005 int a,b,c,d,e;
2007 a= block[stride*0 + xStride*0];
2008 b= block[stride*0 + xStride*1];
2009 c= block[stride*1 + xStride*0];
2010 d= block[stride*1 + xStride*1];
2012 e= a-b;
2013 a= a+b;
2014 b= c-d;
2015 c= c+d;
2017 block[stride*0 + xStride*0]= (a+c);
2018 block[stride*0 + xStride*1]= (e+b);
2019 block[stride*1 + xStride*0]= (a-c);
2020 block[stride*1 + xStride*1]= (e-b);
2022 #endif
2025 * gets the chroma qp.
2027 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
2029 return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
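/* Note: chroma QP is not simply luma QP + offset; the clipped index goes
 * through the chroma_qp[] mapping table (identity for low QPs, then rising
 * more slowly), so chroma is quantized relatively less coarsely at high QPs. */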
2033 #if 0
2034 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
2035 int i;
2036 //FIXME try int temp instead of block
2038 for(i=0; i<4; i++){
2039 const int d0= src1[0 + i*stride] - src2[0 + i*stride];
2040 const int d1= src1[1 + i*stride] - src2[1 + i*stride];
2041 const int d2= src1[2 + i*stride] - src2[2 + i*stride];
2042 const int d3= src1[3 + i*stride] - src2[3 + i*stride];
2043 const int z0= d0 + d3;
2044 const int z3= d0 - d3;
2045 const int z1= d1 + d2;
2046 const int z2= d1 - d2;
2048 block[0 + 4*i]= z0 + z1;
2049 block[1 + 4*i]= 2*z3 + z2;
2050 block[2 + 4*i]= z0 - z1;
2051 block[3 + 4*i]= z3 - 2*z2;
2054 for(i=0; i<4; i++){
2055 const int z0= block[0*4 + i] + block[3*4 + i];
2056 const int z3= block[0*4 + i] - block[3*4 + i];
2057 const int z1= block[1*4 + i] + block[2*4 + i];
2058 const int z2= block[1*4 + i] - block[2*4 + i];
2060 block[0*4 + i]= z0 + z1;
2061 block[1*4 + i]= 2*z3 + z2;
2062 block[2*4 + i]= z0 - z1;
2063 block[3*4 + i]= z3 - 2*z2;
2066 #endif
2068 //FIXME need to check that this doesn't overflow signed 32 bits for low qp; I am not sure, it's very close
2069 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
2070 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
2071 int i;
2072 const int * const quant_table= quant_coeff[qscale];
2073 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
2074 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
2075 const unsigned int threshold2= (threshold1<<1);
2076 int last_non_zero;
2078 if(seperate_dc){
2079 if(qscale<=18){
2080 //avoid overflows
2081 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
2082 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
2083 const unsigned int dc_threshold2= (dc_threshold1<<1);
2085 int level= block[0]*quant_coeff[qscale+18][0];
2086 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2087 if(level>0){
2088 level= (dc_bias + level)>>(QUANT_SHIFT-2);
2089 block[0]= level;
2090 }else{
2091 level= (dc_bias - level)>>(QUANT_SHIFT-2);
2092 block[0]= -level;
2094 // last_non_zero = i;
2095 }else{
2096 block[0]=0;
2098 }else{
2099 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
2100 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
2101 const unsigned int dc_threshold2= (dc_threshold1<<1);
2103 int level= block[0]*quant_table[0];
2104 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
2105 if(level>0){
2106 level= (dc_bias + level)>>(QUANT_SHIFT+1);
2107 block[0]= level;
2108 }else{
2109 level= (dc_bias - level)>>(QUANT_SHIFT+1);
2110 block[0]= -level;
2112 // last_non_zero = i;
2113 }else{
2114 block[0]=0;
2117 last_non_zero= 0;
2118 i=1;
2119 }else{
2120 last_non_zero= -1;
2121 i=0;
2124 for(; i<16; i++){
2125 const int j= scantable[i];
2126 int level= block[j]*quant_table[j];
2128 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2129 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2130 if(((unsigned)(level+threshold1))>threshold2){
2131 if(level>0){
2132 level= (bias + level)>>QUANT_SHIFT;
2133 block[j]= level;
2134 }else{
2135 level= (bias - level)>>QUANT_SHIFT;
2136 block[j]= -level;
2138 last_non_zero = i;
2139 }else{
2140 block[j]=0;
2144 return last_non_zero;
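/* Quantization recap: level = sign(coef) * ((|coef|*quant_table[j] + bias) >> QUANT_SHIFT),
 * where bias is ~1/3 (intra) or ~1/6 (inter) of the quantization interval; the
 * threshold1/threshold2 comparison is a branch-free test that zeroes
 * coefficients too small to survive the shift. The seperate_dc path applies
 * the same idea with adjusted shifts (and a different table row at low qscale
 * to avoid 32-bit overflow). */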
2147 static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
2148 const uint32_t a= ((uint32_t*)(src-stride))[0];
2149 ((uint32_t*)(src+0*stride))[0]= a;
2150 ((uint32_t*)(src+1*stride))[0]= a;
2151 ((uint32_t*)(src+2*stride))[0]= a;
2152 ((uint32_t*)(src+3*stride))[0]= a;
2155 static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
2156 ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
2157 ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
2158 ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
2159 ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
2162 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
2163 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
2164 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
2166 ((uint32_t*)(src+0*stride))[0]=
2167 ((uint32_t*)(src+1*stride))[0]=
2168 ((uint32_t*)(src+2*stride))[0]=
2169 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
2172 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
2173 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
2175 ((uint32_t*)(src+0*stride))[0]=
2176 ((uint32_t*)(src+1*stride))[0]=
2177 ((uint32_t*)(src+2*stride))[0]=
2178 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
2181 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
2182 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
2184 ((uint32_t*)(src+0*stride))[0]=
2185 ((uint32_t*)(src+1*stride))[0]=
2186 ((uint32_t*)(src+2*stride))[0]=
2187 ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
2190 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
2191 ((uint32_t*)(src+0*stride))[0]=
2192 ((uint32_t*)(src+1*stride))[0]=
2193 ((uint32_t*)(src+2*stride))[0]=
2194 ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
2198 #define LOAD_TOP_RIGHT_EDGE\
2199 const int t4= topright[0];\
2200 const int t5= topright[1];\
2201 const int t6= topright[2];\
2202 const int t7= topright[3];\
2204 #define LOAD_LEFT_EDGE\
2205 const int l0= src[-1+0*stride];\
2206 const int l1= src[-1+1*stride];\
2207 const int l2= src[-1+2*stride];\
2208 const int l3= src[-1+3*stride];\
2210 #define LOAD_TOP_EDGE\
2211 const int t0= src[ 0-1*stride];\
2212 const int t1= src[ 1-1*stride];\
2213 const int t2= src[ 2-1*stride];\
2214 const int t3= src[ 3-1*stride];\
2216 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
2217 const int lt= src[-1-1*stride];
2218 LOAD_TOP_EDGE
2219 LOAD_LEFT_EDGE
2221 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
2222 src[0+2*stride]=
2223 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
2224 src[0+1*stride]=
2225 src[1+2*stride]=
2226 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
2227 src[0+0*stride]=
2228 src[1+1*stride]=
2229 src[2+2*stride]=
2230 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2231 src[1+0*stride]=
2232 src[2+1*stride]=
2233 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
2234 src[2+0*stride]=
2235 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2236 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2239 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
2240 LOAD_TOP_EDGE
2241 LOAD_TOP_RIGHT_EDGE
2242 // LOAD_LEFT_EDGE
2244 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
2245 src[1+0*stride]=
2246 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
2247 src[2+0*stride]=
2248 src[1+1*stride]=
2249 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
2250 src[3+0*stride]=
2251 src[2+1*stride]=
2252 src[1+2*stride]=
2253 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
2254 src[3+1*stride]=
2255 src[2+2*stride]=
2256 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
2257 src[3+2*stride]=
2258 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
2259 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
2262 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
2263 const int lt= src[-1-1*stride];
2264 LOAD_TOP_EDGE
2265 LOAD_LEFT_EDGE
2266 const __attribute__((unused)) int unu= l3;
2268 src[0+0*stride]=
2269 src[1+2*stride]=(lt + t0 + 1)>>1;
2270 src[1+0*stride]=
2271 src[2+2*stride]=(t0 + t1 + 1)>>1;
2272 src[2+0*stride]=
2273 src[3+2*stride]=(t1 + t2 + 1)>>1;
2274 src[3+0*stride]=(t2 + t3 + 1)>>1;
2275 src[0+1*stride]=
2276 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
2277 src[1+1*stride]=
2278 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
2279 src[2+1*stride]=
2280 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2281 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2282 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2283 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2286 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
2287 LOAD_TOP_EDGE
2288 LOAD_TOP_RIGHT_EDGE
2289 const __attribute__((unused)) int unu= t7;
2291 src[0+0*stride]=(t0 + t1 + 1)>>1;
2292 src[1+0*stride]=
2293 src[0+2*stride]=(t1 + t2 + 1)>>1;
2294 src[2+0*stride]=
2295 src[1+2*stride]=(t2 + t3 + 1)>>1;
2296 src[3+0*stride]=
2297 src[2+2*stride]=(t3 + t4+ 1)>>1;
2298 src[3+2*stride]=(t4 + t5+ 1)>>1;
2299 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2300 src[1+1*stride]=
2301 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
2302 src[2+1*stride]=
2303 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
2304 src[3+1*stride]=
2305 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
2306 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
2309 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
2310 LOAD_LEFT_EDGE
2312 src[0+0*stride]=(l0 + l1 + 1)>>1;
2313 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2314 src[2+0*stride]=
2315 src[0+1*stride]=(l1 + l2 + 1)>>1;
2316 src[3+0*stride]=
2317 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2318 src[2+1*stride]=
2319 src[0+2*stride]=(l2 + l3 + 1)>>1;
2320 src[3+1*stride]=
2321 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
2322 src[3+2*stride]=
2323 src[1+3*stride]=
2324 src[0+3*stride]=
2325 src[2+2*stride]=
2326 src[2+3*stride]=
2327 src[3+3*stride]=l3;
2330 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
2331 const int lt= src[-1-1*stride];
2332 LOAD_TOP_EDGE
2333 LOAD_LEFT_EDGE
2334 const __attribute__((unused)) int unu= t3;
2336 src[0+0*stride]=
2337 src[2+1*stride]=(lt + l0 + 1)>>1;
2338 src[1+0*stride]=
2339 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
2340 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
2341 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
2342 src[0+1*stride]=
2343 src[2+2*stride]=(l0 + l1 + 1)>>1;
2344 src[1+1*stride]=
2345 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
2346 src[0+2*stride]=
2347 src[2+3*stride]=(l1 + l2+ 1)>>1;
2348 src[1+2*stride]=
2349 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
2350 src[0+3*stride]=(l2 + l3 + 1)>>1;
2351 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
2354 static void pred16x16_vertical_c(uint8_t *src, int stride){
2355 int i;
2356 const uint32_t a= ((uint32_t*)(src-stride))[0];
2357 const uint32_t b= ((uint32_t*)(src-stride))[1];
2358 const uint32_t c= ((uint32_t*)(src-stride))[2];
2359 const uint32_t d= ((uint32_t*)(src-stride))[3];
2361 for(i=0; i<16; i++){
2362 ((uint32_t*)(src+i*stride))[0]= a;
2363 ((uint32_t*)(src+i*stride))[1]= b;
2364 ((uint32_t*)(src+i*stride))[2]= c;
2365 ((uint32_t*)(src+i*stride))[3]= d;
2369 static void pred16x16_horizontal_c(uint8_t *src, int stride){
2370 int i;
2372 for(i=0; i<16; i++){
2373 ((uint32_t*)(src+i*stride))[0]=
2374 ((uint32_t*)(src+i*stride))[1]=
2375 ((uint32_t*)(src+i*stride))[2]=
2376 ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
2380 static void pred16x16_dc_c(uint8_t *src, int stride){
2381 int i, dc=0;
2383 for(i=0;i<16; i++){
2384 dc+= src[-1+i*stride];
2387 for(i=0;i<16; i++){
2388 dc+= src[i-stride];
2391 dc= 0x01010101*((dc + 16)>>5);
2393 for(i=0; i<16; i++){
2394 ((uint32_t*)(src+i*stride))[0]=
2395 ((uint32_t*)(src+i*stride))[1]=
2396 ((uint32_t*)(src+i*stride))[2]=
2397 ((uint32_t*)(src+i*stride))[3]= dc;
2401 static void pred16x16_left_dc_c(uint8_t *src, int stride){
2402 int i, dc=0;
2404 for(i=0;i<16; i++){
2405 dc+= src[-1+i*stride];
2408 dc= 0x01010101*((dc + 8)>>4);
2410 for(i=0; i<16; i++){
2411 ((uint32_t*)(src+i*stride))[0]=
2412 ((uint32_t*)(src+i*stride))[1]=
2413 ((uint32_t*)(src+i*stride))[2]=
2414 ((uint32_t*)(src+i*stride))[3]= dc;
2418 static void pred16x16_top_dc_c(uint8_t *src, int stride){
2419 int i, dc=0;
2421 for(i=0;i<16; i++){
2422 dc+= src[i-stride];
2424 dc= 0x01010101*((dc + 8)>>4);
2426 for(i=0; i<16; i++){
2427 ((uint32_t*)(src+i*stride))[0]=
2428 ((uint32_t*)(src+i*stride))[1]=
2429 ((uint32_t*)(src+i*stride))[2]=
2430 ((uint32_t*)(src+i*stride))[3]= dc;
2434 static void pred16x16_128_dc_c(uint8_t *src, int stride){
2435 int i;
2437 for(i=0; i<16; i++){
2438 ((uint32_t*)(src+i*stride))[0]=
2439 ((uint32_t*)(src+i*stride))[1]=
2440 ((uint32_t*)(src+i*stride))[2]=
2441 ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
2445 static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
2446 int i, j, k;
2447 int a;
2448 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2449 const uint8_t * const src0 = src+7-stride;
2450 const uint8_t *src1 = src+8*stride-1;
2451 const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
2452 int H = src0[1] - src0[-1];
2453 int V = src1[0] - src2[ 0];
2454 for(k=2; k<=8; ++k) {
2455 src1 += stride; src2 -= stride;
2456 H += k*(src0[k] - src0[-k]);
2457 V += k*(src1[0] - src2[ 0]);
2459 if(svq3){
2460 H = ( 5*(H/4) ) / 16;
2461 V = ( 5*(V/4) ) / 16;
2463 /* required for 100% accuracy */
2464 i = H; H = V; V = i;
2465 }else{
2466 H = ( 5*H+32 ) >> 6;
2467 V = ( 5*V+32 ) >> 6;
2470 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
2471 for(j=16; j>0; --j) {
2472 int b = a;
2473 a += V;
2474 for(i=-16; i<0; i+=4) {
2475 src[16+i] = cm[ (b ) >> 5 ];
2476 src[17+i] = cm[ (b+ H) >> 5 ];
2477 src[18+i] = cm[ (b+2*H) >> 5 ];
2478 src[19+i] = cm[ (b+3*H) >> 5 ];
2479 b += 4*H;
2481 src += stride;
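/* Plane prediction: H and V are distance-weighted sums of pixel differences
 * along the top and left edges (the estimated horizontal/vertical gradients);
 * each predicted pixel is then clip((b + i*H) >> 5) with b advancing by V per
 * row. The svq3 path only changes how H and V are scaled (and swaps them) for
 * bit-exact SVQ3 decoding. */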
2485 static void pred16x16_plane_c(uint8_t *src, int stride){
2486 pred16x16_plane_compat_c(src, stride, 0);
2489 static void pred8x8_vertical_c(uint8_t *src, int stride){
2490 int i;
2491 const uint32_t a= ((uint32_t*)(src-stride))[0];
2492 const uint32_t b= ((uint32_t*)(src-stride))[1];
2494 for(i=0; i<8; i++){
2495 ((uint32_t*)(src+i*stride))[0]= a;
2496 ((uint32_t*)(src+i*stride))[1]= b;
2500 static void pred8x8_horizontal_c(uint8_t *src, int stride){
2501 int i;
2503 for(i=0; i<8; i++){
2504 ((uint32_t*)(src+i*stride))[0]=
2505 ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
2509 static void pred8x8_128_dc_c(uint8_t *src, int stride){
2510 int i;
2512 for(i=0; i<8; i++){
2513 ((uint32_t*)(src+i*stride))[0]=
2514 ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
2518 static void pred8x8_left_dc_c(uint8_t *src, int stride){
2519 int i;
2520 int dc0, dc2;
2522 dc0=dc2=0;
2523 for(i=0;i<4; i++){
2524 dc0+= src[-1+i*stride];
2525 dc2+= src[-1+(i+4)*stride];
2527 dc0= 0x01010101*((dc0 + 2)>>2);
2528 dc2= 0x01010101*((dc2 + 2)>>2);
2530 for(i=0; i<4; i++){
2531 ((uint32_t*)(src+i*stride))[0]=
2532 ((uint32_t*)(src+i*stride))[1]= dc0;
2534 for(i=4; i<8; i++){
2535 ((uint32_t*)(src+i*stride))[0]=
2536 ((uint32_t*)(src+i*stride))[1]= dc2;
2540 static void pred8x8_top_dc_c(uint8_t *src, int stride){
2541 int i;
2542 int dc0, dc1;
2544 dc0=dc1=0;
2545 for(i=0;i<4; i++){
2546 dc0+= src[i-stride];
2547 dc1+= src[4+i-stride];
2549 dc0= 0x01010101*((dc0 + 2)>>2);
2550 dc1= 0x01010101*((dc1 + 2)>>2);
2552 for(i=0; i<4; i++){
2553 ((uint32_t*)(src+i*stride))[0]= dc0;
2554 ((uint32_t*)(src+i*stride))[1]= dc1;
2556 for(i=4; i<8; i++){
2557 ((uint32_t*)(src+i*stride))[0]= dc0;
2558 ((uint32_t*)(src+i*stride))[1]= dc1;
2563 static void pred8x8_dc_c(uint8_t *src, int stride){
2564 int i;
2565 int dc0, dc1, dc2, dc3;
2567 dc0=dc1=dc2=0;
2568 for(i=0;i<4; i++){
2569 dc0+= src[-1+i*stride] + src[i-stride];
2570 dc1+= src[4+i-stride];
2571 dc2+= src[-1+(i+4)*stride];
2573 dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
2574 dc0= 0x01010101*((dc0 + 4)>>3);
2575 dc1= 0x01010101*((dc1 + 2)>>2);
2576 dc2= 0x01010101*((dc2 + 2)>>2);
2578 for(i=0; i<4; i++){
2579 ((uint32_t*)(src+i*stride))[0]= dc0;
2580 ((uint32_t*)(src+i*stride))[1]= dc1;
2582 for(i=4; i<8; i++){
2583 ((uint32_t*)(src+i*stride))[0]= dc2;
2584 ((uint32_t*)(src+i*stride))[1]= dc3;
2588 static void pred8x8_plane_c(uint8_t *src, int stride){
2589 int j, k;
2590 int a;
2591 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2592 const uint8_t * const src0 = src+3-stride;
2593 const uint8_t *src1 = src+4*stride-1;
2594 const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
2595 int H = src0[1] - src0[-1];
2596 int V = src1[0] - src2[ 0];
2597 for(k=2; k<=4; ++k) {
2598 src1 += stride; src2 -= stride;
2599 H += k*(src0[k] - src0[-k]);
2600 V += k*(src1[0] - src2[ 0]);
2602 H = ( 17*H+16 ) >> 5;
2603 V = ( 17*V+16 ) >> 5;
2605 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
2606 for(j=8; j>0; --j) {
2607 int b = a;
2608 a += V;
2609 src[0] = cm[ (b ) >> 5 ];
2610 src[1] = cm[ (b+ H) >> 5 ];
2611 src[2] = cm[ (b+2*H) >> 5 ];
2612 src[3] = cm[ (b+3*H) >> 5 ];
2613 src[4] = cm[ (b+4*H) >> 5 ];
2614 src[5] = cm[ (b+5*H) >> 5 ];
2615 src[6] = cm[ (b+6*H) >> 5 ];
2616 src[7] = cm[ (b+7*H) >> 5 ];
2617 src += stride;
2621 #define SRC(x,y) src[(x)+(y)*stride]
2622 #define PL(y) \
2623 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2624 #define PREDICT_8x8_LOAD_LEFT \
2625 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2626 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2627 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2628 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2630 #define PT(x) \
2631 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2632 #define PREDICT_8x8_LOAD_TOP \
2633 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2634 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2635 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2636 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2637 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2639 #define PTR(x) \
2640 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2641 #define PREDICT_8x8_LOAD_TOPRIGHT \
2642 int t8, t9, t10, t11, t12, t13, t14, t15; \
2643 if(has_topright) { \
2644 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2645 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2646 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2648 #define PREDICT_8x8_LOAD_TOPLEFT \
2649 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
2651 #define PREDICT_8x8_DC(v) \
2652 int y; \
2653 for( y = 0; y < 8; y++ ) { \
2654 ((uint32_t*)src)[0] = \
2655 ((uint32_t*)src)[1] = v; \
2656 src += stride; \
2659 static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2661 PREDICT_8x8_DC(0x80808080);
2663 static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2665 PREDICT_8x8_LOAD_LEFT;
2666 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
2667 PREDICT_8x8_DC(dc);
2669 static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2671 PREDICT_8x8_LOAD_TOP;
2672 const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
2673 PREDICT_8x8_DC(dc);
2675 static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2677 PREDICT_8x8_LOAD_LEFT;
2678 PREDICT_8x8_LOAD_TOP;
2679 const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
2680 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
2681 PREDICT_8x8_DC(dc);
2683 static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2685 PREDICT_8x8_LOAD_LEFT;
2686 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2687 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2688 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2689 #undef ROW
2691 static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2693 int y;
2694 PREDICT_8x8_LOAD_TOP;
2695 src[0] = t0;
2696 src[1] = t1;
2697 src[2] = t2;
2698 src[3] = t3;
2699 src[4] = t4;
2700 src[5] = t5;
2701 src[6] = t6;
2702 src[7] = t7;
2703 for( y = 1; y < 8; y++ )
2704 *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
2706 static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2708 PREDICT_8x8_LOAD_TOP;
2709 PREDICT_8x8_LOAD_TOPRIGHT;
2710 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
2711 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
2712 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
2713 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
2714 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
2715 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2716 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
2717 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
2718 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
2719 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
2720 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
2721 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
2722 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
2723 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
2724 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
2726 static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2728 PREDICT_8x8_LOAD_TOP;
2729 PREDICT_8x8_LOAD_LEFT;
2730 PREDICT_8x8_LOAD_TOPLEFT;
2731 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
2732 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2733 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
2734 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2735 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
2736 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2737 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
2738 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
2739 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
2740 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
2741 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
2742 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
2743 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
2744 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2745 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
2748 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2750 PREDICT_8x8_LOAD_TOP;
2751 PREDICT_8x8_LOAD_LEFT;
2752 PREDICT_8x8_LOAD_TOPLEFT;
2753 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
2754 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
2755 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
2756 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
2757 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
2758 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
2759 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
2760 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
2761 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
2762 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
2763 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
2764 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
2765 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
2766 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
2767 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
2768 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
2769 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
2770 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
2771 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
2772 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
2773 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2774 SRC(7,0)= (t6 + t7 + 1) >> 1;
2776 static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2778 PREDICT_8x8_LOAD_TOP;
2779 PREDICT_8x8_LOAD_LEFT;
2780 PREDICT_8x8_LOAD_TOPLEFT;
2781 SRC(0,7)= (l6 + l7 + 1) >> 1;
2782 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
2783 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
2784 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
2785 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
2786 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
2787 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
2788 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
2789 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
2790 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
2791 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
2792 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
2793 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
2794 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
2795 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
2796 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
2797 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
2798 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
2799 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
2800 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
2801 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
2802 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
2804 static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2806 PREDICT_8x8_LOAD_TOP;
2807 PREDICT_8x8_LOAD_TOPRIGHT;
2808 SRC(0,0)= (t0 + t1 + 1) >> 1;
2809 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
2810 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
2811 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
2812 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
2813 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
2814 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
2815 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
2816 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
2817 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
2818 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
2819 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
2820 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
2821 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
2822 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
2823 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
2824 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
2825 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
2826 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
2827 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
2828 SRC(7,6)= (t10 + t11 + 1) >> 1;
2829 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
2831 static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
2833 PREDICT_8x8_LOAD_LEFT;
2834 SRC(0,0)= (l0 + l1 + 1) >> 1;
2835 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
2836 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
2837 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
2838 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
2839 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
2840 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
2841 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
2842 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
2843 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
2844 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
2845 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
2846 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
2847 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
2848 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2849 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2850 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2851 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
2853 #undef PREDICT_8x8_LOAD_LEFT
2854 #undef PREDICT_8x8_LOAD_TOP
2855 #undef PREDICT_8x8_LOAD_TOPLEFT
2856 #undef PREDICT_8x8_LOAD_TOPRIGHT
2857 #undef PREDICT_8x8_DC
2858 #undef PTR
2859 #undef PT
2860 #undef PL
2861 #undef SRC
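/* Unlike the 4x4 predictors, the 8x8 luma predictors above first smooth their
 * reference samples: PREDICT_8x8_LOAD_LEFT/TOP/TOPRIGHT run a (1,2,1)/4 filter
 * over the edge pixels (substituting neighbours when top-left or top-right is
 * unavailable), and prediction then uses the filtered l0..l7 / t0..t15 values. */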
2863 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
2864 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2865 int src_x_offset, int src_y_offset,
2866 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2867 MpegEncContext * const s = &h->s;
2868 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2869 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2870 const int luma_xy= (mx&3) + ((my&3)<<2);
2871 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2872 uint8_t * src_cb, * src_cr;
2873 int extra_width= h->emu_edge_width;
2874 int extra_height= h->emu_edge_height;
2875 int emu=0;
2876 const int full_mx= mx>>2;
2877 const int full_my= my>>2;
2878 const int pic_width = 16*s->mb_width;
2879 const int pic_height = 16*s->mb_height >> MB_MBAFF;
2881 if(!pic->data[0])
2882 return;
2884 if(mx&7) extra_width -= 3;
2885 if(my&7) extra_height -= 3;
2887 if( full_mx < 0-extra_width
2888 || full_my < 0-extra_height
2889 || full_mx + 16/*FIXME*/ > pic_width + extra_width
2890 || full_my + 16/*FIXME*/ > pic_height + extra_height){
2891 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2892 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2893 emu=1;
2896 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2897 if(!square){
2898 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2901 if(s->flags&CODEC_FLAG_GRAY) return;
2903 if(MB_MBAFF){
2904 // chroma offset when predicting from a field of opposite parity
2905 my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2906 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2908 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2909 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2911 if(emu){
2912 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2913 src_cb= s->edge_emu_buffer;
2915 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2917 if(emu){
2918 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2919 src_cr= s->edge_emu_buffer;
2921 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
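/* mc_dir_part does one uni-directional prediction: luma is interpolated at
 * quarter-pel precision (qpix_op indexed by (mx&3, my&3)), chroma at
 * eighth-pel precision ((mx&7, my&7) passed to chroma_op), and
 * ff_emulated_edge_mc provides a padded copy whenever the reference block
 * reaches outside the picture. */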
2924 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
2925 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2926 int x_offset, int y_offset,
2927 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2928 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
2929 int list0, int list1){
2930 MpegEncContext * const s = &h->s;
2931 qpel_mc_func *qpix_op= qpix_put;
2932 h264_chroma_mc_func chroma_op= chroma_put;
2934 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2935 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2936 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2937 x_offset += 8*s->mb_x;
2938 y_offset += 8*(s->mb_y >> MB_MBAFF);
2940 if(list0){
2941 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
2942 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
2943 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2944 qpix_op, chroma_op);
2946 qpix_op= qpix_avg;
2947 chroma_op= chroma_avg;
2950 if(list1){
2951 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
2952 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
2953 dest_y, dest_cb, dest_cr, x_offset, y_offset,
2954 qpix_op, chroma_op);
2958 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
2959 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2960 int x_offset, int y_offset,
2961 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
2962 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
2963 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
2964 int list0, int list1){
2965 MpegEncContext * const s = &h->s;
2967 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
2968 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
2969 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
2970 x_offset += 8*s->mb_x;
2971 y_offset += 8*(s->mb_y >> MB_MBAFF);
2973 if(list0 && list1){
2974 /* don't optimize for luma-only case, since B-frames usually
2975 * use implicit weights => chroma too. */
2976 uint8_t *tmp_cb = s->obmc_scratchpad;
2977 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2978 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2979 int refn0 = h->ref_cache[0][ scan8[n] ];
2980 int refn1 = h->ref_cache[1][ scan8[n] ];
2982 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
2983 dest_y, dest_cb, dest_cr,
2984 x_offset, y_offset, qpix_put, chroma_put);
2985 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
2986 tmp_y, tmp_cb, tmp_cr,
2987 x_offset, y_offset, qpix_put, chroma_put);
2989 if(h->use_weight == 2){
2990 int weight0 = h->implicit_weight[refn0][refn1];
2991 int weight1 = 64 - weight0;
2992 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
2993 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2994 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2995 }else{
2996 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2997 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2998 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2999 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3000 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
3001 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
3002 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3003 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
3004 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
3006 }else{
3007 int list = list1 ? 1 : 0;
3008 int refn = h->ref_cache[list][ scan8[n] ];
3009 Picture *ref= &h->ref_list[list][refn];
3010 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
3011 dest_y, dest_cb, dest_cr, x_offset, y_offset,
3012 qpix_put, chroma_put);
3014 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
3015 h->luma_weight[list][refn], h->luma_offset[list][refn]);
3016 if(h->use_weight_chroma){
3017 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3018 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
3019 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
3020 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
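/* Weighted prediction: with use_weight==2 ("implicit" weighting) the two
 * references are blended with table-driven weights that always sum to 64
 * (weight1 = 64 - weight0); otherwise the explicit per-reference luma/chroma
 * weights, log2 weight denominators and offsets are applied, bi-directionally
 * when both lists are used and uni-directionally otherwise (chroma only when
 * use_weight_chroma is set). */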
3025 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
3026 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3027 int x_offset, int y_offset,
3028 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
3029 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
3030 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
3031 int list0, int list1){
3032 if((h->use_weight==2 && list0 && list1
3033 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
3034 || h->use_weight==1)
3035 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3036 x_offset, y_offset, qpix_put, chroma_put,
3037 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
3038 else
3039 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
3040 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
3043 static inline void prefetch_motion(H264Context *h, int list){
3044 /* fetch pixels for estimated mv 4 macroblocks ahead
3045 * optimized for 64-byte cache lines */
3046 MpegEncContext * const s = &h->s;
3047 const int refn = h->ref_cache[list][scan8[0]];
3048 if(refn >= 0){
3049 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
3050 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
3051 uint8_t **src= h->ref_list[list][refn].data;
3052 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
3053 s->dsp.prefetch(src[0]+off, s->linesize, 4);
3054 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
3055 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
3059 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3060 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
3061 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
3062 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
3063 MpegEncContext * const s = &h->s;
3064 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3065 const int mb_type= s->current_picture.mb_type[mb_xy];
3067 assert(IS_INTER(mb_type));
3069 prefetch_motion(h, 0);
3071 if(IS_16X16(mb_type)){
3072 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
3073 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
3074 &weight_op[0], &weight_avg[0],
3075 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3076 }else if(IS_16X8(mb_type)){
3077 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
3078 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3079 &weight_op[1], &weight_avg[1],
3080 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3081 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
3082 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
3083 &weight_op[1], &weight_avg[1],
3084 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3085 }else if(IS_8X16(mb_type)){
3086 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
3087 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3088 &weight_op[2], &weight_avg[2],
3089 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
3090 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
3091 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3092 &weight_op[2], &weight_avg[2],
3093 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
3094 }else{
3095 int i;
3097 assert(IS_8X8(mb_type));
3099 for(i=0; i<4; i++){
3100 const int sub_mb_type= h->sub_mb_type[i];
3101 const int n= 4*i;
3102 int x_offset= (i&1)<<2;
3103 int y_offset= (i&2)<<1;
3105 if(IS_SUB_8X8(sub_mb_type)){
3106 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3107 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
3108 &weight_op[3], &weight_avg[3],
3109 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3110 }else if(IS_SUB_8X4(sub_mb_type)){
3111 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3112 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3113 &weight_op[4], &weight_avg[4],
3114 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3115 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
3116 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
3117 &weight_op[4], &weight_avg[4],
3118 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3119 }else if(IS_SUB_4X8(sub_mb_type)){
3120 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3121 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3122 &weight_op[5], &weight_avg[5],
3123 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3124 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3125 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3126 &weight_op[5], &weight_avg[5],
3127 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3128 }else{
3129 int j;
3130 assert(IS_SUB_4X4(sub_mb_type));
3131 for(j=0; j<4; j++){
3132 int sub_x_offset= x_offset + 2*(j&1);
3133 int sub_y_offset= y_offset + (j&2);
3134 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
3135 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
3136 &weight_op[6], &weight_avg[6],
3137 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
3143 prefetch_motion(h, 1);
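/* hl_motion dispatches by partition: 16x16, 16x8 or 8x16 at the macroblock
 * level, otherwise per-8x8 sub-partitions down to 4x4, calling mc_part() with
 * the qpel/chroma function variant sized for each block; mc_part() then picks
 * weighted or standard MC. Reference pixels for the next macroblock are
 * prefetched on entry (list 0) and exit (list 1). */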
3146 static void decode_init_vlc(H264Context *h){
3147 static int done = 0;
3149 if (!done) {
3150 int i;
3151 done = 1;
3153 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
3154 &chroma_dc_coeff_token_len [0], 1, 1,
3155 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
3157 for(i=0; i<4; i++){
3158 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
3159 &coeff_token_len [i][0], 1, 1,
3160 &coeff_token_bits[i][0], 1, 1, 1);
3163 for(i=0; i<3; i++){
3164 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
3165 &chroma_dc_total_zeros_len [i][0], 1, 1,
3166 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
3168 for(i=0; i<15; i++){
3169 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
3170 &total_zeros_len [i][0], 1, 1,
3171 &total_zeros_bits[i][0], 1, 1, 1);
3174 for(i=0; i<6; i++){
3175 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
3176 &run_len [i][0], 1, 1,
3177 &run_bits[i][0], 1, 1, 1);
3179 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
3180 &run_len [6][0], 1, 1,
3181 &run_bits[6][0], 1, 1, 1);
3186 * Sets the intra prediction function pointers.
3188 static void init_pred_ptrs(H264Context *h){
3189 // MpegEncContext * const s = &h->s;
3191 h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
3192 h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
3193 h->pred4x4[DC_PRED ]= pred4x4_dc_c;
3194 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
3195 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
3196 h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
3197 h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
3198 h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
3199 h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
3200 h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
3201 h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
3202 h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
3204 h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
3205 h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
3206 h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
3207 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
3208 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
3209 h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
3210 h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
3211 h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
3212 h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
3213 h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
3214 h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
3215 h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
3217 h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
3218 h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
3219 h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
3220 h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c;
3221 h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
3222 h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
3223 h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
3225 h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
3226 h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
3227 h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
3228 h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
3229 h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
3230 h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
3231 h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
3234 static void free_tables(H264Context *h){
3235 av_freep(&h->intra4x4_pred_mode);
3236 av_freep(&h->chroma_pred_mode_table);
3237 av_freep(&h->cbp_table);
3238 av_freep(&h->mvd_table[0]);
3239 av_freep(&h->mvd_table[1]);
3240 av_freep(&h->direct_table);
3241 av_freep(&h->non_zero_count);
3242 av_freep(&h->slice_table_base);
3243 av_freep(&h->top_borders[1]);
3244 av_freep(&h->top_borders[0]);
3245 h->slice_table= NULL;
3247 av_freep(&h->mb2b_xy);
3248 av_freep(&h->mb2b8_xy);
3250 av_freep(&h->s.obmc_scratchpad);
3253 static void init_dequant8_coeff_table(H264Context *h){
3254 int i,q,x;
3255 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
3256 h->dequant8_coeff[0] = h->dequant8_buffer[0];
3257 h->dequant8_coeff[1] = h->dequant8_buffer[1];
3259 for(i=0; i<2; i++ ){
3260 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
3261 h->dequant8_coeff[1] = h->dequant8_buffer[0];
3262 break;
3265 for(q=0; q<52; q++){
3266 int shift = div6[q];
3267 int idx = rem6[q];
3268 for(x=0; x<64; x++)
3269 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
3270 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
3271 h->pps.scaling_matrix8[i][x]) << shift;
3276 static void init_dequant4_coeff_table(H264Context *h){
3277 int i,j,q,x;
3278 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
3279 for(i=0; i<6; i++ ){
3280 h->dequant4_coeff[i] = h->dequant4_buffer[i];
3281 for(j=0; j<i; j++){
3282 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
3283 h->dequant4_coeff[i] = h->dequant4_buffer[j];
3284 break;
3287 if(j<i)
3288 continue;
3290 for(q=0; q<52; q++){
3291 int shift = div6[q] + 2;
3292 int idx = rem6[q];
3293 for(x=0; x<16; x++)
3294 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
3295 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
3296 h->pps.scaling_matrix4[i][x]) << shift;
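/* Dequant table construction: entry = base_coeff[q%6] * scaling_matrix << shift,
 * with shift = q/6 (+2 for the 4x4 tables) via the div6[]/rem6[] lookups;
 * buffers are shared when two scaling matrices compare equal, and lossless
 * (transform_bypass) streams later get a flat 1<<6 table for QP 0. */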
3301 static void init_dequant_tables(H264Context *h){
3302 int i,x;
3303 init_dequant4_coeff_table(h);
3304 if(h->pps.transform_8x8_mode)
3305 init_dequant8_coeff_table(h);
3306 if(h->sps.transform_bypass){
3307 for(i=0; i<6; i++)
3308 for(x=0; x<16; x++)
3309 h->dequant4_coeff[i][0][x] = 1<<6;
3310 if(h->pps.transform_8x8_mode)
3311 for(i=0; i<2; i++)
3312 for(x=0; x<64; x++)
3313 h->dequant8_coeff[i][0][x] = 1<<6;
3319 * allocates tables.
3320 * needs width/height to be set
3322 static int alloc_tables(H264Context *h){
3323 MpegEncContext * const s = &h->s;
3324 const int big_mb_num= s->mb_stride * (s->mb_height+1);
3325 int x,y;
3327 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
3329 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
3330 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3331 CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3332 CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
3333 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
3335 if( h->pps.cabac ) {
3336 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
3337 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
3338 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
3339 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
3342 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
3343 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
3345 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
3346 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
3347 for(y=0; y<s->mb_height; y++){
3348 for(x=0; x<s->mb_width; x++){
3349 const int mb_xy= x + y*s->mb_stride;
3350 const int b_xy = 4*x + 4*y*h->b_stride;
3351 const int b8_xy= 2*x + 2*y*h->b8_stride;
3353 h->mb2b_xy [mb_xy]= b_xy;
3354 h->mb2b8_xy[mb_xy]= b8_xy;
3358 s->obmc_scratchpad = NULL;
3360 if(!h->dequant4_coeff[0])
3361 init_dequant_tables(h);
3363 return 0;
3364 fail:
3365 free_tables(h);
3366 return -1;
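/**
 * Context setup shared by the init paths: copies the dimensions from the
 * AVCodecContext, installs the intra prediction function pointers and
 * initializes flat (all 16) default scaling matrices.
 */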
3369 static void common_init(H264Context *h){
3370 MpegEncContext * const s = &h->s;
3372 s->width = s->avctx->width;
3373 s->height = s->avctx->height;
3374 s->codec_id= s->avctx->codec->id;
3376 init_pred_ptrs(h);
3378 h->dequant_coeff_pps= -1;
3379 s->unrestricted_mv=1;
3380 s->decode=1; //FIXME
3382 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
3383 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
3386 static int decode_init(AVCodecContext *avctx){
3387 H264Context *h= avctx->priv_data;
3388 MpegEncContext * const s = &h->s;
3390 MPV_decode_defaults(s);
3392 s->avctx = avctx;
3393 common_init(h);
3395 s->out_format = FMT_H264;
3396 s->workaround_bugs= avctx->workaround_bugs;
3398 // set defaults
3399 // s->decode_mb= ff_h263_decode_mb;
3400 s->low_delay= 1;
3401 avctx->pix_fmt= PIX_FMT_YUV420P;
3403 decode_init_vlc(h);
3405 if(avctx->extradata_size > 0 && avctx->extradata &&
3406 *(char *)avctx->extradata == 1){
3407 h->is_avc = 1;
3408 h->got_avcC = 0;
3409 } else {
3410 h->is_avc = 0;
3413 return 0;
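/**
 * Per-frame setup: starts the MPV frame and error resilience, precomputes the
 * per-block pixel offsets for frame and field (MBAFF) decoding and lazily
 * allocates the scratch buffer used for bipred weighting.
 */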
3416 static int frame_start(H264Context *h){
3417 MpegEncContext * const s = &h->s;
3418 int i;
3420 if(MPV_frame_start(s, s->avctx) < 0)
3421 return -1;
3422 ff_er_frame_start(s);
3424 assert(s->linesize && s->uvlinesize);
3426 for(i=0; i<16; i++){
3427 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
3428 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
3430 for(i=0; i<4; i++){
3431 h->block_offset[16+i]=
3432 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3433 h->block_offset[24+16+i]=
3434 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
3437 /* can't be in alloc_tables because linesize isn't known there.
3438 * FIXME: redo bipred weight to not require extra buffer? */
3439 if(!s->obmc_scratchpad)
3440 s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3442 /* some macroblocks will be accessed before they're available */
3443 if(FRAME_MBAFF)
3444 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3446 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
3447 return 0;
3450 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3451 MpegEncContext * const s = &h->s;
3452 int i;
3454 src_y -= linesize;
3455 src_cb -= uvlinesize;
3456 src_cr -= uvlinesize;
3458 // There are two lines saved, the line above the top macroblock of a pair,
3459 // and the line above the bottom macroblock
3460 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3461 for(i=1; i<17; i++){
3462 h->left_border[i]= src_y[15+i* linesize];
3465 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
3466 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
3468 if(!(s->flags&CODEC_FLAG_GRAY)){
3469 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
3470 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
3471 for(i=1; i<9; i++){
3472 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
3473 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
3475 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
3476 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
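/* Exchanges the top/left border samples of the picture with the unfiltered
 * copies kept in the backup buffers (xchg=1 before intra prediction, xchg=0
 * to restore the deblocked samples afterwards), so that prediction uses the
 * unfiltered neighbour pixels required by the standard. */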
3480 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3481 MpegEncContext * const s = &h->s;
3482 int temp8, i;
3483 uint64_t temp64;
3484 int deblock_left = (s->mb_x > 0);
3485 int deblock_top = (s->mb_y > 0);
3487 src_y -= linesize + 1;
3488 src_cb -= uvlinesize + 1;
3489 src_cr -= uvlinesize + 1;
3491 #define XCHG(a,b,t,xchg)\
3492 t= a;\
3493 if(xchg)\
3494 a= b;\
3495 b= t;
3497 if(deblock_left){
3498 for(i = !deblock_top; i<17; i++){
3499 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3503 if(deblock_top){
3504 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3505 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3506 if(s->mb_x+1 < s->mb_width){
3507 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3511 if(!(s->flags&CODEC_FLAG_GRAY)){
3512 if(deblock_left){
3513 for(i = !deblock_top; i<9; i++){
3514 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
3515 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
3518 if(deblock_top){
3519 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3520 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3525 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
3526 MpegEncContext * const s = &h->s;
3527 int i;
3529 src_y -= 2 * linesize;
3530 src_cb -= 2 * uvlinesize;
3531 src_cr -= 2 * uvlinesize;
3533 // There are two lines saved, the line above the top macroblock of a pair,
3534 // and the line above the bottom macroblock
3535 h->left_border[0]= h->top_borders[0][s->mb_x][15];
3536 h->left_border[1]= h->top_borders[1][s->mb_x][15];
3537 for(i=2; i<34; i++){
3538 h->left_border[i]= src_y[15+i* linesize];
3541 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
3542 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
3543 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
3544 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
3546 if(!(s->flags&CODEC_FLAG_GRAY)){
3547 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
3548 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
3549 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
3550 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
3551 for(i=2; i<18; i++){
3552 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
3553 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
3555 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
3556 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
3557 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
3558 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
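/* MBAFF variant of xchg_mb_border: exchanges the two-line top border and the
 * 34-sample left border of a whole macroblock pair. */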
3562 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
3563 MpegEncContext * const s = &h->s;
3564 int temp8, i;
3565 uint64_t temp64;
3566 int deblock_left = (s->mb_x > 0);
3567 int deblock_top = (s->mb_y > 1);
3569 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3571 src_y -= 2 * linesize + 1;
3572 src_cb -= 2 * uvlinesize + 1;
3573 src_cr -= 2 * uvlinesize + 1;
3575 #define XCHG(a,b,t,xchg)\
3576 t= a;\
3577 if(xchg)\
3578 a= b;\
3579 b= t;
3581 if(deblock_left){
3582 for(i = (!deblock_top)<<1; i<34; i++){
3583 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
3587 if(deblock_top){
3588 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
3589 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3590 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3591 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3592 if(s->mb_x+1 < s->mb_width){
3593 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3594 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3598 if(!(s->flags&CODEC_FLAG_GRAY)){
3599 if(deblock_left){
3600 for(i = (!deblock_top) << 1; i<18; i++){
3601 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
3602 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
3605 if(deblock_top){
3606 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
3607 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
3608 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
3609 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
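/**
 * Reconstructs one macroblock: intra prediction or inter motion compensation
 * (or a direct I_PCM copy), residual IDCT, and the in-loop deblocking filter
 * when it is enabled.
 */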
3614 static void hl_decode_mb(H264Context *h){
3615 MpegEncContext * const s = &h->s;
3616 const int mb_x= s->mb_x;
3617 const int mb_y= s->mb_y;
3618 const int mb_xy= mb_x + mb_y*s->mb_stride;
3619 const int mb_type= s->current_picture.mb_type[mb_xy];
3620 uint8_t *dest_y, *dest_cb, *dest_cr;
3621 int linesize, uvlinesize /*dct_offset*/;
3622 int i;
3623 int *block_offset = &h->block_offset[0];
3624 const unsigned int bottom = mb_y & 1;
3625 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass);
3626 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
3627 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
3629 if(!s->decode)
3630 return;
3632 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3633 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3634 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3636 if (MB_FIELD) {
3637 linesize = h->mb_linesize = s->linesize * 2;
3638 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3639 block_offset = &h->block_offset[24];
3640 if(mb_y&1){ //FIXME move out of this func?
3641 dest_y -= s->linesize*15;
3642 dest_cb-= s->uvlinesize*7;
3643 dest_cr-= s->uvlinesize*7;
3645 if(FRAME_MBAFF) {
3646 int list;
3647 for(list=0; list<2; list++){
3648 if(!USES_LIST(mb_type, list))
3649 continue;
3650 if(IS_16X16(mb_type)){
3651 int8_t *ref = &h->ref_cache[list][scan8[0]];
3652 fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3653 }else{
3654 for(i=0; i<16; i+=4){
3655 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3656 int ref = h->ref_cache[list][scan8[i]];
3657 if(ref >= 0)
3658 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
3663 } else {
3664 linesize = h->mb_linesize = s->linesize;
3665 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3666 // dct_offset = s->linesize * 16;
3669 if(transform_bypass){
3670 idct_dc_add =
3671 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
3672 }else if(IS_8x8DCT(mb_type)){
3673 idct_dc_add = s->dsp.h264_idct8_dc_add;
3674 idct_add = s->dsp.h264_idct8_add;
3675 }else{
3676 idct_dc_add = s->dsp.h264_idct_dc_add;
3677 idct_add = s->dsp.h264_idct_add;
3680 if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3681 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3682 int mbt_y = mb_y&~1;
3683 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
3684 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3685 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3686 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
3689 if (IS_INTRA_PCM(mb_type)) {
3690 unsigned int x, y;
3692 // The pixels are stored in the h->mb array in the same order as the levels;
3693 // copy them to the output in the correct order.
3694 for(i=0; i<16; i++) {
3695 for (y=0; y<4; y++) {
3696 for (x=0; x<4; x++) {
3697 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
3701 for(i=16; i<16+4; i++) {
3702 for (y=0; y<4; y++) {
3703 for (x=0; x<4; x++) {
3704 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3708 for(i=20; i<20+4; i++) {
3709 for (y=0; y<4; y++) {
3710 for (x=0; x<4; x++) {
3711 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
3715 } else {
3716 if(IS_INTRA(mb_type)){
3717 if(h->deblocking_filter && !FRAME_MBAFF)
3718 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3720 if(!(s->flags&CODEC_FLAG_GRAY)){
3721 h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
3722 h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
3725 if(IS_INTRA4x4(mb_type)){
3726 if(!s->encoding){
3727 if(IS_8x8DCT(mb_type)){
3728 for(i=0; i<16; i+=4){
3729 uint8_t * const ptr= dest_y + block_offset[i];
3730 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3731 const int nnz = h->non_zero_count_cache[ scan8[i] ];
3732 h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
3733 (h->topright_samples_available<<(i+1))&0x8000, linesize);
3734 if(nnz){
3735 if(nnz == 1 && h->mb[i*16])
3736 idct_dc_add(ptr, h->mb + i*16, linesize);
3737 else
3738 idct_add(ptr, h->mb + i*16, linesize);
3741 }else
3742 for(i=0; i<16; i++){
3743 uint8_t * const ptr= dest_y + block_offset[i];
3744 uint8_t *topright;
3745 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
3746 int nnz, tr;
3748 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
3749 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
3750 assert(mb_y || linesize <= block_offset[i]);
3751 if(!topright_avail){
3752 tr= ptr[3 - linesize]*0x01010101;
3753 topright= (uint8_t*) &tr;
3754 }else
3755 topright= ptr + 4 - linesize;
3756 }else
3757 topright= NULL;
3759 h->pred4x4[ dir ](ptr, topright, linesize);
3760 nnz = h->non_zero_count_cache[ scan8[i] ];
3761 if(nnz){
3762 if(s->codec_id == CODEC_ID_H264){
3763 if(nnz == 1 && h->mb[i*16])
3764 idct_dc_add(ptr, h->mb + i*16, linesize);
3765 else
3766 idct_add(ptr, h->mb + i*16, linesize);
3767 }else
3768 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
3772 }else{
3773 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
3774 if(s->codec_id == CODEC_ID_H264){
3775 if(!transform_bypass)
3776 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
3777 }else
3778 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3780 if(h->deblocking_filter && !FRAME_MBAFF)
3781 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
3782 }else if(s->codec_id == CODEC_ID_H264){
3783 hl_motion(h, dest_y, dest_cb, dest_cr,
3784 s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
3785 s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
3786 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
3790 if(!IS_INTRA4x4(mb_type)){
3791 if(s->codec_id == CODEC_ID_H264){
3792 if(IS_INTRA16x16(mb_type)){
3793 for(i=0; i<16; i++){
3794 if(h->non_zero_count_cache[ scan8[i] ])
3795 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3796 else if(h->mb[i*16])
3797 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3799 }else{
3800 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
3801 for(i=0; i<16; i+=di){
3802 int nnz = h->non_zero_count_cache[ scan8[i] ];
3803 if(nnz){
3804 if(nnz==1 && h->mb[i*16])
3805 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3806 else
3807 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
3811 }else{
3812 for(i=0; i<16; i++){
3813 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
3814 uint8_t * const ptr= dest_y + block_offset[i];
3815 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
3821 if(!(s->flags&CODEC_FLAG_GRAY)){
3822 uint8_t *dest[2] = {dest_cb, dest_cr};
3823 if(transform_bypass){
3824 idct_add = idct_dc_add = s->dsp.add_pixels4;
3825 }else{
3826 idct_add = s->dsp.h264_idct_add;
3827 idct_dc_add = s->dsp.h264_idct_dc_add;
3828 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
3829 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
3831 if(s->codec_id == CODEC_ID_H264){
3832 for(i=16; i<16+8; i++){
3833 if(h->non_zero_count_cache[ scan8[i] ])
3834 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3835 else if(h->mb[i*16])
3836 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
3838 }else{
3839 for(i=16; i<16+8; i++){
3840 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
3841 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
3842 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
3848 if(h->deblocking_filter) {
3849 if (FRAME_MBAFF) {
3850 //FIXME try deblocking one mb at a time?
3851 // the reduction in loading/storing mvs and such might outweigh the extra backup/xchg_border
3852 const int mb_y = s->mb_y - 1;
3853 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3854 const int mb_xy= mb_x + mb_y*s->mb_stride;
3855 const int mb_type_top = s->current_picture.mb_type[mb_xy];
3856 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3857 if (!bottom) return;
3858 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
3859 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3860 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3862 if(IS_INTRA(mb_type_top | mb_type_bottom))
3863 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3865 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3866 // deblock a pair
3867 // top
3868 s->mb_y--;
3869 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3870 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3871 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3872 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3873 // bottom
3874 s->mb_y++;
3875 tprintf("call mbaff filter_mb\n");
3876 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3877 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3878 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3879 } else {
3880 tprintf("call filter_mb\n");
3881 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3882 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
3883 filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3889 * fills the default_ref_list.
3891 static int fill_default_ref_list(H264Context *h){
3892 MpegEncContext * const s = &h->s;
3893 int i;
3894 int smallest_poc_greater_than_current = -1;
3895 Picture sorted_short_ref[32];
3897 if(h->slice_type==B_TYPE){
3898 int out_i;
3899 int limit= INT_MIN;
3901 /* sort frame according to poc in B slice */
3902 for(out_i=0; out_i<h->short_ref_count; out_i++){
3903 int best_i=INT_MIN;
3904 int best_poc=INT_MAX;
3906 for(i=0; i<h->short_ref_count; i++){
3907 const int poc= h->short_ref[i]->poc;
3908 if(poc > limit && poc < best_poc){
3909 best_poc= poc;
3910 best_i= i;
3914 assert(best_i != INT_MIN);
3916 limit= best_poc;
3917 sorted_short_ref[out_i]= *h->short_ref[best_i];
3918 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
3919 if (-1 == smallest_poc_greater_than_current) {
3920 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
3921 smallest_poc_greater_than_current = out_i;
3927 if(s->picture_structure == PICT_FRAME){
3928 if(h->slice_type==B_TYPE){
3929 int list;
3930 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
3932 // find the largest poc
3933 for(list=0; list<2; list++){
3934 int index = 0;
3935 int j= -99;
3936 int step= list ? -1 : 1;
3938 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
3939 while(j<0 || j>= h->short_ref_count){
3940 if(j != -99 && step == (list ? -1 : 1))
3941 return -1;
3942 step = -step;
3943 j= smallest_poc_greater_than_current + (step>>1);
3945 if(sorted_short_ref[j].reference != 3) continue;
3946 h->default_ref_list[list][index ]= sorted_short_ref[j];
3947 h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
3950 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
3951 if(h->long_ref[i] == NULL) continue;
3952 if(h->long_ref[i]->reference != 3) continue;
3954 h->default_ref_list[ list ][index ]= *h->long_ref[i];
3955 h->default_ref_list[ list ][index++].pic_id= i;
3958 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
3959 // swap the two first elements of L1 when
3960 // L0 and L1 are identical
3961 Picture temp= h->default_ref_list[1][0];
3962 h->default_ref_list[1][0] = h->default_ref_list[1][1];
3963 h->default_ref_list[1][1] = temp;
3966 if(index < h->ref_count[ list ])
3967 memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
3969 }else{
3970 int index=0;
3971 for(i=0; i<h->short_ref_count; i++){
3972 if(h->short_ref[i]->reference != 3) continue; //FIXME reference field handling
3973 h->default_ref_list[0][index ]= *h->short_ref[i];
3974 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
3976 for(i = 0; i < 16; i++){
3977 if(h->long_ref[i] == NULL) continue;
3978 if(h->long_ref[i]->reference != 3) continue;
3979 h->default_ref_list[0][index ]= *h->long_ref[i];
3980 h->default_ref_list[0][index++].pic_id= i;
3982 if(index < h->ref_count[0])
3983 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
3985 }else{ //FIELD
3986 if(h->slice_type==B_TYPE){
3987 }else{
3988 //FIXME second field handling
3991 #ifdef TRACE
3992 for (i=0; i<h->ref_count[0]; i++) {
3993 tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
3995 if(h->slice_type==B_TYPE){
3996 for (i=0; i<h->ref_count[1]; i++) {
3997 tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
4000 #endif
4001 return 0;
4004 static void print_short_term(H264Context *h);
4005 static void print_long_term(H264Context *h);
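/**
 * Parses the ref_pic_list_reordering() syntax and reorders ref_list[0]/[1]
 * starting from the default lists; missing entries are replaced with the
 * current picture and the direct-mode tables are refreshed afterwards.
 */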
4007 static int decode_ref_pic_list_reordering(H264Context *h){
4008 MpegEncContext * const s = &h->s;
4009 int list, index;
4011 print_short_term(h);
4012 print_long_term(h);
4013 if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
4015 for(list=0; list<2; list++){
4016 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
4018 if(get_bits1(&s->gb)){
4019 int pred= h->curr_pic_num;
4021 for(index=0; ; index++){
4022 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
4023 int pic_id;
4024 int i;
4025 Picture *ref = NULL;
4027 if(reordering_of_pic_nums_idc==3)
4028 break;
4030 if(index >= h->ref_count[list]){
4031 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
4032 return -1;
4035 if(reordering_of_pic_nums_idc<3){
4036 if(reordering_of_pic_nums_idc<2){
4037 const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
4039 if(abs_diff_pic_num >= h->max_pic_num){
4040 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
4041 return -1;
4044 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
4045 else pred+= abs_diff_pic_num;
4046 pred &= h->max_pic_num - 1;
4048 for(i= h->short_ref_count-1; i>=0; i--){
4049 ref = h->short_ref[i];
4050 assert(ref->reference == 3);
4051 assert(!ref->long_ref);
4052 if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
4053 break;
4055 if(i>=0)
4056 ref->pic_id= ref->frame_num;
4057 }else{
4058 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
4059 ref = h->long_ref[pic_id];
4060 ref->pic_id= pic_id;
4061 assert(ref->reference == 3);
4062 assert(ref->long_ref);
4063 i=0;
4066 if (i < 0) {
4067 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
4068 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
4069 } else {
4070 for(i=index; i+1<h->ref_count[list]; i++){
4071 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
4072 break;
4074 for(; i > index; i--){
4075 h->ref_list[list][i]= h->ref_list[list][i-1];
4077 h->ref_list[list][index]= *ref;
4079 }else{
4080 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
4081 return -1;
4086 if(h->slice_type!=B_TYPE) break;
4088 for(list=0; list<2; list++){
4089 for(index= 0; index < h->ref_count[list]; index++){
4090 if(!h->ref_list[list][index].data[0])
4091 h->ref_list[list][index]= s->current_picture;
4093 if(h->slice_type!=B_TYPE) break;
4096 if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
4097 direct_dist_scale_factor(h);
4098 direct_ref_list_init(h);
4099 return 0;
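/* For MBAFF, every frame reference is duplicated as a pair of field
 * references (doubled linesize, the second field offset by one line) and the
 * prediction weights are replicated for the new field entries. */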
4102 static int fill_mbaff_ref_list(H264Context *h){
4103 int list, i, j;
4104 for(list=0; list<2; list++){
4105 for(i=0; i<h->ref_count[list]; i++){
4106 Picture *frame = &h->ref_list[list][i];
4107 Picture *field = &h->ref_list[list][16+2*i];
4108 field[0] = *frame;
4109 for(j=0; j<3; j++)
4110 field[0].linesize[j] <<= 1;
4111 field[1] = field[0];
4112 for(j=0; j<3; j++)
4113 field[1].data[j] += frame->linesize[j];
4115 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
4116 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
4117 for(j=0; j<2; j++){
4118 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
4119 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
4123 for(j=0; j<h->ref_count[1]; j++){
4124 for(i=0; i<h->ref_count[0]; i++)
4125 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
4126 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
4127 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
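/**
 * Parses the explicit weighted prediction table (pred_weight_table() syntax)
 * and sets use_weight / use_weight_chroma when any non-default weight or
 * offset is present.
 */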
4131 static int pred_weight_table(H264Context *h){
4132 MpegEncContext * const s = &h->s;
4133 int list, i;
4134 int luma_def, chroma_def;
4136 h->use_weight= 0;
4137 h->use_weight_chroma= 0;
4138 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
4139 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
4140 luma_def = 1<<h->luma_log2_weight_denom;
4141 chroma_def = 1<<h->chroma_log2_weight_denom;
4143 for(list=0; list<2; list++){
4144 for(i=0; i<h->ref_count[list]; i++){
4145 int luma_weight_flag, chroma_weight_flag;
4147 luma_weight_flag= get_bits1(&s->gb);
4148 if(luma_weight_flag){
4149 h->luma_weight[list][i]= get_se_golomb(&s->gb);
4150 h->luma_offset[list][i]= get_se_golomb(&s->gb);
4151 if( h->luma_weight[list][i] != luma_def
4152 || h->luma_offset[list][i] != 0)
4153 h->use_weight= 1;
4154 }else{
4155 h->luma_weight[list][i]= luma_def;
4156 h->luma_offset[list][i]= 0;
4159 chroma_weight_flag= get_bits1(&s->gb);
4160 if(chroma_weight_flag){
4161 int j;
4162 for(j=0; j<2; j++){
4163 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
4164 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
4165 if( h->chroma_weight[list][i][j] != chroma_def
4166 || h->chroma_offset[list][i][j] != 0)
4167 h->use_weight_chroma= 1;
4169 }else{
4170 int j;
4171 for(j=0; j<2; j++){
4172 h->chroma_weight[list][i][j]= chroma_def;
4173 h->chroma_offset[list][i][j]= 0;
4177 if(h->slice_type != B_TYPE) break;
4179 h->use_weight= h->use_weight || h->use_weight_chroma;
4180 return 0;
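/* Implicit (temporal-distance based) bi-prediction weights, derived from the
 * POC arithmetic below: td = poc1 - poc0, tb = cur_poc - poc0,
 * tx = (16384 + |td|/2) / td, dsf = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
 * the stored weight is 64 - dsf, falling back to the neutral 32 whenever
 * td == 0 or dsf lies outside [-64, 128]. */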
4183 static void implicit_weight_table(H264Context *h){
4184 MpegEncContext * const s = &h->s;
4185 int ref0, ref1;
4186 int cur_poc = s->current_picture_ptr->poc;
4188 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
4189 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
4190 h->use_weight= 0;
4191 h->use_weight_chroma= 0;
4192 return;
4195 h->use_weight= 2;
4196 h->use_weight_chroma= 2;
4197 h->luma_log2_weight_denom= 5;
4198 h->chroma_log2_weight_denom= 5;
4200 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
4201 int poc0 = h->ref_list[0][ref0].poc;
4202 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
4203 int poc1 = h->ref_list[1][ref1].poc;
4204 int td = clip(poc1 - poc0, -128, 127);
4205 if(td){
4206 int tb = clip(cur_poc - poc0, -128, 127);
4207 int tx = (16384 + (ABS(td) >> 1)) / td;
4208 int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
4209 if(dist_scale_factor < -64 || dist_scale_factor > 128)
4210 h->implicit_weight[ref0][ref1] = 32;
4211 else
4212 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
4213 }else
4214 h->implicit_weight[ref0][ref1] = 32;
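/* Drops the reference flag of a picture unless it is still waiting to be
 * output (delayed_output_pic or one of the delayed_pic entries). */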
4219 static inline void unreference_pic(H264Context *h, Picture *pic){
4220 int i;
4221 pic->reference=0;
4222 if(pic == h->delayed_output_pic)
4223 pic->reference=1;
4224 else{
4225 for(i = 0; h->delayed_pic[i]; i++)
4226 if(pic == h->delayed_pic[i]){
4227 pic->reference=1;
4228 break;
4234 * instantaneous decoder refresh.
4236 static void idr(H264Context *h){
4237 int i;
4239 for(i=0; i<16; i++){
4240 if (h->long_ref[i] != NULL) {
4241 unreference_pic(h, h->long_ref[i]);
4242 h->long_ref[i]= NULL;
4245 h->long_ref_count=0;
4247 for(i=0; i<h->short_ref_count; i++){
4248 unreference_pic(h, h->short_ref[i]);
4249 h->short_ref[i]= NULL;
4251 h->short_ref_count=0;
4254 /* forget old pics after a seek */
4255 static void flush_dpb(AVCodecContext *avctx){
4256 H264Context *h= avctx->priv_data;
4257 int i;
4258 for(i=0; i<16; i++) {
4259 if(h->delayed_pic[i])
4260 h->delayed_pic[i]->reference= 0;
4261 h->delayed_pic[i]= NULL;
4263 if(h->delayed_output_pic)
4264 h->delayed_output_pic->reference= 0;
4265 h->delayed_output_pic= NULL;
4266 idr(h);
4267 if(h->s.current_picture_ptr)
4268 h->s.current_picture_ptr->reference= 0;
4273 * @return the removed picture or NULL if an error occurs
4275 static Picture * remove_short(H264Context *h, int frame_num){
4276 MpegEncContext * const s = &h->s;
4277 int i;
4279 if(s->avctx->debug&FF_DEBUG_MMCO)
4280 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
4282 for(i=0; i<h->short_ref_count; i++){
4283 Picture *pic= h->short_ref[i];
4284 if(s->avctx->debug&FF_DEBUG_MMCO)
4285 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
4286 if(pic->frame_num == frame_num){
4287 h->short_ref[i]= NULL;
4288 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
4289 h->short_ref_count--;
4290 return pic;
4293 return NULL;
4298 * @return the removed picture or NULL if an error occurs
4300 static Picture * remove_long(H264Context *h, int i){
4301 Picture *pic;
4303 pic= h->long_ref[i];
4304 h->long_ref[i]= NULL;
4305 if(pic) h->long_ref_count--;
4307 return pic;
4311 * print short term list
4313 static void print_short_term(H264Context *h) {
4314 uint32_t i;
4315 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4316 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
4317 for(i=0; i<h->short_ref_count; i++){
4318 Picture *pic= h->short_ref[i];
4319 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4325 * print long term list
4327 static void print_long_term(H264Context *h) {
4328 uint32_t i;
4329 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
4330 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
4331 for(i = 0; i < 16; i++){
4332 Picture *pic= h->long_ref[i];
4333 if (pic) {
4334 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
4341 * Executes the reference picture marking (memory management control operations).
4343 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
4344 MpegEncContext * const s = &h->s;
4345 int i, j;
4346 int current_is_long=0;
4347 Picture *pic;
4349 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
4350 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
4352 for(i=0; i<mmco_count; i++){
4353 if(s->avctx->debug&FF_DEBUG_MMCO)
4354 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
4356 switch(mmco[i].opcode){
4357 case MMCO_SHORT2UNUSED:
4358 pic= remove_short(h, mmco[i].short_frame_num);
4359 if(pic)
4360 unreference_pic(h, pic);
4361 else if(s->avctx->debug&FF_DEBUG_MMCO)
4362 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
4363 break;
4364 case MMCO_SHORT2LONG:
4365 pic= remove_long(h, mmco[i].long_index);
4366 if(pic) unreference_pic(h, pic);
4368 h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
4369 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4370 h->long_ref_count++;
4371 break;
4372 case MMCO_LONG2UNUSED:
4373 pic= remove_long(h, mmco[i].long_index);
4374 if(pic)
4375 unreference_pic(h, pic);
4376 else if(s->avctx->debug&FF_DEBUG_MMCO)
4377 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
4378 break;
4379 case MMCO_LONG:
4380 pic= remove_long(h, mmco[i].long_index);
4381 if(pic) unreference_pic(h, pic);
4383 h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
4384 h->long_ref[ mmco[i].long_index ]->long_ref=1;
4385 h->long_ref_count++;
4387 current_is_long=1;
4388 break;
4389 case MMCO_SET_MAX_LONG:
4390 assert(mmco[i].long_index <= 16);
4391 // just remove the long term refs whose index is greater than the new max
4392 for(j = mmco[i].long_index; j<16; j++){
4393 pic = remove_long(h, j);
4394 if (pic) unreference_pic(h, pic);
4396 break;
4397 case MMCO_RESET:
4398 while(h->short_ref_count){
4399 pic= remove_short(h, h->short_ref[0]->frame_num);
4400 unreference_pic(h, pic);
4402 for(j = 0; j < 16; j++) {
4403 pic= remove_long(h, j);
4404 if(pic) unreference_pic(h, pic);
4406 break;
4407 default: assert(0);
4411 if(!current_is_long){
4412 pic= remove_short(h, s->current_picture_ptr->frame_num);
4413 if(pic){
4414 unreference_pic(h, pic);
4415 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
4418 if(h->short_ref_count)
4419 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
4421 h->short_ref[0]= s->current_picture_ptr;
4422 h->short_ref[0]->long_ref=0;
4423 h->short_ref_count++;
4426 print_short_term(h);
4427 print_long_term(h);
4428 return 0;
4431 static int decode_ref_pic_marking(H264Context *h){
4432 MpegEncContext * const s = &h->s;
4433 int i;
4435 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
4436 s->broken_link= get_bits1(&s->gb) -1;
4437 h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
4438 if(h->mmco[0].long_index == -1)
4439 h->mmco_index= 0;
4440 else{
4441 h->mmco[0].opcode= MMCO_LONG;
4442 h->mmco_index= 1;
4444 }else{
4445 if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
4446 for(i= 0; i<MAX_MMCO_COUNT; i++) {
4447 MMCOOpcode opcode= get_ue_golomb(&s->gb);
4449 h->mmco[i].opcode= opcode;
4450 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
4451 h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
4452 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
4453 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
4454 return -1;
4457 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
4458 h->mmco[i].long_index= get_ue_golomb(&s->gb);
4459 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
4460 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
4461 return -1;
4465 if(opcode > MMCO_LONG){
4466 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
4467 return -1;
4469 if(opcode == MMCO_END)
4470 break;
4472 h->mmco_index= i;
4473 }else{
4474 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
4476 if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
4477 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
4478 h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
4479 h->mmco_index= 1;
4480 }else
4481 h->mmco_index= 0;
4485 return 0;
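/**
 * Computes the picture order count of the current picture for the three
 * poc_type modes and stores the per-field and per-frame POC values.
 */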
4488 static int init_poc(H264Context *h){
4489 MpegEncContext * const s = &h->s;
4490 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
4491 int field_poc[2];
4493 if(h->nal_unit_type == NAL_IDR_SLICE){
4494 h->frame_num_offset= 0;
4495 }else{
4496 if(h->frame_num < h->prev_frame_num)
4497 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
4498 else
4499 h->frame_num_offset= h->prev_frame_num_offset;
4502 if(h->sps.poc_type==0){
4503 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
4505 if(h->nal_unit_type == NAL_IDR_SLICE){
4506 h->prev_poc_msb=
4507 h->prev_poc_lsb= 0;
4510 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
4511 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
4512 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
4513 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
4514 else
4515 h->poc_msb = h->prev_poc_msb;
4516 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4517 field_poc[0] =
4518 field_poc[1] = h->poc_msb + h->poc_lsb;
4519 if(s->picture_structure == PICT_FRAME)
4520 field_poc[1] += h->delta_poc_bottom;
4521 }else if(h->sps.poc_type==1){
4522 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
4523 int i;
4525 if(h->sps.poc_cycle_length != 0)
4526 abs_frame_num = h->frame_num_offset + h->frame_num;
4527 else
4528 abs_frame_num = 0;
4530 if(h->nal_ref_idc==0 && abs_frame_num > 0)
4531 abs_frame_num--;
4533 expected_delta_per_poc_cycle = 0;
4534 for(i=0; i < h->sps.poc_cycle_length; i++)
4535 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
4537 if(abs_frame_num > 0){
4538 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
4539 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
4541 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
4542 for(i = 0; i <= frame_num_in_poc_cycle; i++)
4543 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
4544 } else
4545 expectedpoc = 0;
4547 if(h->nal_ref_idc == 0)
4548 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
4550 field_poc[0] = expectedpoc + h->delta_poc[0];
4551 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
4553 if(s->picture_structure == PICT_FRAME)
4554 field_poc[1] += h->delta_poc[1];
4555 }else{
4556 int poc;
4557 if(h->nal_unit_type == NAL_IDR_SLICE){
4558 poc= 0;
4559 }else{
4560 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
4561 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
4563 field_poc[0]= poc;
4564 field_poc[1]= poc;
4567 if(s->picture_structure != PICT_BOTTOM_FIELD)
4568 s->current_picture_ptr->field_poc[0]= field_poc[0];
4569 if(s->picture_structure != PICT_TOP_FIELD)
4570 s->current_picture_ptr->field_poc[1]= field_poc[1];
4571 if(s->picture_structure == PICT_FRAME) // FIXME field pix?
4572 s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
4574 return 0;
4578 * decodes a slice header.
4579 * this will also call MPV_common_init() and frame_start() as needed
4581 static int decode_slice_header(H264Context *h){
4582 MpegEncContext * const s = &h->s;
4583 int first_mb_in_slice, pps_id;
4584 int num_ref_idx_active_override_flag;
4585 static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
4586 int slice_type;
4587 int default_ref_list_done = 0;
4589 s->current_picture.reference= h->nal_ref_idc != 0;
4590 s->dropable= h->nal_ref_idc == 0;
4592 first_mb_in_slice= get_ue_golomb(&s->gb);
4594 slice_type= get_ue_golomb(&s->gb);
4595 if(slice_type > 9){
4596 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
4597 return -1;
4599 if(slice_type > 4){
4600 slice_type -= 5;
4601 h->slice_type_fixed=1;
4602 }else
4603 h->slice_type_fixed=0;
4605 slice_type= slice_type_map[ slice_type ];
4606 if (slice_type == I_TYPE
4607 || (h->slice_num != 0 && slice_type == h->slice_type) ) {
4608 default_ref_list_done = 1;
4610 h->slice_type= slice_type;
4612 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
4614 pps_id= get_ue_golomb(&s->gb);
4615 if(pps_id>255){
4616 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
4617 return -1;
4619 h->pps= h->pps_buffer[pps_id];
4620 if(h->pps.slice_group_count == 0){
4621 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
4622 return -1;
4625 h->sps= h->sps_buffer[ h->pps.sps_id ];
4626 if(h->sps.log2_max_frame_num == 0){
4627 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
4628 return -1;
4631 if(h->dequant_coeff_pps != pps_id){
4632 h->dequant_coeff_pps = pps_id;
4633 init_dequant_tables(h);
4636 s->mb_width= h->sps.mb_width;
4637 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
4639 h->b_stride= s->mb_width*4;
4640 h->b8_stride= s->mb_width*2;
4642 s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
4643 if(h->sps.frame_mbs_only_flag)
4644 s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
4645 else
4646 s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
4648 if (s->context_initialized
4649 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
4650 free_tables(h);
4651 MPV_common_end(s);
4653 if (!s->context_initialized) {
4654 if (MPV_common_init(s) < 0)
4655 return -1;
4657 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
4658 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
4659 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
4660 }else{
4661 int i;
4662 for(i=0; i<16; i++){
4663 #define T(x) (x>>2) | ((x<<2) & 0xF)
4664 h->zigzag_scan[i] = T(zigzag_scan[i]);
4665 h-> field_scan[i] = T( field_scan[i]);
4666 #undef T
4669 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4670 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4671 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4672 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
4673 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
4674 }else{
4675 int i;
4676 for(i=0; i<64; i++){
4677 #define T(x) (x>>3) | ((x&7)<<3)
4678 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4679 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4680 h->field_scan8x8[i] = T(field_scan8x8[i]);
4681 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
4682 #undef T
4685 if(h->sps.transform_bypass){ //FIXME same ugly
4686 h->zigzag_scan_q0 = zigzag_scan;
4687 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4688 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4689 h->field_scan_q0 = field_scan;
4690 h->field_scan8x8_q0 = field_scan8x8;
4691 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
4692 }else{
4693 h->zigzag_scan_q0 = h->zigzag_scan;
4694 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4695 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4696 h->field_scan_q0 = h->field_scan;
4697 h->field_scan8x8_q0 = h->field_scan8x8;
4698 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
4701 alloc_tables(h);
4703 s->avctx->width = s->width;
4704 s->avctx->height = s->height;
4705 s->avctx->sample_aspect_ratio= h->sps.sar;
4706 if(!s->avctx->sample_aspect_ratio.den)
4707 s->avctx->sample_aspect_ratio.den = 1;
4709 if(h->sps.timing_info_present_flag){
4710 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
4711 if(h->x264_build > 0 && h->x264_build < 44)
4712 s->avctx->time_base.den *= 2;
4713 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
4714 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
4718 if(h->slice_num == 0){
4719 if(frame_start(h) < 0)
4720 return -1;
4723 s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4724 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4726 h->mb_mbaff = 0;
4727 h->mb_aff_frame = 0;
4728 if(h->sps.frame_mbs_only_flag){
4729 s->picture_structure= PICT_FRAME;
4730 }else{
4731 if(get_bits1(&s->gb)) { //field_pic_flag
4732 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4733 av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4734 } else {
4735 s->picture_structure= PICT_FRAME;
4736 h->mb_aff_frame = h->sps.mb_aff;
4740 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4741 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4742 if(s->mb_y >= s->mb_height){
4743 return -1;
4746 if(s->picture_structure==PICT_FRAME){
4747 h->curr_pic_num= h->frame_num;
4748 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4749 }else{
4750 h->curr_pic_num= 2*h->frame_num;
4751 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4754 if(h->nal_unit_type == NAL_IDR_SLICE){
4755 get_ue_golomb(&s->gb); /* idr_pic_id */
4758 if(h->sps.poc_type==0){
4759 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4761 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4762 h->delta_poc_bottom= get_se_golomb(&s->gb);
4766 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4767 h->delta_poc[0]= get_se_golomb(&s->gb);
4769 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4770 h->delta_poc[1]= get_se_golomb(&s->gb);
4773 init_poc(h);
4775 if(h->pps.redundant_pic_cnt_present){
4776 h->redundant_pic_count= get_ue_golomb(&s->gb);
4779 //set defaults, might be overridden a few lines later
4780 h->ref_count[0]= h->pps.ref_count[0];
4781 h->ref_count[1]= h->pps.ref_count[1];
4783 if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4784 if(h->slice_type == B_TYPE){
4785 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4786 if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4787 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4789 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4791 if(num_ref_idx_active_override_flag){
4792 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4793 if(h->slice_type==B_TYPE)
4794 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4796 if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
4797 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4798 return -1;
4803 if(!default_ref_list_done){
4804 fill_default_ref_list(h);
4807 if(decode_ref_pic_list_reordering(h) < 0)
4808 return -1;
4810 if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
4811 || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
4812 pred_weight_table(h);
4813 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
4814 implicit_weight_table(h);
4815 else
4816 h->use_weight = 0;
4818 if(s->current_picture.reference)
4819 decode_ref_pic_marking(h);
4821 if(FRAME_MBAFF)
4822 fill_mbaff_ref_list(h);
4824 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4825 h->cabac_init_idc = get_ue_golomb(&s->gb);
4827 h->last_qscale_diff = 0;
4828 s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
4829 if(s->qscale<0 || s->qscale>51){
4830 av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
4831 return -1;
4833 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
4834 //FIXME qscale / qp ... stuff
4835 if(h->slice_type == SP_TYPE){
4836 get_bits1(&s->gb); /* sp_for_switch_flag */
4838 if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
4839 get_se_golomb(&s->gb); /* slice_qs_delta */
4842 h->deblocking_filter = 1;
4843 h->slice_alpha_c0_offset = 0;
4844 h->slice_beta_offset = 0;
4845 if( h->pps.deblocking_filter_parameters_present ) {
4846 h->deblocking_filter= get_ue_golomb(&s->gb);
4847 if(h->deblocking_filter < 2)
4848 h->deblocking_filter^= 1; // 1<->0
4850 if( h->deblocking_filter ) {
4851 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4852 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4855 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4856 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
4857 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
4858 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4859 h->deblocking_filter= 0;
4861 #if 0 //FMO
4862 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4863 slice_group_change_cycle= get_bits(&s->gb, ?);
4864 #endif
4866 h->slice_num++;
4868 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4869 h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4871 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4872 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4873 h->slice_num,
4874 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4875 first_mb_in_slice,
4876 av_get_pict_type_char(h->slice_type),
4877 pps_id, h->frame_num,
4878 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4879 h->ref_count[0], h->ref_count[1],
4880 s->qscale,
4881 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4882 h->use_weight,
4883 h->use_weight==1 && h->use_weight_chroma ? "c" : ""
4887 return 0;
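/**
 * Reads the unary level_prefix of a CAVLC residual coefficient, i.e. the
 * number of leading zero bits before the terminating 1 bit.
 */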
4893 static inline int get_level_prefix(GetBitContext *gb){
4894 unsigned int buf;
4895 int log;
4897 OPEN_READER(re, gb);
4898 UPDATE_CACHE(re, gb);
4899 buf=GET_CACHE(re, gb);
4901 log= 32 - av_log2(buf);
4902 #ifdef TRACE
4903 print_bin(buf>>(32-log), log);
4904 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4905 #endif
4907 LAST_SKIP_BITS(re, gb, log);
4908 CLOSE_READER(re, gb);
4910 return log-1;
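/* The 8x8 transform is only allowed when every partition of the macroblock
 * is a plain 8x8 sub-block (and, without direct_8x8_inference, none of them
 * is a direct sub-block). */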
4913 static inline int get_dct8x8_allowed(H264Context *h){
4914 int i;
4915 for(i=0; i<4; i++){
4916 if(!IS_SUB_8X8(h->sub_mb_type[i])
4917 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4918 return 0;
4920 return 1;
4924 * decodes a residual block.
4925 * @param n block index
4926 * @param scantable scantable
4927 * @param max_coeff number of coefficients in the block
4928 * @return <0 if an error occurred
4930 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4931 MpegEncContext * const s = &h->s;
4932 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4933 int level[16];
4934 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4936 //FIXME put trailing_ones into the context
4938 if(n == CHROMA_DC_BLOCK_INDEX){
4939 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4940 total_coeff= coeff_token>>2;
4941 }else{
4942 if(n == LUMA_DC_BLOCK_INDEX){
4943 total_coeff= pred_non_zero_count(h, 0);
4944 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4945 total_coeff= coeff_token>>2;
4946 }else{
4947 total_coeff= pred_non_zero_count(h, n);
4948 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4949 total_coeff= coeff_token>>2;
4950 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4954 //FIXME set last_non_zero?
4956 if(total_coeff==0)
4957 return 0;
4959 trailing_ones= coeff_token&3;
4960 tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
4961 assert(total_coeff<=16);
4963 for(i=0; i<trailing_ones; i++){
4964 level[i]= 1 - 2*get_bits1(gb);
4967 if(i<total_coeff) {
4968 int level_code, mask;
4969 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4970 int prefix= get_level_prefix(gb);
4972 //first coefficient has suffix_length equal to 0 or 1
4973 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4974 if(suffix_length)
4975 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4976 else
4977 level_code= (prefix<<suffix_length); //part
4978 }else if(prefix==14){
4979 if(suffix_length)
4980 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4981 else
4982 level_code= prefix + get_bits(gb, 4); //part
4983 }else if(prefix==15){
4984 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4985 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much) sense
4986 }else{
4987 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4988 return -1;
4991 if(trailing_ones < 3) level_code += 2;
4993 suffix_length = 1;
4994 if(level_code > 5)
4995 suffix_length++;
4996 mask= -(level_code&1);
4997 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4998 i++;
5000 //remaining coefficients have suffix_length > 0
5001 for(;i<total_coeff;i++) {
5002 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
5003 prefix = get_level_prefix(gb);
5004 if(prefix<15){
5005 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
5006 }else if(prefix==15){
5007 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
5008 }else{
5009 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
5010 return -1;
5012 mask= -(level_code&1);
5013 level[i]= (((2+level_code)>>1) ^ mask) - mask;
5014 if(level_code > suffix_limit[suffix_length])
5015 suffix_length++;
5019 if(total_coeff == max_coeff)
5020 zeros_left=0;
5021 else{
5022 if(n == CHROMA_DC_BLOCK_INDEX)
5023 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
5024 else
5025 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
5028 coeff_num = zeros_left + total_coeff - 1;
5029 j = scantable[coeff_num];
5030 if(n > 24){
5031 block[j] = level[0];
5032 for(i=1;i<total_coeff;i++) {
5033 if(zeros_left <= 0)
5034 run_before = 0;
5035 else if(zeros_left < 7){
5036 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5037 }else{
5038 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5040 zeros_left -= run_before;
5041 coeff_num -= 1 + run_before;
5042 j= scantable[ coeff_num ];
5044 block[j]= level[i];
5046 }else{
5047 block[j] = (level[0] * qmul[j] + 32)>>6;
5048 for(i=1;i<total_coeff;i++) {
5049 if(zeros_left <= 0)
5050 run_before = 0;
5051 else if(zeros_left < 7){
5052 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
5053 }else{
5054 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
5056 zeros_left -= run_before;
5057 coeff_num -= 1 + run_before;
5058 j= scantable[ coeff_num ];
5060 block[j]= (level[i] * qmul[j] + 32)>>6;
5064 if(zeros_left<0){
5065 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
5066 return -1;
5069 return 0;
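/**
 * guesses the MBAFF field decoding flag of a skipped MB pair from the left
 * neighbour if it belongs to the same slice, otherwise from the top neighbour.
 */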
5072 static void predict_field_decoding_flag(H264Context *h){
5073 MpegEncContext * const s = &h->s;
5074 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5075 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
5076 ? s->current_picture.mb_type[mb_xy-1]
5077 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
5078 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
5079 : 0;
5080 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
5084 * decodes a P_SKIP or B_SKIP macroblock
5086 static void decode_mb_skip(H264Context *h){
5087 MpegEncContext * const s = &h->s;
5088 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5089 int mb_type=0;
5091 memset(h->non_zero_count[mb_xy], 0, 16);
5092 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
5094 if(MB_FIELD)
5095 mb_type|= MB_TYPE_INTERLACED;
5097 if( h->slice_type == B_TYPE )
5099 // just for fill_caches. pred_direct_motion will set the real mb_type
5100 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
5102 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5103 pred_direct_motion(h, &mb_type);
5104 if(h->pps.cabac){
5105 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5106 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5109 else
5111 int mx, my;
5112 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
5114 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
5115 pred_pskip_motion(h, &mx, &my);
5116 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
5117 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
5118 if(h->pps.cabac)
5119 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5122 write_back_motion(h, mb_type);
5123 s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP;
5124 s->current_picture.qscale_table[mb_xy]= s->qscale;
5125 h->slice_table[ mb_xy ]= h->slice_num;
5126 h->prev_mb_skipped= 1;
5130 * decodes a macroblock
5131 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5133 static int decode_mb_cavlc(H264Context *h){
5134 MpegEncContext * const s = &h->s;
5135 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
5136 int mb_type, partition_count, cbp;
5137 int dct8x8_allowed= h->pps.transform_8x8_mode;
5139 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
5141 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5142 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
5143 down the code */
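/* In CAVLC P/B slices a single ue(v) mb_skip_run gives the number of
 * consecutive skipped macroblocks; it is read once and counted down on each
 * subsequent macroblock. */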
5144 if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
5145 if(s->mb_skip_run==-1)
5146 s->mb_skip_run= get_ue_golomb(&s->gb);
5148 if (s->mb_skip_run--) {
5149 if(FRAME_MBAFF && (s->mb_y&1) == 0){
5150 if(s->mb_skip_run==0)
5151 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5152 else
5153 predict_field_decoding_flag(h);
5155 decode_mb_skip(h);
5156 return 0;
5159 if(FRAME_MBAFF){
5160 if( (s->mb_y&1) == 0 )
5161 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5162 }else
5163 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5165 h->prev_mb_skipped= 0;
5167 mb_type= get_ue_golomb(&s->gb);
5168 if(h->slice_type == B_TYPE){
5169 if(mb_type < 23){
5170 partition_count= b_mb_type_info[mb_type].partition_count;
5171 mb_type= b_mb_type_info[mb_type].type;
5172 }else{
5173 mb_type -= 23;
5174 goto decode_intra_mb;
5176 }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
5177 if(mb_type < 5){
5178 partition_count= p_mb_type_info[mb_type].partition_count;
5179 mb_type= p_mb_type_info[mb_type].type;
5180 }else{
5181 mb_type -= 5;
5182 goto decode_intra_mb;
5184 }else{
5185 assert(h->slice_type == I_TYPE);
5186 decode_intra_mb:
5187 if(mb_type > 25){
5188 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
5189 return -1;
5191 partition_count=0;
5192 cbp= i_mb_type_info[mb_type].cbp;
5193 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5194 mb_type= i_mb_type_info[mb_type].type;
5197 if(MB_FIELD)
5198 mb_type |= MB_TYPE_INTERLACED;
5200 h->slice_table[ mb_xy ]= h->slice_num;
5202 if(IS_INTRA_PCM(mb_type)){
5203 unsigned int x, y;
5205 // we assume these blocks are very rare so we don't optimize it
5206 align_get_bits(&s->gb);
5208 // The pixels are stored in the same order as levels in h->mb array.
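// Each 4x4 block occupies 16 consecutive entries of h->mb; the blocks
// themselves follow the usual 8x8-then-4x4 luma ordering (cf. block_idx_xy).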
5209 for(y=0; y<16; y++){
5210 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5211 for(x=0; x<16; x++){
5212 tprintf("LUMA IPCM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5213 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
5216 for(y=0; y<8; y++){
5217 const int index= 256 + 4*(y&3) + 32*(y>>2);
5218 for(x=0; x<8; x++){
5219 tprintf("CHROMA U IPCM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5220 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5223 for(y=0; y<8; y++){
5224 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5225 for(x=0; x<8; x++){
5226 tprintf("CHROMA V IPCM LEVEL (%3d)\n", show_bits(&s->gb, 8));
5227 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
5231 // In deblocking, the quantizer is 0
5232 s->current_picture.qscale_table[mb_xy]= 0;
5233 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
5234 // All coeffs are present
5235 memset(h->non_zero_count[mb_xy], 16, 16);
5237 s->current_picture.mb_type[mb_xy]= mb_type;
5238 return 0;
5241 if(MB_MBAFF){
5242 h->ref_count[0] <<= 1;
5243 h->ref_count[1] <<= 1;
5246 fill_caches(h, mb_type, 0);
5248 //mb_pred
5249 if(IS_INTRA(mb_type)){
5250 // init_top_left_availability(h);
5251 if(IS_INTRA4x4(mb_type)){
5252 int i;
5253 int di = 1;
5254 if(dct8x8_allowed && get_bits1(&s->gb)){
5255 mb_type |= MB_TYPE_8x8DCT;
5256 di = 4;
5259 // fill_intra4x4_pred_table(h);
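// Intra 4x4 mode signalling: one bit selects the predicted mode; otherwise a
// 3-bit rem_intra4x4_pred_mode follows, with values >= the predicted mode
// shifted up by one so the prediction itself is never coded explicitly.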
5260 for(i=0; i<16; i+=di){
5261 const int mode_coded= !get_bits1(&s->gb);
5262 const int predicted_mode= pred_intra_mode(h, i);
5263 int mode;
5265 if(mode_coded){
5266 const int rem_mode= get_bits(&s->gb, 3);
5267 if(rem_mode<predicted_mode)
5268 mode= rem_mode;
5269 else
5270 mode= rem_mode + 1;
5271 }else{
5272 mode= predicted_mode;
5275 if(di==4)
5276 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5277 else
5278 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
5280 write_back_intra_pred_mode(h);
5281 if( check_intra4x4_pred_mode(h) < 0)
5282 return -1;
5283 }else{
5284 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
5285 if(h->intra16x16_pred_mode < 0)
5286 return -1;
5288 h->chroma_pred_mode= get_ue_golomb(&s->gb);
5290 h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
5291 if(h->chroma_pred_mode < 0)
5292 return -1;
5293 }else if(partition_count==4){
5294 int i, j, sub_partition_count[4], list, ref[2][4];
5296 if(h->slice_type == B_TYPE){
5297 for(i=0; i<4; i++){
5298 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5299 if(h->sub_mb_type[i] >=13){
5300 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5301 return -1;
5303 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5304 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5306 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
5307 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
5308 pred_direct_motion(h, &mb_type);
5309 h->ref_cache[0][scan8[4]] =
5310 h->ref_cache[1][scan8[4]] =
5311 h->ref_cache[0][scan8[12]] =
5312 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5314 }else{
5315 assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
5316 for(i=0; i<4; i++){
5317 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
5318 if(h->sub_mb_type[i] >=4){
5319 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
5320 return -1;
5322 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5323 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5327 for(list=0; list<2; list++){
5328 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5329 if(ref_count == 0) continue;
5330 for(i=0; i<4; i++){
5331 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5332 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5333 ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
5334 }else{
5335 //FIXME
5336 ref[list][i] = -1;
5341 if(dct8x8_allowed)
5342 dct8x8_allowed = get_dct8x8_allowed(h);
5344 for(list=0; list<2; list++){
5345 const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
5346 if(ref_count == 0) continue;
5348 for(i=0; i<4; i++){
5349 if(IS_DIRECT(h->sub_mb_type[i])) {
5350 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
5351 continue;
5353 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
5354 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5356 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5357 const int sub_mb_type= h->sub_mb_type[i];
5358 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5359 for(j=0; j<sub_partition_count[i]; j++){
5360 int mx, my;
5361 const int index= 4*i + block_width*j;
5362 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5363 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
5364 mx += get_se_golomb(&s->gb);
5365 my += get_se_golomb(&s->gb);
5366 tprintf("final mv:%d %d\n", mx, my);
5368 if(IS_SUB_8X8(sub_mb_type)){
5369 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
5370 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5371 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
5372 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5373 }else if(IS_SUB_8X4(sub_mb_type)){
5374 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
5375 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
5376 }else if(IS_SUB_4X8(sub_mb_type)){
5377 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
5378 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
5379 }else{
5380 assert(IS_SUB_4X4(sub_mb_type));
5381 mv_cache[ 0 ][0]= mx;
5382 mv_cache[ 0 ][1]= my;
5385 }else{
5386 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5387 p[0] = p[1]=
5388 p[8] = p[9]= 0;
5392 }else if(IS_DIRECT(mb_type)){
5393 pred_direct_motion(h, &mb_type);
5394 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5395 }else{
5396 int list, mx, my, i;
5397 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5398 if(IS_16X16(mb_type)){
5399 for(list=0; list<2; list++){
5400 if(h->ref_count[list]>0){
5401 if(IS_DIR(mb_type, 0, list)){
5402 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5403 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
5404 }else
5405 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
5408 for(list=0; list<2; list++){
5409 if(IS_DIR(mb_type, 0, list)){
5410 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
5411 mx += get_se_golomb(&s->gb);
5412 my += get_se_golomb(&s->gb);
5413 tprintf("final mv:%d %d\n", mx, my);
5415 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5416 }else
5417 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5420 else if(IS_16X8(mb_type)){
5421 for(list=0; list<2; list++){
5422 if(h->ref_count[list]>0){
5423 for(i=0; i<2; i++){
5424 if(IS_DIR(mb_type, i, list)){
5425 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5426 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
5427 }else
5428 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5432 for(list=0; list<2; list++){
5433 for(i=0; i<2; i++){
5434 if(IS_DIR(mb_type, i, list)){
5435 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
5436 mx += get_se_golomb(&s->gb);
5437 my += get_se_golomb(&s->gb);
5438 tprintf("final mv:%d %d\n", mx, my);
5440 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5441 }else
5442 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5445 }else{
5446 assert(IS_8X16(mb_type));
5447 for(list=0; list<2; list++){
5448 if(h->ref_count[list]>0){
5449 for(i=0; i<2; i++){
5450 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5451 const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
5452 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
5453 }else
5454 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5458 for(list=0; list<2; list++){
5459 for(i=0; i<2; i++){
5460 if(IS_DIR(mb_type, i, list)){
5461 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
5462 mx += get_se_golomb(&s->gb);
5463 my += get_se_golomb(&s->gb);
5464 tprintf("final mv:%d %d\n", mx, my);
5466 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5467 }else
5468 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5474 if(IS_INTER(mb_type))
5475 write_back_motion(h, mb_type);
5477 if(!IS_INTRA16x16(mb_type)){
5478 cbp= get_ue_golomb(&s->gb);
5479 if(cbp > 47){
5480 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
5481 return -1;
5484 if(IS_INTRA4x4(mb_type))
5485 cbp= golomb_to_intra4x4_cbp[cbp];
5486 else
5487 cbp= golomb_to_inter_cbp[cbp];
5490 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
5491 if(get_bits1(&s->gb))
5492 mb_type |= MB_TYPE_8x8DCT;
5494 s->current_picture.mb_type[mb_xy]= mb_type;
5496 if(cbp || IS_INTRA16x16(mb_type)){
5497 int i8x8, i4x4, chroma_idx;
5498 int chroma_qp, dquant;
5499 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
5500 const uint8_t *scan, *scan8x8, *dc_scan;
5502 // fill_non_zero_count_cache(h);
5504 if(IS_INTERLACED(mb_type)){
5505 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5506 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5507 dc_scan= luma_dc_field_scan;
5508 }else{
5509 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5510 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5511 dc_scan= luma_dc_zigzag_scan;
5514 dquant= get_se_golomb(&s->gb);
5516 if( dquant > 25 || dquant < -26 ){
5517 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
5518 return -1;
5521 s->qscale += dquant;
5522 if(((unsigned)s->qscale) > 51){
5523 if(s->qscale<0) s->qscale+= 52;
5524 else s->qscale-= 52;
5527 h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
5528 if(IS_INTRA16x16(mb_type)){
5529 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
5530 return -1; //FIXME continue if partitioned and the others return -1 too
5533 assert((cbp&15) == 0 || (cbp&15) == 15);
5535 if(cbp&15){
5536 for(i8x8=0; i8x8<4; i8x8++){
5537 for(i4x4=0; i4x4<4; i4x4++){
5538 const int index= i4x4 + 4*i8x8;
5539 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
5540 return -1;
5544 }else{
5545 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5547 }else{
5548 for(i8x8=0; i8x8<4; i8x8++){
5549 if(cbp & (1<<i8x8)){
5550 if(IS_8x8DCT(mb_type)){
5551 DCTELEM *buf = &h->mb[64*i8x8];
5552 uint8_t *nnz;
5553 for(i4x4=0; i4x4<4; i4x4++){
5554 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
5555 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
5556 return -1;
5558 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5559 nnz[0] += nnz[1] + nnz[8] + nnz[9];
5560 }else{
5561 for(i4x4=0; i4x4<4; i4x4++){
5562 const int index= i4x4 + 4*i8x8;
5564 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
5565 return -1;
5569 }else{
5570 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5571 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
5576 if(cbp&0x30){
5577 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
5578 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
5579 return -1;
5583 if(cbp&0x20){
5584 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
5585 for(i4x4=0; i4x4<4; i4x4++){
5586 const int index= 16 + 4*chroma_idx + i4x4;
5587 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
5588 return -1;
5592 }else{
5593 uint8_t * const nnz= &h->non_zero_count_cache[0];
5594 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5595 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5597 }else{
5598 uint8_t * const nnz= &h->non_zero_count_cache[0];
5599 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5600 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5601 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5603 s->current_picture.qscale_table[mb_xy]= s->qscale;
5604 write_back_non_zero_count(h);
5606 if(MB_MBAFF){
5607 h->ref_count[0] >>= 1;
5608 h->ref_count[1] >>= 1;
5611 return 0;
5614 static int decode_cabac_field_decoding_flag(H264Context *h) {
5615 MpegEncContext * const s = &h->s;
5616 const int mb_x = s->mb_x;
5617 const int mb_y = s->mb_y & ~1;
5618 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
5619 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5621 unsigned int ctx = 0;
5623 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5624 ctx += 1;
5626 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5627 ctx += 1;
5630 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
5633 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5634 uint8_t *state= &h->cabac_state[ctx_base];
5635 int mb_type;
5637 if(intra_slice){
5638 MpegEncContext * const s = &h->s;
5639 const int mba_xy = h->left_mb_xy[0];
5640 const int mbb_xy = h->top_mb_xy;
5641 int ctx=0;
5642 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5643 ctx++;
5644 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5645 ctx++;
5646 if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
5647 return 0; /* I4x4 */
5648 state += 2;
5649 }else{
5650 if( get_cabac( &h->cabac, &state[0] ) == 0 )
5651 return 0; /* I4x4 */
5654 if( get_cabac_terminate( &h->cabac ) )
5655 return 25; /* PCM */
5657 mb_type = 1; /* I16x16 */
5658 mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5659 if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */
5660 mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] );
5661 mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] );
5662 mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] );
5663 return mb_type;
5666 static int decode_cabac_mb_type( H264Context *h ) {
5667 MpegEncContext * const s = &h->s;
5669 if( h->slice_type == I_TYPE ) {
5670 return decode_cabac_intra_mb_type(h, 3, 1);
5671 } else if( h->slice_type == P_TYPE ) {
5672 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5673 /* P-type */
5674 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5675 /* P_L0_D16x16, P_8x8 */
5676 return 3 * get_cabac( &h->cabac, &h->cabac_state[16] );
5677 } else {
5678 /* P_L0_D8x16, P_L0_D16x8 */
5679 return 2 - get_cabac( &h->cabac, &h->cabac_state[17] );
5681 } else {
5682 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5684 } else if( h->slice_type == B_TYPE ) {
5685 const int mba_xy = h->left_mb_xy[0];
5686 const int mbb_xy = h->top_mb_xy;
5687 int ctx = 0;
5688 int bits;
5690 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5691 ctx++;
5692 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5693 ctx++;
5695 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
5696 return 0; /* B_Direct_16x16 */
5698 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
5699 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5702 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
5703 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
5704 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
5705 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
5706 if( bits < 8 )
5707 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5708 else if( bits == 13 ) {
5709 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5710 } else if( bits == 14 )
5711 return 11; /* B_L1_L0_8x16 */
5712 else if( bits == 15 )
5713 return 22; /* B_8x8 */
5715 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
5716 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5717 } else {
5718 /* TODO SI/SP frames? */
5719 return -1;
5723 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5724 MpegEncContext * const s = &h->s;
5725 int mba_xy, mbb_xy;
5726 int ctx = 0;
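/* The skip-flag context counts how many of the left and top neighbours
 * (within the same slice) are not skipped; with MBAFF the neighbour with
 * matching field/frame coding is chosen where possible. B slices use a
 * separate context set (offset 13). */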
5728 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5729 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5730 mba_xy = mb_xy - 1;
5731 if( (mb_y&1)
5732 && h->slice_table[mba_xy] == h->slice_num
5733 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5734 mba_xy += s->mb_stride;
5735 if( MB_FIELD ){
5736 mbb_xy = mb_xy - s->mb_stride;
5737 if( !(mb_y&1)
5738 && h->slice_table[mbb_xy] == h->slice_num
5739 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5740 mbb_xy -= s->mb_stride;
5741 }else
5742 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5743 }else{
5744 int mb_xy = mb_x + mb_y*s->mb_stride;
5745 mba_xy = mb_xy - 1;
5746 mbb_xy = mb_xy - s->mb_stride;
5749 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5750 ctx++;
5751 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5752 ctx++;
5754 if( h->slice_type == B_TYPE )
5755 ctx += 13;
5756 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
5759 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5760 int mode = 0;
5762 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5763 return pred_mode;
5765 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5766 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5767 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5769 if( mode >= pred_mode )
5770 return mode + 1;
5771 else
5772 return mode;
5775 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5776 const int mba_xy = h->left_mb_xy[0];
5777 const int mbb_xy = h->top_mb_xy;
5779 int ctx = 0;
5781 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5782 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5783 ctx++;
5785 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5786 ctx++;
5788 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5789 return 0;
5791 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5792 return 1;
5793 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5794 return 2;
5795 else
5796 return 3;
5799 static const uint8_t block_idx_x[16] = {
5800 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
5802 static const uint8_t block_idx_y[16] = {
5803 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
5805 static const uint8_t block_idx_xy[4][4] = {
5806 { 0, 2, 8, 10},
5807 { 1, 3, 9, 11},
5808 { 4, 6, 12, 14},
5809 { 5, 7, 13, 15}
5812 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5813 int cbp = 0;
5814 int cbp_b = -1;
5815 int i8x8;
5817 if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
5818 cbp_b = h->top_cbp;
5819 tprintf("cbp_b = top_cbp = %x\n", cbp_b);
5822 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5823 int cbp_a = -1;
5824 int x, y;
5825 int ctx = 0;
5827 x = block_idx_x[4*i8x8];
5828 y = block_idx_y[4*i8x8];
5830 if( x > 0 )
5831 cbp_a = cbp;
5832 else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
5833 cbp_a = h->left_cbp;
5834 tprintf("cbp_a = left_cbp = %x\n", cbp_a);
5837 if( y > 0 )
5838 cbp_b = cbp;
5840 /* No need to test for skip as we put 0 for skip block */
5841 /* No need to test for IPCM as we put 1 for IPCM block */
5842 if( cbp_a >= 0 ) {
5843 int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
5844 if( ((cbp_a >> i8x8a)&0x01) == 0 )
5845 ctx++;
5848 if( cbp_b >= 0 ) {
5849 int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
5850 if( ((cbp_b >> i8x8b)&0x01) == 0 )
5851 ctx += 2;
5854 if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
5855 cbp |= 1 << i8x8;
5858 return cbp;
5860 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5861 int ctx;
5862 int cbp_a, cbp_b;
5864 cbp_a = (h->left_cbp>>4)&0x03;
5865 cbp_b = (h-> top_cbp>>4)&0x03;
5867 ctx = 0;
5868 if( cbp_a > 0 ) ctx++;
5869 if( cbp_b > 0 ) ctx += 2;
5870 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5871 return 0;
5873 ctx = 4;
5874 if( cbp_a == 2 ) ctx++;
5875 if( cbp_b == 2 ) ctx += 2;
5876 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
5878 static int decode_cabac_mb_dqp( H264Context *h) {
5879 MpegEncContext * const s = &h->s;
5880 int mbn_xy;
5881 int ctx = 0;
5882 int val = 0;
5884 if( s->mb_x > 0 )
5885 mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
5886 else
5887 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5889 if( h->last_qscale_diff != 0 )
5890 ctx++;
5892 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5893 if( ctx < 2 )
5894 ctx = 2;
5895 else
5896 ctx = 3;
5897 val++;
5898 if(val > 102) //prevent infinite loop
5899 return INT_MIN;
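/* val is the unary-coded index of mb_qp_delta: odd values map to +(val+1)/2,
 * even values to -val/2, i.e. 0,1,2,3,... -> 0,+1,-1,+2,... */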
5902 if( val&0x01 )
5903 return (val + 1)/2;
5904 else
5905 return -(val + 1)/2;
5907 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5908 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5909 return 0; /* 8x8 */
5910 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5911 return 1; /* 8x4 */
5912 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5913 return 2; /* 4x8 */
5914 return 3; /* 4x4 */
5916 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5917 int type;
5918 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5919 return 0; /* B_Direct_8x8 */
5920 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5921 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5922 type = 3;
5923 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5924 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5925 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5926 type += 4;
5928 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5929 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5930 return type;
5933 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5934 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5937 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5938 int refa = h->ref_cache[list][scan8[n] - 1];
5939 int refb = h->ref_cache[list][scan8[n] - 8];
5940 int ref = 0;
5941 int ctx = 0;
5943 if( h->slice_type == B_TYPE) {
5944 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5945 ctx++;
5946 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5947 ctx += 2;
5948 } else {
5949 if( refa > 0 )
5950 ctx++;
5951 if( refb > 0 )
5952 ctx += 2;
5955 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5956 ref++;
5957 if( ctx < 4 )
5958 ctx = 4;
5959 else
5960 ctx = 5;
5962 return ref;
5965 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5966 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5967 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5968 int ctxbase = (l == 0) ? 40 : 47;
5969 int ctx, mvd;
5971 if( amvd < 3 )
5972 ctx = 0;
5973 else if( amvd > 32 )
5974 ctx = 2;
5975 else
5976 ctx = 1;
5978 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5979 return 0;
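/* Nonzero mvd components use a UEG3 binarization: a truncated unary prefix of
 * up to 9 context-coded bins, then for |mvd| >= 9 an Exp-Golomb-like suffix
 * (k starting at 3) read in bypass mode, followed by a bypass-coded sign bit. */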
5981 mvd= 1;
5982 ctx= 3;
5983 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5984 mvd++;
5985 if( ctx < 6 )
5986 ctx++;
5989 if( mvd >= 9 ) {
5990 int k = 3;
5991 while( get_cabac_bypass( &h->cabac ) ) {
5992 mvd += 1 << k;
5993 k++;
5995 while( k-- ) {
5996 if( get_cabac_bypass( &h->cabac ) )
5997 mvd += 1 << k;
6000 if( get_cabac_bypass( &h->cabac ) ) return -mvd;
6001 else return mvd;
6004 static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
6005 int nza, nzb;
6006 int ctx = 0;
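/* coded_block_flag context: +1 if the neighbouring block to the left had
 * coefficients, +2 if the one above did, plus an offset of 4 per block
 * category. */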
6008 if( cat == 0 ) {
6009 nza = h->left_cbp&0x100;
6010 nzb = h-> top_cbp&0x100;
6011 } else if( cat == 1 || cat == 2 ) {
6012 nza = h->non_zero_count_cache[scan8[idx] - 1];
6013 nzb = h->non_zero_count_cache[scan8[idx] - 8];
6014 } else if( cat == 3 ) {
6015 nza = (h->left_cbp>>(6+idx))&0x01;
6016 nzb = (h-> top_cbp>>(6+idx))&0x01;
6017 } else {
6018 assert(cat == 4);
6019 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
6020 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
6023 if( nza > 0 )
6024 ctx++;
6026 if( nzb > 0 )
6027 ctx += 2;
6029 return ctx + 4 * cat;
6032 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
6033 const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
6034 static const int significant_coeff_flag_offset[2][6] = {
6035 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6036 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6038 static const int last_coeff_flag_offset[2][6] = {
6039 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6040 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6042 static const int coeff_abs_level_m1_offset[6] = {
6043 227+0, 227+10, 227+20, 227+30, 227+39, 426
6045 static const int significant_coeff_flag_offset_8x8[2][63] = {
6046 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6047 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6048 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6049 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6050 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6051 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6052 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6053 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6055 static const int last_coeff_flag_offset_8x8[63] = {
6056 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6057 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6058 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6059 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6062 int index[64];
6064 int i, last;
6065 int coeff_count = 0;
6067 int abslevel1 = 1;
6068 int abslevelgt1 = 0;
6070 uint8_t *significant_coeff_ctx_base;
6071 uint8_t *last_coeff_ctx_base;
6072 uint8_t *abs_level_m1_ctx_base;
6074 /* cat: 0-> DC 16x16 n = 0
6075 * 1-> AC 16x16 n = luma4x4idx
6076 * 2-> Luma4x4 n = luma4x4idx
6077 * 3-> DC Chroma n = iCbCr
6078 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6079 * 5-> Luma8x8 n = 4 * luma8x8idx
6082 /* read coded block flag */
6083 if( cat != 5 ) {
6084 if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
6085 if( cat == 1 || cat == 2 )
6086 h->non_zero_count_cache[scan8[n]] = 0;
6087 else if( cat == 4 )
6088 h->non_zero_count_cache[scan8[16+n]] = 0;
6090 return 0;
6094 significant_coeff_ctx_base = h->cabac_state
6095 + significant_coeff_flag_offset[MB_FIELD][cat];
6096 last_coeff_ctx_base = h->cabac_state
6097 + last_coeff_flag_offset[MB_FIELD][cat];
6098 abs_level_m1_ctx_base = h->cabac_state
6099 + coeff_abs_level_m1_offset[cat];
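/* Significance map: one significant_coeff_flag per scan position and, for each
 * set flag, a last_significant_coeff_flag that can end the map early. index[]
 * collects the scan positions of the nonzero coefficients; 8x8 blocks use
 * their own per-position context offsets. */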
6101 if( cat == 5 ) {
6102 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6103 for(last= 0; last < coefs; last++) { \
6104 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6105 if( get_cabac( &h->cabac, sig_ctx )) { \
6106 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6107 index[coeff_count++] = last; \
6108 if( get_cabac( &h->cabac, last_ctx ) ) { \
6109 last= max_coeff; \
6110 break; \
6114 const int *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6115 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
6116 } else {
6117 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
6119 if( last == max_coeff -1 ) {
6120 index[coeff_count++] = last;
6122 assert(coeff_count > 0);
6124 if( cat == 0 )
6125 h->cbp_table[mb_xy] |= 0x100;
6126 else if( cat == 1 || cat == 2 )
6127 h->non_zero_count_cache[scan8[n]] = coeff_count;
6128 else if( cat == 3 )
6129 h->cbp_table[mb_xy] |= 0x40 << n;
6130 else if( cat == 4 )
6131 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
6132 else {
6133 assert( cat == 5 );
6134 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
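/* Magnitudes and signs are decoded in reverse scan order. The context of the
 * first magnitude bin depends on how many levels equal to 1 vs greater than 1
 * have been seen so far; magnitudes reaching 15 escape to a bypass-coded
 * Exp-Golomb suffix. Signs are plain bypass bits. */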
6137 for( i = coeff_count - 1; i >= 0; i-- ) {
6138 uint8_t *ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base;
6139 int j= scantable[index[i]];
6141 if( get_cabac( &h->cabac, ctx ) == 0 ) {
6142 if( !qmul ) {
6143 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
6144 else block[j] = 1;
6145 }else{
6146 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
6147 else block[j] = ( qmul[j] + 32) >> 6;
6150 abslevel1++;
6151 } else {
6152 int coeff_abs = 2;
6153 ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base;
6154 while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) {
6155 coeff_abs++;
6158 if( coeff_abs >= 15 ) {
6159 int j = 0;
6160 while( get_cabac_bypass( &h->cabac ) ) {
6161 coeff_abs += 1 << j;
6162 j++;
6165 while( j-- ) {
6166 if( get_cabac_bypass( &h->cabac ) )
6167 coeff_abs += 1 << j ;
6171 if( !qmul ) {
6172 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
6173 else block[j] = coeff_abs;
6174 }else{
6175 if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
6176 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
6179 abslevelgt1++;
6182 return 0;
6185 static void inline compute_mb_neighbors(H264Context *h)
6187 MpegEncContext * const s = &h->s;
6188 const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
6189 h->top_mb_xy = mb_xy - s->mb_stride;
6190 h->left_mb_xy[0] = mb_xy - 1;
6191 if(FRAME_MBAFF){
6192 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
6193 const int top_pair_xy = pair_xy - s->mb_stride;
6194 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
6195 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
6196 const int curr_mb_frame_flag = !MB_FIELD;
6197 const int bottom = (s->mb_y & 1);
6198 if (bottom
6199 ? !curr_mb_frame_flag // bottom macroblock
6200 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
6202 h->top_mb_xy -= s->mb_stride;
6204 if (left_mb_frame_flag != curr_mb_frame_flag) {
6205 h->left_mb_xy[0] = pair_xy - 1;
6208 return;
6212 * decodes a macroblock
6213 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6215 static int decode_mb_cabac(H264Context *h) {
6216 MpegEncContext * const s = &h->s;
6217 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
6218 int mb_type, partition_count, cbp = 0;
6219 int dct8x8_allowed= h->pps.transform_8x8_mode;
6221 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
6223 tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
6224 if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6225 int skip;
6226 /* a skipped mb needs the aff flag from the following mb */
6227 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6228 predict_field_decoding_flag(h);
6229 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6230 skip = h->next_mb_skipped;
6231 else
6232 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
6233 /* read skip flags */
6234 if( skip ) {
6235 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6236 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6237 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6238 if(h->next_mb_skipped)
6239 predict_field_decoding_flag(h);
6240 else
6241 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6244 decode_mb_skip(h);
6246 h->cbp_table[mb_xy] = 0;
6247 h->chroma_pred_mode_table[mb_xy] = 0;
6248 h->last_qscale_diff = 0;
6250 return 0;
6254 if(FRAME_MBAFF){
6255 if( (s->mb_y&1) == 0 )
6256 h->mb_mbaff =
6257 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6258 }else
6259 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
6261 h->prev_mb_skipped = 0;
6263 compute_mb_neighbors(h);
6264 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
6265 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
6266 return -1;
6269 if( h->slice_type == B_TYPE ) {
6270 if( mb_type < 23 ){
6271 partition_count= b_mb_type_info[mb_type].partition_count;
6272 mb_type= b_mb_type_info[mb_type].type;
6273 }else{
6274 mb_type -= 23;
6275 goto decode_intra_mb;
6277 } else if( h->slice_type == P_TYPE ) {
6278 if( mb_type < 5) {
6279 partition_count= p_mb_type_info[mb_type].partition_count;
6280 mb_type= p_mb_type_info[mb_type].type;
6281 } else {
6282 mb_type -= 5;
6283 goto decode_intra_mb;
6285 } else {
6286 assert(h->slice_type == I_TYPE);
6287 decode_intra_mb:
6288 partition_count = 0;
6289 cbp= i_mb_type_info[mb_type].cbp;
6290 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
6291 mb_type= i_mb_type_info[mb_type].type;
6293 if(MB_FIELD)
6294 mb_type |= MB_TYPE_INTERLACED;
6296 h->slice_table[ mb_xy ]= h->slice_num;
6298 if(IS_INTRA_PCM(mb_type)) {
6299 const uint8_t *ptr;
6300 unsigned int x, y;
6302 // We assume these blocks are very rare so we don't optimize it.
6303 // FIXME The two following lines get the bitstream position in the CABAC
6304 // decoder; I think it should be done by a function in cabac.h (or cabac.c).
6305 ptr= h->cabac.bytestream;
6306 if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
6308 // The pixels are stored in the same order as levels in h->mb array.
6309 for(y=0; y<16; y++){
6310 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
6311 for(x=0; x<16; x++){
6312 tprintf("LUMA IPCM LEVEL (%3d)\n", *ptr);
6313 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
6316 for(y=0; y<8; y++){
6317 const int index= 256 + 4*(y&3) + 32*(y>>2);
6318 for(x=0; x<8; x++){
6319 tprintf("CHROMA U IPCM LEVEL (%3d)\n", *ptr);
6320 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6323 for(y=0; y<8; y++){
6324 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
6325 for(x=0; x<8; x++){
6326 tprintf("CHROMA V IPCM LEVEL (%3d)\n", *ptr);
6327 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
6331 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
6333 // All blocks are present
6334 h->cbp_table[mb_xy] = 0x1ef;
6335 h->chroma_pred_mode_table[mb_xy] = 0;
6336 // In deblocking, the quantizer is 0
6337 s->current_picture.qscale_table[mb_xy]= 0;
6338 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
6339 // All coeffs are present
6340 memset(h->non_zero_count[mb_xy], 16, 16);
6341 s->current_picture.mb_type[mb_xy]= mb_type;
6342 return 0;
6345 if(MB_MBAFF){
6346 h->ref_count[0] <<= 1;
6347 h->ref_count[1] <<= 1;
6350 fill_caches(h, mb_type, 0);
6352 if( IS_INTRA( mb_type ) ) {
6353 int i;
6354 if( IS_INTRA4x4( mb_type ) ) {
6355 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
6356 mb_type |= MB_TYPE_8x8DCT;
6357 for( i = 0; i < 16; i+=4 ) {
6358 int pred = pred_intra_mode( h, i );
6359 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6360 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
6362 } else {
6363 for( i = 0; i < 16; i++ ) {
6364 int pred = pred_intra_mode( h, i );
6365 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
6367 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6370 write_back_intra_pred_mode(h);
6371 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
6372 } else {
6373 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
6374 if( h->intra16x16_pred_mode < 0 ) return -1;
6376 h->chroma_pred_mode_table[mb_xy] =
6377 h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h );
6379 h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
6380 if( h->chroma_pred_mode < 0 ) return -1;
6381 } else if( partition_count == 4 ) {
6382 int i, j, sub_partition_count[4], list, ref[2][4];
6384 if( h->slice_type == B_TYPE ) {
6385 for( i = 0; i < 4; i++ ) {
6386 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
6387 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6388 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6390 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
6391 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
6392 pred_direct_motion(h, &mb_type);
6393 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
6394 for( i = 0; i < 4; i++ )
6395 if( IS_DIRECT(h->sub_mb_type[i]) )
6396 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
6399 } else {
6400 for( i = 0; i < 4; i++ ) {
6401 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
6402 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
6403 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
6407 for( list = 0; list < 2; list++ ) {
6408 if( h->ref_count[list] > 0 ) {
6409 for( i = 0; i < 4; i++ ) {
6410 if(IS_DIRECT(h->sub_mb_type[i])) continue;
6411 if(IS_DIR(h->sub_mb_type[i], 0, list)){
6412 if( h->ref_count[list] > 1 )
6413 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
6414 else
6415 ref[list][i] = 0;
6416 } else {
6417 ref[list][i] = -1;
6419 h->ref_cache[list][ scan8[4*i]+1 ]=
6420 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
6425 if(dct8x8_allowed)
6426 dct8x8_allowed = get_dct8x8_allowed(h);
6428 for(list=0; list<2; list++){
6429 for(i=0; i<4; i++){
6430 if(IS_DIRECT(h->sub_mb_type[i])){
6431 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
6432 continue;
6434 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
6436 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
6437 const int sub_mb_type= h->sub_mb_type[i];
6438 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
6439 for(j=0; j<sub_partition_count[i]; j++){
6440 int mpx, mpy;
6441 int mx, my;
6442 const int index= 4*i + block_width*j;
6443 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
6444 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
6445 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
6447 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
6448 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
6449 tprintf("final mv:%d %d\n", mx, my);
6451 if(IS_SUB_8X8(sub_mb_type)){
6452 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
6453 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
6454 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
6455 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
6457 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]=
6458 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
6459 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]=
6460 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
6461 }else if(IS_SUB_8X4(sub_mb_type)){
6462 mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
6463 mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
6465 mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx;
6466 mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy;
6467 }else if(IS_SUB_4X8(sub_mb_type)){
6468 mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
6469 mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
6471 mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx;
6472 mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - mpy;
6473 }else{
6474 assert(IS_SUB_4X4(sub_mb_type));
6475 mv_cache[ 0 ][0]= mx;
6476 mv_cache[ 0 ][1]= my;
6478 mvd_cache[ 0 ][0]= mx - mpx;
6479 mvd_cache[ 0 ][1]= my - mpy;
6482 }else{
6483 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
6484 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
6485 p[0] = p[1] = p[8] = p[9] = 0;
6486 pd[0]= pd[1]= pd[8]= pd[9]= 0;
6490 } else if( IS_DIRECT(mb_type) ) {
6491 pred_direct_motion(h, &mb_type);
6492 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
6493 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
6494 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
6495 } else {
6496 int list, mx, my, i, mpx, mpy;
6497 if(IS_16X16(mb_type)){
6498 for(list=0; list<2; list++){
6499 if(IS_DIR(mb_type, 0, list)){
6500 if(h->ref_count[list] > 0 ){
6501 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
6502 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
6504 }else
6505 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
6507 for(list=0; list<2; list++){
6508 if(IS_DIR(mb_type, 0, list)){
6509 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
6511 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
6512 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
6513 tprintf("final mv:%d %d\n", mx, my);
6515 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6516 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
6517 }else
6518 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
6521 else if(IS_16X8(mb_type)){
6522 for(list=0; list<2; list++){
6523 if(h->ref_count[list]>0){
6524 for(i=0; i<2; i++){
6525 if(IS_DIR(mb_type, i, list)){
6526 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
6527 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
6528 }else
6529 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
6533 for(list=0; list<2; list++){
6534 for(i=0; i<2; i++){
6535 if(IS_DIR(mb_type, i, list)){
6536 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
6537 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
6538 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
6539 tprintf("final mv:%d %d\n", mx, my);
6541 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
6542 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
6543 }else{
6544 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6545 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
6549 }else{
6550 assert(IS_8X16(mb_type));
6551 for(list=0; list<2; list++){
6552 if(h->ref_count[list]>0){
6553 for(i=0; i<2; i++){
6554 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
6555 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
6556 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
6557 }else
6558 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
6562 for(list=0; list<2; list++){
6563 for(i=0; i<2; i++){
6564 if(IS_DIR(mb_type, i, list)){
6565 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
6566 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
6567 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
6569 tprintf("final mv:%d %d\n", mx, my);
6570 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
6571 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
6572 }else{
6573 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6574 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
6581 if( IS_INTER( mb_type ) ) {
6582 h->chroma_pred_mode_table[mb_xy] = 0;
6583 write_back_motion( h, mb_type );
6586 if( !IS_INTRA16x16( mb_type ) ) {
6587 cbp = decode_cabac_mb_cbp_luma( h );
6588 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
6591 h->cbp_table[mb_xy] = cbp;
6593 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
6594 if( decode_cabac_mb_transform_size( h ) )
6595 mb_type |= MB_TYPE_8x8DCT;
6597 s->current_picture.mb_type[mb_xy]= mb_type;
6599 if( cbp || IS_INTRA16x16( mb_type ) ) {
6600 const uint8_t *scan, *scan8x8, *dc_scan;
6601 int dqp;
6603 if(IS_INTERLACED(mb_type)){
6604 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6605 scan= s->qscale ? h->field_scan : h->field_scan_q0;
6606 dc_scan= luma_dc_field_scan;
6607 }else{
6608 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6609 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6610 dc_scan= luma_dc_zigzag_scan;
6613 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6614 if( dqp == INT_MIN ){
6615 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
6616 return -1;
6618 s->qscale += dqp;
6619 if(((unsigned)s->qscale) > 51){
6620 if(s->qscale<0) s->qscale+= 52;
6621 else s->qscale-= 52;
6623 h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
6625 if( IS_INTRA16x16( mb_type ) ) {
6626 int i;
6627 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6628 if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
6629 return -1;
6630 if( cbp&15 ) {
6631 for( i = 0; i < 16; i++ ) {
6632 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6633 if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
6634 return -1;
6636 } else {
6637 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6639 } else {
6640 int i8x8, i4x4;
6641 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6642 if( cbp & (1<<i8x8) ) {
6643 if( IS_8x8DCT(mb_type) ) {
6644 if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6645 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
6646 return -1;
6647 } else
6648 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6649 const int index = 4*i8x8 + i4x4;
6650 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6651 if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
6652 return -1;
6654 } else {
6655 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6656 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6661 if( cbp&0x30 ){
6662 int c;
6663 for( c = 0; c < 2; c++ ) {
6664 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6665 if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
6666 return -1;
6670 if( cbp&0x20 ) {
6671 int c, i;
6672 for( c = 0; c < 2; c++ ) {
6673 for( i = 0; i < 4; i++ ) {
6674 const int index = 16 + 4 * c + i;
6675 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6676 if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
6677 return -1;
6680 } else {
6681 uint8_t * const nnz= &h->non_zero_count_cache[0];
6682 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6683 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6685 } else {
6686 uint8_t * const nnz= &h->non_zero_count_cache[0];
6687 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6688 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6689 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6690 h->last_qscale_diff = 0;
6693 s->current_picture.qscale_table[mb_xy]= s->qscale;
6694 write_back_non_zero_count(h);
6696 if(MB_MBAFF){
6697 h->ref_count[0] >>= 1;
6698 h->ref_count[1] >>= 1;
6701 return 0;
6705 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6706 int i, d;
6707 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6708 const int alpha = alpha_table[index_a];
6709 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6711 if( bS[0] < 4 ) {
6712 int8_t tc[4];
6713 for(i=0; i<4; i++)
6714 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6715 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6716 } else {
6717 /* 16px edge length, because bS=4 is triggered by being at
6718 * the edge of an intra MB, so all 4 bS are the same */
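/* bS==4 strong filter: if additionally |p0-q0| < (alpha>>2)+2 and the p2/q2
 * threshold holds, p0..p2 (resp. q0..q2) are replaced by 4- and 5-tap
 * averages; otherwise only p0 and q0 are smoothed with a 3-tap filter. */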
6719 for( d = 0; d < 16; d++ ) {
6720 const int p0 = pix[-1];
6721 const int p1 = pix[-2];
6722 const int p2 = pix[-3];
6724 const int q0 = pix[0];
6725 const int q1 = pix[1];
6726 const int q2 = pix[2];
6728 if( ABS( p0 - q0 ) < alpha &&
6729 ABS( p1 - p0 ) < beta &&
6730 ABS( q1 - q0 ) < beta ) {
6732 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6733 if( ABS( p2 - p0 ) < beta)
6735 const int p3 = pix[-4];
6736 /* p0', p1', p2' */
6737 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6738 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6739 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6740 } else {
6741 /* p0' */
6742 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6744 if( ABS( q2 - q0 ) < beta)
6746 const int q3 = pix[3];
6747 /* q0', q1', q2' */
6748 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6749 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6750 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6751 } else {
6752 /* q0' */
6753 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6755 }else{
6756 /* p0', q0' */
6757 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6758 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6760 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
6762 pix += stride;
6766 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6767 int i;
6768 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6769 const int alpha = alpha_table[index_a];
6770 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6772 if( bS[0] < 4 ) {
6773 int8_t tc[4];
6774 for(i=0; i<4; i++)
6775 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
6776 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6777 } else {
6778 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6782 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6783 int i;
6784 for( i = 0; i < 16; i++, pix += stride) {
6785 int index_a;
6786 int alpha;
6787 int beta;
6789 int qp_index;
6790 int bS_index = (i >> 1);
6791 if (!MB_FIELD) {
6792 bS_index &= ~1;
6793 bS_index |= (i & 1);
6796 if( bS[bS_index] == 0 ) {
6797 continue;
6800 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6801 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6802 alpha = alpha_table[index_a];
6803 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6805 if( bS[bS_index] < 4 ) {
6806 const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6807 const int p0 = pix[-1];
6808 const int p1 = pix[-2];
6809 const int p2 = pix[-3];
6810 const int q0 = pix[0];
6811 const int q1 = pix[1];
6812 const int q2 = pix[2];
6814 if( ABS( p0 - q0 ) < alpha &&
6815 ABS( p1 - p0 ) < beta &&
6816 ABS( q1 - q0 ) < beta ) {
6817 int tc = tc0;
6818 int i_delta;
6820 if( ABS( p2 - p0 ) < beta ) {
6821 pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6822 tc++;
6824 if( ABS( q2 - q0 ) < beta ) {
6825 pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6826 tc++;
6829 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6830 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6831 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6832 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6834 }else{
6835 const int p0 = pix[-1];
6836 const int p1 = pix[-2];
6837 const int p2 = pix[-3];
6839 const int q0 = pix[0];
6840 const int q1 = pix[1];
6841 const int q2 = pix[2];
6843 if( ABS( p0 - q0 ) < alpha &&
6844 ABS( p1 - p0 ) < beta &&
6845 ABS( q1 - q0 ) < beta ) {
6847 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6848 if( ABS( p2 - p0 ) < beta)
6850 const int p3 = pix[-4];
6851 /* p0', p1', p2' */
6852 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6853 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6854 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6855 } else {
6856 /* p0' */
6857 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6859 if( ABS( q2 - q0 ) < beta)
6861 const int q3 = pix[3];
6862 /* q0', q1', q2' */
6863 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6864 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6865 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6866 } else {
6867 /* q0' */
6868 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6870 }else{
6871 /* p0', q0' */
6872 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6873 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6875 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
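/* Chroma counterpart of the MBAFF vertical edge filter: 8 rows, one bS
 * entry per row, and only the short filter (no p2/q2 terms), matching the
 * regular chroma path. */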
6880 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6881 int i;
6882 for( i = 0; i < 8; i++, pix += stride) {
6883 int index_a;
6884 int alpha;
6885 int beta;
6887 int qp_index;
6888 int bS_index = i;
6890 if( bS[bS_index] == 0 ) {
6891 continue;
6894 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6895 index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6896 alpha = alpha_table[index_a];
6897 beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6899 if( bS[bS_index] < 4 ) {
6900 const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
6901 const int p0 = pix[-1];
6902 const int p1 = pix[-2];
6903 const int q0 = pix[0];
6904 const int q1 = pix[1];
6906 if( ABS( p0 - q0 ) < alpha &&
6907 ABS( p1 - p0 ) < beta &&
6908 ABS( q1 - q0 ) < beta ) {
6909 const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6911 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */
6912 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
6913 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6915 }else{
6916 const int p0 = pix[-1];
6917 const int p1 = pix[-2];
6918 const int q0 = pix[0];
6919 const int q1 = pix[1];
6921 if( ABS( p0 - q0 ) < alpha &&
6922 ABS( p1 - p0 ) < beta &&
6923 ABS( q1 - q0 ) < beta ) {
6925 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6926 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6927 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
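/* Horizontal (top) luma edge filter: same arithmetic as filter_mb_edgev,
 * but the p/q samples are addressed in multiples of the line stride and
 * the 16 filtered positions advance one pixel to the right per iteration. */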
6933 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6934 int i, d;
6935 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6936 const int alpha = alpha_table[index_a];
6937 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6938 const int pix_next = stride;
6940 if( bS[0] < 4 ) {
6941 int8_t tc[4];
6942 for(i=0; i<4; i++)
6943 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1;
6944 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6945 } else {
6946 /* 16px edge length, see filter_mb_edgev */
6947 for( d = 0; d < 16; d++ ) {
6948 const int p0 = pix[-1*pix_next];
6949 const int p1 = pix[-2*pix_next];
6950 const int p2 = pix[-3*pix_next];
6951 const int q0 = pix[0];
6952 const int q1 = pix[1*pix_next];
6953 const int q2 = pix[2*pix_next];
6955 if( ABS( p0 - q0 ) < alpha &&
6956 ABS( p1 - p0 ) < beta &&
6957 ABS( q1 - q0 ) < beta ) {
6959 const int p3 = pix[-4*pix_next];
6960 const int q3 = pix[ 3*pix_next];
6962 if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6963 if( ABS( p2 - p0 ) < beta) {
6964 /* p0', p1', p2' */
6965 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6966 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6967 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6968 } else {
6969 /* p0' */
6970 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6972 if( ABS( q2 - q0 ) < beta) {
6973 /* q0', q1', q2' */
6974 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6975 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6976 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6977 } else {
6978 /* q0' */
6979 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6981 }else{
6982 /* p0', q0' */
6983 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6984 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6986 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6988 pix++;
6993 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
6994 int i;
6995 const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
6996 const int alpha = alpha_table[index_a];
6997 const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
6999 if( bS[0] < 4 ) {
7000 int8_t tc[4];
7001 for(i=0; i<4; i++)
7002 tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0;
7003 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
7004 } else {
7005 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
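/* Deblock one decoded macroblock. In outline: a boundary strength bS is
 * derived for each 4-pixel segment of every internal and macroblock-edge
 * boundary (4 or 3 if either side is intra, 2 if either side has nonzero
 * residual coefficients, 1 if the motion vectors or reference frames
 * differ enough, 0 otherwise), then vertical edges (dir 0) and horizontal
 * edges (dir 1) are filtered for luma and chroma. MBAFF frames take the
 * special-cased paths above for the left and top macroblock edges. */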
7009 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
7010 MpegEncContext * const s = &h->s;
7011 const int mb_xy= mb_x + mb_y*s->mb_stride;
7012 const int mb_type = s->current_picture.mb_type[mb_xy];
7013 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
7014 int first_vertical_edge_done = 0;
7015 int dir;
7016 /* FIXME: A given frame may occupy more than one position in
7017 * the reference list. So ref2frm should be populated with
7018 * frame numbers, not indices. */
7019 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7020 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7022 //for sufficiently low qp, filtering wouldn't do anything
7023 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7024 if(!FRAME_MBAFF){
7025 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
7026 int qp = s->current_picture.qscale_table[mb_xy];
7027 if(qp <= qp_thresh
7028 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
7029 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
7030 return;
7034 if (FRAME_MBAFF
7035 // left mb is in picture
7036 && h->slice_table[mb_xy-1] != 255
7037 // and current and left pair do not have the same interlaced type
7038 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
7039 // and left mb is in the same slice if deblocking_filter == 2
7040 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
7041 /* First vertical edge is different in MBAFF frames
7042 * There are 8 different bS to compute and 2 different Qp
7044 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
7045 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
7046 int bS[8];
7047 int qp[2];
7048 int chroma_qp[2];
7049 int mb_qp, mbn0_qp, mbn1_qp;
7050 int i;
7051 first_vertical_edge_done = 1;
7053 if( IS_INTRA(mb_type) )
7054 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
7055 else {
7056 for( i = 0; i < 8; i++ ) {
7057 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
7059 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
7060 bS[i] = 4;
7061 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
7062 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7063 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
7064 bS[i] = 2;
7065 else
7066 bS[i] = 1;
7070 mb_qp = s->current_picture.qscale_table[mb_xy];
7071 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
7072 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
7073 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
7074 chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7075 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
7076 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
7077 chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
7078 get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
7080 /* Filter edge */
7081 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
7082 { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7083 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
7084 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
7085 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
7087 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7088 for( dir = 0; dir < 2; dir++ )
7090 int edge;
7091 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
7092 const int mbm_type = s->current_picture.mb_type[mbm_xy];
7093 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
7095 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
7096 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
7097 // how often to recheck mv-based bS when iterating between edges
7098 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
7099 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
7100 // how often to recheck mv-based bS when iterating along each edge
7101 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
7103 if (first_vertical_edge_done) {
7104 start = 1;
7105 first_vertical_edge_done = 0;
7108 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
7109 start = 1;
7111 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
7112 && !IS_INTERLACED(mb_type)
7113 && IS_INTERLACED(mbm_type)
7115 // This is a special case in the norm where the filtering must
7116 // be done twice (once for each field) even if we are in a
7117 // frame macroblock.
7119 static const int nnz_idx[4] = {4,5,6,3};
7120 unsigned int tmp_linesize = 2 * linesize;
7121 unsigned int tmp_uvlinesize = 2 * uvlinesize;
7122 int mbn_xy = mb_xy - 2 * s->mb_stride;
7123 int qp, chroma_qp;
7124 int i, j;
7125 int bS[4];
7127 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
7128 if( IS_INTRA(mb_type) ||
7129 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
7130 bS[0] = bS[1] = bS[2] = bS[3] = 3;
7131 } else {
7132 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
7133 for( i = 0; i < 4; i++ ) {
7134 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
7135 mbn_nnz[nnz_idx[i]] != 0 )
7136 bS[i] = 2;
7137 else
7138 bS[i] = 1;
7141 // Do not use s->qscale as the luma quantizer because it does not have
7142 // the same value in IPCM macroblocks.
7143 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7144 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
7145 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7146 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
7147 chroma_qp = ( h->chroma_qp +
7148 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7149 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7150 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
7153 start = 1;
7156 /* Calculate bS */
7157 for( edge = start; edge < edges; edge++ ) {
7158 /* mbn_xy: neighbor macroblock */
7159 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
7160 const int mbn_type = s->current_picture.mb_type[mbn_xy];
7161 int bS[4];
7162 int qp;
7164 if( (edge&1) && IS_8x8DCT(mb_type) )
7165 continue;
7167 if( IS_INTRA(mb_type) ||
7168 IS_INTRA(mbn_type) ) {
7169 int value;
7170 if (edge == 0) {
7171 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
7172 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
7174 value = 4;
7175 } else {
7176 value = 3;
7178 } else {
7179 value = 3;
7181 bS[0] = bS[1] = bS[2] = bS[3] = value;
7182 } else {
7183 int i, l;
7184 int mv_done;
7186 if( edge & mask_edge ) {
7187 bS[0] = bS[1] = bS[2] = bS[3] = 0;
7188 mv_done = 1;
7190 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
7191 bS[0] = bS[1] = bS[2] = bS[3] = 1;
7192 mv_done = 1;
7194 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
7195 int b_idx= 8 + 4 + edge * (dir ? 8:1);
7196 int bn_idx= b_idx - (dir ? 8:1);
7197 int v = 0;
7198 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
7199 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7200 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7201 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
7203 bS[0] = bS[1] = bS[2] = bS[3] = v;
7204 mv_done = 1;
7206 else
7207 mv_done = 0;
7209 for( i = 0; i < 4; i++ ) {
7210 int x = dir == 0 ? edge : i;
7211 int y = dir == 0 ? i : edge;
7212 int b_idx= 8 + 4 + x + 8*y;
7213 int bn_idx= b_idx - (dir ? 8:1);
7215 if( h->non_zero_count_cache[b_idx] != 0 ||
7216 h->non_zero_count_cache[bn_idx] != 0 ) {
7217 bS[i] = 2;
7219 else if(!mv_done)
7221 bS[i] = 0;
7222 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
7223 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
7224 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
7225 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
7226 bS[i] = 1;
7227 break;
7233 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
7234 continue;
7237 /* Filter edge */
7238 // Do not use s->qscale as the luma quantizer because it does not have
7239 // the same value in IPCM macroblocks.
7240 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
7241 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7242 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
7243 { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
7244 if( dir == 0 ) {
7245 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
7246 if( (edge&1) == 0 ) {
7247 int chroma_qp = ( h->chroma_qp +
7248 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7249 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
7250 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
7252 } else {
7253 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
7254 if( (edge&1) == 0 ) {
7255 int chroma_qp = ( h->chroma_qp +
7256 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
7257 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
7258 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
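/* Decode every macroblock of the current slice. Depending on pps->cabac
 * this is either the CABAC loop (contexts initialised from the slice QP,
 * end_of_slice_flag checked after each macroblock) or the CAVLC loop
 * (which ends when the bit reader reaches the end of the RBSP). With
 * MBAFF, macroblock pairs (top and bottom field/frame MB) are decoded
 * together before advancing. */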
7265 static int decode_slice(H264Context *h){
7266 MpegEncContext * const s = &h->s;
7267 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
7269 s->mb_skip_run= -1;
7271 if( h->pps.cabac ) {
7272 int i;
7274 /* realign */
7275 align_get_bits( &s->gb );
7277 /* init cabac */
7278 ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
7279 ff_init_cabac_decoder( &h->cabac,
7280 s->gb.buffer + get_bits_count(&s->gb)/8,
7281 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
7282 /* calculate pre-state */
7283 for( i= 0; i < 460; i++ ) {
7284 int pre;
7285 if( h->slice_type == I_TYPE )
7286 pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
7287 else
7288 pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
7290 if( pre <= 63 )
7291 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
7292 else
7293 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
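/* Standard CABAC context initialisation: for each context i,
 * preCtxState = clip(((m*QP)>>4) + n, 1, 126) with (m,n) taken from the
 * init tables; values <= 63 become state 63-pre with MPS 0, values >= 64
 * become state pre-64 with MPS 1, packed here as 2*state + MPS. */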
7296 for(;;){
7297 int ret = decode_mb_cabac(h);
7298 int eos;
7300 if(ret>=0) hl_decode_mb(h);
7302 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7303 s->mb_y++;
7305 if(ret>=0) ret = decode_mb_cabac(h);
7307 if(ret>=0) hl_decode_mb(h);
7308 s->mb_y--;
7310 eos = get_cabac_terminate( &h->cabac );
7312 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
7313 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
7314 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7315 return -1;
7318 if( ++s->mb_x >= s->mb_width ) {
7319 s->mb_x = 0;
7320 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7321 ++s->mb_y;
7322 if(FRAME_MBAFF) {
7323 ++s->mb_y;
7327 if( eos || s->mb_y >= s->mb_height ) {
7328 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7329 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7330 return 0;
7334 } else {
7335 for(;;){
7336 int ret = decode_mb_cavlc(h);
7338 if(ret>=0) hl_decode_mb(h);
7340 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
7341 s->mb_y++;
7342 ret = decode_mb_cavlc(h);
7344 if(ret>=0) hl_decode_mb(h);
7345 s->mb_y--;
7348 if(ret<0){
7349 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7350 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7352 return -1;
7355 if(++s->mb_x >= s->mb_width){
7356 s->mb_x=0;
7357 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7358 ++s->mb_y;
7359 if(FRAME_MBAFF) {
7360 ++s->mb_y;
7362 if(s->mb_y >= s->mb_height){
7363 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7365 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
7366 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7368 return 0;
7369 }else{
7370 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7372 return -1;
7377 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
7378 tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
7379 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
7380 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7382 return 0;
7383 }else{
7384 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7386 return -1;
7392 #if 0
7393 for(;s->mb_y < s->mb_height; s->mb_y++){
7394 for(;s->mb_x < s->mb_width; s->mb_x++){
7395 int ret= decode_mb(h);
7397 hl_decode_mb(h);
7399 if(ret<0){
7400 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
7401 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7403 return -1;
7406 if(++s->mb_x >= s->mb_width){
7407 s->mb_x=0;
7408 if(++s->mb_y >= s->mb_height){
7409 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7410 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7412 return 0;
7413 }else{
7414 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7416 return -1;
7421 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
7422 if(get_bits_count(s->gb) == s->gb.size_in_bits){
7423 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
7425 return 0;
7426 }else{
7427 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
7429 return -1;
7433 s->mb_x=0;
7434 ff_draw_horiz_band(s, 16*s->mb_y, 16);
7436 #endif
7437 return -1; //not reached
7440 static int decode_unregistered_user_data(H264Context *h, int size){
7441 MpegEncContext * const s = &h->s;
7442 uint8_t user_data[16+256];
7443 int e, build, i;
7445 if(size<16)
7446 return -1;
7448 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
7449 user_data[i]= get_bits(&s->gb, 8);
7452 user_data[i]= 0;
7453 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
7454 if(e==1 && build>=0)
7455 h->x264_build= build;
7457 if(s->avctx->debug & FF_DEBUG_BUGS)
7458 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
7460 for(; i<size; i++)
7461 skip_bits(&s->gb, 8);
7463 return 0;
7466 static int decode_sei(H264Context *h){
7467 MpegEncContext * const s = &h->s;
7469 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
7470 int size, type;
7472 type=0;
7474 type+= show_bits(&s->gb, 8);
7475 }while(get_bits(&s->gb, 8) == 255);
7477 size=0;
7479 size+= show_bits(&s->gb, 8);
7480 }while(get_bits(&s->gb, 8) == 255);
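/* SEI payload type and size are each coded as a run of 0xFF bytes followed
 * by a terminating byte, every 0xFF adding 255 to the value. Only payload
 * type 5 (user data unregistered) is interpreted here; everything else is
 * skipped. */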
7482 switch(type){
7483 case 5:
7484 if(decode_unregistered_user_data(h, size) < 0)
7485 return -1;
7486 break;
7487 default:
7488 skip_bits(&s->gb, 8*size);
7491 //FIXME check bits here
7492 align_get_bits(&s->gb);
7495 return 0;
7498 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
7499 MpegEncContext * const s = &h->s;
7500 int cpb_count, i;
7501 cpb_count = get_ue_golomb(&s->gb) + 1;
7502 get_bits(&s->gb, 4); /* bit_rate_scale */
7503 get_bits(&s->gb, 4); /* cpb_size_scale */
7504 for(i=0; i<cpb_count; i++){
7505 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7506 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7507 get_bits1(&s->gb); /* cbr_flag */
7509 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7510 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7511 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7512 get_bits(&s->gb, 5); /* time_offset_length */
7515 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7516 MpegEncContext * const s = &h->s;
7517 int aspect_ratio_info_present_flag, aspect_ratio_idc;
7518 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7520 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7522 if( aspect_ratio_info_present_flag ) {
7523 aspect_ratio_idc= get_bits(&s->gb, 8);
7524 if( aspect_ratio_idc == EXTENDED_SAR ) {
7525 sps->sar.num= get_bits(&s->gb, 16);
7526 sps->sar.den= get_bits(&s->gb, 16);
7527 }else if(aspect_ratio_idc < 14){
7528 sps->sar= pixel_aspect[aspect_ratio_idc];
7529 }else{
7530 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7531 return -1;
7533 }else{
7534 sps->sar.num=
7535 sps->sar.den= 0;
7537 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7539 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7540 get_bits1(&s->gb); /* overscan_appropriate_flag */
7543 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7544 get_bits(&s->gb, 3); /* video_format */
7545 get_bits1(&s->gb); /* video_full_range_flag */
7546 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7547 get_bits(&s->gb, 8); /* colour_primaries */
7548 get_bits(&s->gb, 8); /* transfer_characteristics */
7549 get_bits(&s->gb, 8); /* matrix_coefficients */
7553 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7554 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7555 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7558 sps->timing_info_present_flag = get_bits1(&s->gb);
7559 if(sps->timing_info_present_flag){
7560 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7561 sps->time_scale = get_bits_long(&s->gb, 32);
7562 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7565 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7566 if(nal_hrd_parameters_present_flag)
7567 decode_hrd_parameters(h, sps);
7568 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7569 if(vcl_hrd_parameters_present_flag)
7570 decode_hrd_parameters(h, sps);
7571 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7572 get_bits1(&s->gb); /* low_delay_hrd_flag */
7573 get_bits1(&s->gb); /* pic_struct_present_flag */
7575 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7576 if(sps->bitstream_restriction_flag){
7577 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7578 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7579 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7580 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7581 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7582 sps->num_reorder_frames = get_ue_golomb(&s->gb);
7583 get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
7586 return 0;
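/* Scaling lists are coded as signed deltas in zig-zag order, each entry
 * being (previous + delta) & 0xff. A delta that makes the very first entry
 * zero selects the standard default (JVT) matrix; a zero later on means
 * the last value is repeated for the rest of the list. If the list is not
 * present at all, the fallback (SPS or previously decoded) matrix is
 * copied. */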
7589 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7590 const uint8_t *jvt_list, const uint8_t *fallback_list){
7591 MpegEncContext * const s = &h->s;
7592 int i, last = 8, next = 8;
7593 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7594 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7595 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7596 else
7597 for(i=0;i<size;i++){
7598 if(next)
7599 next = (last + get_se_golomb(&s->gb)) & 0xff;
7600 if(!i && !next){ /* matrix not written, we use the preset one */
7601 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7602 break;
7604 last = factors[scan[i]] = next ? next : last;
7608 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7609 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7610 MpegEncContext * const s = &h->s;
7611 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7612 const uint8_t *fallback[4] = {
7613 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7614 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7615 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7616 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7618 if(get_bits1(&s->gb)){
7619 sps->scaling_matrix_present |= is_sps;
7620 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7621 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7622 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7623 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7624 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7625 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7626 if(is_sps || pps->transform_8x8_mode){
7627 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7628 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7630 } else if(fallback_sps) {
7631 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7632 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7636 static inline int decode_seq_parameter_set(H264Context *h){
7637 MpegEncContext * const s = &h->s;
7638 int profile_idc, level_idc;
7639 int sps_id, i;
7640 SPS *sps;
7642 profile_idc= get_bits(&s->gb, 8);
7643 get_bits1(&s->gb); //constraint_set0_flag
7644 get_bits1(&s->gb); //constraint_set1_flag
7645 get_bits1(&s->gb); //constraint_set2_flag
7646 get_bits1(&s->gb); //constraint_set3_flag
7647 get_bits(&s->gb, 4); // reserved
7648 level_idc= get_bits(&s->gb, 8);
7649 sps_id= get_ue_golomb(&s->gb);
7651 sps= &h->sps_buffer[ sps_id ];
7652 sps->profile_idc= profile_idc;
7653 sps->level_idc= level_idc;
7655 if(sps->profile_idc >= 100){ //high profile
7656 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7657 get_bits1(&s->gb); //residual_color_transform_flag
7658 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7659 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7660 sps->transform_bypass = get_bits1(&s->gb);
7661 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7662 }else
7663 sps->scaling_matrix_present = 0;
7665 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7666 sps->poc_type= get_ue_golomb(&s->gb);
7668 if(sps->poc_type == 0){ //FIXME #define
7669 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7670 } else if(sps->poc_type == 1){//FIXME #define
7671 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7672 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7673 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7674 sps->poc_cycle_length= get_ue_golomb(&s->gb);
7676 for(i=0; i<sps->poc_cycle_length; i++)
7677 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7679 if(sps->poc_type > 2){
7680 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7681 return -1;
7684 sps->ref_frame_count= get_ue_golomb(&s->gb);
7685 if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
7686 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7688 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7689 sps->mb_width= get_ue_golomb(&s->gb) + 1;
7690 sps->mb_height= get_ue_golomb(&s->gb) + 1;
7691 if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
7692 avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
7693 return -1;
7695 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7696 if(!sps->frame_mbs_only_flag)
7697 sps->mb_aff= get_bits1(&s->gb);
7698 else
7699 sps->mb_aff= 0;
7701 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7703 #ifndef ALLOW_INTERLACE
7704 if(sps->mb_aff)
7705 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile time\n");
7706 #endif
7707 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7708 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7710 sps->crop= get_bits1(&s->gb);
7711 if(sps->crop){
7712 sps->crop_left = get_ue_golomb(&s->gb);
7713 sps->crop_right = get_ue_golomb(&s->gb);
7714 sps->crop_top = get_ue_golomb(&s->gb);
7715 sps->crop_bottom= get_ue_golomb(&s->gb);
7716 if(sps->crop_left || sps->crop_top){
7717 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7719 }else{
7720 sps->crop_left =
7721 sps->crop_right =
7722 sps->crop_top =
7723 sps->crop_bottom= 0;
7726 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7727 if( sps->vui_parameters_present_flag )
7728 decode_vui_parameters(h, sps);
7730 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7731 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7732 sps_id, sps->profile_idc, sps->level_idc,
7733 sps->poc_type,
7734 sps->ref_frame_count,
7735 sps->mb_width, sps->mb_height,
7736 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7737 sps->direct_8x8_inference_flag ? "8B8" : "",
7738 sps->crop_left, sps->crop_right,
7739 sps->crop_top, sps->crop_bottom,
7740 sps->vui_parameters_present_flag ? "VUI" : ""
7743 return 0;
7746 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7747 MpegEncContext * const s = &h->s;
7748 int pps_id= get_ue_golomb(&s->gb);
7749 PPS *pps= &h->pps_buffer[pps_id];
7751 pps->sps_id= get_ue_golomb(&s->gb);
7752 pps->cabac= get_bits1(&s->gb);
7753 pps->pic_order_present= get_bits1(&s->gb);
7754 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7755 if(pps->slice_group_count > 1 ){
7756 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7757 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7758 switch(pps->mb_slice_group_map_type){
7759 case 0:
7760 #if 0
7761 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7762 | run_length[ i ] |1 |ue(v) |
7763 #endif
7764 break;
7765 case 2:
7766 #if 0
7767 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7768 |{ | | |
7769 | top_left_mb[ i ] |1 |ue(v) |
7770 | bottom_right_mb[ i ] |1 |ue(v) |
7771 | } | | |
7772 #endif
7773 break;
7774 case 3:
7775 case 4:
7776 case 5:
7777 #if 0
7778 | slice_group_change_direction_flag |1 |u(1) |
7779 | slice_group_change_rate_minus1 |1 |ue(v) |
7780 #endif
7781 break;
7782 case 6:
7783 #if 0
7784 | slice_group_id_cnt_minus1 |1 |ue(v) |
7785 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7786 |) | | |
7787 | slice_group_id[ i ] |1 |u(v) |
7788 #endif
7789 break;
7792 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7793 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7794 if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
7795 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7796 return -1;
7799 pps->weighted_pred= get_bits1(&s->gb);
7800 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7801 pps->init_qp= get_se_golomb(&s->gb) + 26;
7802 pps->init_qs= get_se_golomb(&s->gb) + 26;
7803 pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
7804 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7805 pps->constrained_intra_pred= get_bits1(&s->gb);
7806 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7808 pps->transform_8x8_mode= 0;
7809 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7810 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7811 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7813 if(get_bits_count(&s->gb) < bit_length){
7814 pps->transform_8x8_mode= get_bits1(&s->gb);
7815 decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7816 get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7819 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7820 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7821 pps_id, pps->sps_id,
7822 pps->cabac ? "CABAC" : "CAVLC",
7823 pps->slice_group_count,
7824 pps->ref_count[0], pps->ref_count[1],
7825 pps->weighted_pred ? "weighted" : "",
7826 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
7827 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7828 pps->constrained_intra_pred ? "CONSTR" : "",
7829 pps->redundant_pic_cnt_present ? "REDU" : "",
7830 pps->transform_8x8_mode ? "8x8DCT" : ""
7834 return 0;
7838 * finds the end of the current frame in the bitstream.
7839 * @return the position of the first byte of the next frame, or -1
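 *
 * In outline, this is a byte-wise state machine over the last four input
 * bytes: a start code followed by a slice NAL unit (types 1, 2 or 5) whose
 * first_mb_in_slice decodes to 0 marks the beginning of the next frame, and
 * SPS, PPS or access unit delimiter NAL units (7, 8, 9) also end the
 * current frame.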
7841 static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
7842 int i;
7843 uint32_t state;
7844 ParseContext *pc = &(h->s.parse_context);
7845 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7846 // mb_addr= pc->mb_addr - 1;
7847 state= pc->state;
7848 for(i=0; i<=buf_size; i++){
7849 if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7850 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
7851 if(pc->frame_start_found){
7852 // If there isn't one more byte in the buffer,
7853 // the test on first_mb_in_slice cannot be done yet;
7854 // do it at the next call.
7855 if (i >= buf_size) break;
7856 if (buf[i] & 0x80) {
7857 // first_mb_in_slice is 0, probably the first nal of a new
7858 // slice
7859 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
7860 pc->state=-1;
7861 pc->frame_start_found= 0;
7862 return i-4;
7865 pc->frame_start_found = 1;
7867 if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
7868 if(pc->frame_start_found){
7869 pc->state=-1;
7870 pc->frame_start_found= 0;
7871 return i-4;
7874 if (i<buf_size)
7875 state= (state<<8) | buf[i];
7878 pc->state= state;
7879 return END_NOT_FOUND;
7882 static int h264_parse(AVCodecParserContext *s,
7883 AVCodecContext *avctx,
7884 uint8_t **poutbuf, int *poutbuf_size,
7885 const uint8_t *buf, int buf_size)
7887 H264Context *h = s->priv_data;
7888 ParseContext *pc = &h->s.parse_context;
7889 int next;
7891 next= find_frame_end(h, buf, buf_size);
7893 if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
7894 *poutbuf = NULL;
7895 *poutbuf_size = 0;
7896 return buf_size;
7899 *poutbuf = (uint8_t *)buf;
7900 *poutbuf_size = buf_size;
7901 return next;
7904 static int h264_split(AVCodecContext *avctx,
7905 const uint8_t *buf, int buf_size)
7907 int i;
7908 uint32_t state = -1;
7909 int has_sps= 0;
7911 for(i=0; i<=buf_size; i++){
7912 if((state&0xFFFFFF1F) == 0x107)
7913 has_sps=1;
7914 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7916 if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
7917 if(has_sps){
7918 while(i>4 && buf[i-5]==0) i--;
7919 return i-4;
7922 if (i<buf_size)
7923 state= (state<<8) | buf[i];
7925 return 0;
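/* Split the buffer into NAL units and dispatch them. For MP4-style "AVC"
 * streams each NAL unit is preceded by a big-endian length field of
 * h->nal_length_size bytes; otherwise Annex B 00 00 01 start codes are
 * searched. decode_nal() removes the emulation prevention bytes and the
 * payload is then parsed according to nal_unit_type. */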
7929 static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
7930 MpegEncContext * const s = &h->s;
7931 AVCodecContext * const avctx= s->avctx;
7932 int buf_index=0;
7933 #if 0
7934 int i;
7935 for(i=0; i<50; i++){
7936 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7938 #endif
7939 h->slice_num = 0;
7940 s->current_picture_ptr= NULL;
7941 for(;;){
7942 int consumed;
7943 int dst_length;
7944 int bit_length;
7945 uint8_t *ptr;
7946 int i, nalsize = 0;
7948 if(h->is_avc) {
7949 if(buf_index >= buf_size) break;
7950 nalsize = 0;
7951 for(i = 0; i < h->nal_length_size; i++)
7952 nalsize = (nalsize << 8) | buf[buf_index++];
7953 if(nalsize <= 1){
7954 if(nalsize == 1){
7955 buf_index++;
7956 continue;
7957 }else{
7958 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7959 break;
7962 } else {
7963 // start code prefix search
7964 for(; buf_index + 3 < buf_size; buf_index++){
7965 // this should always succeed in the first iteration
7966 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7967 break;
7970 if(buf_index+3 >= buf_size) break;
7972 buf_index+=3;
7975 ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7976 if(ptr[dst_length - 1] == 0) dst_length--;
7977 bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
7979 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7980 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
7983 if (h->is_avc && (nalsize != consumed))
7984 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7986 buf_index += consumed;
7988 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME don't discard SEI id
7989 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7990 continue;
7992 switch(h->nal_unit_type){
7993 case NAL_IDR_SLICE:
7994 idr(h); //FIXME ensure we don't lose some frames if there is reordering
7995 case NAL_SLICE:
7996 init_get_bits(&s->gb, ptr, bit_length);
7997 h->intra_gb_ptr=
7998 h->inter_gb_ptr= &s->gb;
7999 s->data_partitioning = 0;
8001 if(decode_slice_header(h) < 0){
8002 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8003 break;
8005 s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
8006 if(h->redundant_pic_count==0 && s->hurry_up < 5
8007 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8008 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8009 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8010 && avctx->skip_frame < AVDISCARD_ALL)
8011 decode_slice(h);
8012 break;
8013 case NAL_DPA:
8014 init_get_bits(&s->gb, ptr, bit_length);
8015 h->intra_gb_ptr=
8016 h->inter_gb_ptr= NULL;
8017 s->data_partitioning = 1;
8019 if(decode_slice_header(h) < 0){
8020 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
8022 break;
8023 case NAL_DPB:
8024 init_get_bits(&h->intra_gb, ptr, bit_length);
8025 h->intra_gb_ptr= &h->intra_gb;
8026 break;
8027 case NAL_DPC:
8028 init_get_bits(&h->inter_gb, ptr, bit_length);
8029 h->inter_gb_ptr= &h->inter_gb;
8031 if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
8032 && s->hurry_up < 5
8033 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
8034 && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
8035 && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
8036 && avctx->skip_frame < AVDISCARD_ALL)
8037 decode_slice(h);
8038 break;
8039 case NAL_SEI:
8040 init_get_bits(&s->gb, ptr, bit_length);
8041 decode_sei(h);
8042 break;
8043 case NAL_SPS:
8044 init_get_bits(&s->gb, ptr, bit_length);
8045 decode_seq_parameter_set(h);
8047 if(s->flags& CODEC_FLAG_LOW_DELAY)
8048 s->low_delay=1;
8050 if(avctx->has_b_frames < 2)
8051 avctx->has_b_frames= !s->low_delay;
8052 break;
8053 case NAL_PPS:
8054 init_get_bits(&s->gb, ptr, bit_length);
8056 decode_picture_parameter_set(h, bit_length);
8058 break;
8059 case NAL_AUD:
8060 case NAL_END_SEQUENCE:
8061 case NAL_END_STREAM:
8062 case NAL_FILLER_DATA:
8063 case NAL_SPS_EXT:
8064 case NAL_AUXILIARY_SLICE:
8065 break;
8066 default:
8067 av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
8071 if(!s->current_picture_ptr) return buf_index; //no frame
8073 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
8074 s->current_picture_ptr->pict_type= s->pict_type;
8076 h->prev_frame_num_offset= h->frame_num_offset;
8077 h->prev_frame_num= h->frame_num;
8078 if(s->current_picture_ptr->reference){
8079 h->prev_poc_msb= h->poc_msb;
8080 h->prev_poc_lsb= h->poc_lsb;
8082 if(s->current_picture_ptr->reference)
8083 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
8085 ff_er_frame_end(s);
8087 MPV_frame_end(s);
8089 return buf_index;
8093 * returns the number of bytes consumed for building the current frame
8095 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
8096 if(s->flags&CODEC_FLAG_TRUNCATED){
8097 pos -= s->parse_context.last_index;
8098 if(pos<0) pos=0; // FIXME remove (unneeded?)
8100 return pos;
8101 }else{
8102 if(pos==0) pos=1; //avoid infinite loops (I doubt that's needed, but ...)
8103 if(pos+10>buf_size) pos=buf_size; // oops ;)
8105 return pos;
8109 static int decode_frame(AVCodecContext *avctx,
8110 void *data, int *data_size,
8111 uint8_t *buf, int buf_size)
8113 H264Context *h = avctx->priv_data;
8114 MpegEncContext *s = &h->s;
8115 AVFrame *pict = data;
8116 int buf_index;
8118 s->flags= avctx->flags;
8119 s->flags2= avctx->flags2;
8121 /* no supplementary picture */
8122 if (buf_size == 0) {
8123 return 0;
8126 if(s->flags&CODEC_FLAG_TRUNCATED){
8127 int next= find_frame_end(h, buf, buf_size);
8129 if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
8130 return buf_size;
8131 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
8134 if(h->is_avc && !h->got_avcC) {
8135 int i, cnt, nalsize;
8136 unsigned char *p = avctx->extradata;
8137 if(avctx->extradata_size < 7) {
8138 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
8139 return -1;
8141 if(*p != 1) {
8142 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
8143 return -1;
8145 /* sps and pps in the avcC always have length coded with 2 bytes,
8146 so put a fake nal_length_size = 2 while parsing them */
8147 h->nal_length_size = 2;
8148 // Decode sps from avcC
8149 cnt = *(p+5) & 0x1f; // Number of sps
8150 p += 6;
8151 for (i = 0; i < cnt; i++) {
8152 nalsize = BE_16(p) + 2;
8153 if(decode_nal_units(h, p, nalsize) < 0) {
8154 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
8155 return -1;
8157 p += nalsize;
8159 // Decode pps from avcC
8160 cnt = *(p++); // Number of pps
8161 for (i = 0; i < cnt; i++) {
8162 nalsize = BE_16(p) + 2;
8163 if(decode_nal_units(h, p, nalsize) != nalsize) {
8164 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
8165 return -1;
8167 p += nalsize;
8169 // Now store the right nal length size, which will be used to parse all other nals
8170 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
8171 // Do not reparse avcC
8172 h->got_avcC = 1;
8175 if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
8176 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
8177 return -1;
8180 buf_index=decode_nal_units(h, buf, buf_size);
8181 if(buf_index < 0)
8182 return -1;
8184 //FIXME do something with unavailable reference frames
8186 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
8187 if(!s->current_picture_ptr){
8188 av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
8189 return -1;
8193 Picture *out = s->current_picture_ptr;
8194 #if 0 //decode order
8195 *data_size = sizeof(AVFrame);
8196 #else
8197 /* Sort B-frames into display order */
8198 Picture *cur = s->current_picture_ptr;
8199 Picture *prev = h->delayed_output_pic;
8200 int i, pics, cross_idr, out_of_order, out_idx;
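/* Output reordering, roughly: each finished picture is appended to
 * h->delayed_pic[] and the delayed picture with the smallest POC is chosen
 * for output; when the heuristics below detect that more reordering depth
 * is needed than avctx->has_b_frames currently allows, has_b_frames is
 * increased and *data_size stays 0 for this call rather than emitting a
 * frame out of order. */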
8202 if(h->sps.bitstream_restriction_flag
8203 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
8204 s->avctx->has_b_frames = h->sps.num_reorder_frames;
8205 s->low_delay = 0;
8208 pics = 0;
8209 while(h->delayed_pic[pics]) pics++;
8210 h->delayed_pic[pics++] = cur;
8211 if(cur->reference == 0)
8212 cur->reference = 1;
8214 cross_idr = 0;
8215 for(i=0; h->delayed_pic[i]; i++)
8216 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
8217 cross_idr = 1;
8219 out = h->delayed_pic[0];
8220 out_idx = 0;
8221 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
8222 if(h->delayed_pic[i]->poc < out->poc){
8223 out = h->delayed_pic[i];
8224 out_idx = i;
8227 out_of_order = !cross_idr && prev && out->poc < prev->poc;
8228 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
8230 else if(prev && pics <= s->avctx->has_b_frames)
8231 out = prev;
8232 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
8233 || (s->low_delay &&
8234 ((!cross_idr && prev && out->poc > prev->poc + 2)
8235 || cur->pict_type == B_TYPE)))
8237 s->low_delay = 0;
8238 s->avctx->has_b_frames++;
8239 out = prev;
8241 else if(out_of_order)
8242 out = prev;
8244 if(out_of_order || pics > s->avctx->has_b_frames){
8245 for(i=out_idx; h->delayed_pic[i]; i++)
8246 h->delayed_pic[i] = h->delayed_pic[i+1];
8249 if(prev == out)
8250 *data_size = 0;
8251 else
8252 *data_size = sizeof(AVFrame);
8253 if(prev && prev != out && prev->reference == 1)
8254 prev->reference = 0;
8255 h->delayed_output_pic = out;
8256 #endif
8258 if(out)
8259 *pict= *(AVFrame*)out;
8260 else
8261 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
8264 assert(pict->data[0] || !*data_size);
8265 ff_print_debug_info(s, pict);
8266 //printf("out %d\n", (int)pict->data[0]);
8267 #if 0 //?
8269 /* Return the Picture timestamp as the frame number */
8270 /* we subtract 1 because it is added in utils.c */
8271 avctx->frame_number = s->picture_number - 1;
8272 #endif
8273 return get_consumed_bytes(s, buf_index, buf_size);
8275 #if 0
8276 static inline void fill_mb_avail(H264Context *h){
8277 MpegEncContext * const s = &h->s;
8278 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
8280 if(s->mb_y){
8281 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
8282 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
8283 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
8284 }else{
8285 h->mb_avail[0]=
8286 h->mb_avail[1]=
8287 h->mb_avail[2]= 0;
8289 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
8290 h->mb_avail[4]= 1; //FIXME move out
8291 h->mb_avail[5]= 0; //FIXME move out
8293 #endif
8295 #if 0 //selftest
8296 #define COUNT 8000
8297 #define SIZE (COUNT*40)
8298 int main(){
8299 int i;
8300 uint8_t temp[SIZE];
8301 PutBitContext pb;
8302 GetBitContext gb;
8303 // int int_temp[10000];
8304 DSPContext dsp;
8305 AVCodecContext avctx;
8307 dsputil_init(&dsp, &avctx);
8309 init_put_bits(&pb, temp, SIZE);
8310 printf("testing unsigned exp golomb\n");
8311 for(i=0; i<COUNT; i++){
8312 START_TIMER
8313 set_ue_golomb(&pb, i);
8314 STOP_TIMER("set_ue_golomb");
8316 flush_put_bits(&pb);
8318 init_get_bits(&gb, temp, 8*SIZE);
8319 for(i=0; i<COUNT; i++){
8320 int j, s;
8322 s= show_bits(&gb, 24);
8324 START_TIMER
8325 j= get_ue_golomb(&gb);
8326 if(j != i){
8327 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8328 // return -1;
8330 STOP_TIMER("get_ue_golomb");
8334 init_put_bits(&pb, temp, SIZE);
8335 printf("testing signed exp golomb\n");
8336 for(i=0; i<COUNT; i++){
8337 START_TIMER
8338 set_se_golomb(&pb, i - COUNT/2);
8339 STOP_TIMER("set_se_golomb");
8341 flush_put_bits(&pb);
8343 init_get_bits(&gb, temp, 8*SIZE);
8344 for(i=0; i<COUNT; i++){
8345 int j, s;
8347 s= show_bits(&gb, 24);
8349 START_TIMER
8350 j= get_se_golomb(&gb);
8351 if(j != i - COUNT/2){
8352 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
8353 // return -1;
8355 STOP_TIMER("get_se_golomb");
8358 printf("testing 4x4 (I)DCT\n");
8360 DCTELEM block[16];
8361 uint8_t src[16], ref[16];
8362 uint64_t error= 0, max_error=0;
8364 for(i=0; i<COUNT; i++){
8365 int j;
8366 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8367 for(j=0; j<16; j++){
8368 ref[j]= random()%255;
8369 src[j]= random()%255;
8372 h264_diff_dct_c(block, src, ref, 4);
8374 //normalize
8375 for(j=0; j<16; j++){
8376 // printf("%d ", block[j]);
8377 block[j]= block[j]*4;
8378 if(j&1) block[j]= (block[j]*4 + 2)/5;
8379 if(j&4) block[j]= (block[j]*4 + 2)/5;
8381 // printf("\n");
8383 s->dsp.h264_idct_add(ref, block, 4);
8384 /* for(j=0; j<16; j++){
8385 printf("%d ", ref[j]);
8387 printf("\n");*/
8389 for(j=0; j<16; j++){
8390 int diff= ABS(src[j] - ref[j]);
8392 error+= diff*diff;
8393 max_error= FFMAX(max_error, diff);
8396 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
8397 #if 0
8398 printf("testing quantizer\n");
8399 for(qp=0; qp<52; qp++){
8400 for(i=0; i<16; i++)
8401 src1_block[i]= src2_block[i]= random()%255;
8404 #endif
8405 printf("Testing NAL layer\n");
8407 uint8_t bitstream[COUNT];
8408 uint8_t nal[COUNT*2];
8409 H264Context h;
8410 memset(&h, 0, sizeof(H264Context));
8412 for(i=0; i<COUNT; i++){
8413 int zeros= i;
8414 int nal_length;
8415 int consumed;
8416 int out_length;
8417 uint8_t *out;
8418 int j;
8420 for(j=0; j<COUNT; j++){
8421 bitstream[j]= (random() % 255) + 1;
8424 for(j=0; j<zeros; j++){
8425 int pos= random() % COUNT;
8426 while(bitstream[pos] == 0){
8427 pos++;
8428 pos %= COUNT;
8430 bitstream[pos]=0;
8433 START_TIMER
8435 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
8436 if(nal_length<0){
8437 printf("encoding failed\n");
8438 return -1;
8441 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
8443 STOP_TIMER("NAL")
8445 if(out_length != COUNT){
8446 printf("incorrect length %d %d\n", out_length, COUNT);
8447 return -1;
8450 if(consumed != nal_length){
8451 printf("incorrect consumed length %d %d\n", nal_length, consumed);
8452 return -1;
8455 if(memcmp(bitstream, out, COUNT)){
8456 printf("missmatch\n");
8457 return -1;
8461 printf("Testing RBSP\n");
8464 return 0;
8466 #endif
8469 static int decode_end(AVCodecContext *avctx)
8471 H264Context *h = avctx->priv_data;
8472 MpegEncContext *s = &h->s;
8474 av_freep(&h->rbsp_buffer);
8475 free_tables(h); //FIXME cleanup init stuff perhaps
8476 MPV_common_end(s);
8478 // memset(h, 0, sizeof(H264Context));
8480 return 0;
8484 AVCodec h264_decoder = {
8485 "h264",
8486 CODEC_TYPE_VIDEO,
8487 CODEC_ID_H264,
8488 sizeof(H264Context),
8489 decode_init,
8490 NULL,
8491 decode_end,
8492 decode_frame,
8493 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8494 .flush= flush_dpb,
8497 AVCodecParser h264_parser = {
8498 { CODEC_ID_H264 },
8499 sizeof(H264Context),
8500 NULL,
8501 h264_parse,
8502 ff_parse_close,
8503 h264_split,
8506 #include "svq3.c"