2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
57 /* Compiling in interlaced support reduces the speed
58 * of progressive decoding by about 2%. */
59 #define ALLOW_INTERLACE
61 #ifdef ALLOW_INTERLACE
62 #define MB_MBAFF h->mb_mbaff
63 #define MB_FIELD h->mb_field_decoding_flag
64 #define FRAME_MBAFF h->mb_aff_frame
70 #define IS_INTERLACED(mb_type) 0
74 * Sequence parameter set
80 int transform_bypass
; ///< qpprime_y_zero_transform_bypass_flag
81 int log2_max_frame_num
; ///< log2_max_frame_num_minus4 + 4
82 int poc_type
; ///< pic_order_cnt_type
83 int log2_max_poc_lsb
; ///< log2_max_pic_order_cnt_lsb_minus4
84 int delta_pic_order_always_zero_flag
;
85 int offset_for_non_ref_pic
;
86 int offset_for_top_to_bottom_field
;
87 int poc_cycle_length
; ///< num_ref_frames_in_pic_order_cnt_cycle
88 int ref_frame_count
; ///< num_ref_frames
89 int gaps_in_frame_num_allowed_flag
;
90 int mb_width
; ///< frame_width_in_mbs_minus1 + 1
91 int mb_height
; ///< frame_height_in_mbs_minus1 + 1
92 int frame_mbs_only_flag
;
93 int mb_aff
; ///<mb_adaptive_frame_field_flag
94 int direct_8x8_inference_flag
;
95 int crop
; ///< frame_cropping_flag
96 int crop_left
; ///< frame_cropping_rect_left_offset
97 int crop_right
; ///< frame_cropping_rect_right_offset
98 int crop_top
; ///< frame_cropping_rect_top_offset
99 int crop_bottom
; ///< frame_cropping_rect_bottom_offset
100 int vui_parameters_present_flag
;
102 int timing_info_present_flag
;
103 uint32_t num_units_in_tick
;
105 int fixed_frame_rate_flag
;
106 short offset_for_ref_frame
[256]; //FIXME dyn aloc?
107 int bitstream_restriction_flag
;
108 int num_reorder_frames
;
109 int scaling_matrix_present
;
110 uint8_t scaling_matrix4
[6][16];
111 uint8_t scaling_matrix8
[2][64];
/**
 * Picture parameter set
 * Holds the decoded fields of an H.264 pic_parameter_set_rbsp().
 * Field names follow the spec's syntax-element names (see the ///< notes
 * for the exact bitstream element each field stores).
 */
typedef struct PPS{
    int sps_id;                 ///< id of the SPS this PPS refers to
    int cabac;                  ///< entropy_coding_mode_flag
    int pic_order_present;      ///< pic_order_present_flag
    int slice_group_count;      ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    int ref_count[2];           ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred;          ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp;                ///< pic_init_qp_minus26 + 26
    int init_qs;                ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred; ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
    int transform_8x8_mode;     ///< transform_8x8_mode_flag
    uint8_t scaling_matrix4[6][16];
    uint8_t scaling_matrix8[2][64];
}PPS;
138 * Memory management control operation opcode.
140 typedef enum MMCOOpcode
{
151 * Memory management control operation.
162 typedef struct H264Context
{
170 #define NAL_IDR_SLICE 5
175 #define NAL_END_SEQUENCE 10
176 #define NAL_END_STREAM 11
177 #define NAL_FILLER_DATA 12
178 #define NAL_SPS_EXT 13
179 #define NAL_AUXILIARY_SLICE 19
180 uint8_t *rbsp_buffer
;
181 unsigned int rbsp_buffer_size
;
184 * Used to parse AVC variant of h264
186 int is_avc
; ///< this flag is != 0 if codec is avc1
187 int got_avcC
; ///< flag used to parse avcC data only once
188 int nal_length_size
; ///< Number of bytes used for nal length (1, 2 or 4)
196 int chroma_pred_mode
;
197 int intra16x16_pred_mode
;
202 int8_t intra4x4_pred_mode_cache
[5*8];
203 int8_t (*intra4x4_pred_mode
)[8];
204 void (*pred4x4
[9+3])(uint8_t *src
, uint8_t *topright
, int stride
);//FIXME move to dsp?
205 void (*pred8x8l
[9+3])(uint8_t *src
, int topleft
, int topright
, int stride
);
206 void (*pred8x8
[4+3])(uint8_t *src
, int stride
);
207 void (*pred16x16
[4+3])(uint8_t *src
, int stride
);
208 unsigned int topleft_samples_available
;
209 unsigned int top_samples_available
;
210 unsigned int topright_samples_available
;
211 unsigned int left_samples_available
;
212 uint8_t (*top_borders
[2])[16+2*8];
213 uint8_t left_border
[2*(17+2*9)];
216 * non zero coeff count cache.
217 * is 64 if not available.
219 DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache
[6*8]);
220 uint8_t (*non_zero_count
)[16];
223 * Motion vector cache.
225 DECLARE_ALIGNED_8(int16_t, mv_cache
[2][5*8][2]);
226 DECLARE_ALIGNED_8(int8_t, ref_cache
[2][5*8]);
227 #define LIST_NOT_USED -1 //FIXME rename?
228 #define PART_NOT_AVAILABLE -2
231 * is 1 if the specific list MV&references are set to 0,0,-2.
233 int mv_cache_clean
[2];
236 * number of neighbors (top and/or left) that used 8x8 dct
238 int neighbor_transform_size
;
241 * block_offset[ 0..23] for frame macroblocks
242 * block_offset[24..47] for field macroblocks
244 int block_offset
[2*(16+8)];
246 uint32_t *mb2b_xy
; //FIXME are these 4 a good idea?
248 int b_stride
; //FIXME use s->b4_stride
251 int mb_linesize
; ///< may be equal to s->linesize or s->linesize*2, for mbaff
260 int unknown_svq3_flag
;
261 int next_slice_index
;
263 SPS sps_buffer
[MAX_SPS_COUNT
];
264 SPS sps
; ///< current sps
266 PPS pps_buffer
[MAX_PPS_COUNT
];
270 PPS pps
; //FIXME move to Picture perhaps? (->no) do we need that?
272 uint32_t dequant4_buffer
[6][52][16];
273 uint32_t dequant8_buffer
[2][52][64];
274 uint32_t (*dequant4_coeff
[6])[16];
275 uint32_t (*dequant8_coeff
[2])[64];
276 int dequant_coeff_pps
; ///< reinit tables when pps changes
279 uint8_t *slice_table_base
;
280 uint8_t *slice_table
; ///< slice_table_base + 2*mb_stride + 1
282 int slice_type_fixed
;
284 //interlacing specific flags
286 int mb_field_decoding_flag
;
287 int mb_mbaff
; ///< mb_aff_frame && mb_field_decoding_flag
294 int delta_poc_bottom
;
297 int prev_poc_msb
; ///< poc_msb of the last reference pic for POC type 0
298 int prev_poc_lsb
; ///< poc_lsb of the last reference pic for POC type 0
299 int frame_num_offset
; ///< for POC type 2
300 int prev_frame_num_offset
; ///< for POC type 2
301 int prev_frame_num
; ///< frame_num of the last pic for POC type 1/2
304 * frame_num for frames or 2*frame_num for field pics.
309 * max_frame_num or 2*max_frame_num for field pics.
313 //Weighted pred stuff
315 int use_weight_chroma
;
316 int luma_log2_weight_denom
;
317 int chroma_log2_weight_denom
;
318 int luma_weight
[2][48];
319 int luma_offset
[2][48];
320 int chroma_weight
[2][48][2];
321 int chroma_offset
[2][48][2];
322 int implicit_weight
[48][48];
325 int deblocking_filter
; ///< disable_deblocking_filter_idc with 1<->0
326 int slice_alpha_c0_offset
;
327 int slice_beta_offset
;
329 int redundant_pic_count
;
331 int direct_spatial_mv_pred
;
332 int dist_scale_factor
[16];
333 int dist_scale_factor_field
[32];
334 int map_col_to_list0
[2][16];
335 int map_col_to_list0_field
[2][32];
338 * num_ref_idx_l0/1_active_minus1 + 1
340 int ref_count
[2]; ///< counts frames or fields, depending on current mb mode
341 Picture
*short_ref
[32];
342 Picture
*long_ref
[32];
343 Picture default_ref_list
[2][32];
344 Picture ref_list
[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
345 Picture
*delayed_pic
[16]; //FIXME size?
346 Picture
*delayed_output_pic
;
349 * memory management control operations buffer.
351 MMCO mmco
[MAX_MMCO_COUNT
];
354 int long_ref_count
; ///< number of actual long term references
355 int short_ref_count
; ///< number of actual short term references
358 GetBitContext intra_gb
;
359 GetBitContext inter_gb
;
360 GetBitContext
*intra_gb_ptr
;
361 GetBitContext
*inter_gb_ptr
;
363 DECLARE_ALIGNED_8(DCTELEM
, mb
[16*24]);
369 uint8_t cabac_state
[460];
372 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
376 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
377 uint8_t *chroma_pred_mode_table
;
378 int last_qscale_diff
;
379 int16_t (*mvd_table
[2])[2];
380 DECLARE_ALIGNED_8(int16_t, mvd_cache
[2][5*8][2]);
381 uint8_t *direct_table
;
382 uint8_t direct_cache
[5*8];
384 uint8_t zigzag_scan
[16];
385 uint8_t zigzag_scan8x8
[64];
386 uint8_t zigzag_scan8x8_cavlc
[64];
387 uint8_t field_scan
[16];
388 uint8_t field_scan8x8
[64];
389 uint8_t field_scan8x8_cavlc
[64];
390 const uint8_t *zigzag_scan_q0
;
391 const uint8_t *zigzag_scan8x8_q0
;
392 const uint8_t *zigzag_scan8x8_cavlc_q0
;
393 const uint8_t *field_scan_q0
;
394 const uint8_t *field_scan8x8_q0
;
395 const uint8_t *field_scan8x8_cavlc_q0
;
400 static VLC coeff_token_vlc
[4];
401 static VLC chroma_dc_coeff_token_vlc
;
403 static VLC total_zeros_vlc
[15];
404 static VLC chroma_dc_total_zeros_vlc
[3];
406 static VLC run_vlc
[6];
409 static void svq3_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
);
410 static void svq3_add_idct_c(uint8_t *dst
, DCTELEM
*block
, int stride
, int qp
, int dc
);
411 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
413 static always_inline
uint32_t pack16to32(int a
, int b
){
414 #ifdef WORDS_BIGENDIAN
415 return (b
&0xFFFF) + (a
<<16);
417 return (a
&0xFFFF) + (b
<<16);
423 * @param h height of the rectangle, should be a constant
424 * @param w width of the rectangle, should be a constant
425 * @param size the size of val (1 or 4), should be a constant
427 static always_inline
void fill_rectangle(void *vp
, int w
, int h
, int stride
, uint32_t val
, int size
){
428 uint8_t *p
= (uint8_t*)vp
;
429 assert(size
==1 || size
==4);
435 assert((((long)vp
)&(FFMIN(w
, STRIDE_ALIGN
)-1)) == 0);
436 assert((stride
&(w
-1))==0);
438 const uint16_t v
= size
==4 ? val
: val
*0x0101;
439 *(uint16_t*)(p
+ 0*stride
)= v
;
441 *(uint16_t*)(p
+ 1*stride
)= v
;
443 *(uint16_t*)(p
+ 2*stride
)=
444 *(uint16_t*)(p
+ 3*stride
)= v
;
446 const uint32_t v
= size
==4 ? val
: val
*0x01010101;
447 *(uint32_t*)(p
+ 0*stride
)= v
;
449 *(uint32_t*)(p
+ 1*stride
)= v
;
451 *(uint32_t*)(p
+ 2*stride
)=
452 *(uint32_t*)(p
+ 3*stride
)= v
;
454 //gcc can't optimize 64bit math on x86_32
455 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
456 const uint64_t v
= val
*0x0100000001ULL
;
457 *(uint64_t*)(p
+ 0*stride
)= v
;
459 *(uint64_t*)(p
+ 1*stride
)= v
;
461 *(uint64_t*)(p
+ 2*stride
)=
462 *(uint64_t*)(p
+ 3*stride
)= v
;
464 const uint64_t v
= val
*0x0100000001ULL
;
465 *(uint64_t*)(p
+ 0+0*stride
)=
466 *(uint64_t*)(p
+ 8+0*stride
)=
467 *(uint64_t*)(p
+ 0+1*stride
)=
468 *(uint64_t*)(p
+ 8+1*stride
)= v
;
470 *(uint64_t*)(p
+ 0+2*stride
)=
471 *(uint64_t*)(p
+ 8+2*stride
)=
472 *(uint64_t*)(p
+ 0+3*stride
)=
473 *(uint64_t*)(p
+ 8+3*stride
)= v
;
475 *(uint32_t*)(p
+ 0+0*stride
)=
476 *(uint32_t*)(p
+ 4+0*stride
)= val
;
478 *(uint32_t*)(p
+ 0+1*stride
)=
479 *(uint32_t*)(p
+ 4+1*stride
)= val
;
481 *(uint32_t*)(p
+ 0+2*stride
)=
482 *(uint32_t*)(p
+ 4+2*stride
)=
483 *(uint32_t*)(p
+ 0+3*stride
)=
484 *(uint32_t*)(p
+ 4+3*stride
)= val
;
486 *(uint32_t*)(p
+ 0+0*stride
)=
487 *(uint32_t*)(p
+ 4+0*stride
)=
488 *(uint32_t*)(p
+ 8+0*stride
)=
489 *(uint32_t*)(p
+12+0*stride
)=
490 *(uint32_t*)(p
+ 0+1*stride
)=
491 *(uint32_t*)(p
+ 4+1*stride
)=
492 *(uint32_t*)(p
+ 8+1*stride
)=
493 *(uint32_t*)(p
+12+1*stride
)= val
;
495 *(uint32_t*)(p
+ 0+2*stride
)=
496 *(uint32_t*)(p
+ 4+2*stride
)=
497 *(uint32_t*)(p
+ 8+2*stride
)=
498 *(uint32_t*)(p
+12+2*stride
)=
499 *(uint32_t*)(p
+ 0+3*stride
)=
500 *(uint32_t*)(p
+ 4+3*stride
)=
501 *(uint32_t*)(p
+ 8+3*stride
)=
502 *(uint32_t*)(p
+12+3*stride
)= val
;
509 static void fill_caches(H264Context
*h
, int mb_type
, int for_deblock
){
510 MpegEncContext
* const s
= &h
->s
;
511 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
512 int topleft_xy
, top_xy
, topright_xy
, left_xy
[2];
513 int topleft_type
, top_type
, topright_type
, left_type
[2];
517 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
518 // the actual condition is whether we're on the edge of a slice,
519 // and even then the intra and nnz parts are unnecessary.
520 if(for_deblock
&& h
->slice_num
== 1 && !FRAME_MBAFF
)
523 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
525 top_xy
= mb_xy
- s
->mb_stride
;
526 topleft_xy
= top_xy
- 1;
527 topright_xy
= top_xy
+ 1;
528 left_xy
[1] = left_xy
[0] = mb_xy
-1;
538 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
539 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
540 const int topleft_pair_xy
= top_pair_xy
- 1;
541 const int topright_pair_xy
= top_pair_xy
+ 1;
542 const int topleft_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topleft_pair_xy
]);
543 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
544 const int topright_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topright_pair_xy
]);
545 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
546 const int curr_mb_frame_flag
= !IS_INTERLACED(mb_type
);
547 const int bottom
= (s
->mb_y
& 1);
548 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag
, left_mb_frame_flag
, topleft_mb_frame_flag
, top_mb_frame_flag
, topright_mb_frame_flag
);
550 ? !curr_mb_frame_flag
// bottom macroblock
551 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
553 top_xy
-= s
->mb_stride
;
556 ? !curr_mb_frame_flag
// bottom macroblock
557 : (!curr_mb_frame_flag
&& !topleft_mb_frame_flag
) // top macroblock
559 topleft_xy
-= s
->mb_stride
;
562 ? !curr_mb_frame_flag
// bottom macroblock
563 : (!curr_mb_frame_flag
&& !topright_mb_frame_flag
) // top macroblock
565 topright_xy
-= s
->mb_stride
;
567 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
568 left_xy
[1] = left_xy
[0] = pair_xy
- 1;
569 if (curr_mb_frame_flag
) {
590 left_xy
[1] += s
->mb_stride
;
603 h
->top_mb_xy
= top_xy
;
604 h
->left_mb_xy
[0] = left_xy
[0];
605 h
->left_mb_xy
[1] = left_xy
[1];
607 topleft_type
= h
->slice_table
[topleft_xy
] < 255 ? s
->current_picture
.mb_type
[topleft_xy
] : 0;
608 top_type
= h
->slice_table
[top_xy
] < 255 ? s
->current_picture
.mb_type
[top_xy
] : 0;
609 topright_type
= h
->slice_table
[topright_xy
] < 255 ? s
->current_picture
.mb_type
[topright_xy
]: 0;
610 left_type
[0] = h
->slice_table
[left_xy
[0] ] < 255 ? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
611 left_type
[1] = h
->slice_table
[left_xy
[1] ] < 255 ? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
613 if(FRAME_MBAFF
&& !IS_INTRA(mb_type
)){
615 int v
= *(uint16_t*)&h
->non_zero_count
[mb_xy
][14];
617 h
->non_zero_count_cache
[scan8
[i
]] = (v
>>i
)&1;
618 for(list
=0; list
<1+(h
->slice_type
==B_TYPE
); list
++){
619 if(USES_LIST(mb_type
,list
)){
620 uint32_t *src
= (uint32_t*)s
->current_picture
.motion_val
[list
][h
->mb2b_xy
[mb_xy
]];
621 uint32_t *dst
= (uint32_t*)h
->mv_cache
[list
][scan8
[0]];
622 uint8_t *ref
= &s
->current_picture
.ref_index
[list
][h
->mb2b8_xy
[mb_xy
]];
623 for(i
=0; i
<4; i
++, dst
+=8, src
+=h
->b_stride
){
629 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 0]] =
630 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 2]] = pack16to32(ref
[0],ref
[1])*0x0101;
632 *(uint32_t*)&h
->ref_cache
[list
][scan8
[ 8]] =
633 *(uint32_t*)&h
->ref_cache
[list
][scan8
[10]] = pack16to32(ref
[0],ref
[1])*0x0101;
635 fill_rectangle(&h
-> mv_cache
[list
][scan8
[ 0]], 4, 4, 8, 0, 4);
636 fill_rectangle(&h
->ref_cache
[list
][scan8
[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED
, 1);
641 topleft_type
= h
->slice_table
[topleft_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topleft_xy
] : 0;
642 top_type
= h
->slice_table
[top_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[top_xy
] : 0;
643 topright_type
= h
->slice_table
[topright_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topright_xy
]: 0;
644 left_type
[0] = h
->slice_table
[left_xy
[0] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
645 left_type
[1] = h
->slice_table
[left_xy
[1] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
648 if(IS_INTRA(mb_type
)){
649 h
->topleft_samples_available
=
650 h
->top_samples_available
=
651 h
->left_samples_available
= 0xFFFF;
652 h
->topright_samples_available
= 0xEEEA;
654 if(!IS_INTRA(top_type
) && (top_type
==0 || h
->pps
.constrained_intra_pred
)){
655 h
->topleft_samples_available
= 0xB3FF;
656 h
->top_samples_available
= 0x33FF;
657 h
->topright_samples_available
= 0x26EA;
660 if(!IS_INTRA(left_type
[i
]) && (left_type
[i
]==0 || h
->pps
.constrained_intra_pred
)){
661 h
->topleft_samples_available
&= 0xDF5F;
662 h
->left_samples_available
&= 0x5F5F;
666 if(!IS_INTRA(topleft_type
) && (topleft_type
==0 || h
->pps
.constrained_intra_pred
))
667 h
->topleft_samples_available
&= 0x7FFF;
669 if(!IS_INTRA(topright_type
) && (topright_type
==0 || h
->pps
.constrained_intra_pred
))
670 h
->topright_samples_available
&= 0xFBFF;
672 if(IS_INTRA4x4(mb_type
)){
673 if(IS_INTRA4x4(top_type
)){
674 h
->intra4x4_pred_mode_cache
[4+8*0]= h
->intra4x4_pred_mode
[top_xy
][4];
675 h
->intra4x4_pred_mode_cache
[5+8*0]= h
->intra4x4_pred_mode
[top_xy
][5];
676 h
->intra4x4_pred_mode_cache
[6+8*0]= h
->intra4x4_pred_mode
[top_xy
][6];
677 h
->intra4x4_pred_mode_cache
[7+8*0]= h
->intra4x4_pred_mode
[top_xy
][3];
680 if(!top_type
|| (IS_INTER(top_type
) && h
->pps
.constrained_intra_pred
))
685 h
->intra4x4_pred_mode_cache
[4+8*0]=
686 h
->intra4x4_pred_mode_cache
[5+8*0]=
687 h
->intra4x4_pred_mode_cache
[6+8*0]=
688 h
->intra4x4_pred_mode_cache
[7+8*0]= pred
;
691 if(IS_INTRA4x4(left_type
[i
])){
692 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[0+2*i
]];
693 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[1+2*i
]];
696 if(!left_type
[i
] || (IS_INTER(left_type
[i
]) && h
->pps
.constrained_intra_pred
))
701 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]=
702 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= pred
;
717 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
719 h
->non_zero_count_cache
[4+8*0]= h
->non_zero_count
[top_xy
][4];
720 h
->non_zero_count_cache
[5+8*0]= h
->non_zero_count
[top_xy
][5];
721 h
->non_zero_count_cache
[6+8*0]= h
->non_zero_count
[top_xy
][6];
722 h
->non_zero_count_cache
[7+8*0]= h
->non_zero_count
[top_xy
][3];
724 h
->non_zero_count_cache
[1+8*0]= h
->non_zero_count
[top_xy
][9];
725 h
->non_zero_count_cache
[2+8*0]= h
->non_zero_count
[top_xy
][8];
727 h
->non_zero_count_cache
[1+8*3]= h
->non_zero_count
[top_xy
][12];
728 h
->non_zero_count_cache
[2+8*3]= h
->non_zero_count
[top_xy
][11];
731 h
->non_zero_count_cache
[4+8*0]=
732 h
->non_zero_count_cache
[5+8*0]=
733 h
->non_zero_count_cache
[6+8*0]=
734 h
->non_zero_count_cache
[7+8*0]=
736 h
->non_zero_count_cache
[1+8*0]=
737 h
->non_zero_count_cache
[2+8*0]=
739 h
->non_zero_count_cache
[1+8*3]=
740 h
->non_zero_count_cache
[2+8*3]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
744 for (i
=0; i
<2; i
++) {
746 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[0+2*i
]];
747 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[1+2*i
]];
748 h
->non_zero_count_cache
[0+8*1 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[4+2*i
]];
749 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[5+2*i
]];
751 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]=
752 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]=
753 h
->non_zero_count_cache
[0+8*1 + 8*i
]=
754 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
761 h
->top_cbp
= h
->cbp_table
[top_xy
];
762 } else if(IS_INTRA(mb_type
)) {
769 h
->left_cbp
= h
->cbp_table
[left_xy
[0]] & 0x1f0;
770 } else if(IS_INTRA(mb_type
)) {
776 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[0]]>>((left_block
[0]&(~1))+1))&0x1) << 1;
779 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[1]]>>((left_block
[2]&(~1))+1))&0x1) << 3;
784 //FIXME direct mb can skip much of this
785 if(IS_INTER(mb_type
) || IS_DIRECT(mb_type
)){
787 for(list
=0; list
<1+(h
->slice_type
==B_TYPE
); list
++){
788 if(!USES_LIST(mb_type
, list
) && !IS_DIRECT(mb_type
) && !h
->deblocking_filter
){
789 /*if(!h->mv_cache_clean[list]){
790 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
791 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
792 h->mv_cache_clean[list]= 1;
796 h
->mv_cache_clean
[list
]= 0;
798 if(USES_LIST(top_type
, list
)){
799 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
800 const int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
801 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0];
802 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 1];
803 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2];
804 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 3];
805 h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]=
806 h
->ref_cache
[list
][scan8
[0] + 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 0];
807 h
->ref_cache
[list
][scan8
[0] + 2 - 1*8]=
808 h
->ref_cache
[list
][scan8
[0] + 3 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 1];
810 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]=
811 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]=
812 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]=
813 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
814 *(uint32_t*)&h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]= ((top_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
)&0xFF)*0x01010101;
817 //FIXME unify cleanup or sth
818 if(USES_LIST(left_type
[0], list
)){
819 const int b_xy
= h
->mb2b_xy
[left_xy
[0]] + 3;
820 const int b8_xy
= h
->mb2b8_xy
[left_xy
[0]] + 1;
821 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 0*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[0]];
822 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[1]];
823 h
->ref_cache
[list
][scan8
[0] - 1 + 0*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[0]>>1)];
824 h
->ref_cache
[list
][scan8
[0] - 1 + 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[1]>>1)];
826 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 0*8]=
827 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 1*8]= 0;
828 h
->ref_cache
[list
][scan8
[0] - 1 + 0*8]=
829 h
->ref_cache
[list
][scan8
[0] - 1 + 1*8]= left_type
[0] ? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
832 if(USES_LIST(left_type
[1], list
)){
833 const int b_xy
= h
->mb2b_xy
[left_xy
[1]] + 3;
834 const int b8_xy
= h
->mb2b8_xy
[left_xy
[1]] + 1;
835 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 2*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[2]];
836 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 3*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[3]];
837 h
->ref_cache
[list
][scan8
[0] - 1 + 2*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[2]>>1)];
838 h
->ref_cache
[list
][scan8
[0] - 1 + 3*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[3]>>1)];
840 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 2*8]=
841 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 3*8]= 0;
842 h
->ref_cache
[list
][scan8
[0] - 1 + 2*8]=
843 h
->ref_cache
[list
][scan8
[0] - 1 + 3*8]= left_type
[0] ? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
844 assert((!left_type
[0]) == (!left_type
[1]));
847 if(for_deblock
|| (IS_DIRECT(mb_type
) && !h
->direct_spatial_mv_pred
))
850 if(USES_LIST(topleft_type
, list
)){
851 const int b_xy
= h
->mb2b_xy
[topleft_xy
] + 3 + 3*h
->b_stride
;
852 const int b8_xy
= h
->mb2b8_xy
[topleft_xy
] + 1 + h
->b8_stride
;
853 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
854 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
856 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= 0;
857 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= topleft_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
860 if(USES_LIST(topright_type
, list
)){
861 const int b_xy
= h
->mb2b_xy
[topright_xy
] + 3*h
->b_stride
;
862 const int b8_xy
= h
->mb2b8_xy
[topright_xy
] + h
->b8_stride
;
863 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
864 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
866 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= 0;
867 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= topright_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
871 h
->ref_cache
[list
][scan8
[5 ]+1] =
872 h
->ref_cache
[list
][scan8
[7 ]+1] =
873 h
->ref_cache
[list
][scan8
[13]+1] = //FIXME remove past 3 (init somewhere else)
874 h
->ref_cache
[list
][scan8
[4 ]] =
875 h
->ref_cache
[list
][scan8
[12]] = PART_NOT_AVAILABLE
;
876 *(uint32_t*)h
->mv_cache
[list
][scan8
[5 ]+1]=
877 *(uint32_t*)h
->mv_cache
[list
][scan8
[7 ]+1]=
878 *(uint32_t*)h
->mv_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
879 *(uint32_t*)h
->mv_cache
[list
][scan8
[4 ]]=
880 *(uint32_t*)h
->mv_cache
[list
][scan8
[12]]= 0;
883 /* XXX beurk, Load mvd */
884 if(USES_LIST(top_type
, list
)){
885 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
886 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 0];
887 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 1];
888 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 2];
889 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 3];
891 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]=
892 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]=
893 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]=
894 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
896 if(USES_LIST(left_type
[0], list
)){
897 const int b_xy
= h
->mb2b_xy
[left_xy
[0]] + 3;
898 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[0]];
899 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[1]];
901 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]=
902 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= 0;
904 if(USES_LIST(left_type
[1], list
)){
905 const int b_xy
= h
->mb2b_xy
[left_xy
[1]] + 3;
906 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[2]];
907 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[3]];
909 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]=
910 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= 0;
912 *(uint32_t*)h
->mvd_cache
[list
][scan8
[5 ]+1]=
913 *(uint32_t*)h
->mvd_cache
[list
][scan8
[7 ]+1]=
914 *(uint32_t*)h
->mvd_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
915 *(uint32_t*)h
->mvd_cache
[list
][scan8
[4 ]]=
916 *(uint32_t*)h
->mvd_cache
[list
][scan8
[12]]= 0;
918 if(h
->slice_type
== B_TYPE
){
919 fill_rectangle(&h
->direct_cache
[scan8
[0]], 4, 4, 8, 0, 1);
921 if(IS_DIRECT(top_type
)){
922 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0x01010101;
923 }else if(IS_8X8(top_type
)){
924 int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
925 h
->direct_cache
[scan8
[0] + 0 - 1*8]= h
->direct_table
[b8_xy
];
926 h
->direct_cache
[scan8
[0] + 2 - 1*8]= h
->direct_table
[b8_xy
+ 1];
928 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0;
931 if(IS_DIRECT(left_type
[0]))
932 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 1;
933 else if(IS_8X8(left_type
[0]))
934 h
->direct_cache
[scan8
[0] - 1 + 0*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[0]] + 1 + h
->b8_stride
*(left_block
[0]>>1)];
936 h
->direct_cache
[scan8
[0] - 1 + 0*8]= 0;
938 if(IS_DIRECT(left_type
[1]))
939 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 1;
940 else if(IS_8X8(left_type
[1]))
941 h
->direct_cache
[scan8
[0] - 1 + 2*8]= h
->direct_table
[h
->mb2b8_xy
[left_xy
[1]] + 1 + h
->b8_stride
*(left_block
[2]>>1)];
943 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 0;
949 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
950 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
951 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
952 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
953 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
954 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
955 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
956 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
957 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
958 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
960 #define MAP_F2F(idx, mb_type)\
961 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
962 h->ref_cache[list][idx] <<= 1;\
963 h->mv_cache[list][idx][1] /= 2;\
964 h->mvd_cache[list][idx][1] /= 2;\
969 #define MAP_F2F(idx, mb_type)\
970 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
971 h->ref_cache[list][idx] >>= 1;\
972 h->mv_cache[list][idx][1] <<= 1;\
973 h->mvd_cache[list][idx][1] <<= 1;\
983 h
->neighbor_transform_size
= !!IS_8x8DCT(top_type
) + !!IS_8x8DCT(left_type
[0]);
986 static inline void write_back_intra_pred_mode(H264Context
*h
){
987 MpegEncContext
* const s
= &h
->s
;
988 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
990 h
->intra4x4_pred_mode
[mb_xy
][0]= h
->intra4x4_pred_mode_cache
[7+8*1];
991 h
->intra4x4_pred_mode
[mb_xy
][1]= h
->intra4x4_pred_mode_cache
[7+8*2];
992 h
->intra4x4_pred_mode
[mb_xy
][2]= h
->intra4x4_pred_mode_cache
[7+8*3];
993 h
->intra4x4_pred_mode
[mb_xy
][3]= h
->intra4x4_pred_mode_cache
[7+8*4];
994 h
->intra4x4_pred_mode
[mb_xy
][4]= h
->intra4x4_pred_mode_cache
[4+8*4];
995 h
->intra4x4_pred_mode
[mb_xy
][5]= h
->intra4x4_pred_mode_cache
[5+8*4];
996 h
->intra4x4_pred_mode
[mb_xy
][6]= h
->intra4x4_pred_mode_cache
[6+8*4];
1000 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
1002 static inline int check_intra4x4_pred_mode(H264Context
*h
){
1003 MpegEncContext
* const s
= &h
->s
;
1004 static const int8_t top
[12]= {-1, 0,LEFT_DC_PRED
,-1,-1,-1,-1,-1, 0};
1005 static const int8_t left
[12]= { 0,-1, TOP_DC_PRED
, 0,-1,-1,-1, 0,-1,DC_128_PRED
};
1008 if(!(h
->top_samples_available
&0x8000)){
1010 int status
= top
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + i
] ];
1012 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
1015 h
->intra4x4_pred_mode_cache
[scan8
[0] + i
]= status
;
1020 if(!(h
->left_samples_available
&0x8000)){
1022 int status
= left
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
] ];
1024 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
1027 h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
]= status
;
1033 } //FIXME cleanup like next
1036 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
1038 static inline int check_intra_pred_mode(H264Context
*h
, int mode
){
1039 MpegEncContext
* const s
= &h
->s
;
1040 static const int8_t top
[7]= {LEFT_DC_PRED8x8
, 1,-1,-1};
1041 static const int8_t left
[7]= { TOP_DC_PRED8x8
,-1, 2,-1,DC_128_PRED8x8
};
1043 if(mode
< 0 || mode
> 6) {
1044 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "out of range intra chroma pred mode at %d %d\n", s
->mb_x
, s
->mb_y
);
1048 if(!(h
->top_samples_available
&0x8000)){
1051 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
1056 if(!(h
->left_samples_available
&0x8000)){
1059 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
1068 * gets the predicted intra4x4 prediction mode.
1070 static inline int pred_intra_mode(H264Context
*h
, int n
){
1071 const int index8
= scan8
[n
];
1072 const int left
= h
->intra4x4_pred_mode_cache
[index8
- 1];
1073 const int top
= h
->intra4x4_pred_mode_cache
[index8
- 8];
1074 const int min
= FFMIN(left
, top
);
1076 tprintf("mode:%d %d min:%d\n", left
,top
, min
);
1078 if(min
<0) return DC_PRED
;
1082 static inline void write_back_non_zero_count(H264Context
*h
){
1083 MpegEncContext
* const s
= &h
->s
;
1084 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
1086 h
->non_zero_count
[mb_xy
][0]= h
->non_zero_count_cache
[7+8*1];
1087 h
->non_zero_count
[mb_xy
][1]= h
->non_zero_count_cache
[7+8*2];
1088 h
->non_zero_count
[mb_xy
][2]= h
->non_zero_count_cache
[7+8*3];
1089 h
->non_zero_count
[mb_xy
][3]= h
->non_zero_count_cache
[7+8*4];
1090 h
->non_zero_count
[mb_xy
][4]= h
->non_zero_count_cache
[4+8*4];
1091 h
->non_zero_count
[mb_xy
][5]= h
->non_zero_count_cache
[5+8*4];
1092 h
->non_zero_count
[mb_xy
][6]= h
->non_zero_count_cache
[6+8*4];
1094 h
->non_zero_count
[mb_xy
][9]= h
->non_zero_count_cache
[1+8*2];
1095 h
->non_zero_count
[mb_xy
][8]= h
->non_zero_count_cache
[2+8*2];
1096 h
->non_zero_count
[mb_xy
][7]= h
->non_zero_count_cache
[2+8*1];
1098 h
->non_zero_count
[mb_xy
][12]=h
->non_zero_count_cache
[1+8*5];
1099 h
->non_zero_count
[mb_xy
][11]=h
->non_zero_count_cache
[2+8*5];
1100 h
->non_zero_count
[mb_xy
][10]=h
->non_zero_count_cache
[2+8*4];
1103 // store all luma nnzs, for deblocking
1106 v
+= (!!h
->non_zero_count_cache
[scan8
[i
]]) << i
;
1107 *(uint16_t*)&h
->non_zero_count
[mb_xy
][14] = v
;
1112 * gets the predicted number of non zero coefficients.
1113 * @param n block index
1115 static inline int pred_non_zero_count(H264Context
*h
, int n
){
1116 const int index8
= scan8
[n
];
1117 const int left
= h
->non_zero_count_cache
[index8
- 1];
1118 const int top
= h
->non_zero_count_cache
[index8
- 8];
1121 if(i
<64) i
= (i
+1)>>1;
1123 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left
, top
, n
, scan8
[n
], i
&31);
1128 static inline int fetch_diagonal_mv(H264Context
*h
, const int16_t **C
, int i
, int list
, int part_width
){
1129 const int topright_ref
= h
->ref_cache
[list
][ i
- 8 + part_width
];
1131 /* there is no consistent mapping of mvs to neighboring locations that will
1132 * make mbaff happy, so we can't move all this logic to fill_caches */
1134 MpegEncContext
*s
= &h
->s
;
1135 const int *mb_types
= s
->current_picture_ptr
->mb_type
;
1137 *(uint32_t*)h
->mv_cache
[list
][scan8
[0]-2] = 0;
1138 *C
= h
->mv_cache
[list
][scan8
[0]-2];
1141 && (s
->mb_y
&1) && i
< scan8
[0]+8 && topright_ref
!= PART_NOT_AVAILABLE
){
1142 int topright_xy
= s
->mb_x
+ (s
->mb_y
-1)*s
->mb_stride
+ (i
== scan8
[0]+3);
1143 if(IS_INTERLACED(mb_types
[topright_xy
])){
1144 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1145 const int x4 = X4, y4 = Y4;\
1146 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1147 if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1148 return LIST_NOT_USED;\
1149 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1150 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1151 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1152 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
1154 SET_DIAG_MV(*2, >>1, s
->mb_x
*4+(i
&7)-4+part_width
, s
->mb_y
*4-1);
1157 if(topright_ref
== PART_NOT_AVAILABLE
1158 && ((s
->mb_y
&1) || i
>= scan8
[0]+8) && (i
&7)==4
1159 && h
->ref_cache
[list
][scan8
[0]-1] != PART_NOT_AVAILABLE
){
1161 && IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])){
1162 SET_DIAG_MV(*2, >>1, s
->mb_x
*4-1, (s
->mb_y
|1)*4+(s
->mb_y
&1)*2+(i
>>4)-1);
1165 && !IS_INTERLACED(mb_types
[h
->left_mb_xy
[0]])
1166 && i
>= scan8
[0]+8){
1167 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1168 SET_DIAG_MV(>>1, <<1, s
->mb_x
*4-1, (s
->mb_y
&~1)*4 - 1 + ((i
-scan8
[0])>>3)*2);
1174 if(topright_ref
!= PART_NOT_AVAILABLE
){
1175 *C
= h
->mv_cache
[list
][ i
- 8 + part_width
];
1176 return topright_ref
;
1178 tprintf("topright MV not available\n");
1180 *C
= h
->mv_cache
[list
][ i
- 8 - 1 ];
1181 return h
->ref_cache
[list
][ i
- 8 - 1 ];
1186 * gets the predicted MV.
1187 * @param n the block index
1188 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1189 * @param mx the x component of the predicted motion vector
1190 * @param my the y component of the predicted motion vector
1192 static inline void pred_motion(H264Context
* const h
, int n
, int part_width
, int list
, int ref
, int * const mx
, int * const my
){
1193 const int index8
= scan8
[n
];
1194 const int top_ref
= h
->ref_cache
[list
][ index8
- 8 ];
1195 const int left_ref
= h
->ref_cache
[list
][ index8
- 1 ];
1196 const int16_t * const A
= h
->mv_cache
[list
][ index8
- 1 ];
1197 const int16_t * const B
= h
->mv_cache
[list
][ index8
- 8 ];
1199 int diagonal_ref
, match_count
;
1201 assert(part_width
==1 || part_width
==2 || part_width
==4);
1211 diagonal_ref
= fetch_diagonal_mv(h
, &C
, index8
, list
, part_width
);
1212 match_count
= (diagonal_ref
==ref
) + (top_ref
==ref
) + (left_ref
==ref
);
1213 tprintf("pred_motion match_count=%d\n", match_count
);
1214 if(match_count
> 1){ //most common
1215 *mx
= mid_pred(A
[0], B
[0], C
[0]);
1216 *my
= mid_pred(A
[1], B
[1], C
[1]);
1217 }else if(match_count
==1){
1221 }else if(top_ref
==ref
){
1229 if(top_ref
== PART_NOT_AVAILABLE
&& diagonal_ref
== PART_NOT_AVAILABLE
&& left_ref
!= PART_NOT_AVAILABLE
){
1233 *mx
= mid_pred(A
[0], B
[0], C
[0]);
1234 *my
= mid_pred(A
[1], B
[1], C
[1]);
1238 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], diagonal_ref
, C
[0], C
[1], left_ref
, A
[0], A
[1], ref
, *mx
, *my
, h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
1242 * gets the directionally predicted 16x8 MV.
1243 * @param n the block index
1244 * @param mx the x component of the predicted motion vector
1245 * @param my the y component of the predicted motion vector
1247 static inline void pred_16x8_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
1249 const int top_ref
= h
->ref_cache
[list
][ scan8
[0] - 8 ];
1250 const int16_t * const B
= h
->mv_cache
[list
][ scan8
[0] - 8 ];
1252 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
1260 const int left_ref
= h
->ref_cache
[list
][ scan8
[8] - 1 ];
1261 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[8] - 1 ];
1263 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
1265 if(left_ref
== ref
){
1273 pred_motion(h
, n
, 4, list
, ref
, mx
, my
);
1277 * gets the directionally predicted 8x16 MV.
1278 * @param n the block index
1279 * @param mx the x component of the predicted motion vector
1280 * @param my the y component of the predicted motion vector
1282 static inline void pred_8x16_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
1284 const int left_ref
= h
->ref_cache
[list
][ scan8
[0] - 1 ];
1285 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[0] - 1 ];
1287 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
1289 if(left_ref
== ref
){
1298 diagonal_ref
= fetch_diagonal_mv(h
, &C
, scan8
[4], list
, 2);
1300 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref
, C
[0], C
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
1302 if(diagonal_ref
== ref
){
1310 pred_motion(h
, n
, 2, list
, ref
, mx
, my
);
1313 static inline void pred_pskip_motion(H264Context
* const h
, int * const mx
, int * const my
){
1314 const int top_ref
= h
->ref_cache
[0][ scan8
[0] - 8 ];
1315 const int left_ref
= h
->ref_cache
[0][ scan8
[0] - 1 ];
1317 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref
, left_ref
, h
->s
.mb_x
, h
->s
.mb_y
);
1319 if(top_ref
== PART_NOT_AVAILABLE
|| left_ref
== PART_NOT_AVAILABLE
1320 || (top_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 8 ] == 0)
1321 || (left_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 1 ] == 0)){
1327 pred_motion(h
, 0, 4, 0, 0, mx
, my
);
1332 static inline void direct_dist_scale_factor(H264Context
* const h
){
1333 const int poc
= h
->s
.current_picture_ptr
->poc
;
1334 const int poc1
= h
->ref_list
[1][0].poc
;
1336 for(i
=0; i
<h
->ref_count
[0]; i
++){
1337 int poc0
= h
->ref_list
[0][i
].poc
;
1338 int td
= clip(poc1
- poc0
, -128, 127);
1339 if(td
== 0 /* FIXME || pic0 is a long-term ref */){
1340 h
->dist_scale_factor
[i
] = 256;
1342 int tb
= clip(poc
- poc0
, -128, 127);
1343 int tx
= (16384 + (ABS(td
) >> 1)) / td
;
1344 h
->dist_scale_factor
[i
] = clip((tb
*tx
+ 32) >> 6, -1024, 1023);
1348 for(i
=0; i
<h
->ref_count
[0]; i
++){
1349 h
->dist_scale_factor_field
[2*i
] =
1350 h
->dist_scale_factor_field
[2*i
+1] = h
->dist_scale_factor
[i
];
1354 static inline void direct_ref_list_init(H264Context
* const h
){
1355 MpegEncContext
* const s
= &h
->s
;
1356 Picture
* const ref1
= &h
->ref_list
[1][0];
1357 Picture
* const cur
= s
->current_picture_ptr
;
1359 if(cur
->pict_type
== I_TYPE
)
1360 cur
->ref_count
[0] = 0;
1361 if(cur
->pict_type
!= B_TYPE
)
1362 cur
->ref_count
[1] = 0;
1363 for(list
=0; list
<2; list
++){
1364 cur
->ref_count
[list
] = h
->ref_count
[list
];
1365 for(j
=0; j
<h
->ref_count
[list
]; j
++)
1366 cur
->ref_poc
[list
][j
] = h
->ref_list
[list
][j
].poc
;
1368 if(cur
->pict_type
!= B_TYPE
|| h
->direct_spatial_mv_pred
)
1370 for(list
=0; list
<2; list
++){
1371 for(i
=0; i
<ref1
->ref_count
[list
]; i
++){
1372 const int poc
= ref1
->ref_poc
[list
][i
];
1373 h
->map_col_to_list0
[list
][i
] = 0; /* bogus; fills in for missing frames */
1374 for(j
=0; j
<h
->ref_count
[list
]; j
++)
1375 if(h
->ref_list
[list
][j
].poc
== poc
){
1376 h
->map_col_to_list0
[list
][i
] = j
;
1382 for(list
=0; list
<2; list
++){
1383 for(i
=0; i
<ref1
->ref_count
[list
]; i
++){
1384 j
= h
->map_col_to_list0
[list
][i
];
1385 h
->map_col_to_list0_field
[list
][2*i
] = 2*j
;
1386 h
->map_col_to_list0_field
[list
][2*i
+1] = 2*j
+1;
1392 static inline void pred_direct_motion(H264Context
* const h
, int *mb_type
){
1393 MpegEncContext
* const s
= &h
->s
;
1394 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
1395 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
1396 const int b4_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
1397 const int mb_type_col
= h
->ref_list
[1][0].mb_type
[mb_xy
];
1398 const int16_t (*l1mv0
)[2] = (const int16_t (*)[2]) &h
->ref_list
[1][0].motion_val
[0][b4_xy
];
1399 const int16_t (*l1mv1
)[2] = (const int16_t (*)[2]) &h
->ref_list
[1][0].motion_val
[1][b4_xy
];
1400 const int8_t *l1ref0
= &h
->ref_list
[1][0].ref_index
[0][b8_xy
];
1401 const int8_t *l1ref1
= &h
->ref_list
[1][0].ref_index
[1][b8_xy
];
1402 const int is_b8x8
= IS_8X8(*mb_type
);
1406 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1407 if(IS_8X8(mb_type_col
) && !h
->sps
.direct_8x8_inference_flag
){
1408 /* FIXME save sub mb types from previous frames (or derive from MVs)
1409 * so we know exactly what block size to use */
1410 sub_mb_type
= MB_TYPE_8x8
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_4x4 */
1411 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1412 }else if(!is_b8x8
&& (mb_type_col
& MB_TYPE_16x16_OR_INTRA
)){
1413 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1414 *mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_16x16 */
1416 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1417 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1420 *mb_type
|= MB_TYPE_DIRECT2
;
1422 *mb_type
|= MB_TYPE_INTERLACED
;
1424 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type
, sub_mb_type
, is_b8x8
, mb_type_col
);
1426 if(h
->direct_spatial_mv_pred
){
1431 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1433 /* ref = min(neighbors) */
1434 for(list
=0; list
<2; list
++){
1435 int refa
= h
->ref_cache
[list
][scan8
[0] - 1];
1436 int refb
= h
->ref_cache
[list
][scan8
[0] - 8];
1437 int refc
= h
->ref_cache
[list
][scan8
[0] - 8 + 4];
1439 refc
= h
->ref_cache
[list
][scan8
[0] - 8 - 1];
1441 if(ref
[list
] < 0 || (refb
< ref
[list
] && refb
>= 0))
1443 if(ref
[list
] < 0 || (refc
< ref
[list
] && refc
>= 0))
1449 if(ref
[0] < 0 && ref
[1] < 0){
1450 ref
[0] = ref
[1] = 0;
1451 mv
[0][0] = mv
[0][1] =
1452 mv
[1][0] = mv
[1][1] = 0;
1454 for(list
=0; list
<2; list
++){
1456 pred_motion(h
, 0, 4, list
, ref
[list
], &mv
[list
][0], &mv
[list
][1]);
1458 mv
[list
][0] = mv
[list
][1] = 0;
1463 *mb_type
&= ~MB_TYPE_P0L1
;
1464 sub_mb_type
&= ~MB_TYPE_P0L1
;
1465 }else if(ref
[0] < 0){
1466 *mb_type
&= ~MB_TYPE_P0L0
;
1467 sub_mb_type
&= ~MB_TYPE_P0L0
;
1470 if(IS_16X16(*mb_type
)){
1471 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, (uint8_t)ref
[0], 1);
1472 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, (uint8_t)ref
[1], 1);
1473 if(!IS_INTRA(mb_type_col
)
1474 && ( (l1ref0
[0] == 0 && ABS(l1mv0
[0][0]) <= 1 && ABS(l1mv0
[0][1]) <= 1)
1475 || (l1ref0
[0] < 0 && l1ref1
[0] == 0 && ABS(l1mv1
[0][0]) <= 1 && ABS(l1mv1
[0][1]) <= 1
1476 && (h
->x264_build
>33 || !h
->x264_build
)))){
1478 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1480 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
1482 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1484 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
1486 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1487 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1490 for(i8
=0; i8
<4; i8
++){
1491 const int x8
= i8
&1;
1492 const int y8
= i8
>>1;
1494 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1496 h
->sub_mb_type
[i8
] = sub_mb_type
;
1498 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1499 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1500 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[0], 1);
1501 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, (uint8_t)ref
[1], 1);
1504 if(!IS_INTRA(mb_type_col
) && ( l1ref0
[x8
+ y8
*h
->b8_stride
] == 0
1505 || (l1ref0
[x8
+ y8
*h
->b8_stride
] < 0 && l1ref1
[x8
+ y8
*h
->b8_stride
] == 0
1506 && (h
->x264_build
>33 || !h
->x264_build
)))){
1507 const int16_t (*l1mv
)[2]= l1ref0
[x8
+ y8
*h
->b8_stride
] == 0 ? l1mv0
: l1mv1
;
1508 if(IS_SUB_8X8(sub_mb_type
)){
1509 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*h
->b_stride
];
1510 if(ABS(mv_col
[0]) <= 1 && ABS(mv_col
[1]) <= 1){
1512 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1514 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1517 for(i4
=0; i4
<4; i4
++){
1518 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*h
->b_stride
];
1519 if(ABS(mv_col
[0]) <= 1 && ABS(mv_col
[1]) <= 1){
1521 *(uint32_t*)h
->mv_cache
[0][scan8
[i8
*4+i4
]] = 0;
1523 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] = 0;
1529 }else{ /* direct temporal mv pred */
1530 const int *map_col_to_list0
[2] = {h
->map_col_to_list0
[0], h
->map_col_to_list0
[1]};
1531 const int *dist_scale_factor
= h
->dist_scale_factor
;
1534 if(IS_INTERLACED(*mb_type
)){
1535 map_col_to_list0
[0] = h
->map_col_to_list0_field
[0];
1536 map_col_to_list0
[1] = h
->map_col_to_list0_field
[1];
1537 dist_scale_factor
= h
->dist_scale_factor_field
;
1539 if(IS_INTERLACED(*mb_type
) != IS_INTERLACED(mb_type_col
)){
1540 /* FIXME assumes direct_8x8_inference == 1 */
1541 const int pair_xy
= s
->mb_x
+ (s
->mb_y
&~1)*s
->mb_stride
;
1542 int mb_types_col
[2];
1545 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
1546 | (is_b8x8
? 0 : MB_TYPE_DIRECT2
)
1547 | (*mb_type
& MB_TYPE_INTERLACED
);
1548 sub_mb_type
= MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
|MB_TYPE_16x16
;
1550 if(IS_INTERLACED(*mb_type
)){
1551 /* frame to field scaling */
1552 mb_types_col
[0] = h
->ref_list
[1][0].mb_type
[pair_xy
];
1553 mb_types_col
[1] = h
->ref_list
[1][0].mb_type
[pair_xy
+s
->mb_stride
];
1555 l1ref0
-= 2*h
->b8_stride
;
1556 l1ref1
-= 2*h
->b8_stride
;
1557 l1mv0
-= 4*h
->b_stride
;
1558 l1mv1
-= 4*h
->b_stride
;
1562 if( (mb_types_col
[0] & MB_TYPE_16x16_OR_INTRA
)
1563 && (mb_types_col
[1] & MB_TYPE_16x16_OR_INTRA
)
1565 *mb_type
|= MB_TYPE_16x8
;
1567 *mb_type
|= MB_TYPE_8x8
;
1569 /* field to frame scaling */
1570 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1571 * but in MBAFF, top and bottom POC are equal */
1572 int dy
= (s
->mb_y
&1) ? 1 : 2;
1574 mb_types_col
[1] = h
->ref_list
[1][0].mb_type
[pair_xy
+s
->mb_stride
];
1575 l1ref0
+= dy
*h
->b8_stride
;
1576 l1ref1
+= dy
*h
->b8_stride
;
1577 l1mv0
+= 2*dy
*h
->b_stride
;
1578 l1mv1
+= 2*dy
*h
->b_stride
;
1581 if((mb_types_col
[0] & (MB_TYPE_16x16_OR_INTRA
|MB_TYPE_16x8
))
1583 *mb_type
|= MB_TYPE_16x16
;
1585 *mb_type
|= MB_TYPE_8x8
;
1588 for(i8
=0; i8
<4; i8
++){
1589 const int x8
= i8
&1;
1590 const int y8
= i8
>>1;
1592 const int16_t (*l1mv
)[2]= l1mv0
;
1594 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1596 h
->sub_mb_type
[i8
] = sub_mb_type
;
1598 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1599 if(IS_INTRA(mb_types_col
[y8
])){
1600 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1601 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1602 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1606 ref0
= l1ref0
[x8
+ (y8
*2>>y_shift
)*h
->b8_stride
];
1608 ref0
= map_col_to_list0
[0][ref0
*2>>y_shift
];
1610 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ (y8
*2>>y_shift
)*h
->b8_stride
]*2>>y_shift
];
1613 scale
= dist_scale_factor
[ref0
];
1614 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1617 const int16_t *mv_col
= l1mv
[x8
*3 + (y8
*6>>y_shift
)*h
->b_stride
];
1618 int my_col
= (mv_col
[1]<<y_shift
)/2;
1619 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1620 int my
= (scale
* my_col
+ 128) >> 8;
1621 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1622 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-my_col
), 4);
1629 /* one-to-one mv scaling */
1631 if(IS_16X16(*mb_type
)){
1632 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, 0, 1);
1633 if(IS_INTRA(mb_type_col
)){
1634 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, 0, 1);
1635 fill_rectangle(&h
-> mv_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
1636 fill_rectangle(&h
-> mv_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
1638 const int ref0
= l1ref0
[0] >= 0 ? map_col_to_list0
[0][l1ref0
[0]]
1639 : map_col_to_list0
[1][l1ref1
[0]];
1640 const int scale
= dist_scale_factor
[ref0
];
1641 const int16_t *mv_col
= l1ref0
[0] >= 0 ? l1mv0
[0] : l1mv1
[0];
1643 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1644 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1645 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, ref0
, 1);
1646 fill_rectangle(&h
-> mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mv_l0
[0],mv_l0
[1]), 4);
1647 fill_rectangle(&h
-> mv_cache
[1][scan8
[0]], 4, 4, 8, pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]), 4);
1650 for(i8
=0; i8
<4; i8
++){
1651 const int x8
= i8
&1;
1652 const int y8
= i8
>>1;
1654 const int16_t (*l1mv
)[2]= l1mv0
;
1656 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1658 h
->sub_mb_type
[i8
] = sub_mb_type
;
1659 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1660 if(IS_INTRA(mb_type_col
)){
1661 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1662 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1663 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1667 ref0
= l1ref0
[x8
+ y8
*h
->b8_stride
];
1669 ref0
= map_col_to_list0
[0][ref0
];
1671 ref0
= map_col_to_list0
[1][l1ref1
[x8
+ y8
*h
->b8_stride
]];
1674 scale
= dist_scale_factor
[ref0
];
1676 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1677 if(IS_SUB_8X8(sub_mb_type
)){
1678 const int16_t *mv_col
= l1mv
[x8
*3 + y8
*3*h
->b_stride
];
1679 int mx
= (scale
* mv_col
[0] + 128) >> 8;
1680 int my
= (scale
* mv_col
[1] + 128) >> 8;
1681 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
,my
), 4);
1682 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mx
-mv_col
[0],my
-mv_col
[1]), 4);
1684 for(i4
=0; i4
<4; i4
++){
1685 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*h
->b_stride
];
1686 int16_t *mv_l0
= h
->mv_cache
[0][scan8
[i8
*4+i4
]];
1687 mv_l0
[0] = (scale
* mv_col
[0] + 128) >> 8;
1688 mv_l0
[1] = (scale
* mv_col
[1] + 128) >> 8;
1689 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] =
1690 pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1697 static inline void write_back_motion(H264Context
*h
, int mb_type
){
1698 MpegEncContext
* const s
= &h
->s
;
1699 const int b_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
1700 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
1703 if(!USES_LIST(mb_type
, 0))
1704 fill_rectangle(&s
->current_picture
.ref_index
[0][b8_xy
], 2, 2, h
->b8_stride
, (uint8_t)LIST_NOT_USED
, 1);
1706 for(list
=0; list
<2; list
++){
1708 if(!USES_LIST(mb_type
, list
))
1712 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+0 + 8*y
];
1713 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+2 + 8*y
];
1715 if( h
->pps
.cabac
) {
1717 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+0 + 8*y
];
1718 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+2 + 8*y
];
1723 uint8_t *ref_index
= &s
->current_picture
.ref_index
[list
][b8_xy
];
1724 ref_index
[0+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[0]];
1725 ref_index
[1+0*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[4]];
1726 ref_index
[0+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[8]];
1727 ref_index
[1+1*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[12]];
1731 if(h
->slice_type
== B_TYPE
&& h
->pps
.cabac
){
1732 if(IS_8X8(mb_type
)){
1733 uint8_t *direct_table
= &h
->direct_table
[b8_xy
];
1734 direct_table
[1+0*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[1]) ? 1 : 0;
1735 direct_table
[0+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[2]) ? 1 : 0;
1736 direct_table
[1+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[3]) ? 1 : 0;
1742 * Decodes a network abstraction layer unit.
1743 * @param consumed is the number of bytes used as input
1744 * @param length is the length of the array
1745 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1746 * @returns decoded bytes, might be src+1 if no escapes
1748 static uint8_t *decode_nal(H264Context
*h
, uint8_t *src
, int *dst_length
, int *consumed
, int length
){
1752 // src[0]&0x80; //forbidden bit
1753 h
->nal_ref_idc
= src
[0]>>5;
1754 h
->nal_unit_type
= src
[0]&0x1F;
1758 for(i
=0; i
<length
; i
++)
1759 printf("%2X ", src
[i
]);
1761 for(i
=0; i
+1<length
; i
+=2){
1762 if(src
[i
]) continue;
1763 if(i
>0 && src
[i
-1]==0) i
--;
1764 if(i
+2<length
&& src
[i
+1]==0 && src
[i
+2]<=3){
1766 /* startcode, so we must be past the end */
1773 if(i
>=length
-1){ //no escaped 0
1774 *dst_length
= length
;
1775 *consumed
= length
+1; //+1 for the header
1779 h
->rbsp_buffer
= av_fast_realloc(h
->rbsp_buffer
, &h
->rbsp_buffer_size
, length
);
1780 dst
= h
->rbsp_buffer
;
1782 //printf("decoding esc\n");
1785 //remove escapes (very rare 1:2^22)
1786 if(si
+2<length
&& src
[si
]==0 && src
[si
+1]==0 && src
[si
+2]<=3){
1787 if(src
[si
+2]==3){ //escape
1792 }else //next start code
1796 dst
[di
++]= src
[si
++];
1800 *consumed
= si
+ 1;//+1 for the header
1801 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1807 * @param src the data which should be escaped
1808 * @param dst the target buffer, dst+1 == src is allowed as a special case
1809 * @param length the length of the src data
1810 * @param dst_length the length of the dst array
1811 * @returns length of escaped data in bytes or -1 if an error occured
1813 static int encode_nal(H264Context
*h
, uint8_t *dst
, uint8_t *src
, int length
, int dst_length
){
1814 int i
, escape_count
, si
, di
;
1818 assert(dst_length
>0);
1820 dst
[0]= (h
->nal_ref_idc
<<5) + h
->nal_unit_type
;
1822 if(length
==0) return 1;
1825 for(i
=0; i
<length
; i
+=2){
1826 if(src
[i
]) continue;
1827 if(i
>0 && src
[i
-1]==0)
1829 if(i
+2<length
&& src
[i
+1]==0 && src
[i
+2]<=3){
1835 if(escape_count
==0){
1837 memcpy(dst
+1, src
, length
);
1841 if(length
+ escape_count
+ 1> dst_length
)
1844 //this should be damn rare (hopefully)
1846 h
->rbsp_buffer
= av_fast_realloc(h
->rbsp_buffer
, &h
->rbsp_buffer_size
, length
+ escape_count
);
1847 temp
= h
->rbsp_buffer
;
1848 //printf("encoding esc\n");
1853 if(si
+2<length
&& src
[si
]==0 && src
[si
+1]==0 && src
[si
+2]<=3){
1854 temp
[di
++]= 0; si
++;
1855 temp
[di
++]= 0; si
++;
1857 temp
[di
++]= src
[si
++];
1860 temp
[di
++]= src
[si
++];
1862 memcpy(dst
+1, temp
, length
+escape_count
);
1864 assert(di
== length
+escape_count
);
1870 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1872 static void encode_rbsp_trailing(PutBitContext
*pb
){
1875 length
= (-put_bits_count(pb
))&7;
1876 if(length
) put_bits(pb
, length
, 0);
1881 * identifies the exact end of the bitstream
1882 * @return the length of the trailing, or 0 if damaged
1884 static int decode_rbsp_trailing(uint8_t *src
){
1888 tprintf("rbsp trailing %X\n", v
);
1898 * idct tranforms the 16 dc values and dequantize them.
1899 * @param qp quantization parameter
1901 static void h264_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1904 int temp
[16]; //FIXME check if this is a good idea
1905 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1906 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1908 //memset(block, 64, 2*256);
1911 const int offset
= y_offset
[i
];
1912 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1913 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1914 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1915 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1924 const int offset
= x_offset
[i
];
1925 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1926 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1927 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1928 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1930 block
[stride
*0 +offset
]= ((((z0
+ z3
)*qmul
+ 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1931 block
[stride
*2 +offset
]= ((((z1
+ z2
)*qmul
+ 128 ) >> 8));
1932 block
[stride
*8 +offset
]= ((((z1
- z2
)*qmul
+ 128 ) >> 8));
1933 block
[stride
*10+offset
]= ((((z0
- z3
)*qmul
+ 128 ) >> 8));
1939 * dct tranforms the 16 dc values.
1940 * @param qp quantization parameter ??? FIXME
1942 static void h264_luma_dc_dct_c(DCTELEM
*block
/*, int qp*/){
1943 // const int qmul= dequant_coeff[qp][0];
1945 int temp
[16]; //FIXME check if this is a good idea
1946 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1947 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1950 const int offset
= y_offset
[i
];
1951 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1952 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1953 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1954 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1963 const int offset
= x_offset
[i
];
1964 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1965 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1966 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1967 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1969 block
[stride
*0 +offset
]= (z0
+ z3
)>>1;
1970 block
[stride
*2 +offset
]= (z1
+ z2
)>>1;
1971 block
[stride
*8 +offset
]= (z1
- z2
)>>1;
1972 block
[stride
*10+offset
]= (z0
- z3
)>>1;
1980 static void chroma_dc_dequant_idct_c(DCTELEM
*block
, int qp
, int qmul
){
1981 const int stride
= 16*2;
1982 const int xStride
= 16;
1985 a
= block
[stride
*0 + xStride
*0];
1986 b
= block
[stride
*0 + xStride
*1];
1987 c
= block
[stride
*1 + xStride
*0];
1988 d
= block
[stride
*1 + xStride
*1];
1995 block
[stride
*0 + xStride
*0]= ((a
+c
)*qmul
) >> 7;
1996 block
[stride
*0 + xStride
*1]= ((e
+b
)*qmul
) >> 7;
1997 block
[stride
*1 + xStride
*0]= ((a
-c
)*qmul
) >> 7;
1998 block
[stride
*1 + xStride
*1]= ((e
-b
)*qmul
) >> 7;
2002 static void chroma_dc_dct_c(DCTELEM
*block
){
2003 const int stride
= 16*2;
2004 const int xStride
= 16;
2007 a
= block
[stride
*0 + xStride
*0];
2008 b
= block
[stride
*0 + xStride
*1];
2009 c
= block
[stride
*1 + xStride
*0];
2010 d
= block
[stride
*1 + xStride
*1];
2017 block
[stride
*0 + xStride
*0]= (a
+c
);
2018 block
[stride
*0 + xStride
*1]= (e
+b
);
2019 block
[stride
*1 + xStride
*0]= (a
-c
);
2020 block
[stride
*1 + xStride
*1]= (e
-b
);
2025 * gets the chroma qp.
2027 static inline int get_chroma_qp(int chroma_qp_index_offset
, int qscale
){
2029 return chroma_qp
[clip(qscale
+ chroma_qp_index_offset
, 0, 51)];
2034 static void h264_diff_dct_c(DCTELEM
*block
, uint8_t *src1
, uint8_t *src2
, int stride
){
2036 //FIXME try int temp instead of block
2039 const int d0
= src1
[0 + i
*stride
] - src2
[0 + i
*stride
];
2040 const int d1
= src1
[1 + i
*stride
] - src2
[1 + i
*stride
];
2041 const int d2
= src1
[2 + i
*stride
] - src2
[2 + i
*stride
];
2042 const int d3
= src1
[3 + i
*stride
] - src2
[3 + i
*stride
];
2043 const int z0
= d0
+ d3
;
2044 const int z3
= d0
- d3
;
2045 const int z1
= d1
+ d2
;
2046 const int z2
= d1
- d2
;
2048 block
[0 + 4*i
]= z0
+ z1
;
2049 block
[1 + 4*i
]= 2*z3
+ z2
;
2050 block
[2 + 4*i
]= z0
- z1
;
2051 block
[3 + 4*i
]= z3
- 2*z2
;
2055 const int z0
= block
[0*4 + i
] + block
[3*4 + i
];
2056 const int z3
= block
[0*4 + i
] - block
[3*4 + i
];
2057 const int z1
= block
[1*4 + i
] + block
[2*4 + i
];
2058 const int z2
= block
[1*4 + i
] - block
[2*4 + i
];
2060 block
[0*4 + i
]= z0
+ z1
;
2061 block
[1*4 + i
]= 2*z3
+ z2
;
2062 block
[2*4 + i
]= z0
- z1
;
2063 block
[3*4 + i
]= z3
- 2*z2
;
2068 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
2069 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
2070 static inline int quantize_c(DCTELEM
*block
, uint8_t *scantable
, int qscale
, int intra
, int seperate_dc
){
2072 const int * const quant_table
= quant_coeff
[qscale
];
2073 const int bias
= intra
? (1<<QUANT_SHIFT
)/3 : (1<<QUANT_SHIFT
)/6;
2074 const unsigned int threshold1
= (1<<QUANT_SHIFT
) - bias
- 1;
2075 const unsigned int threshold2
= (threshold1
<<1);
2081 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
-2))/3 : (1<<(QUANT_SHIFT
-2))/6;
2082 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
-2)) - dc_bias
- 1;
2083 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
2085 int level
= block
[0]*quant_coeff
[qscale
+18][0];
2086 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
2088 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
-2);
2091 level
= (dc_bias
- level
)>>(QUANT_SHIFT
-2);
2094 // last_non_zero = i;
2099 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
+1))/3 : (1<<(QUANT_SHIFT
+1))/6;
2100 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
+1)) - dc_bias
- 1;
2101 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
2103 int level
= block
[0]*quant_table
[0];
2104 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
2106 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
+1);
2109 level
= (dc_bias
- level
)>>(QUANT_SHIFT
+1);
2112 // last_non_zero = i;
2125 const int j
= scantable
[i
];
2126 int level
= block
[j
]*quant_table
[j
];
2128 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
2129 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
2130 if(((unsigned)(level
+threshold1
))>threshold2
){
2132 level
= (bias
+ level
)>>QUANT_SHIFT
;
2135 level
= (bias
- level
)>>QUANT_SHIFT
;
2144 return last_non_zero
;
/**
 * 4x4 vertical intra prediction: replicate the row of 4 pixels directly
 * above the block into all four rows.  topright is unused in this mode.
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top_row = ((uint32_t*)(src - stride))[0];
    int y;

    for(y = 0; y < 4; y++)
        ((uint32_t*)(src + y*stride))[0] = top_row;
}
/**
 * 4x4 horizontal intra prediction: each row is filled with the pixel to
 * its immediate left.  topright is unused in this mode.
 *
 * Fix: the replication constant is now unsigned.  A left sample >= 128
 * made e.g. 255*0x01010101 overflow a signed int, which is undefined
 * behaviour in C; the unsigned multiply yields the intended bit pattern.
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101U;
    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101U;
    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101U;
    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101U;
}
/**
 * 4x4 DC intra prediction: fill the block with the rounded average of the
 * 4 pixels above and the 4 pixels to the left.  topright is unused.
 *
 * Fix: the byte-replication multiply is unsigned; dc >= 128 previously
 * overflowed a signed int (undefined behaviour).
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc * 0x01010101U;
}
/**
 * 4x4 left-DC intra prediction: fill the block with the rounded average
 * of the 4 left neighbours only (used when the top row is unavailable).
 * topright is unused.
 *
 * Fix: unsigned replication constant — dc >= 128 previously overflowed a
 * signed int (undefined behaviour).
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride]
                   + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc * 0x01010101U;
}
/**
 * 4x4 top-DC intra prediction: fill the block with the rounded average
 * of the 4 top neighbours only (used when the left column is
 * unavailable).  topright is unused.
 *
 * Fix: unsigned replication constant — dc >= 128 previously overflowed a
 * signed int (undefined behaviour).
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride]
                   + src[2-stride] + src[3-stride] + 2) >>2;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= dc * 0x01010101U;
}
/**
 * 4x4 DC intra prediction with no neighbours available: fill the whole
 * block with mid-grey (128).  topright is unused.
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t grey = 128U*0x01010101U;
    int y;

    for(y = 0; y < 4; y++)
        ((uint32_t*)(src + y*stride))[0] = grey;
}
2198 #define LOAD_TOP_RIGHT_EDGE\
2199 const int t4= topright[0];\
2200 const int t5= topright[1];\
2201 const int t6= topright[2];\
2202 const int t7= topright[3];\
2204 #define LOAD_LEFT_EDGE\
2205 const int l0= src[-1+0*stride];\
2206 const int l1= src[-1+1*stride];\
2207 const int l2= src[-1+2*stride];\
2208 const int l3= src[-1+3*stride];\
2210 #define LOAD_TOP_EDGE\
2211 const int t0= src[ 0-1*stride];\
2212 const int t1= src[ 1-1*stride];\
2213 const int t2= src[ 2-1*stride];\
2214 const int t3= src[ 3-1*stride];\
2216 static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
2217 const int lt
= src
[-1-1*stride
];
2221 src
[0+3*stride
]=(l3
+ 2*l2
+ l1
+ 2)>>2;
2223 src
[1+3*stride
]=(l2
+ 2*l1
+ l0
+ 2)>>2;
2226 src
[2+3*stride
]=(l1
+ 2*l0
+ lt
+ 2)>>2;
2230 src
[3+3*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
2233 src
[3+2*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
2235 src
[3+1*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
2236 src
[3+0*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
2239 static void pred4x4_down_left_c(uint8_t *src
, uint8_t *topright
, int stride
){
2244 src
[0+0*stride
]=(t0
+ t2
+ 2*t1
+ 2)>>2;
2246 src
[0+1*stride
]=(t1
+ t3
+ 2*t2
+ 2)>>2;
2249 src
[0+2*stride
]=(t2
+ t4
+ 2*t3
+ 2)>>2;
2253 src
[0+3*stride
]=(t3
+ t5
+ 2*t4
+ 2)>>2;
2256 src
[1+3*stride
]=(t4
+ t6
+ 2*t5
+ 2)>>2;
2258 src
[2+3*stride
]=(t5
+ t7
+ 2*t6
+ 2)>>2;
2259 src
[3+3*stride
]=(t6
+ 3*t7
+ 2)>>2;
2262 static void pred4x4_vertical_right_c(uint8_t *src
, uint8_t *topright
, int stride
){
2263 const int lt
= src
[-1-1*stride
];
2266 const __attribute__((unused
)) int unu
= l3
;
2269 src
[1+2*stride
]=(lt
+ t0
+ 1)>>1;
2271 src
[2+2*stride
]=(t0
+ t1
+ 1)>>1;
2273 src
[3+2*stride
]=(t1
+ t2
+ 1)>>1;
2274 src
[3+0*stride
]=(t2
+ t3
+ 1)>>1;
2276 src
[1+3*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
2278 src
[2+3*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
2280 src
[3+3*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
2281 src
[3+1*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
2282 src
[0+2*stride
]=(lt
+ 2*l0
+ l1
+ 2)>>2;
2283 src
[0+3*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
2286 static void pred4x4_vertical_left_c(uint8_t *src
, uint8_t *topright
, int stride
){
2289 const __attribute__((unused
)) int unu
= t7
;
2291 src
[0+0*stride
]=(t0
+ t1
+ 1)>>1;
2293 src
[0+2*stride
]=(t1
+ t2
+ 1)>>1;
2295 src
[1+2*stride
]=(t2
+ t3
+ 1)>>1;
2297 src
[2+2*stride
]=(t3
+ t4
+ 1)>>1;
2298 src
[3+2*stride
]=(t4
+ t5
+ 1)>>1;
2299 src
[0+1*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
2301 src
[0+3*stride
]=(t1
+ 2*t2
+ t3
+ 2)>>2;
2303 src
[1+3*stride
]=(t2
+ 2*t3
+ t4
+ 2)>>2;
2305 src
[2+3*stride
]=(t3
+ 2*t4
+ t5
+ 2)>>2;
2306 src
[3+3*stride
]=(t4
+ 2*t5
+ t6
+ 2)>>2;
2309 static void pred4x4_horizontal_up_c(uint8_t *src
, uint8_t *topright
, int stride
){
2312 src
[0+0*stride
]=(l0
+ l1
+ 1)>>1;
2313 src
[1+0*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
2315 src
[0+1*stride
]=(l1
+ l2
+ 1)>>1;
2317 src
[1+1*stride
]=(l1
+ 2*l2
+ l3
+ 2)>>2;
2319 src
[0+2*stride
]=(l2
+ l3
+ 1)>>1;
2321 src
[1+2*stride
]=(l2
+ 2*l3
+ l3
+ 2)>>2;
2330 static void pred4x4_horizontal_down_c(uint8_t *src
, uint8_t *topright
, int stride
){
2331 const int lt
= src
[-1-1*stride
];
2334 const __attribute__((unused
)) int unu
= t3
;
2337 src
[2+1*stride
]=(lt
+ l0
+ 1)>>1;
2339 src
[3+1*stride
]=(l0
+ 2*lt
+ t0
+ 2)>>2;
2340 src
[2+0*stride
]=(lt
+ 2*t0
+ t1
+ 2)>>2;
2341 src
[3+0*stride
]=(t0
+ 2*t1
+ t2
+ 2)>>2;
2343 src
[2+2*stride
]=(l0
+ l1
+ 1)>>1;
2345 src
[3+2*stride
]=(lt
+ 2*l0
+ l1
+ 2)>>2;
2347 src
[2+3*stride
]=(l1
+ l2
+ 1)>>1;
2349 src
[3+3*stride
]=(l0
+ 2*l1
+ l2
+ 2)>>2;
2350 src
[0+3*stride
]=(l2
+ l3
+ 1)>>1;
2351 src
[1+3*stride
]=(l1
+ 2*l2
+ l3
+ 2)>>2;
/**
 * 16x16 vertical intra prediction: copy the 16 pixels above the
 * macroblock into every one of the 16 rows, four 32-bit words at a time.
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    uint32_t top[4];
    int y, x;

    for(x = 0; x < 4; x++)
        top[x] = ((uint32_t*)(src - stride))[x];

    for(y = 0; y < 16; y++)
        for(x = 0; x < 4; x++)
            ((uint32_t*)(src + y*stride))[x] = top[x];
}
/**
 * 16x16 horizontal intra prediction: fill each of the 16 rows with the
 * pixel immediately to its left.
 *
 * Fix: the replication constant is unsigned.  A left sample >= 128 made
 * the signed multiply overflow an int, which is undefined behaviour.
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i = 0; i < 16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101U;
    }
}
/**
 * 16x16 DC intra prediction: fill the macroblock with the rounded
 * average of the 16 left and 16 top neighbours (32 samples, hence the
 * (dc+16)>>5 rounding).
 *
 * Fix: the accumulator/replication arithmetic is unsigned — a DC value
 * >= 128 previously overflowed a signed int in the 0x01010101 multiply,
 * which is undefined behaviour.
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i;
    uint32_t dc = 0;

    /* left column */
    for(i = 0; i < 16; i++){
        dc += src[-1+i*stride];
    }
    /* top row */
    for(i = 0; i < 16; i++){
        dc += src[i-stride];
    }

    dc = 0x01010101U*((dc + 16)>>5);

    for(i = 0; i < 16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 left-DC intra prediction: fill the macroblock with the rounded
 * average of the 16 left neighbours only (top row unavailable).
 *
 * Fix: unsigned arithmetic for the replication multiply — dc >= 128
 * previously overflowed a signed int (undefined behaviour).
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i;
    uint32_t dc = 0;

    for(i = 0; i < 16; i++){
        dc += src[-1+i*stride];
    }

    dc = 0x01010101U*((dc + 8)>>4);

    for(i = 0; i < 16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 top-DC intra prediction: fill the macroblock with the rounded
 * average of the 16 top neighbours only (left column unavailable).
 *
 * Fix: unsigned arithmetic for the replication multiply — dc >= 128
 * previously overflowed a signed int (undefined behaviour).
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i;
    uint32_t dc = 0;

    for(i = 0; i < 16; i++){
        dc += src[i-stride];
    }

    dc = 0x01010101U*((dc + 8)>>4);

    for(i = 0; i < 16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc;
    }
}
/**
 * 16x16 DC intra prediction with no neighbours available: fill the whole
 * macroblock with mid-grey (128).
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    const uint32_t grey = 0x01010101U*128U;
    int y, x;

    for(y = 0; y < 16; y++)
        for(x = 0; x < 4; x++)
            ((uint32_t*)(src + y*stride))[x] = grey;
}
2445 static inline void pred16x16_plane_compat_c(uint8_t *src
, int stride
, const int svq3
){
2448 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
2449 const uint8_t * const src0
= src
+7-stride
;
2450 const uint8_t *src1
= src
+8*stride
-1;
2451 const uint8_t *src2
= src1
-2*stride
; // == src+6*stride-1;
2452 int H
= src0
[1] - src0
[-1];
2453 int V
= src1
[0] - src2
[ 0];
2454 for(k
=2; k
<=8; ++k
) {
2455 src1
+= stride
; src2
-= stride
;
2456 H
+= k
*(src0
[k
] - src0
[-k
]);
2457 V
+= k
*(src1
[0] - src2
[ 0]);
2460 H
= ( 5*(H
/4) ) / 16;
2461 V
= ( 5*(V
/4) ) / 16;
2463 /* required for 100% accuracy */
2464 i
= H
; H
= V
; V
= i
;
2466 H
= ( 5*H
+32 ) >> 6;
2467 V
= ( 5*V
+32 ) >> 6;
2470 a
= 16*(src1
[0] + src2
[16] + 1) - 7*(V
+H
);
2471 for(j
=16; j
>0; --j
) {
2474 for(i
=-16; i
<0; i
+=4) {
2475 src
[16+i
] = cm
[ (b
) >> 5 ];
2476 src
[17+i
] = cm
[ (b
+ H
) >> 5 ];
2477 src
[18+i
] = cm
[ (b
+2*H
) >> 5 ];
2478 src
[19+i
] = cm
[ (b
+3*H
) >> 5 ];
/**
 * 16x16 plane intra prediction (H.264 flavour): thin wrapper that calls
 * the shared plane predictor with svq3 compatibility mode disabled.
 */
static void pred16x16_plane_c(uint8_t *src, int stride){
    pred16x16_plane_compat_c(src, stride, 0);
}
/**
 * 8x8 vertical intra prediction (chroma): copy the 8 pixels above the
 * block into all 8 rows, as two 32-bit halves.
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t left_half  = ((uint32_t*)(src - stride))[0];
    const uint32_t right_half = ((uint32_t*)(src - stride))[1];
    int y;

    for(y = 0; y < 8; y++){
        ((uint32_t*)(src + y*stride))[0] = left_half;
        ((uint32_t*)(src + y*stride))[1] = right_half;
    }
}
/**
 * 8x8 horizontal intra prediction (chroma): fill each row with the pixel
 * to its immediate left.
 *
 * Fix: the replication constant is unsigned.  A left sample >= 128 made
 * the signed multiply overflow an int, which is undefined behaviour.
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i = 0; i < 8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101U;
    }
}
/**
 * 8x8 DC intra prediction with no neighbours available: fill the block
 * with mid-grey (128).
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    const uint32_t grey = 0x01010101U*128U;
    int y;

    for(y = 0; y < 8; y++){
        ((uint32_t*)(src + y*stride))[0] = grey;
        ((uint32_t*)(src + y*stride))[1] = grey;
    }
}
/**
 * 8x8 left-DC intra prediction (chroma): the top half of the block is
 * filled with the average of left rows 0-3, the bottom half with the
 * average of left rows 4-7.
 *
 * Fix: unsigned arithmetic for the replication multiply — a DC value
 * >= 128 previously overflowed a signed int (undefined behaviour).
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    uint32_t dc0 = 0, dc2 = 0;

    for(i = 0; i < 4; i++){
        dc0 += src[-1+ i   *stride];
        dc2 += src[-1+(i+4)*stride];
    }
    dc0 = 0x01010101U*((dc0 + 2)>>2);
    dc2 = 0x01010101U*((dc2 + 2)>>2);

    for(i = 0; i < 4; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc0;
    }
    for(i = 4; i < 8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc2;
    }
}
/**
 * 8x8 top-DC intra prediction (chroma): the left half of the block is
 * filled with the average of top columns 0-3, the right half with the
 * average of top columns 4-7.
 *
 * Fix: unsigned arithmetic for the replication multiply — a DC value
 * >= 128 previously overflowed a signed int (undefined behaviour).
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    uint32_t dc0 = 0, dc1 = 0;

    for(i = 0; i < 4; i++){
        dc0 += src[i  -stride];
        dc1 += src[4+i-stride];
    }
    dc0 = 0x01010101U*((dc0 + 2)>>2);
    dc1 = 0x01010101U*((dc1 + 2)>>2);

    for(i = 0; i < 4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i = 4; i < 8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
}
/**
 * 8x8 DC intra prediction (chroma), all neighbours available.  The block
 * is split into four 4x4 quadrants:
 *   top-left     = avg(left rows 0-3  + top cols 0-3)  (8 samples)
 *   top-right    = avg(top cols 4-7)                   (4 samples)
 *   bottom-left  = avg(left rows 4-7)                  (4 samples)
 *   bottom-right = avg(top cols 4-7 + left rows 4-7)   (8 samples)
 *
 * Fix: unsigned arithmetic for the replication multiplies — DC values
 * >= 128 previously overflowed a signed int (undefined behaviour).
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    uint32_t dc0, dc1, dc2, dc3;

    dc0 = dc1 = dc2 = 0;
    for(i = 0; i < 4; i++){
        dc0 += src[-1+i*stride] + src[i-stride];
        dc1 += src[4+i-stride];
        dc2 += src[-1+(i+4)*stride];
    }
    dc3 = 0x01010101U*((dc1 + dc2 + 4)>>3);
    dc0 = 0x01010101U*((dc0 + 4)>>3);
    dc1 = 0x01010101U*((dc1 + 2)>>2);
    dc2 = 0x01010101U*((dc2 + 2)>>2);

    for(i = 0; i < 4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i = 4; i < 8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc2;
        ((uint32_t*)(src+i*stride))[1]= dc3;
    }
}
2588 static void pred8x8_plane_c(uint8_t *src
, int stride
){
2591 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
2592 const uint8_t * const src0
= src
+3-stride
;
2593 const uint8_t *src1
= src
+4*stride
-1;
2594 const uint8_t *src2
= src1
-2*stride
; // == src+2*stride-1;
2595 int H
= src0
[1] - src0
[-1];
2596 int V
= src1
[0] - src2
[ 0];
2597 for(k
=2; k
<=4; ++k
) {
2598 src1
+= stride
; src2
-= stride
;
2599 H
+= k
*(src0
[k
] - src0
[-k
]);
2600 V
+= k
*(src1
[0] - src2
[ 0]);
2602 H
= ( 17*H
+16 ) >> 5;
2603 V
= ( 17*V
+16 ) >> 5;
2605 a
= 16*(src1
[0] + src2
[8]+1) - 3*(V
+H
);
2606 for(j
=8; j
>0; --j
) {
2609 src
[0] = cm
[ (b
) >> 5 ];
2610 src
[1] = cm
[ (b
+ H
) >> 5 ];
2611 src
[2] = cm
[ (b
+2*H
) >> 5 ];
2612 src
[3] = cm
[ (b
+3*H
) >> 5 ];
2613 src
[4] = cm
[ (b
+4*H
) >> 5 ];
2614 src
[5] = cm
[ (b
+5*H
) >> 5 ];
2615 src
[6] = cm
[ (b
+6*H
) >> 5 ];
2616 src
[7] = cm
[ (b
+7*H
) >> 5 ];
2621 #define SRC(x,y) src[(x)+(y)*stride]
2623 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2624 #define PREDICT_8x8_LOAD_LEFT \
2625 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2626 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2627 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2628 const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2631 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2632 #define PREDICT_8x8_LOAD_TOP \
2633 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2634 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2635 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2636 const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2637 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2640 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2641 #define PREDICT_8x8_LOAD_TOPRIGHT \
2642 int t8, t9, t10, t11, t12, t13, t14, t15; \
2643 if(has_topright) { \
2644 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2645 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2646 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2648 #define PREDICT_8x8_LOAD_TOPLEFT \
2649 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
2651 #define PREDICT_8x8_DC(v) \
2653 for( y = 0; y < 8; y++ ) { \
2654 ((uint32_t*)src)[0] = \
2655 ((uint32_t*)src)[1] = v; \
2659 static void pred8x8l_128_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2661 PREDICT_8x8_DC(0x80808080);
2663 static void pred8x8l_left_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2665 PREDICT_8x8_LOAD_LEFT
;
2666 const uint32_t dc
= ((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
+4) >> 3) * 0x01010101;
2669 static void pred8x8l_top_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2671 PREDICT_8x8_LOAD_TOP
;
2672 const uint32_t dc
= ((t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+4) >> 3) * 0x01010101;
2675 static void pred8x8l_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2677 PREDICT_8x8_LOAD_LEFT
;
2678 PREDICT_8x8_LOAD_TOP
;
2679 const uint32_t dc
= ((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
2680 +t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+8) >> 4) * 0x01010101;
2683 static void pred8x8l_horizontal_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2685 PREDICT_8x8_LOAD_LEFT
;
2686 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2687 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2688 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2691 static void pred8x8l_vertical_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2694 PREDICT_8x8_LOAD_TOP
;
2703 for( y
= 1; y
< 8; y
++ )
2704 *(uint64_t*)(src
+y
*stride
) = *(uint64_t*)src
;
/**
 * 8x8 luma down-left intra prediction.  Works on the low-pass-filtered
 * edge samples t0..t15 produced by the PREDICT_8x8_LOAD_TOP /
 * PREDICT_8x8_LOAD_TOPRIGHT macros; each anti-diagonal of the block is
 * written with a single 3-tap filtered value via the SRC(x,y) macro.
 * Left untouched: the chained multi-assignments below alias the same
 * diagonal intentionally.
 */
static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_TOPRIGHT;
    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
    SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
}
/**
 * 8x8 luma down-right intra prediction.  Uses the filtered left column
 * (l0..l7), top row (t0..t7) and top-left corner (lt) from the
 * PREDICT_8x8_LOAD_* macros; each down-right diagonal of the block gets
 * one 3-tap filtered value via SRC(x,y).
 */
static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
}
/**
 * 8x8 luma vertical-right intra prediction.  Mixes 2-tap (>>1) and 3-tap
 * (>>2) filtered values from the loaded left/top/top-left edge samples;
 * each chained assignment writes one half-pel-slope diagonal via SRC().
 */
static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(7,0)= (t6 + t7 + 1) >> 1;
}
/**
 * 8x8 luma horizontal-down intra prediction.  Interleaves 2-tap (>>1)
 * and 3-tap (>>2) filtered edge values along shallow-down diagonals;
 * edges come from the PREDICT_8x8_LOAD_* macros, stores go through
 * SRC(x,y).
 */
static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_LEFT;
    PREDICT_8x8_LOAD_TOPLEFT;
    SRC(0,7)= (l6 + l7 + 1) >> 1;
    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
}
/**
 * 8x8 luma vertical-left intra prediction.  Even rows use 2-tap (>>1)
 * averages of adjacent top samples, odd rows 3-tap (>>2) averages;
 * t0..t12 come from PREDICT_8x8_LOAD_TOP / PREDICT_8x8_LOAD_TOPRIGHT,
 * stores go through the SRC(x,y) macro.
 */
static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    PREDICT_8x8_LOAD_TOP;
    PREDICT_8x8_LOAD_TOPRIGHT;
    SRC(0,0)= (t0 + t1 + 1) >> 1;
    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(7,6)= (t10 + t11 + 1) >> 1;
    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
}
/**
 * 8x8 luma horizontal-up intra prediction.  Interpolates upward along
 * the filtered left column l0..l7 (from PREDICT_8x8_LOAD_LEFT); the
 * lower-right region beyond the last left sample is flat-filled with l7.
 */
static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    PREDICT_8x8_LOAD_LEFT;
    SRC(0,0)= (l0 + l1 + 1) >> 1;
    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
}
2853 #undef PREDICT_8x8_LOAD_LEFT
2854 #undef PREDICT_8x8_LOAD_TOP
2855 #undef PREDICT_8x8_LOAD_TOPLEFT
2856 #undef PREDICT_8x8_LOAD_TOPRIGHT
2857 #undef PREDICT_8x8_DC
2863 static inline void mc_dir_part(H264Context
*h
, Picture
*pic
, int n
, int square
, int chroma_height
, int delta
, int list
,
2864 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
2865 int src_x_offset
, int src_y_offset
,
2866 qpel_mc_func
*qpix_op
, h264_chroma_mc_func chroma_op
){
2867 MpegEncContext
* const s
= &h
->s
;
2868 const int mx
= h
->mv_cache
[list
][ scan8
[n
] ][0] + src_x_offset
*8;
2869 int my
= h
->mv_cache
[list
][ scan8
[n
] ][1] + src_y_offset
*8;
2870 const int luma_xy
= (mx
&3) + ((my
&3)<<2);
2871 uint8_t * src_y
= pic
->data
[0] + (mx
>>2) + (my
>>2)*h
->mb_linesize
;
2872 uint8_t * src_cb
, * src_cr
;
2873 int extra_width
= h
->emu_edge_width
;
2874 int extra_height
= h
->emu_edge_height
;
2876 const int full_mx
= mx
>>2;
2877 const int full_my
= my
>>2;
2878 const int pic_width
= 16*s
->mb_width
;
2879 const int pic_height
= 16*s
->mb_height
>> MB_MBAFF
;
2884 if(mx
&7) extra_width
-= 3;
2885 if(my
&7) extra_height
-= 3;
2887 if( full_mx
< 0-extra_width
2888 || full_my
< 0-extra_height
2889 || full_mx
+ 16/*FIXME*/ > pic_width
+ extra_width
2890 || full_my
+ 16/*FIXME*/ > pic_height
+ extra_height
){
2891 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_y
- 2 - 2*h
->mb_linesize
, h
->mb_linesize
, 16+5, 16+5/*FIXME*/, full_mx
-2, full_my
-2, pic_width
, pic_height
);
2892 src_y
= s
->edge_emu_buffer
+ 2 + 2*h
->mb_linesize
;
2896 qpix_op
[luma_xy
](dest_y
, src_y
, h
->mb_linesize
); //FIXME try variable height perhaps?
2898 qpix_op
[luma_xy
](dest_y
+ delta
, src_y
+ delta
, h
->mb_linesize
);
2901 if(s
->flags
&CODEC_FLAG_GRAY
) return;
2904 // chroma offset when predicting from a field of opposite parity
2905 my
+= 2 * ((s
->mb_y
& 1) - (h
->ref_cache
[list
][scan8
[n
]] & 1));
2906 emu
|= (my
>>3) < 0 || (my
>>3) + 8 >= (pic_height
>>1);
2908 src_cb
= pic
->data
[1] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
2909 src_cr
= pic
->data
[2] + (mx
>>3) + (my
>>3)*h
->mb_uvlinesize
;
2912 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cb
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
2913 src_cb
= s
->edge_emu_buffer
;
2915 chroma_op(dest_cb
, src_cb
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
2918 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cr
, h
->mb_uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), pic_width
>>1, pic_height
>>1);
2919 src_cr
= s
->edge_emu_buffer
;
2921 chroma_op(dest_cr
, src_cr
, h
->mb_uvlinesize
, chroma_height
, mx
&7, my
&7);
2924 static inline void mc_part_std(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
2925 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
2926 int x_offset
, int y_offset
,
2927 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
2928 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
2929 int list0
, int list1
){
2930 MpegEncContext
* const s
= &h
->s
;
2931 qpel_mc_func
*qpix_op
= qpix_put
;
2932 h264_chroma_mc_func chroma_op
= chroma_put
;
2934 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
2935 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
2936 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
2937 x_offset
+= 8*s
->mb_x
;
2938 y_offset
+= 8*(s
->mb_y
>> MB_MBAFF
);
2941 Picture
*ref
= &h
->ref_list
[0][ h
->ref_cache
[0][ scan8
[n
] ] ];
2942 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 0,
2943 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
2944 qpix_op
, chroma_op
);
2947 chroma_op
= chroma_avg
;
2951 Picture
*ref
= &h
->ref_list
[1][ h
->ref_cache
[1][ scan8
[n
] ] ];
2952 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 1,
2953 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
2954 qpix_op
, chroma_op
);
2958 static inline void mc_part_weighted(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
2959 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
2960 int x_offset
, int y_offset
,
2961 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
2962 h264_weight_func luma_weight_op
, h264_weight_func chroma_weight_op
,
2963 h264_biweight_func luma_weight_avg
, h264_biweight_func chroma_weight_avg
,
2964 int list0
, int list1
){
2965 MpegEncContext
* const s
= &h
->s
;
2967 dest_y
+= 2*x_offset
+ 2*y_offset
*h
-> mb_linesize
;
2968 dest_cb
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
2969 dest_cr
+= x_offset
+ y_offset
*h
->mb_uvlinesize
;
2970 x_offset
+= 8*s
->mb_x
;
2971 y_offset
+= 8*(s
->mb_y
>> MB_MBAFF
);
2974 /* don't optimize for luma-only case, since B-frames usually
2975 * use implicit weights => chroma too. */
2976 uint8_t *tmp_cb
= s
->obmc_scratchpad
;
2977 uint8_t *tmp_cr
= s
->obmc_scratchpad
+ 8;
2978 uint8_t *tmp_y
= s
->obmc_scratchpad
+ 8*h
->mb_uvlinesize
;
2979 int refn0
= h
->ref_cache
[0][ scan8
[n
] ];
2980 int refn1
= h
->ref_cache
[1][ scan8
[n
] ];
2982 mc_dir_part(h
, &h
->ref_list
[0][refn0
], n
, square
, chroma_height
, delta
, 0,
2983 dest_y
, dest_cb
, dest_cr
,
2984 x_offset
, y_offset
, qpix_put
, chroma_put
);
2985 mc_dir_part(h
, &h
->ref_list
[1][refn1
], n
, square
, chroma_height
, delta
, 1,
2986 tmp_y
, tmp_cb
, tmp_cr
,
2987 x_offset
, y_offset
, qpix_put
, chroma_put
);
2989 if(h
->use_weight
== 2){
2990 int weight0
= h
->implicit_weight
[refn0
][refn1
];
2991 int weight1
= 64 - weight0
;
2992 luma_weight_avg( dest_y
, tmp_y
, h
-> mb_linesize
, 5, weight0
, weight1
, 0);
2993 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
2994 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, 5, weight0
, weight1
, 0);
2996 luma_weight_avg(dest_y
, tmp_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
2997 h
->luma_weight
[0][refn0
], h
->luma_weight
[1][refn1
],
2998 h
->luma_offset
[0][refn0
] + h
->luma_offset
[1][refn1
]);
2999 chroma_weight_avg(dest_cb
, tmp_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
3000 h
->chroma_weight
[0][refn0
][0], h
->chroma_weight
[1][refn1
][0],
3001 h
->chroma_offset
[0][refn0
][0] + h
->chroma_offset
[1][refn1
][0]);
3002 chroma_weight_avg(dest_cr
, tmp_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
3003 h
->chroma_weight
[0][refn0
][1], h
->chroma_weight
[1][refn1
][1],
3004 h
->chroma_offset
[0][refn0
][1] + h
->chroma_offset
[1][refn1
][1]);
3007 int list
= list1
? 1 : 0;
3008 int refn
= h
->ref_cache
[list
][ scan8
[n
] ];
3009 Picture
*ref
= &h
->ref_list
[list
][refn
];
3010 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, list
,
3011 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
3012 qpix_put
, chroma_put
);
3014 luma_weight_op(dest_y
, h
->mb_linesize
, h
->luma_log2_weight_denom
,
3015 h
->luma_weight
[list
][refn
], h
->luma_offset
[list
][refn
]);
3016 if(h
->use_weight_chroma
){
3017 chroma_weight_op(dest_cb
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
3018 h
->chroma_weight
[list
][refn
][0], h
->chroma_offset
[list
][refn
][0]);
3019 chroma_weight_op(dest_cr
, h
->mb_uvlinesize
, h
->chroma_log2_weight_denom
,
3020 h
->chroma_weight
[list
][refn
][1], h
->chroma_offset
[list
][refn
][1]);
3025 static inline void mc_part(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
3026 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
3027 int x_offset
, int y_offset
,
3028 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
3029 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
3030 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
,
3031 int list0
, int list1
){
3032 if((h
->use_weight
==2 && list0
&& list1
3033 && (h
->implicit_weight
[ h
->ref_cache
[0][scan8
[n
]] ][ h
->ref_cache
[1][scan8
[n
]] ] != 32))
3034 || h
->use_weight
==1)
3035 mc_part_weighted(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
3036 x_offset
, y_offset
, qpix_put
, chroma_put
,
3037 weight_op
[0], weight_op
[3], weight_avg
[0], weight_avg
[3], list0
, list1
);
3039 mc_part_std(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
3040 x_offset
, y_offset
, qpix_put
, chroma_put
, qpix_avg
, chroma_avg
, list0
, list1
);
3043 static inline void prefetch_motion(H264Context
*h
, int list
){
3044 /* fetch pixels for estimated mv 4 macroblocks ahead
3045 * optimized for 64byte cache lines */
3046 MpegEncContext
* const s
= &h
->s
;
3047 const int refn
= h
->ref_cache
[list
][scan8
[0]];
3049 const int mx
= (h
->mv_cache
[list
][scan8
[0]][0]>>2) + 16*s
->mb_x
+ 8;
3050 const int my
= (h
->mv_cache
[list
][scan8
[0]][1]>>2) + 16*s
->mb_y
;
3051 uint8_t **src
= h
->ref_list
[list
][refn
].data
;
3052 int off
= mx
+ (my
+ (s
->mb_x
&3)*4)*h
->mb_linesize
+ 64;
3053 s
->dsp
.prefetch(src
[0]+off
, s
->linesize
, 4);
3054 off
= (mx
>>1) + ((my
>>1) + (s
->mb_x
&7))*s
->uvlinesize
+ 64;
3055 s
->dsp
.prefetch(src
[1]+off
, src
[2]-src
[1], 2);
3059 static void hl_motion(H264Context
*h
, uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
3060 qpel_mc_func (*qpix_put
)[16], h264_chroma_mc_func (*chroma_put
),
3061 qpel_mc_func (*qpix_avg
)[16], h264_chroma_mc_func (*chroma_avg
),
3062 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
){
3063 MpegEncContext
* const s
= &h
->s
;
3064 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
3065 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
3067 assert(IS_INTER(mb_type
));
3069 prefetch_motion(h
, 0);
3071 if(IS_16X16(mb_type
)){
3072 mc_part(h
, 0, 1, 8, 0, dest_y
, dest_cb
, dest_cr
, 0, 0,
3073 qpix_put
[0], chroma_put
[0], qpix_avg
[0], chroma_avg
[0],
3074 &weight_op
[0], &weight_avg
[0],
3075 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
3076 }else if(IS_16X8(mb_type
)){
3077 mc_part(h
, 0, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 0,
3078 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
3079 &weight_op
[1], &weight_avg
[1],
3080 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
3081 mc_part(h
, 8, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 4,
3082 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
3083 &weight_op
[1], &weight_avg
[1],
3084 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
3085 }else if(IS_8X16(mb_type
)){
3086 mc_part(h
, 0, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 0, 0,
3087 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
3088 &weight_op
[2], &weight_avg
[2],
3089 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
3090 mc_part(h
, 4, 0, 8, 8*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, 4, 0,
3091 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
3092 &weight_op
[2], &weight_avg
[2],
3093 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
3097 assert(IS_8X8(mb_type
));
3100 const int sub_mb_type
= h
->sub_mb_type
[i
];
3102 int x_offset
= (i
&1)<<2;
3103 int y_offset
= (i
&2)<<1;
3105 if(IS_SUB_8X8(sub_mb_type
)){
3106 mc_part(h
, n
, 1, 4, 0, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
3107 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
3108 &weight_op
[3], &weight_avg
[3],
3109 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
3110 }else if(IS_SUB_8X4(sub_mb_type
)){
3111 mc_part(h
, n
, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
3112 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
3113 &weight_op
[4], &weight_avg
[4],
3114 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
3115 mc_part(h
, n
+2, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
+2,
3116 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
3117 &weight_op
[4], &weight_avg
[4],
3118 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
3119 }else if(IS_SUB_4X8(sub_mb_type
)){
3120 mc_part(h
, n
, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
3121 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
3122 &weight_op
[5], &weight_avg
[5],
3123 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
3124 mc_part(h
, n
+1, 0, 4, 4*h
->mb_linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
+2, y_offset
,
3125 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
3126 &weight_op
[5], &weight_avg
[5],
3127 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
3130 assert(IS_SUB_4X4(sub_mb_type
));
3132 int sub_x_offset
= x_offset
+ 2*(j
&1);
3133 int sub_y_offset
= y_offset
+ (j
&2);
3134 mc_part(h
, n
+j
, 1, 2, 0, dest_y
, dest_cb
, dest_cr
, sub_x_offset
, sub_y_offset
,
3135 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
3136 &weight_op
[6], &weight_avg
[6],
3137 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
3143 prefetch_motion(h
, 1);
3146 static void decode_init_vlc(H264Context
*h
){
3147 static int done
= 0;
3153 init_vlc(&chroma_dc_coeff_token_vlc
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 4*5,
3154 &chroma_dc_coeff_token_len
[0], 1, 1,
3155 &chroma_dc_coeff_token_bits
[0], 1, 1, 1);
3158 init_vlc(&coeff_token_vlc
[i
], COEFF_TOKEN_VLC_BITS
, 4*17,
3159 &coeff_token_len
[i
][0], 1, 1,
3160 &coeff_token_bits
[i
][0], 1, 1, 1);
3164 init_vlc(&chroma_dc_total_zeros_vlc
[i
], CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 4,
3165 &chroma_dc_total_zeros_len
[i
][0], 1, 1,
3166 &chroma_dc_total_zeros_bits
[i
][0], 1, 1, 1);
3168 for(i
=0; i
<15; i
++){
3169 init_vlc(&total_zeros_vlc
[i
], TOTAL_ZEROS_VLC_BITS
, 16,
3170 &total_zeros_len
[i
][0], 1, 1,
3171 &total_zeros_bits
[i
][0], 1, 1, 1);
3175 init_vlc(&run_vlc
[i
], RUN_VLC_BITS
, 7,
3176 &run_len
[i
][0], 1, 1,
3177 &run_bits
[i
][0], 1, 1, 1);
3179 init_vlc(&run7_vlc
, RUN7_VLC_BITS
, 16,
3180 &run_len
[6][0], 1, 1,
3181 &run_bits
[6][0], 1, 1, 1);
3186 * Sets the intra prediction function pointers.
3188 static void init_pred_ptrs(H264Context
*h
){
3189 // MpegEncContext * const s = &h->s;
3191 h
->pred4x4
[VERT_PRED
]= pred4x4_vertical_c
;
3192 h
->pred4x4
[HOR_PRED
]= pred4x4_horizontal_c
;
3193 h
->pred4x4
[DC_PRED
]= pred4x4_dc_c
;
3194 h
->pred4x4
[DIAG_DOWN_LEFT_PRED
]= pred4x4_down_left_c
;
3195 h
->pred4x4
[DIAG_DOWN_RIGHT_PRED
]= pred4x4_down_right_c
;
3196 h
->pred4x4
[VERT_RIGHT_PRED
]= pred4x4_vertical_right_c
;
3197 h
->pred4x4
[HOR_DOWN_PRED
]= pred4x4_horizontal_down_c
;
3198 h
->pred4x4
[VERT_LEFT_PRED
]= pred4x4_vertical_left_c
;
3199 h
->pred4x4
[HOR_UP_PRED
]= pred4x4_horizontal_up_c
;
3200 h
->pred4x4
[LEFT_DC_PRED
]= pred4x4_left_dc_c
;
3201 h
->pred4x4
[TOP_DC_PRED
]= pred4x4_top_dc_c
;
3202 h
->pred4x4
[DC_128_PRED
]= pred4x4_128_dc_c
;
3204 h
->pred8x8l
[VERT_PRED
]= pred8x8l_vertical_c
;
3205 h
->pred8x8l
[HOR_PRED
]= pred8x8l_horizontal_c
;
3206 h
->pred8x8l
[DC_PRED
]= pred8x8l_dc_c
;
3207 h
->pred8x8l
[DIAG_DOWN_LEFT_PRED
]= pred8x8l_down_left_c
;
3208 h
->pred8x8l
[DIAG_DOWN_RIGHT_PRED
]= pred8x8l_down_right_c
;
3209 h
->pred8x8l
[VERT_RIGHT_PRED
]= pred8x8l_vertical_right_c
;
3210 h
->pred8x8l
[HOR_DOWN_PRED
]= pred8x8l_horizontal_down_c
;
3211 h
->pred8x8l
[VERT_LEFT_PRED
]= pred8x8l_vertical_left_c
;
3212 h
->pred8x8l
[HOR_UP_PRED
]= pred8x8l_horizontal_up_c
;
3213 h
->pred8x8l
[LEFT_DC_PRED
]= pred8x8l_left_dc_c
;
3214 h
->pred8x8l
[TOP_DC_PRED
]= pred8x8l_top_dc_c
;
3215 h
->pred8x8l
[DC_128_PRED
]= pred8x8l_128_dc_c
;
3217 h
->pred8x8
[DC_PRED8x8
]= pred8x8_dc_c
;
3218 h
->pred8x8
[VERT_PRED8x8
]= pred8x8_vertical_c
;
3219 h
->pred8x8
[HOR_PRED8x8
]= pred8x8_horizontal_c
;
3220 h
->pred8x8
[PLANE_PRED8x8
]= pred8x8_plane_c
;
3221 h
->pred8x8
[LEFT_DC_PRED8x8
]= pred8x8_left_dc_c
;
3222 h
->pred8x8
[TOP_DC_PRED8x8
]= pred8x8_top_dc_c
;
3223 h
->pred8x8
[DC_128_PRED8x8
]= pred8x8_128_dc_c
;
3225 h
->pred16x16
[DC_PRED8x8
]= pred16x16_dc_c
;
3226 h
->pred16x16
[VERT_PRED8x8
]= pred16x16_vertical_c
;
3227 h
->pred16x16
[HOR_PRED8x8
]= pred16x16_horizontal_c
;
3228 h
->pred16x16
[PLANE_PRED8x8
]= pred16x16_plane_c
;
3229 h
->pred16x16
[LEFT_DC_PRED8x8
]= pred16x16_left_dc_c
;
3230 h
->pred16x16
[TOP_DC_PRED8x8
]= pred16x16_top_dc_c
;
3231 h
->pred16x16
[DC_128_PRED8x8
]= pred16x16_128_dc_c
;
3234 static void free_tables(H264Context
*h
){
3235 av_freep(&h
->intra4x4_pred_mode
);
3236 av_freep(&h
->chroma_pred_mode_table
);
3237 av_freep(&h
->cbp_table
);
3238 av_freep(&h
->mvd_table
[0]);
3239 av_freep(&h
->mvd_table
[1]);
3240 av_freep(&h
->direct_table
);
3241 av_freep(&h
->non_zero_count
);
3242 av_freep(&h
->slice_table_base
);
3243 av_freep(&h
->top_borders
[1]);
3244 av_freep(&h
->top_borders
[0]);
3245 h
->slice_table
= NULL
;
3247 av_freep(&h
->mb2b_xy
);
3248 av_freep(&h
->mb2b8_xy
);
3250 av_freep(&h
->s
.obmc_scratchpad
);
3253 static void init_dequant8_coeff_table(H264Context
*h
){
3255 const int transpose
= (h
->s
.dsp
.h264_idct8_add
!= ff_h264_idct8_add_c
); //FIXME ugly
3256 h
->dequant8_coeff
[0] = h
->dequant8_buffer
[0];
3257 h
->dequant8_coeff
[1] = h
->dequant8_buffer
[1];
3259 for(i
=0; i
<2; i
++ ){
3260 if(i
&& !memcmp(h
->pps
.scaling_matrix8
[0], h
->pps
.scaling_matrix8
[1], 64*sizeof(uint8_t))){
3261 h
->dequant8_coeff
[1] = h
->dequant8_buffer
[0];
3265 for(q
=0; q
<52; q
++){
3266 int shift
= div6
[q
];
3269 h
->dequant8_coeff
[i
][q
][transpose
? (x
>>3)|((x
&7)<<3) : x
] =
3270 ((uint32_t)dequant8_coeff_init
[idx
][ dequant8_coeff_init_scan
[((x
>>1)&12) | (x
&3)] ] *
3271 h
->pps
.scaling_matrix8
[i
][x
]) << shift
;
3276 static void init_dequant4_coeff_table(H264Context
*h
){
3278 const int transpose
= (h
->s
.dsp
.h264_idct_add
!= ff_h264_idct_add_c
); //FIXME ugly
3279 for(i
=0; i
<6; i
++ ){
3280 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[i
];
3282 if(!memcmp(h
->pps
.scaling_matrix4
[j
], h
->pps
.scaling_matrix4
[i
], 16*sizeof(uint8_t))){
3283 h
->dequant4_coeff
[i
] = h
->dequant4_buffer
[j
];
3290 for(q
=0; q
<52; q
++){
3291 int shift
= div6
[q
] + 2;
3294 h
->dequant4_coeff
[i
][q
][transpose
? (x
>>2)|((x
<<2)&0xF) : x
] =
3295 ((uint32_t)dequant4_coeff_init
[idx
][(x
&1) + ((x
>>2)&1)] *
3296 h
->pps
.scaling_matrix4
[i
][x
]) << shift
;
3301 static void init_dequant_tables(H264Context
*h
){
3303 init_dequant4_coeff_table(h
);
3304 if(h
->pps
.transform_8x8_mode
)
3305 init_dequant8_coeff_table(h
);
3306 if(h
->sps
.transform_bypass
){
3309 h
->dequant4_coeff
[i
][0][x
] = 1<<6;
3310 if(h
->pps
.transform_8x8_mode
)
3313 h
->dequant8_coeff
[i
][0][x
] = 1<<6;
3320 * needs width/height
3322 static int alloc_tables(H264Context
*h
){
3323 MpegEncContext
* const s
= &h
->s
;
3324 const int big_mb_num
= s
->mb_stride
* (s
->mb_height
+1);
3327 CHECKED_ALLOCZ(h
->intra4x4_pred_mode
, big_mb_num
* 8 * sizeof(uint8_t))
3329 CHECKED_ALLOCZ(h
->non_zero_count
, big_mb_num
* 16 * sizeof(uint8_t))
3330 CHECKED_ALLOCZ(h
->slice_table_base
, (big_mb_num
+s
->mb_stride
) * sizeof(uint8_t))
3331 CHECKED_ALLOCZ(h
->top_borders
[0] , s
->mb_width
* (16+8+8) * sizeof(uint8_t))
3332 CHECKED_ALLOCZ(h
->top_borders
[1] , s
->mb_width
* (16+8+8) * sizeof(uint8_t))
3333 CHECKED_ALLOCZ(h
->cbp_table
, big_mb_num
* sizeof(uint16_t))
3335 if( h
->pps
.cabac
) {
3336 CHECKED_ALLOCZ(h
->chroma_pred_mode_table
, big_mb_num
* sizeof(uint8_t))
3337 CHECKED_ALLOCZ(h
->mvd_table
[0], 32*big_mb_num
* sizeof(uint16_t));
3338 CHECKED_ALLOCZ(h
->mvd_table
[1], 32*big_mb_num
* sizeof(uint16_t));
3339 CHECKED_ALLOCZ(h
->direct_table
, 32*big_mb_num
* sizeof(uint8_t));
3342 memset(h
->slice_table_base
, -1, (big_mb_num
+s
->mb_stride
) * sizeof(uint8_t));
3343 h
->slice_table
= h
->slice_table_base
+ s
->mb_stride
*2 + 1;
3345 CHECKED_ALLOCZ(h
->mb2b_xy
, big_mb_num
* sizeof(uint32_t));
3346 CHECKED_ALLOCZ(h
->mb2b8_xy
, big_mb_num
* sizeof(uint32_t));
3347 for(y
=0; y
<s
->mb_height
; y
++){
3348 for(x
=0; x
<s
->mb_width
; x
++){
3349 const int mb_xy
= x
+ y
*s
->mb_stride
;
3350 const int b_xy
= 4*x
+ 4*y
*h
->b_stride
;
3351 const int b8_xy
= 2*x
+ 2*y
*h
->b8_stride
;
3353 h
->mb2b_xy
[mb_xy
]= b_xy
;
3354 h
->mb2b8_xy
[mb_xy
]= b8_xy
;
3358 s
->obmc_scratchpad
= NULL
;
3360 if(!h
->dequant4_coeff
[0])
3361 init_dequant_tables(h
);
3369 static void common_init(H264Context
*h
){
3370 MpegEncContext
* const s
= &h
->s
;
3372 s
->width
= s
->avctx
->width
;
3373 s
->height
= s
->avctx
->height
;
3374 s
->codec_id
= s
->avctx
->codec
->id
;
3378 h
->dequant_coeff_pps
= -1;
3379 s
->unrestricted_mv
=1;
3380 s
->decode
=1; //FIXME
3382 memset(h
->pps
.scaling_matrix4
, 16, 6*16*sizeof(uint8_t));
3383 memset(h
->pps
.scaling_matrix8
, 16, 2*64*sizeof(uint8_t));
3386 static int decode_init(AVCodecContext
*avctx
){
3387 H264Context
*h
= avctx
->priv_data
;
3388 MpegEncContext
* const s
= &h
->s
;
3390 MPV_decode_defaults(s
);
3395 s
->out_format
= FMT_H264
;
3396 s
->workaround_bugs
= avctx
->workaround_bugs
;
3399 // s->decode_mb= ff_h263_decode_mb;
3401 avctx
->pix_fmt
= PIX_FMT_YUV420P
;
3405 if(avctx
->extradata_size
> 0 && avctx
->extradata
&&
3406 *(char *)avctx
->extradata
== 1){
3416 static int frame_start(H264Context
*h
){
3417 MpegEncContext
* const s
= &h
->s
;
3420 if(MPV_frame_start(s
, s
->avctx
) < 0)
3422 ff_er_frame_start(s
);
3424 assert(s
->linesize
&& s
->uvlinesize
);
3426 for(i
=0; i
<16; i
++){
3427 h
->block_offset
[i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
3428 h
->block_offset
[24+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
3431 h
->block_offset
[16+i
]=
3432 h
->block_offset
[20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
3433 h
->block_offset
[24+16+i
]=
3434 h
->block_offset
[24+20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
3437 /* can't be in alloc_tables because linesize isn't known there.
3438 * FIXME: redo bipred weight to not require extra buffer? */
3439 if(!s
->obmc_scratchpad
)
3440 s
->obmc_scratchpad
= av_malloc(16*2*s
->linesize
+ 8*2*s
->uvlinesize
);
3442 /* some macroblocks will be accessed before they're available */
3444 memset(h
->slice_table
, -1, (s
->mb_height
*s
->mb_stride
-1) * sizeof(uint8_t));
3446 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
3450 static inline void backup_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
){
3451 MpegEncContext
* const s
= &h
->s
;
3455 src_cb
-= uvlinesize
;
3456 src_cr
-= uvlinesize
;
3458 // There are two lines saved, the line above the the top macroblock of a pair,
3459 // and the line above the bottom macroblock
3460 h
->left_border
[0]= h
->top_borders
[0][s
->mb_x
][15];
3461 for(i
=1; i
<17; i
++){
3462 h
->left_border
[i
]= src_y
[15+i
* linesize
];
3465 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 16*linesize
);
3466 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+16*linesize
);
3468 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3469 h
->left_border
[17 ]= h
->top_borders
[0][s
->mb_x
][16+7];
3470 h
->left_border
[17+9]= h
->top_borders
[0][s
->mb_x
][24+7];
3472 h
->left_border
[i
+17 ]= src_cb
[7+i
*uvlinesize
];
3473 h
->left_border
[i
+17+9]= src_cr
[7+i
*uvlinesize
];
3475 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+8*uvlinesize
);
3476 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+8*uvlinesize
);
3480 static inline void xchg_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int xchg
){
3481 MpegEncContext
* const s
= &h
->s
;
3484 int deblock_left
= (s
->mb_x
> 0);
3485 int deblock_top
= (s
->mb_y
> 0);
3487 src_y
-= linesize
+ 1;
3488 src_cb
-= uvlinesize
+ 1;
3489 src_cr
-= uvlinesize
+ 1;
3491 #define XCHG(a,b,t,xchg)\
3498 for(i
= !deblock_top
; i
<17; i
++){
3499 XCHG(h
->left_border
[i
], src_y
[i
* linesize
], temp8
, xchg
);
3504 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0), *(uint64_t*)(src_y
+1), temp64
, xchg
);
3505 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8), *(uint64_t*)(src_y
+9), temp64
, 1);
3506 if(s
->mb_x
+1 < s
->mb_width
){
3507 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
+1]), *(uint64_t*)(src_y
+17), temp64
, 1);
3511 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3513 for(i
= !deblock_top
; i
<9; i
++){
3514 XCHG(h
->left_border
[i
+17 ], src_cb
[i
*uvlinesize
], temp8
, xchg
);
3515 XCHG(h
->left_border
[i
+17+9], src_cr
[i
*uvlinesize
], temp8
, xchg
);
3519 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1), temp64
, 1);
3520 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1), temp64
, 1);
3525 static inline void backup_pair_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
){
3526 MpegEncContext
* const s
= &h
->s
;
3529 src_y
-= 2 * linesize
;
3530 src_cb
-= 2 * uvlinesize
;
3531 src_cr
-= 2 * uvlinesize
;
3533 // There are two lines saved, the line above the the top macroblock of a pair,
3534 // and the line above the bottom macroblock
3535 h
->left_border
[0]= h
->top_borders
[0][s
->mb_x
][15];
3536 h
->left_border
[1]= h
->top_borders
[1][s
->mb_x
][15];
3537 for(i
=2; i
<34; i
++){
3538 h
->left_border
[i
]= src_y
[15+i
* linesize
];
3541 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 32*linesize
);
3542 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+32*linesize
);
3543 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 33*linesize
);
3544 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+33*linesize
);
3546 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3547 h
->left_border
[34 ]= h
->top_borders
[0][s
->mb_x
][16+7];
3548 h
->left_border
[34+ 1]= h
->top_borders
[1][s
->mb_x
][16+7];
3549 h
->left_border
[34+18 ]= h
->top_borders
[0][s
->mb_x
][24+7];
3550 h
->left_border
[34+18+1]= h
->top_borders
[1][s
->mb_x
][24+7];
3551 for(i
=2; i
<18; i
++){
3552 h
->left_border
[i
+34 ]= src_cb
[7+i
*uvlinesize
];
3553 h
->left_border
[i
+34+18]= src_cr
[7+i
*uvlinesize
];
3555 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+16*uvlinesize
);
3556 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+16*uvlinesize
);
3557 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+17*uvlinesize
);
3558 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+17*uvlinesize
);
3562 static inline void xchg_pair_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int xchg
){
3563 MpegEncContext
* const s
= &h
->s
;
3566 int deblock_left
= (s
->mb_x
> 0);
3567 int deblock_top
= (s
->mb_y
> 1);
3569 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y
, src_cb
, src_cr
, linesize
, uvlinesize
);
3571 src_y
-= 2 * linesize
+ 1;
3572 src_cb
-= 2 * uvlinesize
+ 1;
3573 src_cr
-= 2 * uvlinesize
+ 1;
3575 #define XCHG(a,b,t,xchg)\
3582 for(i
= (!deblock_top
)<<1; i
<34; i
++){
3583 XCHG(h
->left_border
[i
], src_y
[i
* linesize
], temp8
, xchg
);
3588 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0), *(uint64_t*)(src_y
+1), temp64
, xchg
);
3589 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8), *(uint64_t*)(src_y
+9), temp64
, 1);
3590 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+0), *(uint64_t*)(src_y
+1 +linesize
), temp64
, xchg
);
3591 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+8), *(uint64_t*)(src_y
+9 +linesize
), temp64
, 1);
3592 if(s
->mb_x
+1 < s
->mb_width
){
3593 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
+1]), *(uint64_t*)(src_y
+17), temp64
, 1);
3594 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
+1]), *(uint64_t*)(src_y
+17 +linesize
), temp64
, 1);
3598 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3600 for(i
= (!deblock_top
) << 1; i
<18; i
++){
3601 XCHG(h
->left_border
[i
+34 ], src_cb
[i
*uvlinesize
], temp8
, xchg
);
3602 XCHG(h
->left_border
[i
+34+18], src_cr
[i
*uvlinesize
], temp8
, xchg
);
3606 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1), temp64
, 1);
3607 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1), temp64
, 1);
3608 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1 +uvlinesize
), temp64
, 1);
3609 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1 +uvlinesize
), temp64
, 1);
3614 static void hl_decode_mb(H264Context
*h
){
3615 MpegEncContext
* const s
= &h
->s
;
3616 const int mb_x
= s
->mb_x
;
3617 const int mb_y
= s
->mb_y
;
3618 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
3619 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
3620 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
3621 int linesize
, uvlinesize
/*dct_offset*/;
3623 int *block_offset
= &h
->block_offset
[0];
3624 const unsigned int bottom
= mb_y
& 1;
3625 const int transform_bypass
= (s
->qscale
== 0 && h
->sps
.transform_bypass
);
3626 void (*idct_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
3627 void (*idct_dc_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
3632 dest_y
= s
->current_picture
.data
[0] + (mb_y
* 16* s
->linesize
) + mb_x
* 16;
3633 dest_cb
= s
->current_picture
.data
[1] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
3634 dest_cr
= s
->current_picture
.data
[2] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
3637 linesize
= h
->mb_linesize
= s
->linesize
* 2;
3638 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
* 2;
3639 block_offset
= &h
->block_offset
[24];
3640 if(mb_y
&1){ //FIXME move out of this func?
3641 dest_y
-= s
->linesize
*15;
3642 dest_cb
-= s
->uvlinesize
*7;
3643 dest_cr
-= s
->uvlinesize
*7;
3647 for(list
=0; list
<2; list
++){
3648 if(!USES_LIST(mb_type
, list
))
3650 if(IS_16X16(mb_type
)){
3651 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
3652 fill_rectangle(ref
, 4, 4, 8, 16+*ref
^(s
->mb_y
&1), 1);
3654 for(i
=0; i
<16; i
+=4){
3655 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3656 int ref
= h
->ref_cache
[list
][scan8
[i
]];
3658 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2, 8, 16+ref
^(s
->mb_y
&1), 1);
3664 linesize
= h
->mb_linesize
= s
->linesize
;
3665 uvlinesize
= h
->mb_uvlinesize
= s
->uvlinesize
;
3666 // dct_offset = s->linesize * 16;
3669 if(transform_bypass
){
3671 idct_add
= IS_8x8DCT(mb_type
) ? s
->dsp
.add_pixels8
: s
->dsp
.add_pixels4
;
3672 }else if(IS_8x8DCT(mb_type
)){
3673 idct_dc_add
= s
->dsp
.h264_idct8_dc_add
;
3674 idct_add
= s
->dsp
.h264_idct8_add
;
3676 idct_dc_add
= s
->dsp
.h264_idct_dc_add
;
3677 idct_add
= s
->dsp
.h264_idct_add
;
3680 if(FRAME_MBAFF
&& h
->deblocking_filter
&& IS_INTRA(mb_type
)
3681 && (!bottom
|| !IS_INTRA(s
->current_picture
.mb_type
[mb_xy
-s
->mb_stride
]))){
3682 int mbt_y
= mb_y
&~1;
3683 uint8_t *top_y
= s
->current_picture
.data
[0] + (mbt_y
* 16* s
->linesize
) + mb_x
* 16;
3684 uint8_t *top_cb
= s
->current_picture
.data
[1] + (mbt_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
3685 uint8_t *top_cr
= s
->current_picture
.data
[2] + (mbt_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
3686 xchg_pair_border(h
, top_y
, top_cb
, top_cr
, s
->linesize
, s
->uvlinesize
, 1);
3689 if (IS_INTRA_PCM(mb_type
)) {
3692 // The pixels are stored in h->mb array in the same order as levels,
3693 // copy them in output in the correct order.
3694 for(i
=0; i
<16; i
++) {
3695 for (y
=0; y
<4; y
++) {
3696 for (x
=0; x
<4; x
++) {
3697 *(dest_y
+ block_offset
[i
] + y
*linesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
3701 for(i
=16; i
<16+4; i
++) {
3702 for (y
=0; y
<4; y
++) {
3703 for (x
=0; x
<4; x
++) {
3704 *(dest_cb
+ block_offset
[i
] + y
*uvlinesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
3708 for(i
=20; i
<20+4; i
++) {
3709 for (y
=0; y
<4; y
++) {
3710 for (x
=0; x
<4; x
++) {
3711 *(dest_cr
+ block_offset
[i
] + y
*uvlinesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
3716 if(IS_INTRA(mb_type
)){
3717 if(h
->deblocking_filter
&& !FRAME_MBAFF
)
3718 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 1);
3720 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3721 h
->pred8x8
[ h
->chroma_pred_mode
](dest_cb
, uvlinesize
);
3722 h
->pred8x8
[ h
->chroma_pred_mode
](dest_cr
, uvlinesize
);
3725 if(IS_INTRA4x4(mb_type
)){
3727 if(IS_8x8DCT(mb_type
)){
3728 for(i
=0; i
<16; i
+=4){
3729 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
3730 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
3731 const int nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
3732 h
->pred8x8l
[ dir
](ptr
, (h
->topleft_samples_available
<<i
)&0x8000,
3733 (h
->topright_samples_available
<<(i
+1))&0x8000, linesize
);
3735 if(nnz
== 1 && h
->mb
[i
*16])
3736 idct_dc_add(ptr
, h
->mb
+ i
*16, linesize
);
3738 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
3742 for(i
=0; i
<16; i
++){
3743 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
3745 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
3748 if(dir
== DIAG_DOWN_LEFT_PRED
|| dir
== VERT_LEFT_PRED
){
3749 const int topright_avail
= (h
->topright_samples_available
<<i
)&0x8000;
3750 assert(mb_y
|| linesize
<= block_offset
[i
]);
3751 if(!topright_avail
){
3752 tr
= ptr
[3 - linesize
]*0x01010101;
3753 topright
= (uint8_t*) &tr
;
3755 topright
= ptr
+ 4 - linesize
;
3759 h
->pred4x4
[ dir
](ptr
, topright
, linesize
);
3760 nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
3762 if(s
->codec_id
== CODEC_ID_H264
){
3763 if(nnz
== 1 && h
->mb
[i
*16])
3764 idct_dc_add(ptr
, h
->mb
+ i
*16, linesize
);
3766 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
3768 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, 0);
3773 h
->pred16x16
[ h
->intra16x16_pred_mode
](dest_y
, linesize
);
3774 if(s
->codec_id
== CODEC_ID_H264
){
3775 if(!transform_bypass
)
3776 h264_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
, h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 0:3][s
->qscale
][0]);
3778 svq3_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
);
3780 if(h
->deblocking_filter
&& !FRAME_MBAFF
)
3781 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 0);
3782 }else if(s
->codec_id
== CODEC_ID_H264
){
3783 hl_motion(h
, dest_y
, dest_cb
, dest_cr
,
3784 s
->dsp
.put_h264_qpel_pixels_tab
, s
->dsp
.put_h264_chroma_pixels_tab
,
3785 s
->dsp
.avg_h264_qpel_pixels_tab
, s
->dsp
.avg_h264_chroma_pixels_tab
,
3786 s
->dsp
.weight_h264_pixels_tab
, s
->dsp
.biweight_h264_pixels_tab
);
3790 if(!IS_INTRA4x4(mb_type
)){
3791 if(s
->codec_id
== CODEC_ID_H264
){
3792 if(IS_INTRA16x16(mb_type
)){
3793 for(i
=0; i
<16; i
++){
3794 if(h
->non_zero_count_cache
[ scan8
[i
] ])
3795 idct_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
3796 else if(h
->mb
[i
*16])
3797 idct_dc_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
3800 const int di
= IS_8x8DCT(mb_type
) ? 4 : 1;
3801 for(i
=0; i
<16; i
+=di
){
3802 int nnz
= h
->non_zero_count_cache
[ scan8
[i
] ];
3804 if(nnz
==1 && h
->mb
[i
*16])
3805 idct_dc_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
3807 idct_add(dest_y
+ block_offset
[i
], h
->mb
+ i
*16, linesize
);
3812 for(i
=0; i
<16; i
++){
3813 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){ //FIXME benchmark weird rule, & below
3814 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
3815 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, IS_INTRA(mb_type
) ? 1 : 0);
3821 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3822 uint8_t *dest
[2] = {dest_cb
, dest_cr
};
3823 if(transform_bypass
){
3824 idct_add
= idct_dc_add
= s
->dsp
.add_pixels4
;
3826 idct_add
= s
->dsp
.h264_idct_add
;
3827 idct_dc_add
= s
->dsp
.h264_idct_dc_add
;
3828 chroma_dc_dequant_idct_c(h
->mb
+ 16*16, h
->chroma_qp
, h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 1:4][h
->chroma_qp
][0]);
3829 chroma_dc_dequant_idct_c(h
->mb
+ 16*16+4*16, h
->chroma_qp
, h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 2:5][h
->chroma_qp
][0]);
3831 if(s
->codec_id
== CODEC_ID_H264
){
3832 for(i
=16; i
<16+8; i
++){
3833 if(h
->non_zero_count_cache
[ scan8
[i
] ])
3834 idct_add(dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
3835 else if(h
->mb
[i
*16])
3836 idct_dc_add(dest
[(i
&4)>>2] + block_offset
[i
], h
->mb
+ i
*16, uvlinesize
);
3839 for(i
=16; i
<16+8; i
++){
3840 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){
3841 uint8_t * const ptr
= dest
[(i
&4)>>2] + block_offset
[i
];
3842 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, uvlinesize
, chroma_qp
[s
->qscale
+ 12] - 12, 2);
3848 if(h
->deblocking_filter
) {
3850 //FIXME try deblocking one mb at a time?
3851 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3852 const int mb_y
= s
->mb_y
- 1;
3853 uint8_t *pair_dest_y
, *pair_dest_cb
, *pair_dest_cr
;
3854 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
3855 const int mb_type_top
= s
->current_picture
.mb_type
[mb_xy
];
3856 const int mb_type_bottom
= s
->current_picture
.mb_type
[mb_xy
+s
->mb_stride
];
3857 if (!bottom
) return;
3858 pair_dest_y
= s
->current_picture
.data
[0] + (mb_y
* 16* s
->linesize
) + mb_x
* 16;
3859 pair_dest_cb
= s
->current_picture
.data
[1] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
3860 pair_dest_cr
= s
->current_picture
.data
[2] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
3862 if(IS_INTRA(mb_type_top
| mb_type_bottom
))
3863 xchg_pair_border(h
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, s
->linesize
, s
->uvlinesize
, 0);
3865 backup_pair_border(h
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, s
->linesize
, s
->uvlinesize
);
3869 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x
, mb_y
, pair_dest_y
, dest_y
);
3870 fill_caches(h
, mb_type_top
, 1); //FIXME don't fill stuff which isn't used by filter_mb
3871 h
->chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mb_xy
]);
3872 filter_mb(h
, mb_x
, mb_y
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, linesize
, uvlinesize
);
3875 tprintf("call mbaff filter_mb\n");
3876 fill_caches(h
, mb_type_bottom
, 1); //FIXME don't fill stuff which isn't used by filter_mb
3877 h
->chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mb_xy
+s
->mb_stride
]);
3878 filter_mb(h
, mb_x
, mb_y
+1, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
3880 tprintf("call filter_mb\n");
3881 backup_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
3882 fill_caches(h
, mb_type
, 1); //FIXME don't fill stuff which isn't used by filter_mb
3883 filter_mb(h
, mb_x
, mb_y
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
/**
 * fills the default_ref_list.
 *
 * Builds the default L0/L1 reference lists from the short-term and long-term
 * reference buffers. For B slices the short-term refs are first ordered by
 * POC relative to the current picture; for P slices decoding order is used;
 * long-term refs are appended after the short-term ones.
 * NOTE(review): this extract is missing interior source lines (declarations
 * of i, j, out_i, best_i, limit, list, index; several else branches and
 * closing braces); "[elided]" marks the gaps.
 */
static int fill_default_ref_list(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* index into sorted_short_ref of the first ref whose POC is >= the
     * current picture's POC; -1 while not yet found */
    int smallest_poc_greater_than_current = -1;
    Picture sorted_short_ref[32];

    if(h->slice_type==B_TYPE){
        /* [elided: local declarations] */
        /* sort frame according to poc in B slice */
        for(out_i=0; out_i<h->short_ref_count; out_i++){
            /* [elided] */
            int best_poc=INT_MAX;
            /* [elided] */
            /* selection sort: each pass picks the smallest POC above `limit` */
            for(i=0; i<h->short_ref_count; i++){
                const int poc= h->short_ref[i]->poc;
                if(poc > limit && poc < best_poc){
                    /* [elided: remember best_i / best_poc; loop close] */
            assert(best_i != INT_MIN);
            /* [elided] */
            sorted_short_ref[out_i]= *h->short_ref[best_i];
            tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
            if (-1 == smallest_poc_greater_than_current) {
                if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
                    smallest_poc_greater_than_current = out_i;
                    /* [elided: closing braces] */

    if(s->picture_structure == PICT_FRAME){
        if(h->slice_type==B_TYPE){
            /* [elided] */
            tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
            /* [elided] */
            // find the largest poc
            for(list=0; list<2; list++){
                /* [elided] */
                /* L0 and L1 walk the sorted array in opposite directions */
                int step= list ? -1 : 1;
                /* [elided] */
                for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
                    while(j<0 || j>= h->short_ref_count){
                        if(j != -99 && step == (list ? -1 : 1))
                            /* [elided] */
                        j= smallest_poc_greater_than_current + (step>>1);
                    /* [elided] */
                    if(sorted_short_ref[j].reference != 3) continue;
                    h->default_ref_list[list][index ]= sorted_short_ref[j];
                    h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
                /* [elided] */
                /* append long-term refs after the short-term entries */
                for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
                    if(h->long_ref[i] == NULL) continue;
                    if(h->long_ref[i]->reference != 3) continue;
                    h->default_ref_list[ list ][index ]= *h->long_ref[i];
                    h->default_ref_list[ list ][index++].pic_id= i;;
                /* [elided] */
                if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
                    // swap the two first elements of L1 when
                    // L0 and L1 are identical
                    Picture temp= h->default_ref_list[1][0];
                    h->default_ref_list[1][0] = h->default_ref_list[1][1];
                    h->default_ref_list[1][1] = temp;
                /* [elided] */
                /* zero the unused tail so stale entries are never dereferenced */
                if(index < h->ref_count[ list ])
                    memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
        /* [elided: else branch — P/SP slices use decoding order] */
            for(i=0; i<h->short_ref_count; i++){
                if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
                h->default_ref_list[0][index ]= *h->short_ref[i];
                h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
            /* [elided] */
            for(i = 0; i < 16; i++){
                if(h->long_ref[i] == NULL) continue;
                if(h->long_ref[i]->reference != 3) continue;
                h->default_ref_list[0][index ]= *h->long_ref[i];
                h->default_ref_list[0][index++].pic_id= i;;
            /* [elided] */
            if(index < h->ref_count[0])
                memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
    /* [elided] */
    if(h->slice_type==B_TYPE){
        //FIXME second field balh
    /* [elided: debug dump of the finished lists] */
    for (i=0; i<h->ref_count[0]; i++) {
        tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
    /* [elided] */
    if(h->slice_type==B_TYPE){
        for (i=0; i<h->ref_count[1]; i++) {
            /* NOTE(review): the data pointer below is read from list 0
             * ([0][i]) while the rest of this line prints list 1 — looks
             * like a copy/paste bug in the debug output; confirm against
             * upstream before changing. */
            tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
4004 static void print_short_term(H264Context
*h
);
4005 static void print_long_term(H264Context
*h
);
/**
 * Parses the ref_pic_list_reordering() slice-header syntax and applies the
 * requested reordering to h->ref_list, starting from the default lists.
 * NOTE(review): interior lines (declarations of list/index/i/pic_id, break
 * and return statements, some braces/else branches) are elided in this
 * extract.
 */
static int decode_ref_pic_list_reordering(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* [elided: local declarations] */
    print_short_term(h);
    /* [elided] */
    /* intra slices carry no reference lists */
    if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func

    for(list=0; list<2; list++){
        /* start from the default list and reorder in place */
        memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
        /* [elided] */
        if(get_bits1(&s->gb)){  /* ref_pic_list_reordering_flag */
            int pred= h->curr_pic_num;
            /* [elided] */
            for(index=0; ; index++){
                int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
                /* [elided] */
                Picture *ref= NULL;
                /* [elided] */
                if(reordering_of_pic_nums_idc==3)  /* end of reordering commands */
                    /* [elided: break] */
                if(index >= h->ref_count[list]){
                    av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
                    /* [elided: error return] */
                if(reordering_of_pic_nums_idc<3){
                    if(reordering_of_pic_nums_idc<2){
                        /* short-term ref, addressed by a frame_num difference */
                        const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
                        /* [elided] */
                        if(abs_diff_pic_num >= h->max_pic_num){
                            av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
                            /* [elided: error return] */
                        if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
                        else pred+= abs_diff_pic_num;
                        pred &= h->max_pic_num - 1;  /* modular pic-num arithmetic */
                        /* [elided] */
                        for(i= h->short_ref_count-1; i>=0; i--){
                            ref = h->short_ref[i];
                            assert(ref->reference == 3);
                            assert(!ref->long_ref);
                            if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
                                /* [elided: break] */
                        /* [elided] */
                        ref->pic_id= ref->frame_num;
                    /* [elided: else — long-term branch] */
                        pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
                        /* [elided] */
                        ref = h->long_ref[pic_id];
                        ref->pic_id= pic_id;
                        assert(ref->reference == 3);
                        assert(ref->long_ref);
                    /* [elided: if the looked-up ref was not found] */
                        av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
                        memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                    /* [elided: else — insert `ref` at `index`, shifting the
                     * duplicate (if any) down the list] */
                        for(i=index; i+1<h->ref_count[list]; i++){
                            if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
                                /* [elided: break] */
                        for(; i > index; i--){
                            h->ref_list[list][i]= h->ref_list[list][i-1];
                        h->ref_list[list][index]= *ref;
                /* [elided: else — idc out of range] */
                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
                    /* [elided: error return] */
        if(h->slice_type!=B_TYPE) break;  /* only B slices have an L1 */

    /* [elided] */
    /* replace "non existing" entries so later code never sees NULL data[0] */
    for(list=0; list<2; list++){
        for(index= 0; index < h->ref_count[list]; index++){
            if(!h->ref_list[list][index].data[0])
                h->ref_list[list][index]= s->current_picture;
        if(h->slice_type!=B_TYPE) break;

    if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
        direct_dist_scale_factor(h);
    direct_ref_list_init(h);
    /* [elided: return] */
/**
 * Expands each frame reference into a pair of field references (stored at
 * list offset 16+2*i) for MBAFF decoding, and duplicates the per-reference
 * luma/chroma weights/offsets and implicit weights for the field entries.
 * NOTE(review): interior lines (declarations of list/i/j, the per-plane
 * j-loops, `field[0] = *frame;`, closing braces and the return) are elided
 * in this extract.
 */
static int fill_mbaff_ref_list(H264Context *h){
    /* [elided: local declarations] */
    for(list=0; list<2; list++){
        for(i=0; i<h->ref_count[list]; i++){
            Picture *frame = &h->ref_list[list][i];
            Picture *field = &h->ref_list[list][16+2*i];
            /* [elided: field[0] initialization; per-plane loop] */
            field[0].linesize[j] <<= 1;  /* a field's stride is twice the frame's */
            field[1] = field[0];
            /* [elided: per-plane loop] */
            field[1].data[j] += frame->linesize[j];  /* bottom field starts one row down */

            h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
            h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
            /* [elided: chroma-plane loop] */
            h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
            h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
    /* [elided] */
    for(j=0; j<h->ref_count[1]; j++){
        for(i=0; i<h->ref_count[0]; i++)
            h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
        memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
        memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
    /* [elided: return] */
/**
 * Parses pred_weight_table(): explicit per-reference luma/chroma weights and
 * offsets for weighted prediction. use_weight_chroma is raised when any
 * chroma weight differs from the default (1 << log2_denom) or any offset is
 * nonzero.
 * NOTE(review): interior lines (declarations of list/i/j, the h->use_weight
 * initialization/update on the luma path, chroma j-loops, else branches,
 * closing braces and the return) are elided in this extract.
 */
static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* [elided: local declarations; presumably h->use_weight= 0;] */
    int luma_def, chroma_def;

    h->use_weight_chroma= 0;
    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
    h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
    luma_def = 1<<h->luma_log2_weight_denom;     /* identity luma weight */
    chroma_def = 1<<h->chroma_log2_weight_denom; /* identity chroma weight */

    for(list=0; list<2; list++){
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;

            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                h->luma_weight[list][i]= get_se_golomb(&s->gb);
                h->luma_offset[list][i]= get_se_golomb(&s->gb);
                if( h->luma_weight[list][i] != luma_def
                   || h->luma_offset[list][i] != 0)
                    /* [elided — presumably sets h->use_weight] */
            /* [elided: else — fall back to the identity weight] */
                h->luma_weight[list][i]= luma_def;
                h->luma_offset[list][i]= 0;

            chroma_weight_flag= get_bits1(&s->gb);
            if(chroma_weight_flag){
                /* [elided: loop over the two chroma components j] */
                h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
                h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
                if( h->chroma_weight[list][i][j] != chroma_def
                   || h->chroma_offset[list][i][j] != 0)
                    h->use_weight_chroma= 1;
            /* [elided: else — identity chroma weights] */
                h->chroma_weight[list][i][j]= chroma_def;
                h->chroma_offset[list][i][j]= 0;
        if(h->slice_type != B_TYPE) break;  /* P/SP slices have no L1 */

    h->use_weight= h->use_weight || h->use_weight_chroma;
    /* [elided: return] */
/**
 * Computes the implicit (POC-distance based) bi-prediction weight table for
 * B slices; equal weights (32/32) are used when the distance-derived scale
 * factor leaves the legal range.
 * NOTE(review): interior lines (declarations of ref0/ref1, the early-return
 * body, the use_weight assignments on the luma side, the if(td)/else
 * structure and closing braces) are elided in this extract.
 */
static void implicit_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* [elided: local declarations] */
    int cur_poc = s->current_picture_ptr->poc;

    /* one ref on each side with POCs symmetric around the current picture:
     * implicit weighting degenerates to the unweighted case */
    if( h->ref_count[0] == 1 && h->ref_count[1] == 1
       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
        /* [elided — presumably also clears h->use_weight, then returns] */
        h->use_weight_chroma= 0;
    /* [elided — presumably h->use_weight= 2 (implicit mode) precedes this] */
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=0; ref0 < h->ref_count[0]; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=0; ref1 < h->ref_count[1]; ref1++){
            int poc1 = h->ref_list[1][ref1].poc;
            int td = clip(poc1 - poc0, -128, 127);
            /* [elided — the dist_scale_factor path below appears to be
             * guarded by td != 0; confirm against upstream] */
            int tb = clip(cur_poc - poc0, -128, 127);
            int tx = (16384 + (ABS(td) >> 1)) / td;
            int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
            if(dist_scale_factor < -64 || dist_scale_factor > 128)
                h->implicit_weight[ref0][ref1] = 32;
            /* [elided: else] */
                h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
            /* [elided: else branch of the td guard — equal weights] */
                h->implicit_weight[ref0][ref1] = 32;
/**
 * Presumably clears the reference status of a picture unless it is still
 * pending output in the delayed-picture queue — TODO confirm: most of this
 * body was dropped by the extraction.
 */
static inline void unreference_pic(H264Context *h, Picture *pic){
    /* [elided: local declarations and the reference-flag update] */
    if(pic == h->delayed_output_pic)
        /* [elided] */
    /* [elided] */
        for(i = 0; h->delayed_pic[i]; i++)
            if(pic == h->delayed_pic[i]){
                /* [elided: remainder of the function] */
/**
 * instantaneous decoder refresh.
 *
 * Empties both the long-term and short-term reference buffers, as required
 * when an IDR slice is decoded.
 * NOTE(review): the `int i;` declaration and some closing braces are elided
 * in this extract.
 */
static void idr(H264Context *h){
    /* [elided: int i;] */
    /* release every long-term reference slot */
    for(i=0; i<16; i++){
        if (h->long_ref[i] != NULL) {
            unreference_pic(h, h->long_ref[i]);
            h->long_ref[i]= NULL;
    h->long_ref_count=0;

    /* release every short-term reference */
    for(i=0; i<h->short_ref_count; i++){
        unreference_pic(h, h->short_ref[i]);
        h->short_ref[i]= NULL;
    h->short_ref_count=0;
/* forget old pics after a seek */
/* Clears the delayed-output queue and drops the reference flag on the
 * pending output and current pictures so the DPB restarts cleanly.
 * NOTE(review): the `int i;` declaration and some braces are elided in this
 * extract. */
static void flush_dpb(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    /* [elided: int i;] */
    for(i=0; i<16; i++) {
        if(h->delayed_pic[i])
            h->delayed_pic[i]->reference= 0;
        h->delayed_pic[i]= NULL;
    /* [elided] */
    if(h->delayed_output_pic)
        h->delayed_output_pic->reference= 0;
    h->delayed_output_pic= NULL;
    /* [elided] */
    if(h->s.current_picture_ptr)
        h->s.current_picture_ptr->reference= 0;
    /* [elided: tail of the function] */
/**
 * Removes the short-term reference with the given frame_num from
 * h->short_ref, compacting the array.
 * @return the removed picture or NULL if an error occurs
 * NOTE(review): the `int i;` declaration, the return statements and some
 * braces are elided in this extract.
 */
static Picture * remove_short(H264Context *h, int frame_num){
    MpegEncContext * const s = &h->s;
    /* [elided: int i;] */
    if(s->avctx->debug&FF_DEBUG_MMCO)
        av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);

    for(i=0; i<h->short_ref_count; i++){
        Picture *pic= h->short_ref[i];
        if(s->avctx->debug&FF_DEBUG_MMCO)
            av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
        if(pic->frame_num == frame_num){
            h->short_ref[i]= NULL;
            /* close the gap left by the removed entry */
            memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
            h->short_ref_count--;
            /* [elided — presumably return pic;] */
    /* [elided — presumably return NULL;] */
/**
 * Removes the long-term reference at index i (if any) from h->long_ref.
 * @return the removed picture or NULL if an error occurs
 * NOTE(review): the declaration of `pic` and the final return are elided in
 * this extract.
 */
static Picture * remove_long(H264Context *h, int i){
    /* [elided: Picture *pic;] */
    pic= h->long_ref[i];
    h->long_ref[i]= NULL;
    if(pic) h->long_ref_count--;  /* slot may already have been empty */
    /* [elided — presumably return pic;] */
/**
 * print short term list
 * Debug helper: dumps index, frame_num, POC and data pointer of every
 * short-term reference when FF_DEBUG_MMCO is enabled; no-op otherwise.
 * NOTE(review): the `int i;` declaration and closing braces are elided in
 * this extract.
 */
static void print_short_term(H264Context *h) {
    /* [elided: int i;] */
    if(h->s.avctx->debug&FF_DEBUG_MMCO) {
        av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
        for(i=0; i<h->short_ref_count; i++){
            Picture *pic= h->short_ref[i];
            av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/**
 * print long term list
 * Debug helper: dumps index, frame_num, POC and data pointer of every
 * occupied long-term reference slot when FF_DEBUG_MMCO is enabled.
 * NOTE(review): the `int i;` declaration, the empty-slot guard before the
 * av_log and closing braces are elided in this extract.
 */
static void print_long_term(H264Context *h) {
    /* [elided: int i;] */
    if(h->s.avctx->debug&FF_DEBUG_MMCO) {
        av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
        for(i = 0; i < 16; i++){
            Picture *pic= h->long_ref[i];
            /* [elided — presumably a guard skipping NULL slots] */
            av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
/**
 * Executes the reference picture marking (memory management control operations).
 * Applies the parsed MMCO list and, when the current picture was not marked
 * long-term, inserts it at the head of the short-term list.
 * NOTE(review): many interior lines (declarations of i/j/pic, the
 * MMCO_LONG/MMCO_RESET/default case labels, break statements, if(pic)
 * guards, current_is_long updates and the return) are elided in this
 * extract.
 */
static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
    MpegEncContext * const s = &h->s;
    /* [elided: local declarations] */
    int current_is_long=0;

    if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
        av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");

    for(i=0; i<mmco_count; i++){
        if(s->avctx->debug&FF_DEBUG_MMCO)
            av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);

        switch(mmco[i].opcode){
        case MMCO_SHORT2UNUSED:
            pic= remove_short(h, mmco[i].short_frame_num);
            /* [elided: if(pic)] */
                unreference_pic(h, pic);
            else if(s->avctx->debug&FF_DEBUG_MMCO)
                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
            /* [elided: break;] */
        case MMCO_SHORT2LONG:
            /* free the target long-term slot before moving the short ref in */
            pic= remove_long(h, mmco[i].long_index);
            if(pic) unreference_pic(h, pic);

            h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
            h->long_ref[ mmco[i].long_index ]->long_ref=1;
            h->long_ref_count++;
            /* [elided: break;] */
        case MMCO_LONG2UNUSED:
            pic= remove_long(h, mmco[i].long_index);
            /* [elided: if(pic)] */
                unreference_pic(h, pic);
            else if(s->avctx->debug&FF_DEBUG_MMCO)
                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
            /* [elided: break; case MMCO_LONG: — mark the current picture long-term] */
            pic= remove_long(h, mmco[i].long_index);
            if(pic) unreference_pic(h, pic);

            h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
            h->long_ref[ mmco[i].long_index ]->long_ref=1;
            h->long_ref_count++;
            /* [elided — presumably current_is_long=1; break;] */
        case MMCO_SET_MAX_LONG:
            assert(mmco[i].long_index <= 16);
            // just remove the long term which index is greater than new max
            for(j = mmco[i].long_index; j<16; j++){
                pic = remove_long(h, j);
                if (pic) unreference_pic(h, pic);
            /* [elided: break; case MMCO_RESET: — flush everything] */
            while(h->short_ref_count){
                pic= remove_short(h, h->short_ref[0]->frame_num);
                unreference_pic(h, pic);
            for(j = 0; j < 16; j++) {
                pic= remove_long(h, j);
                if(pic) unreference_pic(h, pic);
            /* [elided: break; default case; loop close] */

    if(!current_is_long){
        /* a short ref with the current frame_num must not already exist;
         * if one does, drop it and report the inconsistent buffer state */
        pic= remove_short(h, s->current_picture_ptr->frame_num);
        /* [elided: if(pic)] */
            unreference_pic(h, pic);
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");

        if(h->short_ref_count)
            memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));

        /* current picture becomes the newest short-term reference */
        h->short_ref[0]= s->current_picture_ptr;
        h->short_ref[0]->long_ref=0;
        h->short_ref_count++;
    /* [elided] */
    print_short_term(h);
    /* [elided — presumably print_long_term(h); return;] */
/**
 * Parses dec_ref_pic_marking(): IDR flags, or the adaptive MMCO command
 * list, or the implicit sliding-window operation, storing the resulting
 * operations in h->mmco for execute_ref_pic_marking().
 * NOTE(review): interior lines (declaration of i, h->mmco_index
 * bookkeeping, else branches, break/return statements) are elided in this
 * extract.
 */
static int decode_ref_pic_marking(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* [elided: int i;] */
    if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
        /* first IDR flag (no_output_of_prior_pics_flag per spec), stored
         * minus one — TODO confirm the broken_link semantics */
        s->broken_link= get_bits1(&s->gb) -1;
        h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
        if(h->mmco[0].long_index == -1)
            /* [elided — presumably no MMCO is recorded] */
        /* [elided: else] */
            h->mmco[0].opcode= MMCO_LONG;
            /* [elided — presumably records one MMCO] */
    /* [elided: else — non-IDR path] */
        if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
            for(i= 0; i<MAX_MMCO_COUNT; i++) {
                MMCOOpcode opcode= get_ue_golomb(&s->gb);;

                h->mmco[i].opcode= opcode;
                if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
                    /* difference_of_pic_nums_minus1 -> absolute frame_num (modular) */
                    h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
/*                  if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
                        av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
                    [elided: remainder of this commented-out validation] */
                if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
                    h->mmco[i].long_index= get_ue_golomb(&s->gb);
                    if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
                        av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
                        /* [elided: error return] */
                if(opcode > MMCO_LONG){
                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
                    /* [elided: error return] */
                if(opcode == MMCO_END)
                    /* [elided: break] */
            /* [elided — mmco_index bookkeeping] */
        /* [elided: else — sliding window: synthesize an MMCO when the
         * reference buffer is already full] */
            assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);

            if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
                h->mmco[0].opcode= MMCO_SHORT2UNUSED;
                h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
            /* [elided: mmco_index bookkeeping; return] */
/**
 * Computes the picture order count (POC) of the current picture according
 * to sps.poc_type (0: MSB/LSB wrap tracking, 1: expected-delta cycles,
 * 2: derived from frame_num) and stores the per-field and frame POCs in the
 * current picture.
 * NOTE(review): interior lines (declarations of i/poc/field_poc, several
 * else branches, the field_poc[0] assignments for types 0 and 2, closing
 * braces and the return) are elided in this extract.
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    /* [elided: local declarations] */

    /* frame_num_offset accumulates every wrap of frame_num */
    if(h->nal_unit_type == NAL_IDR_SLICE){
        h->frame_num_offset= 0;
    /* [elided: else] */
        if(h->frame_num < h->prev_frame_num)
            h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
        /* [elided: else] */
            h->frame_num_offset= h->prev_frame_num_offset;

    if(h->sps.poc_type==0){
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        if(h->nal_unit_type == NAL_IDR_SLICE){
            /* [elided — presumably resets prev POC state at IDR] */
        /* infer the MSB from the direction of the LSB wrap */
        if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        /* [elided: else] */
            h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
        /* [elided: field_poc[0] assignment] */
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        /* [elided: int i;] */
        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        /* [elided: else — abs_frame_num stays 0] */

        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            /* [elided — presumably abs_frame_num is decremented for non-refs] */

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        /* [elided: else — expectedpoc stays 0] */

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    /* [elided: else — poc_type 2, POC derived from frame_num] */
        if(h->nal_unit_type == NAL_IDR_SLICE){
            /* [elided — presumably poc = 0 at IDR] */
        /* [elided: else] */
            if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
            else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
        /* [elided — presumably both field POCs are set to poc] */

    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    if(s->picture_structure == PICT_FRAME) // FIXME field pix?
        s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
    /* [elided: return] */
/**
 * decodes a slice header.
 * this will also call MPV_common_init() and frame_start() as needed
 *
 * Parses the full slice_header() syntax: slice type, PPS/SPS activation,
 * geometry (re)initialization, frame_num, POC fields, reference counts,
 * list reordering, prediction weights, reference picture marking, QP and
 * deblocking-filter parameters.
 * NOTE(review): many interior lines (error returns, else branches, closing
 * braces, slice_num/POC bookkeeping) are elided in this extract.
 */
static int decode_slice_header(H264Context *h){
    MpegEncContext * const s = &h->s;
    int first_mb_in_slice, pps_id;
    int num_ref_idx_active_override_flag;
    static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
    /* [elided: more locals, e.g. slice_type, i] */
    int default_ref_list_done = 0;

    s->current_picture.reference= h->nal_ref_idc != 0;
    s->dropable= h->nal_ref_idc == 0;

    first_mb_in_slice= get_ue_golomb(&s->gb);

    slice_type= get_ue_golomb(&s->gb);
    /* [elided: range check on slice_type] */
        av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
        /* [elided: error return] */
    /* [elided — values above 4 presumably mean "type fixed for the picture"] */
        h->slice_type_fixed=1;
    /* [elided: else] */
        h->slice_type_fixed=0;

    slice_type= slice_type_map[ slice_type ];
    if (slice_type == I_TYPE
        || (h->slice_num != 0 && slice_type == h->slice_type) ) {
        default_ref_list_done = 1;  /* default lists from a previous slice stay valid */
    /* [elided] */
    h->slice_type= slice_type;

    s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though

    pps_id= get_ue_golomb(&s->gb);
    /* [elided: pps_id range check] */
        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
        /* [elided: error return] */
    h->pps= h->pps_buffer[pps_id];
    if(h->pps.slice_group_count == 0){  /* zero means the slot was never filled */
        av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
        /* [elided: error return] */

    h->sps= h->sps_buffer[ h->pps.sps_id ];
    if(h->sps.log2_max_frame_num == 0){  /* zero means the slot was never filled */
        av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
        /* [elided: error return] */

    if(h->dequant_coeff_pps != pps_id){
        h->dequant_coeff_pps = pps_id;
        init_dequant_tables(h);

    s->mb_width= h->sps.mb_width;
    s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);

    h->b_stride= s->mb_width*4;
    h->b8_stride= s->mb_width*2;

    s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right);
    if(h->sps.frame_mbs_only_flag)
        s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
    /* [elided: else — field-coded content crops twice as much vertically] */
        s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck

    if (s->context_initialized
        && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
        /* [elided — presumably frees the context so it gets rebuilt] */

    if (!s->context_initialized) {
        if (MPV_common_init(s) < 0)
            /* [elided: error return] */

        /* pick scan tables matching the (possibly permuted) IDCT layout */
        if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
            memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
            memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
        /* [elided: else — apply the 4x4 permutation] */
            for(i=0; i<16; i++){
#define T(x) (x>>2) | ((x<<2) & 0xF)
                h->zigzag_scan[i] = T(zigzag_scan[i]);
                h-> field_scan[i] = T( field_scan[i]);
            /* [elided] */
        if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
            memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
            memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
            memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
            memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
        /* [elided: else — apply the 8x8 permutation] */
            for(i=0; i<64; i++){
#define T(x) (x>>3) | ((x&7)<<3)
                h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
                h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
                h->field_scan8x8[i] = T(field_scan8x8[i]);
                h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
            /* [elided] */
        if(h->sps.transform_bypass){ //FIXME same ugly
            /* lossless (bypass) blocks use the raw, unpermuted scan order */
            h->zigzag_scan_q0 = zigzag_scan;
            h->zigzag_scan8x8_q0 = zigzag_scan8x8;
            h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
            h->field_scan_q0 = field_scan;
            h->field_scan8x8_q0 = field_scan8x8;
            h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
        /* [elided: else] */
            h->zigzag_scan_q0 = h->zigzag_scan;
            h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
            h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
            h->field_scan_q0 = h->field_scan;
            h->field_scan8x8_q0 = h->field_scan8x8;
            h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
        /* [elided] */
        s->avctx->width = s->width;
        s->avctx->height = s->height;
        s->avctx->sample_aspect_ratio= h->sps.sar;
        if(!s->avctx->sample_aspect_ratio.den)
            s->avctx->sample_aspect_ratio.den = 1;

        if(h->sps.timing_info_present_flag){
            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
            /* old x264 builds wrote timestamps in fields, not frames */
            if(h->x264_build > 0 && h->x264_build < 44)
                s->avctx->time_base.den *= 2;
            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
    /* [elided] */
    if(h->slice_num == 0){
        if(frame_start(h) < 0)
            /* [elided: error return] */

    s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
        h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);

    h->mb_aff_frame = 0;
    if(h->sps.frame_mbs_only_flag){
        s->picture_structure= PICT_FRAME;
    /* [elided: else] */
        if(get_bits1(&s->gb)) { //field_pic_flag
            s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
            av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
        /* [elided: else] */
            s->picture_structure= PICT_FRAME;
            h->mb_aff_frame = h->sps.mb_aff;
    /* [elided] */
    s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
    s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
    if(s->mb_y >= s->mb_height){
        /* [elided: error return] */

    if(s->picture_structure==PICT_FRAME){
        h->curr_pic_num= h->frame_num;
        h->max_pic_num= 1<< h->sps.log2_max_frame_num;
    /* [elided: else — field coding doubles the pic-num space] */
        h->curr_pic_num= 2*h->frame_num;
        h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
    /* [elided] */
    if(h->nal_unit_type == NAL_IDR_SLICE){
        get_ue_golomb(&s->gb); /* idr_pic_id */

    if(h->sps.poc_type==0){
        h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);

        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
            h->delta_poc_bottom= get_se_golomb(&s->gb);

    if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
        h->delta_poc[0]= get_se_golomb(&s->gb);

        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
            h->delta_poc[1]= get_se_golomb(&s->gb);
    /* [elided — presumably init_poc() is invoked here] */

    if(h->pps.redundant_pic_cnt_present){
        h->redundant_pic_count= get_ue_golomb(&s->gb);

    //set defaults, might be overriden a few line later
    h->ref_count[0]= h->pps.ref_count[0];
    h->ref_count[1]= h->pps.ref_count[1];

    if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
        if(h->slice_type == B_TYPE){
            h->direct_spatial_mv_pred= get_bits1(&s->gb);
            if(h->sps.mb_aff && h->direct_spatial_mv_pred)
                av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
        /* [elided] */
        num_ref_idx_active_override_flag= get_bits1(&s->gb);

        if(num_ref_idx_active_override_flag){
            h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
            if(h->slice_type==B_TYPE)
                h->ref_count[1]= get_ue_golomb(&s->gb) + 1;

            if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
                av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
                /* [elided: error return] */
    /* [elided] */
    if(!default_ref_list_done){
        fill_default_ref_list(h);

    if(decode_ref_pic_list_reordering(h) < 0)
        /* [elided: error return] */

    if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE))
       || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE) )
        pred_weight_table(h);
    else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
        implicit_weight_table(h);
    /* [elided — presumably an else clearing use_weight] */

    if(s->current_picture.reference)
        decode_ref_pic_marking(h);
    /* [elided — presumably guarded by FRAME_MBAFF] */
        fill_mbaff_ref_list(h);

    if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
        h->cabac_init_idc = get_ue_golomb(&s->gb);

    h->last_qscale_diff = 0;
    s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
    if(s->qscale<0 || s->qscale>51){
        av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
        /* [elided: error return] */
    h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
    //FIXME qscale / qp ... stuff
    if(h->slice_type == SP_TYPE){
        get_bits1(&s->gb); /* sp_for_switch_flag */
    /* [elided] */
    if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
        get_se_golomb(&s->gb); /* slice_qs_delta */

    h->deblocking_filter = 1;
    h->slice_alpha_c0_offset = 0;
    h->slice_beta_offset = 0;
    if( h->pps.deblocking_filter_parameters_present ) {
        h->deblocking_filter= get_ue_golomb(&s->gb);
        if(h->deblocking_filter < 2)
            h->deblocking_filter^= 1; // 1<->0
        /* [elided] */
        if( h->deblocking_filter ) {
            h->slice_alpha_c0_offset= get_se_golomb(&s->gb) << 1;
            h->slice_beta_offset= get_se_golomb(&s->gb) << 1;
    /* [elided] */
    if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
       ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
        h->deblocking_filter= 0;

    /* NOTE(review): the `?` bit count below is not valid C; in upstream this
     * line sits inside a disabled (#if 0) region whose guards were dropped
     * by this extraction — confirm before touching. */
    if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
        slice_group_change_cycle= get_bits(&s->gb, ?);

    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
    h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
               /* [elided: leading arguments] */
               (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
               /* [elided] */
               av_get_pict_type_char(h->slice_type),
               pps_id, h->frame_num,
               s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
               h->ref_count[0], h->ref_count[1],
               /* [elided] */
               h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
               /* [elided] */
               h->use_weight==1 && h->use_weight_chroma ? "c" : ""
    /* [elided: closing of the av_log call and the function's return] */
/**
 * Reads the CAVLC level_prefix: counts the leading zero bits before the
 * first 1 bit in the bit cache and consumes them together with that 1 bit.
 * NOTE(review): the declarations of buf/log, the surrounding trace #ifdef
 * guards and the return statement (presumably `return log-1;`) are elided
 * in this extract.
 */
static inline int get_level_prefix(GetBitContext *gb){
    /* [elided: local declarations] */
    OPEN_READER(re, gb);
    UPDATE_CACHE(re, gb);
    buf=GET_CACHE(re, gb);

    /* bits up to and including the leading 1 */
    log= 32 - av_log2(buf);
    /* [elided — presumably TRACE-only debug block] */
    print_bin(buf>>(32-log), log);
    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
    /* [elided] */
    LAST_SKIP_BITS(re, gb, log);
    CLOSE_READER(re, gb);
    /* [elided: return] */
/**
 * Presumably decides whether the 8x8 transform may be used for the current
 * macroblock: disallowed when any sub-MB partition is smaller than 8x8, or
 * is DIRECT without direct_8x8_inference — TODO confirm; most of this body
 * (loop header, return statements) is elided in this extract.
 */
static inline int get_dct8x8_allowed(H264Context *h){
    /* [elided: loop over the four 8x8 partitions] */
    if(!IS_SUB_8X8(h->sub_mb_type[i])
       || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
        /* [elided: returns] */
4924 * decodes a residual block.
4925 * @param n block index
4926 * @param scantable scantable
4927 * @param max_coeff number of coefficients in the block
4928 * @return <0 if an error occured
4930 static int decode_residual(H264Context
*h
, GetBitContext
*gb
, DCTELEM
*block
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
){
4931 MpegEncContext
* const s
= &h
->s
;
4932 static const int coeff_token_table_index
[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4934 int zeros_left
, coeff_num
, coeff_token
, total_coeff
, i
, j
, trailing_ones
, run_before
;
4936 //FIXME put trailing_onex into the context
4938 if(n
== CHROMA_DC_BLOCK_INDEX
){
4939 coeff_token
= get_vlc2(gb
, chroma_dc_coeff_token_vlc
.table
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 1);
4940 total_coeff
= coeff_token
>>2;
4942 if(n
== LUMA_DC_BLOCK_INDEX
){
4943 total_coeff
= pred_non_zero_count(h
, 0);
4944 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4945 total_coeff
= coeff_token
>>2;
4947 total_coeff
= pred_non_zero_count(h
, n
);
4948 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4949 total_coeff
= coeff_token
>>2;
4950 h
->non_zero_count_cache
[ scan8
[n
] ]= total_coeff
;
4954 //FIXME set last_non_zero?
4959 trailing_ones
= coeff_token
&3;
4960 tprintf("trailing:%d, total:%d\n", trailing_ones
, total_coeff
);
4961 assert(total_coeff
<=16);
4963 for(i
=0; i
<trailing_ones
; i
++){
4964 level
[i
]= 1 - 2*get_bits1(gb
);
4968 int level_code
, mask
;
4969 int suffix_length
= total_coeff
> 10 && trailing_ones
< 3;
4970 int prefix
= get_level_prefix(gb
);
4972 //first coefficient has suffix_length equal to 0 or 1
4973 if(prefix
<14){ //FIXME try to build a large unified VLC table for all this
4975 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4977 level_code
= (prefix
<<suffix_length
); //part
4978 }else if(prefix
==14){
4980 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4982 level_code
= prefix
+ get_bits(gb
, 4); //part
4983 }else if(prefix
==15){
4984 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, 12); //part
4985 if(suffix_length
==0) level_code
+=15; //FIXME doesn't make (much)sense
4987 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "prefix too large at %d %d\n", s
->mb_x
, s
->mb_y
);
4991 if(trailing_ones
< 3) level_code
+= 2;
4996 mask
= -(level_code
&1);
4997 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
5000 //remaining coefficients have suffix_length > 0
5001 for(;i
<total_coeff
;i
++) {
5002 static const int suffix_limit
[7] = {0,5,11,23,47,95,INT_MAX
};
5003 prefix
= get_level_prefix(gb
);
5005 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
);
5006 }else if(prefix
==15){
5007 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, 12);
5009 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "prefix too large at %d %d\n", s
->mb_x
, s
->mb_y
);
5012 mask
= -(level_code
&1);
5013 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
5014 if(level_code
> suffix_limit
[suffix_length
])
5019 if(total_coeff
== max_coeff
)
5022 if(n
== CHROMA_DC_BLOCK_INDEX
)
5023 zeros_left
= get_vlc2(gb
, chroma_dc_total_zeros_vlc
[ total_coeff
-1 ].table
, CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 1);
5025 zeros_left
= get_vlc2(gb
, total_zeros_vlc
[ total_coeff
-1 ].table
, TOTAL_ZEROS_VLC_BITS
, 1);
5028 coeff_num
= zeros_left
+ total_coeff
- 1;
5029 j
= scantable
[coeff_num
];
5031 block
[j
] = level
[0];
5032 for(i
=1;i
<total_coeff
;i
++) {
5035 else if(zeros_left
< 7){
5036 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
5038 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
5040 zeros_left
-= run_before
;
5041 coeff_num
-= 1 + run_before
;
5042 j
= scantable
[ coeff_num
];
5047 block
[j
] = (level
[0] * qmul
[j
] + 32)>>6;
5048 for(i
=1;i
<total_coeff
;i
++) {
5051 else if(zeros_left
< 7){
5052 run_before
= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
5054 run_before
= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
5056 zeros_left
-= run_before
;
5057 coeff_num
-= 1 + run_before
;
5058 j
= scantable
[ coeff_num
];
5060 block
[j
]= (level
[i
] * qmul
[j
] + 32)>>6;
5065 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "negative number of zero coeffs at %d %d\n", s
->mb_x
, s
->mb_y
);
5072 static void predict_field_decoding_flag(H264Context
*h
){
5073 MpegEncContext
* const s
= &h
->s
;
5074 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5075 int mb_type
= (h
->slice_table
[mb_xy
-1] == h
->slice_num
)
5076 ? s
->current_picture
.mb_type
[mb_xy
-1]
5077 : (h
->slice_table
[mb_xy
-s
->mb_stride
] == h
->slice_num
)
5078 ? s
->current_picture
.mb_type
[mb_xy
-s
->mb_stride
]
5080 h
->mb_mbaff
= h
->mb_field_decoding_flag
= IS_INTERLACED(mb_type
) ? 1 : 0;
5084 * decodes a P_SKIP or B_SKIP macroblock
5086 static void decode_mb_skip(H264Context
*h
){
5087 MpegEncContext
* const s
= &h
->s
;
5088 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5091 memset(h
->non_zero_count
[mb_xy
], 0, 16);
5092 memset(h
->non_zero_count_cache
+ 8, 0, 8*5); //FIXME ugly, remove pfui
5095 mb_type
|= MB_TYPE_INTERLACED
;
5097 if( h
->slice_type
== B_TYPE
)
5099 // just for fill_caches. pred_direct_motion will set the real mb_type
5100 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
|MB_TYPE_SKIP
;
5102 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
5103 pred_direct_motion(h
, &mb_type
);
5105 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
5106 fill_rectangle(h
->mvd_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
5112 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P1L0
|MB_TYPE_SKIP
;
5114 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
5115 pred_pskip_motion(h
, &mx
, &my
);
5116 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, 0, 1);
5117 fill_rectangle( h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mx
,my
), 4);
5119 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
5122 write_back_motion(h
, mb_type
);
5123 s
->current_picture
.mb_type
[mb_xy
]= mb_type
|MB_TYPE_SKIP
;
5124 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
5125 h
->slice_table
[ mb_xy
]= h
->slice_num
;
5126 h
->prev_mb_skipped
= 1;
5130 * decodes a macroblock
5131 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5133 static int decode_mb_cavlc(H264Context
*h
){
5134 MpegEncContext
* const s
= &h
->s
;
5135 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5136 int mb_type
, partition_count
, cbp
;
5137 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
5139 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?
5141 tprintf("pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
5142 cbp
= 0; /* avoid warning. FIXME: find a solution without slowing
5144 if(h
->slice_type
!= I_TYPE
&& h
->slice_type
!= SI_TYPE
){
5145 if(s
->mb_skip_run
==-1)
5146 s
->mb_skip_run
= get_ue_golomb(&s
->gb
);
5148 if (s
->mb_skip_run
--) {
5149 if(FRAME_MBAFF
&& (s
->mb_y
&1) == 0){
5150 if(s
->mb_skip_run
==0)
5151 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
5153 predict_field_decoding_flag(h
);
5160 if( (s
->mb_y
&1) == 0 )
5161 h
->mb_mbaff
= h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
5163 h
->mb_field_decoding_flag
= (s
->picture_structure
!=PICT_FRAME
);
5165 h
->prev_mb_skipped
= 0;
5167 mb_type
= get_ue_golomb(&s
->gb
);
5168 if(h
->slice_type
== B_TYPE
){
5170 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
5171 mb_type
= b_mb_type_info
[mb_type
].type
;
5174 goto decode_intra_mb
;
5176 }else if(h
->slice_type
== P_TYPE
/*|| h->slice_type == SP_TYPE */){
5178 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
5179 mb_type
= p_mb_type_info
[mb_type
].type
;
5182 goto decode_intra_mb
;
5185 assert(h
->slice_type
== I_TYPE
);
5188 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_type %d in %c slice to large at %d %d\n", mb_type
, av_get_pict_type_char(h
->slice_type
), s
->mb_x
, s
->mb_y
);
5192 cbp
= i_mb_type_info
[mb_type
].cbp
;
5193 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
5194 mb_type
= i_mb_type_info
[mb_type
].type
;
5198 mb_type
|= MB_TYPE_INTERLACED
;
5200 h
->slice_table
[ mb_xy
]= h
->slice_num
;
5202 if(IS_INTRA_PCM(mb_type
)){
5205 // we assume these blocks are very rare so we dont optimize it
5206 align_get_bits(&s
->gb
);
5208 // The pixels are stored in the same order as levels in h->mb array.
5209 for(y
=0; y
<16; y
++){
5210 const int index
= 4*(y
&3) + 32*((y
>>2)&1) + 128*(y
>>3);
5211 for(x
=0; x
<16; x
++){
5212 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
5213 h
->mb
[index
+ (x
&3) + 16*((x
>>2)&1) + 64*(x
>>3)]= get_bits(&s
->gb
, 8);
5217 const int index
= 256 + 4*(y
&3) + 32*(y
>>2);
5219 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
5220 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= get_bits(&s
->gb
, 8);
5224 const int index
= 256 + 64 + 4*(y
&3) + 32*(y
>>2);
5226 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
5227 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= get_bits(&s
->gb
, 8);
5231 // In deblocking, the quantizer is 0
5232 s
->current_picture
.qscale_table
[mb_xy
]= 0;
5233 h
->chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, 0);
5234 // All coeffs are present
5235 memset(h
->non_zero_count
[mb_xy
], 16, 16);
5237 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5242 h
->ref_count
[0] <<= 1;
5243 h
->ref_count
[1] <<= 1;
5246 fill_caches(h
, mb_type
, 0);
5249 if(IS_INTRA(mb_type
)){
5250 // init_top_left_availability(h);
5251 if(IS_INTRA4x4(mb_type
)){
5254 if(dct8x8_allowed
&& get_bits1(&s
->gb
)){
5255 mb_type
|= MB_TYPE_8x8DCT
;
5259 // fill_intra4x4_pred_table(h);
5260 for(i
=0; i
<16; i
+=di
){
5261 const int mode_coded
= !get_bits1(&s
->gb
);
5262 const int predicted_mode
= pred_intra_mode(h
, i
);
5266 const int rem_mode
= get_bits(&s
->gb
, 3);
5267 if(rem_mode
<predicted_mode
)
5272 mode
= predicted_mode
;
5276 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
5278 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = mode
;
5280 write_back_intra_pred_mode(h
);
5281 if( check_intra4x4_pred_mode(h
) < 0)
5284 h
->intra16x16_pred_mode
= check_intra_pred_mode(h
, h
->intra16x16_pred_mode
);
5285 if(h
->intra16x16_pred_mode
< 0)
5288 h
->chroma_pred_mode
= get_ue_golomb(&s
->gb
);
5290 h
->chroma_pred_mode
= check_intra_pred_mode(h
, h
->chroma_pred_mode
);
5291 if(h
->chroma_pred_mode
< 0)
5293 }else if(partition_count
==4){
5294 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
5296 if(h
->slice_type
== B_TYPE
){
5298 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
5299 if(h
->sub_mb_type
[i
] >=13){
5300 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "B sub_mb_type %d out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
5303 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5304 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5306 if( IS_DIRECT(h
->sub_mb_type
[0]) || IS_DIRECT(h
->sub_mb_type
[1])
5307 || IS_DIRECT(h
->sub_mb_type
[2]) || IS_DIRECT(h
->sub_mb_type
[3])) {
5308 pred_direct_motion(h
, &mb_type
);
5309 h
->ref_cache
[0][scan8
[4]] =
5310 h
->ref_cache
[1][scan8
[4]] =
5311 h
->ref_cache
[0][scan8
[12]] =
5312 h
->ref_cache
[1][scan8
[12]] = PART_NOT_AVAILABLE
;
5315 assert(h
->slice_type
== P_TYPE
|| h
->slice_type
== SP_TYPE
); //FIXME SP correct ?
5317 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
5318 if(h
->sub_mb_type
[i
] >=4){
5319 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "P sub_mb_type %d out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
5322 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5323 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5327 for(list
=0; list
<2; list
++){
5328 int ref_count
= IS_REF0(mb_type
) ? 1 : h
->ref_count
[list
];
5329 if(ref_count
== 0) continue;
5331 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
5332 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
5333 ref
[list
][i
] = get_te0_golomb(&s
->gb
, ref_count
); //FIXME init to 0 before and skip?
5342 dct8x8_allowed
= get_dct8x8_allowed(h
);
5344 for(list
=0; list
<2; list
++){
5345 const int ref_count
= IS_REF0(mb_type
) ? 1 : h
->ref_count
[list
];
5346 if(ref_count
== 0) continue;
5349 if(IS_DIRECT(h
->sub_mb_type
[i
])) {
5350 h
->ref_cache
[list
][ scan8
[4*i
] ] = h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
5353 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
5354 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
5356 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
5357 const int sub_mb_type
= h
->sub_mb_type
[i
];
5358 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
5359 for(j
=0; j
<sub_partition_count
[i
]; j
++){
5361 const int index
= 4*i
+ block_width
*j
;
5362 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
5363 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mx
, &my
);
5364 mx
+= get_se_golomb(&s
->gb
);
5365 my
+= get_se_golomb(&s
->gb
);
5366 tprintf("final mv:%d %d\n", mx
, my
);
5368 if(IS_SUB_8X8(sub_mb_type
)){
5369 mv_cache
[ 0 ][0]= mv_cache
[ 1 ][0]=
5370 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
5371 mv_cache
[ 0 ][1]= mv_cache
[ 1 ][1]=
5372 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
5373 }else if(IS_SUB_8X4(sub_mb_type
)){
5374 mv_cache
[ 0 ][0]= mv_cache
[ 1 ][0]= mx
;
5375 mv_cache
[ 0 ][1]= mv_cache
[ 1 ][1]= my
;
5376 }else if(IS_SUB_4X8(sub_mb_type
)){
5377 mv_cache
[ 0 ][0]= mv_cache
[ 8 ][0]= mx
;
5378 mv_cache
[ 0 ][1]= mv_cache
[ 8 ][1]= my
;
5380 assert(IS_SUB_4X4(sub_mb_type
));
5381 mv_cache
[ 0 ][0]= mx
;
5382 mv_cache
[ 0 ][1]= my
;
5386 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
5392 }else if(IS_DIRECT(mb_type
)){
5393 pred_direct_motion(h
, &mb_type
);
5394 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
5396 int list
, mx
, my
, i
;
5397 //FIXME we should set ref_idx_l? to 0 if we use that later ...
5398 if(IS_16X16(mb_type
)){
5399 for(list
=0; list
<2; list
++){
5400 if(h
->ref_count
[list
]>0){
5401 if(IS_DIR(mb_type
, 0, list
)){
5402 const int val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
5403 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 1);
5405 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, (LIST_NOT_USED
&0xFF), 1);
5408 for(list
=0; list
<2; list
++){
5409 if(IS_DIR(mb_type
, 0, list
)){
5410 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mx
, &my
);
5411 mx
+= get_se_golomb(&s
->gb
);
5412 my
+= get_se_golomb(&s
->gb
);
5413 tprintf("final mv:%d %d\n", mx
, my
);
5415 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
,my
), 4);
5417 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, 0, 4);
5420 else if(IS_16X8(mb_type
)){
5421 for(list
=0; list
<2; list
++){
5422 if(h
->ref_count
[list
]>0){
5424 if(IS_DIR(mb_type
, i
, list
)){
5425 const int val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
5426 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 1);
5428 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, (LIST_NOT_USED
&0xFF), 1);
5432 for(list
=0; list
<2; list
++){
5434 if(IS_DIR(mb_type
, i
, list
)){
5435 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mx
, &my
);
5436 mx
+= get_se_golomb(&s
->gb
);
5437 my
+= get_se_golomb(&s
->gb
);
5438 tprintf("final mv:%d %d\n", mx
, my
);
5440 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
,my
), 4);
5442 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5446 assert(IS_8X16(mb_type
));
5447 for(list
=0; list
<2; list
++){
5448 if(h
->ref_count
[list
]>0){
5450 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
5451 const int val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
5452 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 1);
5454 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, (LIST_NOT_USED
&0xFF), 1);
5458 for(list
=0; list
<2; list
++){
5460 if(IS_DIR(mb_type
, i
, list
)){
5461 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mx
, &my
);
5462 mx
+= get_se_golomb(&s
->gb
);
5463 my
+= get_se_golomb(&s
->gb
);
5464 tprintf("final mv:%d %d\n", mx
, my
);
5466 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
,my
), 4);
5468 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5474 if(IS_INTER(mb_type
))
5475 write_back_motion(h
, mb_type
);
5477 if(!IS_INTRA16x16(mb_type
)){
5478 cbp
= get_ue_golomb(&s
->gb
);
5480 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cbp too large (%d) at %d %d\n", cbp
, s
->mb_x
, s
->mb_y
);
5484 if(IS_INTRA4x4(mb_type
))
5485 cbp
= golomb_to_intra4x4_cbp
[cbp
];
5487 cbp
= golomb_to_inter_cbp
[cbp
];
5490 if(dct8x8_allowed
&& (cbp
&15) && !IS_INTRA(mb_type
)){
5491 if(get_bits1(&s
->gb
))
5492 mb_type
|= MB_TYPE_8x8DCT
;
5494 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5496 if(cbp
|| IS_INTRA16x16(mb_type
)){
5497 int i8x8
, i4x4
, chroma_idx
;
5498 int chroma_qp
, dquant
;
5499 GetBitContext
*gb
= IS_INTRA(mb_type
) ? h
->intra_gb_ptr
: h
->inter_gb_ptr
;
5500 const uint8_t *scan
, *scan8x8
, *dc_scan
;
5502 // fill_non_zero_count_cache(h);
5504 if(IS_INTERLACED(mb_type
)){
5505 scan8x8
= s
->qscale
? h
->field_scan8x8_cavlc
: h
->field_scan8x8_cavlc_q0
;
5506 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
5507 dc_scan
= luma_dc_field_scan
;
5509 scan8x8
= s
->qscale
? h
->zigzag_scan8x8_cavlc
: h
->zigzag_scan8x8_cavlc_q0
;
5510 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
5511 dc_scan
= luma_dc_zigzag_scan
;
5514 dquant
= get_se_golomb(&s
->gb
);
5516 if( dquant
> 25 || dquant
< -26 ){
5517 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "dquant out of range (%d) at %d %d\n", dquant
, s
->mb_x
, s
->mb_y
);
5521 s
->qscale
+= dquant
;
5522 if(((unsigned)s
->qscale
) > 51){
5523 if(s
->qscale
<0) s
->qscale
+= 52;
5524 else s
->qscale
-= 52;
5527 h
->chroma_qp
= chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, s
->qscale
);
5528 if(IS_INTRA16x16(mb_type
)){
5529 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
, LUMA_DC_BLOCK_INDEX
, dc_scan
, h
->dequant4_coeff
[0][s
->qscale
], 16) < 0){
5530 return -1; //FIXME continue if partitioned and other return -1 too
5533 assert((cbp
&15) == 0 || (cbp
&15) == 15);
5536 for(i8x8
=0; i8x8
<4; i8x8
++){
5537 for(i4x4
=0; i4x4
<4; i4x4
++){
5538 const int index
= i4x4
+ 4*i8x8
;
5539 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
+ 16*index
, index
, scan
+ 1, h
->dequant4_coeff
[0][s
->qscale
], 15) < 0 ){
5545 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
5548 for(i8x8
=0; i8x8
<4; i8x8
++){
5549 if(cbp
& (1<<i8x8
)){
5550 if(IS_8x8DCT(mb_type
)){
5551 DCTELEM
*buf
= &h
->mb
[64*i8x8
];
5553 for(i4x4
=0; i4x4
<4; i4x4
++){
5554 if( decode_residual(h
, gb
, buf
, i4x4
+4*i8x8
, scan8x8
+16*i4x4
,
5555 h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 16) <0 )
5558 nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
5559 nnz
[0] += nnz
[1] + nnz
[8] + nnz
[9];
5561 for(i4x4
=0; i4x4
<4; i4x4
++){
5562 const int index
= i4x4
+ 4*i8x8
;
5564 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
, h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
], 16) <0 ){
5570 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
5571 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
5577 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++)
5578 if( decode_residual(h
, gb
, h
->mb
+ 256 + 16*4*chroma_idx
, CHROMA_DC_BLOCK_INDEX
, chroma_dc_scan
, NULL
, 4) < 0){
5584 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++){
5585 for(i4x4
=0; i4x4
<4; i4x4
++){
5586 const int index
= 16 + 4*chroma_idx
+ i4x4
;
5587 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
+ 1, h
->dequant4_coeff
[chroma_idx
+1+(IS_INTRA( mb_type
) ? 0:3)][chroma_qp
], 15) < 0){
5593 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
5594 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
5595 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
5598 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
5599 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
5600 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
5601 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
5603 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
5604 write_back_non_zero_count(h
);
5607 h
->ref_count
[0] >>= 1;
5608 h
->ref_count
[1] >>= 1;
5614 static int decode_cabac_field_decoding_flag(H264Context
*h
) {
5615 MpegEncContext
* const s
= &h
->s
;
5616 const int mb_x
= s
->mb_x
;
5617 const int mb_y
= s
->mb_y
& ~1;
5618 const int mba_xy
= mb_x
- 1 + mb_y
*s
->mb_stride
;
5619 const int mbb_xy
= mb_x
+ (mb_y
-2)*s
->mb_stride
;
5621 unsigned int ctx
= 0;
5623 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) ) {
5626 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) ) {
5630 return get_cabac( &h
->cabac
, &h
->cabac_state
[70 + ctx
] );
5633 static int decode_cabac_intra_mb_type(H264Context
*h
, int ctx_base
, int intra_slice
) {
5634 uint8_t *state
= &h
->cabac_state
[ctx_base
];
5638 MpegEncContext
* const s
= &h
->s
;
5639 const int mba_xy
= h
->left_mb_xy
[0];
5640 const int mbb_xy
= h
->top_mb_xy
;
5642 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mba_xy
] ) )
5644 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mbb_xy
] ) )
5646 if( get_cabac( &h
->cabac
, &state
[ctx
] ) == 0 )
5647 return 0; /* I4x4 */
5650 if( get_cabac( &h
->cabac
, &state
[0] ) == 0 )
5651 return 0; /* I4x4 */
5654 if( get_cabac_terminate( &h
->cabac
) )
5655 return 25; /* PCM */
5657 mb_type
= 1; /* I16x16 */
5658 mb_type
+= 12 * get_cabac( &h
->cabac
, &state
[1] ); /* cbp_luma != 0 */
5659 if( get_cabac( &h
->cabac
, &state
[2] ) ) /* cbp_chroma */
5660 mb_type
+= 4 + 4 * get_cabac( &h
->cabac
, &state
[2+intra_slice
] );
5661 mb_type
+= 2 * get_cabac( &h
->cabac
, &state
[3+intra_slice
] );
5662 mb_type
+= 1 * get_cabac( &h
->cabac
, &state
[3+2*intra_slice
] );
5666 static int decode_cabac_mb_type( H264Context
*h
) {
5667 MpegEncContext
* const s
= &h
->s
;
5669 if( h
->slice_type
== I_TYPE
) {
5670 return decode_cabac_intra_mb_type(h
, 3, 1);
5671 } else if( h
->slice_type
== P_TYPE
) {
5672 if( get_cabac( &h
->cabac
, &h
->cabac_state
[14] ) == 0 ) {
5674 if( get_cabac( &h
->cabac
, &h
->cabac_state
[15] ) == 0 ) {
5675 /* P_L0_D16x16, P_8x8 */
5676 return 3 * get_cabac( &h
->cabac
, &h
->cabac_state
[16] );
5678 /* P_L0_D8x16, P_L0_D16x8 */
5679 return 2 - get_cabac( &h
->cabac
, &h
->cabac_state
[17] );
5682 return decode_cabac_intra_mb_type(h
, 17, 0) + 5;
5684 } else if( h
->slice_type
== B_TYPE
) {
5685 const int mba_xy
= h
->left_mb_xy
[0];
5686 const int mbb_xy
= h
->top_mb_xy
;
5690 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mba_xy
] ) )
5692 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_DIRECT( s
->current_picture
.mb_type
[mbb_xy
] ) )
5695 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[27+ctx
] ) )
5696 return 0; /* B_Direct_16x16 */
5698 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[27+3] ) ) {
5699 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[27+5] ); /* B_L[01]_16x16 */
5702 bits
= get_cabac( &h
->cabac
, &h
->cabac_state
[27+4] ) << 3;
5703 bits
|= get_cabac( &h
->cabac
, &h
->cabac_state
[27+5] ) << 2;
5704 bits
|= get_cabac( &h
->cabac
, &h
->cabac_state
[27+5] ) << 1;
5705 bits
|= get_cabac( &h
->cabac
, &h
->cabac_state
[27+5] );
5707 return bits
+ 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5708 else if( bits
== 13 ) {
5709 return decode_cabac_intra_mb_type(h
, 32, 0) + 23;
5710 } else if( bits
== 14 )
5711 return 11; /* B_L1_L0_8x16 */
5712 else if( bits
== 15 )
5713 return 22; /* B_8x8 */
5715 bits
= ( bits
<<1 ) | get_cabac( &h
->cabac
, &h
->cabac_state
[27+5] );
5716 return bits
- 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5718 /* TODO SI/SP frames? */
5723 static int decode_cabac_mb_skip( H264Context
*h
, int mb_x
, int mb_y
) {
5724 MpegEncContext
* const s
= &h
->s
;
5728 if(FRAME_MBAFF
){ //FIXME merge with the stuff in fill_caches?
5729 int mb_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
5732 && h
->slice_table
[mba_xy
] == h
->slice_num
5733 && MB_FIELD
== !!IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) )
5734 mba_xy
+= s
->mb_stride
;
5736 mbb_xy
= mb_xy
- s
->mb_stride
;
5738 && h
->slice_table
[mbb_xy
] == h
->slice_num
5739 && IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) )
5740 mbb_xy
-= s
->mb_stride
;
5742 mbb_xy
= mb_x
+ (mb_y
-1)*s
->mb_stride
;
5744 int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
5746 mbb_xy
= mb_xy
- s
->mb_stride
;
5749 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mba_xy
] ))
5751 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mbb_xy
] ))
5754 if( h
->slice_type
== B_TYPE
)
5756 return get_cabac( &h
->cabac
, &h
->cabac_state
[11+ctx
] );
5759 static int decode_cabac_mb_intra4x4_pred_mode( H264Context
*h
, int pred_mode
) {
5762 if( get_cabac( &h
->cabac
, &h
->cabac_state
[68] ) )
5765 mode
+= 1 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5766 mode
+= 2 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5767 mode
+= 4 * get_cabac( &h
->cabac
, &h
->cabac_state
[69] );
5769 if( mode
>= pred_mode
)
5775 static int decode_cabac_mb_chroma_pre_mode( H264Context
*h
) {
5776 const int mba_xy
= h
->left_mb_xy
[0];
5777 const int mbb_xy
= h
->top_mb_xy
;
5781 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5782 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mba_xy
] != 0 )
5785 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mbb_xy
] != 0 )
5788 if( get_cabac( &h
->cabac
, &h
->cabac_state
[64+ctx
] ) == 0 )
5791 if( get_cabac( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
5793 if( get_cabac( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
/* x / y position of each 4x4 luma block in decode order, and the inverse
 * mapping from (x,y) back to the block index.
 * NOTE(review): block_idx_xy initializer was lost in extraction; values
 * reconstructed as the exact inverse of block_idx_x/block_idx_y. */
static const uint8_t block_idx_x[16] = {
    0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
};
static const uint8_t block_idx_y[16] = {
    0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
};
static const uint8_t block_idx_xy[4][4] = {
    { 0, 2, 8,  10 },
    { 1, 3, 9,  11 },
    { 4, 6, 12, 14 },
    { 5, 7, 13, 15 }
};
5812 static int decode_cabac_mb_cbp_luma( H264Context
*h
) {
5817 if( h
->slice_table
[h
->top_mb_xy
] == h
->slice_num
) {
5819 tprintf("cbp_b = top_cbp = %x\n", cbp_b
);
5822 for( i8x8
= 0; i8x8
< 4; i8x8
++ ) {
5827 x
= block_idx_x
[4*i8x8
];
5828 y
= block_idx_y
[4*i8x8
];
5832 else if( h
->slice_table
[h
->left_mb_xy
[0]] == h
->slice_num
) {
5833 cbp_a
= h
->left_cbp
;
5834 tprintf("cbp_a = left_cbp = %x\n", cbp_a
);
5840 /* No need to test for skip as we put 0 for skip block */
5841 /* No need to test for IPCM as we put 1 for IPCM block */
5843 int i8x8a
= block_idx_xy
[(x
-1)&0x03][y
]/4;
5844 if( ((cbp_a
>> i8x8a
)&0x01) == 0 )
5849 int i8x8b
= block_idx_xy
[x
][(y
-1)&0x03]/4;
5850 if( ((cbp_b
>> i8x8b
)&0x01) == 0 )
5854 if( get_cabac( &h
->cabac
, &h
->cabac_state
[73 + ctx
] ) ) {
5860 static int decode_cabac_mb_cbp_chroma( H264Context
*h
) {
5864 cbp_a
= (h
->left_cbp
>>4)&0x03;
5865 cbp_b
= (h
-> top_cbp
>>4)&0x03;
5868 if( cbp_a
> 0 ) ctx
++;
5869 if( cbp_b
> 0 ) ctx
+= 2;
5870 if( get_cabac( &h
->cabac
, &h
->cabac_state
[77 + ctx
] ) == 0 )
5874 if( cbp_a
== 2 ) ctx
++;
5875 if( cbp_b
== 2 ) ctx
+= 2;
5876 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[77 + ctx
] );
5878 static int decode_cabac_mb_dqp( H264Context
*h
) {
5879 MpegEncContext
* const s
= &h
->s
;
5885 mbn_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
- 1;
5887 mbn_xy
= s
->mb_width
- 1 + (s
->mb_y
-1)*s
->mb_stride
;
5889 if( h
->last_qscale_diff
!= 0 )
5892 while( get_cabac( &h
->cabac
, &h
->cabac_state
[60 + ctx
] ) ) {
5898 if(val
> 102) //prevent infinite loop
5905 return -(val
+ 1)/2;
5907 static int decode_cabac_p_mb_sub_type( H264Context
*h
) {
5908 if( get_cabac( &h
->cabac
, &h
->cabac_state
[21] ) )
5910 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[22] ) )
5912 if( get_cabac( &h
->cabac
, &h
->cabac_state
[23] ) )
5916 static int decode_cabac_b_mb_sub_type( H264Context
*h
) {
5918 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[36] ) )
5919 return 0; /* B_Direct_8x8 */
5920 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[37] ) )
5921 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L0_8x8, B_L1_8x8 */
5923 if( get_cabac( &h
->cabac
, &h
->cabac_state
[38] ) ) {
5924 if( get_cabac( &h
->cabac
, &h
->cabac_state
[39] ) )
5925 return 11 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L1_4x4, B_Bi_4x4 */
5928 type
+= 2*get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5929 type
+= get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5933 static inline int decode_cabac_mb_transform_size( H264Context
*h
) {
5934 return get_cabac( &h
->cabac
, &h
->cabac_state
[399 + h
->neighbor_transform_size
] );
5937 static int decode_cabac_mb_ref( H264Context
*h
, int list
, int n
) {
5938 int refa
= h
->ref_cache
[list
][scan8
[n
] - 1];
5939 int refb
= h
->ref_cache
[list
][scan8
[n
] - 8];
5943 if( h
->slice_type
== B_TYPE
) {
5944 if( refa
> 0 && !h
->direct_cache
[scan8
[n
] - 1] )
5946 if( refb
> 0 && !h
->direct_cache
[scan8
[n
] - 8] )
5955 while( get_cabac( &h
->cabac
, &h
->cabac_state
[54+ctx
] ) ) {
5965 static int decode_cabac_mb_mvd( H264Context
*h
, int list
, int n
, int l
) {
5966 int amvd
= abs( h
->mvd_cache
[list
][scan8
[n
] - 1][l
] ) +
5967 abs( h
->mvd_cache
[list
][scan8
[n
] - 8][l
] );
5968 int ctxbase
= (l
== 0) ? 40 : 47;
5973 else if( amvd
> 32 )
5978 if(!get_cabac(&h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
]))
5983 while( mvd
< 9 && get_cabac( &h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
] ) ) {
5991 while( get_cabac_bypass( &h
->cabac
) ) {
5996 if( get_cabac_bypass( &h
->cabac
) )
6000 if( get_cabac_bypass( &h
->cabac
) ) return -mvd
;
6004 static int inline get_cabac_cbf_ctx( H264Context
*h
, int cat
, int idx
) {
6009 nza
= h
->left_cbp
&0x100;
6010 nzb
= h
-> top_cbp
&0x100;
6011 } else if( cat
== 1 || cat
== 2 ) {
6012 nza
= h
->non_zero_count_cache
[scan8
[idx
] - 1];
6013 nzb
= h
->non_zero_count_cache
[scan8
[idx
] - 8];
6014 } else if( cat
== 3 ) {
6015 nza
= (h
->left_cbp
>>(6+idx
))&0x01;
6016 nzb
= (h
-> top_cbp
>>(6+idx
))&0x01;
6019 nza
= h
->non_zero_count_cache
[scan8
[16+idx
] - 1];
6020 nzb
= h
->non_zero_count_cache
[scan8
[16+idx
] - 8];
6029 return ctx
+ 4 * cat
;
6032 static int decode_cabac_residual( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint32_t *qmul
, int max_coeff
) {
6033 const int mb_xy
= h
->s
.mb_x
+ h
->s
.mb_y
*h
->s
.mb_stride
;
6034 static const int significant_coeff_flag_offset
[2][6] = {
6035 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6036 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6038 static const int last_coeff_flag_offset
[2][6] = {
6039 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6040 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6042 static const int coeff_abs_level_m1_offset
[6] = {
6043 227+0, 227+10, 227+20, 227+30, 227+39, 426
6045 static const int significant_coeff_flag_offset_8x8
[2][63] = {
6046 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6047 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
6048 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
6049 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6050 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6051 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6052 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6053 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
6055 static const int last_coeff_flag_offset_8x8
[63] = {
6056 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6057 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6058 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
6059 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
6065 int coeff_count
= 0;
6068 int abslevelgt1
= 0;
6070 uint8_t *significant_coeff_ctx_base
;
6071 uint8_t *last_coeff_ctx_base
;
6072 uint8_t *abs_level_m1_ctx_base
;
6074 /* cat: 0-> DC 16x16 n = 0
6075 * 1-> AC 16x16 n = luma4x4idx
6076 * 2-> Luma4x4 n = luma4x4idx
6077 * 3-> DC Chroma n = iCbCr
6078 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
6079 * 5-> Luma8x8 n = 4 * luma8x8idx
6082 /* read coded block flag */
6084 if( get_cabac( &h
->cabac
, &h
->cabac_state
[85 + get_cabac_cbf_ctx( h
, cat
, n
) ] ) == 0 ) {
6085 if( cat
== 1 || cat
== 2 )
6086 h
->non_zero_count_cache
[scan8
[n
]] = 0;
6088 h
->non_zero_count_cache
[scan8
[16+n
]] = 0;
6094 significant_coeff_ctx_base
= h
->cabac_state
6095 + significant_coeff_flag_offset
[MB_FIELD
][cat
];
6096 last_coeff_ctx_base
= h
->cabac_state
6097 + last_coeff_flag_offset
[MB_FIELD
][cat
];
6098 abs_level_m1_ctx_base
= h
->cabac_state
6099 + coeff_abs_level_m1_offset
[cat
];
6102 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
6103 for(last= 0; last < coefs; last++) { \
6104 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
6105 if( get_cabac( &h->cabac, sig_ctx )) { \
6106 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
6107 index[coeff_count++] = last; \
6108 if( get_cabac( &h->cabac, last_ctx ) ) { \
6114 const int *sig_off
= significant_coeff_flag_offset_8x8
[MB_FIELD
];
6115 DECODE_SIGNIFICANCE( 63, sig_off
[last
], last_coeff_flag_offset_8x8
[last
] );
6117 DECODE_SIGNIFICANCE( max_coeff
- 1, last
, last
);
6119 if( last
== max_coeff
-1 ) {
6120 index
[coeff_count
++] = last
;
6122 assert(coeff_count
> 0);
6125 h
->cbp_table
[mb_xy
] |= 0x100;
6126 else if( cat
== 1 || cat
== 2 )
6127 h
->non_zero_count_cache
[scan8
[n
]] = coeff_count
;
6129 h
->cbp_table
[mb_xy
] |= 0x40 << n
;
6131 h
->non_zero_count_cache
[scan8
[16+n
]] = coeff_count
;
6134 fill_rectangle(&h
->non_zero_count_cache
[scan8
[n
]], 2, 2, 8, coeff_count
, 1);
6137 for( i
= coeff_count
- 1; i
>= 0; i
-- ) {
6138 uint8_t *ctx
= (abslevelgt1
!= 0 ? 0 : FFMIN( 4, abslevel1
)) + abs_level_m1_ctx_base
;
6139 int j
= scantable
[index
[i
]];
6141 if( get_cabac( &h
->cabac
, ctx
) == 0 ) {
6143 if( get_cabac_bypass( &h
->cabac
) ) block
[j
] = -1;
6146 if( get_cabac_bypass( &h
->cabac
) ) block
[j
] = (-qmul
[j
] + 32) >> 6;
6147 else block
[j
] = ( qmul
[j
] + 32) >> 6;
6153 ctx
= 5 + FFMIN( 4, abslevelgt1
) + abs_level_m1_ctx_base
;
6154 while( coeff_abs
< 15 && get_cabac( &h
->cabac
, ctx
) ) {
6158 if( coeff_abs
>= 15 ) {
6160 while( get_cabac_bypass( &h
->cabac
) ) {
6161 coeff_abs
+= 1 << j
;
6166 if( get_cabac_bypass( &h
->cabac
) )
6167 coeff_abs
+= 1 << j
;
6172 if( get_cabac_bypass( &h
->cabac
) ) block
[j
] = -coeff_abs
;
6173 else block
[j
] = coeff_abs
;
6175 if( get_cabac_bypass( &h
->cabac
) ) block
[j
] = (-coeff_abs
* qmul
[j
] + 32) >> 6;
6176 else block
[j
] = ( coeff_abs
* qmul
[j
] + 32) >> 6;
6185 static void inline compute_mb_neighbors(H264Context
*h
)
6187 MpegEncContext
* const s
= &h
->s
;
6188 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
6189 h
->top_mb_xy
= mb_xy
- s
->mb_stride
;
6190 h
->left_mb_xy
[0] = mb_xy
- 1;
6192 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
6193 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
6194 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
6195 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
6196 const int curr_mb_frame_flag
= !MB_FIELD
;
6197 const int bottom
= (s
->mb_y
& 1);
6199 ? !curr_mb_frame_flag
// bottom macroblock
6200 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
6202 h
->top_mb_xy
-= s
->mb_stride
;
6204 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
6205 h
->left_mb_xy
[0] = pair_xy
- 1;
6212 * decodes a macroblock
6213 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
6215 static int decode_mb_cabac(H264Context
*h
) {
6216 MpegEncContext
* const s
= &h
->s
;
6217 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
6218 int mb_type
, partition_count
, cbp
= 0;
6219 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
6221 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?)
6223 tprintf("pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
6224 if( h
->slice_type
!= I_TYPE
&& h
->slice_type
!= SI_TYPE
) {
6226 /* a skipped mb needs the aff flag from the following mb */
6227 if( FRAME_MBAFF
&& s
->mb_x
==0 && (s
->mb_y
&1)==0 )
6228 predict_field_decoding_flag(h
);
6229 if( FRAME_MBAFF
&& (s
->mb_y
&1)==1 && h
->prev_mb_skipped
)
6230 skip
= h
->next_mb_skipped
;
6232 skip
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
);
6233 /* read skip flags */
6235 if( FRAME_MBAFF
&& (s
->mb_y
&1)==0 ){
6236 s
->current_picture
.mb_type
[mb_xy
] = MB_TYPE_SKIP
;
6237 h
->next_mb_skipped
= decode_cabac_mb_skip( h
, s
->mb_x
, s
->mb_y
+1 );
6238 if(h
->next_mb_skipped
)
6239 predict_field_decoding_flag(h
);
6241 h
->mb_mbaff
= h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
6246 h
->cbp_table
[mb_xy
] = 0;
6247 h
->chroma_pred_mode_table
[mb_xy
] = 0;
6248 h
->last_qscale_diff
= 0;
6255 if( (s
->mb_y
&1) == 0 )
6257 h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
6259 h
->mb_field_decoding_flag
= (s
->picture_structure
!=PICT_FRAME
);
6261 h
->prev_mb_skipped
= 0;
6263 compute_mb_neighbors(h
);
6264 if( ( mb_type
= decode_cabac_mb_type( h
) ) < 0 ) {
6265 av_log( h
->s
.avctx
, AV_LOG_ERROR
, "decode_cabac_mb_type failed\n" );
6269 if( h
->slice_type
== B_TYPE
) {
6271 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
6272 mb_type
= b_mb_type_info
[mb_type
].type
;
6275 goto decode_intra_mb
;
6277 } else if( h
->slice_type
== P_TYPE
) {
6279 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
6280 mb_type
= p_mb_type_info
[mb_type
].type
;
6283 goto decode_intra_mb
;
6286 assert(h
->slice_type
== I_TYPE
);
6288 partition_count
= 0;
6289 cbp
= i_mb_type_info
[mb_type
].cbp
;
6290 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
6291 mb_type
= i_mb_type_info
[mb_type
].type
;
6294 mb_type
|= MB_TYPE_INTERLACED
;
6296 h
->slice_table
[ mb_xy
]= h
->slice_num
;
6298 if(IS_INTRA_PCM(mb_type
)) {
6302 // We assume these blocks are very rare so we dont optimize it.
6303 // FIXME The two following lines get the bitstream position in the cabac
6304 // decode, I think it should be done by a function in cabac.h (or cabac.c).
6305 ptr
= h
->cabac
.bytestream
;
6306 if (h
->cabac
.low
&0x1) ptr
-=CABAC_BITS
/8;
6308 // The pixels are stored in the same order as levels in h->mb array.
6309 for(y
=0; y
<16; y
++){
6310 const int index
= 4*(y
&3) + 32*((y
>>2)&1) + 128*(y
>>3);
6311 for(x
=0; x
<16; x
++){
6312 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr
);
6313 h
->mb
[index
+ (x
&3) + 16*((x
>>2)&1) + 64*(x
>>3)]= *ptr
++;
6317 const int index
= 256 + 4*(y
&3) + 32*(y
>>2);
6319 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr
);
6320 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= *ptr
++;
6324 const int index
= 256 + 64 + 4*(y
&3) + 32*(y
>>2);
6326 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr
);
6327 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= *ptr
++;
6331 ff_init_cabac_decoder(&h
->cabac
, ptr
, h
->cabac
.bytestream_end
- ptr
);
6333 // All blocks are present
6334 h
->cbp_table
[mb_xy
] = 0x1ef;
6335 h
->chroma_pred_mode_table
[mb_xy
] = 0;
6336 // In deblocking, the quantizer is 0
6337 s
->current_picture
.qscale_table
[mb_xy
]= 0;
6338 h
->chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, 0);
6339 // All coeffs are present
6340 memset(h
->non_zero_count
[mb_xy
], 16, 16);
6341 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
6346 h
->ref_count
[0] <<= 1;
6347 h
->ref_count
[1] <<= 1;
6350 fill_caches(h
, mb_type
, 0);
6352 if( IS_INTRA( mb_type
) ) {
6354 if( IS_INTRA4x4( mb_type
) ) {
6355 if( dct8x8_allowed
&& decode_cabac_mb_transform_size( h
) ) {
6356 mb_type
|= MB_TYPE_8x8DCT
;
6357 for( i
= 0; i
< 16; i
+=4 ) {
6358 int pred
= pred_intra_mode( h
, i
);
6359 int mode
= decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
6360 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
6363 for( i
= 0; i
< 16; i
++ ) {
6364 int pred
= pred_intra_mode( h
, i
);
6365 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
6367 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
6370 write_back_intra_pred_mode(h
);
6371 if( check_intra4x4_pred_mode(h
) < 0 ) return -1;
6373 h
->intra16x16_pred_mode
= check_intra_pred_mode( h
, h
->intra16x16_pred_mode
);
6374 if( h
->intra16x16_pred_mode
< 0 ) return -1;
6376 h
->chroma_pred_mode_table
[mb_xy
] =
6377 h
->chroma_pred_mode
= decode_cabac_mb_chroma_pre_mode( h
);
6379 h
->chroma_pred_mode
= check_intra_pred_mode( h
, h
->chroma_pred_mode
);
6380 if( h
->chroma_pred_mode
< 0 ) return -1;
6381 } else if( partition_count
== 4 ) {
6382 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
6384 if( h
->slice_type
== B_TYPE
) {
6385 for( i
= 0; i
< 4; i
++ ) {
6386 h
->sub_mb_type
[i
] = decode_cabac_b_mb_sub_type( h
);
6387 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
6388 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
6390 if( IS_DIRECT(h
->sub_mb_type
[0]) || IS_DIRECT(h
->sub_mb_type
[1])
6391 || IS_DIRECT(h
->sub_mb_type
[2]) || IS_DIRECT(h
->sub_mb_type
[3])) {
6392 pred_direct_motion(h
, &mb_type
);
6393 if( h
->ref_count
[0] > 1 || h
->ref_count
[1] > 1 ) {
6394 for( i
= 0; i
< 4; i
++ )
6395 if( IS_DIRECT(h
->sub_mb_type
[i
]) )
6396 fill_rectangle( &h
->direct_cache
[scan8
[4*i
]], 2, 2, 8, 1, 1 );
6400 for( i
= 0; i
< 4; i
++ ) {
6401 h
->sub_mb_type
[i
] = decode_cabac_p_mb_sub_type( h
);
6402 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
6403 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
6407 for( list
= 0; list
< 2; list
++ ) {
6408 if( h
->ref_count
[list
] > 0 ) {
6409 for( i
= 0; i
< 4; i
++ ) {
6410 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
6411 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
6412 if( h
->ref_count
[list
] > 1 )
6413 ref
[list
][i
] = decode_cabac_mb_ref( h
, list
, 4*i
);
6419 h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
6420 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
6426 dct8x8_allowed
= get_dct8x8_allowed(h
);
6428 for(list
=0; list
<2; list
++){
6430 if(IS_DIRECT(h
->sub_mb_type
[i
])){
6431 fill_rectangle(h
->mvd_cache
[list
][scan8
[4*i
]], 2, 2, 8, 0, 4);
6434 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
6436 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
) && !IS_DIRECT(h
->sub_mb_type
[i
])){
6437 const int sub_mb_type
= h
->sub_mb_type
[i
];
6438 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
6439 for(j
=0; j
<sub_partition_count
[i
]; j
++){
6442 const int index
= 4*i
+ block_width
*j
;
6443 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
6444 int16_t (* mvd_cache
)[2]= &h
->mvd_cache
[list
][ scan8
[index
] ];
6445 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mpx
, &mpy
);
6447 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, index
, 0 );
6448 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, index
, 1 );
6449 tprintf("final mv:%d %d\n", mx
, my
);
6451 if(IS_SUB_8X8(sub_mb_type
)){
6452 mv_cache
[ 0 ][0]= mv_cache
[ 1 ][0]=
6453 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
6454 mv_cache
[ 0 ][1]= mv_cache
[ 1 ][1]=
6455 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
6457 mvd_cache
[ 0 ][0]= mvd_cache
[ 1 ][0]=
6458 mvd_cache
[ 8 ][0]= mvd_cache
[ 9 ][0]= mx
- mpx
;
6459 mvd_cache
[ 0 ][1]= mvd_cache
[ 1 ][1]=
6460 mvd_cache
[ 8 ][1]= mvd_cache
[ 9 ][1]= my
- mpy
;
6461 }else if(IS_SUB_8X4(sub_mb_type
)){
6462 mv_cache
[ 0 ][0]= mv_cache
[ 1 ][0]= mx
;
6463 mv_cache
[ 0 ][1]= mv_cache
[ 1 ][1]= my
;
6465 mvd_cache
[ 0 ][0]= mvd_cache
[ 1 ][0]= mx
- mpx
;
6466 mvd_cache
[ 0 ][1]= mvd_cache
[ 1 ][1]= my
- mpy
;
6467 }else if(IS_SUB_4X8(sub_mb_type
)){
6468 mv_cache
[ 0 ][0]= mv_cache
[ 8 ][0]= mx
;
6469 mv_cache
[ 0 ][1]= mv_cache
[ 8 ][1]= my
;
6471 mvd_cache
[ 0 ][0]= mvd_cache
[ 8 ][0]= mx
- mpx
;
6472 mvd_cache
[ 0 ][1]= mvd_cache
[ 8 ][1]= my
- mpy
;
6474 assert(IS_SUB_4X4(sub_mb_type
));
6475 mv_cache
[ 0 ][0]= mx
;
6476 mv_cache
[ 0 ][1]= my
;
6478 mvd_cache
[ 0 ][0]= mx
- mpx
;
6479 mvd_cache
[ 0 ][1]= my
- mpy
;
6483 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
6484 uint32_t *pd
= (uint32_t *)&h
->mvd_cache
[list
][ scan8
[4*i
] ][0];
6485 p
[0] = p
[1] = p
[8] = p
[9] = 0;
6486 pd
[0]= pd
[1]= pd
[8]= pd
[9]= 0;
6490 } else if( IS_DIRECT(mb_type
) ) {
6491 pred_direct_motion(h
, &mb_type
);
6492 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
6493 fill_rectangle(h
->mvd_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
6494 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
6496 int list
, mx
, my
, i
, mpx
, mpy
;
6497 if(IS_16X16(mb_type
)){
6498 for(list
=0; list
<2; list
++){
6499 if(IS_DIR(mb_type
, 0, list
)){
6500 if(h
->ref_count
[list
] > 0 ){
6501 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 0 ) : 0;
6502 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, ref
, 1);
6505 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED
, 1);
6507 for(list
=0; list
<2; list
++){
6508 if(IS_DIR(mb_type
, 0, list
)){
6509 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mpx
, &mpy
);
6511 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 0, 0 );
6512 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 0, 1 );
6513 tprintf("final mv:%d %d\n", mx
, my
);
6515 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
6516 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
,my
), 4);
6518 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, 0, 4);
6521 else if(IS_16X8(mb_type
)){
6522 for(list
=0; list
<2; list
++){
6523 if(h
->ref_count
[list
]>0){
6525 if(IS_DIR(mb_type
, i
, list
)){
6526 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 8*i
) : 0;
6527 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, ref
, 1);
6529 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, (LIST_NOT_USED
&0xFF), 1);
6533 for(list
=0; list
<2; list
++){
6535 if(IS_DIR(mb_type
, i
, list
)){
6536 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mpx
, &mpy
);
6537 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 0 );
6538 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 1 );
6539 tprintf("final mv:%d %d\n", mx
, my
);
6541 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
6542 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
,my
), 4);
6544 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
6545 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
6550 assert(IS_8X16(mb_type
));
6551 for(list
=0; list
<2; list
++){
6552 if(h
->ref_count
[list
]>0){
6554 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
6555 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 4*i
) : 0;
6556 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, ref
, 1);
6558 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, (LIST_NOT_USED
&0xFF), 1);
6562 for(list
=0; list
<2; list
++){
6564 if(IS_DIR(mb_type
, i
, list
)){
6565 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mpx
, &mpy
);
6566 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 0 );
6567 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 1 );
6569 tprintf("final mv:%d %d\n", mx
, my
);
6570 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
6571 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
,my
), 4);
6573 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
6574 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
6581 if( IS_INTER( mb_type
) ) {
6582 h
->chroma_pred_mode_table
[mb_xy
] = 0;
6583 write_back_motion( h
, mb_type
);
6586 if( !IS_INTRA16x16( mb_type
) ) {
6587 cbp
= decode_cabac_mb_cbp_luma( h
);
6588 cbp
|= decode_cabac_mb_cbp_chroma( h
) << 4;
6591 h
->cbp_table
[mb_xy
] = cbp
;
6593 if( dct8x8_allowed
&& (cbp
&15) && !IS_INTRA( mb_type
) ) {
6594 if( decode_cabac_mb_transform_size( h
) )
6595 mb_type
|= MB_TYPE_8x8DCT
;
6597 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
6599 if( cbp
|| IS_INTRA16x16( mb_type
) ) {
6600 const uint8_t *scan
, *scan8x8
, *dc_scan
;
6603 if(IS_INTERLACED(mb_type
)){
6604 scan8x8
= s
->qscale
? h
->field_scan8x8
: h
->field_scan8x8_q0
;
6605 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
6606 dc_scan
= luma_dc_field_scan
;
6608 scan8x8
= s
->qscale
? h
->zigzag_scan8x8
: h
->zigzag_scan8x8_q0
;
6609 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
6610 dc_scan
= luma_dc_zigzag_scan
;
6613 h
->last_qscale_diff
= dqp
= decode_cabac_mb_dqp( h
);
6614 if( dqp
== INT_MIN
){
6615 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cabac decode of qscale diff failed at %d %d\n", s
->mb_x
, s
->mb_y
);
6619 if(((unsigned)s
->qscale
) > 51){
6620 if(s
->qscale
<0) s
->qscale
+= 52;
6621 else s
->qscale
-= 52;
6623 h
->chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, s
->qscale
);
6625 if( IS_INTRA16x16( mb_type
) ) {
6627 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6628 if( decode_cabac_residual( h
, h
->mb
, 0, 0, dc_scan
, NULL
, 16) < 0)
6631 for( i
= 0; i
< 16; i
++ ) {
6632 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6633 if( decode_cabac_residual(h
, h
->mb
+ 16*i
, 1, i
, scan
+ 1, h
->dequant4_coeff
[0][s
->qscale
], 15) < 0 )
6637 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
6641 for( i8x8
= 0; i8x8
< 4; i8x8
++ ) {
6642 if( cbp
& (1<<i8x8
) ) {
6643 if( IS_8x8DCT(mb_type
) ) {
6644 if( decode_cabac_residual(h
, h
->mb
+ 64*i8x8
, 5, 4*i8x8
,
6645 scan8x8
, h
->dequant8_coeff
[IS_INTRA( mb_type
) ? 0:1][s
->qscale
], 64) < 0 )
6648 for( i4x4
= 0; i4x4
< 4; i4x4
++ ) {
6649 const int index
= 4*i8x8
+ i4x4
;
6650 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6651 if( decode_cabac_residual(h
, h
->mb
+ 16*index
, 2, index
, scan
, h
->dequant4_coeff
[IS_INTRA( mb_type
) ? 0:3][s
->qscale
], 16) < 0 )
6655 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
6656 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
6663 for( c
= 0; c
< 2; c
++ ) {
6664 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6665 if( decode_cabac_residual(h
, h
->mb
+ 256 + 16*4*c
, 3, c
, chroma_dc_scan
, NULL
, 4) < 0)
6672 for( c
= 0; c
< 2; c
++ ) {
6673 for( i
= 0; i
< 4; i
++ ) {
6674 const int index
= 16 + 4 * c
+ i
;
6675 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6676 if( decode_cabac_residual(h
, h
->mb
+ 16*index
, 4, index
- 16, scan
+ 1, h
->dequant4_coeff
[c
+1+(IS_INTRA( mb_type
) ? 0:3)][h
->chroma_qp
], 15) < 0)
6681 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
6682 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
6683 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
6686 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
6687 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
6688 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
6689 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
6690 h
->last_qscale_diff
= 0;
6693 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
6694 write_back_non_zero_count(h
);
6697 h
->ref_count
[0] >>= 1;
6698 h
->ref_count
[1] >>= 1;
6705 static void filter_mb_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[4], int qp
) {
6707 const int index_a
= clip( qp
+ h
->slice_alpha_c0_offset
, 0, 51 );
6708 const int alpha
= alpha_table
[index_a
];
6709 const int beta
= beta_table
[clip( qp
+ h
->slice_beta_offset
, 0, 51 )];
6714 tc
[i
] = bS
[i
] ? tc0_table
[index_a
][bS
[i
] - 1] : -1;
6715 h
->s
.dsp
.h264_h_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6717 /* 16px edge length, because bS=4 is triggered by being at
6718 * the edge of an intra MB, so all 4 bS are the same */
6719 for( d
= 0; d
< 16; d
++ ) {
6720 const int p0
= pix
[-1];
6721 const int p1
= pix
[-2];
6722 const int p2
= pix
[-3];
6724 const int q0
= pix
[0];
6725 const int q1
= pix
[1];
6726 const int q2
= pix
[2];
6728 if( ABS( p0
- q0
) < alpha
&&
6729 ABS( p1
- p0
) < beta
&&
6730 ABS( q1
- q0
) < beta
) {
6732 if(ABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6733 if( ABS( p2
- p0
) < beta
)
6735 const int p3
= pix
[-4];
6737 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6738 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6739 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6742 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6744 if( ABS( q2
- q0
) < beta
)
6746 const int q3
= pix
[3];
6748 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6749 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6750 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6753 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6757 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6758 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6760 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2], pix
[-1], pix
[0], pix
[1]);
/* Deblock one vertical chroma edge (16 or 8 samples, h_loop_filter):
 * computes alpha/beta thresholds from qp plus the slice offsets, builds
 * the tc clipping values from bS, and dispatches to the DSP chroma
 * loop-filter routines (normal vs. intra/bS==4 variants).
 * NOTE(review): extraction-mangled fragment — the tc[] declaration, the
 * loop over i, and the bS<4 / bS==4 branch lines (orig. 6771-6774, 6777)
 * are missing from view. */
6766 static void filter_mb_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[4], int qp
) {
/* Index into alpha/tc0 tables, clipped to the valid QP range 0..51. */
6768 const int index_a
= clip( qp
+ h
->slice_alpha_c0_offset
, 0, 51 );
6769 const int alpha
= alpha_table
[index_a
];
6770 const int beta
= beta_table
[clip( qp
+ h
->slice_beta_offset
, 0, 51 )];
/* Chroma uses tc0+1 (0 disables filtering for that 4-sample group). */
6775 tc
[i
] = bS
[i
] ? tc0_table
[index_a
][bS
[i
] - 1] + 1 : 0;
/* Normal-strength chroma filtering via the DSP hook. */
6776 h
->s
.dsp
.h264_h_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
/* Strong (intra, bS==4) chroma filtering — presumably the else branch;
 * the branch keyword itself is missing from the extracted fragment. */
6778 h
->s
.dsp
.h264_h_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
6782 static void filter_mb_mbaff_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[8], int qp
[2] ) {
6784 for( i
= 0; i
< 16; i
++, pix
+= stride
) {
6790 int bS_index
= (i
>> 1);
6793 bS_index
|= (i
& 1);
6796 if( bS
[bS_index
] == 0 ) {
6800 qp_index
= MB_FIELD
? (i
>> 3) : (i
& 1);
6801 index_a
= clip( qp
[qp_index
] + h
->slice_alpha_c0_offset
, 0, 51 );
6802 alpha
= alpha_table
[index_a
];
6803 beta
= beta_table
[clip( qp
[qp_index
] + h
->slice_beta_offset
, 0, 51 )];
6805 if( bS
[bS_index
] < 4 ) {
6806 const int tc0
= tc0_table
[index_a
][bS
[bS_index
] - 1];
6807 const int p0
= pix
[-1];
6808 const int p1
= pix
[-2];
6809 const int p2
= pix
[-3];
6810 const int q0
= pix
[0];
6811 const int q1
= pix
[1];
6812 const int q2
= pix
[2];
6814 if( ABS( p0
- q0
) < alpha
&&
6815 ABS( p1
- p0
) < beta
&&
6816 ABS( q1
- q0
) < beta
) {
6820 if( ABS( p2
- p0
) < beta
) {
6821 pix
[-2] = p1
+ clip( ( p2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( p1
<< 1 ) ) >> 1, -tc0
, tc0
);
6824 if( ABS( q2
- q0
) < beta
) {
6825 pix
[1] = q1
+ clip( ( q2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( q1
<< 1 ) ) >> 1, -tc0
, tc0
);
6829 i_delta
= clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6830 pix
[-1] = clip_uint8( p0
+ i_delta
); /* p0' */
6831 pix
[0] = clip_uint8( q0
- i_delta
); /* q0' */
6832 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6835 const int p0
= pix
[-1];
6836 const int p1
= pix
[-2];
6837 const int p2
= pix
[-3];
6839 const int q0
= pix
[0];
6840 const int q1
= pix
[1];
6841 const int q2
= pix
[2];
6843 if( ABS( p0
- q0
) < alpha
&&
6844 ABS( p1
- p0
) < beta
&&
6845 ABS( q1
- q0
) < beta
) {
6847 if(ABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6848 if( ABS( p2
- p0
) < beta
)
6850 const int p3
= pix
[-4];
6852 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6853 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6854 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6857 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6859 if( ABS( q2
- q0
) < beta
)
6861 const int q3
= pix
[3];
6863 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6864 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6865 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6868 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6872 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6873 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6875 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6880 static void filter_mb_mbaff_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[8], int qp
[2] ) {
6882 for( i
= 0; i
< 8; i
++, pix
+= stride
) {
6890 if( bS
[bS_index
] == 0 ) {
6894 qp_index
= MB_FIELD
? (i
>> 2) : (i
& 1);
6895 index_a
= clip( qp
[qp_index
] + h
->slice_alpha_c0_offset
, 0, 51 );
6896 alpha
= alpha_table
[index_a
];
6897 beta
= beta_table
[clip( qp
[qp_index
] + h
->slice_beta_offset
, 0, 51 )];
6899 if( bS
[bS_index
] < 4 ) {
6900 const int tc
= tc0_table
[index_a
][bS
[bS_index
] - 1] + 1;
6901 const int p0
= pix
[-1];
6902 const int p1
= pix
[-2];
6903 const int q0
= pix
[0];
6904 const int q1
= pix
[1];
6906 if( ABS( p0
- q0
) < alpha
&&
6907 ABS( p1
- p0
) < beta
&&
6908 ABS( q1
- q0
) < beta
) {
6909 const int i_delta
= clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6911 pix
[-1] = clip_uint8( p0
+ i_delta
); /* p0' */
6912 pix
[0] = clip_uint8( q0
- i_delta
); /* q0' */
6913 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6916 const int p0
= pix
[-1];
6917 const int p1
= pix
[-2];
6918 const int q0
= pix
[0];
6919 const int q1
= pix
[1];
6921 if( ABS( p0
- q0
) < alpha
&&
6922 ABS( p1
- p0
) < beta
&&
6923 ABS( q1
- q0
) < beta
) {
6925 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2; /* p0' */
6926 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2; /* q0' */
6927 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, pix
[-3], p1
, p0
, q0
, q1
, pix
[2], pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
6933 static void filter_mb_edgeh( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[4], int qp
) {
6935 const int index_a
= clip( qp
+ h
->slice_alpha_c0_offset
, 0, 51 );
6936 const int alpha
= alpha_table
[index_a
];
6937 const int beta
= beta_table
[clip( qp
+ h
->slice_beta_offset
, 0, 51 )];
6938 const int pix_next
= stride
;
6943 tc
[i
] = bS
[i
] ? tc0_table
[index_a
][bS
[i
] - 1] : -1;
6944 h
->s
.dsp
.h264_v_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6946 /* 16px edge length, see filter_mb_edgev */
6947 for( d
= 0; d
< 16; d
++ ) {
6948 const int p0
= pix
[-1*pix_next
];
6949 const int p1
= pix
[-2*pix_next
];
6950 const int p2
= pix
[-3*pix_next
];
6951 const int q0
= pix
[0];
6952 const int q1
= pix
[1*pix_next
];
6953 const int q2
= pix
[2*pix_next
];
6955 if( ABS( p0
- q0
) < alpha
&&
6956 ABS( p1
- p0
) < beta
&&
6957 ABS( q1
- q0
) < beta
) {
6959 const int p3
= pix
[-4*pix_next
];
6960 const int q3
= pix
[ 3*pix_next
];
6962 if(ABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6963 if( ABS( p2
- p0
) < beta
) {
6965 pix
[-1*pix_next
] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6966 pix
[-2*pix_next
] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6967 pix
[-3*pix_next
] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6970 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6972 if( ABS( q2
- q0
) < beta
) {
6974 pix
[0*pix_next
] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6975 pix
[1*pix_next
] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6976 pix
[2*pix_next
] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6979 pix
[0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6983 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6984 pix
[ 0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6986 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, qp
, index_a
, alpha
, beta
, bS
[i
], p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2*pix_next
], pix
[-pix_next
], pix
[0], pix
[pix_next
]);
/* Deblock one horizontal chroma edge (v_loop_filter): mirrors
 * filter_mb_edgecv but filters across rows instead of columns, using the
 * vertical-direction DSP chroma routines.
 * NOTE(review): extraction-mangled fragment — the tc[] declaration, the
 * loop over i, and the bS<4 / bS==4 branch lines (orig. 6998-7001, 7004)
 * are missing from view. */
6993 static void filter_mb_edgech( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[4], int qp
) {
/* Index into alpha/tc0 tables, clipped to the valid QP range 0..51. */
6995 const int index_a
= clip( qp
+ h
->slice_alpha_c0_offset
, 0, 51 );
6996 const int alpha
= alpha_table
[index_a
];
6997 const int beta
= beta_table
[clip( qp
+ h
->slice_beta_offset
, 0, 51 )];
/* Chroma uses tc0+1 (0 disables filtering for that 4-sample group). */
7002 tc
[i
] = bS
[i
] ? tc0_table
[index_a
][bS
[i
] - 1] + 1 : 0;
/* Normal-strength chroma filtering via the DSP hook. */
7003 h
->s
.dsp
.h264_v_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
/* Strong (intra, bS==4) chroma filtering — presumably the else branch;
 * the branch keyword itself is missing from the extracted fragment. */
7005 h
->s
.dsp
.h264_v_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
7009 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
7010 MpegEncContext
* const s
= &h
->s
;
7011 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
7012 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
7013 const int mvy_limit
= IS_INTERLACED(mb_type
) ? 2 : 4;
7014 int first_vertical_edge_done
= 0;
7016 /* FIXME: A given frame may occupy more than one position in
7017 * the reference list. So ref2frm should be populated with
7018 * frame numbers, not indices. */
7019 static const int ref2frm
[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
7020 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
7022 //for sufficiently low qp, filtering wouldn't do anything
7023 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
7025 int qp_thresh
= 15 - h
->slice_alpha_c0_offset
- FFMAX(0, h
->pps
.chroma_qp_index_offset
);
7026 int qp
= s
->current_picture
.qscale_table
[mb_xy
];
7028 && (mb_x
== 0 || ((qp
+ s
->current_picture
.qscale_table
[mb_xy
-1] + 1)>>1) <= qp_thresh
)
7029 && (mb_y
== 0 || ((qp
+ s
->current_picture
.qscale_table
[h
->top_mb_xy
] + 1)>>1) <= qp_thresh
)){
7035 // left mb is in picture
7036 && h
->slice_table
[mb_xy
-1] != 255
7037 // and current and left pair do not have the same interlaced type
7038 && (IS_INTERLACED(mb_type
) != IS_INTERLACED(s
->current_picture
.mb_type
[mb_xy
-1]))
7039 // and left mb is in the same slice if deblocking_filter == 2
7040 && (h
->deblocking_filter
!=2 || h
->slice_table
[mb_xy
-1] == h
->slice_table
[mb_xy
])) {
7041 /* First vertical edge is different in MBAFF frames
7042 * There are 8 different bS to compute and 2 different Qp
7044 const int pair_xy
= mb_x
+ (mb_y
&~1)*s
->mb_stride
;
7045 const int left_mb_xy
[2] = { pair_xy
-1, pair_xy
-1+s
->mb_stride
};
7049 int mb_qp
, mbn0_qp
, mbn1_qp
;
7051 first_vertical_edge_done
= 1;
7053 if( IS_INTRA(mb_type
) )
7054 bS
[0] = bS
[1] = bS
[2] = bS
[3] = bS
[4] = bS
[5] = bS
[6] = bS
[7] = 4;
7056 for( i
= 0; i
< 8; i
++ ) {
7057 int mbn_xy
= MB_FIELD
? left_mb_xy
[i
>>2] : left_mb_xy
[i
&1];
7059 if( IS_INTRA( s
->current_picture
.mb_type
[mbn_xy
] ) )
7061 else if( h
->non_zero_count_cache
[12+8*(i
>>1)] != 0 ||
7062 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
7063 h
->non_zero_count
[mbn_xy
][MB_FIELD
? i
&3 : (i
>>2)+(mb_y
&1)*2] )
7070 mb_qp
= s
->current_picture
.qscale_table
[mb_xy
];
7071 mbn0_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[0]];
7072 mbn1_qp
= s
->current_picture
.qscale_table
[left_mb_xy
[1]];
7073 qp
[0] = ( mb_qp
+ mbn0_qp
+ 1 ) >> 1;
7074 chroma_qp
[0] = ( get_chroma_qp( h
->pps
.chroma_qp_index_offset
, mb_qp
) +
7075 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, mbn0_qp
) + 1 ) >> 1;
7076 qp
[1] = ( mb_qp
+ mbn1_qp
+ 1 ) >> 1;
7077 chroma_qp
[1] = ( get_chroma_qp( h
->pps
.chroma_qp_index_offset
, mb_qp
) +
7078 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, mbn1_qp
) + 1 ) >> 1;
7081 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x
, mb_y
, qp
[0], qp
[1], chroma_qp
[0], chroma_qp
[1], linesize
, uvlinesize
);
7082 { int i
; for (i
= 0; i
< 8; i
++) tprintf(" bS[%d]:%d", i
, bS
[i
]); tprintf("\n"); }
7083 filter_mb_mbaff_edgev ( h
, &img_y
[0], linesize
, bS
, qp
);
7084 filter_mb_mbaff_edgecv( h
, &img_cb
[0], uvlinesize
, bS
, chroma_qp
);
7085 filter_mb_mbaff_edgecv( h
, &img_cr
[0], uvlinesize
, bS
, chroma_qp
);
7087 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
7088 for( dir
= 0; dir
< 2; dir
++ )
7091 const int mbm_xy
= dir
== 0 ? mb_xy
-1 : h
->top_mb_xy
;
7092 const int mbm_type
= s
->current_picture
.mb_type
[mbm_xy
];
7093 int start
= h
->slice_table
[mbm_xy
] == 255 ? 1 : 0;
7095 const int edges
= (mb_type
& (MB_TYPE_16x16
|MB_TYPE_SKIP
))
7096 == (MB_TYPE_16x16
|MB_TYPE_SKIP
) ? 1 : 4;
7097 // how often to recheck mv-based bS when iterating between edges
7098 const int mask_edge
= (mb_type
& (MB_TYPE_16x16
| (MB_TYPE_16x8
<< dir
))) ? 3 :
7099 (mb_type
& (MB_TYPE_8x16
>> dir
)) ? 1 : 0;
7100 // how often to recheck mv-based bS when iterating along each edge
7101 const int mask_par0
= mb_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
));
7103 if (first_vertical_edge_done
) {
7105 first_vertical_edge_done
= 0;
7108 if (h
->deblocking_filter
==2 && h
->slice_table
[mbm_xy
] != h
->slice_table
[mb_xy
])
7111 if (FRAME_MBAFF
&& (dir
== 1) && ((mb_y
&1) == 0) && start
== 0
7112 && !IS_INTERLACED(mb_type
)
7113 && IS_INTERLACED(mbm_type
)
7115 // This is a special case in the norm where the filtering must
7116 // be done twice (one each of the field) even if we are in a
7117 // frame macroblock.
7119 static const int nnz_idx
[4] = {4,5,6,3};
7120 unsigned int tmp_linesize
= 2 * linesize
;
7121 unsigned int tmp_uvlinesize
= 2 * uvlinesize
;
7122 int mbn_xy
= mb_xy
- 2 * s
->mb_stride
;
7127 for(j
=0; j
<2; j
++, mbn_xy
+= s
->mb_stride
){
7128 if( IS_INTRA(mb_type
) ||
7129 IS_INTRA(s
->current_picture
.mb_type
[mbn_xy
]) ) {
7130 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 3;
7132 const uint8_t *mbn_nnz
= h
->non_zero_count
[mbn_xy
];
7133 for( i
= 0; i
< 4; i
++ ) {
7134 if( h
->non_zero_count_cache
[scan8
[0]+i
] != 0 ||
7135 mbn_nnz
[nnz_idx
[i
]] != 0 )
7141 // Do not use s->qscale as luma quantizer because it has not the same
7142 // value in IPCM macroblocks.
7143 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
7144 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, tmp_linesize
, tmp_uvlinesize
);
7145 { int i
; for (i
= 0; i
< 4; i
++) tprintf(" bS[%d]:%d", i
, bS
[i
]); tprintf("\n"); }
7146 filter_mb_edgeh( h
, &img_y
[j
*linesize
], tmp_linesize
, bS
, qp
);
7147 chroma_qp
= ( h
->chroma_qp
+
7148 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1;
7149 filter_mb_edgech( h
, &img_cb
[j
*uvlinesize
], tmp_uvlinesize
, bS
, chroma_qp
);
7150 filter_mb_edgech( h
, &img_cr
[j
*uvlinesize
], tmp_uvlinesize
, bS
, chroma_qp
);
7157 for( edge
= start
; edge
< edges
; edge
++ ) {
7158 /* mbn_xy: neighbor macroblock */
7159 const int mbn_xy
= edge
> 0 ? mb_xy
: mbm_xy
;
7160 const int mbn_type
= s
->current_picture
.mb_type
[mbn_xy
];
7164 if( (edge
&1) && IS_8x8DCT(mb_type
) )
7167 if( IS_INTRA(mb_type
) ||
7168 IS_INTRA(mbn_type
) ) {
7171 if ( (!IS_INTERLACED(mb_type
) && !IS_INTERLACED(mbm_type
))
7172 || ((FRAME_MBAFF
|| (s
->picture_structure
!= PICT_FRAME
)) && (dir
== 0))
7181 bS
[0] = bS
[1] = bS
[2] = bS
[3] = value
;
7186 if( edge
& mask_edge
) {
7187 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 0;
7190 else if( FRAME_MBAFF
&& IS_INTERLACED(mb_type
^ mbn_type
)) {
7191 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 1;
7194 else if( mask_par0
&& (edge
|| (mbn_type
& (MB_TYPE_16x16
| (MB_TYPE_8x16
>> dir
)))) ) {
7195 int b_idx
= 8 + 4 + edge
* (dir
? 8:1);
7196 int bn_idx
= b_idx
- (dir
? 8:1);
7198 for( l
= 0; !v
&& l
< 1 + (h
->slice_type
== B_TYPE
); l
++ ) {
7199 v
|= ref2frm
[h
->ref_cache
[l
][b_idx
]+2] != ref2frm
[h
->ref_cache
[l
][bn_idx
]+2] ||
7200 ABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
7201 ABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
;
7203 bS
[0] = bS
[1] = bS
[2] = bS
[3] = v
;
7209 for( i
= 0; i
< 4; i
++ ) {
7210 int x
= dir
== 0 ? edge
: i
;
7211 int y
= dir
== 0 ? i
: edge
;
7212 int b_idx
= 8 + 4 + x
+ 8*y
;
7213 int bn_idx
= b_idx
- (dir
? 8:1);
7215 if( h
->non_zero_count_cache
[b_idx
] != 0 ||
7216 h
->non_zero_count_cache
[bn_idx
] != 0 ) {
7222 for( l
= 0; l
< 1 + (h
->slice_type
== B_TYPE
); l
++ ) {
7223 if( ref2frm
[h
->ref_cache
[l
][b_idx
]+2] != ref2frm
[h
->ref_cache
[l
][bn_idx
]+2] ||
7224 ABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
7225 ABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= mvy_limit
) {
7233 if(bS
[0]+bS
[1]+bS
[2]+bS
[3] == 0)
7238 // Do not use s->qscale as luma quantizer because it has not the same
7239 // value in IPCM macroblocks.
7240 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
7241 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
7242 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, linesize
, uvlinesize
);
7243 { int i
; for (i
= 0; i
< 4; i
++) tprintf(" bS[%d]:%d", i
, bS
[i
]); tprintf("\n"); }
7245 filter_mb_edgev( h
, &img_y
[4*edge
], linesize
, bS
, qp
);
7246 if( (edge
&1) == 0 ) {
7247 int chroma_qp
= ( h
->chroma_qp
+
7248 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1;
7249 filter_mb_edgecv( h
, &img_cb
[2*edge
], uvlinesize
, bS
, chroma_qp
);
7250 filter_mb_edgecv( h
, &img_cr
[2*edge
], uvlinesize
, bS
, chroma_qp
);
7253 filter_mb_edgeh( h
, &img_y
[4*edge
*linesize
], linesize
, bS
, qp
);
7254 if( (edge
&1) == 0 ) {
7255 int chroma_qp
= ( h
->chroma_qp
+
7256 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1;
7257 filter_mb_edgech( h
, &img_cb
[2*edge
*uvlinesize
], uvlinesize
, bS
, chroma_qp
);
7258 filter_mb_edgech( h
, &img_cr
[2*edge
*uvlinesize
], uvlinesize
, bS
, chroma_qp
);
/* NOTE(review): this extraction elides some original source lines (braces,
 * else-branches, declarations); the visible tokens are reproduced as-is. */
/**
 * Decodes one slice: CABAC path, CAVLC path, or (partitioned) plain decode_mb
 * loop, advancing s->mb_x/s->mb_y and reporting progress to the error
 * resilience layer via ff_er_add_slice().
 */
static int decode_slice(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* With data partitioning only AC_END/AC_ERROR are reported; otherwise all flags. */
    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
    if( h->pps.cabac ) {
        /* CABAC entropy coding: byte-align, then hand the rest of the
         * bitstream to the CABAC decoder. */
        align_get_bits( &s->gb );
        ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 );
        ff_init_cabac_decoder( &h->cabac,
                               s->gb.buffer + get_bits_count(&s->gb)/8,
                               ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
        /* calculate pre-state */
        for( i = 0; i < 460; i++ ) {
            if( h->slice_type == I_TYPE )
                pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
            /* NOTE(review): in the full source this is the else-branch (PB init
             * tables); the "else" line is elided in this extraction. */
            pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
            /* NOTE(review): these two assignments are presumably an if/else on
             * pre<=63 in the original — confirm against full source. */
            h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
            h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
            int ret = decode_mb_cabac(h);
            if(ret>=0) hl_decode_mb(h);
            /* MBAFF: decode the bottom macroblock of the pair as well. */
            if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
                if(ret>=0) ret = decode_mb_cabac(h);
                if(ret>=0) hl_decode_mb(h);
            eos = get_cabac_terminate( &h->cabac );
            /* bytestream_end + 1: one byte of overread is tolerated. */
            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
            if( ++s->mb_x >= s->mb_width ) {
                ff_draw_horiz_band(s, 16*s->mb_y, 16);
            if( eos || s->mb_y >= s->mb_height ) {
                tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
            /* CAVLC entropy coding path. */
            int ret = decode_mb_cavlc(h);
            if(ret>=0) hl_decode_mb(h);
            if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
                ret = decode_mb_cavlc(h);
                if(ret>=0) hl_decode_mb(h);
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
            if(++s->mb_x >= s->mb_width){
                ff_draw_horiz_band(s, 16*s->mb_y, 16);
            if(s->mb_y >= s->mb_height){
                tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                /* Landing exactly on the end of the bitstream is a clean slice
                 * end; anything else is still reported as END here. */
                if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
            /* mb_skip_run: pending skipped MBs must be consumed before the
             * slice can legally end. */
            if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
                tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                if(get_bits_count(&s->gb) == s->gb.size_in_bits){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                    /* Overread past the end: mark the slice as erroneous. */
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
    /* NOTE(review): the following loop appears to be an alternative
     * (likely disabled in the full source) plain decode_mb() path. */
    for(;s->mb_y < s->mb_height; s->mb_y++){
        for(;s->mb_x < s->mb_width; s->mb_x++){
            int ret = decode_mb(h);
                av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
            if(++s->mb_x >= s->mb_width){
                if(++s->mb_y >= s->mb_height){
                    if(get_bits_count(s->gb) == s->gb.size_in_bits){
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
            /* NOTE(review): "s->?gb" and "gb?." below are mojibake/corruption
             * in this extraction — surrounding code uses "s->gb" and
             * "s->gb.size_in_bits"; fix in source control. */
            if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
                if(get_bits_count(s->gb) == s->gb.size_in_bits){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
        ff_draw_horiz_band(s, 16*s->mb_y, 16);
    return -1; //not reached
/* NOTE(review): this extraction elides some original source lines;
 * visible tokens reproduced as-is. */
/**
 * Parses an SEI "unregistered user data" payload. The only use made of it is
 * sniffing the x264 version string to record h->x264_build (bug workarounds).
 * Skips any remaining payload bytes.
 */
static int decode_unregistered_user_data(H264Context *h, int size){
    MpegEncContext * const s = &h->s;
    uint8_t user_data[16+256];   /* 16-byte UUID + bounded copy of the payload text */
    for(i=0; i<sizeof(user_data)-1 && i<size; i++){
        user_data[i]= get_bits(&s->gb, 8);
    /* Payload text starts after the 16-byte UUID. */
    e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
    if(e==1 && build>=0)
        h->x264_build= build;
    if(s->avctx->debug & FF_DEBUG_BUGS)
        av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
    /* Consume any payload bytes beyond what was buffered above. */
    skip_bits(&s->gb, 8);
/* NOTE(review): this extraction elides some original source lines;
 * visible tokens reproduced as-is. */
/**
 * Parses an SEI NAL unit: reads (type, size) pairs — each coded as a run of
 * 0xFF bytes plus a final byte — dispatches known payloads and skips the rest.
 */
static int decode_sei(H264Context *h){
    MpegEncContext * const s = &h->s;
    while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
            /* type: sum of bytes, terminated by the first byte != 255 */
            type+= show_bits(&s->gb, 8);
        }while(get_bits(&s->gb, 8) == 255);
            /* size: same encoding as type */
            size+= show_bits(&s->gb, 8);
        }while(get_bits(&s->gb, 8) == 255);
            if(decode_unregistered_user_data(h, size) < 0)
            /* Unknown payload types are skipped wholesale. */
            skip_bits(&s->gb, 8*size);
    //FIXME check bits here
    align_get_bits(&s->gb);
/* NOTE(review): this extraction elides some original source lines;
 * visible tokens reproduced as-is. */
/**
 * Parses (and discards) the HRD parameters of the VUI — only the bitstream
 * position is advanced; no values are stored.
 */
static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
    MpegEncContext * const s = &h->s;
    cpb_count = get_ue_golomb(&s->gb) + 1;
    get_bits(&s->gb, 4); /* bit_rate_scale */
    get_bits(&s->gb, 4); /* cpb_size_scale */
    for(i=0; i<cpb_count; i++){
        get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
        get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
        get_bits1(&s->gb); /* cbr_flag */
    get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
    get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
    get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
    get_bits(&s->gb, 5); /* time_offset_length */
/* NOTE(review): this extraction elides some original source lines (braces,
 * early returns); visible tokens reproduced as-is. */
/**
 * Parses the VUI block of an SPS. Stores sample aspect ratio, timing info,
 * and the bitstream restriction fields (notably num_reorder_frames) into
 * *sps; all other syntax elements are read and discarded.
 */
static inline int decode_vui_parameters(H264Context *h, SPS *sps){
    MpegEncContext * const s = &h->s;
    int aspect_ratio_info_present_flag, aspect_ratio_idc;
    int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
    aspect_ratio_info_present_flag= get_bits1(&s->gb);
    if( aspect_ratio_info_present_flag ) {
        aspect_ratio_idc= get_bits(&s->gb, 8);
        if( aspect_ratio_idc == EXTENDED_SAR ) {
            /* Extended SAR: explicit 16-bit numerator/denominator. */
            sps->sar.num= get_bits(&s->gb, 16);
            sps->sar.den= get_bits(&s->gb, 16);
        }else if(aspect_ratio_idc < 14){
            /* Table lookup of the predefined aspect ratios. */
            sps->sar= pixel_aspect[aspect_ratio_idc];
            /* NOTE(review): this av_log is the else-branch for idc >= 14;
             * the "else" line is elided in this extraction. */
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
//            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
    if(get_bits1(&s->gb)){      /* overscan_info_present_flag */
        get_bits1(&s->gb);      /* overscan_appropriate_flag */
    if(get_bits1(&s->gb)){      /* video_signal_type_present_flag */
        get_bits(&s->gb, 3);    /* video_format */
        get_bits1(&s->gb);      /* video_full_range_flag */
        if(get_bits1(&s->gb)){  /* colour_description_present_flag */
            get_bits(&s->gb, 8); /* colour_primaries */
            get_bits(&s->gb, 8); /* transfer_characteristics */
            get_bits(&s->gb, 8); /* matrix_coefficients */
    if(get_bits1(&s->gb)){      /* chroma_location_info_present_flag */
        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_top_field */
        get_ue_golomb(&s->gb);  /* chroma_sample_location_type_bottom_field */
    sps->timing_info_present_flag = get_bits1(&s->gb);
    if(sps->timing_info_present_flag){
        sps->num_units_in_tick = get_bits_long(&s->gb, 32);
        sps->time_scale = get_bits_long(&s->gb, 32);
        sps->fixed_frame_rate_flag = get_bits1(&s->gb);
    nal_hrd_parameters_present_flag = get_bits1(&s->gb);
    if(nal_hrd_parameters_present_flag)
        decode_hrd_parameters(h, sps);
    vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
    if(vcl_hrd_parameters_present_flag)
        decode_hrd_parameters(h, sps);
    if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
        get_bits1(&s->gb);      /* low_delay_hrd_flag */
    get_bits1(&s->gb);          /* pic_struct_present_flag */
    sps->bitstream_restriction_flag = get_bits1(&s->gb);
    if(sps->bitstream_restriction_flag){
        get_bits1(&s->gb);      /* motion_vectors_over_pic_boundaries_flag */
        get_ue_golomb(&s->gb);  /* max_bytes_per_pic_denom */
        get_ue_golomb(&s->gb);  /* max_bits_per_mb_denom */
        get_ue_golomb(&s->gb);  /* log2_max_mv_length_horizontal */
        get_ue_golomb(&s->gb);  /* log2_max_mv_length_vertical */
        /* Used later to size the delayed-picture reorder buffer. */
        sps->num_reorder_frames = get_ue_golomb(&s->gb);
        get_ue_golomb(&s->gb);  /* max_dec_frame_buffering */
/* NOTE(review): this extraction elides some original source lines (else
 * branch, break, closing braces); visible tokens reproduced as-is. */
/**
 * Reads one scaling list (16 or 64 entries) from the bitstream into factors[].
 * Falls back to fallback_list when the list is absent, or to the JVT default
 * list when the first delta yields next==0.
 */
static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
                                const uint8_t *jvt_list, const uint8_t *fallback_list){
    MpegEncContext * const s = &h->s;
    int i, last = 8, next = 8;
    /* 4x4 lists use the zigzag scan; 8x8 lists the 8x8 zigzag. */
    const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
    if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
        memcpy(factors, fallback_list, size*sizeof(uint8_t));
        for(i=0;i<size;i++){
                /* Delta-coded: each entry is previous + signed Golomb, mod 256. */
                next = (last + get_se_golomb(&s->gb)) & 0xff;
                if(!i && !next){ /* matrix not written, we use the preset one */
                    memcpy(factors, jvt_list, size*sizeof(uint8_t));
            /* next==0 means "repeat the last value" for remaining entries. */
            last = factors[scan[i]] = next ? next : last;
/* NOTE(review): this extraction elides some original source lines;
 * visible tokens reproduced as-is. */
/**
 * Reads all scaling matrices (six 4x4, optionally two 8x8) for an SPS or PPS.
 * Each list falls back either to the corresponding SPS list (when parsing a
 * PPS and the SPS carried matrices) or to the JVT defaults, per the spec's
 * inheritance rules. When the PPS carries no matrices at all, the SPS
 * matrices are copied wholesale.
 */
static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
                                    uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
    MpegEncContext * const s = &h->s;
    /* PPS parsing may inherit from the SPS only if the SPS had matrices. */
    int fallback_sps = !is_sps && sps->scaling_matrix_present;
    const uint8_t *fallback[4] = {
        fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
        fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
        fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
    if(get_bits1(&s->gb)){
        sps->scaling_matrix_present |= is_sps;
        /* Intra lists chain off each other; same for inter lists. */
        decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
        decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
        decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
        decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
        decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
        decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
        if(is_sps || pps->transform_8x8_mode){
            decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
    } else if(fallback_sps) {
        memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
        memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
/* NOTE(review): this extraction elides some original source lines (error
 * returns, braces, POC debug fields); visible tokens reproduced as-is. */
/**
 * Parses a sequence parameter set NAL and stores it in h->sps_buffer[sps_id].
 * Validates POC type, reference frame count and picture dimensions; parses
 * High-profile extensions (scaling matrices, transform bypass) and optional
 * cropping/VUI data.
 */
static inline int decode_seq_parameter_set(H264Context *h){
    MpegEncContext * const s = &h->s;
    int profile_idc, level_idc;
    profile_idc= get_bits(&s->gb, 8);
    get_bits1(&s->gb);   //constraint_set0_flag
    get_bits1(&s->gb);   //constraint_set1_flag
    get_bits1(&s->gb);   //constraint_set2_flag
    get_bits1(&s->gb);   //constraint_set3_flag
    get_bits(&s->gb, 4); // reserved
    level_idc= get_bits(&s->gb, 8);
    sps_id= get_ue_golomb(&s->gb);
    sps= &h->sps_buffer[ sps_id ];
    sps->profile_idc= profile_idc;
    sps->level_idc= level_idc;
    if(sps->profile_idc >= 100){ //high profile
        if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
            get_bits1(&s->gb);  //residual_color_transform_flag
        get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
        get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
        sps->transform_bypass = get_bits1(&s->gb);
        decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
        /* NOTE(review): this is the else-branch (non-high profiles have no
         * scaling matrices); "else" line elided in this extraction. */
        sps->scaling_matrix_present = 0;
    sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
    sps->poc_type= get_ue_golomb(&s->gb);
    if(sps->poc_type == 0){ //FIXME #define
        sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
    } else if(sps->poc_type == 1){//FIXME #define
        sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
        sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
        sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
        sps->poc_cycle_length= get_ue_golomb(&s->gb);
        for(i=0; i<sps->poc_cycle_length; i++)
            sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
    if(sps->poc_type > 2){
        av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
    sps->ref_frame_count= get_ue_golomb(&s->gb);
    /* -2: leave room for the current picture and one delayed picture. */
    if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){
        av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
    sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
    sps->mb_width= get_ue_golomb(&s->gb) + 1;
    sps->mb_height= get_ue_golomb(&s->gb) + 1;
    /* Reject dimensions that would overflow 16*mb computations. */
    if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
       avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
    sps->frame_mbs_only_flag= get_bits1(&s->gb);
    if(!sps->frame_mbs_only_flag)
        sps->mb_aff= get_bits1(&s->gb);
    sps->direct_8x8_inference_flag= get_bits1(&s->gb);
#ifndef ALLOW_INTERLACE
        av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it compilation time\n");
    if(!sps->direct_8x8_inference_flag && sps->mb_aff)
        av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
    sps->crop= get_bits1(&s->gb);
        sps->crop_left  = get_ue_golomb(&s->gb);
        sps->crop_right = get_ue_golomb(&s->gb);
        sps->crop_top   = get_ue_golomb(&s->gb);
        sps->crop_bottom= get_ue_golomb(&s->gb);
        if(sps->crop_left || sps->crop_top){
            av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
        /* NOTE(review): this zeroing belongs to the no-crop else-branch in
         * the full source (other crop_* zeroings elided here). */
        sps->crop_bottom= 0;
    sps->vui_parameters_present_flag= get_bits1(&s->gb);
    if( sps->vui_parameters_present_flag )
        decode_vui_parameters(h, sps);
    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
               sps_id, sps->profile_idc, sps->level_idc,
               sps->ref_frame_count,
               sps->mb_width, sps->mb_height,
               sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
               sps->direct_8x8_inference_flag ? "8B8" : "",
               sps->crop_left, sps->crop_right,
               sps->crop_top, sps->crop_bottom,
               sps->vui_parameters_present_flag ? "VUI" : ""
/* NOTE(review): this extraction elides some original source lines;
 * visible tokens reproduced as-is. */
/**
 * Parses a picture parameter set NAL into h->pps_buffer[pps_id]. FMO
 * (slice_group_count > 1) is recognized but reported as unsupported. The
 * optional trailing fields (transform_8x8_mode, PPS scaling matrices,
 * second_chroma_qp_index_offset) are parsed only if bits remain before
 * bit_length.
 */
static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
    MpegEncContext * const s = &h->s;
    int pps_id= get_ue_golomb(&s->gb);
    PPS *pps= &h->pps_buffer[pps_id];
    pps->sps_id= get_ue_golomb(&s->gb);
    pps->cabac= get_bits1(&s->gb);
    pps->pic_order_present= get_bits1(&s->gb);
    pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
    if(pps->slice_group_count > 1 ){
        pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
        av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
        switch(pps->mb_slice_group_map_type){
/* NOTE(review): the table rows below are an excerpt of the H.264 spec's FMO
 * syntax tables; in the full source they sit inside a disabled/comment region
 * whose delimiters are elided in this extraction. */
|   for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
|    run_length[ i ]                                |1 |ue(v) |
|   for( i = 0; i < num_slice_groups_minus1; i++ )  | | |
|    top_left_mb[ i ]                               |1 |ue(v) |
|    bottom_right_mb[ i ]                           |1 |ue(v) |
|   slice_group_change_direction_flag               |1 |u(1) |
|   slice_group_change_rate_minus1                  |1 |ue(v) |
|   slice_group_id_cnt_minus1                       |1 |ue(v) |
|   for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
|    slice_group_id[ i ]                            |1 |u(v) |
    pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
    pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
    if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
        av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
    pps->weighted_pred= get_bits1(&s->gb);
    pps->weighted_bipred_idc= get_bits(&s->gb, 2);
    pps->init_qp= get_se_golomb(&s->gb) + 26;
    pps->init_qs= get_se_golomb(&s->gb) + 26;
    pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
    pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
    pps->constrained_intra_pred= get_bits1(&s->gb);
    pps->redundant_pic_cnt_present = get_bits1(&s->gb);
    /* Defaults for the optional trailing (high-profile) fields: flat
     * matrices (all 16), no 8x8 transform. */
    pps->transform_8x8_mode= 0;
    h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
    memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
    if(get_bits_count(&s->gb) < bit_length){
        pps->transform_8x8_mode= get_bits1(&s->gb);
        decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
        get_se_golomb(&s->gb);  //second_chroma_qp_index_offset
    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
               pps_id, pps->sps_id,
               pps->cabac ? "CABAC" : "CAVLC",
               pps->slice_group_count,
               pps->ref_count[0], pps->ref_count[1],
               pps->weighted_pred ? "weighted" : "",
               pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
               pps->deblocking_filter_parameters_present ? "LPAR" : "",
               pps->constrained_intra_pred ? "CONSTR" : "",
               pps->redundant_pic_cnt_present ? "REDU" : "",
               pps->transform_8x8_mode ? "8x8DCT" : ""
/* NOTE(review): this extraction elides some original source lines;
 * visible tokens reproduced as-is. */
/**
 * finds the end of the current frame in the bitstream.
 * @return the position of the first byte of the next frame, or -1
 */
static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
    ParseContext *pc = &(h->s.parse_context);
//printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
//    mb_addr= pc->mb_addr - 1;
    for(i=0; i<=buf_size; i++){
        /* 0x101/0x102/0x105 after the start code: slice / partition A / IDR NALs */
        if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
            tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i);
            if(pc->frame_start_found){
                // If there isn't one more byte in the buffer
                // the test on first_mb_in_slice cannot be done yet
                // do it at next call.
                if (i >= buf_size) break;
                /* High bit set => first_mb_in_slice ue(v) starts with a 1 bit. */
                if (buf[i] & 0x80) {
                    // first_mb_in_slice is 0, probably the first nal of a new
                    tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
                    pc->frame_start_found = 0;
                pc->frame_start_found = 1;
        /* 0x107/0x108/0x109: SPS / PPS / access unit delimiter — always
         * terminate the current frame. */
        if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
            if(pc->frame_start_found){
                pc->frame_start_found = 0;
        /* Rolling 32-bit window used for start-code matching. */
        state= (state<<8) | buf[i];
    return END_NOT_FOUND;
/* NOTE(review): this extraction elides some original source lines;
 * visible tokens reproduced as-is. */
/**
 * AVCodecParser callback: locates the next frame boundary with
 * find_frame_end() and lets ff_combine_frame() buffer partial input until a
 * whole frame is available, which is then returned via poutbuf.
 */
static int h264_parse(AVCodecParserContext *s,
                      AVCodecContext *avctx,
                      uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size)
    H264Context *h = s->priv_data;
    ParseContext *pc = &h->s.parse_context;
    next= find_frame_end(h, buf, buf_size);
    /* ff_combine_frame may replace buf/buf_size with the assembled frame. */
    if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
    *poutbuf = (uint8_t *)buf;
    *poutbuf_size = buf_size;
/* NOTE(review): this extraction elides some original source lines (returns,
 * a comment terminator); visible tokens reproduced as-is. */
/**
 * AVCodecParser split callback: scans for the first non-header NAL (anything
 * that is not SPS/PPS/AUD) and reports where the in-band extradata ends.
 */
static int h264_split(AVCodecContext *avctx,
                      const uint8_t *buf, int buf_size)
    uint32_t state = -1;
    for(i=0; i<=buf_size; i++){
        if((state&0xFFFFFF1F) == 0x107)   /* SPS seen: extradata present */
/*        if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
        /* First NAL that is not SPS(0x107)/PPS(0x108)/AUD(0x109) ends the headers. */
        if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
            /* Back up over zero bytes preceding the start code. */
            while(i>4 && buf[i-5]==0) i--;
        state= (state<<8) | buf[i];
/* NOTE(review): this extraction elides some original source lines (case
 * labels, braces, returns); visible tokens reproduced as-is. */
/**
 * Iterates over all NAL units in buf (either length-prefixed AVC/avcC format
 * or Annex-B start codes), unescapes each with decode_nal() and dispatches by
 * nal_unit_type: slices, partitions A/B/C, SEI, SPS, PPS, etc. Afterwards
 * finalizes the current picture (POC bookkeeping, reference marking).
 */
static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
        for(i=0; i<50; i++){
            av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
    s->current_picture_ptr= NULL;
        if(buf_index >= buf_size) break;
            /* avcC mode: NAL size is big-endian, h->nal_length_size bytes. */
            for(i = 0; i < h->nal_length_size; i++)
                nalsize = (nalsize << 8) | buf[buf_index++];
                av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
            // start code prefix search
            for(; buf_index + 3 < buf_size; buf_index++){
                // this should allways succeed in the first iteration
                if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
            if(buf_index+3 >= buf_size) break;
        /* Unescape emulation-prevention bytes; ptr/dst_length describe the RBSP. */
        ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
        if(ptr[dst_length - 1] == 0) dst_length--;
        bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
        if(s->avctx->debug&FF_DEBUG_STARTCODE){
            av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
        if (h->is_avc && (nalsize != consumed))
            av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
        buf_index += consumed;
        /* hurry_up / skip_frame: drop non-reference NALs when asked to. */
        if( (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME dont discard SEI id
           ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
        switch(h->nal_unit_type){
            idr(h); //FIXME ensure we don't loose some frames if there is reordering
            /* Regular (non-partitioned) slice. */
            init_get_bits(&s->gb, ptr, bit_length);
            h->inter_gb_ptr= &s->gb;
            s->data_partitioning = 0;
            if(decode_slice_header(h) < 0){
                av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
            s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
            if(h->redundant_pic_count==0 && s->hurry_up < 5
               && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
               && (avctx->skip_frame < AVDISCARD_BIDIR  || h->slice_type!=B_TYPE)
               && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
               && avctx->skip_frame < AVDISCARD_ALL)
            /* Data partition A: slice header + MB headers. */
            init_get_bits(&s->gb, ptr, bit_length);
            h->inter_gb_ptr= NULL;
            s->data_partitioning = 1;
            if(decode_slice_header(h) < 0){
                av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
            /* Partition B: intra residuals. */
            init_get_bits(&h->intra_gb, ptr, bit_length);
            h->intra_gb_ptr= &h->intra_gb;
            /* Partition C: inter residuals; slice is decodable once present. */
            init_get_bits(&h->inter_gb, ptr, bit_length);
            h->inter_gb_ptr= &h->inter_gb;
            if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
               && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
               && (avctx->skip_frame < AVDISCARD_BIDIR  || h->slice_type!=B_TYPE)
               && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
               && avctx->skip_frame < AVDISCARD_ALL)
            init_get_bits(&s->gb, ptr, bit_length);
            init_get_bits(&s->gb, ptr, bit_length);
            decode_seq_parameter_set(h);
            if(s->flags& CODEC_FLAG_LOW_DELAY)
            if(avctx->has_b_frames < 2)
                avctx->has_b_frames= !s->low_delay;
            init_get_bits(&s->gb, ptr, bit_length);
            decode_picture_parameter_set(h, bit_length);
        case NAL_END_SEQUENCE:
        case NAL_END_STREAM:
        case NAL_FILLER_DATA:
        case NAL_AUXILIARY_SLICE:
            av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
    if(!s->current_picture_ptr) return buf_index; //no frame
    /* Picture-level bookkeeping once all NALs of the AU were consumed. */
    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
    s->current_picture_ptr->pict_type= s->pict_type;
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num= h->frame_num;
    if(s->current_picture_ptr->reference){
        h->prev_poc_msb= h->poc_msb;
        h->prev_poc_lsb= h->poc_lsb;
    if(s->current_picture_ptr->reference)
        execute_ref_pic_marking(h, h->mmco, h->mmco_index);
/* NOTE(review): this extraction elides some original source lines;
 * visible tokens reproduced as-is. */
/**
 * returns the number of bytes consumed for building the current frame
 */
static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
    if(s->flags&CODEC_FLAG_TRUNCATED){
        /* In truncated mode part of the data was buffered by the parse
         * context, so don't count those bytes again. */
        pos -= s->parse_context.last_index;
        if(pos<0) pos=0; // FIXME remove (unneeded?)
        if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
        if(pos+10>buf_size) pos=buf_size; // oops ;)
/* NOTE(review): this extraction elides some original source lines (returns,
 * braces, #else/#endif of the decode-order block); visible tokens
 * reproduced as-is. */
/**
 * Top-level decode callback: handles truncated-input buffering, one-time avcC
 * extradata parsing, decodes all NAL units of the packet, then reorders
 * B-frames into display order via the delayed_pic buffer before returning a
 * picture in *pict.
 */
static int decode_frame(AVCodecContext *avctx,
                        void *data, int *data_size,
                        uint8_t *buf, int buf_size)
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    s->flags= avctx->flags;
    s->flags2= avctx->flags2;
   /* no supplementary picture */
    if (buf_size == 0) {
    if(s->flags&CODEC_FLAG_TRUNCATED){
        int next= find_frame_end(h, buf, buf_size);
        if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
//printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
    /* One-time parse of the avcC extradata box (MP4-style streams). */
    if(h->is_avc && !h->got_avcC) {
        int i, cnt, nalsize;
        unsigned char *p = avctx->extradata;
        if(avctx->extradata_size < 7) {
            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
            av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
        /* sps and pps in the avcC always have length coded with 2 bytes,
           so put a fake nal_length_size = 2 while parsing them */
        h->nal_length_size = 2;
        // Decode sps from avcC
        cnt = *(p+5) & 0x1f; // Number of sps
        for (i = 0; i < cnt; i++) {
            nalsize = BE_16(p) + 2;
            if(decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
        // Decode pps from avcC
        cnt = *(p++); // Number of pps
        for (i = 0; i < cnt; i++) {
            nalsize = BE_16(p) + 2;
            if(decode_nal_units(h, p, nalsize)  != nalsize) {
                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
        // Now store right nal length size, that will be use to parse all other nals
        h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
        // Do not reparse avcC
    /* Annex-B streams may carry SPS/PPS in extradata: parse once at startup. */
    if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
    buf_index=decode_nal_units(h, buf, buf_size);
    //FIXME do something with unavailable reference frames
//    if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
    if(!s->current_picture_ptr){
        av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
        Picture *out = s->current_picture_ptr;
#if 0 //decode order
        *data_size = sizeof(AVFrame);
        /* Sort B-frames into display order */
        Picture *cur = s->current_picture_ptr;
        Picture *prev = h->delayed_output_pic;
        int i, pics, cross_idr, out_of_order, out_idx;
        /* Grow the reorder delay to what the stream's VUI declares. */
        if(h->sps.bitstream_restriction_flag
           && s->avctx->has_b_frames < h->sps.num_reorder_frames){
            s->avctx->has_b_frames = h->sps.num_reorder_frames;
        while(h->delayed_pic[pics]) pics++;
        h->delayed_pic[pics++] = cur;
        if(cur->reference == 0)
        for(i=0; h->delayed_pic[i]; i++)
            if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
        /* Pick the delayed picture with the smallest POC (before any keyframe). */
        out = h->delayed_pic[0];
        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
        out_of_order = !cross_idr && prev && out->poc < prev->poc;
        if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
        else if(prev && pics <= s->avctx->has_b_frames)
        /* Heuristic: grow has_b_frames when output order violations suggest
         * more reordering depth than currently assumed. */
        else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
           ((!cross_idr && prev && out->poc > prev->poc + 2)
             || cur->pict_type == B_TYPE)))
            s->avctx->has_b_frames++;
        else if(out_of_order)
        if(out_of_order || pics > s->avctx->has_b_frames){
            /* Remove the emitted picture from the delay queue. */
            for(i=out_idx; h->delayed_pic[i]; i++)
                h->delayed_pic[i] = h->delayed_pic[i+1];
            *data_size = sizeof(AVFrame);
            /* Release the previously output picture's reference. */
            if(prev && prev != out && prev->reference == 1)
                prev->reference = 0;
            h->delayed_output_pic = out;
            *pict= *(AVFrame*)out;
            av_log(avctx, AV_LOG_DEBUG, "no picture\n");
    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
//printf("out %d\n", (int)pict->data[0]);
    /* Return the Picture timestamp as the frame number */
    /* we substract 1 because it is added on utils.c    */
    avctx->frame_number = s->picture_number - 1;
    return get_consumed_bytes(s, buf_index, buf_size);
8276 static inline void fill_mb_avail(H264Context
*h
){
8277 MpegEncContext
* const s
= &h
->s
;
8278 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
8281 h
->mb_avail
[0]= s
->mb_x
&& h
->slice_table
[mb_xy
- s
->mb_stride
- 1] == h
->slice_num
;
8282 h
->mb_avail
[1]= h
->slice_table
[mb_xy
- s
->mb_stride
] == h
->slice_num
;
8283 h
->mb_avail
[2]= s
->mb_x
+1 < s
->mb_width
&& h
->slice_table
[mb_xy
- s
->mb_stride
+ 1] == h
->slice_num
;
8289 h
->mb_avail
[3]= s
->mb_x
&& h
->slice_table
[mb_xy
- 1] == h
->slice_num
;
8290 h
->mb_avail
[4]= 1; //FIXME move out
8291 h
->mb_avail
[5]= 0; //FIXME move out
8297 #define SIZE (COUNT*40)
// NOTE(review): fragment of the #ifdef TEST self-test harness.  The enclosing
// main() and a number of statements/braces are missing from this extract --
// confirm against the complete file before changing any logic here.
8303 // int int_temp[10000];
8305 AVCodecContext avctx
;
8307 dsputil_init(&dsp
, &avctx
);
// --- Exp-Golomb round-trip: write COUNT unsigned codes, then read them back
// and report any value that does not survive the encode/decode cycle. ---
8309 init_put_bits(&pb
, temp
, SIZE
);
8310 printf("testing unsigned exp golomb\n");
8311 for(i
=0; i
<COUNT
; i
++){
8313 set_ue_golomb(&pb
, i
);
8314 STOP_TIMER("set_ue_golomb");
8316 flush_put_bits(&pb
);
8318 init_get_bits(&gb
, temp
, 8*SIZE
);
8319 for(i
=0; i
<COUNT
; i
++){
8322 s
= show_bits(&gb
, 24);
8325 j
= get_ue_golomb(&gb
);
8327 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
8330 STOP_TIMER("get_ue_golomb");
// --- Same round-trip for signed Exp-Golomb codes, centred on zero
// (values i - COUNT/2). ---
8334 init_put_bits(&pb
, temp
, SIZE
);
8335 printf("testing signed exp golomb\n");
8336 for(i
=0; i
<COUNT
; i
++){
8338 set_se_golomb(&pb
, i
- COUNT
/2);
8339 STOP_TIMER("set_se_golomb");
8341 flush_put_bits(&pb
);
8343 init_get_bits(&gb
, temp
, 8*SIZE
);
8344 for(i
=0; i
<COUNT
; i
++){
8347 s
= show_bits(&gb
, 24);
8350 j
= get_se_golomb(&gb
);
8351 if(j
!= i
- COUNT
/2){
8352 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
8355 STOP_TIMER("get_se_golomb");
// --- 4x4 (I)DCT: forward-transform the difference of random src/ref blocks,
// rescale the coefficients, inverse-transform onto ref, and accumulate the
// reconstruction error against src. ---
8358 printf("testing 4x4 (I)DCT\n");
8361 uint8_t src
[16], ref
[16];
8362 uint64_t error
= 0, max_error
=0;
8364 for(i
=0; i
<COUNT
; i
++){
8366 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
8367 for(j
=0; j
<16; j
++){
8368 ref
[j
]= random()%255;
8369 src
[j
]= random()%255;
8372 h264_diff_dct_c(block
, src
, ref
, 4);
8375 for(j
=0; j
<16; j
++){
8376 // printf("%d ", block[j]);
8377 block
[j
]= block
[j
]*4;
8378 if(j
&1) block
[j
]= (block
[j
]*4 + 2)/5;
8379 if(j
&4) block
[j
]= (block
[j
]*4 + 2)/5;
8383 s
->dsp
.h264_idct_add(ref
, block
, 4);
8384 /* for(j=0; j<16; j++){
8385 printf("%d ", ref[j]);
8389 for(j
=0; j
<16; j
++){
8390 int diff
= ABS(src
[j
] - ref
[j
]);
8393 max_error
= FFMAX(max_error
, diff
);
8396 printf("error=%f max_error=%d\n", ((float)error
)/COUNT
/16, (int)max_error
);
// --- Quantizer sweep over all 52 QP values on random blocks. ---
8398 printf("testing quantizer\n");
8399 for(qp
=0; qp
<52; qp
++){
8401 src1_block
[i
]= src2_block
[i
]= random()%255;
// --- NAL escaping: encode a random bitstream (with some forced zero runs),
// decode it back, and verify length, consumed bytes, and content. ---
8405 printf("Testing NAL layer\n");
8407 uint8_t bitstream
[COUNT
];
8408 uint8_t nal
[COUNT
*2];
8410 memset(&h
, 0, sizeof(H264Context
));
8412 for(i
=0; i
<COUNT
; i
++){
8420 for(j
=0; j
<COUNT
; j
++){
8421 bitstream
[j
]= (random() % 255) + 1;
8424 for(j
=0; j
<zeros
; j
++){
8425 int pos
= random() % COUNT
;
8426 while(bitstream
[pos
] == 0){
8435 nal_length
= encode_nal(&h
, nal
, bitstream
, COUNT
, COUNT
*2);
8437 printf("encoding failed\n");
8441 out
= decode_nal(&h
, nal
, &out_length
, &consumed
, nal_length
);
8445 if(out_length
!= COUNT
){
8446 printf("incorrect length %d %d\n", out_length
, COUNT
);
8450 if(consumed
!= nal_length
){
8451 printf("incorrect consumed length %d %d\n", nal_length
, consumed
);
8455 if(memcmp(bitstream
, out
, COUNT
)){
8456 printf("missmatch\n");
8461 printf("Testing RBSP\n");
8469 static int decode_end(AVCodecContext
*avctx
)
8471 H264Context
*h
= avctx
->priv_data
;
8472 MpegEncContext
*s
= &h
->s
;
8474 av_freep(&h
->rbsp_buffer
);
8475 free_tables(h
); //FIXME cleanup init stuff perhaps
8478 // memset(h, 0, sizeof(H264Context));
// Codec registration entry for the H.264 decoder.  Only a few initializers
// survive in this extract (the priv_data_size and the capabilities word);
// the name/type/id/init/close/decode fields are missing here -- TODO confirm
// against the complete file.
8484 AVCodec h264_decoder
= {
8488 sizeof(H264Context
),
// Capabilities: DR1 plus TRUNCATED and DELAY -- presumably the decoder
// accepts input not aligned on frame boundaries and buffers/reorders frames
// before output; verify against the decode path in the full file.
8493 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1
| CODEC_CAP_TRUNCATED
| CODEC_CAP_DELAY
,
8497 AVCodecParser h264_parser
= {
8499 sizeof(H264Context
),