/*
 * DXVA2 H264 HW acceleration.
 *
 * copyright (c) 2009 Laurent Aimar
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 #include "dxva2_internal.h"
27 struct dxva2_picture_context
{
28 DXVA_PicParams_H264 pp
;
31 DXVA_Slice_H264_Short slice_short
[MAX_SLICES
];
32 DXVA_Slice_H264_Long slice_long
[MAX_SLICES
];
33 const uint8_t *bitstream
;
34 unsigned bitstream_size
;
37 static void fill_picture_entry(DXVA_PicEntry_H264
*pic
,
38 unsigned index
, unsigned flag
)
40 assert((index
&0x7f) == index
&& (flag
&0x01) == flag
);
41 pic
->bPicEntry
= index
| (flag
<< 7);
44 static void fill_picture_parameters(struct dxva_context
*ctx
, const H264Context
*h
,
45 DXVA_PicParams_H264
*pp
)
47 const MpegEncContext
*s
= &h
->s
;
48 const Picture
*current_picture
= s
->current_picture_ptr
;
51 memset(pp
, 0, sizeof(*pp
));
52 /* Configure current picture */
53 fill_picture_entry(&pp
->CurrPic
,
54 ff_dxva2_get_surface_index(ctx
, current_picture
),
55 s
->picture_structure
== PICT_BOTTOM_FIELD
);
56 /* Configure the set of references */
57 pp
->UsedForReferenceFlags
= 0;
58 pp
->NonExistingFrameFlags
= 0;
59 for (i
= 0, j
= 0; i
< FF_ARRAY_ELEMS(pp
->RefFrameList
); i
++) {
61 if (j
< h
->short_ref_count
) {
62 r
= h
->short_ref
[j
++];
65 while (!r
&& j
< h
->short_ref_count
+ 16)
66 r
= h
->long_ref
[j
++ - h
->short_ref_count
];
69 fill_picture_entry(&pp
->RefFrameList
[i
],
70 ff_dxva2_get_surface_index(ctx
, r
),
73 if ((r
->f
.reference
& PICT_TOP_FIELD
) && r
->field_poc
[0] != INT_MAX
)
74 pp
->FieldOrderCntList
[i
][0] = r
->field_poc
[0];
75 if ((r
->f
.reference
& PICT_BOTTOM_FIELD
) && r
->field_poc
[1] != INT_MAX
)
76 pp
->FieldOrderCntList
[i
][1] = r
->field_poc
[1];
78 pp
->FrameNumList
[i
] = r
->long_ref
? r
->pic_id
: r
->frame_num
;
79 if (r
->f
.reference
& PICT_TOP_FIELD
)
80 pp
->UsedForReferenceFlags
|= 1 << (2*i
+ 0);
81 if (r
->f
.reference
& PICT_BOTTOM_FIELD
)
82 pp
->UsedForReferenceFlags
|= 1 << (2*i
+ 1);
84 pp
->RefFrameList
[i
].bPicEntry
= 0xff;
85 pp
->FieldOrderCntList
[i
][0] = 0;
86 pp
->FieldOrderCntList
[i
][1] = 0;
87 pp
->FrameNumList
[i
] = 0;
91 pp
->wFrameWidthInMbsMinus1
= s
->mb_width
- 1;
92 pp
->wFrameHeightInMbsMinus1
= s
->mb_height
- 1;
93 pp
->num_ref_frames
= h
->sps
.ref_frame_count
;
95 pp
->wBitFields
= ((s
->picture_structure
!= PICT_FRAME
) << 0) |
97 (s
->picture_structure
== PICT_FRAME
)) << 1) |
98 (h
->sps
.residual_color_transform_flag
<< 2) |
99 /* sp_for_switch_flag (not implemented by Libav) */
101 (h
->sps
.chroma_format_idc
<< 4) |
102 ((h
->nal_ref_idc
!= 0) << 6) |
103 (h
->pps
.constrained_intra_pred
<< 7) |
104 (h
->pps
.weighted_pred
<< 8) |
105 (h
->pps
.weighted_bipred_idc
<< 9) |
106 /* MbsConsecutiveFlag */
108 (h
->sps
.frame_mbs_only_flag
<< 12) |
109 (h
->pps
.transform_8x8_mode
<< 13) |
110 ((h
->sps
.level_idc
>= 31) << 14) |
111 /* IntraPicFlag (Modified if we detect a non
112 * intra slice in decode_slice) */
115 pp
->bit_depth_luma_minus8
= h
->sps
.bit_depth_luma
- 8;
116 pp
->bit_depth_chroma_minus8
= h
->sps
.bit_depth_chroma
- 8;
117 if (ctx
->workaround
& FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG
)
118 pp
->Reserved16Bits
= 0;
120 pp
->Reserved16Bits
= 3; /* FIXME is there a way to detect the right mode ? */
121 pp
->StatusReportFeedbackNumber
= 1 + ctx
->report_id
++;
122 pp
->CurrFieldOrderCnt
[0] = 0;
123 if ((s
->picture_structure
& PICT_TOP_FIELD
) &&
124 current_picture
->field_poc
[0] != INT_MAX
)
125 pp
->CurrFieldOrderCnt
[0] = current_picture
->field_poc
[0];
126 pp
->CurrFieldOrderCnt
[1] = 0;
127 if ((s
->picture_structure
& PICT_BOTTOM_FIELD
) &&
128 current_picture
->field_poc
[1] != INT_MAX
)
129 pp
->CurrFieldOrderCnt
[1] = current_picture
->field_poc
[1];
130 pp
->pic_init_qs_minus26
= h
->pps
.init_qs
- 26;
131 pp
->chroma_qp_index_offset
= h
->pps
.chroma_qp_index_offset
[0];
132 pp
->second_chroma_qp_index_offset
= h
->pps
.chroma_qp_index_offset
[1];
133 pp
->ContinuationFlag
= 1;
134 pp
->pic_init_qp_minus26
= h
->pps
.init_qp
- 26;
135 pp
->num_ref_idx_l0_active_minus1
= h
->pps
.ref_count
[0] - 1;
136 pp
->num_ref_idx_l1_active_minus1
= h
->pps
.ref_count
[1] - 1;
137 pp
->Reserved8BitsA
= 0;
138 pp
->frame_num
= h
->frame_num
;
139 pp
->log2_max_frame_num_minus4
= h
->sps
.log2_max_frame_num
- 4;
140 pp
->pic_order_cnt_type
= h
->sps
.poc_type
;
141 if (h
->sps
.poc_type
== 0)
142 pp
->log2_max_pic_order_cnt_lsb_minus4
= h
->sps
.log2_max_poc_lsb
- 4;
143 else if (h
->sps
.poc_type
== 1)
144 pp
->delta_pic_order_always_zero_flag
= h
->sps
.delta_pic_order_always_zero_flag
;
145 pp
->direct_8x8_inference_flag
= h
->sps
.direct_8x8_inference_flag
;
146 pp
->entropy_coding_mode_flag
= h
->pps
.cabac
;
147 pp
->pic_order_present_flag
= h
->pps
.pic_order_present
;
148 pp
->num_slice_groups_minus1
= h
->pps
.slice_group_count
- 1;
149 pp
->slice_group_map_type
= h
->pps
.mb_slice_group_map_type
;
150 pp
->deblocking_filter_control_present_flag
= h
->pps
.deblocking_filter_parameters_present
;
151 pp
->redundant_pic_cnt_present_flag
= h
->pps
.redundant_pic_cnt_present
;
152 pp
->Reserved8BitsB
= 0;
153 pp
->slice_group_change_rate_minus1
= 0; /* XXX not implemented by Libav */
154 //pp->SliceGroupMap[810]; /* XXX not implemented by Libav */
157 static void fill_scaling_lists(struct dxva_context
*ctx
, const H264Context
*h
, DXVA_Qmatrix_H264
*qm
)
160 memset(qm
, 0, sizeof(*qm
));
161 if (ctx
->workaround
& FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG
) {
162 for (i
= 0; i
< 6; i
++)
163 for (j
= 0; j
< 16; j
++)
164 qm
->bScalingLists4x4
[i
][j
] = h
->pps
.scaling_matrix4
[i
][j
];
166 for (i
= 0; i
< 64; i
++) {
167 qm
->bScalingLists8x8
[0][i
] = h
->pps
.scaling_matrix8
[0][i
];
168 qm
->bScalingLists8x8
[1][i
] = h
->pps
.scaling_matrix8
[3][i
];
171 for (i
= 0; i
< 6; i
++)
172 for (j
= 0; j
< 16; j
++)
173 qm
->bScalingLists4x4
[i
][j
] = h
->pps
.scaling_matrix4
[i
][zigzag_scan
[j
]];
175 for (i
= 0; i
< 64; i
++) {
176 qm
->bScalingLists8x8
[0][i
] = h
->pps
.scaling_matrix8
[0][ff_zigzag_direct
[i
]];
177 qm
->bScalingLists8x8
[1][i
] = h
->pps
.scaling_matrix8
[3][ff_zigzag_direct
[i
]];
182 static int is_slice_short(struct dxva_context
*ctx
)
184 assert(ctx
->cfg
->ConfigBitstreamRaw
== 1 ||
185 ctx
->cfg
->ConfigBitstreamRaw
== 2);
186 return ctx
->cfg
->ConfigBitstreamRaw
== 2;
189 static void fill_slice_short(DXVA_Slice_H264_Short
*slice
,
190 unsigned position
, unsigned size
)
192 memset(slice
, 0, sizeof(*slice
));
193 slice
->BSNALunitDataLocation
= position
;
194 slice
->SliceBytesInBuffer
= size
;
195 slice
->wBadSliceChopping
= 0;
198 static void fill_slice_long(AVCodecContext
*avctx
, DXVA_Slice_H264_Long
*slice
,
199 unsigned position
, unsigned size
)
201 const H264Context
*h
= avctx
->priv_data
;
202 struct dxva_context
*ctx
= avctx
->hwaccel_context
;
203 const MpegEncContext
*s
= &h
->s
;
206 memset(slice
, 0, sizeof(*slice
));
207 slice
->BSNALunitDataLocation
= position
;
208 slice
->SliceBytesInBuffer
= size
;
209 slice
->wBadSliceChopping
= 0;
211 slice
->first_mb_in_slice
= (s
->mb_y
>> FIELD_OR_MBAFF_PICTURE
) * s
->mb_width
+ s
->mb_x
;
212 slice
->NumMbsForSlice
= 0; /* XXX it is set once we have all slices */
213 slice
->BitOffsetToSliceData
= get_bits_count(&s
->gb
);
214 slice
->slice_type
= ff_h264_get_slice_type(h
);
215 if (h
->slice_type_fixed
)
216 slice
->slice_type
+= 5;
217 slice
->luma_log2_weight_denom
= h
->luma_log2_weight_denom
;
218 slice
->chroma_log2_weight_denom
= h
->chroma_log2_weight_denom
;
219 if (h
->list_count
> 0)
220 slice
->num_ref_idx_l0_active_minus1
= h
->ref_count
[0] - 1;
221 if (h
->list_count
> 1)
222 slice
->num_ref_idx_l1_active_minus1
= h
->ref_count
[1] - 1;
223 slice
->slice_alpha_c0_offset_div2
= h
->slice_alpha_c0_offset
/ 2 - 26;
224 slice
->slice_beta_offset_div2
= h
->slice_beta_offset
/ 2 - 26;
225 slice
->Reserved8Bits
= 0;
227 for (list
= 0; list
< 2; list
++) {
229 for (i
= 0; i
< FF_ARRAY_ELEMS(slice
->RefPicList
[list
]); i
++) {
230 if (list
< h
->list_count
&& i
< h
->ref_count
[list
]) {
231 const Picture
*r
= &h
->ref_list
[list
][i
];
233 fill_picture_entry(&slice
->RefPicList
[list
][i
],
234 ff_dxva2_get_surface_index(ctx
, r
),
235 r
->f
.reference
== PICT_BOTTOM_FIELD
);
236 for (plane
= 0; plane
< 3; plane
++) {
238 if (plane
== 0 && h
->luma_weight_flag
[list
]) {
239 w
= h
->luma_weight
[i
][list
][0];
240 o
= h
->luma_weight
[i
][list
][1];
241 } else if (plane
>= 1 && h
->chroma_weight_flag
[list
]) {
242 w
= h
->chroma_weight
[i
][list
][plane
-1][0];
243 o
= h
->chroma_weight
[i
][list
][plane
-1][1];
245 w
= 1 << (plane
== 0 ? h
->luma_log2_weight_denom
:
246 h
->chroma_log2_weight_denom
);
249 slice
->Weights
[list
][i
][plane
][0] = w
;
250 slice
->Weights
[list
][i
][plane
][1] = o
;
254 slice
->RefPicList
[list
][i
].bPicEntry
= 0xff;
255 for (plane
= 0; plane
< 3; plane
++) {
256 slice
->Weights
[list
][i
][plane
][0] = 0;
257 slice
->Weights
[list
][i
][plane
][1] = 0;
262 slice
->slice_qs_delta
= 0; /* XXX not implemented by Libav */
263 slice
->slice_qp_delta
= s
->qscale
- h
->pps
.init_qp
;
264 slice
->redundant_pic_cnt
= h
->redundant_pic_count
;
265 if (h
->slice_type
== AV_PICTURE_TYPE_B
)
266 slice
->direct_spatial_mv_pred_flag
= h
->direct_spatial_mv_pred
;
267 slice
->cabac_init_idc
= h
->pps
.cabac
? h
->cabac_init_idc
: 0;
268 if (h
->deblocking_filter
< 2)
269 slice
->disable_deblocking_filter_idc
= 1 - h
->deblocking_filter
;
271 slice
->disable_deblocking_filter_idc
= h
->deblocking_filter
;
272 slice
->slice_id
= h
->current_slice
- 1;
275 static int commit_bitstream_and_slice_buffer(AVCodecContext
*avctx
,
276 DXVA2_DecodeBufferDesc
*bs
,
277 DXVA2_DecodeBufferDesc
*sc
)
279 const H264Context
*h
= avctx
->priv_data
;
280 const MpegEncContext
*s
= &h
->s
;
281 const unsigned mb_count
= s
->mb_width
* s
->mb_height
;
282 struct dxva_context
*ctx
= avctx
->hwaccel_context
;
283 const Picture
*current_picture
= h
->s
.current_picture_ptr
;
284 struct dxva2_picture_context
*ctx_pic
= current_picture
->f
.hwaccel_picture_private
;
285 DXVA_Slice_H264_Short
*slice
= NULL
;
286 uint8_t *dxva_data
, *current
, *end
;
293 /* Create an annex B bitstream buffer with only slice NAL and finalize slice */
294 if (FAILED(IDirectXVideoDecoder_GetBuffer(ctx
->decoder
,
295 DXVA2_BitStreamDateBufferType
,
296 &dxva_data
, &dxva_size
)))
299 end
= dxva_data
+ dxva_size
;
301 for (i
= 0; i
< ctx_pic
->slice_count
; i
++) {
302 static const uint8_t start_code
[] = { 0, 0, 1 };
303 static const unsigned start_code_size
= sizeof(start_code
);
304 unsigned position
, size
;
306 assert(offsetof(DXVA_Slice_H264_Short
, BSNALunitDataLocation
) ==
307 offsetof(DXVA_Slice_H264_Long
, BSNALunitDataLocation
));
308 assert(offsetof(DXVA_Slice_H264_Short
, SliceBytesInBuffer
) ==
309 offsetof(DXVA_Slice_H264_Long
, SliceBytesInBuffer
));
311 if (is_slice_short(ctx
))
312 slice
= &ctx_pic
->slice_short
[i
];
314 slice
= (DXVA_Slice_H264_Short
*)&ctx_pic
->slice_long
[i
];
316 position
= slice
->BSNALunitDataLocation
;
317 size
= slice
->SliceBytesInBuffer
;
318 if (start_code_size
+ size
> end
- current
) {
319 av_log(avctx
, AV_LOG_ERROR
, "Failed to build bitstream");
323 slice
->BSNALunitDataLocation
= current
- dxva_data
;
324 slice
->SliceBytesInBuffer
= start_code_size
+ size
;
326 if (!is_slice_short(ctx
)) {
327 DXVA_Slice_H264_Long
*slice_long
= (DXVA_Slice_H264_Long
*)slice
;
328 if (i
< ctx_pic
->slice_count
- 1)
329 slice_long
->NumMbsForSlice
=
330 slice_long
[1].first_mb_in_slice
- slice_long
[0].first_mb_in_slice
;
332 slice_long
->NumMbsForSlice
= mb_count
- slice_long
->first_mb_in_slice
;
335 memcpy(current
, start_code
, start_code_size
);
336 current
+= start_code_size
;
338 memcpy(current
, &ctx_pic
->bitstream
[position
], size
);
341 padding
= FFMIN(128 - ((current
- dxva_data
) & 127), end
- current
);
342 if (slice
&& padding
> 0) {
343 memset(current
, 0, padding
);
346 slice
->SliceBytesInBuffer
+= padding
;
348 if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(ctx
->decoder
,
349 DXVA2_BitStreamDateBufferType
)))
351 if (i
< ctx_pic
->slice_count
)
354 memset(bs
, 0, sizeof(*bs
));
355 bs
->CompressedBufferType
= DXVA2_BitStreamDateBufferType
;
356 bs
->DataSize
= current
- dxva_data
;
357 bs
->NumMBsInBuffer
= mb_count
;
359 if (is_slice_short(ctx
)) {
360 slice_data
= ctx_pic
->slice_short
;
361 slice_size
= ctx_pic
->slice_count
* sizeof(*ctx_pic
->slice_short
);
363 slice_data
= ctx_pic
->slice_long
;
364 slice_size
= ctx_pic
->slice_count
* sizeof(*ctx_pic
->slice_long
);
366 assert((bs
->DataSize
& 127) == 0);
367 return ff_dxva2_commit_buffer(avctx
, ctx
, sc
,
368 DXVA2_SliceControlBufferType
,
369 slice_data
, slice_size
, mb_count
);
373 static int start_frame(AVCodecContext
*avctx
,
374 av_unused
const uint8_t *buffer
,
375 av_unused
uint32_t size
)
377 const H264Context
*h
= avctx
->priv_data
;
378 struct dxva_context
*ctx
= avctx
->hwaccel_context
;
379 struct dxva2_picture_context
*ctx_pic
= h
->s
.current_picture_ptr
->f
.hwaccel_picture_private
;
381 if (!ctx
->decoder
|| !ctx
->cfg
|| ctx
->surface_count
<= 0)
385 /* Fill up DXVA_PicParams_H264 */
386 fill_picture_parameters(ctx
, h
, &ctx_pic
->pp
);
388 /* Fill up DXVA_Qmatrix_H264 */
389 fill_scaling_lists(ctx
, h
, &ctx_pic
->qm
);
391 ctx_pic
->slice_count
= 0;
392 ctx_pic
->bitstream_size
= 0;
393 ctx_pic
->bitstream
= NULL
;
397 static int decode_slice(AVCodecContext
*avctx
,
398 const uint8_t *buffer
, uint32_t size
)
400 const H264Context
*h
= avctx
->priv_data
;
401 struct dxva_context
*ctx
= avctx
->hwaccel_context
;
402 const Picture
*current_picture
= h
->s
.current_picture_ptr
;
403 struct dxva2_picture_context
*ctx_pic
= current_picture
->f
.hwaccel_picture_private
;
406 if (ctx_pic
->slice_count
>= MAX_SLICES
)
409 if (!ctx_pic
->bitstream
)
410 ctx_pic
->bitstream
= buffer
;
411 ctx_pic
->bitstream_size
+= size
;
413 position
= buffer
- ctx_pic
->bitstream
;
414 if (is_slice_short(ctx
))
415 fill_slice_short(&ctx_pic
->slice_short
[ctx_pic
->slice_count
],
418 fill_slice_long(avctx
, &ctx_pic
->slice_long
[ctx_pic
->slice_count
],
420 ctx_pic
->slice_count
++;
422 if (h
->slice_type
!= AV_PICTURE_TYPE_I
&& h
->slice_type
!= AV_PICTURE_TYPE_SI
)
423 ctx_pic
->pp
.wBitFields
&= ~(1 << 15); /* Set IntraPicFlag to 0 */
427 static int end_frame(AVCodecContext
*avctx
)
429 H264Context
*h
= avctx
->priv_data
;
430 MpegEncContext
*s
= &h
->s
;
431 struct dxva2_picture_context
*ctx_pic
=
432 h
->s
.current_picture_ptr
->f
.hwaccel_picture_private
;
434 if (ctx_pic
->slice_count
<= 0 || ctx_pic
->bitstream_size
<= 0)
436 return ff_dxva2_common_end_frame(avctx
, s
,
437 &ctx_pic
->pp
, sizeof(ctx_pic
->pp
),
438 &ctx_pic
->qm
, sizeof(ctx_pic
->qm
),
439 commit_bitstream_and_slice_buffer
);
442 AVHWAccel ff_h264_dxva2_hwaccel
= {
443 .name
= "h264_dxva2",
444 .type
= AVMEDIA_TYPE_VIDEO
,
445 .id
= AV_CODEC_ID_H264
,
446 .pix_fmt
= AV_PIX_FMT_DXVA2_VLD
,
447 .start_frame
= start_frame
,
448 .decode_slice
= decode_slice
,
449 .end_frame
= end_frame
,
450 .priv_data_size
= sizeof(struct dxva2_picture_context
),