1 /*****************************************************************************
2 * va.c: Video Acceleration helpers
3 *****************************************************************************
4 * Copyright (C) 2009 Geoffroy Couprie
5 * Copyright (C) 2009 Laurent Aimar
8 * Authors: Geoffroy Couprie <geal@videolan.org>
9 * Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
30 #include <vlc_common.h>
31 #include <vlc_picture.h>
32 #include <vlc_fourcc.h>
36 #ifdef HAVE_LIBAVCODEC_AVCODEC_H
37 # include <libavcodec/avcodec.h>
38 # ifdef HAVE_AVCODEC_DXVA2
39 # define DXVA2API_USE_BITFIELDS
40 # include <libavcodec/dxva2.h>
42 #elif defined(HAVE_FFMPEG_AVCODEC_H)
43 # include <ffmpeg/avcodec.h>
51 #ifdef HAVE_AVCODEC_DXVA2
61 #define CoTaskMemFree(x)
64 #define DXVA2_E_NOT_INITIALIZED MAKE_HRESULT(1, 4, 4096)
65 #define DXVA2_E_NEW_VIDEO_DEVICE MAKE_HRESULT(1, 4, 4097)
66 #define DXVA2_E_VIDEO_DEVICE_LOCKED MAKE_HRESULT(1, 4, 4098)
67 #define DXVA2_E_NOT_AVAILABLE MAKE_HRESULT(1, 4, 4099)
69 static const GUID DXVA2_ModeMPEG2_MoComp
= {
70 0xe6a9f44b, 0x61b0, 0x4563, {0x9e,0xa4,0x63,0xd2,0xa3,0xc6,0xfe,0x66}
72 static const GUID DXVA2_ModeMPEG2_IDCT
= {
73 0xbf22ad00, 0x03ea, 0x4690, {0x80,0x77,0x47,0x33,0x46,0x20,0x9b,0x7e}
75 static const GUID DXVA2_ModeMPEG2_VLD
= {
76 0xee27417f, 0x5e28, 0x4e65, {0xbe,0xea,0x1d,0x26,0xb5,0x08,0xad,0xc9}
79 static const GUID DXVA2_ModeH264_A
= {
80 0x1b81be64, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
82 static const GUID DXVA2_ModeH264_B
= {
83 0x1b81be65, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
85 static const GUID DXVA2_ModeH264_C
= {
86 0x1b81be66, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
88 static const GUID DXVA2_ModeH264_D
= {
89 0x1b81be67, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
91 static const GUID DXVA2_ModeH264_E
= {
92 0x1b81be68, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
94 static const GUID DXVA2_ModeH264_F
= {
95 0x1b81be69, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
97 static const GUID DXVADDI_Intel_ModeH264_A
= {
98 0x604F8E64, 0x4951,0x4c54, {0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6}
100 static const GUID DXVADDI_Intel_ModeH264_C
= {
101 0x604F8E66,0x4951, 0x4c54, {0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6}
103 static const GUID DXVADDI_Intel_ModeH264_E
= {
104 0x604F8E68,0x4951, 0x4c54, {0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6}
106 static const GUID DXVA2_ModeWMV8_A
= {
107 0x1b81be80, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
109 static const GUID DXVA2_ModeWMV8_B
= {
110 0x1b81be81, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
112 static const GUID DXVA2_ModeWMV9_A
= {
113 0x1b81be90, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
115 static const GUID DXVA2_ModeWMV9_B
= {
116 0x1b81be91, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
118 static const GUID DXVA2_ModeWMV9_C
= {
119 0x1b81be94, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
122 static const GUID DXVA2_ModeVC1_A
= {
123 0x1b81beA0, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
125 static const GUID DXVA2_ModeVC1_B
= {
126 0x1b81beA1, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
128 static const GUID DXVA2_ModeVC1_C
= {
129 0x1b81beA2, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
131 static const GUID DXVA2_ModeVC1_D
= {
132 0x1b81beA3, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
141 /* XXX Preferred modes must come first */
142 static const dxva2_mode_t dxva2_modes
[] = {
143 { "DXVA2_ModeMPEG2_VLD", &DXVA2_ModeMPEG2_VLD
, 0 },
144 { "DXVA2_ModeMPEG2_MoComp", &DXVA2_ModeMPEG2_MoComp
, 0 },
145 { "DXVA2_ModeMPEG2_IDCT", &DXVA2_ModeMPEG2_IDCT
, 0 },
147 { "H.264 variable-length decoder (VLD), FGT", &DXVA2_ModeH264_F
, CODEC_ID_H264
},
148 { "H.264 VLD, no FGT", &DXVA2_ModeH264_E
, CODEC_ID_H264
},
149 { "H.264 VLD, no FGT (Intel)", &DXVADDI_Intel_ModeH264_E
, CODEC_ID_H264
},
150 { "H.264 IDCT, FGT", &DXVA2_ModeH264_D
, 0 },
151 { "H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVA2_ModeH264_C
, 0 },
152 { "H.264 inverse discrete cosine transform (IDCT), no FGT (Intel)", &DXVADDI_Intel_ModeH264_C
, 0 },
153 { "H.264 MoComp, FGT", &DXVA2_ModeH264_B
, 0 },
154 { "H.264 motion compensation (MoComp), no FGT", &DXVA2_ModeH264_A
, 0 },
155 { "H.264 motion compensation (MoComp), no FGT (Intel)", &DXVADDI_Intel_ModeH264_A
, 0 },
157 { "Windows Media Video 8 MoComp", &DXVA2_ModeWMV8_B
, 0 },
158 { "Windows Media Video 8 post processing", &DXVA2_ModeWMV8_A
, 0 },
160 { "Windows Media Video 9 IDCT", &DXVA2_ModeWMV9_C
, 0 },
161 { "Windows Media Video 9 MoComp", &DXVA2_ModeWMV9_B
, 0 },
162 { "Windows Media Video 9 post processing", &DXVA2_ModeWMV9_A
, 0 },
164 { "VC-1 VLD", &DXVA2_ModeVC1_D
, CODEC_ID_VC1
},
165 { "VC-1 VLD", &DXVA2_ModeVC1_D
, CODEC_ID_WMV3
},
166 { "VC-1 IDCT", &DXVA2_ModeVC1_C
, 0 },
167 { "VC-1 MoComp", &DXVA2_ModeVC1_B
, 0 },
168 { "VC-1 post processing", &DXVA2_ModeVC1_A
, 0 },
173 static const dxva2_mode_t
*Dxva2FindMode(const GUID
*guid
)
175 for (unsigned i
= 0; dxva2_modes
[i
].name
; i
++) {
176 if (IsEqualGUID(dxva2_modes
[i
].guid
, guid
))
177 return &dxva2_modes
[i
];
189 /* XXX Preferred format must come first */
189 static const d3d_format_t d3d_formats
[] = {
190 { "YV12", MAKEFOURCC('Y','V','1','2'), VLC_CODEC_YV12
},
191 { "NV12", MAKEFOURCC('N','V','1','2'), VLC_CODEC_NV12
},
196 static const d3d_format_t
*D3dFindFormat(D3DFORMAT format
)
198 for (unsigned i
= 0; d3d_formats
[i
].name
; i
++) {
199 if (d3d_formats
[i
].format
== format
)
200 return &d3d_formats
[i
];
205 static const GUID IID_IDirectXVideoDecoderService
= {
206 0xfc51a551, 0xd5e7, 0x11d9, {0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02}
208 static const GUID IID_IDirectXVideoAccelerationService
= {
209 0xfc51a550, 0xd5e7, 0x11d9, {0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02}
214 LPDIRECT3DSURFACE9 d3d
;
219 #define VA_DXVA2_MAX_SURFACE_COUNT (64)
231 HINSTANCE hdxva2_dll
;
234 D3DPRESENT_PARAMETERS d3dpp
;
236 D3DADAPTER_IDENTIFIER9 d3dai
;
237 LPDIRECT3DDEVICE9 d3ddev
;
241 IDirect3DDeviceManager9
*devmng
;
245 IDirectXVideoDecoderService
*vs
;
250 DXVA2_ConfigPictureDecode cfg
;
251 IDirectXVideoDecoder
*decoder
;
253 /* Option conversion */
255 uint8_t *surface_cache_base
;
256 uint8_t *surface_cache
;
257 size_t surface_cache_size
;
260 struct dxva_context hw
;
263 unsigned surface_count
;
264 unsigned surface_order
;
267 vlc_fourcc_t surface_chroma
;
269 vlc_va_surface_t surface
[VA_DXVA2_MAX_SURFACE_COUNT
];
270 LPDIRECT3DSURFACE9 hw_surface
[VA_DXVA2_MAX_SURFACE_COUNT
];
274 static vlc_va_dxva2_t
*vlc_va_dxva2_Get(void *external
)
276 assert(external
== (void*)(&((vlc_va_dxva2_t
*)external
)->va
));
281 static int D3dCreateDevice(vlc_va_dxva2_t
*);
282 static void D3dDestroyDevice(vlc_va_dxva2_t
*);
283 static char *DxDescribe(vlc_va_dxva2_t
*);
285 static int D3dCreateDeviceManager(vlc_va_dxva2_t
*);
286 static void D3dDestroyDeviceManager(vlc_va_dxva2_t
*);
288 static int DxCreateVideoService(vlc_va_dxva2_t
*);
289 static void DxDestroyVideoService(vlc_va_dxva2_t
*);
290 static int DxFindVideoServiceConversion(vlc_va_dxva2_t
*, GUID
*input
, D3DFORMAT
*output
);
292 static int DxCreateVideoDecoder(vlc_va_dxva2_t
*,
293 int codec_id
, const video_format_t
*);
294 static void DxDestroyVideoDecoder(vlc_va_dxva2_t
*);
295 static int DxResetVideoDecoder(vlc_va_dxva2_t
*);
297 static void DxCreateVideoConversion(vlc_va_dxva2_t
*);
298 static void DxDestroyVideoConversion(vlc_va_dxva2_t
*);
300 static void CopyFromNv12(picture_t
*dst
, const D3DLOCKED_RECT
*src
,
301 uint8_t *cache
, size_t cache_size
,
302 unsigned width
, unsigned height
);
303 static void CopyFromYv12(picture_t
*dst
, const D3DLOCKED_RECT
*src
,
304 uint8_t *cache
, size_t cache_size
,
305 unsigned width
, unsigned height
);
308 static int Setup(vlc_va_t
*external
, void **hw
, vlc_fourcc_t
*chroma
,
309 int width
, int height
)
311 vlc_va_dxva2_t
*va
= vlc_va_dxva2_Get(external
);
313 if (va
->surface_width
== width
&&
314 va
->surface_height
== height
)
318 DxDestroyVideoConversion(va
);
319 DxDestroyVideoDecoder(va
);
323 if (width
<= 0 || height
<= 0)
326 /* FIXME transmit a video_format_t by VaSetup directly */
328 memset(&fmt
, 0, sizeof(fmt
));
330 fmt
.i_height
= height
;
332 if (DxCreateVideoDecoder(va
, va
->codec_id
, &fmt
))
335 va
->hw
.decoder
= va
->decoder
;
336 va
->hw
.cfg
= &va
->cfg
;
337 va
->hw
.surface_count
= va
->surface_count
;
338 va
->hw
.surface
= va
->hw_surface
;
339 for (unsigned i
= 0; i
< va
->surface_count
; i
++)
340 va
->hw
.surface
[i
] = va
->surface
[i
].d3d
;
343 DxCreateVideoConversion(va
);
348 const d3d_format_t
*output
= D3dFindFormat(va
->output
);
349 *chroma
= output
->codec
;
354 static int Extract(vlc_va_t
*external
, picture_t
*picture
, AVFrame
*ff
)
356 vlc_va_dxva2_t
*va
= vlc_va_dxva2_Get(external
);
357 LPDIRECT3DSURFACE9 d3d
= (LPDIRECT3DSURFACE9
)(uintptr_t)ff
->data
[3];
359 if (!va
->surface_cache
)
363 assert(va
->output
== MAKEFOURCC('Y','V','1','2'));
367 if (FAILED(IDirect3DSurface9_LockRect(d3d
, &lock
, NULL
, D3DLOCK_READONLY
))) {
368 msg_Err(va
->log
, "Failed to lock surface");
372 if (va
->render
== MAKEFOURCC('Y','V','1','2')) {
373 CopyFromYv12(picture
, &lock
,
374 va
->surface_cache
, va
->surface_cache_size
,
375 va
->surface_width
, va
->surface_height
);
377 assert(va
->render
== MAKEFOURCC('N','V','1','2'));
378 CopyFromNv12(picture
, &lock
,
379 va
->surface_cache
, va
->surface_cache_size
,
380 va
->surface_width
, va
->surface_height
);
384 IDirect3DSurface9_UnlockRect(d3d
);
387 /* FIXME it is nearly common with VAAPI */
388 static int Get(vlc_va_t
*external
, AVFrame
*ff
)
390 vlc_va_dxva2_t
*va
= vlc_va_dxva2_Get(external
);
392 /* Check the device */
393 HRESULT hr
= IDirect3DDeviceManager9_TestDevice(va
->devmng
, va
->device
);
394 if (hr
== DXVA2_E_NEW_VIDEO_DEVICE
) {
395 if (DxResetVideoDecoder(va
))
397 } else if (FAILED(hr
)) {
398 msg_Err(va
->log
, "IDirect3DDeviceManager9_TestDevice %u", (unsigned)hr
);
402 /* Grab an unused surface; if none is free, fall back to the oldest one.
403 * XXX using the oldest is a workaround in case a problem happens with ffmpeg */
405 for (i
= 0, old
= 0; i
< va
->surface_count
; i
++) {
406 vlc_va_surface_t
*surface
= &va
->surface
[i
];
408 if (!surface
->refcount
)
411 if (surface
->order
< va
->surface
[old
].order
)
414 if (i
>= va
->surface_count
)
417 vlc_va_surface_t
*surface
= &va
->surface
[i
];
419 surface
->refcount
= 1;
420 surface
->order
= va
->surface_order
++;
423 for (int i
= 0; i
< 4; i
++) {
427 if (i
== 0 || i
== 3)
428 ff
->data
[i
] = (void*)surface
->d3d
;/* Yummie */
432 static void Release(vlc_va_t
*external
, AVFrame
*ff
)
434 vlc_va_dxva2_t
*va
= vlc_va_dxva2_Get(external
);
435 LPDIRECT3DSURFACE9 d3d
= (LPDIRECT3DSURFACE9
)(uintptr_t)ff
->data
[3];
437 for (unsigned i
= 0; i
< va
->surface_count
; i
++) {
438 vlc_va_surface_t
*surface
= &va
->surface
[i
];
440 if (surface
->d3d
== d3d
)
444 static void Close(vlc_va_t
*external
)
446 vlc_va_dxva2_t
*va
= vlc_va_dxva2_Get(external
);
448 DxDestroyVideoConversion(va
);
449 DxDestroyVideoDecoder(va
);
450 DxDestroyVideoService(va
);
451 D3dDestroyDeviceManager(va
);
452 D3dDestroyDevice(va
);
455 FreeLibrary(va
->hdxva2_dll
);
457 FreeLibrary(va
->hd3d9_dll
);
459 free(va
->va
.description
);
463 vlc_va_t
*vlc_va_NewDxva2(vlc_object_t
*log
, int codec_id
)
465 vlc_va_dxva2_t
*va
= calloc(1, sizeof(*va
));
471 va
->codec_id
= codec_id
;
474 va
->hd3d9_dll
= LoadLibrary(TEXT("D3D9.DLL"));
475 if (!va
->hd3d9_dll
) {
476 msg_Warn(va
->log
, "cannot load d3d9.dll");
479 va
->hdxva2_dll
= LoadLibrary(TEXT("DXVA2.DLL"));
480 if (!va
->hdxva2_dll
) {
481 msg_Warn(va
->log
, "cannot load dxva2.dll");
484 msg_Dbg(va
->log
, "DLLs loaded");
487 if (D3dCreateDevice(va
)) {
488 msg_Err(va
->log
, "Failed to create Direct3D device");
491 msg_Dbg(va
->log
, "D3dCreateDevice succeed");
493 if (D3dCreateDeviceManager(va
)) {
494 msg_Err(va
->log
, "D3dCreateDeviceManager failed");
498 if (DxCreateVideoService(va
)) {
499 msg_Err(va
->log
, "DxCreateVideoService failed");
504 if (DxFindVideoServiceConversion(va
, &va
->input
, &va
->render
)) {
505 msg_Err(va
->log
, "DxFindVideoServiceConversion failed");
509 /* TODO print the hardware name/vendor for debugging purposes */
510 va
->va
.description
= DxDescribe(va
);
511 va
->va
.setup
= Setup
;
513 va
->va
.release
= Release
;
514 va
->va
.extract
= Extract
;
515 va
->va
.close
= Close
;
525 * It creates a Direct3D device usable for DXVA 2
527 static int D3dCreateDevice(vlc_va_dxva2_t
*va
)
530 LPDIRECT3D9 (WINAPI
*Create9
)(UINT SDKVersion
);
531 Create9
= (void *)GetProcAddress(va
->hd3d9_dll
,
532 TEXT("Direct3DCreate9"));
534 msg_Err(va
->log
, "Cannot locate reference to Direct3DCreate9 ABI in DLL");
540 d3dobj
= Create9(D3D_SDK_VERSION
);
542 msg_Err(va
->log
, "Direct3DCreate9 failed");
548 D3DADAPTER_IDENTIFIER9
*d3dai
= &va
->d3dai
;
549 if (FAILED(IDirect3D9_GetAdapterIdentifier(va
->d3dobj
,
550 D3DADAPTER_DEFAULT
, 0, d3dai
))) {
551 msg_Warn(va
->log
, "IDirect3D9_GetAdapterIdentifier failed");
552 ZeroMemory(d3dai
, sizeof(*d3dai
));
556 D3DPRESENT_PARAMETERS
*d3dpp
= &va
->d3dpp
;
557 ZeroMemory(d3dpp
, sizeof(*d3dpp
));
558 d3dpp
->Flags
= D3DPRESENTFLAG_VIDEO
;
559 d3dpp
->Windowed
= TRUE
;
560 d3dpp
->hDeviceWindow
= NULL
;
561 d3dpp
->SwapEffect
= D3DSWAPEFFECT_DISCARD
;
562 d3dpp
->MultiSampleType
= D3DMULTISAMPLE_NONE
;
563 d3dpp
->PresentationInterval
= D3DPRESENT_INTERVAL_DEFAULT
;
564 d3dpp
->BackBufferCount
= 0; /* FIXME what to put here */
565 d3dpp
->BackBufferFormat
= D3DFMT_X8R8G8B8
; /* FIXME what to put here */
566 d3dpp
->BackBufferWidth
= 0;
567 d3dpp
->BackBufferHeight
= 0;
568 d3dpp
->EnableAutoDepthStencil
= FALSE
;
570 /* Direct3D needs a HWND to create a device, even without using ::Present
571 this HWND is used to alert Direct3D when there's a change of focus window.
572 For now, use GetShellWindow, as it looks harmless */
573 LPDIRECT3DDEVICE9 d3ddev
;
574 if (FAILED(IDirect3D9_CreateDevice(d3dobj
, D3DADAPTER_DEFAULT
,
575 D3DDEVTYPE_HAL
, GetShellWindow(),
576 D3DCREATE_SOFTWARE_VERTEXPROCESSING
|
577 D3DCREATE_MULTITHREADED
,
579 msg_Err(va
->log
, "IDirect3D9_CreateDevice failed");
587 * It releases a Direct3D device and its resources.
589 static void D3dDestroyDevice(vlc_va_dxva2_t
*va
)
592 IDirect3DDevice9_Release(va
->d3ddev
);
594 IDirect3D9_Release(va
->d3dobj
);
597 * It describes our Direct3D object
599 static char *DxDescribe(vlc_va_dxva2_t
*va
)
601 static const struct {
606 { 0x10DE, "NVIDIA" },
608 { 0x5333, "S3 Graphics" },
611 D3DADAPTER_IDENTIFIER9
*id
= &va
->d3dai
;
613 const char *vendor
= "Unknown";
614 for (int i
= 0; vendors
[i
].id
!= 0; i
++) {
615 if (vendors
[i
].id
== id
->VendorId
) {
616 vendor
= vendors
[i
].name
;
622 if (asprintf(&description
, "DXVA2 (%.*s, vendor %d(%s), device %d, revision %d)",
623 sizeof(id
->Description
), id
->Description
,
624 id
->VendorId
, vendor
, id
->DeviceId
, id
->Revision
) < 0)
630 * It creates a Direct3D device manager
632 static int D3dCreateDeviceManager(vlc_va_dxva2_t
*va
)
634 HRESULT (WINAPI
*CreateDeviceManager9
)(UINT
*pResetToken
,
635 IDirect3DDeviceManager9
**);
636 CreateDeviceManager9
=
637 (void *)GetProcAddress(va
->hdxva2_dll
,
638 TEXT("DXVA2CreateDirect3DDeviceManager9"));
640 if (!CreateDeviceManager9
) {
641 msg_Err(va
->log
, "cannot load function\n");
644 msg_Dbg(va
->log
, "OurDirect3DCreateDeviceManager9 Success!");
647 IDirect3DDeviceManager9
*devmng
;
648 if (FAILED(CreateDeviceManager9(&token
, &devmng
))) {
649 msg_Err(va
->log
, " OurDirect3DCreateDeviceManager9 failed");
654 msg_Info(va
->log
, "obtained IDirect3DDeviceManager9");
656 HRESULT hr
= IDirect3DDeviceManager9_ResetDevice(devmng
, va
->d3ddev
, token
);
658 msg_Err(va
->log
, "IDirect3DDeviceManager9_ResetDevice failed: %08x", (unsigned)hr
);
664 * It destroys a Direct3D device manager
666 static void D3dDestroyDeviceManager(vlc_va_dxva2_t
*va
)
669 IDirect3DDeviceManager9_Release(va
->devmng
);
673 * It creates a DirectX video service
675 static int DxCreateVideoService(vlc_va_dxva2_t
*va
)
677 HRESULT (WINAPI
*CreateVideoService
)(IDirect3DDevice9
*,
681 (void *)GetProcAddress(va
->hdxva2_dll
,
682 TEXT("DXVA2CreateVideoService"));
684 if (!CreateVideoService
) {
685 msg_Err(va
->log
, "cannot load function\n");
688 msg_Info(va
->log
, "DXVA2CreateVideoService Success!");
693 hr
= IDirect3DDeviceManager9_OpenDeviceHandle(va
->devmng
, &device
);
695 msg_Err(va
->log
, "OpenDeviceHandle failed");
700 IDirectXVideoDecoderService
*vs
;
701 hr
= IDirect3DDeviceManager9_GetVideoService(va
->devmng
, device
,
702 &IID_IDirectXVideoDecoderService
,
705 msg_Err(va
->log
, "GetVideoService failed");
713 * It destroys a DirectX video service
715 static void DxDestroyVideoService(vlc_va_dxva2_t
*va
)
718 IDirect3DDeviceManager9_CloseDeviceHandle(va
->devmng
, va
->device
);
720 IDirectXVideoDecoderService_Release(va
->vs
);
723 * Find the best suited decoder mode GUID and render format.
725 static int DxFindVideoServiceConversion(vlc_va_dxva2_t
*va
, GUID
*input
, D3DFORMAT
*output
)
727 /* Retrieve supported modes from the decoder service */
728 UINT input_count
= 0;
729 GUID
*input_list
= NULL
;
730 if (FAILED(IDirectXVideoDecoderService_GetDecoderDeviceGuids(va
->vs
,
733 msg_Err(va
->log
, "IDirectXVideoDecoderService_GetDecoderDeviceGuids failed");
736 for (unsigned i
= 0; i
< input_count
; i
++) {
737 const GUID
*g
= &input_list
[i
];
738 const dxva2_mode_t
*mode
= Dxva2FindMode(g
);
740 msg_Dbg(va
->log
, "- '%s' is supported by hardware", mode
->name
);
742 msg_Warn(va
->log
, "- Unknown GUID = %08X-%04x-%04x-XXXX",
743 (unsigned)g
->Data1
, g
->Data2
, g
->Data3
);
747 /* Try all supported modes in our order of priority */
748 for (unsigned i
= 0; dxva2_modes
[i
].name
; i
++) {
749 const dxva2_mode_t
*mode
= &dxva2_modes
[i
];
750 if (!mode
->codec
|| mode
->codec
!= va
->codec_id
)
754 bool is_suported
= false;
755 for (const GUID
*g
= &input_list
[0]; !is_suported
&& g
< &input_list
[input_count
]; g
++) {
756 is_suported
= IsEqualGUID(mode
->guid
, g
);
762 msg_Dbg(va
->log
, "Trying to use '%s' as input", mode
->name
);
763 UINT output_count
= 0;
764 D3DFORMAT
*output_list
= NULL
;
765 if (FAILED(IDirectXVideoDecoderService_GetDecoderRenderTargets(va
->vs
, mode
->guid
,
768 msg_Err(va
->log
, "IDirectXVideoDecoderService_GetDecoderRenderTargets failed");
771 for (unsigned j
= 0; j
< output_count
; j
++) {
772 const D3DFORMAT f
= output_list
[j
];
773 const d3d_format_t
*format
= D3dFindFormat(f
);
775 msg_Dbg(va
->log
, "%s is supported for output", format
->name
);
777 msg_Dbg(va
->log
, "%d is supported for output (%4.4s)", f
, (const char*)&f
);
782 for (unsigned j
= 0; d3d_formats
[j
].name
; j
++) {
783 const d3d_format_t
*format
= &d3d_formats
[j
];
786 bool is_suported
= false;
787 for (unsigned k
= 0; !is_suported
&& k
< output_count
; k
++) {
788 is_suported
= format
->format
== output_list
[k
];
793 /* We have our solution */
794 msg_Dbg(va
->log
, "Using '%s' to decode to '%s'", mode
->name
, format
->name
);
795 *input
= *mode
->guid
;
796 *output
= format
->format
;
797 CoTaskMemFree(output_list
);
798 CoTaskMemFree(input_list
);
801 CoTaskMemFree(output_list
);
803 CoTaskMemFree(input_list
);
808 * It creates a DXVA2 decoder using the given video format
810 static int DxCreateVideoDecoder(vlc_va_dxva2_t
*va
,
811 int codec_id
, const video_format_t
*fmt
)
814 msg_Dbg(va
->log
, "DxCreateVideoDecoder id %d %dx%d",
815 codec_id
, fmt
->i_width
, fmt
->i_height
);
817 /* Allocates all surfaces needed for the decoder */
820 va
->surface_count
= 16 + 1;
823 va
->surface_count
= 2 + 1;
826 LPDIRECT3DSURFACE9 surface_list
[VA_DXVA2_MAX_SURFACE_COUNT
];
827 if (FAILED(IDirectXVideoDecoderService_CreateSurface(va
->vs
,
830 va
->surface_count
- 1,
834 DXVA2_VideoDecoderRenderTarget
,
837 msg_Err(va
->log
, "IDirectXVideoAccelerationService_CreateSurface failed\n");
838 va
->surface_count
= 0;
841 for (unsigned i
= 0; i
< va
->surface_count
; i
++) {
842 vlc_va_surface_t
*surface
= &va
->surface
[i
];
843 surface
->d3d
= surface_list
[i
];
844 surface
->refcount
= 0;
847 va
->surface_width
= fmt
->i_width
;
848 va
->surface_height
= fmt
->i_height
;
849 msg_Dbg(va
->log
, "IDirectXVideoAccelerationService_CreateSurface succeed with %d surfaces (%dx%d)",
850 va
->surface_count
, fmt
->i_width
, fmt
->i_height
);
854 ZeroMemory(&dsc
, sizeof(dsc
));
855 dsc
.SampleWidth
= fmt
->i_width
;
856 dsc
.SampleHeight
= fmt
->i_height
;
857 dsc
.Format
= va
->render
;
858 if (fmt
->i_frame_rate
> 0 && fmt
->i_frame_rate_base
> 0) {
859 dsc
.InputSampleFreq
.Numerator
= fmt
->i_frame_rate
;
860 dsc
.InputSampleFreq
.Denominator
= fmt
->i_frame_rate_base
;
862 dsc
.InputSampleFreq
.Numerator
= 0;
863 dsc
.InputSampleFreq
.Denominator
= 0;
865 dsc
.OutputFrameFreq
= dsc
.InputSampleFreq
;
866 dsc
.UABProtectionLevel
= FALSE
;
869 /* FIXME I am unsure we can let unknown everywhere */
870 DXVA2_ExtendedFormat
*ext
= &dsc
.SampleFormat
;
871 ext
->SampleFormat
= 0;//DXVA2_SampleUnknown;
872 ext
->VideoChromaSubsampling
= 0;//DXVA2_VideoChromaSubsampling_Unknown;
873 ext
->NominalRange
= 0;//DXVA2_NominalRange_Unknown;
874 ext
->VideoTransferMatrix
= 0;//DXVA2_VideoTransferMatrix_Unknown;
875 ext
->VideoLighting
= 0;//DXVA2_VideoLighting_Unknown;
876 ext
->VideoPrimaries
= 0;//DXVA2_VideoPrimaries_Unknown;
877 ext
->VideoTransferFunction
= 0;//DXVA2_VideoTransFunc_Unknown;
879 /* List all configurations available for the decoder */
881 DXVA2_ConfigPictureDecode
*cfg_list
= NULL
;
882 if (FAILED(IDirectXVideoDecoderService_GetDecoderConfigurations(va
->vs
,
888 msg_Err(va
->log
, "IDirectXVideoDecoderService_GetDecoderConfigurations failed\n");
891 msg_Dbg(va
->log
, "we got %d decoder configurations", cfg_count
);
893 /* Select the best decoder configuration */
894 bool has_cfg
= false;
895 for (unsigned i
= 0; i
< cfg_count
; i
++) {
896 const DXVA2_ConfigPictureDecode
*cfg
= &cfg_list
[i
];
899 msg_Dbg(va
->log
, "configuration[%d] ConfigBitstreamRaw %d",
900 i
, cfg
->ConfigBitstreamRaw
);
903 if ((!has_cfg
&& cfg
->ConfigBitstreamRaw
== 1) ||
904 (codec_id
== CODEC_ID_H264
&& cfg
->ConfigBitstreamRaw
== 2)) {
909 CoTaskMemFree(cfg_list
);
911 msg_Err(va
->log
, "Failed to find a supported decoder configuration");
915 /* Create the decoder */
916 IDirectXVideoDecoder
*decoder
;
917 if (FAILED(IDirectXVideoDecoderService_CreateVideoDecoder(va
->vs
,
924 msg_Err(va
->log
, "IDirectXVideoDecoderService_CreateVideoDecoder failed\n");
927 va
->decoder
= decoder
;
928 msg_Dbg(va
->log
, "IDirectXVideoDecoderService_CreateVideoDecoder succeed");
931 static void DxDestroyVideoDecoder(vlc_va_dxva2_t
*va
)
934 IDirectXVideoDecoder_Release(va
->decoder
);
937 for (unsigned i
= 0; i
< va
->surface_count
; i
++)
938 IDirect3DSurface9_Release(va
->surface
[i
].d3d
);
939 va
->surface_count
= 0;
941 static int DxResetVideoDecoder(vlc_va_dxva2_t
*va
)
943 msg_Err(va
->log
, "DxResetVideoDecoder unimplemented");
947 static void DxCreateVideoConversion(vlc_va_dxva2_t
*va
)
949 switch (va
->render
) {
950 case MAKEFOURCC('N','V','1','2'):
951 va
->output
= MAKEFOURCC('Y','V','1','2');
954 va
->output
= va
->render
;
957 va
->surface_cache_size
= __MAX((va
->surface_width
+ 0x0f) & ~ 0x0f, 4096);
958 va
->surface_cache_base
= malloc(16 + va
->surface_cache_size
);
959 va
->surface_cache
= &va
->surface_cache_base
[16 - ((intptr_t)va
->surface_cache_base
& 0x0f)];
961 static void DxDestroyVideoConversion(vlc_va_dxva2_t
*va
)
963 free(va
->surface_cache_base
);
964 va
->surface_cache_base
= NULL
;
965 va
->surface_cache
= NULL
;
966 va
->surface_cache_size
= 0;
969 /* Copy 64 bytes from srcp to dstp, loading data with the SSE>=2 instruction load and
970 * storing data with the SSE>=2 instruction store.
972 #define COPY64(dstp, srcp, load, store) \
974 load " 0(%[src]), %%xmm1\n" \
975 load " 16(%[src]), %%xmm2\n" \
976 load " 32(%[src]), %%xmm3\n" \
977 load " 48(%[src]), %%xmm4\n" \
978 store " %%xmm1, 0(%[dst])\n" \
979 store " %%xmm2, 16(%[dst])\n" \
980 store " %%xmm3, 32(%[dst])\n" \
981 store " %%xmm4, 48(%[dst])\n" \
982 : : [dst]"r"(dstp), [src]"r"(srcp) : "memory")
984 /* Execute the instruction op only if SSE2 is supported. */
985 #ifdef CAN_COMPILE_SSE2
986 # define ASM_SSE2(cpu, op) do { \
987 if (cpu & CPU_CAPABILITY_SSE2) \
991 # define ASM_SSE2(cpu, op)
994 /* Optimized copy from "Uncacheable Speculative Write Combining" memory
995 * as used by some video surfaces.
996 * XXX It is really efficient only when SSE4.1 is available.
998 static void CopyFromUswc(uint8_t *dst
, size_t dst_pitch
,
999 const uint8_t *src
, size_t src_pitch
,
1001 unsigned width
, unsigned height
,
1004 assert(((intptr_t)dst
& 0x0f) == 0 && (dst_pitch
& 0x0f) == 0);
1006 ASM_SSE2(cpu
, "mfence");
1007 for (unsigned y
= 0; y
< height
; y
++) {
1010 for (x
= 0; x
< unaligned
; x
++)
1013 #ifdef CAN_COMPILE_SSE4_1
1014 if (cpu
& CPU_CAPABILITY_SSE4_1
) {
1016 for (; x
+63 < width
; x
+= 64)
1017 COPY64(&dst
[x
], &src
[x
], "movntdqa", "movdqa");
1019 for (; x
+63 < width
; x
+= 64)
1020 COPY64(&dst
[x
], &src
[x
], "movntdqa", "movdqu");
1024 #ifdef CAN_COMPILE_SSE2
1025 if (cpu
& CPU_CAPABILITY_SSE2
) {
1027 for (; x
+63 < width
; x
+= 64)
1028 COPY64(&dst
[x
], &src
[x
], "movdqa", "movdqa");
1030 for (; x
+63 < width
; x
+= 64)
1031 COPY64(&dst
[x
], &src
[x
], "movdqa", "movdqu");
1036 for (; x
< width
; x
++)
1044 static void Copy2d(uint8_t *dst
, size_t dst_pitch
,
1045 const uint8_t *src
, size_t src_pitch
,
1046 unsigned width
, unsigned height
,
1049 assert(((intptr_t)src
& 0x0f) == 0 && (src_pitch
& 0x0f) == 0);
1051 ASM_SSE2(cpu
, "mfence");
1053 for (unsigned y
= 0; y
< height
; y
++) {
1055 bool unaligned
= ((intptr_t)dst
& 0x0f) != 0;
1057 #ifdef CAN_COMPILE_SSE2
1058 if (cpu
& CPU_CAPABILITY_SSE2
) {
1060 for (; x
+63 < width
; x
+= 64)
1061 COPY64(&dst
[x
], &src
[x
], "movdqa", "movntdq");
1063 for (; x
+63 < width
; x
+= 64)
1064 COPY64(&dst
[x
], &src
[x
], "movdqa", "movdqu");
1069 for (; x
< width
; x
++)
1077 static void SplitUV(uint8_t *dstu
, size_t dstu_pitch
,
1078 uint8_t *dstv
, size_t dstv_pitch
,
1079 const uint8_t *src
, size_t src_pitch
,
1080 unsigned width
, unsigned height
, unsigned cpu
)
1082 const uint8_t shuffle
[] = { 0, 2, 4, 6, 8, 10, 12, 14,
1083 1, 3, 5, 7, 9, 11, 13, 15 };
1084 const uint8_t mask
[] = { 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00,
1085 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00 };
1087 assert(((intptr_t)src
& 0x0f) == 0 && (src_pitch
& 0x0f) == 0);
1089 ASM_SSE2(cpu
, "mfence");
1091 for (unsigned y
= 0; y
< height
; y
++) {
1095 "movdqa 0(%[src]), %%xmm0\n" \
1096 "movdqa 16(%[src]), %%xmm1\n" \
1097 "movdqa 32(%[src]), %%xmm2\n" \
1098 "movdqa 48(%[src]), %%xmm3\n"
1101 "movq %%xmm0, 0(%[dst1])\n" \
1102 "movq %%xmm1, 8(%[dst1])\n" \
1103 "movhpd %%xmm0, 0(%[dst2])\n" \
1104 "movhpd %%xmm1, 8(%[dst2])\n" \
1105 "movq %%xmm2, 16(%[dst1])\n" \
1106 "movq %%xmm3, 24(%[dst1])\n" \
1107 "movhpd %%xmm2, 16(%[dst2])\n" \
1108 "movhpd %%xmm3, 24(%[dst2])\n"
1110 #ifdef CAN_COMPILE_SSSE3
1111 if (cpu
& CPU_CAPABILITY_SSSE3
) {
1112 for (x
= 0; x
< (width
& ~31); x
+= 32) {
1114 "movdqu (%[shuffle]), %%xmm7\n"
1116 "pshufb %%xmm7, %%xmm0\n"
1117 "pshufb %%xmm7, %%xmm1\n"
1118 "pshufb %%xmm7, %%xmm2\n"
1119 "pshufb %%xmm7, %%xmm3\n"
1121 : : [dst1
]"r"(&dstu
[x
]), [dst2
]"r"(&dstv
[x
]), [src
]"r"(&src
[2*x
]), [shuffle
]"r"(shuffle
) : "memory");
1125 #ifdef CAN_COMPILE_SSE2
1126 if (cpu
& CPU_CAPABILITY_SSE2
) {
1127 for (x
= 0; x
< (width
& ~31); x
+= 32) {
1129 "movdqu (%[mask]), %%xmm7\n"
1131 "movdqa %%xmm0, %%xmm4\n"
1132 "movdqa %%xmm1, %%xmm5\n"
1133 "movdqa %%xmm2, %%xmm6\n"
1134 "psrlw $8, %%xmm0\n"
1135 "psrlw $8, %%xmm1\n"
1136 "pand %%xmm7, %%xmm4\n"
1137 "pand %%xmm7, %%xmm5\n"
1138 "pand %%xmm7, %%xmm6\n"
1139 "packuswb %%xmm4, %%xmm0\n"
1140 "packuswb %%xmm5, %%xmm1\n"
1141 "pand %%xmm3, %%xmm7\n"
1142 "psrlw $8, %%xmm2\n"
1143 "psrlw $8, %%xmm3\n"
1144 "packuswb %%xmm6, %%xmm2\n"
1145 "packuswb %%xmm7, %%xmm3\n"
1147 : : [dst2
]"r"(&dstu
[x
]), [dst1
]"r"(&dstv
[x
]), [src
]"r"(&src
[2*x
]), [mask
]"r"(mask
) : "memory");
1154 for (; x
< width
; x
++) {
1155 dstu
[x
] = src
[2*x
+0];
1156 dstv
[x
] = src
[2*x
+1];
1164 static void CopyPlane(uint8_t *dst
, size_t dst_pitch
, const uint8_t *src
, size_t src_pitch
,
1165 uint8_t *cache
, size_t cache_size
,
1166 unsigned width
, unsigned height
,
1169 const unsigned w16
= (width
+15) & ~15;
1170 const unsigned hstep
= cache_size
/ w16
;
1173 for (unsigned y
= 0; y
< height
; y
+= hstep
) {
1174 const unsigned unaligned
= (intptr_t)src
& 0x0f;
1175 const unsigned hblock
= __MIN(hstep
, height
- y
);
1177 /* Copy a bunch of lines into our cache */
1178 CopyFromUswc(cache
, w16
,
1181 width
, hblock
, cpu
);
1183 /* Copy from our cache to the destination */
1184 Copy2d(dst
, dst_pitch
,
1186 width
, hblock
, cpu
);
1189 src
+= src_pitch
* hblock
;
1190 dst
+= dst_pitch
* hblock
;
1193 ASM_SSE2(cpu
, "mfence");
1195 static void SplitPlanes(uint8_t *dstu
, size_t dstu_pitch
,
1196 uint8_t *dstv
, size_t dstv_pitch
,
1197 const uint8_t *src
, size_t src_pitch
,
1198 uint8_t *cache
, size_t cache_size
,
1199 unsigned width
, unsigned height
,
1202 const unsigned w2_16
= (2*width
+15) & ~15;
1203 const unsigned hstep
= cache_size
/ w2_16
;
1206 for (unsigned y
= 0; y
< height
; y
+= hstep
) {
1207 const unsigned unaligned
= (intptr_t)src
& 0x0f;
1208 const unsigned hblock
= __MIN(hstep
, height
- y
);
1210 /* Copy a bunch of lines into our cache */
1211 CopyFromUswc(cache
, w2_16
,
1214 2*width
, hblock
, cpu
);
1216 /* Copy from our cache to the destination */
1217 SplitUV(dstu
, dstu_pitch
,
1220 width
, hblock
, cpu
);
1223 src
+= src_pitch
* hblock
;
1224 dstu
+= dstu_pitch
* hblock
;
1225 dstv
+= dstv_pitch
* hblock
;
1228 ASM_SSE2(cpu
, "mfence");
1231 static void CopyFromNv12(picture_t
*dst
, const D3DLOCKED_RECT
*src
,
1232 uint8_t *cache
, size_t cache_size
,
1233 unsigned width
, unsigned height
)
1235 const unsigned cpu
= vlc_CPU();
1238 CopyPlane(dst
->p
[0].p_pixels
, dst
->p
[0].i_pitch
,
1239 src
->pBits
, src
->Pitch
,
1241 width
, height
, cpu
);
1242 SplitPlanes(dst
->p
[2].p_pixels
, dst
->p
[2].i_pitch
,
1243 dst
->p
[1].p_pixels
, dst
->p
[1].i_pitch
,
1244 (const uint8_t*)src
->pBits
+ src
->Pitch
* height
, src
->Pitch
,
1246 width
/2, height
/2, cpu
);
1248 ASM_SSE2(cpu
, "emms");
1250 static void CopyFromYv12(picture_t
*dst
, const D3DLOCKED_RECT
*src
,
1251 uint8_t *cache
, size_t cache_size
,
1252 unsigned width
, unsigned height
)
1254 const unsigned cpu
= vlc_CPU();
1257 for (unsigned n
= 0, offset
= 0; n
< 3; n
++) {
1258 const unsigned d
= n
> 0 ? 2 : 1;
1259 CopyPlane(dst
->p
[n
].p_pixels
, dst
->p
[n
].i_pitch
,
1260 (const uint8_t*)src
->pBits
+ offset
, src
->Pitch
/d
,
1262 width
/d
, height
/d
, cpu
);
1263 offset
+= (src
->Pitch
/d
) * (height
/d
);
1265 ASM_SSE2(cpu
, "emms");
1271 vlc_va_t
*vlc_va_NewDxva2(vlc_object_t
*log
, int codec_id
)