Added VLC_CODEC_NV12 to vlc_fourcc.h.
[vlc/solaris.git] / modules / codec / avcodec / dxva2.c
blob74c7d1208579b198bc1caa9e12c4601a2fc4cd33
1 /*****************************************************************************
2 * va.c: Video Acceleration helpers
3 *****************************************************************************
4 * Copyright (C) 2009 Geoffroy Couprie
5 * Copyright (C) 2009 Laurent Aimar
6 * $Id$
8 * Authors: Geoffroy Couprie <geal@videolan.org>
9 * Laurent Aimar <fenrir _AT_ videolan _DOT_ org>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
24 *****************************************************************************/
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #endif
30 #include <vlc_common.h>
31 #include <vlc_picture.h>
32 #include <vlc_fourcc.h>
33 #include <vlc_cpu.h>
34 #include <assert.h>
36 #ifdef HAVE_LIBAVCODEC_AVCODEC_H
37 # include <libavcodec/avcodec.h>
38 # ifdef HAVE_AVCODEC_DXVA2
39 # define DXVA2API_USE_BITFIELDS
40 # include <libavcodec/dxva2.h>
41 # endif
42 #elif defined(HAVE_FFMPEG_AVCODEC_H)
43 # include <ffmpeg/avcodec.h>
44 #else
45 # include <avcodec.h>
46 #endif
48 #include "avcodec.h"
49 #include "va.h"
51 #ifdef HAVE_AVCODEC_DXVA2
53 #include <windows.h>
54 #include <windowsx.h>
55 #include <ole2.h>
56 #include <commctrl.h>
57 #include <shlwapi.h>
58 #include <d3d9.h>
/*
 * FIXME: CoTaskMemFree() is stubbed out as an empty macro, so every
 * CoTaskMemFree(ptr) call in this file expands to nothing and the buffer
 * passed to it is never released.
 */
#define CoTaskMemFree(x)

/* Status codes compared against IDirect3DDeviceManager9 results */
#define DXVA2_E_NOT_INITIALIZED     MAKE_HRESULT(1, 4, 4096)
#define DXVA2_E_NEW_VIDEO_DEVICE    MAKE_HRESULT(1, 4, 4097)
#define DXVA2_E_VIDEO_DEVICE_LOCKED MAKE_HRESULT(1, 4, 4098)
#define DXVA2_E_NOT_AVAILABLE       MAKE_HRESULT(1, 4, 4099)
69 static const GUID DXVA2_ModeMPEG2_MoComp = {
70 0xe6a9f44b, 0x61b0, 0x4563, {0x9e,0xa4,0x63,0xd2,0xa3,0xc6,0xfe,0x66}
72 static const GUID DXVA2_ModeMPEG2_IDCT = {
73 0xbf22ad00, 0x03ea, 0x4690, {0x80,0x77,0x47,0x33,0x46,0x20,0x9b,0x7e}
75 static const GUID DXVA2_ModeMPEG2_VLD = {
76 0xee27417f, 0x5e28, 0x4e65, {0xbe,0xea,0x1d,0x26,0xb5,0x08,0xad,0xc9}
79 static const GUID DXVA2_ModeH264_A = {
80 0x1b81be64, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
82 static const GUID DXVA2_ModeH264_B = {
83 0x1b81be65, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
85 static const GUID DXVA2_ModeH264_C = {
86 0x1b81be66, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
88 static const GUID DXVA2_ModeH264_D = {
89 0x1b81be67, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
91 static const GUID DXVA2_ModeH264_E = {
92 0x1b81be68, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
94 static const GUID DXVA2_ModeH264_F = {
95 0x1b81be69, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
97 static const GUID DXVADDI_Intel_ModeH264_A = {
98 0x604F8E64, 0x4951,0x4c54, {0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6}
100 static const GUID DXVADDI_Intel_ModeH264_C = {
101 0x604F8E66,0x4951, 0x4c54, {0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6}
103 static const GUID DXVADDI_Intel_ModeH264_E = {
104 0x604F8E68,0x4951, 0x4c54, {0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6}
106 static const GUID DXVA2_ModeWMV8_A = {
107 0x1b81be80, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
109 static const GUID DXVA2_ModeWMV8_B = {
110 0x1b81be81, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
112 static const GUID DXVA2_ModeWMV9_A = {
113 0x1b81be90, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
115 static const GUID DXVA2_ModeWMV9_B = {
116 0x1b81be91, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
118 static const GUID DXVA2_ModeWMV9_C = {
119 0x1b81be94, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
122 static const GUID DXVA2_ModeVC1_A = {
123 0x1b81beA0, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
125 static const GUID DXVA2_ModeVC1_B = {
126 0x1b81beA1, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
128 static const GUID DXVA2_ModeVC1_C = {
129 0x1b81beA2, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
131 static const GUID DXVA2_ModeVC1_D = {
132 0x1b81beA3, 0xa0c7,0x11d3, {0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5}
135 /* */
136 typedef struct {
137 const char *name;
138 const GUID *guid;
139 int codec;
140 } dxva2_mode_t;
141 /* XXX Prefered modes must come first */
142 static const dxva2_mode_t dxva2_modes[] = {
143 { "DXVA2_ModeMPEG2_VLD", &DXVA2_ModeMPEG2_VLD, 0 },
144 { "DXVA2_ModeMPEG2_MoComp", &DXVA2_ModeMPEG2_MoComp, 0 },
145 { "DXVA2_ModeMPEG2_IDCT", &DXVA2_ModeMPEG2_IDCT, 0 },
147 { "H.264 variable-length decoder (VLD), FGT", &DXVA2_ModeH264_F, CODEC_ID_H264 },
148 { "H.264 VLD, no FGT", &DXVA2_ModeH264_E, CODEC_ID_H264 },
149 { "H.264 VLD, no FGT (Intel)", &DXVADDI_Intel_ModeH264_E, CODEC_ID_H264 },
150 { "H.264 IDCT, FGT", &DXVA2_ModeH264_D, 0 },
151 { "H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVA2_ModeH264_C, 0 },
152 { "H.264 inverse discrete cosine transform (IDCT), no FGT (Intel)", &DXVADDI_Intel_ModeH264_C, 0 },
153 { "H.264 MoComp, FGT", &DXVA2_ModeH264_B, 0 },
154 { "H.264 motion compensation (MoComp), no FGT", &DXVA2_ModeH264_A, 0 },
155 { "H.264 motion compensation (MoComp), no FGT (Intel)", &DXVADDI_Intel_ModeH264_A, 0 },
157 { "Windows Media Video 8 MoComp", &DXVA2_ModeWMV8_B, 0 },
158 { "Windows Media Video 8 post processing", &DXVA2_ModeWMV8_A, 0 },
160 { "Windows Media Video 9 IDCT", &DXVA2_ModeWMV9_C, 0 },
161 { "Windows Media Video 9 MoComp", &DXVA2_ModeWMV9_B, 0 },
162 { "Windows Media Video 9 post processing", &DXVA2_ModeWMV9_A, 0 },
164 { "VC-1 VLD", &DXVA2_ModeVC1_D, CODEC_ID_VC1 },
165 { "VC-1 VLD", &DXVA2_ModeVC1_D, CODEC_ID_WMV3 },
166 { "VC-1 IDCT", &DXVA2_ModeVC1_C, 0 },
167 { "VC-1 MoComp", &DXVA2_ModeVC1_B, 0 },
168 { "VC-1 post processing", &DXVA2_ModeVC1_A, 0 },
170 { NULL, NULL, 0 }
173 static const dxva2_mode_t *Dxva2FindMode(const GUID *guid)
175 for (unsigned i = 0; dxva2_modes[i].name; i++) {
176 if (IsEqualGUID(dxva2_modes[i].guid, guid))
177 return &dxva2_modes[i];
179 return NULL;
182 /* */
183 typedef struct {
184 const char *name;
185 D3DFORMAT format;
186 vlc_fourcc_t codec;
187 } d3d_format_t;
188 /* XXX Prefered format must come first */
189 static const d3d_format_t d3d_formats[] = {
190 { "YV12", MAKEFOURCC('Y','V','1','2'), VLC_CODEC_YV12 },
191 { "NV12", MAKEFOURCC('N','V','1','2'), VLC_CODEC_NV12 },
193 { NULL, 0, 0 }
196 static const d3d_format_t *D3dFindFormat(D3DFORMAT format)
198 for (unsigned i = 0; d3d_formats[i].name; i++) {
199 if (d3d_formats[i].format == format)
200 return &d3d_formats[i];
202 return NULL;
205 static const GUID IID_IDirectXVideoDecoderService = {
206 0xfc51a551, 0xd5e7, 0x11d9, {0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02}
208 static const GUID IID_IDirectXVideoAccelerationService = {
209 0xfc51a550, 0xd5e7, 0x11d9, {0xaf,0x55,0x00,0x05,0x4e,0x43,0xff,0x02}
212 /* */
213 typedef struct {
214 LPDIRECT3DSURFACE9 d3d;
215 int refcount;
216 unsigned int order;
217 } vlc_va_surface_t;
219 #define VA_DXVA2_MAX_SURFACE_COUNT (64)
220 typedef struct
222 /* */
223 vlc_va_t va;
225 /* */
226 vlc_object_t *log;
227 int codec_id;
229 /* DLL */
230 HINSTANCE hd3d9_dll;
231 HINSTANCE hdxva2_dll;
233 /* Direct3D */
234 D3DPRESENT_PARAMETERS d3dpp;
235 LPDIRECT3D9 d3dobj;
236 D3DADAPTER_IDENTIFIER9 d3dai;
237 LPDIRECT3DDEVICE9 d3ddev;
239 /* Device manager */
240 UINT token;
241 IDirect3DDeviceManager9 *devmng;
242 HANDLE device;
244 /* Video service */
245 IDirectXVideoDecoderService *vs;
246 GUID input;
247 D3DFORMAT render;
249 /* Video decoder */
250 DXVA2_ConfigPictureDecode cfg;
251 IDirectXVideoDecoder *decoder;
253 /* Option conversion */
254 D3DFORMAT output;
255 uint8_t *surface_cache_base;
256 uint8_t *surface_cache;
257 size_t surface_cache_size;
259 /* */
260 struct dxva_context hw;
262 /* */
263 unsigned surface_count;
264 unsigned surface_order;
265 int surface_width;
266 int surface_height;
267 vlc_fourcc_t surface_chroma;
269 vlc_va_surface_t surface[VA_DXVA2_MAX_SURFACE_COUNT];
270 LPDIRECT3DSURFACE9 hw_surface[VA_DXVA2_MAX_SURFACE_COUNT];
271 } vlc_va_dxva2_t;
273 /* */
274 static vlc_va_dxva2_t *vlc_va_dxva2_Get(void *external)
276 assert(external == (void*)(&((vlc_va_dxva2_t*)external)->va));
277 return external;
280 /* */
281 static int D3dCreateDevice(vlc_va_dxva2_t *);
282 static void D3dDestroyDevice(vlc_va_dxva2_t *);
283 static char *DxDescribe(vlc_va_dxva2_t *);
285 static int D3dCreateDeviceManager(vlc_va_dxva2_t *);
286 static void D3dDestroyDeviceManager(vlc_va_dxva2_t *);
288 static int DxCreateVideoService(vlc_va_dxva2_t *);
289 static void DxDestroyVideoService(vlc_va_dxva2_t *);
290 static int DxFindVideoServiceConversion(vlc_va_dxva2_t *, GUID *input, D3DFORMAT *output);
292 static int DxCreateVideoDecoder(vlc_va_dxva2_t *,
293 int codec_id, const video_format_t *);
294 static void DxDestroyVideoDecoder(vlc_va_dxva2_t *);
295 static int DxResetVideoDecoder(vlc_va_dxva2_t *);
297 static void DxCreateVideoConversion(vlc_va_dxva2_t *);
298 static void DxDestroyVideoConversion(vlc_va_dxva2_t *);
300 static void CopyFromNv12(picture_t *dst, const D3DLOCKED_RECT *src,
301 uint8_t *cache, size_t cache_size,
302 unsigned width, unsigned height);
303 static void CopyFromYv12(picture_t *dst, const D3DLOCKED_RECT *src,
304 uint8_t *cache, size_t cache_size,
305 unsigned width, unsigned height);
307 /* */
308 static int Setup(vlc_va_t *external, void **hw, vlc_fourcc_t *chroma,
309 int width, int height)
311 vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
313 if (va->surface_width == width &&
314 va->surface_height == height)
315 goto ok;
317 /* */
318 DxDestroyVideoConversion(va);
319 DxDestroyVideoDecoder(va);
321 *hw = NULL;
322 *chroma = 0;
323 if (width <= 0 || height <= 0)
324 return VLC_EGENERIC;
326 /* FIXME transmit a video_format_t by VaSetup directly */
327 video_format_t fmt;
328 memset(&fmt, 0, sizeof(fmt));
329 fmt.i_width = width;
330 fmt.i_height = height;
332 if (DxCreateVideoDecoder(va, va->codec_id, &fmt))
333 return VLC_EGENERIC;
334 /* */
335 va->hw.decoder = va->decoder;
336 va->hw.cfg = &va->cfg;
337 va->hw.surface_count = va->surface_count;
338 va->hw.surface = va->hw_surface;
339 for (unsigned i = 0; i < va->surface_count; i++)
340 va->hw.surface[i] = va->surface[i].d3d;
342 /* */
343 DxCreateVideoConversion(va);
345 /* */
347 *hw = &va->hw;
348 const d3d_format_t *output = D3dFindFormat(va->output);
349 *chroma = output->codec;
351 return VLC_SUCCESS;
354 static int Extract(vlc_va_t *external, picture_t *picture, AVFrame *ff)
356 vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
357 LPDIRECT3DSURFACE9 d3d = (LPDIRECT3DSURFACE9)(uintptr_t)ff->data[3];
359 if (!va->surface_cache)
360 return VLC_EGENERIC;
362 /* */
363 assert(va->output == MAKEFOURCC('Y','V','1','2'));
365 /* */
366 D3DLOCKED_RECT lock;
367 if (FAILED(IDirect3DSurface9_LockRect(d3d, &lock, NULL, D3DLOCK_READONLY))) {
368 msg_Err(va->log, "Failed to lock surface");
369 return VLC_EGENERIC;
372 if (va->render == MAKEFOURCC('Y','V','1','2')) {
373 CopyFromYv12(picture, &lock,
374 va->surface_cache, va->surface_cache_size,
375 va->surface_width, va->surface_height);
376 } else {
377 assert(va->render == MAKEFOURCC('N','V','1','2'));
378 CopyFromNv12(picture, &lock,
379 va->surface_cache, va->surface_cache_size,
380 va->surface_width, va->surface_height);
383 /* */
384 IDirect3DSurface9_UnlockRect(d3d);
385 return VLC_SUCCESS;
387 /* FIXME it is nearly common with VAAPI */
388 static int Get(vlc_va_t *external, AVFrame *ff)
390 vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
392 /* Check the device */
393 HRESULT hr = IDirect3DDeviceManager9_TestDevice(va->devmng, va->device);
394 if (hr == DXVA2_E_NEW_VIDEO_DEVICE) {
395 if (DxResetVideoDecoder(va))
396 return VLC_EGENERIC;
397 } else if (FAILED(hr)) {
398 msg_Err(va->log, "IDirect3DDeviceManager9_TestDevice %u", (unsigned)hr);
399 return VLC_EGENERIC;
402 /* Grab an unused surface, in case none are, try the oldest
403 * XXX using the oldest is a workaround in case a problem happens with ffmpeg */
404 unsigned i, old;
405 for (i = 0, old = 0; i < va->surface_count; i++) {
406 vlc_va_surface_t *surface = &va->surface[i];
408 if (!surface->refcount)
409 break;
411 if (surface->order < va->surface[old].order)
412 old = i;
414 if (i >= va->surface_count)
415 i = old;
417 vlc_va_surface_t *surface = &va->surface[i];
419 surface->refcount = 1;
420 surface->order = va->surface_order++;
422 /* */
423 for (int i = 0; i < 4; i++) {
424 ff->data[i] = NULL;
425 ff->linesize[i] = 0;
427 if (i == 0 || i == 3)
428 ff->data[i] = (void*)surface->d3d;/* Yummie */
430 return VLC_SUCCESS;
432 static void Release(vlc_va_t *external, AVFrame *ff)
434 vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
435 LPDIRECT3DSURFACE9 d3d = (LPDIRECT3DSURFACE9)(uintptr_t)ff->data[3];
437 for (unsigned i = 0; i < va->surface_count; i++) {
438 vlc_va_surface_t *surface = &va->surface[i];
440 if (surface->d3d == d3d)
441 surface->refcount--;
444 static void Close(vlc_va_t *external)
446 vlc_va_dxva2_t *va = vlc_va_dxva2_Get(external);
448 DxDestroyVideoConversion(va);
449 DxDestroyVideoDecoder(va);
450 DxDestroyVideoService(va);
451 D3dDestroyDeviceManager(va);
452 D3dDestroyDevice(va);
454 if (va->hdxva2_dll)
455 FreeLibrary(va->hdxva2_dll);
456 if (va->hd3d9_dll)
457 FreeLibrary(va->hd3d9_dll);
459 free(va->va.description);
460 free(va);
463 vlc_va_t *vlc_va_NewDxva2(vlc_object_t *log, int codec_id)
465 vlc_va_dxva2_t *va = calloc(1, sizeof(*va));
466 if (!va)
467 return NULL;
469 /* */
470 va->log = log;
471 va->codec_id = codec_id;
473 /* Load dll*/
474 va->hd3d9_dll = LoadLibrary(TEXT("D3D9.DLL"));
475 if (!va->hd3d9_dll) {
476 msg_Warn(va->log, "cannot load d3d9.dll");
477 goto error;
479 va->hdxva2_dll = LoadLibrary(TEXT("DXVA2.DLL"));
480 if (!va->hdxva2_dll) {
481 msg_Warn(va->log, "cannot load dxva2.dll");
482 goto error;
484 msg_Dbg(va->log, "DLLs loaded");
486 /* */
487 if (D3dCreateDevice(va)) {
488 msg_Err(va->log, "Failed to create Direct3D device");
489 goto error;
491 msg_Dbg(va->log, "D3dCreateDevice succeed");
493 if (D3dCreateDeviceManager(va)) {
494 msg_Err(va->log, "D3dCreateDeviceManager failed");
495 goto error;
498 if (DxCreateVideoService(va)) {
499 msg_Err(va->log, "DxCreateVideoService failed");
500 goto error;
503 /* */
504 if (DxFindVideoServiceConversion(va, &va->input, &va->render)) {
505 msg_Err(va->log, "DxFindVideoServiceConversion failed");
506 goto error;
509 /* TODO print the hardware name/vendor for debugging purposes */
510 va->va.description = DxDescribe(va);
511 va->va.setup = Setup;
512 va->va.get = Get;
513 va->va.release = Release;
514 va->va.extract = Extract;
515 va->va.close = Close;
516 return &va->va;
518 error:
519 Close(&va->va);
520 return NULL;
522 /* */
525 * It creates a Direct3D device usable for DXVA 2
527 static int D3dCreateDevice(vlc_va_dxva2_t *va)
529 /* */
530 LPDIRECT3D9 (WINAPI *Create9)(UINT SDKVersion);
531 Create9 = (void *)GetProcAddress(va->hd3d9_dll,
532 TEXT("Direct3DCreate9"));
533 if (!Create9) {
534 msg_Err(va->log, "Cannot locate reference to Direct3DCreate9 ABI in DLL");
535 return VLC_EGENERIC;
538 /* */
539 LPDIRECT3D9 d3dobj;
540 d3dobj = Create9(D3D_SDK_VERSION);
541 if (!d3dobj) {
542 msg_Err(va->log, "Direct3DCreate9 failed");
543 return VLC_EGENERIC;
545 va->d3dobj = d3dobj;
547 /* */
548 D3DADAPTER_IDENTIFIER9 *d3dai = &va->d3dai;
549 if (FAILED(IDirect3D9_GetAdapterIdentifier(va->d3dobj,
550 D3DADAPTER_DEFAULT, 0, d3dai))) {
551 msg_Warn(va->log, "IDirect3D9_GetAdapterIdentifier failed");
552 ZeroMemory(d3dai, sizeof(*d3dai));
555 /* */
556 D3DPRESENT_PARAMETERS *d3dpp = &va->d3dpp;
557 ZeroMemory(d3dpp, sizeof(*d3dpp));
558 d3dpp->Flags = D3DPRESENTFLAG_VIDEO;
559 d3dpp->Windowed = TRUE;
560 d3dpp->hDeviceWindow = NULL;
561 d3dpp->SwapEffect = D3DSWAPEFFECT_DISCARD;
562 d3dpp->MultiSampleType = D3DMULTISAMPLE_NONE;
563 d3dpp->PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT;
564 d3dpp->BackBufferCount = 0; /* FIXME what to put here */
565 d3dpp->BackBufferFormat = D3DFMT_X8R8G8B8; /* FIXME what to put here */
566 d3dpp->BackBufferWidth = 0;
567 d3dpp->BackBufferHeight = 0;
568 d3dpp->EnableAutoDepthStencil = FALSE;
570 /* Direct3D needs a HWND to create a device, even without using ::Present
571 this HWND is used to alert Direct3D when there's a change of focus window.
572 For now, use GetShellWindow, as it looks harmless */
573 LPDIRECT3DDEVICE9 d3ddev;
574 if (FAILED(IDirect3D9_CreateDevice(d3dobj, D3DADAPTER_DEFAULT,
575 D3DDEVTYPE_HAL, GetShellWindow(),
576 D3DCREATE_SOFTWARE_VERTEXPROCESSING |
577 D3DCREATE_MULTITHREADED,
578 d3dpp, &d3ddev))) {
579 msg_Err(va->log, "IDirect3D9_CreateDevice failed");
580 return VLC_EGENERIC;
582 va->d3ddev = d3ddev;
584 return VLC_SUCCESS;
587 * It releases a Direct3D device and its resources.
589 static void D3dDestroyDevice(vlc_va_dxva2_t *va)
591 if (va->d3ddev)
592 IDirect3DDevice9_Release(va->d3ddev);
593 if (va->d3dobj)
594 IDirect3D9_Release(va->d3dobj);
597 * It describes our Direct3D object
599 static char *DxDescribe(vlc_va_dxva2_t *va)
601 static const struct {
602 unsigned id;
603 char name[32];
604 } vendors [] = {
605 { 0x1002, "ATI" },
606 { 0x10DE, "NVIDIA" },
607 { 0x8086, "Intel" },
608 { 0x5333, "S3 Graphics" },
609 { 0, "" }
611 D3DADAPTER_IDENTIFIER9 *id = &va->d3dai;
613 const char *vendor = "Unknown";
614 for (int i = 0; vendors[i].id != 0; i++) {
615 if (vendors[i].id == id->VendorId) {
616 vendor = vendors[i].name;
617 break;
621 char *description;
622 if (asprintf(&description, "DXVA2 (%.*s, vendor %d(%s), device %d, revision %d)",
623 sizeof(id->Description), id->Description,
624 id->VendorId, vendor, id->DeviceId, id->Revision) < 0)
625 return NULL;
626 return description;
630 * It creates a Direct3D device manager
632 static int D3dCreateDeviceManager(vlc_va_dxva2_t *va)
634 HRESULT (WINAPI *CreateDeviceManager9)(UINT *pResetToken,
635 IDirect3DDeviceManager9 **);
636 CreateDeviceManager9 =
637 (void *)GetProcAddress(va->hdxva2_dll,
638 TEXT("DXVA2CreateDirect3DDeviceManager9"));
640 if (!CreateDeviceManager9) {
641 msg_Err(va->log, "cannot load function\n");
642 return VLC_EGENERIC;
644 msg_Dbg(va->log, "OurDirect3DCreateDeviceManager9 Success!");
646 UINT token;
647 IDirect3DDeviceManager9 *devmng;
648 if (FAILED(CreateDeviceManager9(&token, &devmng))) {
649 msg_Err(va->log, " OurDirect3DCreateDeviceManager9 failed");
650 return VLC_EGENERIC;
652 va->token = token;
653 va->devmng = devmng;
654 msg_Info(va->log, "obtained IDirect3DDeviceManager9");
656 HRESULT hr = IDirect3DDeviceManager9_ResetDevice(devmng, va->d3ddev, token);
657 if (FAILED(hr)) {
658 msg_Err(va->log, "IDirect3DDeviceManager9_ResetDevice failed: %08x", (unsigned)hr);
659 return VLC_EGENERIC;
661 return VLC_SUCCESS;
664 * It destroys a Direct3D device manager
666 static void D3dDestroyDeviceManager(vlc_va_dxva2_t *va)
668 if (va->devmng)
669 IDirect3DDeviceManager9_Release(va->devmng);
673 * It creates a DirectX video service
675 static int DxCreateVideoService(vlc_va_dxva2_t *va)
677 HRESULT (WINAPI *CreateVideoService)(IDirect3DDevice9 *,
678 REFIID riid,
679 void **ppService);
680 CreateVideoService =
681 (void *)GetProcAddress(va->hdxva2_dll,
682 TEXT("DXVA2CreateVideoService"));
684 if (!CreateVideoService) {
685 msg_Err(va->log, "cannot load function\n");
686 return 4;
688 msg_Info(va->log, "DXVA2CreateVideoService Success!");
690 HRESULT hr;
692 HANDLE device;
693 hr = IDirect3DDeviceManager9_OpenDeviceHandle(va->devmng, &device);
694 if (FAILED(hr)) {
695 msg_Err(va->log, "OpenDeviceHandle failed");
696 return VLC_EGENERIC;
698 va->device = device;
700 IDirectXVideoDecoderService *vs;
701 hr = IDirect3DDeviceManager9_GetVideoService(va->devmng, device,
702 &IID_IDirectXVideoDecoderService,
703 &vs);
704 if (FAILED(hr)) {
705 msg_Err(va->log, "GetVideoService failed");
706 return VLC_EGENERIC;
708 va->vs = vs;
710 return VLC_SUCCESS;
713 * It destroys a DirectX video service
715 static void DxDestroyVideoService(vlc_va_dxva2_t *va)
717 if (va->device)
718 IDirect3DDeviceManager9_CloseDeviceHandle(va->devmng, va->device);
719 if (va->vs)
720 IDirectXVideoDecoderService_Release(va->vs);
723 * Find the best suited decoder mode GUID and render format.
725 static int DxFindVideoServiceConversion(vlc_va_dxva2_t *va, GUID *input, D3DFORMAT *output)
727 /* Retreive supported modes from the decoder service */
728 UINT input_count = 0;
729 GUID *input_list = NULL;
730 if (FAILED(IDirectXVideoDecoderService_GetDecoderDeviceGuids(va->vs,
731 &input_count,
732 &input_list))) {
733 msg_Err(va->log, "IDirectXVideoDecoderService_GetDecoderDeviceGuids failed");
734 return VLC_EGENERIC;
736 for (unsigned i = 0; i < input_count; i++) {
737 const GUID *g = &input_list[i];
738 const dxva2_mode_t *mode = Dxva2FindMode(g);
739 if (mode) {
740 msg_Dbg(va->log, "- '%s' is supported by hardware", mode->name);
741 } else {
742 msg_Warn(va->log, "- Unknown GUID = %08X-%04x-%04x-XXXX",
743 (unsigned)g->Data1, g->Data2, g->Data3);
747 /* Try all supported mode by our priority */
748 for (unsigned i = 0; dxva2_modes[i].name; i++) {
749 const dxva2_mode_t *mode = &dxva2_modes[i];
750 if (!mode->codec || mode->codec != va->codec_id)
751 continue;
753 /* */
754 bool is_suported = false;
755 for (const GUID *g = &input_list[0]; !is_suported && g < &input_list[input_count]; g++) {
756 is_suported = IsEqualGUID(mode->guid, g);
758 if (!is_suported)
759 continue;
761 /* */
762 msg_Dbg(va->log, "Trying to use '%s' as input", mode->name);
763 UINT output_count = 0;
764 D3DFORMAT *output_list = NULL;
765 if (FAILED(IDirectXVideoDecoderService_GetDecoderRenderTargets(va->vs, mode->guid,
766 &output_count,
767 &output_list))) {
768 msg_Err(va->log, "IDirectXVideoDecoderService_GetDecoderRenderTargets failed");
769 continue;
771 for (unsigned j = 0; j < output_count; j++) {
772 const D3DFORMAT f = output_list[j];
773 const d3d_format_t *format = D3dFindFormat(f);
774 if (format) {
775 msg_Dbg(va->log, "%s is supported for output", format->name);
776 } else {
777 msg_Dbg(va->log, "%d is supported for output (%4.4s)", f, (const char*)&f);
781 /* */
782 for (unsigned j = 0; d3d_formats[j].name; j++) {
783 const d3d_format_t *format = &d3d_formats[j];
785 /* */
786 bool is_suported = false;
787 for (unsigned k = 0; !is_suported && k < output_count; k++) {
788 is_suported = format->format == output_list[k];
790 if (!is_suported)
791 continue;
793 /* We have our solution */
794 msg_Dbg(va->log, "Using '%s' to decode to '%s'", mode->name, format->name);
795 *input = *mode->guid;
796 *output = format->format;
797 CoTaskMemFree(output_list);
798 CoTaskMemFree(input_list);
799 return VLC_SUCCESS;
801 CoTaskMemFree(output_list);
803 CoTaskMemFree(input_list);
804 return VLC_EGENERIC;
808 * It creates a DXVA2 decoder using the given video format
810 static int DxCreateVideoDecoder(vlc_va_dxva2_t *va,
811 int codec_id, const video_format_t *fmt)
813 /* */
814 msg_Dbg(va->log, "DxCreateVideoDecoder id %d %dx%d",
815 codec_id, fmt->i_width, fmt->i_height);
817 /* Allocates all surfaces needed for the decoder */
818 switch (codec_id) {
819 case CODEC_ID_H264:
820 va->surface_count = 16 + 1;
821 break;
822 default:
823 va->surface_count = 2 + 1;
824 break;
826 LPDIRECT3DSURFACE9 surface_list[VA_DXVA2_MAX_SURFACE_COUNT];
827 if (FAILED(IDirectXVideoDecoderService_CreateSurface(va->vs,
828 fmt->i_width,
829 fmt->i_height,
830 va->surface_count - 1,
831 va->render,
832 D3DPOOL_DEFAULT,
834 DXVA2_VideoDecoderRenderTarget,
835 surface_list,
836 NULL))) {
837 msg_Err(va->log, "IDirectXVideoAccelerationService_CreateSurface failed\n");
838 va->surface_count = 0;
839 return VLC_EGENERIC;
841 for (unsigned i = 0; i < va->surface_count; i++) {
842 vlc_va_surface_t *surface = &va->surface[i];
843 surface->d3d = surface_list[i];
844 surface->refcount = 0;
845 surface->order = 0;
847 va->surface_width = fmt->i_width;
848 va->surface_height = fmt->i_height;
849 msg_Dbg(va->log, "IDirectXVideoAccelerationService_CreateSurface succeed with %d surfaces (%dx%d)",
850 va->surface_count, fmt->i_width, fmt->i_height);
852 /* */
853 DXVA2_VideoDesc dsc;
854 ZeroMemory(&dsc, sizeof(dsc));
855 dsc.SampleWidth = fmt->i_width;
856 dsc.SampleHeight = fmt->i_height;
857 dsc.Format = va->render;
858 if (fmt->i_frame_rate > 0 && fmt->i_frame_rate_base > 0) {
859 dsc.InputSampleFreq.Numerator = fmt->i_frame_rate;
860 dsc.InputSampleFreq.Denominator = fmt->i_frame_rate_base;
861 } else {
862 dsc.InputSampleFreq.Numerator = 0;
863 dsc.InputSampleFreq.Denominator = 0;
865 dsc.OutputFrameFreq = dsc.InputSampleFreq;
866 dsc.UABProtectionLevel = FALSE;
867 dsc.Reserved = 0;
869 /* FIXME I am unsure we can let unknown everywhere */
870 DXVA2_ExtendedFormat *ext = &dsc.SampleFormat;
871 ext->SampleFormat = 0;//DXVA2_SampleUnknown;
872 ext->VideoChromaSubsampling = 0;//DXVA2_VideoChromaSubsampling_Unknown;
873 ext->NominalRange = 0;//DXVA2_NominalRange_Unknown;
874 ext->VideoTransferMatrix = 0;//DXVA2_VideoTransferMatrix_Unknown;
875 ext->VideoLighting = 0;//DXVA2_VideoLighting_Unknown;
876 ext->VideoPrimaries = 0;//DXVA2_VideoPrimaries_Unknown;
877 ext->VideoTransferFunction = 0;//DXVA2_VideoTransFunc_Unknown;
879 /* List all configurations available for the decoder */
880 UINT cfg_count = 0;
881 DXVA2_ConfigPictureDecode *cfg_list = NULL;
882 if (FAILED(IDirectXVideoDecoderService_GetDecoderConfigurations(va->vs,
883 &va->input,
884 &dsc,
885 NULL,
886 &cfg_count,
887 &cfg_list))) {
888 msg_Err(va->log, "IDirectXVideoDecoderService_GetDecoderConfigurations failed\n");
889 return VLC_EGENERIC;
891 msg_Dbg(va->log, "we got %d decoder configurations", cfg_count);
893 /* Select the best decoder configuration */
894 bool has_cfg = false;
895 for (unsigned i = 0; i < cfg_count; i++) {
896 const DXVA2_ConfigPictureDecode *cfg = &cfg_list[i];
898 /* */
899 msg_Dbg(va->log, "configuration[%d] ConfigBitstreamRaw %d",
900 i, cfg->ConfigBitstreamRaw);
902 /* */
903 if ((!has_cfg && cfg->ConfigBitstreamRaw == 1) ||
904 (codec_id == CODEC_ID_H264 && cfg->ConfigBitstreamRaw == 2)) {
905 va->cfg = *cfg;
906 has_cfg = true;
909 CoTaskMemFree(cfg_list);
910 if (!has_cfg) {
911 msg_Err(va->log, "Failed to find a supported decoder configuration");
912 return VLC_EGENERIC;
915 /* Create the decoder */
916 IDirectXVideoDecoder *decoder;
917 if (FAILED(IDirectXVideoDecoderService_CreateVideoDecoder(va->vs,
918 &va->input,
919 &dsc,
920 &va->cfg,
921 surface_list,
922 va->surface_count,
923 &decoder))) {
924 msg_Err(va->log, "IDirectXVideoDecoderService_CreateVideoDecoder failed\n");
925 return VLC_EGENERIC;
927 va->decoder = decoder;
928 msg_Dbg(va->log, "IDirectXVideoDecoderService_CreateVideoDecoder succeed");
929 return VLC_SUCCESS;
931 static void DxDestroyVideoDecoder(vlc_va_dxva2_t *va)
933 if (va->decoder)
934 IDirectXVideoDecoder_Release(va->decoder);
935 va->decoder = NULL;
937 for (unsigned i = 0; i < va->surface_count; i++)
938 IDirect3DSurface9_Release(va->surface[i].d3d);
939 va->surface_count = 0;
941 static int DxResetVideoDecoder(vlc_va_dxva2_t *va)
943 msg_Err(va->log, "DxResetVideoDecoder unimplemented");
944 return VLC_EGENERIC;
947 static void DxCreateVideoConversion(vlc_va_dxva2_t *va)
949 switch (va->render) {
950 case MAKEFOURCC('N','V','1','2'):
951 va->output = MAKEFOURCC('Y','V','1','2');
952 break;
953 default:
954 va->output = va->render;
955 break;
957 va->surface_cache_size = __MAX((va->surface_width + 0x0f) & ~ 0x0f, 4096);
958 va->surface_cache_base = malloc(16 + va->surface_cache_size);
959 va->surface_cache = &va->surface_cache_base[16 - ((intptr_t)va->surface_cache_base & 0x0f)];
961 static void DxDestroyVideoConversion(vlc_va_dxva2_t *va)
963 free(va->surface_cache_base);
964 va->surface_cache_base = NULL;
965 va->surface_cache = NULL;
966 va->surface_cache_size = 0;
/* Copy 64 bytes from srcp to dstp loading data with the SSE>=2 instruction load and
 * storing data with the SSE>=2 instruction store.
 *
 * load and store must be string literals naming the instructions.
 * xmm1-xmm4 are used as temporaries and therefore listed as clobbered, so
 * that the compiler never keeps a live value in them across the statement.
 */
#define COPY64(dstp, srcp, load, store) \
    asm volatile (                      \
        load "  0(%[src]), %%xmm1\n"    \
        load " 16(%[src]), %%xmm2\n"    \
        load " 32(%[src]), %%xmm3\n"    \
        load " 48(%[src]), %%xmm4\n"    \
        store " %%xmm1,    0(%[dst])\n" \
        store " %%xmm2,   16(%[dst])\n" \
        store " %%xmm3,   32(%[dst])\n" \
        store " %%xmm4,   48(%[dst])\n" \
        : : [dst]"r"(dstp), [src]"r"(srcp) \
        : "memory", "xmm1", "xmm2", "xmm3", "xmm4")
/* Execute the instruction op only if SSE2 is supported.
 *
 * cpu is the capability mask (parenthesized so that any expression can be
 * passed) and op a string literal holding the instruction. Without SSE2
 * compile support the macro is an empty statement.
 */
#ifdef CAN_COMPILE_SSE2
# define ASM_SSE2(cpu, op) do {          \
    if ((cpu) & CPU_CAPABILITY_SSE2)     \
        asm volatile (op);               \
} while (0)
#else
# define ASM_SSE2(cpu, op) do { } while (0)
#endif
/* Optimized copy from "Uncacheable Speculative Write Combining" memory
 * as used by some video surface.
 * XXX It is really efficient only when SSE4.1 is available.
 *
 * dst and dst_pitch must be multiples of 16 (asserted). For every row the
 * first `unaligned` bytes are copied one by one, then 64-byte chunks are
 * copied with SSE when available, then the remaining bytes one by one.
 */
static void CopyFromUswc(uint8_t *dst, size_t dst_pitch,
                         const uint8_t *src, size_t src_pitch,
                         unsigned unaligned,
                         unsigned width, unsigned height,
                         unsigned cpu)
{
    assert(((intptr_t)dst & 0x0f) == 0 && (dst_pitch & 0x0f) == 0);

    ASM_SSE2(cpu, "mfence");
    for (unsigned row = 0; row < height; row++, src += src_pitch, dst += dst_pitch) {
        unsigned col = 0;

        /* Leading bytes */
        while (col < unaligned) {
            dst[col] = src[col];
            col++;
        }

        /* 64-byte chunks: the skipped leading bytes make the destination
         * misaligned, hence the unaligned store variant in that case */
#ifdef CAN_COMPILE_SSE4_1
        if (cpu & CPU_CAPABILITY_SSE4_1) {
            if (unaligned) {
                while (col + 63 < width) {
                    COPY64(&dst[col], &src[col], "movntdqa", "movdqu");
                    col += 64;
                }
            } else {
                while (col + 63 < width) {
                    COPY64(&dst[col], &src[col], "movntdqa", "movdqa");
                    col += 64;
                }
            }
        } else
#endif
#ifdef CAN_COMPILE_SSE2
        if (cpu & CPU_CAPABILITY_SSE2) {
            if (unaligned) {
                while (col + 63 < width) {
                    COPY64(&dst[col], &src[col], "movdqa", "movdqu");
                    col += 64;
                }
            } else {
                while (col + 63 < width) {
                    COPY64(&dst[col], &src[col], "movdqa", "movdqa");
                    col += 64;
                }
            }
        }
#endif

        /* Trailing bytes (or the whole row without SSE) */
        for (; col < width; col++)
            dst[col] = src[col];
    }
}
/**
 * Copies a width x height byte rectangle from src to dst.
 *
 * src and src_pitch must be multiples of 16 (asserted); dst may have any
 * alignment, which is re-evaluated for every row. 64-byte chunks go
 * through SSE2 when available, the rest is copied byte by byte.
 */
static void Copy2d(uint8_t *dst, size_t dst_pitch,
                   const uint8_t *src, size_t src_pitch,
                   unsigned width, unsigned height,
                   unsigned cpu)
{
    assert(((intptr_t)src & 0x0f) == 0 && (src_pitch & 0x0f) == 0);

    ASM_SSE2(cpu, "mfence");

    for (unsigned row = 0; row < height; row++, src += src_pitch, dst += dst_pitch) {
        unsigned col = 0;
        bool unaligned = ((intptr_t)dst & 0x0f) != 0;

#ifdef CAN_COMPILE_SSE2
        if (cpu & CPU_CAPABILITY_SSE2) {
            if (unaligned) {
                while (col + 63 < width) {
                    COPY64(&dst[col], &src[col], "movdqa", "movdqu");
                    col += 64;
                }
            } else {
                /* Aligned destination: non-temporal stores */
                while (col + 63 < width) {
                    COPY64(&dst[col], &src[col], "movdqa", "movntdq");
                    col += 64;
                }
            }
        }
#endif

        while (col < width) {
            dst[col] = src[col];
            col++;
        }
    }
}
/**
 * De-interleaves a plane of byte pairs into two planes.
 *
 * For every row, dstu[x] = src[2*x] and dstv[x] = src[2*x + 1] for
 * x in [0, width). src and src_pitch must be multiples of 16 (asserted).
 * Blocks of 32 output bytes are handled with SSSE3 or SSE2 when available,
 * the remainder with plain C.
 *
 * The asm statements list every xmm register they use as clobbered so the
 * compiler never keeps a live value in them.
 */
static void SplitUV(uint8_t *dstu, size_t dstu_pitch,
                    uint8_t *dstv, size_t dstv_pitch,
                    const uint8_t *src, size_t src_pitch,
                    unsigned width, unsigned height, unsigned cpu)
{
    /* pshufb control: even bytes to the low half, odd bytes to the high half */
    const uint8_t shuffle[] = { 0, 2, 4, 6, 8, 10, 12, 14,
                                1, 3, 5, 7, 9, 11, 13, 15 };
    /* Keeps the even byte of each 16-bit word */
    const uint8_t mask[] = { 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00,
                             0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00 };

    assert(((intptr_t)src & 0x0f) == 0 && (src_pitch & 0x0f) == 0);

    ASM_SSE2(cpu, "mfence");

    for (unsigned y = 0; y < height; y++) {
        unsigned x = 0;

#define LOAD64 \
    "movdqa  0(%[src]), %%xmm0\n" \
    "movdqa 16(%[src]), %%xmm1\n" \
    "movdqa 32(%[src]), %%xmm2\n" \
    "movdqa 48(%[src]), %%xmm3\n"

#define STORE2X32 \
    "movq   %%xmm0,   0(%[dst1])\n" \
    "movq   %%xmm1,   8(%[dst1])\n" \
    "movhpd %%xmm0,   0(%[dst2])\n" \
    "movhpd %%xmm1,   8(%[dst2])\n" \
    "movq   %%xmm2,  16(%[dst1])\n" \
    "movq   %%xmm3,  24(%[dst1])\n" \
    "movhpd %%xmm2,  16(%[dst2])\n" \
    "movhpd %%xmm3,  24(%[dst2])\n"

#ifdef CAN_COMPILE_SSSE3
        if (cpu & CPU_CAPABILITY_SSSE3) {
            for (x = 0; x < (width & ~31); x += 32) {
                asm volatile (
                    "movdqu (%[shuffle]), %%xmm7\n"
                    LOAD64
                    "pshufb  %%xmm7, %%xmm0\n"
                    "pshufb  %%xmm7, %%xmm1\n"
                    "pshufb  %%xmm7, %%xmm2\n"
                    "pshufb  %%xmm7, %%xmm3\n"
                    STORE2X32
                    : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]), [src]"r"(&src[2*x]), [shuffle]"r"(shuffle)
                    : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm7");
            }
        } else
#endif
#ifdef CAN_COMPILE_SSE2
        if (cpu & CPU_CAPABILITY_SSE2) {
            /* After packing, the low half of each register holds the odd
             * source bytes and the high half the even ones, hence dst1 and
             * dst2 are swapped compared to the SSSE3 path. */
            for (x = 0; x < (width & ~31); x += 32) {
                asm volatile (
                    "movdqu (%[mask]), %%xmm7\n"
                    LOAD64
                    "movdqa   %%xmm0, %%xmm4\n"
                    "movdqa   %%xmm1, %%xmm5\n"
                    "movdqa   %%xmm2, %%xmm6\n"
                    "psrlw    $8,     %%xmm0\n"
                    "psrlw    $8,     %%xmm1\n"
                    "pand     %%xmm7, %%xmm4\n"
                    "pand     %%xmm7, %%xmm5\n"
                    "pand     %%xmm7, %%xmm6\n"
                    "packuswb %%xmm4, %%xmm0\n"
                    "packuswb %%xmm5, %%xmm1\n"
                    "pand     %%xmm3, %%xmm7\n"
                    "psrlw    $8,     %%xmm2\n"
                    "psrlw    $8,     %%xmm3\n"
                    "packuswb %%xmm6, %%xmm2\n"
                    "packuswb %%xmm7, %%xmm3\n"
                    STORE2X32
                    : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]), [src]"r"(&src[2*x]), [mask]"r"(mask)
                    : "memory", "xmm0", "xmm1", "xmm2", "xmm3",
                      "xmm4", "xmm5", "xmm6", "xmm7");
            }
        }
#endif
#undef STORE2X32
#undef LOAD64

        /* Remaining pairs (or the whole row without SSE) */
        for (; x < width; x++) {
            dstu[x] = src[2*x+0];
            dstv[x] = src[2*x+1];
        }
        src  += src_pitch;
        dstu += dstu_pitch;
        dstv += dstv_pitch;
    }
}
/*
 * Copy one plane from src to dst through an intermediate cache buffer.
 *
 * The plane is processed in bands of as many rows as fit in the cache: each
 * band is first copied into the cache with CopyFromUswc() and then from the
 * cache to dst with Copy2d(). Cache rows use a stride of width rounded up to
 * a multiple of 16 bytes. Copy2d() asserts that its source (the cache here)
 * is 16-byte aligned, so the caller must supply an aligned cache.
 *
 * cache_size must hold at least one padded row. A zero-width plane or a
 * cache that is too small results in nothing being copied.
 */
static void CopyPlane(uint8_t *dst, size_t dst_pitch, const uint8_t *src, size_t src_pitch,
                      uint8_t *cache, size_t cache_size,
                      unsigned width, unsigned height,
                      unsigned cpu)
{
    const unsigned w16 = (width+15) & ~15;
    /* width == 0 would make the division below divide by zero */
    if (w16 == 0)
        return;

    const unsigned hstep = cache_size / w16;
    assert(hstep > 0);
    /* With assertions compiled out, a zero step would loop forever */
    if (hstep == 0)
        return;

    for (unsigned y = 0; y < height; y += hstep) {
        const unsigned unaligned = (intptr_t)src & 0x0f;
        const unsigned hblock = __MIN(hstep, height - y);

        /* Copy a bunch of lines into our cache */
        CopyFromUswc(cache, w16,
                     src, src_pitch,
                     unaligned,
                     width, hblock, cpu);

        /* Copy from our cache to the destination */
        Copy2d(dst, dst_pitch,
               cache, w16,
               width, hblock, cpu);

        /* Advance to the next band */
        src += src_pitch * hblock;
        dst += dst_pitch * hblock;
    }

    ASM_SSE2(cpu, "mfence");
}
/*
 * De-interleave one plane of byte pairs from src into dstu and dstv through
 * an intermediate cache buffer.
 *
 * width and height describe each OUTPUT plane; a source row therefore holds
 * 2*width bytes. The source is processed in bands of as many rows as fit in
 * the cache: each band is copied into the cache with CopyFromUswc() and then
 * split with SplitUV(). Cache rows use a stride of 2*width rounded up to a
 * multiple of 16 bytes. SplitUV() asserts that its source (the cache here)
 * is 16-byte aligned, so the caller must supply an aligned cache.
 *
 * cache_size must hold at least one padded row. A zero-width plane or a
 * cache that is too small results in nothing being copied.
 */
static void SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
                        uint8_t *dstv, size_t dstv_pitch,
                        const uint8_t *src, size_t src_pitch,
                        uint8_t *cache, size_t cache_size,
                        unsigned width, unsigned height,
                        unsigned cpu)
{
    const unsigned w2_16 = (2*width+15) & ~15;
    /* width == 0 would make the division below divide by zero */
    if (w2_16 == 0)
        return;

    const unsigned hstep = cache_size / w2_16;
    assert(hstep > 0);
    /* With assertions compiled out, a zero step would loop forever */
    if (hstep == 0)
        return;

    for (unsigned y = 0; y < height; y += hstep) {
        const unsigned unaligned = (intptr_t)src & 0x0f;
        const unsigned hblock = __MIN(hstep, height - y);

        /* Copy a bunch of lines into our cache */
        CopyFromUswc(cache, w2_16,
                     src, src_pitch,
                     unaligned,
                     2*width, hblock, cpu);

        /* Split from our cache into the two destinations */
        SplitUV(dstu, dstu_pitch,
                dstv, dstv_pitch,
                cache, w2_16,
                width, hblock, cpu);

        /* Advance to the next band */
        src  += src_pitch  * hblock;
        dstu += dstu_pitch * hblock;
        dstv += dstv_pitch * hblock;
    }

    ASM_SSE2(cpu, "mfence");
}
1231 static void CopyFromNv12(picture_t *dst, const D3DLOCKED_RECT *src,
1232 uint8_t *cache, size_t cache_size,
1233 unsigned width, unsigned height)
1235 const unsigned cpu = vlc_CPU();
1237 /* */
1238 CopyPlane(dst->p[0].p_pixels, dst->p[0].i_pitch,
1239 src->pBits, src->Pitch,
1240 cache, cache_size,
1241 width, height, cpu);
1242 SplitPlanes(dst->p[2].p_pixels, dst->p[2].i_pitch,
1243 dst->p[1].p_pixels, dst->p[1].i_pitch,
1244 (const uint8_t*)src->pBits + src->Pitch * height, src->Pitch,
1245 cache, cache_size,
1246 width/2, height/2, cpu);
1248 ASM_SSE2(cpu, "emms");
1250 static void CopyFromYv12(picture_t *dst, const D3DLOCKED_RECT *src,
1251 uint8_t *cache, size_t cache_size,
1252 unsigned width, unsigned height)
1254 const unsigned cpu = vlc_CPU();
1256 /* */
1257 for (unsigned n = 0, offset = 0; n < 3; n++) {
1258 const unsigned d = n > 0 ? 2 : 1;
1259 CopyPlane(dst->p[n].p_pixels, dst->p[n].i_pitch,
1260 (const uint8_t*)src->pBits + offset, src->Pitch/d,
1261 cache, cache_size,
1262 width/d, height/d, cpu);
1263 offset += (src->Pitch/d) * (height/d);
1265 ASM_SSE2(cpu, "emms");
/* The inline-assembly helper macros are only meant for the copy routines
 * above; drop them so they do not leak into the rest of the file. */
#undef ASM_SSE2
#undef COPY64
1270 #else
1271 vlc_va_t *vlc_va_NewDxva2(vlc_object_t *log, int codec_id)
1273 (void)log;
1274 (void)codec_id;
1275 return NULL;
1277 #endif