/*
 * shaders implementation
 *
 * Copyright 2002-2003 Jason Edmeades
 * Copyright 2002-2003 Raphael Junqueira
 * Copyright 2004 Christian Costa
 * Copyright 2005 Oliver Stieber
 * Copyright 2006 Ivan Gyurdiev
 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
31 #include "wined3d_private.h"
WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);

/* GL info of the adapter behind the device that owns the shader. The
 * expansion refers to a variable named This, so it can only be used where a
 * vertex shader implementation pointer with that name is in scope. */
#define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
37 static void vshader_set_limits(IWineD3DVertexShaderImpl
*This
)
39 This
->baseShader
.limits
.texcoord
= 0;
40 This
->baseShader
.limits
.attributes
= 16;
41 This
->baseShader
.limits
.packed_input
= 0;
43 switch (This
->baseShader
.reg_maps
.shader_version
)
45 case WINED3DVS_VERSION(1,0):
46 case WINED3DVS_VERSION(1,1):
47 This
->baseShader
.limits
.temporary
= 12;
48 This
->baseShader
.limits
.constant_bool
= 0;
49 This
->baseShader
.limits
.constant_int
= 0;
50 This
->baseShader
.limits
.address
= 1;
51 This
->baseShader
.limits
.packed_output
= 0;
52 This
->baseShader
.limits
.sampler
= 0;
53 This
->baseShader
.limits
.label
= 0;
54 /* TODO: vs_1_1 has a minimum of 96 constants. What happens if a vs_1_1 shader is used
55 * on a vs_3_0 capable card that has 256 constants? */
56 This
->baseShader
.limits
.constant_float
= min(256, GL_LIMITS(vshader_constantsF
));
59 case WINED3DVS_VERSION(2,0):
60 case WINED3DVS_VERSION(2,1):
61 This
->baseShader
.limits
.temporary
= 12;
62 This
->baseShader
.limits
.constant_bool
= 16;
63 This
->baseShader
.limits
.constant_int
= 16;
64 This
->baseShader
.limits
.address
= 1;
65 This
->baseShader
.limits
.packed_output
= 0;
66 This
->baseShader
.limits
.sampler
= 0;
67 This
->baseShader
.limits
.label
= 16;
68 This
->baseShader
.limits
.constant_float
= min(256, GL_LIMITS(vshader_constantsF
));
71 case WINED3DVS_VERSION(3,0):
72 This
->baseShader
.limits
.temporary
= 32;
73 This
->baseShader
.limits
.constant_bool
= 32;
74 This
->baseShader
.limits
.constant_int
= 32;
75 This
->baseShader
.limits
.address
= 1;
76 This
->baseShader
.limits
.packed_output
= 12;
77 This
->baseShader
.limits
.sampler
= 4;
78 This
->baseShader
.limits
.label
= 16; /* FIXME: 2048 */
79 /* DX10 cards on Windows advertise a d3d9 constant limit of 256 even though they are capable
80 * of supporting much more(GL drivers advertise 1024). d3d9.dll and d3d8.dll clamp the
81 * wined3d-advertised maximum. Clamp the constant limit for <= 3.0 shaders to 256.s
82 * use constant buffers */
83 This
->baseShader
.limits
.constant_float
= min(256, GL_LIMITS(vshader_constantsF
));
87 This
->baseShader
.limits
.temporary
= 12;
88 This
->baseShader
.limits
.constant_bool
= 16;
89 This
->baseShader
.limits
.constant_int
= 16;
90 This
->baseShader
.limits
.address
= 1;
91 This
->baseShader
.limits
.packed_output
= 0;
92 This
->baseShader
.limits
.sampler
= 0;
93 This
->baseShader
.limits
.label
= 16;
94 This
->baseShader
.limits
.constant_float
= min(256, GL_LIMITS(vshader_constantsF
));
95 FIXME("Unrecognized vertex shader version %#x\n",
96 This
->baseShader
.reg_maps
.shader_version
);
100 /* This is an internal function,
101 * used to create fake semantics for shaders
102 * that don't have them - d3d8 shaders where the declaration
103 * stores the register for each input
105 static void vshader_set_input(
106 IWineD3DVertexShaderImpl
* This
,
108 BYTE usage
, BYTE usage_idx
) {
110 This
->semantics_in
[regnum
].usage
= usage
;
111 This
->semantics_in
[regnum
].usage_idx
= usage_idx
;
112 This
->semantics_in
[regnum
].reg
.register_type
= WINED3DSPR_INPUT
;
113 This
->semantics_in
[regnum
].reg
.register_idx
= regnum
;
114 This
->semantics_in
[regnum
].reg
.write_mask
= WINED3DSP_WRITEMASK_ALL
;
115 This
->semantics_in
[regnum
].reg
.modifiers
= 0;
116 This
->semantics_in
[regnum
].reg
.shift
= 0;
117 This
->semantics_in
[regnum
].reg
.rel_addr
= NULL
;
120 static BOOL
match_usage(BYTE usage1
, BYTE usage_idx1
, BYTE usage2
, BYTE usage_idx2
) {
121 if (usage_idx1
!= usage_idx2
) return FALSE
;
122 if (usage1
== usage2
) return TRUE
;
123 if (usage1
== WINED3DDECLUSAGE_POSITION
&& usage2
== WINED3DDECLUSAGE_POSITIONT
) return TRUE
;
124 if (usage2
== WINED3DDECLUSAGE_POSITION
&& usage1
== WINED3DDECLUSAGE_POSITIONT
) return TRUE
;
129 BOOL
vshader_get_input(
130 IWineD3DVertexShader
* iface
,
131 BYTE usage_req
, BYTE usage_idx_req
,
132 unsigned int* regnum
) {
134 IWineD3DVertexShaderImpl
* This
= (IWineD3DVertexShaderImpl
*) iface
;
137 for (i
= 0; i
< MAX_ATTRIBS
; i
++) {
138 if (!This
->baseShader
.reg_maps
.attributes
[i
]) continue;
140 if (match_usage(This
->semantics_in
[i
].usage
,
141 This
->semantics_in
[i
].usage_idx
, usage_req
, usage_idx_req
))
/* *******************************************
   IWineD3DVertexShader IUnknown parts follow
   ******************************************* */
153 static HRESULT WINAPI
IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader
*iface
, REFIID riid
, LPVOID
*ppobj
) {
154 TRACE("iface %p, riid %s, ppobj %p\n", iface
, debugstr_guid(riid
), ppobj
);
156 if (IsEqualGUID(riid
, &IID_IWineD3DVertexShader
)
157 || IsEqualGUID(riid
, &IID_IWineD3DBaseShader
)
158 || IsEqualGUID(riid
, &IID_IWineD3DBase
)
159 || IsEqualGUID(riid
, &IID_IUnknown
))
161 IUnknown_AddRef(iface
);
166 WARN("%s not implemented, returning E_NOINTERFACE\n", debugstr_guid(riid
));
169 return E_NOINTERFACE
;
172 static ULONG WINAPI
IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader
*iface
) {
173 IWineD3DVertexShaderImpl
*This
= (IWineD3DVertexShaderImpl
*)iface
;
174 ULONG refcount
= InterlockedIncrement(&This
->baseShader
.ref
);
176 TRACE("%p increasing refcount to %u\n", This
, refcount
);
181 static ULONG WINAPI
IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader
*iface
) {
182 IWineD3DVertexShaderImpl
*This
= (IWineD3DVertexShaderImpl
*)iface
;
183 ULONG refcount
= InterlockedDecrement(&This
->baseShader
.ref
);
185 TRACE("%p decreasing refcount to %u\n", This
, refcount
);
189 shader_cleanup((IWineD3DBaseShader
*)iface
);
190 HeapFree(GetProcessHeap(), 0, This
);
/* *******************************************
   IWineD3DVertexShader IWineD3DVertexShader parts follow
   ******************************************* */
200 static HRESULT WINAPI
IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader
*iface
, IUnknown
** parent
){
201 IWineD3DVertexShaderImpl
*This
= (IWineD3DVertexShaderImpl
*)iface
;
203 *parent
= This
->parent
;
204 IUnknown_AddRef(*parent
);
205 TRACE("(%p) : returning %p\n", This
, *parent
);
209 static HRESULT WINAPI
IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader
* iface
, IWineD3DDevice
**pDevice
){
210 IWineD3DVertexShaderImpl
*This
= (IWineD3DVertexShaderImpl
*)iface
;
211 IWineD3DDevice_AddRef(This
->baseShader
.device
);
212 *pDevice
= This
->baseShader
.device
;
213 TRACE("(%p) returning %p\n", This
, *pDevice
);
217 static HRESULT WINAPI
IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader
* impl
, VOID
* pData
, UINT
* pSizeOfData
) {
218 IWineD3DVertexShaderImpl
*This
= (IWineD3DVertexShaderImpl
*)impl
;
219 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This
, pData
, pSizeOfData
);
222 *pSizeOfData
= This
->baseShader
.functionLength
;
225 if (*pSizeOfData
< This
->baseShader
.functionLength
) {
226 /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
227 * than the required size we should write the required size and
228 * return D3DERR_MOREDATA. That's not actually true. */
229 return WINED3DERR_INVALIDCALL
;
232 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
233 memcpy(pData
, This
->baseShader
.function
, This
->baseShader
.functionLength
);
238 /* Note that for vertex shaders CompileShader isn't called until the
239 * shader is first used. The reason for this is that we need the vertex
240 * declaration the shader will be used with in order to determine if
241 * the data in a register is of type D3DCOLOR, and needs swizzling. */
242 static HRESULT WINAPI
IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader
*iface
, CONST DWORD
*pFunction
) {
244 IWineD3DVertexShaderImpl
*This
=(IWineD3DVertexShaderImpl
*)iface
;
245 IWineD3DDeviceImpl
*deviceImpl
= (IWineD3DDeviceImpl
*) This
->baseShader
.device
;
246 const struct wined3d_shader_frontend
*fe
;
248 shader_reg_maps
*reg_maps
= &This
->baseShader
.reg_maps
;
250 TRACE("(%p) : pFunction %p\n", iface
, pFunction
);
252 fe
= shader_select_frontend(*pFunction
);
255 FIXME("Unable to find frontend for shader.\n");
256 return WINED3DERR_INVALIDCALL
;
258 This
->baseShader
.frontend
= fe
;
259 This
->baseShader
.frontend_data
= fe
->shader_init(pFunction
);
260 if (!This
->baseShader
.frontend_data
)
262 FIXME("Failed to initialize frontend.\n");
263 return WINED3DERR_INVALIDCALL
;
266 /* First pass: trace shader */
267 if (TRACE_ON(d3d_shader
)) shader_trace_init(fe
, This
->baseShader
.frontend_data
, pFunction
);
269 /* Initialize immediate constant lists */
270 list_init(&This
->baseShader
.constantsF
);
271 list_init(&This
->baseShader
.constantsB
);
272 list_init(&This
->baseShader
.constantsI
);
274 /* Second pass: figure out registers used, semantics, etc.. */
275 This
->min_rel_offset
= GL_LIMITS(vshader_constantsF
);
276 This
->max_rel_offset
= 0;
277 hr
= shader_get_registers_used((IWineD3DBaseShader
*) This
, fe
,
278 reg_maps
, This
->semantics_in
, This
->semantics_out
, pFunction
);
279 if (hr
!= WINED3D_OK
) return hr
;
281 vshader_set_limits(This
);
283 This
->baseShader
.shader_mode
= deviceImpl
->vs_selected_mode
;
285 if(deviceImpl
->vs_selected_mode
== SHADER_ARB
&&
286 (GLINFO_LOCATION
).arb_vs_offset_limit
&&
287 This
->min_rel_offset
<= This
->max_rel_offset
) {
289 if(This
->max_rel_offset
- This
->min_rel_offset
> 127) {
290 FIXME("The difference between the minimum and maximum relative offset is > 127\n");
291 FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
292 FIXME("Min: %d, Max: %d\n", This
->min_rel_offset
, This
->max_rel_offset
);
293 } else if(This
->max_rel_offset
- This
->min_rel_offset
> 63) {
294 This
->rel_offset
= This
->min_rel_offset
+ 63;
295 } else if(This
->max_rel_offset
> 63) {
296 This
->rel_offset
= This
->min_rel_offset
;
298 This
->rel_offset
= 0;
301 This
->baseShader
.load_local_constsF
= This
->baseShader
.reg_maps
.usesrelconstF
&& !list_empty(&This
->baseShader
.constantsF
);
303 /* copy the function ... because it will certainly be released by application */
304 This
->baseShader
.function
= HeapAlloc(GetProcessHeap(), 0, This
->baseShader
.functionLength
);
305 if (!This
->baseShader
.function
) return E_OUTOFMEMORY
;
306 memcpy(This
->baseShader
.function
, pFunction
, This
->baseShader
.functionLength
);
311 /* Preload semantics for d3d8 shaders */
312 static void WINAPI
IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader
*iface
, IWineD3DVertexDeclaration
*vertex_declaration
) {
313 IWineD3DVertexShaderImpl
*This
=(IWineD3DVertexShaderImpl
*)iface
;
314 IWineD3DVertexDeclarationImpl
* vdecl
= (IWineD3DVertexDeclarationImpl
*)vertex_declaration
;
317 for (i
= 0; i
< vdecl
->element_count
; ++i
)
319 const struct wined3d_vertex_declaration_element
*e
= &vdecl
->elements
[i
];
320 vshader_set_input(This
, e
->output_slot
, e
->usage
, e
->usage_idx
);
324 /* Set local constants for d3d8 shaders */
325 static HRESULT WINAPI
IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader
*iface
,
326 UINT start_idx
, const float *src_data
, UINT count
) {
327 IWineD3DVertexShaderImpl
*This
=(IWineD3DVertexShaderImpl
*)iface
;
330 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This
, start_idx
, src_data
, count
);
332 end_idx
= start_idx
+ count
;
333 if (end_idx
> GL_LIMITS(vshader_constantsF
)) {
334 WARN("end_idx %u > float constants limit %u\n", end_idx
, GL_LIMITS(vshader_constantsF
));
335 end_idx
= GL_LIMITS(vshader_constantsF
);
338 for (i
= start_idx
; i
< end_idx
; ++i
) {
339 local_constant
* lconst
= HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant
));
340 if (!lconst
) return E_OUTOFMEMORY
;
343 memcpy(lconst
->value
, src_data
+ (i
- start_idx
) * 4 /* 4 components */, 4 * sizeof(float));
344 list_add_head(&This
->baseShader
.constantsF
, &lconst
->entry
);
350 static GLuint
vertexshader_compile(IWineD3DVertexShaderImpl
*This
, const struct vs_compile_args
*args
) {
351 IWineD3DDeviceImpl
*deviceImpl
= (IWineD3DDeviceImpl
*) This
->baseShader
.device
;
352 SHADER_BUFFER buffer
;
355 /* Generate the HW shader */
356 TRACE("(%p) : Generating hardware program\n", This
);
357 shader_buffer_init(&buffer
);
358 This
->cur_args
= args
;
359 ret
= deviceImpl
->shader_backend
->shader_generate_vshader((IWineD3DVertexShader
*)This
, &buffer
, args
);
360 This
->cur_args
= NULL
;
361 shader_buffer_free(&buffer
);
366 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl
=
368 /*** IUnknown methods ***/
369 IWineD3DVertexShaderImpl_QueryInterface
,
370 IWineD3DVertexShaderImpl_AddRef
,
371 IWineD3DVertexShaderImpl_Release
,
372 /*** IWineD3DBase methods ***/
373 IWineD3DVertexShaderImpl_GetParent
,
374 /*** IWineD3DBaseShader methods ***/
375 IWineD3DVertexShaderImpl_SetFunction
,
376 /*** IWineD3DVertexShader methods ***/
377 IWineD3DVertexShaderImpl_GetDevice
,
378 IWineD3DVertexShaderImpl_GetFunction
,
379 IWineD3DVertexShaderImpl_FakeSemantics
,
380 IWIneD3DVertexShaderImpl_SetLocalConstantsF
383 void find_vs_compile_args(IWineD3DVertexShaderImpl
*shader
, IWineD3DStateBlockImpl
*stateblock
, struct vs_compile_args
*args
) {
384 args
->fog_src
= stateblock
->renderState
[WINED3DRS_FOGTABLEMODE
] == WINED3DFOG_NONE
? VS_FOG_COORD
: VS_FOG_Z
;
385 args
->swizzle_map
= ((IWineD3DDeviceImpl
*)shader
->baseShader
.device
)->strided_streams
.swizzle_map
;
388 static inline BOOL
vs_args_equal(const struct vs_compile_args
*stored
, const struct vs_compile_args
*new,
389 const DWORD use_map
) {
390 if((stored
->swizzle_map
& use_map
) != new->swizzle_map
) return FALSE
;
391 return stored
->fog_src
== new->fog_src
;
394 GLuint
find_gl_vshader(IWineD3DVertexShaderImpl
*shader
, const struct vs_compile_args
*args
)
397 DWORD new_size
= shader
->shader_array_size
;
398 struct vs_compiled_shader
*new_array
;
399 DWORD use_map
= ((IWineD3DDeviceImpl
*)shader
->baseShader
.device
)->strided_streams
.use_map
;
401 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
402 * so a linear search is more performant than a hashmap or a binary search
403 * (cache coherency etc)
405 for(i
= 0; i
< shader
->num_gl_shaders
; i
++) {
406 if(vs_args_equal(&shader
->gl_shaders
[i
].args
, args
, use_map
)) {
407 return shader
->gl_shaders
[i
].prgId
;
411 TRACE("No matching GL shader found, compiling a new shader\n");
413 if(shader
->shader_array_size
== shader
->num_gl_shaders
) {
414 if (shader
->num_gl_shaders
)
416 new_size
= shader
->shader_array_size
+ max(1, shader
->shader_array_size
/ 2);
417 new_array
= HeapReAlloc(GetProcessHeap(), 0, shader
->gl_shaders
,
418 new_size
* sizeof(*shader
->gl_shaders
));
420 new_array
= HeapAlloc(GetProcessHeap(), 0, sizeof(*shader
->gl_shaders
));
425 ERR("Out of memory\n");
428 shader
->gl_shaders
= new_array
;
429 shader
->shader_array_size
= new_size
;
432 shader
->gl_shaders
[shader
->num_gl_shaders
].args
= *args
;
433 shader
->gl_shaders
[shader
->num_gl_shaders
].prgId
= vertexshader_compile(shader
, args
);
434 return shader
->gl_shaders
[shader
->num_gl_shaders
++].prgId
;