2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
9 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
31 #include "wined3d_private.h"
WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);

/* Resolves to the gl_info of the adapter behind the shader's device.
 * The expansion names a local variable called "This", so it can only be used
 * (directly, or presumably through helpers such as GL_LIMITS()) inside
 * functions that have an IWineD3DVertexShaderImpl *This in scope. */
#define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
37 static void vshader_set_limits(IWineD3DVertexShaderImpl
*This
)
39 DWORD shader_version
= WINED3D_SHADER_VERSION(This
->baseShader
.reg_maps
.shader_version
.major
,
40 This
->baseShader
.reg_maps
.shader_version
.minor
);
42 This
->baseShader
.limits
.texcoord
= 0;
43 This
->baseShader
.limits
.attributes
= 16;
44 This
->baseShader
.limits
.packed_input
= 0;
46 switch (shader_version
)
48 case WINED3D_SHADER_VERSION(1,0):
49 case WINED3D_SHADER_VERSION(1,1):
50 This
->baseShader
.limits
.temporary
= 12;
51 This
->baseShader
.limits
.constant_bool
= 0;
52 This
->baseShader
.limits
.constant_int
= 0;
53 This
->baseShader
.limits
.address
= 1;
54 This
->baseShader
.limits
.packed_output
= 0;
55 This
->baseShader
.limits
.sampler
= 0;
56 This
->baseShader
.limits
.label
= 0;
57 /* TODO: vs_1_1 has a minimum of 96 constants. What happens if a vs_1_1 shader is used
58 * on a vs_3_0 capable card that has 256 constants? */
59 This
->baseShader
.limits
.constant_float
= min(256, GL_LIMITS(vshader_constantsF
));
62 case WINED3D_SHADER_VERSION(2,0):
63 case WINED3D_SHADER_VERSION(2,1):
64 This
->baseShader
.limits
.temporary
= 12;
65 This
->baseShader
.limits
.constant_bool
= 16;
66 This
->baseShader
.limits
.constant_int
= 16;
67 This
->baseShader
.limits
.address
= 1;
68 This
->baseShader
.limits
.packed_output
= 0;
69 This
->baseShader
.limits
.sampler
= 0;
70 This
->baseShader
.limits
.label
= 16;
71 This
->baseShader
.limits
.constant_float
= min(256, GL_LIMITS(vshader_constantsF
));
74 case WINED3D_SHADER_VERSION(3,0):
75 This
->baseShader
.limits
.temporary
= 32;
76 This
->baseShader
.limits
.constant_bool
= 32;
77 This
->baseShader
.limits
.constant_int
= 32;
78 This
->baseShader
.limits
.address
= 1;
79 This
->baseShader
.limits
.packed_output
= 12;
80 This
->baseShader
.limits
.sampler
= 4;
81 This
->baseShader
.limits
.label
= 16; /* FIXME: 2048 */
82 /* DX10 cards on Windows advertise a d3d9 constant limit of 256 even though they are capable
83 * of supporting much more(GL drivers advertise 1024). d3d9.dll and d3d8.dll clamp the
84 * wined3d-advertised maximum. Clamp the constant limit for <= 3.0 shaders to 256.s
85 * use constant buffers */
86 This
->baseShader
.limits
.constant_float
= min(256, GL_LIMITS(vshader_constantsF
));
90 This
->baseShader
.limits
.temporary
= 12;
91 This
->baseShader
.limits
.constant_bool
= 16;
92 This
->baseShader
.limits
.constant_int
= 16;
93 This
->baseShader
.limits
.address
= 1;
94 This
->baseShader
.limits
.packed_output
= 0;
95 This
->baseShader
.limits
.sampler
= 0;
96 This
->baseShader
.limits
.label
= 16;
97 This
->baseShader
.limits
.constant_float
= min(256, GL_LIMITS(vshader_constantsF
));
98 FIXME("Unrecognized vertex shader version %u.%u\n",
99 This
->baseShader
.reg_maps
.shader_version
.major
,
100 This
->baseShader
.reg_maps
.shader_version
.minor
);
104 /* This is an internal function,
105 * used to create fake semantics for shaders
106 * that don't have them - d3d8 shaders where the declaration
107 * stores the register for each input
109 static void vshader_set_input(
110 IWineD3DVertexShaderImpl
* This
,
112 BYTE usage
, BYTE usage_idx
) {
114 This
->semantics_in
[regnum
].usage
= usage
;
115 This
->semantics_in
[regnum
].usage_idx
= usage_idx
;
116 This
->semantics_in
[regnum
].reg
.reg
.type
= WINED3DSPR_INPUT
;
117 This
->semantics_in
[regnum
].reg
.reg
.idx
= regnum
;
118 This
->semantics_in
[regnum
].reg
.write_mask
= WINED3DSP_WRITEMASK_ALL
;
119 This
->semantics_in
[regnum
].reg
.modifiers
= 0;
120 This
->semantics_in
[regnum
].reg
.shift
= 0;
121 This
->semantics_in
[regnum
].reg
.reg
.rel_addr
= NULL
;
124 static BOOL
match_usage(BYTE usage1
, BYTE usage_idx1
, BYTE usage2
, BYTE usage_idx2
) {
125 if (usage_idx1
!= usage_idx2
) return FALSE
;
126 if (usage1
== usage2
) return TRUE
;
127 if (usage1
== WINED3DDECLUSAGE_POSITION
&& usage2
== WINED3DDECLUSAGE_POSITIONT
) return TRUE
;
128 if (usage2
== WINED3DDECLUSAGE_POSITION
&& usage1
== WINED3DDECLUSAGE_POSITIONT
) return TRUE
;
133 BOOL
vshader_get_input(
134 IWineD3DVertexShader
* iface
,
135 BYTE usage_req
, BYTE usage_idx_req
,
136 unsigned int* regnum
) {
138 IWineD3DVertexShaderImpl
* This
= (IWineD3DVertexShaderImpl
*) iface
;
141 for (i
= 0; i
< MAX_ATTRIBS
; i
++) {
142 if (!This
->baseShader
.reg_maps
.attributes
[i
]) continue;
144 if (match_usage(This
->semantics_in
[i
].usage
,
145 This
->semantics_in
[i
].usage_idx
, usage_req
, usage_idx_req
))
154 /* *******************************************
155 IWineD3DVertexShader IUnknown parts follow
156 ******************************************* */
157 static HRESULT WINAPI
IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader
*iface
, REFIID riid
, LPVOID
*ppobj
) {
158 TRACE("iface %p, riid %s, ppobj %p\n", iface
, debugstr_guid(riid
), ppobj
);
160 if (IsEqualGUID(riid
, &IID_IWineD3DVertexShader
)
161 || IsEqualGUID(riid
, &IID_IWineD3DBaseShader
)
162 || IsEqualGUID(riid
, &IID_IWineD3DBase
)
163 || IsEqualGUID(riid
, &IID_IUnknown
))
165 IUnknown_AddRef(iface
);
170 WARN("%s not implemented, returning E_NOINTERFACE\n", debugstr_guid(riid
));
173 return E_NOINTERFACE
;
176 static ULONG WINAPI
IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader
*iface
) {
177 IWineD3DVertexShaderImpl
*This
= (IWineD3DVertexShaderImpl
*)iface
;
178 ULONG refcount
= InterlockedIncrement(&This
->baseShader
.ref
);
180 TRACE("%p increasing refcount to %u\n", This
, refcount
);
185 static ULONG WINAPI
IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader
*iface
) {
186 IWineD3DVertexShaderImpl
*This
= (IWineD3DVertexShaderImpl
*)iface
;
187 ULONG refcount
= InterlockedDecrement(&This
->baseShader
.ref
);
189 TRACE("%p decreasing refcount to %u\n", This
, refcount
);
193 shader_cleanup((IWineD3DBaseShader
*)iface
);
194 HeapFree(GetProcessHeap(), 0, This
);
200 /* *******************************************
201 IWineD3DVertexShader IWineD3DVertexShader parts follow
202 ******************************************* */
204 static HRESULT WINAPI
IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader
*iface
, IUnknown
** parent
){
205 IWineD3DVertexShaderImpl
*This
= (IWineD3DVertexShaderImpl
*)iface
;
207 *parent
= This
->parent
;
208 IUnknown_AddRef(*parent
);
209 TRACE("(%p) : returning %p\n", This
, *parent
);
213 static HRESULT WINAPI
IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader
* iface
, IWineD3DDevice
**pDevice
){
214 IWineD3DVertexShaderImpl
*This
= (IWineD3DVertexShaderImpl
*)iface
;
215 IWineD3DDevice_AddRef(This
->baseShader
.device
);
216 *pDevice
= This
->baseShader
.device
;
217 TRACE("(%p) returning %p\n", This
, *pDevice
);
221 static HRESULT WINAPI
IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader
* impl
, VOID
* pData
, UINT
* pSizeOfData
) {
222 IWineD3DVertexShaderImpl
*This
= (IWineD3DVertexShaderImpl
*)impl
;
223 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This
, pData
, pSizeOfData
);
226 *pSizeOfData
= This
->baseShader
.functionLength
;
229 if (*pSizeOfData
< This
->baseShader
.functionLength
) {
230 /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
231 * than the required size we should write the required size and
232 * return D3DERR_MOREDATA. That's not actually true. */
233 return WINED3DERR_INVALIDCALL
;
236 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
237 memcpy(pData
, This
->baseShader
.function
, This
->baseShader
.functionLength
);
242 /* Note that for vertex shaders CompileShader isn't called until the
243 * shader is first used. The reason for this is that we need the vertex
244 * declaration the shader will be used with in order to determine if
245 * the data in a register is of type D3DCOLOR, and needs swizzling. */
246 static HRESULT WINAPI
IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader
*iface
, CONST DWORD
*pFunction
) {
248 IWineD3DVertexShaderImpl
*This
=(IWineD3DVertexShaderImpl
*)iface
;
249 IWineD3DDeviceImpl
*deviceImpl
= (IWineD3DDeviceImpl
*) This
->baseShader
.device
;
250 const struct wined3d_shader_frontend
*fe
;
252 shader_reg_maps
*reg_maps
= &This
->baseShader
.reg_maps
;
254 TRACE("(%p) : pFunction %p\n", iface
, pFunction
);
256 fe
= shader_select_frontend(*pFunction
);
259 FIXME("Unable to find frontend for shader.\n");
260 return WINED3DERR_INVALIDCALL
;
262 This
->baseShader
.frontend
= fe
;
263 This
->baseShader
.frontend_data
= fe
->shader_init(pFunction
);
264 if (!This
->baseShader
.frontend_data
)
266 FIXME("Failed to initialize frontend.\n");
267 return WINED3DERR_INVALIDCALL
;
270 /* First pass: trace shader */
271 if (TRACE_ON(d3d_shader
)) shader_trace_init(fe
, This
->baseShader
.frontend_data
, pFunction
);
273 /* Initialize immediate constant lists */
274 list_init(&This
->baseShader
.constantsF
);
275 list_init(&This
->baseShader
.constantsB
);
276 list_init(&This
->baseShader
.constantsI
);
278 /* Second pass: figure out registers used, semantics, etc.. */
279 This
->min_rel_offset
= GL_LIMITS(vshader_constantsF
);
280 This
->max_rel_offset
= 0;
281 hr
= shader_get_registers_used((IWineD3DBaseShader
*) This
, fe
,
282 reg_maps
, This
->semantics_in
, This
->semantics_out
, pFunction
);
283 if (hr
!= WINED3D_OK
) return hr
;
285 vshader_set_limits(This
);
287 This
->baseShader
.shader_mode
= deviceImpl
->vs_selected_mode
;
289 if(deviceImpl
->vs_selected_mode
== SHADER_ARB
&&
290 (GLINFO_LOCATION
).arb_vs_offset_limit
&&
291 This
->min_rel_offset
<= This
->max_rel_offset
) {
293 if(This
->max_rel_offset
- This
->min_rel_offset
> 127) {
294 FIXME("The difference between the minimum and maximum relative offset is > 127\n");
295 FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
296 FIXME("Min: %d, Max: %d\n", This
->min_rel_offset
, This
->max_rel_offset
);
297 } else if(This
->max_rel_offset
- This
->min_rel_offset
> 63) {
298 This
->rel_offset
= This
->min_rel_offset
+ 63;
299 } else if(This
->max_rel_offset
> 63) {
300 This
->rel_offset
= This
->min_rel_offset
;
302 This
->rel_offset
= 0;
305 This
->baseShader
.load_local_constsF
= This
->baseShader
.reg_maps
.usesrelconstF
&& !list_empty(&This
->baseShader
.constantsF
);
307 /* copy the function ... because it will certainly be released by application */
308 This
->baseShader
.function
= HeapAlloc(GetProcessHeap(), 0, This
->baseShader
.functionLength
);
309 if (!This
->baseShader
.function
) return E_OUTOFMEMORY
;
310 memcpy(This
->baseShader
.function
, pFunction
, This
->baseShader
.functionLength
);
315 /* Preload semantics for d3d8 shaders */
316 static void WINAPI
IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader
*iface
, IWineD3DVertexDeclaration
*vertex_declaration
) {
317 IWineD3DVertexShaderImpl
*This
=(IWineD3DVertexShaderImpl
*)iface
;
318 IWineD3DVertexDeclarationImpl
* vdecl
= (IWineD3DVertexDeclarationImpl
*)vertex_declaration
;
321 for (i
= 0; i
< vdecl
->element_count
; ++i
)
323 const struct wined3d_vertex_declaration_element
*e
= &vdecl
->elements
[i
];
324 vshader_set_input(This
, e
->output_slot
, e
->usage
, e
->usage_idx
);
328 /* Set local constants for d3d8 shaders */
329 static HRESULT WINAPI
IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader
*iface
,
330 UINT start_idx
, const float *src_data
, UINT count
) {
331 IWineD3DVertexShaderImpl
*This
=(IWineD3DVertexShaderImpl
*)iface
;
334 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This
, start_idx
, src_data
, count
);
336 end_idx
= start_idx
+ count
;
337 if (end_idx
> GL_LIMITS(vshader_constantsF
)) {
338 WARN("end_idx %u > float constants limit %u\n", end_idx
, GL_LIMITS(vshader_constantsF
));
339 end_idx
= GL_LIMITS(vshader_constantsF
);
342 for (i
= start_idx
; i
< end_idx
; ++i
) {
343 local_constant
* lconst
= HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant
));
344 if (!lconst
) return E_OUTOFMEMORY
;
347 memcpy(lconst
->value
, src_data
+ (i
- start_idx
) * 4 /* 4 components */, 4 * sizeof(float));
348 list_add_head(&This
->baseShader
.constantsF
, &lconst
->entry
);
354 static GLuint
vertexshader_compile(IWineD3DVertexShaderImpl
*This
, const struct vs_compile_args
*args
) {
355 IWineD3DDeviceImpl
*deviceImpl
= (IWineD3DDeviceImpl
*) This
->baseShader
.device
;
356 SHADER_BUFFER buffer
;
359 /* Generate the HW shader */
360 TRACE("(%p) : Generating hardware program\n", This
);
361 shader_buffer_init(&buffer
);
362 This
->cur_args
= args
;
363 ret
= deviceImpl
->shader_backend
->shader_generate_vshader((IWineD3DVertexShader
*)This
, &buffer
, args
);
364 This
->cur_args
= NULL
;
365 shader_buffer_free(&buffer
);
370 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl
=
372 /*** IUnknown methods ***/
373 IWineD3DVertexShaderImpl_QueryInterface
,
374 IWineD3DVertexShaderImpl_AddRef
,
375 IWineD3DVertexShaderImpl_Release
,
376 /*** IWineD3DBase methods ***/
377 IWineD3DVertexShaderImpl_GetParent
,
378 /*** IWineD3DBaseShader methods ***/
379 IWineD3DVertexShaderImpl_SetFunction
,
380 /*** IWineD3DVertexShader methods ***/
381 IWineD3DVertexShaderImpl_GetDevice
,
382 IWineD3DVertexShaderImpl_GetFunction
,
383 IWineD3DVertexShaderImpl_FakeSemantics
,
384 IWIneD3DVertexShaderImpl_SetLocalConstantsF
387 void find_vs_compile_args(IWineD3DVertexShaderImpl
*shader
, IWineD3DStateBlockImpl
*stateblock
, struct vs_compile_args
*args
) {
388 args
->fog_src
= stateblock
->renderState
[WINED3DRS_FOGTABLEMODE
] == WINED3DFOG_NONE
? VS_FOG_COORD
: VS_FOG_Z
;
389 args
->swizzle_map
= ((IWineD3DDeviceImpl
*)shader
->baseShader
.device
)->strided_streams
.swizzle_map
;
392 static inline BOOL
vs_args_equal(const struct vs_compile_args
*stored
, const struct vs_compile_args
*new,
393 const DWORD use_map
) {
394 if((stored
->swizzle_map
& use_map
) != new->swizzle_map
) return FALSE
;
395 return stored
->fog_src
== new->fog_src
;
398 GLuint
find_gl_vshader(IWineD3DVertexShaderImpl
*shader
, const struct vs_compile_args
*args
)
401 DWORD new_size
= shader
->shader_array_size
;
402 struct vs_compiled_shader
*new_array
;
403 DWORD use_map
= ((IWineD3DDeviceImpl
*)shader
->baseShader
.device
)->strided_streams
.use_map
;
405 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
406 * so a linear search is more performant than a hashmap or a binary search
407 * (cache coherency etc)
409 for(i
= 0; i
< shader
->num_gl_shaders
; i
++) {
410 if(vs_args_equal(&shader
->gl_shaders
[i
].args
, args
, use_map
)) {
411 return shader
->gl_shaders
[i
].prgId
;
415 TRACE("No matching GL shader found, compiling a new shader\n");
417 if(shader
->shader_array_size
== shader
->num_gl_shaders
) {
418 if (shader
->num_gl_shaders
)
420 new_size
= shader
->shader_array_size
+ max(1, shader
->shader_array_size
/ 2);
421 new_array
= HeapReAlloc(GetProcessHeap(), 0, shader
->gl_shaders
,
422 new_size
* sizeof(*shader
->gl_shaders
));
424 new_array
= HeapAlloc(GetProcessHeap(), 0, sizeof(*shader
->gl_shaders
));
429 ERR("Out of memory\n");
432 shader
->gl_shaders
= new_array
;
433 shader
->shader_array_size
= new_size
;
436 shader
->gl_shaders
[shader
->num_gl_shaders
].args
= *args
;
437 shader
->gl_shaders
[shader
->num_gl_shaders
].prgId
= vertexshader_compile(shader
, args
);
438 return shader
->gl_shaders
[shader
->num_gl_shaders
++].prgId
;