push 0f15bbd80d260bbd8adf052e820484a405c49375
[wine/hacks.git] / dlls / wined3d / vertexshader.c
blob37a3705c7f578dba7f7f5404833a2c365bfbd366
1 /*
2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
25 #include "config.h"
27 #include <math.h>
28 #include <stdio.h>
30 #include "wined3d_private.h"
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
36 /* Shader debugging - Change the following line to enable debugging of software
37 vertex shaders */
38 #if 0 /* Must not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
41 #else
42 # define VSTRACE(A)
43 # define TRACE_VSVECTOR(name)
44 #endif
46 /**
47 * DirectX9 SDK download
48 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
50 * Exploring D3DX
51 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
53 * Using Vertex Shaders
54 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
56 * Dx9 New
57 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
59 * Dx9 Shaders
60 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
65 * Dx9 D3DX
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
68 * FVF
69 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
71 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72 * http://developer.nvidia.com/view.asp?IO=vstovp
74 * NVIDIA: Memory Management with VAR
75 * http://developer.nvidia.com/view.asp?IO=var_memory_management
78 /* TODO: Vertex and pixel shaders are almost identical; the only exceptions are the way some of the data is looked up and the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
79 Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code */
81 #define GLNAME_REQUIRE_GLSL ((const char *)1)
83 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
84 /* This table is not order or position dependent. */
86 /* Arithmetic */
87 {WINED3DSIO_NOP, "nop", "NOP", 0, 0, vshader_hw_map2gl, NULL, 0, 0},
88 {WINED3DSIO_MOV, "mov", "MOV", 1, 2, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
89 {WINED3DSIO_MOVA, "mova", NULL, 1, 2, vshader_hw_map2gl, shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
90 {WINED3DSIO_ADD, "add", "ADD", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
91 {WINED3DSIO_SUB, "sub", "SUB", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
92 {WINED3DSIO_MAD, "mad", "MAD", 1, 4, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
93 {WINED3DSIO_MUL, "mul", "MUL", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
94 {WINED3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rcp, 0, 0},
95 {WINED3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rsq, 0, 0},
96 {WINED3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
97 {WINED3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
98 {WINED3DSIO_MIN, "min", "MIN", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
99 {WINED3DSIO_MAX, "max", "MAX", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
100 {WINED3DSIO_SLT, "slt", "SLT", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
101 {WINED3DSIO_SGE, "sge", "SGE", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
102 {WINED3DSIO_ABS, "abs", "ABS", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
103 {WINED3DSIO_EXP, "exp", "EX2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
104 {WINED3DSIO_LOG, "log", "LG2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
105 {WINED3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_hw_map2gl, shader_glsl_expp, 0, 0},
106 {WINED3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
107 {WINED3DSIO_LIT, "lit", "LIT", 1, 2, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
108 {WINED3DSIO_DST, "dst", "DST", 1, 3, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
109 {WINED3DSIO_LRP, "lrp", "LRP", 1, 4, NULL, shader_glsl_lrp, 0, 0},
110 {WINED3DSIO_FRC, "frc", "FRC", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
111 {WINED3DSIO_POW, "pow", "POW", 1, 3, vshader_hw_map2gl, shader_glsl_pow, 0, 0},
112 {WINED3DSIO_CRS, "crs", "XPD", 1, 3, vshader_hw_map2gl, shader_glsl_cross, 0, 0},
113 /* TODO: sng can possibly be performed a s
114 RCP tmp, vec
115 MUL out, tmp, vec*/
116 {WINED3DSIO_SGN, "sgn", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
117 {WINED3DSIO_NRM, "nrm", NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
118 {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
119 {WINED3DSIO_SINCOS, "sincos", "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
120 /* Matrix */
121 {WINED3DSIO_M4x4, "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
122 {WINED3DSIO_M4x3, "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
123 {WINED3DSIO_M3x4, "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
124 {WINED3DSIO_M3x3, "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
125 {WINED3DSIO_M3x2, "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
126 /* Declare registers */
127 {WINED3DSIO_DCL, "dcl", NULL, 0, 2, NULL, NULL, 0, 0},
128 /* Constant definitions */
129 {WINED3DSIO_DEF, "def", NULL, 1, 5, NULL, NULL, 0, 0},
130 {WINED3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
131 {WINED3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
132 /* Flow control - requires GLSL or software shaders */
133 {WINED3DSIO_REP , "rep", NULL, 0, 1, NULL, shader_glsl_rep, WINED3DVS_VERSION(2,0), -1},
134 {WINED3DSIO_ENDREP, "endrep", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
135 {WINED3DSIO_IF, "if", NULL, 0, 1, NULL, shader_glsl_if, WINED3DVS_VERSION(2,0), -1},
136 {WINED3DSIO_IFC, "ifc", NULL, 0, 2, NULL, shader_glsl_ifc, WINED3DVS_VERSION(2,1), -1},
137 {WINED3DSIO_ELSE, "else", NULL, 0, 0, NULL, shader_glsl_else, WINED3DVS_VERSION(2,0), -1},
138 {WINED3DSIO_ENDIF, "endif", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
139 {WINED3DSIO_BREAK, "break", NULL, 0, 0, NULL, shader_glsl_break, WINED3DVS_VERSION(2,1), -1},
140 {WINED3DSIO_BREAKC, "breakc", NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
141 {WINED3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
142 {WINED3DSIO_CALL, "call", NULL, 0, 1, NULL, shader_glsl_call, WINED3DVS_VERSION(2,0), -1},
143 {WINED3DSIO_CALLNZ, "callnz", NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
144 {WINED3DSIO_LOOP, "loop", NULL, 0, 2, NULL, shader_glsl_loop, WINED3DVS_VERSION(2,0), -1},
145 {WINED3DSIO_RET, "ret", NULL, 0, 0, NULL, NULL, WINED3DVS_VERSION(2,0), -1},
146 {WINED3DSIO_ENDLOOP,"endloop", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
147 {WINED3DSIO_LABEL, "label", NULL, 0, 1, NULL, shader_glsl_label, WINED3DVS_VERSION(2,0), -1},
149 {WINED3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
150 {WINED3DSIO_TEXLDL, "texldl", NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
151 {0, NULL, NULL, 0, 0, NULL, NULL, 0, 0}
154 static void vshader_set_limits(
155 IWineD3DVertexShaderImpl *This) {
157 This->baseShader.limits.texcoord = 0;
158 This->baseShader.limits.attributes = 16;
159 This->baseShader.limits.packed_input = 0;
161 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
162 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
164 switch (This->baseShader.hex_version) {
165 case WINED3DVS_VERSION(1,0):
166 case WINED3DVS_VERSION(1,1):
167 This->baseShader.limits.temporary = 12;
168 This->baseShader.limits.constant_bool = 0;
169 This->baseShader.limits.constant_int = 0;
170 This->baseShader.limits.address = 1;
171 This->baseShader.limits.packed_output = 0;
172 This->baseShader.limits.sampler = 0;
173 This->baseShader.limits.label = 0;
174 break;
176 case WINED3DVS_VERSION(2,0):
177 case WINED3DVS_VERSION(2,1):
178 This->baseShader.limits.temporary = 12;
179 This->baseShader.limits.constant_bool = 16;
180 This->baseShader.limits.constant_int = 16;
181 This->baseShader.limits.address = 1;
182 This->baseShader.limits.packed_output = 0;
183 This->baseShader.limits.sampler = 0;
184 This->baseShader.limits.label = 16;
185 break;
187 case WINED3DVS_VERSION(3,0):
188 This->baseShader.limits.temporary = 32;
189 This->baseShader.limits.constant_bool = 32;
190 This->baseShader.limits.constant_int = 32;
191 This->baseShader.limits.address = 1;
192 This->baseShader.limits.packed_output = 12;
193 This->baseShader.limits.sampler = 4;
194 This->baseShader.limits.label = 16; /* FIXME: 2048 */
195 break;
197 default: This->baseShader.limits.temporary = 12;
198 This->baseShader.limits.constant_bool = 16;
199 This->baseShader.limits.constant_int = 16;
200 This->baseShader.limits.address = 1;
201 This->baseShader.limits.packed_output = 0;
202 This->baseShader.limits.sampler = 0;
203 This->baseShader.limits.label = 16;
204 FIXME("Unrecognized vertex shader version %#x\n",
205 This->baseShader.hex_version);
209 /* This is an internal function,
210 * used to create fake semantics for shaders
211 * that don't have them - d3d8 shaders where the declaration
212 * stores the register for each input
214 static void vshader_set_input(
215 IWineD3DVertexShaderImpl* This,
216 unsigned int regnum,
217 BYTE usage, BYTE usage_idx) {
219 /* Fake usage: set reserved bit, usage, usage_idx */
220 DWORD usage_token = (0x1 << 31) |
221 (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
223 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
224 DWORD reg_token = (0x1 << 31) |
225 WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
227 This->semantics_in[regnum].usage = usage_token;
228 This->semantics_in[regnum].reg = reg_token;
231 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
232 if (usage_idx1 != usage_idx2) return FALSE;
233 if (usage1 == usage2) return TRUE;
234 if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
235 if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
237 return FALSE;
240 BOOL vshader_get_input(
241 IWineD3DVertexShader* iface,
242 BYTE usage_req, BYTE usage_idx_req,
243 unsigned int* regnum) {
245 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
246 int i;
248 for (i = 0; i < MAX_ATTRIBS; i++) {
249 DWORD usage_token = This->semantics_in[i].usage;
250 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
251 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
253 if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
254 *regnum = i;
255 return TRUE;
258 return FALSE;
261 BOOL vshader_input_is_color(
262 IWineD3DVertexShader* iface,
263 unsigned int regnum) {
265 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
267 DWORD usage_token = This->semantics_in[regnum].usage;
268 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
269 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
271 int i;
273 for(i = 0; i < This->num_swizzled_attribs; i++) {
274 if(This->swizzled_attribs[i].usage == usage &&
275 This->swizzled_attribs[i].idx == usage_idx) {
276 return TRUE;
279 return FALSE;
282 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
283 UINT num = 0, i, j;
284 UINT numoldswizzles = This->num_swizzled_attribs;
285 IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
287 DWORD usage_token, usage, usage_idx;
288 BOOL found;
290 attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
292 /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
293 memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
295 memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
297 for(i = 0; i < decl->num_swizzled_attribs; i++) {
298 for(j = 0; j < MAX_ATTRIBS; j++) {
299 usage_token = This->semantics_in[j].usage;
300 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
301 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
303 if(decl->swizzled_attribs[i].usage == usage &&
304 decl->swizzled_attribs[i].idx == usage_idx) {
305 This->swizzled_attribs[num].usage = usage;
306 This->swizzled_attribs[num].idx = usage_idx;
307 num++;
312 /* Add previously converted attributes back in if they are not defined in the current declaration */
313 for(i = 0; i < numoldswizzles; i++) {
315 found = FALSE;
316 for(j = 0; j < decl->declarationWNumElements; j++) {
317 if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
318 oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
319 found = TRUE;
322 if(found) {
323 /* This previously converted attribute is declared in the current declaration. Either it is
324 * already in the new array, or it should not be there. Skip it
326 continue;
328 /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
329 * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
330 * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
331 * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
332 * stays unswizzled as well because it isn't found in the oldswizzles array
334 for(j = 0; j < num; j++) {
335 if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
336 oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
337 oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
338 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
339 sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
340 break;
343 This->swizzled_attribs[j].usage = oldswizzles[i].usage;
344 This->swizzled_attribs[j].idx = oldswizzles[i].idx;
345 num++;
348 TRACE("New swizzled attributes array\n");
349 for(i = 0; i < num; i++) {
350 TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
351 This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
353 This->num_swizzled_attribs = num;
355 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
356 or GLSL and send it to the card */
357 static VOID IWineD3DVertexShaderImpl_GenerateShader(
358 IWineD3DVertexShader *iface,
359 shader_reg_maps* reg_maps,
360 CONST DWORD *pFunction) {
362 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
363 IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
364 SHADER_BUFFER buffer;
366 find_swizzled_attribs(decl, This);
368 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
369 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
370 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
371 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
372 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
373 This->fixupVertexBufferSize = PGMSIZE;
374 This->fixupVertexBuffer[0] = 0;
376 buffer.buffer = This->device->fixupVertexBuffer;
377 #else
378 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
379 #endif
380 buffer.bsize = 0;
381 buffer.lineNo = 0;
382 buffer.newline = TRUE;
384 if (This->baseShader.shader_mode == SHADER_GLSL) {
386 /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
387 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
389 /* Base Declarations */
390 shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
392 /* Base Shader Body */
393 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
395 /* Unpack 3.0 outputs */
396 if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0)) {
397 shader_addline(&buffer, "order_ps_input(OUT);\n");
398 } else {
399 shader_addline(&buffer, "order_ps_input();\n");
402 /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
403 if (!reg_maps->fog)
404 shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
406 /* Write the final position.
408 * OpenGL coordinates specify the center of the pixel while d3d coords specify
409 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
410 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
411 * contains 1.0 to allow a mad.
413 shader_addline(&buffer, "gl_Position.xy = gl_Position.xy * posFixup.xy + posFixup.zw;\n");
415 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
417 * Basically we want(in homogenous coordinates) z = z * 2 - 1. However, shaders are run
418 * before the homogenous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
419 * which is the same as z = z / 2 - w.
421 shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
423 shader_addline(&buffer, "}\n");
425 TRACE("Compiling shader object %u\n", shader_obj);
426 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
427 GL_EXTCALL(glCompileShaderARB(shader_obj));
428 print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
430 /* Store the shader object */
431 This->baseShader.prgId = shader_obj;
433 } else if (This->baseShader.shader_mode == SHADER_ARB) {
435 /* Create the hw ARB shader */
436 shader_addline(&buffer, "!!ARBvp1.0\n");
437 shader_addline(&buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
439 /* Mesa supports only 95 constants */
440 if (GL_VEND(MESA) || GL_VEND(WINE))
441 This->baseShader.limits.constant_float =
442 min(95, This->baseShader.limits.constant_float);
444 /* Some instructions need a temporary register. Add it if needed, but only if it is really needed */
445 if(reg_maps->usesnrm || This->rel_offset) {
446 shader_addline(&buffer, "TEMP TMP;\n");
449 /* Base Declarations */
450 shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
452 /* We need a constant to fixup the final position */
453 shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
455 /* Base Shader Body */
456 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
458 /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
459 if (!reg_maps->fog)
460 shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
462 /* Write the final position.
464 * OpenGL coordinates specify the center of the pixel while d3d coords specify
465 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
466 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
467 * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
469 shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, posFixup.z;\n");
470 shader_addline(&buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, posFixup.w;\n");
472 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
473 * and the glsl equivalent
475 shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
477 shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
479 shader_addline(&buffer, "END\n");
481 /* TODO: change to resource.glObjectHandle or something like that */
482 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
484 TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
485 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
487 TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
488 /* Create the program and check for errors */
489 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
490 buffer.bsize, buffer.buffer));
492 if (glGetError() == GL_INVALID_OPERATION) {
493 GLint errPos;
494 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
495 FIXME("HW VertexShader Error at position %d: %s\n",
496 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
497 This->baseShader.prgId = -1;
501 #if 1 /* if were using the data buffer of device then we don't need to free it */
502 HeapFree(GetProcessHeap(), 0, buffer.buffer);
503 #endif
506 /* *******************************************
507 IWineD3DVertexShader IUnknown parts follow
508 ******************************************* */
509 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
510 return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
513 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
514 return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
517 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
518 return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
521 /* *******************************************
522 IWineD3DVertexShader IWineD3DVertexShader parts follow
523 ******************************************* */
525 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
526 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
528 *parent = This->parent;
529 IUnknown_AddRef(*parent);
530 TRACE("(%p) : returning %p\n", This, *parent);
531 return WINED3D_OK;
534 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
535 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
536 IWineD3DDevice_AddRef(This->baseShader.device);
537 *pDevice = This->baseShader.device;
538 TRACE("(%p) returning %p\n", This, *pDevice);
539 return WINED3D_OK;
542 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
543 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
544 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
546 if (NULL == pData) {
547 *pSizeOfData = This->baseShader.functionLength;
548 return WINED3D_OK;
550 if (*pSizeOfData < This->baseShader.functionLength) {
551 /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
552 * than the required size we should write the required size and
553 * return D3DERR_MOREDATA. That's not actually true. */
554 return WINED3DERR_INVALIDCALL;
556 if (NULL == This->baseShader.function) { /* no function defined */
557 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
558 (*(DWORD **) pData) = NULL;
559 } else {
560 if(This->baseShader.functionLength == 0){
563 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
564 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
566 return WINED3D_OK;
569 /* Note that for vertex shaders CompileShader isn't called until the
570 * shader is first used. The reason for this is that we need the vertex
571 * declaration the shader will be used with in order to determine if
572 * the data in a register is of type D3DCOLOR, and needs swizzling. */
573 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
575 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
576 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
577 HRESULT hr;
578 shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
580 TRACE("(%p) : pFunction %p\n", iface, pFunction);
582 /* First pass: trace shader */
583 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
584 vshader_set_limits(This);
586 /* Initialize immediate constant lists */
587 list_init(&This->baseShader.constantsF);
588 list_init(&This->baseShader.constantsB);
589 list_init(&This->baseShader.constantsI);
591 /* Second pass: figure out registers used, semantics, etc.. */
592 This->min_rel_offset = GL_LIMITS(vshader_constantsF);
593 This->max_rel_offset = 0;
594 memset(reg_maps, 0, sizeof(shader_reg_maps));
595 hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
596 This->semantics_in, This->semantics_out, pFunction, NULL);
597 if (hr != WINED3D_OK) return hr;
599 This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
601 if(deviceImpl->vs_selected_mode == SHADER_ARB &&
602 (GLINFO_LOCATION).arb_vs_offset_limit &&
603 This->min_rel_offset <= This->max_rel_offset) {
605 if(This->max_rel_offset - This->min_rel_offset > 127) {
606 FIXME("The difference between the minimum and maximum relative offset is > 127\n");
607 FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
608 FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
609 } else if(This->max_rel_offset - This->min_rel_offset > 63) {
610 This->rel_offset = This->min_rel_offset + 63;
611 } else if(This->max_rel_offset > 63) {
612 This->rel_offset = This->min_rel_offset;
613 } else {
614 This->rel_offset = 0;
617 This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
619 /* copy the function ... because it will certainly be released by application */
620 if (NULL != pFunction) {
621 void *function;
623 function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
624 if (!function) return E_OUTOFMEMORY;
625 memcpy(function, pFunction, This->baseShader.functionLength);
626 This->baseShader.function = function;
627 } else {
628 This->baseShader.function = NULL;
631 return WINED3D_OK;
634 /* Preload semantics for d3d8 shaders */
635 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
636 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
637 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
639 int i;
640 for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
641 WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
642 vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
646 /* Set local constants for d3d8 shaders */
647 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
648 UINT start_idx, const float *src_data, UINT count) {
649 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
650 UINT i, end_idx;
652 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
654 end_idx = start_idx + count;
655 if (end_idx > GL_LIMITS(vshader_constantsF)) {
656 WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
657 end_idx = GL_LIMITS(vshader_constantsF);
660 for (i = start_idx; i < end_idx; ++i) {
661 local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
662 if (!lconst) return E_OUTOFMEMORY;
664 lconst->idx = i;
665 memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
666 list_add_head(&This->baseShader.constantsF, &lconst->entry);
669 return WINED3D_OK;
672 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
673 UINT i, j, k;
674 BOOL found;
676 DWORD usage_token;
677 DWORD usage;
678 DWORD usage_idx;
680 for(i = 0; i < vdecl->declarationWNumElements; i++) {
681 for(j = 0; j < MAX_ATTRIBS; j++) {
682 if(!This->baseShader.reg_maps.attributes) continue;
684 usage_token = This->semantics_in[j].usage;
685 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
686 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
688 if(vdecl->pDeclarationWine[i].Usage != usage ||
689 vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
690 continue;
693 found = FALSE;
694 for(k = 0; k < This->num_swizzled_attribs; k++) {
695 if(This->swizzled_attribs[k].usage == usage &&
696 This->swizzled_attribs[k].idx == usage_idx) {
697 found = TRUE;
700 if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
701 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
702 debug_d3ddeclusage(usage), usage_idx);
703 return TRUE;
705 if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
706 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
707 debug_d3ddeclusage(usage), usage_idx);
708 return TRUE;
712 return FALSE;
715 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
716 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
717 IWineD3DVertexDeclarationImpl *vdecl;
718 CONST DWORD *function = This->baseShader.function;
719 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
721 TRACE("(%p) : function %p\n", iface, function);
723 /* We're already compiled. */
724 if (This->baseShader.is_compiled) {
725 vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
727 if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
728 memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
730 /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
731 * we have to recompile, but we have to take a deeper look at see if the attribs that differ
732 * are declared in the decl and used in the shader
734 if(swizzled_attribs_differ(This, vdecl)) {
735 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
736 goto recompile;
738 WARN("Swizzled attribute validation required an expensive comparison\n");
741 return WINED3D_OK;
743 recompile:
744 if(This->recompile_count < 50) {
745 This->recompile_count++;
746 } else {
747 FIXME("Vertexshader %p recompiled more than 50 times\n", This);
750 deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
753 /* We don't need to compile */
754 if (!function) {
755 This->baseShader.is_compiled = TRUE;
756 return WINED3D_OK;
759 /* Generate the HW shader */
760 TRACE("(%p) : Generating hardware program\n", This);
761 IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
763 This->baseShader.is_compiled = TRUE;
765 return WINED3D_OK;
768 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
770 /*** IUnknown methods ***/
771 IWineD3DVertexShaderImpl_QueryInterface,
772 IWineD3DVertexShaderImpl_AddRef,
773 IWineD3DVertexShaderImpl_Release,
774 /*** IWineD3DBase methods ***/
775 IWineD3DVertexShaderImpl_GetParent,
776 /*** IWineD3DBaseShader methods ***/
777 IWineD3DVertexShaderImpl_SetFunction,
778 IWineD3DVertexShaderImpl_CompileShader,
779 /*** IWineD3DVertexShader methods ***/
780 IWineD3DVertexShaderImpl_GetDevice,
781 IWineD3DVertexShaderImpl_GetFunction,
782 IWineD3DVertexShaderImpl_FakeSemantics,
783 IWIneD3DVertexShaderImpl_SetLocalConstantsF