wined3d: Multiply the half pixel correction with .w.
[wine/multimedia.git] / dlls / wined3d / vertexshader.c
blobd6c34d8a87396317ec46ce6e1192ec76069044da
1 /*
2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2004 Christian Costa
7 * Copyright 2005 Oliver Stieber
8 * Copyright 2006 Ivan Gyurdiev
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
25 #include "config.h"
27 #include <math.h>
28 #include <stdio.h>
30 #include "wined3d_private.h"
32 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34 #define GLINFO_LOCATION ((IWineD3DDeviceImpl *)This->baseShader.device)->adapter->gl_info
36 /* Shader debugging - Change the following line to enable debugging of software
37 vertex shaders */
38 #if 0 /* Must not be 1 in cvs version */
39 # define VSTRACE(A) TRACE A
40 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
41 #else
42 # define VSTRACE(A)
43 # define TRACE_VSVECTOR(name)
44 #endif
46 /**
47 * DirectX9 SDK download
48 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
50 * Exploring D3DX
51 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
53 * Using Vertex Shaders
54 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
56 * Dx9 New
57 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
59 * Dx9 Shaders
60 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
65 * Dx9 D3DX
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
68 * FVF
69 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
71 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72 * http://developer.nvidia.com/view.asp?IO=vstovp
74 * NVIDIA: Memory Management with VAR
75 * http://developer.nvidia.com/view.asp?IO=var_memory_management
78 /* TODO: Vertex and Pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data i.e. some instructions are only valid for pshaders and some for vshaders
79 because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code */
81 #define GLNAME_REQUIRE_GLSL ((const char *)1)
/* Vertex shader opcode table; the all-zero entry at the end terminates it.
 * NOTE(review): SHADER_OPCODE is declared elsewhere. The columns appear to be:
 * opcode, D3D mnemonic, ARB mnemonic, destination flag, parameter count,
 * ARB handler, GLSL handler, minimum and maximum shader version - confirm
 * against the struct definition. */
83 CONST SHADER_OPCODE IWineD3DVertexShaderImpl_shader_ins[] = {
84 /* This table is not order or position dependent. */
86 /* Arithmetic */
87 {WINED3DSIO_NOP, "nop", "NOP", 0, 0, vshader_hw_map2gl, NULL, 0, 0},
88 {WINED3DSIO_MOV, "mov", "MOV", 1, 2, vshader_hw_map2gl, shader_glsl_mov, 0, 0},
89 {WINED3DSIO_MOVA, "mova", NULL, 1, 2, vshader_hw_map2gl, shader_glsl_mov, WINED3DVS_VERSION(2,0), -1},
90 {WINED3DSIO_ADD, "add", "ADD", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
91 {WINED3DSIO_SUB, "sub", "SUB", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
92 {WINED3DSIO_MAD, "mad", "MAD", 1, 4, vshader_hw_map2gl, shader_glsl_mad, 0, 0},
93 {WINED3DSIO_MUL, "mul", "MUL", 1, 3, vshader_hw_map2gl, shader_glsl_arith, 0, 0},
94 {WINED3DSIO_RCP, "rcp", "RCP", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rcp, 0, 0},
95 {WINED3DSIO_RSQ, "rsq", "RSQ", 1, 2, vshader_hw_rsq_rcp, shader_glsl_rsq, 0, 0},
96 {WINED3DSIO_DP3, "dp3", "DP3", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
97 {WINED3DSIO_DP4, "dp4", "DP4", 1, 3, vshader_hw_map2gl, shader_glsl_dot, 0, 0},
98 {WINED3DSIO_MIN, "min", "MIN", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
99 {WINED3DSIO_MAX, "max", "MAX", 1, 3, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
100 {WINED3DSIO_SLT, "slt", "SLT", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
101 {WINED3DSIO_SGE, "sge", "SGE", 1, 3, vshader_hw_map2gl, shader_glsl_compare, 0, 0},
102 {WINED3DSIO_ABS, "abs", "ABS", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
103 {WINED3DSIO_EXP, "exp", "EX2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
104 {WINED3DSIO_LOG, "log", "LG2", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
105 {WINED3DSIO_EXPP, "expp", "EXP", 1, 2, vshader_hw_map2gl, shader_glsl_expp, 0, 0},
106 {WINED3DSIO_LOGP, "logp", "LOG", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
107 {WINED3DSIO_LIT, "lit", "LIT", 1, 2, vshader_hw_map2gl, shader_glsl_lit, 0, 0},
108 {WINED3DSIO_DST, "dst", "DST", 1, 3, vshader_hw_map2gl, shader_glsl_dst, 0, 0},
109 {WINED3DSIO_LRP, "lrp", "LRP", 1, 4, NULL, shader_glsl_lrp, 0, 0},
110 {WINED3DSIO_FRC, "frc", "FRC", 1, 2, vshader_hw_map2gl, shader_glsl_map2gl, 0, 0},
111 {WINED3DSIO_POW, "pow", "POW", 1, 3, vshader_hw_map2gl, shader_glsl_pow, 0, 0},
112 {WINED3DSIO_CRS, "crs", "XPD", 1, 3, vshader_hw_map2gl, shader_glsl_cross, 0, 0},
113 /* TODO: sgn can possibly be performed as
114 RCP tmp, vec
115 MUL out, tmp, vec*/
116 {WINED3DSIO_SGN, "sgn", NULL, 1, 2, NULL, shader_glsl_map2gl, 0, 0},
117 {WINED3DSIO_NRM, "nrm", NULL, 1, 2, shader_hw_nrm, shader_glsl_map2gl, 0, 0},
118 {WINED3DSIO_SINCOS, "sincos", NULL, 1, 4, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(2,0), WINED3DVS_VERSION(2,1)},
119 {WINED3DSIO_SINCOS, "sincos", "SCS", 1, 2, shader_hw_sincos, shader_glsl_sincos, WINED3DVS_VERSION(3,0), -1},
120 /* Matrix */
121 {WINED3DSIO_M4x4, "m4x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
122 {WINED3DSIO_M4x3, "m4x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
123 {WINED3DSIO_M3x4, "m3x4", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
124 {WINED3DSIO_M3x3, "m3x3", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
125 {WINED3DSIO_M3x2, "m3x2", "undefined", 1, 3, shader_hw_mnxn, shader_glsl_mnxn, 0, 0},
126 /* Declare registers */
127 {WINED3DSIO_DCL, "dcl", NULL, 0, 2, NULL, NULL, 0, 0},
128 /* Constant definitions */
129 {WINED3DSIO_DEF, "def", NULL, 1, 5, NULL, NULL, 0, 0},
130 {WINED3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 1, 2, NULL, NULL, 0, 0},
131 {WINED3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 1, 5, NULL, NULL, 0, 0},
132 /* Flow control - requires GLSL or software shaders */
133 {WINED3DSIO_REP , "rep", NULL, 0, 1, NULL, shader_glsl_rep, WINED3DVS_VERSION(2,0), -1},
134 {WINED3DSIO_ENDREP, "endrep", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
135 {WINED3DSIO_IF, "if", NULL, 0, 1, NULL, shader_glsl_if, WINED3DVS_VERSION(2,0), -1},
136 {WINED3DSIO_IFC, "ifc", NULL, 0, 2, NULL, shader_glsl_ifc, WINED3DVS_VERSION(2,1), -1},
137 {WINED3DSIO_ELSE, "else", NULL, 0, 0, NULL, shader_glsl_else, WINED3DVS_VERSION(2,0), -1},
138 {WINED3DSIO_ENDIF, "endif", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
139 {WINED3DSIO_BREAK, "break", NULL, 0, 0, NULL, shader_glsl_break, WINED3DVS_VERSION(2,1), -1},
140 {WINED3DSIO_BREAKC, "breakc", NULL, 0, 2, NULL, shader_glsl_breakc, WINED3DVS_VERSION(2,1), -1},
141 {WINED3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 0, 1, NULL, NULL, 0, 0},
142 {WINED3DSIO_CALL, "call", NULL, 0, 1, NULL, shader_glsl_call, WINED3DVS_VERSION(2,0), -1},
143 {WINED3DSIO_CALLNZ, "callnz", NULL, 0, 2, NULL, shader_glsl_callnz, WINED3DVS_VERSION(2,0), -1},
144 {WINED3DSIO_LOOP, "loop", NULL, 0, 2, NULL, shader_glsl_loop, WINED3DVS_VERSION(2,0), -1},
145 {WINED3DSIO_RET, "ret", NULL, 0, 0, NULL, NULL, WINED3DVS_VERSION(2,0), -1},
146 {WINED3DSIO_ENDLOOP,"endloop", NULL, 0, 0, NULL, shader_glsl_end, WINED3DVS_VERSION(2,0), -1},
147 {WINED3DSIO_LABEL, "label", NULL, 0, 1, NULL, shader_glsl_label, WINED3DVS_VERSION(2,0), -1},
149 {WINED3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 1, 3, NULL, NULL, 0, 0},
150 {WINED3DSIO_TEXLDL, "texldl", NULL, 1, 3, NULL, shader_glsl_texldl, WINED3DVS_VERSION(3,0), -1},
151 {0, NULL, NULL, 0, 0, NULL, NULL, 0, 0}
154 static void vshader_set_limits(
155 IWineD3DVertexShaderImpl *This) {
157 This->baseShader.limits.texcoord = 0;
158 This->baseShader.limits.attributes = 16;
159 This->baseShader.limits.packed_input = 0;
161 /* Must match D3DCAPS9.MaxVertexShaderConst: at least 256 for vs_2_0 */
162 This->baseShader.limits.constant_float = GL_LIMITS(vshader_constantsF);
164 switch (This->baseShader.hex_version) {
165 case WINED3DVS_VERSION(1,0):
166 case WINED3DVS_VERSION(1,1):
167 This->baseShader.limits.temporary = 12;
168 This->baseShader.limits.constant_bool = 0;
169 This->baseShader.limits.constant_int = 0;
170 This->baseShader.limits.address = 1;
171 This->baseShader.limits.packed_output = 0;
172 This->baseShader.limits.sampler = 0;
173 This->baseShader.limits.label = 0;
174 break;
176 case WINED3DVS_VERSION(2,0):
177 case WINED3DVS_VERSION(2,1):
178 This->baseShader.limits.temporary = 12;
179 This->baseShader.limits.constant_bool = 16;
180 This->baseShader.limits.constant_int = 16;
181 This->baseShader.limits.address = 1;
182 This->baseShader.limits.packed_output = 0;
183 This->baseShader.limits.sampler = 0;
184 This->baseShader.limits.label = 16;
185 break;
187 case WINED3DVS_VERSION(3,0):
188 This->baseShader.limits.temporary = 32;
189 This->baseShader.limits.constant_bool = 32;
190 This->baseShader.limits.constant_int = 32;
191 This->baseShader.limits.address = 1;
192 This->baseShader.limits.packed_output = 12;
193 This->baseShader.limits.sampler = 4;
194 This->baseShader.limits.label = 16; /* FIXME: 2048 */
195 break;
197 default: This->baseShader.limits.temporary = 12;
198 This->baseShader.limits.constant_bool = 16;
199 This->baseShader.limits.constant_int = 16;
200 This->baseShader.limits.address = 1;
201 This->baseShader.limits.packed_output = 0;
202 This->baseShader.limits.sampler = 0;
203 This->baseShader.limits.label = 16;
204 FIXME("Unrecognized vertex shader version %#x\n",
205 This->baseShader.hex_version);
209 /* This is an internal function,
210 * used to create fake semantics for shaders
211 * that don't have them - d3d8 shaders where the declaration
212 * stores the register for each input
214 static void vshader_set_input(
215 IWineD3DVertexShaderImpl* This,
216 unsigned int regnum,
217 BYTE usage, BYTE usage_idx) {
219 /* Fake usage: set reserved bit, usage, usage_idx */
220 DWORD usage_token = (0x1 << 31) |
221 (usage << WINED3DSP_DCL_USAGE_SHIFT) | (usage_idx << WINED3DSP_DCL_USAGEINDEX_SHIFT);
223 /* Fake register; set reserved bit, regnum, type: input, wmask: all */
224 DWORD reg_token = (0x1 << 31) |
225 WINED3DSP_WRITEMASK_ALL | (WINED3DSPR_INPUT << WINED3DSP_REGTYPE_SHIFT) | regnum;
227 This->semantics_in[regnum].usage = usage_token;
228 This->semantics_in[regnum].reg = reg_token;
231 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) {
232 if (usage_idx1 != usage_idx2) return FALSE;
233 if (usage1 == usage2) return TRUE;
234 if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
235 if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE;
237 return FALSE;
240 BOOL vshader_get_input(
241 IWineD3DVertexShader* iface,
242 BYTE usage_req, BYTE usage_idx_req,
243 unsigned int* regnum) {
245 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
246 int i;
248 for (i = 0; i < MAX_ATTRIBS; i++) {
249 DWORD usage_token = This->semantics_in[i].usage;
250 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
251 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
253 if (usage_token && match_usage(usage, usage_idx, usage_req, usage_idx_req)) {
254 *regnum = i;
255 return TRUE;
258 return FALSE;
261 BOOL vshader_input_is_color(
262 IWineD3DVertexShader* iface,
263 unsigned int regnum) {
265 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl*) iface;
267 DWORD usage_token = This->semantics_in[regnum].usage;
268 DWORD usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
269 DWORD usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
271 int i;
273 for(i = 0; i < This->num_swizzled_attribs; i++) {
274 if(This->swizzled_attribs[i].usage == usage &&
275 This->swizzled_attribs[i].idx == usage_idx) {
276 return TRUE;
279 return FALSE;
282 static inline void find_swizzled_attribs(IWineD3DVertexDeclaration *declaration, IWineD3DVertexShaderImpl *This) {
283 UINT num = 0, i, j;
284 UINT numoldswizzles = This->num_swizzled_attribs;
285 IWineD3DVertexDeclarationImpl *decl = (IWineD3DVertexDeclarationImpl *) declaration;
287 DWORD usage_token, usage, usage_idx;
288 BOOL found;
290 attrib_declaration oldswizzles[sizeof(This->swizzled_attribs) / sizeof(This->swizzled_attribs[0])];
292 /* Back up the old swizzles to keep attributes that are undefined in the current declaration */
293 memcpy(oldswizzles, This->swizzled_attribs, sizeof(oldswizzles));
295 memset(This->swizzled_attribs, 0, sizeof(This->swizzled_attribs[0]) * MAX_ATTRIBS);
297 for(i = 0; i < decl->num_swizzled_attribs; i++) {
298 for(j = 0; j < MAX_ATTRIBS; j++) {
300 if(!This->baseShader.reg_maps.attributes[j]) continue;
302 usage_token = This->semantics_in[j].usage;
303 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
304 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
306 if(decl->swizzled_attribs[i].usage == usage &&
307 decl->swizzled_attribs[i].idx == usage_idx) {
308 This->swizzled_attribs[num].usage = usage;
309 This->swizzled_attribs[num].idx = usage_idx;
310 num++;
315 /* Add previously converted attributes back in if they are not defined in the current declaration */
316 for(i = 0; i < numoldswizzles; i++) {
318 found = FALSE;
319 for(j = 0; j < decl->declarationWNumElements; j++) {
320 if(oldswizzles[i].usage == decl->pDeclarationWine[j].Usage &&
321 oldswizzles[i].idx == decl->pDeclarationWine[j].UsageIndex) {
322 found = TRUE;
325 if(found) {
326 /* This previously converted attribute is declared in the current declaration. Either it is
327 * already in the new array, or it should not be there. Skip it
329 continue;
331 /* We have a previously swizzled attribute that is not defined by the current vertex declaration.
332 * Insert it into the new conversion array to keep it in the old defined state. Otherwise we end up
333 * recompiling if the old decl is used again because undefined attributes are reset to no swizzling.
334 * In the reverse way(attribute was not swizzled and is not declared in new declaration) the attrib
335 * stays unswizzled as well because it isn't found in the oldswizzles array
337 for(j = 0; j < num; j++) {
338 if(oldswizzles[i].usage > This->swizzled_attribs[j].usage || (
339 oldswizzles[i].usage == This->swizzled_attribs[j].usage &&
340 oldswizzles[i].idx > This->swizzled_attribs[j].idx)) {
341 memmove(&This->swizzled_attribs[j + 1], &This->swizzled_attribs[j],
342 sizeof(This->swizzled_attribs) - (sizeof(This->swizzled_attribs[0]) * (j + 1)));
343 break;
346 This->swizzled_attribs[j].usage = oldswizzles[i].usage;
347 This->swizzled_attribs[j].idx = oldswizzles[i].idx;
348 num++;
351 TRACE("New swizzled attributes array\n");
352 for(i = 0; i < num; i++) {
353 TRACE("%d: %s(%d), %d\n", i, debug_d3ddeclusage(This->swizzled_attribs[i].usage),
354 This->swizzled_attribs[i].usage, This->swizzled_attribs[i].idx);
356 This->num_swizzled_attribs = num;
358 /** Generate a vertex shader string using either GL_VERTEX_PROGRAM_ARB
359 or GLSL and send it to the card */
360 static VOID IWineD3DVertexShaderImpl_GenerateShader(
361 IWineD3DVertexShader *iface,
362 shader_reg_maps* reg_maps,
363 CONST DWORD *pFunction) {
365 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
366 IWineD3DVertexDeclaration *decl = ((IWineD3DDeviceImpl *) This->baseShader.device)->stateBlock->vertexDecl;
367 SHADER_BUFFER buffer;
369 find_swizzled_attribs(decl, This);
371 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
372 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
373 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
374 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
375 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
376 This->fixupVertexBufferSize = PGMSIZE;
377 This->fixupVertexBuffer[0] = 0;
379 buffer.buffer = This->device->fixupVertexBuffer;
380 #else
381 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
382 #endif
383 buffer.bsize = 0;
384 buffer.lineNo = 0;
385 buffer.newline = TRUE;
387 if (This->baseShader.shader_mode == SHADER_GLSL) {
389 /* Create the hw GLSL shader program and assign it as the baseShader.prgId */
390 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
392 /* Base Declarations */
393 shader_generate_glsl_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
395 /* Base Shader Body */
396 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
398 /* Unpack 3.0 outputs */
399 if (This->baseShader.hex_version >= WINED3DVS_VERSION(3,0)) {
400 shader_addline(&buffer, "order_ps_input(OUT);\n");
401 } else {
402 shader_addline(&buffer, "order_ps_input();\n");
405 /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
406 if (!reg_maps->fog)
407 shader_addline(&buffer, "gl_FogFragCoord = gl_Position.z;\n");
409 /* Write the final position.
411 * OpenGL coordinates specify the center of the pixel while d3d coords specify
412 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
413 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
414 * contains 1.0 to allow a mad.
416 shader_addline(&buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
417 shader_addline(&buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
419 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
421 * Basically we want(in homogenous coordinates) z = z * 2 - 1. However, shaders are run
422 * before the homogenous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
423 * which is the same as z = z / 2 - w.
425 shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
427 shader_addline(&buffer, "}\n");
429 TRACE("Compiling shader object %u\n", shader_obj);
430 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer.buffer, NULL));
431 GL_EXTCALL(glCompileShaderARB(shader_obj));
432 print_glsl_info_log(&GLINFO_LOCATION, shader_obj);
434 /* Store the shader object */
435 This->baseShader.prgId = shader_obj;
437 } else if (This->baseShader.shader_mode == SHADER_ARB) {
439 /* Create the hw ARB shader */
440 shader_addline(&buffer, "!!ARBvp1.0\n");
441 shader_addline(&buffer, "PARAM helper_const = { 2.0, -1.0, %d.0, 0.0 };\n", This->rel_offset);
443 /* Mesa supports only 95 constants */
444 if (GL_VEND(MESA) || GL_VEND(WINE))
445 This->baseShader.limits.constant_float =
446 min(95, This->baseShader.limits.constant_float);
448 shader_addline(&buffer, "TEMP TMP;\n");
450 /* Base Declarations */
451 shader_generate_arb_declarations( (IWineD3DBaseShader*) This, reg_maps, &buffer, &GLINFO_LOCATION);
453 /* We need a constant to fixup the final position */
454 shader_addline(&buffer, "PARAM posFixup = program.env[%d];\n", ARB_SHADER_PRIVCONST_POS);
456 if((GLINFO_LOCATION).set_texcoord_w) {
457 int i;
458 for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
459 if(This->baseShader.reg_maps.texcoord_mask[i] != 0 &&
460 This->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
461 shader_addline(&buffer, "MOV result.texcoord[%u].w, -helper_const.y;\n", i);
466 /* Base Shader Body */
467 shader_generate_main( (IWineD3DBaseShader*) This, &buffer, reg_maps, pFunction);
469 /* If this shader doesn't use fog copy the z coord to the fog coord so that we can use table fog */
470 if (!reg_maps->fog)
471 shader_addline(&buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
473 /* Write the final position.
475 * OpenGL coordinates specify the center of the pixel while d3d coords specify
476 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
477 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
478 * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
480 shader_addline(&buffer, "MUL TMP, posFixup, TMP_OUT.w;\n");
481 shader_addline(&buffer, "ADD TMP_OUT.x, TMP_OUT.x, TMP.z;\n");
482 shader_addline(&buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TMP.w;\n");
484 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
485 * and the glsl equivalent
487 shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, helper_const.x, -TMP_OUT.w;\n");
489 shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
491 shader_addline(&buffer, "END\n");
493 /* TODO: change to resource.glObjectHandle or something like that */
494 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
496 TRACE("Creating a hw vertex shader, prg=%d\n", This->baseShader.prgId);
497 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->baseShader.prgId));
499 TRACE("Created hw vertex shader, prg=%d\n", This->baseShader.prgId);
500 /* Create the program and check for errors */
501 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
502 buffer.bsize, buffer.buffer));
504 if (glGetError() == GL_INVALID_OPERATION) {
505 GLint errPos;
506 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
507 FIXME("HW VertexShader Error at position %d: %s\n",
508 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
509 This->baseShader.prgId = -1;
513 #if 1 /* if were using the data buffer of device then we don't need to free it */
514 HeapFree(GetProcessHeap(), 0, buffer.buffer);
515 #endif
518 /* *******************************************
519 IWineD3DVertexShader IUnknown parts follow
520 ******************************************* */
521 static HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj) {
522 return IWineD3DBaseShaderImpl_QueryInterface((IWineD3DBaseShader *) iface, riid, ppobj);
525 static ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
526 return IWineD3DBaseShaderImpl_AddRef((IWineD3DBaseShader *) iface);
529 static ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
530 return IWineD3DBaseShaderImpl_Release((IWineD3DBaseShader *) iface);
533 /* *******************************************
534 IWineD3DVertexShader IWineD3DVertexShader parts follow
535 ******************************************* */
537 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
538 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
540 *parent = This->parent;
541 IUnknown_AddRef(*parent);
542 TRACE("(%p) : returning %p\n", This, *parent);
543 return WINED3D_OK;
546 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
547 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
548 IWineD3DDevice_AddRef(This->baseShader.device);
549 *pDevice = This->baseShader.device;
550 TRACE("(%p) returning %p\n", This, *pDevice);
551 return WINED3D_OK;
554 static HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
555 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
556 TRACE("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
558 if (NULL == pData) {
559 *pSizeOfData = This->baseShader.functionLength;
560 return WINED3D_OK;
562 if (*pSizeOfData < This->baseShader.functionLength) {
563 /* MSDN claims (for d3d8 at least) that if *pSizeOfData is smaller
564 * than the required size we should write the required size and
565 * return D3DERR_MOREDATA. That's not actually true. */
566 return WINED3DERR_INVALIDCALL;
568 if (NULL == This->baseShader.function) { /* no function defined */
569 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
570 (*(DWORD **) pData) = NULL;
571 } else {
572 if(This->baseShader.functionLength == 0){
575 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
576 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
578 return WINED3D_OK;
581 /* Note that for vertex shaders CompileShader isn't called until the
582 * shader is first used. The reason for this is that we need the vertex
583 * declaration the shader will be used with in order to determine if
584 * the data in a register is of type D3DCOLOR, and needs swizzling. */
585 static HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
587 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
588 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
589 HRESULT hr;
590 shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
592 TRACE("(%p) : pFunction %p\n", iface, pFunction);
594 /* First pass: trace shader */
595 shader_trace_init((IWineD3DBaseShader*) This, pFunction);
596 vshader_set_limits(This);
598 /* Initialize immediate constant lists */
599 list_init(&This->baseShader.constantsF);
600 list_init(&This->baseShader.constantsB);
601 list_init(&This->baseShader.constantsI);
603 /* Second pass: figure out registers used, semantics, etc.. */
604 This->min_rel_offset = GL_LIMITS(vshader_constantsF);
605 This->max_rel_offset = 0;
606 memset(reg_maps, 0, sizeof(shader_reg_maps));
607 hr = shader_get_registers_used((IWineD3DBaseShader*) This, reg_maps,
608 This->semantics_in, This->semantics_out, pFunction, NULL);
609 if (hr != WINED3D_OK) return hr;
611 This->baseShader.shader_mode = deviceImpl->vs_selected_mode;
613 if(deviceImpl->vs_selected_mode == SHADER_ARB &&
614 (GLINFO_LOCATION).arb_vs_offset_limit &&
615 This->min_rel_offset <= This->max_rel_offset) {
617 if(This->max_rel_offset - This->min_rel_offset > 127) {
618 FIXME("The difference between the minimum and maximum relative offset is > 127\n");
619 FIXME("Which this OpenGL implementation does not support. Try using GLSL\n");
620 FIXME("Min: %d, Max: %d\n", This->min_rel_offset, This->max_rel_offset);
621 } else if(This->max_rel_offset - This->min_rel_offset > 63) {
622 This->rel_offset = This->min_rel_offset + 63;
623 } else if(This->max_rel_offset > 63) {
624 This->rel_offset = This->min_rel_offset;
625 } else {
626 This->rel_offset = 0;
629 This->baseShader.load_local_constsF = This->baseShader.reg_maps.usesrelconstF && !list_empty(&This->baseShader.constantsF);
631 /* copy the function ... because it will certainly be released by application */
632 if (NULL != pFunction) {
633 void *function;
635 function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
636 if (!function) return E_OUTOFMEMORY;
637 memcpy(function, pFunction, This->baseShader.functionLength);
638 This->baseShader.function = function;
639 } else {
640 This->baseShader.function = NULL;
643 return WINED3D_OK;
646 /* Preload semantics for d3d8 shaders */
647 static void WINAPI IWineD3DVertexShaderImpl_FakeSemantics(IWineD3DVertexShader *iface, IWineD3DVertexDeclaration *vertex_declaration) {
648 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
649 IWineD3DVertexDeclarationImpl* vdecl = (IWineD3DVertexDeclarationImpl*)vertex_declaration;
651 int i;
652 for (i = 0; i < vdecl->declarationWNumElements - 1; ++i) {
653 WINED3DVERTEXELEMENT* element = vdecl->pDeclarationWine + i;
654 vshader_set_input(This, element->Reg, element->Usage, element->UsageIndex);
658 /* Set local constants for d3d8 shaders */
659 static HRESULT WINAPI IWIneD3DVertexShaderImpl_SetLocalConstantsF(IWineD3DVertexShader *iface,
660 UINT start_idx, const float *src_data, UINT count) {
661 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
662 UINT i, end_idx;
664 TRACE("(%p) : start_idx %u, src_data %p, count %u\n", This, start_idx, src_data, count);
666 end_idx = start_idx + count;
667 if (end_idx > GL_LIMITS(vshader_constantsF)) {
668 WARN("end_idx %u > float constants limit %u\n", end_idx, GL_LIMITS(vshader_constantsF));
669 end_idx = GL_LIMITS(vshader_constantsF);
672 for (i = start_idx; i < end_idx; ++i) {
673 local_constant* lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(local_constant));
674 if (!lconst) return E_OUTOFMEMORY;
676 lconst->idx = i;
677 memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
678 list_add_head(&This->baseShader.constantsF, &lconst->entry);
681 return WINED3D_OK;
684 static inline BOOL swizzled_attribs_differ(IWineD3DVertexShaderImpl *This, IWineD3DVertexDeclarationImpl *vdecl) {
685 UINT i, j, k;
686 BOOL found;
688 DWORD usage_token;
689 DWORD usage;
690 DWORD usage_idx;
692 for(i = 0; i < vdecl->declarationWNumElements; i++) {
693 /* Ignore tesselated streams and the termination entry(position0, stream 255, unused) */
694 if(vdecl->pDeclarationWine[i].Stream >= MAX_STREAMS ||
695 vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_UNUSED) continue;
697 for(j = 0; j < MAX_ATTRIBS; j++) {
698 if(!This->baseShader.reg_maps.attributes[j]) continue;
700 usage_token = This->semantics_in[j].usage;
701 usage = (usage_token & WINED3DSP_DCL_USAGE_MASK) >> WINED3DSP_DCL_USAGE_SHIFT;
702 usage_idx = (usage_token & WINED3DSP_DCL_USAGEINDEX_MASK) >> WINED3DSP_DCL_USAGEINDEX_SHIFT;
704 if(vdecl->pDeclarationWine[i].Usage != usage ||
705 vdecl->pDeclarationWine[i].UsageIndex != usage_idx) {
706 continue;
709 found = FALSE;
710 for(k = 0; k < This->num_swizzled_attribs; k++) {
711 if(This->swizzled_attribs[k].usage == usage &&
712 This->swizzled_attribs[k].idx == usage_idx) {
713 found = TRUE;
716 if(!found && vdecl->pDeclarationWine[i].Type == WINED3DDECLTYPE_D3DCOLOR) {
717 TRACE("Attribute %s%d is D3DCOLOR now but wasn't before\n",
718 debug_d3ddeclusage(usage), usage_idx);
719 return TRUE;
721 if( found && vdecl->pDeclarationWine[i].Type != WINED3DDECLTYPE_D3DCOLOR) {
722 TRACE("Attribute %s%d was D3DCOLOR before but is not any more\n",
723 debug_d3ddeclusage(usage), usage_idx);
724 return TRUE;
728 return FALSE;
731 static HRESULT WINAPI IWineD3DVertexShaderImpl_CompileShader(IWineD3DVertexShader *iface) {
732 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
733 IWineD3DVertexDeclarationImpl *vdecl;
734 CONST DWORD *function = This->baseShader.function;
735 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *) This->baseShader.device;
737 TRACE("(%p) : function %p\n", iface, function);
739 /* We're already compiled. */
740 if (This->baseShader.is_compiled) {
741 vdecl = (IWineD3DVertexDeclarationImpl *) deviceImpl->stateBlock->vertexDecl;
743 if(This->num_swizzled_attribs != vdecl->num_swizzled_attribs ||
744 memcmp(This->swizzled_attribs, vdecl->swizzled_attribs, sizeof(vdecl->swizzled_attribs[0]) * This->num_swizzled_attribs) != 0) {
746 /* The swizzled attributes differ between shader and declaration. This doesn't necessarily mean
747 * we have to recompile, but we have to take a deeper look at see if the attribs that differ
748 * are declared in the decl and used in the shader
750 if(swizzled_attribs_differ(This, vdecl)) {
751 WARN("Recompiling vertex shader %p due to D3DCOLOR input changes\n", This);
752 goto recompile;
754 WARN("Swizzled attribute validation required an expensive comparison\n");
757 return WINED3D_OK;
759 recompile:
760 if(This->recompile_count < 50) {
761 This->recompile_count++;
762 } else {
763 FIXME("Vertexshader %p recompiled more than 50 times\n", This);
766 deviceImpl->shader_backend->shader_destroy((IWineD3DBaseShader *) iface);
769 /* We don't need to compile */
770 if (!function) {
771 This->baseShader.is_compiled = TRUE;
772 return WINED3D_OK;
775 /* Generate the HW shader */
776 TRACE("(%p) : Generating hardware program\n", This);
777 IWineD3DVertexShaderImpl_GenerateShader(iface, &This->baseShader.reg_maps, function);
779 This->baseShader.is_compiled = TRUE;
781 return WINED3D_OK;
784 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
786 /*** IUnknown methods ***/
787 IWineD3DVertexShaderImpl_QueryInterface,
788 IWineD3DVertexShaderImpl_AddRef,
789 IWineD3DVertexShaderImpl_Release,
790 /*** IWineD3DBase methods ***/
791 IWineD3DVertexShaderImpl_GetParent,
792 /*** IWineD3DBaseShader methods ***/
793 IWineD3DVertexShaderImpl_SetFunction,
794 IWineD3DVertexShaderImpl_CompileShader,
795 /*** IWineD3DVertexShader methods ***/
796 IWineD3DVertexShaderImpl_GetDevice,
797 IWineD3DVertexShaderImpl_GetFunction,
798 IWineD3DVertexShaderImpl_FakeSemantics,
799 IWIneD3DVertexShaderImpl_SetLocalConstantsF