wined3d: Enable color fixups for vertex shaders.
[wine/multimedia.git] / dlls / wined3d / vertexshader.c
blob000a1aa911edfbe4853ff5c159b0c7ec62fbd4b8
1 /*
2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "config.h"
25 #include <math.h>
26 #include <stdio.h>
28 #include "wined3d_private.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
32 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
/* Shader debugging: change the condition below to 1 to trace the software
 * vertex shader instructions as they execute. When it is 0 both macros
 * expand to nothing. */
#if 0 /* Must not be 1 in cvs version */
# define VSTRACE(A) TRACE A
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
# define VSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif
44 #if 1 /* FIXME : Needs sorting when vshader code moved in properly */
46 /**
47 * DirectX9 SDK download
48 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
50 * Exploring D3DX
51 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
53 * Using Vertex Shaders
54 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
56 * Dx9 New
57 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
59 * Dx9 Shaders
60 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
65 * Dx9 D3DX
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
68 * FVF
69 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
71 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72 * http://developer.nvidia.com/view.asp?IO=vstovp
74 * NVIDIA: Memory Management with VAR
75 * http://developer.nvidia.com/view.asp?IO=var_memory_management
/* TODO: Vertex and pixel shaders are almost identical; the only differences are the way some of the data is looked up
   and the availability of some of the data, i.e. some instructions are only valid for pixel shaders and some only for
   vertex shaders. Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders...
   and it wouldn't surprise me if the programs could be cross compiled using a large body of shared code. */
81 typedef void (*shader_fct_t)();
83 typedef struct SHADER_OPCODE {
84 unsigned int opcode;
85 const char* name;
86 const char* glname;
87 CONST UINT num_params;
88 shader_fct_t soft_fct;
89 DWORD min_version;
90 DWORD max_version;
91 } SHADER_OPCODE;
93 #define GLNAME_REQUIRE_GLSL ((const char *)1)
95 /*******************************
96 * vshader functions software VM
99 void vshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
100 d->x = s0->x + s1->x;
101 d->y = s0->y + s1->y;
102 d->z = s0->z + s1->z;
103 d->w = s0->w + s1->w;
104 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
105 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
108 void vshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
109 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
110 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
111 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
114 void vshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
115 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
116 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
117 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
120 void vshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
121 d->x = 1.0f;
122 d->y = s0->y * s1->y;
123 d->z = s0->z;
124 d->w = s1->w;
125 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
126 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
129 void vshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
130 union {
131 float f;
132 DWORD d;
133 } tmp;
135 tmp.f = floorf(s0->w);
136 d->x = powf(2.0f, tmp.f);
137 d->y = s0->w - tmp.f;
138 tmp.f = powf(2.0f, s0->w);
139 tmp.d &= 0xFFFFFF00U;
140 d->z = tmp.f;
141 d->w = 1.0f;
142 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
143 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
146 void vshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
147 d->x = 1.0f;
148 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
149 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
150 d->w = 1.0f;
151 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
152 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
155 void vshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
156 float tmp_f = fabsf(s0->w);
157 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
158 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
159 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
162 void vshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
163 d->x = s0->x * s1->x + s2->x;
164 d->y = s0->y * s1->y + s2->y;
165 d->z = s0->z * s1->z + s2->z;
166 d->w = s0->w * s1->w + s2->w;
167 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
168 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
171 void vshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
172 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
173 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
174 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
175 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
176 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
177 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
180 void vshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
181 d->x = (s0->x < s1->x) ? s0->x : s1->x;
182 d->y = (s0->y < s1->y) ? s0->y : s1->y;
183 d->z = (s0->z < s1->z) ? s0->z : s1->z;
184 d->w = (s0->w < s1->w) ? s0->w : s1->w;
185 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
186 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
189 void vshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
190 d->x = s0->x;
191 d->y = s0->y;
192 d->z = s0->z;
193 d->w = s0->w;
194 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
195 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
198 void vshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
199 d->x = s0->x * s1->x;
200 d->y = s0->y * s1->y;
201 d->z = s0->z * s1->z;
202 d->w = s0->w * s1->w;
203 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
204 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: does nothing apart from the optional debug trace. */
void vshader_nop(void) {
    VSTRACE(("executing nop\n"));
}
212 void vshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
213 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
214 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
215 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
218 void vshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
219 float tmp_f = fabsf(s0->w);
220 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
221 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
222 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
225 void vshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
226 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
227 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
228 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
229 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
230 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
231 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
234 void vshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
235 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
236 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
237 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
238 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
239 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
240 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
243 void vshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
244 d->x = s0->x - s1->x;
245 d->y = s0->y - s1->y;
246 d->z = s0->z - s1->z;
247 d->w = s0->w - s1->w;
248 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
249 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
253 * Version 1.1 specific
256 void vshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
257 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
258 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
259 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
262 void vshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
263 float tmp_f = fabsf(s0->w);
264 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
265 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
266 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
269 void vshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
270 d->x = s0->x - floorf(s0->x);
271 d->y = s0->y - floorf(s0->y);
272 d->z = 0.0f;
273 d->w = 1.0f;
274 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
275 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
278 typedef FLOAT D3DMATRIX44[4][4];
279 typedef FLOAT D3DMATRIX43[4][3];
280 typedef FLOAT D3DMATRIX34[3][4];
281 typedef FLOAT D3DMATRIX33[3][3];
282 typedef FLOAT D3DMATRIX23[2][3];
284 void vshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
286 * Buggy CODE: here only if cast not work for copy/paste
287 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
288 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
289 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
290 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
291 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
292 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
293 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
295 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
296 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
297 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
298 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
299 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
300 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
301 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
302 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
305 void vshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
306 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
307 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
308 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
309 d->w = 1.0f;
310 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
311 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
312 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
313 VSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
316 void vshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
317 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
318 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
319 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
320 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
321 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
322 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
323 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
324 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
327 void vshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
328 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
329 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
330 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
331 d->w = 1.0f;
332 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
333 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
334 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
335 VSTRACE(("executing m3x3(4): (%f) \n", d->w));
338 void vshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
339 FIXME("check\n");
340 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
341 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
342 d->z = 0.0f;
343 d->w = 1.0f;
347 * Version 2.0 specific
349 void vshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
350 d->x = s0->x * (s1->x - s2->x) + s2->x;
351 d->y = s0->y * (s1->y - s2->y) + s2->y;
352 d->z = s0->z * (s1->z - s2->z) + s2->z;
353 d->w = s0->w * (s1->w - s2->w) + s2->w;
356 void vshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
357 d->x = s0->y * s1->z - s0->z * s1->y;
358 d->y = s0->z * s1->x - s0->x * s1->z;
359 d->z = s0->x * s1->y - s0->y * s1->x;
360 d->w = 0.9f; /* w is undefined, so set it to something safeish */
362 VSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
363 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
366 void vshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
368 d->x = fabsf(s0->x);
369 d->y = fabsf(s0->y);
370 d->z = fabsf(s0->z);
371 d->w = fabsf(s0->w);
372 VSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
373 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
376 /* Stubs */
377 void vshader_texcoord(WINED3DSHADERVECTOR* d) {
378 FIXME(" : Stub\n");
381 void vshader_texkill(WINED3DSHADERVECTOR* d) {
382 FIXME(" : Stub\n");
385 void vshader_tex(WINED3DSHADERVECTOR* d) {
386 FIXME(" : Stub\n");
388 void vshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
389 FIXME(" : Stub\n");
392 void vshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
393 FIXME(" : Stub\n");
396 void vshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
397 FIXME(" : Stub\n");
400 void vshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
401 FIXME(" : Stub\n");
404 void vshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
405 FIXME(" : Stub\n");
408 void vshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
409 FIXME(" : Stub\n");
412 void vshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
413 FIXME(" : Stub\n");
416 void vshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
417 FIXME(" : Stub\n");
420 void vshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
421 FIXME(" : Stub\n");
424 void vshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
425 FIXME(" : Stub\n");
428 void vshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
429 FIXME(" : Stub\n");
432 void vshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
433 FIXME(" : Stub\n");
436 void vshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
437 FIXME(" : Stub\n");
440 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
441 void vshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
442 FIXME(" : Stub\n");
445 void vshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
446 FIXME(" : Stub\n");
449 void vshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
450 FIXME(" : Stub\n");
453 void vshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
454 FIXME(" : Stub\n");
457 void vshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
458 FIXME(" : Stub\n");
461 void vshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
462 FIXME(" : Stub\n");
465 void vshader_texdepth(WINED3DSHADERVECTOR* d) {
466 FIXME(" : Stub\n");
469 void vshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
470 FIXME(" : Stub\n");
473 void vshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
474 FIXME(" : Stub\n");
477 void vshader_call(WINED3DSHADERVECTOR* d) {
478 FIXME(" : Stub\n");
481 void vshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
482 FIXME(" : Stub\n");
485 void vshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
486 FIXME(" : Stub\n");
489 void vshader_ret(WINED3DSHADERVECTOR* d) {
490 FIXME(" : Stub\n");
493 void vshader_endloop(WINED3DSHADERVECTOR* d) {
494 FIXME(" : Stub\n");
497 void vshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
498 FIXME(" : Stub\n");
501 void vshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
502 FIXME(" : Stub\n");
505 void vshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
506 FIXME(" : Stub\n");
509 void vshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
510 FIXME(" : Stub\n");
513 void vshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
514 FIXME(" : Stub\n");
517 void vshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
518 FIXME(" : Stub\n");
/* endrep: no software implementation yet; only reports a FIXME. */
void vshader_endrep(void) {
    FIXME(" : Stub\n");
}
525 void vshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
526 FIXME(" : Stub\n");
529 void vshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
530 FIXME(" : Stub\n");
533 void vshader_else(WINED3DSHADERVECTOR* d) {
534 FIXME(" : Stub\n");
537 void vshader_label(WINED3DSHADERVECTOR* d) {
538 FIXME(" : Stub\n");
541 void vshader_endif(WINED3DSHADERVECTOR* d) {
542 FIXME(" : Stub\n");
545 void vshader_break(WINED3DSHADERVECTOR* d) {
546 FIXME(" : Stub\n");
549 void vshader_breakc(WINED3DSHADERVECTOR* d) {
550 FIXME(" : Stub\n");
553 void vshader_mova(WINED3DSHADERVECTOR* d) {
554 FIXME(" : Stub\n");
557 void vshader_defb(WINED3DSHADERVECTOR* d) {
558 FIXME(" : Stub\n");
561 void vshader_defi(WINED3DSHADERVECTOR* d) {
562 FIXME(" : Stub\n");
565 void vshader_dp2add(WINED3DSHADERVECTOR* d) {
566 FIXME(" : Stub\n");
569 void vshader_dsx(WINED3DSHADERVECTOR* d) {
570 FIXME(" : Stub\n");
573 void vshader_dsy(WINED3DSHADERVECTOR* d) {
574 FIXME(" : Stub\n");
577 void vshader_texldd(WINED3DSHADERVECTOR* d) {
578 FIXME(" : Stub\n");
581 void vshader_setp(WINED3DSHADERVECTOR* d) {
582 FIXME(" : Stub\n");
585 void vshader_texldl(WINED3DSHADERVECTOR* d) {
586 FIXME(" : Stub\n");
589 void vshader_breakp(WINED3DSHADERVECTOR* d) {
590 FIXME(" : Stub\n");
595 * log, exp, frc, m*x* seems to be macros ins ... to see
597 static CONST SHADER_OPCODE vshader_ins [] = {
598 {D3DSIO_NOP, "nop", "NOP", 0, vshader_nop, 0, 0},
599 {D3DSIO_MOV, "mov", "MOV", 2, vshader_mov, 0, 0},
600 {D3DSIO_ADD, "add", "ADD", 3, vshader_add, 0, 0},
601 {D3DSIO_SUB, "sub", "SUB", 3, vshader_sub, 0, 0},
602 {D3DSIO_MAD, "mad", "MAD", 4, vshader_mad, 0, 0},
603 {D3DSIO_MUL, "mul", "MUL", 3, vshader_mul, 0, 0},
604 {D3DSIO_RCP, "rcp", "RCP", 2, vshader_rcp, 0, 0},
605 {D3DSIO_RSQ, "rsq", "RSQ", 2, vshader_rsq, 0, 0},
606 {D3DSIO_DP3, "dp3", "DP3", 3, vshader_dp3, 0, 0},
607 {D3DSIO_DP4, "dp4", "DP4", 3, vshader_dp4, 0, 0},
608 {D3DSIO_MIN, "min", "MIN", 3, vshader_min, 0, 0},
609 {D3DSIO_MAX, "max", "MAX", 3, vshader_max, 0, 0},
610 {D3DSIO_SLT, "slt", "SLT", 3, vshader_slt, 0, 0},
611 {D3DSIO_SGE, "sge", "SGE", 3, vshader_sge, 0, 0},
612 {D3DSIO_ABS, "abs", "ABS", 2, vshader_abs, 0, 0},
613 {D3DSIO_EXP, "exp", "EX2", 2, vshader_exp, 0, 0},
614 {D3DSIO_LOG, "log", "LG2", 2, vshader_log, 0, 0},
615 {D3DSIO_LIT, "lit", "LIT", 2, vshader_lit, 0, 0},
616 {D3DSIO_DST, "dst", "DST", 3, vshader_dst, 0, 0},
617 {D3DSIO_LRP, "lrp", "LRP", 4, vshader_lrp, 0, 0},
618 {D3DSIO_FRC, "frc", "FRC", 2, vshader_frc, 0, 0},
619 {D3DSIO_M4x4, "m4x4", "undefined", 3, vshader_m4x4, 0, 0},
620 {D3DSIO_M4x3, "m4x3", "undefined", 3, vshader_m4x3, 0, 0},
621 {D3DSIO_M3x4, "m3x4", "undefined", 3, vshader_m3x4, 0, 0},
622 {D3DSIO_M3x3, "m3x3", "undefined", 3, vshader_m3x3, 0, 0},
623 {D3DSIO_M3x2, "m3x2", "undefined", 3, vshader_m3x2, 0, 0},
624 /** FIXME: use direct access so add the others opcodes as stubs */
625 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
626 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
627 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, vshader_call, 0, 0},
628 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, vshader_callnz, 0, 0},
629 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, vshader_loop, 0, 0},
630 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, vshader_ret, 0, 0},
631 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, vshader_endloop, 0, 0},
632 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, vshader_label, 0, 0},
633 /* DCL is a specil operation */
634 {D3DSIO_DCL, "dcl", NULL, 1, vshader_dcl, 0, 0},
635 {D3DSIO_POW, "pow", "POW", 3, vshader_pow, 0, 0},
636 {D3DSIO_CRS, "crs", "XPS", 3, vshader_crs, 0, 0},
637 /* TODO: sng can possibly be performed as
638 RCP tmp, vec
639 MUL out, tmp, vec*/
640 {D3DSIO_SGN, "sng", NULL, 2, vshader_sng, 0, 0},
641 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
642 DP3 tmp , vec, vec;
643 RSQ tmp, tmp.x;
644 MUL vec.xyz, vec, tmp;
645 but I think this is better because it accounts for w properly.
646 DP3 tmp , vec, vec;
647 RSQ tmp, tmp.x;
648 MUL vec, vec, tmp;
651 {D3DSIO_NRM, "nrm", NULL, 2, vshader_nrm, 0, 0},
652 {D3DSIO_SINCOS, "sincos", NULL, 2, vshader_sincos, 0, 0},
653 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, vshader_rep, 0, 0},
654 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, vshader_endrep, 0, 0},
655 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, vshader_if, 0, 0},
656 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, vshader_ifc, 0, 0},
657 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, vshader_else, 0, 0},
658 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, vshader_endif, 0, 0},
659 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, vshader_break, 0, 0},
660 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, vshader_breakc, 0, 0},
661 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, vshader_mova, 0, 0},
662 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, vshader_defb, 0, 0},
663 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, vshader_defi, 0, 0},
665 {D3DSIO_TEXCOORD, "texcoord", GLNAME_REQUIRE_GLSL, 1, vshader_texcoord, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
666 {D3DSIO_TEXCOORD, "texcrd", GLNAME_REQUIRE_GLSL, 2, vshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
667 {D3DSIO_TEXKILL, "texkill", GLNAME_REQUIRE_GLSL, 1, vshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
668 {D3DSIO_TEX, "tex", GLNAME_REQUIRE_GLSL, 1, vshader_tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
669 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, vshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
670 {D3DSIO_TEXBEM, "texbem", GLNAME_REQUIRE_GLSL, 2, vshader_texbem, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
671 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, vshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
672 {D3DSIO_TEXREG2AR,"texreg2ar",GLNAME_REQUIRE_GLSL, 2, vshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
673 {D3DSIO_TEXREG2GB,"texreg2gb",GLNAME_REQUIRE_GLSL, 2, vshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
674 {D3DSIO_TEXM3x2PAD, "texm3x2pad", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
675 {D3DSIO_TEXM3x2TEX, "texm3x2tex", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
676 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
677 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", GLNAME_REQUIRE_GLSL, 3, vshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
678 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
680 {D3DSIO_EXPP, "expp", "EXP", 2, vshader_expp, 0, 0},
681 {D3DSIO_LOGP, "logp", "LOG", 2, vshader_logp, 0, 0},
682 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, vshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
683 /* def is a special operation */
684 {D3DSIO_DEF, "def", NULL, 5, vshader_def, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
685 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, vshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
686 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, vshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
687 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
688 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, vshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
689 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
690 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, vshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
691 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, vshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
692 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, vshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
693 /* TODO: dp2add can be made out of multiple instuctions */
694 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, vshader_dp2add, 0, 0},
695 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, vshader_dsx, 0, 0},
696 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, vshader_dsy, 0, 0},
697 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, vshader_texldd, 0, 0},
698 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, vshader_setp, 0, 0},
699 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, vshader_texldl, 0, 0},
700 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, vshader_breakp, 0, 0},
701 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, vshader_nop, 0, 0},
702 {0, NULL, NULL, 0, NULL, 0, 0}
706 inline static const SHADER_OPCODE* vshader_program_get_opcode(const DWORD code) {
707 DWORD i = 0;
708 /** TODO: use dichotomic search or hash table */
709 while (NULL != vshader_ins[i].name) {
710 if ((code & D3DSI_OPCODE_MASK) == vshader_ins[i].opcode) {
711 return &vshader_ins[i];
713 ++i;
715 FIXME("Unsupported opcode %lx\n",code);
716 return NULL;
719 inline static void vshader_program_dump_param(const DWORD param, int input) {
720 static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" };
721 static const char swizzle_reg_chars[] = "xyzw";
723 DWORD reg = param & 0x00001FFF;
724 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
726 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) TRACE("-");
728 switch (regtype) {
729 case D3DSPR_TEMP:
730 TRACE("R[%lu]", reg);
731 break;
732 case D3DSPR_INPUT:
733 TRACE("v%lu", reg);
734 break;
735 case D3DSPR_CONST:
736 TRACE("C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
737 break;
738 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
739 TRACE("a[%lu]", reg);
740 break;
741 case D3DSPR_RASTOUT:
742 TRACE("%s", rastout_reg_names[reg]);
743 break;
744 case D3DSPR_ATTROUT:
745 TRACE("oD[%lu]", reg);
746 break;
747 case D3DSPR_TEXCRDOUT:
748 TRACE("oT[%lu]", reg);
749 break;
750 default:
751 FIXME("Unknown %lu %u reg %lu\n",regtype, D3DSPR_ATTROUT, reg);
752 break;
755 if (!input) {
756 /** operand output */
757 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
758 if (param & D3DSP_WRITEMASK_0) TRACE(".x");
759 if (param & D3DSP_WRITEMASK_1) TRACE(".y");
760 if (param & D3DSP_WRITEMASK_2) TRACE(".z");
761 if (param & D3DSP_WRITEMASK_3) TRACE(".w");
763 } else {
764 /** operand input */
765 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
766 DWORD swizzle_x = swizzle & 0x03;
767 DWORD swizzle_y = (swizzle >> 2) & 0x03;
768 DWORD swizzle_z = (swizzle >> 4) & 0x03;
769 DWORD swizzle_w = (swizzle >> 6) & 0x03;
771 * swizzle bits fields:
772 * WWZZYYXX
774 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
775 if (swizzle_x == swizzle_y &&
776 swizzle_x == swizzle_z &&
777 swizzle_x == swizzle_w) {
778 TRACE(".%c", swizzle_reg_chars[swizzle_x]);
779 } else {
780 TRACE(".%c%c%c%c",
781 swizzle_reg_chars[swizzle_x],
782 swizzle_reg_chars[swizzle_y],
783 swizzle_reg_chars[swizzle_z],
784 swizzle_reg_chars[swizzle_w]);
790 inline static void vshader_program_dump_vs_param(const DWORD param, int input) {
791 static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" };
792 static const char swizzle_reg_chars[] = "xyzw";
793 /* the unknown mask is for bits not yet accounted for by any other mask... */
794 #define UNKNOWN_MASK 0xC000
796 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
797 #define EXTENDED_REG 0x1800
799 DWORD reg = param & D3DSP_REGNUM_MASK; /* 0x00001FFF; isn't this D3DSP_REGNUM_MASK? */
800 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
802 if(param & UNKNOWN_MASK) { /* if this register has any of the unknown bits set then report them*/
803 FIXME("Unknown bits set regtype %lx , %lx, UK(%lx)\n", regtype, (param & EXTENDED_REG), param & UNKNOWN_MASK);
806 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) TRACE("-");
808 switch (regtype /*<< D3DSP_REGTYPE_SHIFT*/) {
809 case D3DSPR_TEMP:
810 TRACE("r%lu", reg);
811 break;
812 case D3DSPR_INPUT:
813 TRACE("v%lu", reg);
814 break;
815 case D3DSPR_CONST:
816 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
817 break;
818 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
819 TRACE("a%lu", reg);
820 break;
821 case D3DSPR_RASTOUT:
822 TRACE("%s", rastout_reg_names[reg]);
823 break;
824 case D3DSPR_ATTROUT:
825 TRACE("oD%lu", reg);
826 break;
827 case D3DSPR_TEXCRDOUT:
828 TRACE("oT%lu", reg);
829 break;
830 case D3DSPR_CONSTINT:
831 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
832 break;
833 case D3DSPR_CONSTBOOL:
834 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
835 break;
836 case D3DSPR_LABEL:
837 TRACE("l%lu", reg);
838 break;
839 case D3DSPR_LOOP:
840 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
841 break;
842 default:
843 FIXME("Unknown %lu reg %lu\n",regtype, reg);
844 break;
847 if (!input) {
848 /** operand output */
849 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
850 if (param & D3DSP_WRITEMASK_0) TRACE(".x");
851 if (param & D3DSP_WRITEMASK_1) TRACE(".y");
852 if (param & D3DSP_WRITEMASK_2) TRACE(".z");
853 if (param & D3DSP_WRITEMASK_3) TRACE(".w");
855 } else {
856 /** operand input */
857 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
858 DWORD swizzle_x = swizzle & 0x03;
859 DWORD swizzle_y = (swizzle >> 2) & 0x03;
860 DWORD swizzle_z = (swizzle >> 4) & 0x03;
861 DWORD swizzle_w = (swizzle >> 6) & 0x03;
863 * swizzle bits fields:
864 * WWZZYYXX
866 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
867 if (swizzle_x == swizzle_y &&
868 swizzle_x == swizzle_z &&
869 swizzle_x == swizzle_w) {
870 TRACE(".%c", swizzle_reg_chars[swizzle_x]);
871 } else {
872 TRACE(".%c%c%c%c",
873 swizzle_reg_chars[swizzle_x],
874 swizzle_reg_chars[swizzle_y],
875 swizzle_reg_chars[swizzle_z],
876 swizzle_reg_chars[swizzle_w]);
882 inline static BOOL vshader_is_version_token(DWORD token) {
883 return 0xFFFE0000 == (token & 0xFFFE0000);
886 inline static BOOL vshader_is_comment_token(DWORD token) {
887 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
890 inline static void vshader_program_add_output_param_swizzle(const DWORD param, int is_color, char *hwLine) {
891 /** operand output */
892 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
893 strcat(hwLine, ".");
894 if (param & D3DSP_WRITEMASK_0) { strcat(hwLine, "x"); }
895 if (param & D3DSP_WRITEMASK_1) { strcat(hwLine, "y"); }
896 if (param & D3DSP_WRITEMASK_2) { strcat(hwLine, "z"); }
897 if (param & D3DSP_WRITEMASK_3) { strcat(hwLine, "w"); }
901 inline static void vshader_program_add_input_param_swizzle(const DWORD param, int is_color, char *hwLine) {
902 static const char swizzle_reg_chars_color_fix[] = "zyxw";
903 static const char swizzle_reg_chars[] = "xyzw";
904 const char* swizzle_regs = NULL;
905 char tmpReg[255];
907 /** operand input */
908 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
909 DWORD swizzle_x = swizzle & 0x03;
910 DWORD swizzle_y = (swizzle >> 2) & 0x03;
911 DWORD swizzle_z = (swizzle >> 4) & 0x03;
912 DWORD swizzle_w = (swizzle >> 6) & 0x03;
914 if (is_color) {
915 swizzle_regs = swizzle_reg_chars_color_fix;
916 } else {
917 swizzle_regs = swizzle_reg_chars;
921 * swizzle bits fields:
922 * WWZZYYXX
924 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) { /* D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
925 if (is_color) {
926 sprintf(tmpReg, ".%c%c%c%c",
927 swizzle_regs[swizzle_x],
928 swizzle_regs[swizzle_y],
929 swizzle_regs[swizzle_z],
930 swizzle_regs[swizzle_w]);
931 strcat(hwLine, tmpReg);
933 return ;
935 if (swizzle_x == swizzle_y &&
936 swizzle_x == swizzle_z &&
937 swizzle_x == swizzle_w)
939 sprintf(tmpReg, ".%c", swizzle_regs[swizzle_x]);
940 strcat(hwLine, tmpReg);
941 } else {
942 sprintf(tmpReg, ".%c%c%c%c",
943 swizzle_regs[swizzle_x],
944 swizzle_regs[swizzle_y],
945 swizzle_regs[swizzle_z],
946 swizzle_regs[swizzle_w]);
947 strcat(hwLine, tmpReg);
951 inline static void vshader_program_add_param(IWineD3DVertexShaderImpl *This, const DWORD param, BOOL is_input, char *hwLine) {
952 /* oPos, oFog and oPts in D3D */
953 static const char* hwrastout_reg_names[] = { "result.position", "result.fogcoord", "result.pointsize" };
955 DWORD reg = param & 0x00001FFF;
956 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
957 char tmpReg[255];
958 BOOL is_color = FALSE;
960 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) {
961 strcat(hwLine, " -");
962 } else {
963 strcat(hwLine, " ");
966 switch (regtype) {
967 case D3DSPR_TEMP:
968 sprintf(tmpReg, "T%lu", reg);
969 strcat(hwLine, tmpReg);
970 break;
971 case D3DSPR_INPUT:
972 if (reg == This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DIFFUSE]
973 || reg == This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SPECULAR]) {
974 is_color = TRUE;
976 /* if the attributes come in as named dcl's then use a named vertex (called namedVertexN) */
977 if (This->namedArrays) {
978 sprintf(tmpReg, "namedVertex%lu", reg);
979 } else {
980 /* otherwise the input is on a numbered attribute so use opengl numbered attributes */
981 sprintf(tmpReg, "vertex.attrib[%lu]", reg);
983 strcat(hwLine, tmpReg);
984 break;
985 case D3DSPR_CONST:
986 /* FIXME: some constants are named so we need a constants map*/
987 if (This->constantsUsedBitmap[reg] == VS_CONSTANT_CONSTANT) {
988 if (param & D3DVS_ADDRMODE_RELATIVE) {
989 FIXME("Relative addressing not expected for a named constant %lu\n", reg);
991 sprintf(tmpReg, "const%lu", reg);
992 } else {
993 sprintf(tmpReg, "C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "A0.x + " : "", reg);
995 strcat(hwLine, tmpReg);
996 break;
997 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
998 sprintf(tmpReg, "A%lu", reg);
999 strcat(hwLine, tmpReg);
1000 break;
1001 case D3DSPR_RASTOUT:
1002 sprintf(tmpReg, "%s", hwrastout_reg_names[reg]);
1003 strcat(hwLine, tmpReg);
1004 break;
1005 case D3DSPR_ATTROUT:
1006 if (reg==0) {
1007 strcat(hwLine, "result.color.primary");
1008 } else {
1009 strcat(hwLine, "result.color.secondary");
1011 break;
1012 case D3DSPR_TEXCRDOUT:
1013 sprintf(tmpReg, "result.texcoord[%lu]", reg);
1014 strcat(hwLine, tmpReg);
1015 break;
1016 default:
1017 FIXME("Unknown reg type %ld %ld\n", regtype, reg);
1018 break;
1021 if (!is_input) {
1022 vshader_program_add_output_param_swizzle(param, is_color, hwLine);
1023 } else {
1024 vshader_program_add_input_param_swizzle(param, is_color, hwLine);
1028 DWORD MacroExpansion[4*4];
1030 int ExpandMxMacro(DWORD macro_opcode, const DWORD* args) {
1031 int i;
1032 int nComponents = 0;
1033 DWORD opcode =0;
1034 switch(macro_opcode) {
1035 case D3DSIO_M4x4:
1036 nComponents = 4;
1037 opcode = D3DSIO_DP4;
1038 break;
1039 case D3DSIO_M4x3:
1040 nComponents = 3;
1041 opcode = D3DSIO_DP4;
1042 break;
1043 case D3DSIO_M3x4:
1044 nComponents = 4;
1045 opcode = D3DSIO_DP3;
1046 break;
1047 case D3DSIO_M3x3:
1048 nComponents = 3;
1049 opcode = D3DSIO_DP3;
1050 break;
1051 case D3DSIO_M3x2:
1052 nComponents = 2;
1053 opcode = D3DSIO_DP3;
1054 break;
1055 default:
1056 break;
1058 for (i = 0; i < nComponents; i++) {
1059 MacroExpansion[i*4+0] = opcode;
1060 MacroExpansion[i*4+1] = ((*args) & ~D3DSP_WRITEMASK_ALL)|(D3DSP_WRITEMASK_0<<i);
1061 MacroExpansion[i*4+2] = *(args+1);
1062 MacroExpansion[i*4+3] = (*(args+2))+i;
1064 return nComponents;
1068 * Function parser ...
1071 inline static VOID IWineD3DVertexShaderImpl_GenerateProgramArbHW(IWineD3DVertexShader *iface, CONST DWORD* pFunction) {
1072 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1073 const DWORD* pToken = pFunction;
1074 const DWORD* pSavedToken = NULL;
1075 const SHADER_OPCODE* curOpcode = NULL;
1076 int nRemInstr = -1;
1077 DWORD i;
1078 unsigned lineNum = 0;
1079 char *pgmStr = NULL;
1080 char tmpLine[255];
1081 DWORD nUseAddressRegister = 0;
1082 DWORD nUseTempRegister = 0;
1083 DWORD regtype;
1084 DWORD reg;
1085 BOOL tmpsUsed[32];
1086 #if 0 /* TODO: loope register (just another address register ) */
1087 BOOL hasLoops = FALSE;
1088 #endif
1090 #define PGMSIZE 65535
1091 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
1092 int pgmLength = 0;
1094 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
1095 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
1096 if (This->device->fixupVertexBufferSize < PGMSIZE) {
1097 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
1098 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
1099 This->fixupVertexBufferSize = PGMSIZE;
1100 This->fixupVertexBuffer[0] = 0;
1102 pgmStr = This->device->fixupVertexBuffer;
1103 #endif
1104 #define PNSTRCAT(_pgmStr, _tmpLine) { \
1105 int _tmpLineLen = strlen(_tmpLine); \
1106 if(_tmpLineLen + pgmLength > PGMSIZE) { \
1107 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, _tmpLineLen + pgmLength); \
1108 } else { \
1109 memcpy(_pgmStr + pgmLength, _tmpLine, _tmpLineLen); \
1111 pgmLength += _tmpLineLen; \
1114 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, 65535); /* 64kb should be enough */
1115 /* Initialise the shader */
1116 This->namedArrays = FALSE;
1117 This->declaredArrays = FALSE;
1118 for (i = 0; i < WINED3DSHADERDECLUSAGE_MAX_USAGE; i++) {
1119 This->arrayUsageMap[i] = -1;
1121 /* set all the tmpsUsed to not used */
1122 memset(tmpsUsed, FALSE , sizeof(tmpsUsed));
1124 /* TODO: renumbering of attributes if the values are higher than the highest supported attribute but the total number of attributes is less than the highest supported attribute */
1125 This->highestConstant = -1;
1129 * First pass to determine what we need to declare:
1130 * - Temporary variables
1131 * - Address variables
1133 if (NULL != pToken) {
1134 while (D3DVS_END() != *pToken) {
1135 if (vshader_is_version_token(*pToken)) {
1136 /** skip version */
1137 ++pToken;
1138 continue;
1140 if (vshader_is_comment_token(*pToken)) { /** comment */
1141 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1142 ++pToken;
1143 pToken += comment_len;
1144 continue;
1146 curOpcode = vshader_program_get_opcode(*pToken);
1147 ++pToken;
1148 /* TODO: dcl's */
1149 /* TODO: Consts */
1151 if (NULL == curOpcode) {
1152 while (*pToken & 0x80000000) {
1153 FIXME("unrecognized opcode: %08lx\n", *pToken);
1154 /* skip unrecognized opcode */
1155 ++pToken;
1157 } else {
1158 if (curOpcode->opcode == D3DSIO_DCL){
1159 INT usage = *pToken++;
1160 INT arrayNo = (*pToken++ & 0x00001FFF);
1161 switch(usage & 0xFFFF) {
1162 case D3DDECLUSAGE_POSITION:
1163 if((usage & 0xF0000) >> 16 == 0) { /* tween data */
1164 TRACE("Setting position to %d\n", arrayNo);
1165 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION] = arrayNo;
1166 This->namedArrays = TRUE;
1167 } else {
1168 /* TODO: position indexes go from 0-8!!*/
1169 TRACE("Setting position 2 to %d because usage = %d\n", arrayNo, (usage & 0xF0000) >> 16);
1170 /* robots uses positions up to 8, the position arrays are just packed.*/
1171 if ((usage & 0xF0000) >> 16 > 1) {
1172 TRACE("Loaded for position %d (greater than 2)\n", (usage & 0xF0000) >> 16);
1174 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION2 + ((usage & 0xF0000) >> 16) -1] = arrayNo;
1175 This->declaredArrays = TRUE;
1177 break;
1178 case D3DDECLUSAGE_BLENDINDICES:
1179 /* not supported by openGL */
1180 TRACE("Setting BLENDINDICES to %d\n", arrayNo);
1181 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_BLENDINDICES] = arrayNo;
1182 This->declaredArrays = TRUE;
1183 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended BLENDINDICES\n");
1184 break;
1185 case D3DDECLUSAGE_BLENDWEIGHT:
1186 TRACE("Setting BLENDWEIGHT to %d\n", arrayNo);
1187 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_BLENDWEIGHT] = arrayNo;
1188 This->namedArrays = TRUE;
1189 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended blend weights\n");
1190 break;
1191 case D3DDECLUSAGE_NORMAL:
1192 if((usage & 0xF0000) >> 16 == 0) { /* tween data */
1193 TRACE("Setting normal to %d\n", arrayNo);
1194 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_NORMAL] = arrayNo;
1195 This->namedArrays = TRUE;
1196 } else {
1197 TRACE("Setting normal 2 to %d because usage = %d\n", arrayNo, (usage & 0xF0000) >> 16);
1198 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_NORMAL2] = arrayNo;
1199 This->declaredArrays = TRUE;
1201 break;
1202 case D3DDECLUSAGE_PSIZE:
1203 TRACE("Setting PSIZE to %d\n", arrayNo);
1204 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_PSIZE] = arrayNo;
1205 This->namedArrays = TRUE;
1206 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended PSIZE\n");
1207 break;
1208 case D3DDECLUSAGE_COLOR:
1209 if((usage & 0xF0000) >> 16 == 0) {
1210 TRACE("Setting DIFFUSE to %d\n", arrayNo);
1211 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DIFFUSE] = arrayNo;
1212 This->namedArrays = TRUE;
1213 } else {
1214 TRACE("Setting SPECULAR to %d\n", arrayNo);
1215 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SPECULAR] = arrayNo;
1216 This->namedArrays = TRUE;
1218 break;
1219 case D3DDECLUSAGE_TEXCOORD:
1220 This->namedArrays = TRUE;
1221 /* only 7 texture coords have been designed for, so run a quick sanity check */
1222 if ((usage & 0xF0000) >> 16 > 7) {
1223 FIXME("(%p) : Program uses texture coordinate %d but only 0-7 have been implemented\n", This, (usage & 0xF0000) >> 16);
1224 } else {
1225 TRACE("Setting TEXCOORD %d to %d\n", ((usage & 0xF0000) >> 16), arrayNo);
1226 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_TEXCOORD0 + ((usage & 0xF0000) >> 16)] = arrayNo;
1228 break;
1229 /* The following aren't supported by openGL,
1230 if we get them then everything needs to be mapped to numbered attributes instead of named ones.
1231 this should be caught in the first pass */
1232 case D3DDECLUSAGE_TANGENT:
1233 TRACE("Setting TANGENT to %d\n", arrayNo);
1234 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_TANGENT] = arrayNo;
1235 This->declaredArrays = TRUE;
1236 break;
1237 case D3DDECLUSAGE_BINORMAL:
1238 TRACE("Setting BINORMAL to %d\n", arrayNo);
1239 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_BINORMAL] = arrayNo;
1240 This->declaredArrays = TRUE;
1241 break;
1242 case D3DDECLUSAGE_TESSFACTOR:
1243 TRACE("Setting TESSFACTOR to %d\n", arrayNo);
1244 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_TESSFACTOR] = arrayNo;
1245 This->declaredArrays = TRUE;
1246 break;
1247 case D3DDECLUSAGE_POSITIONT:
1248 if((usage & 0xF0000) >> 16 == 0) { /* tween data */
1249 FIXME("Setting positiont to %d\n", arrayNo);
1250 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITIONT] = arrayNo;
1251 This->namedArrays = TRUE;
1252 } else {
1253 FIXME("Setting positiont 2 to %d because usage = %d\n", arrayNo, (usage & 0xF0000) >> 16);
1254 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITIONT2] = arrayNo;
1255 This->declaredArrays = TRUE;
1256 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended positiont\n");
1258 break;
1259 case D3DDECLUSAGE_FOG:
1260 /* supported by OpenGL */
1261 TRACE("Setting FOG to %d\n", arrayNo);
1262 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_FOG] = arrayNo;
1263 This->namedArrays = TRUE;
1264 break;
1265 case D3DDECLUSAGE_DEPTH:
1266 TRACE("Setting DEPTH to %d\n", arrayNo);
1267 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DEPTH] = arrayNo;
1268 This->declaredArrays = TRUE;
1269 break;
1270 case D3DDECLUSAGE_SAMPLE:
1271 TRACE("Setting SAMPLE to %d\n", arrayNo);
1272 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SAMPLE] = arrayNo;
1273 This->declaredArrays = TRUE;
1274 break;
1275 default:
1276 FIXME("Unrecognised dcl %08x", usage & 0xFFFF);
1278 } else if(curOpcode->opcode == D3DSIO_DEF) {
1279 This->constantsUsedBitmap[*pToken & 0xFF] = VS_CONSTANT_CONSTANT;
1280 FIXME("Constant %ld\n", *pToken & 0xFF);
1281 ++pToken;
1282 ++pToken;
1283 ++pToken;
1284 ++pToken;
1285 ++pToken;
1287 } else {
1288 /* Check to see if and tmp or addressing redisters are used */
1289 if (curOpcode->num_params > 0) {
1290 regtype = ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT));
1291 reg = ((*pToken) & 0x00001FFF);
1292 if (D3DSPR_ADDR == regtype && nUseAddressRegister <= reg) nUseAddressRegister = reg + 1;
1293 if (D3DSPR_TEMP == regtype){
1294 tmpsUsed[reg] = TRUE;
1295 if(nUseTempRegister <= reg) nUseTempRegister = reg + 1;
1297 ++pToken;
1298 for (i = 1; i < curOpcode->num_params; ++i) {
1299 regtype = ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT));
1300 reg = ((*pToken) & 0x00001FFF);
1301 if (D3DSPR_ADDR == regtype && nUseAddressRegister <= reg) nUseAddressRegister = reg + 1;
1302 if (D3DSPR_TEMP == regtype){
1303 tmpsUsed[reg] = TRUE;
1304 if(nUseTempRegister <= reg) nUseTempRegister = reg + 1;
1306 ++pToken;
1310 #if 1 /* TODO: if the shaders uses calls or loops then we need to convert the shader into glsl */
1311 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1312 FIXME("This shader requires gl shader language support\n");
1313 #if 0
1314 This->shaderLanguage = GLSHADER_GLSL;
1315 #endif
1317 #endif
1321 #if 1
1322 #define VSHADER_ALWAYS_NUMBERED
1323 #endif
1325 #ifdef VSHADER_ALWAYS_NUMBERED /* handy for debugging using numbered arrays instead of named arrays */
1326 /* TODO: using numbered arrays for software shaders makes things easier */
1327 This->declaredArrays = TRUE;
1328 #endif
1330 /* named arrays and declared arrays are mutually exclusive */
1331 if (This->declaredArrays) {
1332 This->namedArrays = FALSE;
1334 /* TODO: validate
1335 nUseAddressRegister < = GL_MAX_PROGRAM_ADDRESS_REGISTERS_AR
1336 nUseTempRegister <= GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB
1339 /** second pass, now generate */
1340 pToken = pFunction;
1342 if (NULL != pToken) {
1343 while (1) {
1344 tmpLine[0] = 0;
1345 if ((nRemInstr >= 0) && (--nRemInstr == -1))
1346 /* Macro is finished, continue normal path */
1347 pToken = pSavedToken;
1348 if (D3DVS_END() == *pToken)
1349 break;
1351 if (vshader_is_version_token(*pToken)) { /** version */
1352 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1353 int version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1354 int numTemps;
1355 int numConstants;
1357 TRACE("found version token vs.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1359 /* Each release of vertex shaders has had different numbers of temp registers */
1360 switch (version) {
1361 case 10:
1362 case 11: numTemps=12;
1363 numConstants=96;/* min(GL_LIMITS(constants),96) */
1364 strcpy(tmpLine, "!!ARBvp1.0\n");
1365 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1366 break;
1367 /* FIXME: if there are no calls or loops then use ARBvp1 otherwise use GLSL instead
1368 TODO: see if there are any operations in vs2/3 that aren't supported by ARBvp
1369 TODO: only map the maximum possible number of constants supported by openGL and not the maximum required by d3d (even better only map the used constants)*/
1370 case 20: numTemps=12; /* min(GL_LIMITS(temps),12) */
1371 numConstants=96; /* min(GL_LIMITS(constants),256) */
1372 strcpy(tmpLine, "!!ARBvp1.0\n");
1373 FIXME("No work done yet to support vs2.0 in hw\n");
1374 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1375 break;
1376 case 21: numTemps=12; /* min(GL_LIMITS(temps),12) */
1377 numConstants=96; /* min(GL_LIMITS(constants),256) */
1378 strcpy(tmpLine, "!!ARBvp1.0\n");
1379 FIXME("No work done yet to support vs2.1 in hw\n");
1380 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1381 break;
1382 case 30: numTemps=32; /* min(GL_LIMITS(temps),32) */
1383 numConstants=96;/* min(GL_LIMITS(constants),256) */
1384 strcpy(tmpLine, "!!ARBvp3.0\n");
1385 FIXME("No work done yet to support vs3.0 in hw\n");
1386 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1387 break;
1388 default:
1389 numTemps=12;/* min(GL_LIMITS(temps),12) */
1390 numConstants=96;/* min(GL_LIMITS(constants),96) */
1391 strcpy(tmpLine, "!!ARBvp1.0\n");
1392 FIXME("Unrecognized vertex shader version %d!\n", version);
1394 PNSTRCAT(pgmStr, tmpLine);
1396 ++lineNum;
1398 /* This should be a bitmap so that only temp registers that are used are declared. */
1399 for (i = 0; i < nUseTempRegister /* we should check numTemps here */ ; i++) {
1400 if (tmpsUsed[i]) { /* only write out the temps if they are actually in use */
1401 sprintf(tmpLine, "TEMP T%ld;\n", i);
1402 ++lineNum;
1403 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1404 PNSTRCAT(pgmStr, tmpLine);
1408 /* TODO: loop register counts as an address register */
1409 for (i = 0; i < nUseAddressRegister; i++) {
1410 sprintf(tmpLine, "ADDRESS A%ld;\n", i);
1411 ++lineNum;
1412 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1413 PNSTRCAT(pgmStr, tmpLine);
1416 /* Due to the dynamic constants binding mechanism, we need to declare
1417 * all the constants for relative addressing. */
1418 /* Mesa supports only 95 constants for VS1.X although we should have at least 96. */
1419 if (GL_VEND(MESA) || GL_VEND(WINE)) {
1420 numConstants = 95;
1422 /* FIXME: We should be counting the number of constants in the first pass and then validating that many are supported
1423 Looking at some of the shaders in use by applications we'd need to create a list of all used env variables
1425 sprintf(tmpLine, "PARAM C[%d] = { program.env[0..%d] };\n", numConstants, numConstants - 1);
1426 TRACE("GL HW (%u,%u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1427 PNSTRCAT(pgmStr, tmpLine);
1429 ++lineNum;
1431 ++pToken;
1432 continue;
1434 if (vshader_is_comment_token(*pToken)) { /** comment */
1435 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1436 ++pToken;
1437 FIXME("#%s\n", (char*)pToken);
1438 pToken += comment_len;
1439 continue;
1442 curOpcode = vshader_program_get_opcode(*pToken);
1443 ++pToken;
1444 if (NULL == curOpcode) {
1445 /* unknown current opcode ... (shouldn't be any!) */
1446 while (*pToken & 0x80000000) {
1447 FIXME("unrecognized opcode: %08lx\n", *pToken);
1448 ++pToken;
1450 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1451 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1453 FIXME("Token %s requires greater functionality than Vertex_Progarm_ARB supports\n", curOpcode->name);
1454 pToken += curOpcode->num_params;
1455 } else {
1456 /* Build opcode for GL vertex_program */
1457 switch (curOpcode->opcode) {
1458 case D3DSIO_NOP:
1459 continue;
1460 case D3DSIO_MOV:
1461 /* Address registers must be loaded with the ARL instruction */
1462 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1463 if (((*pToken) & 0x00001FFF) < nUseAddressRegister) {
1464 strcpy(tmpLine, "ARL");
1465 break;
1466 } else
1467 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & 0x00001FFF));
1469 /* fall through */
1470 case D3DSIO_ADD:
1471 case D3DSIO_SUB:
1472 case D3DSIO_MAD:
1473 case D3DSIO_MUL:
1474 case D3DSIO_RCP:
1475 case D3DSIO_RSQ:
1476 case D3DSIO_DP3:
1477 case D3DSIO_DP4:
1478 case D3DSIO_MIN:
1479 case D3DSIO_MAX:
1480 case D3DSIO_SLT:
1481 case D3DSIO_SGE:
1482 case D3DSIO_LIT:
1483 case D3DSIO_DST:
1484 case D3DSIO_FRC:
1485 case D3DSIO_EXPP:
1486 case D3DSIO_LOGP:
1487 case D3DSIO_EXP:
1488 case D3DSIO_LOG:
1489 strcpy(tmpLine, curOpcode->glname);
1490 break;
1491 case D3DSIO_M4x4:
1492 case D3DSIO_M4x3:
1493 case D3DSIO_M3x4:
1494 case D3DSIO_M3x3:
1495 case D3DSIO_M3x2:
1496 /* Expand the macro and get nusprintf(tmpLine,mber of generated instruction */
1497 nRemInstr = ExpandMxMacro(curOpcode->opcode, pToken);
1498 /* Save point to next instruction */
1499 pSavedToken = pToken + 3;
1500 /* Execute expanded macro */
1501 pToken = MacroExpansion;
1502 continue;
1503 /* dcl and def are handeled in the first pass */
1504 case D3DSIO_DCL:
1505 if (This->namedArrays) {
1506 const char* attribName = "undefined";
1507 switch(*pToken & 0xFFFF) {
1508 case D3DDECLUSAGE_POSITION:
1509 attribName = "vertex.position";
1510 break;
1511 case D3DDECLUSAGE_BLENDINDICES:
1512 /* not supported by openGL */
1513 attribName = "vertex.blend";
1514 break;
1515 case D3DDECLUSAGE_BLENDWEIGHT:
1516 attribName = "vertex.weight";
1517 break;
1518 case D3DDECLUSAGE_NORMAL:
1519 attribName = "vertex.normal";
1520 break;
1521 case D3DDECLUSAGE_PSIZE:
1522 attribName = "vertex.psize";
1523 break;
1524 case D3DDECLUSAGE_COLOR:
1525 if((*pToken & 0xF0000) >> 16 == 0) {
1526 attribName = "vertex.color";
1527 } else {
1528 attribName = "vertex.color.secondary";
1530 break;
1531 case D3DDECLUSAGE_TEXCOORD:
1533 char tmpChar[100];
1534 tmpChar[0] = 0;
1535 sprintf(tmpChar,"vertex.texcoord[%lu]",(*pToken & 0xF0000) >> 16);
1536 attribName = tmpChar;
1537 break;
1539 /* The following aren't directly supported by openGL, so shouldn't come up using namedarrays. */
1540 case D3DDECLUSAGE_TANGENT:
1541 attribName = "vertex.tangent";
1542 break;
1543 case D3DDECLUSAGE_BINORMAL:
1544 attribName = "vertex.binormal";
1545 break;
1546 case D3DDECLUSAGE_TESSFACTOR:
1547 attribName = "vertex.tessfactor";
1548 break;
1549 case D3DDECLUSAGE_POSITIONT:
1550 attribName = "vertex.possitionT";
1551 break;
1552 case D3DDECLUSAGE_FOG:
1553 attribName = "vertex.fogcoord";
1554 break;
1555 case D3DDECLUSAGE_DEPTH:
1556 attribName = "vertex.depth";
1557 break;
1558 case D3DDECLUSAGE_SAMPLE:
1559 attribName = "vertex.sample";
1560 break;
1561 default:
1562 FIXME("Unrecognised dcl %08lx", *pToken & 0xFFFF);
1565 char tmpChar[80];
1566 ++pToken;
1567 sprintf(tmpLine, "ATTRIB ");
1568 vshader_program_add_param(This, *pToken, FALSE, tmpLine);
1569 sprintf(tmpChar," = %s", attribName);
1570 strcat(tmpLine, tmpChar);
1571 strcat(tmpLine,";\n");
1572 ++lineNum;
1573 if (This->namedArrays) {
1574 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine);
1575 PNSTRCAT(pgmStr, tmpLine);
1577 } else {
1578 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine);
1581 } else {
1582 /* eat the token so it doesn't generate a warning */
1583 ++pToken;
1585 ++pToken;
1586 continue;
1587 case D3DSIO_DEF:
1589 char tmpChar[80];
1590 sprintf(tmpLine, "PARAM const%lu = {", *pToken & 0xFF);
1591 ++pToken;
1592 sprintf(tmpChar,"%f ,", *(float *)pToken);
1593 strcat(tmpLine, tmpChar);
1594 ++pToken;
1595 sprintf(tmpChar,"%f ,", *(float *)pToken);
1596 strcat(tmpLine, tmpChar);
1597 ++pToken;
1598 sprintf(tmpChar,"%f ,", *(float *)pToken);
1599 strcat(tmpLine, tmpChar);
1600 ++pToken;
1601 sprintf(tmpChar,"%f}", *(float *)pToken);
1602 strcat(tmpLine, tmpChar);
1604 strcat(tmpLine,";\n");
1605 ++lineNum;
1606 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1607 PNSTRCAT(pgmStr, tmpLine);
1609 ++pToken;
1610 continue;
1612 default:
1613 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1614 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1615 } else {
1616 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1619 if (curOpcode->num_params > 0) {
1620 vshader_program_add_param(This, *pToken, FALSE, tmpLine);
1622 ++pToken;
1623 for (i = 1; i < curOpcode->num_params; ++i) {
1624 strcat(tmpLine, ",");
1625 vshader_program_add_param(This, *pToken, TRUE, tmpLine);
1626 ++pToken;
1629 strcat(tmpLine,";\n");
1630 ++lineNum;
1631 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1632 PNSTRCAT(pgmStr, tmpLine);
1636 strcpy(tmpLine, "END\n");
1637 ++lineNum;
1638 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1639 PNSTRCAT(pgmStr, tmpLine);
1642 /* finally null terminate the pgmStr*/
1643 pgmStr[pgmLength] = 0;
1645 /* Check that Vertex Shaders are supported */
1646 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1647 /* Create the hw shader */
1648 /* TODO: change to resource.glObjectHandel or something like that */
1649 GL_EXTCALL(glGenProgramsARB(1, &This->prgId));
1650 TRACE("Creating a hw vertex shader, prg=%d\n", This->prgId);
1651 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->prgId));
1653 /* Create the program and check for errors */
1654 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr)/*pgmLength*/, pgmStr));
1655 if (glGetError() == GL_INVALID_OPERATION) {
1656 GLint errPos;
1657 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1658 FIXME("HW VertexShader Error at position %d: %s\n",
1659 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1660 This->prgId = -1;
1663 #if 1 /* if were using the data buffer of device then we don't need to free it */
1664 HeapFree(GetProcessHeap(), 0, pgmStr);
1665 #endif
1666 #undef PNSTRCAT
1669 BOOL IWineD3DVertexShaderImpl_ExecuteHAL(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
1671 * TODO: use the NV_vertex_program (or 1_1) extension
1672 * and specifics vendors (ARB_vertex_program??) variants for it
1674 return TRUE;
1677 HRESULT WINAPI IWineD3DVertexShaderImpl_ExecuteSW(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
1678 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1680 /** Vertex Shader Temporary Registers */
1681 WINED3DSHADERVECTOR R[12];
1682 /*D3DSHADERSCALAR A0;*/
1683 WINED3DSHADERVECTOR A[1];
1684 /** temporary Vector for modifier management */
1685 WINED3DSHADERVECTOR d;
1686 WINED3DSHADERVECTOR s[3];
1687 /** parser datas */
1688 const DWORD* pToken = This->function;
1689 const SHADER_OPCODE* curOpcode = NULL;
1690 /** functions parameters */
1691 WINED3DSHADERVECTOR* p[4];
1692 WINED3DSHADERVECTOR* p_send[4];
1693 DWORD i;
1695 /** init temporary register */
1696 memset(R, 0, 12 * sizeof(WINED3DSHADERVECTOR));
1698 /* vshader_program_parse(vshader); */
1699 #if 0 /* Must not be 1 in cvs */
1700 TRACE("Input:\n");
1701 TRACE_VSVECTOR(This->data->C[0]);
1702 TRACE_VSVECTOR(This->data->C[1]);
1703 TRACE_VSVECTOR(This->data->C[2]);
1704 TRACE_VSVECTOR(This->data->C[3]);
1705 TRACE_VSVECTOR(This->data->C[4]);
1706 TRACE_VSVECTOR(This->data->C[5]);
1707 TRACE_VSVECTOR(This->data->C[6]);
1708 TRACE_VSVECTOR(This->data->C[7]);
1709 TRACE_VSVECTOR(This->data->C[8]);
1710 TRACE_VSVECTOR(This->data->C[64]);
1711 TRACE_VSVECTOR(input->V[D3DVSDE_POSITION]);
1712 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDWEIGHT]);
1713 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDINDICES]);
1714 TRACE_VSVECTOR(input->V[D3DVSDE_NORMAL]);
1715 TRACE_VSVECTOR(input->V[D3DVSDE_PSIZE]);
1716 TRACE_VSVECTOR(input->V[D3DVSDE_DIFFUSE]);
1717 TRACE_VSVECTOR(input->V[D3DVSDE_SPECULAR]);
1718 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD0]);
1719 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD1]);
1720 #endif
1722 TRACE_VSVECTOR(vshader->data->C[64]);
1723 /* TODO: Run through all the tokens and find and labels, if, endifs, loops etc...., and make a labels list */
1725 /* the first dword is the version tag */
1726 /* TODO: parse it */
1728 if (vshader_is_version_token(*pToken)) { /** version */
1729 ++pToken;
1731 while (D3DVS_END() != *pToken) {
1732 if (vshader_is_comment_token(*pToken)) { /** comment */
1733 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1734 ++pToken;
1735 pToken += comment_len;
1736 continue ;
1738 curOpcode = vshader_program_get_opcode(*pToken);
1739 ++pToken;
1740 if (NULL == curOpcode) {
1741 i = 0;
1742 /* unknown current opcode ... */
1743 /* TODO: Think of a name for 0x80000000 and replace its use with a constant */
1744 while (*pToken & 0x80000000) {
1745 if (i == 0) {
1746 FIXME("unrecognized opcode: pos=%d token=%08lX\n", (pToken - 1) - This->function, *(pToken - 1));
1748 FIXME("unrecognized opcode param: pos=%d token=%08lX what=", pToken - This->function, *pToken);
1749 vshader_program_dump_param(*pToken, i);
1750 TRACE("\n");
1751 ++i;
1752 ++pToken;
1754 /* return FALSE; */
1755 } else {
1756 if (curOpcode->num_params > 0) {
1757 /* TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken); */
1758 for (i = 0; i < curOpcode->num_params; ++i) {
1759 DWORD reg = pToken[i] & 0x00001FFF;
1760 DWORD regtype = ((pToken[i] & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
1762 switch (regtype << D3DSP_REGTYPE_SHIFT) {
1763 case D3DSPR_TEMP:
1764 /* TRACE("p[%d]=R[%d]\n", i, reg); */
1765 p[i] = &R[reg];
1766 break;
1767 case D3DSPR_INPUT:
1768 /* TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]); */
1769 p[i] = &input->V[reg];
1770 break;
1771 case D3DSPR_CONST:
1772 if (pToken[i] & D3DVS_ADDRMODE_RELATIVE) {
1773 p[i] = &This->data->C[(DWORD) A[0].x + reg];
1774 } else {
1775 p[i] = &This->data->C[reg];
1777 break;
1778 case D3DSPR_ADDR: /* case D3DSPR_TEXTURE: */
1779 if (0 != reg) {
1780 ERR("cannot handle address registers != a0, forcing use of a0\n");
1781 reg = 0;
1783 /* TRACE("p[%d]=A[%d]\n", i, reg); */
1784 p[i] = &A[reg];
1785 break;
1786 case D3DSPR_RASTOUT:
1787 switch (reg) {
1788 case D3DSRO_POSITION:
1789 p[i] = &output->oPos;
1790 break;
1791 case D3DSRO_FOG:
1792 p[i] = &output->oFog;
1793 break;
1794 case D3DSRO_POINT_SIZE:
1795 p[i] = &output->oPts;
1796 break;
1798 break;
1799 case D3DSPR_ATTROUT:
1800 /* TRACE("p[%d]=oD[%d]\n", i, reg); */
1801 p[i] = &output->oD[reg];
1802 break;
1803 case D3DSPR_TEXCRDOUT:
1804 /* TRACE("p[%d]=oT[%d]\n", i, reg); */
1805 p[i] = &output->oT[reg];
1806 break;
1807 /* TODO Decls and defs */
1808 #if 0
1809 case D3DSPR_DCL:
1810 case D3DSPR_DEF:
1811 #endif
1812 default:
1813 break;
1816 if (i > 0) { /* input reg */
1817 DWORD swizzle = (pToken[i] & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
1818 UINT isNegative = ((pToken[i] & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG);
1820 if (!isNegative && (D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) {
1821 /* TRACE("p[%d] not swizzled\n", i); */
1822 p_send[i] = p[i];
1823 } else {
1824 DWORD swizzle_x = swizzle & 0x03;
1825 DWORD swizzle_y = (swizzle >> 2) & 0x03;
1826 DWORD swizzle_z = (swizzle >> 4) & 0x03;
1827 DWORD swizzle_w = (swizzle >> 6) & 0x03;
1828 /* TRACE("p[%d] swizzled\n", i); */
1829 float* tt = (float*) p[i];
1830 s[i].x = (isNegative) ? -tt[swizzle_x] : tt[swizzle_x];
1831 s[i].y = (isNegative) ? -tt[swizzle_y] : tt[swizzle_y];
1832 s[i].z = (isNegative) ? -tt[swizzle_z] : tt[swizzle_z];
1833 s[i].w = (isNegative) ? -tt[swizzle_w] : tt[swizzle_w];
1834 p_send[i] = &s[i];
1836 } else { /* output reg */
1837 if ((pToken[i] & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) {
1838 p_send[i] = p[i];
1839 } else {
1840 p_send[i] = &d; /* to be post-processed for modifiers management */
1846 switch (curOpcode->num_params) {
1847 case 0:
1848 curOpcode->soft_fct();
1849 break;
1850 case 1:
1851 curOpcode->soft_fct(p_send[0]);
1852 break;
1853 case 2:
1854 curOpcode->soft_fct(p_send[0], p_send[1]);
1855 break;
1856 case 3:
1857 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2]);
1858 break;
1859 case 4:
1860 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3]);
1861 break;
1862 case 5:
1863 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4]);
1864 break;
1865 case 6:
1866 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4], p_send[5]);
1867 break;
1868 default:
1869 ERR("%s too many params: %u\n", curOpcode->name, curOpcode->num_params);
1872 /* check if output reg modifier post-process */
1873 if (curOpcode->num_params > 0 && (pToken[0] & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1874 if (pToken[0] & D3DSP_WRITEMASK_0) p[0]->x = d.x;
1875 if (pToken[0] & D3DSP_WRITEMASK_1) p[0]->y = d.y;
1876 if (pToken[0] & D3DSP_WRITEMASK_2) p[0]->z = d.z;
1877 if (pToken[0] & D3DSP_WRITEMASK_3) p[0]->w = d.w;
1879 #if 0
1880 TRACE_VSVECTOR(output->oPos);
1881 TRACE_VSVECTOR(output->oD[0]);
1882 TRACE_VSVECTOR(output->oD[1]);
1883 TRACE_VSVECTOR(output->oT[0]);
1884 TRACE_VSVECTOR(output->oT[1]);
1885 TRACE_VSVECTOR(R[0]);
1886 TRACE_VSVECTOR(R[1]);
1887 TRACE_VSVECTOR(R[2]);
1888 TRACE_VSVECTOR(R[3]);
1889 TRACE_VSVECTOR(R[4]);
1890 TRACE_VSVECTOR(R[5]);
1891 #endif
1893 /* to next opcode token */
1894 pToken += curOpcode->num_params;
1896 #if 0
1897 TRACE("End of current instruction:\n");
1898 TRACE_VSVECTOR(output->oPos);
1899 TRACE_VSVECTOR(output->oD[0]);
1900 TRACE_VSVECTOR(output->oD[1]);
1901 TRACE_VSVECTOR(output->oT[0]);
1902 TRACE_VSVECTOR(output->oT[1]);
1903 TRACE_VSVECTOR(R[0]);
1904 TRACE_VSVECTOR(R[1]);
1905 TRACE_VSVECTOR(R[2]);
1906 TRACE_VSVECTOR(R[3]);
1907 TRACE_VSVECTOR(R[4]);
1908 TRACE_VSVECTOR(R[5]);
1909 #endif
1911 #if 0 /* Must not be 1 in cvs */
1912 TRACE("Output:\n");
1913 TRACE_VSVECTOR(output->oPos);
1914 TRACE_VSVECTOR(output->oD[0]);
1915 TRACE_VSVECTOR(output->oD[1]);
1916 TRACE_VSVECTOR(output->oT[0]);
1917 TRACE_VSVECTOR(output->oT[1]);
1918 #endif
1919 return D3D_OK;
1922 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantF(IWineD3DVertexShader *iface, UINT StartRegister, CONST FLOAT *pConstantData, UINT Vector4fCount) {
1923 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1924 FIXME("(%p) : stub\n", This);
1925 return D3D_OK;
1928 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantF(IWineD3DVertexShader *iface, UINT StartRegister, FLOAT *pConstantData, UINT Vector4fCount) {
1929 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1930 FIXME("(%p) : stub\n", This);
1931 return D3D_OK;
1934 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantI(IWineD3DVertexShader *iface, UINT StartRegister, CONST int *pConstantData, UINT Vector4iCount) {
1935 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1936 if (StartRegister + Vector4iCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1937 ERR("(%p) : SetVertexShaderConstantI C[%u] invalid\n", This, StartRegister);
1938 return D3DERR_INVALIDCALL;
1940 if (NULL == pConstantData) {
1941 return D3DERR_INVALIDCALL;
1943 FIXME("(%p) : stub\n", This);
1944 return D3D_OK;
1947 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantI(IWineD3DVertexShader *iface, UINT StartRegister, int *pConstantData, UINT Vector4iCount) {
1948 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1949 TRACE("(%p) : C[%u] count=%u\n", This, StartRegister, Vector4iCount);
1950 if (StartRegister + Vector4iCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1951 return D3DERR_INVALIDCALL;
1953 if (NULL == pConstantData) {
1954 return D3DERR_INVALIDCALL;
1956 FIXME("(%p) : stub\n", This);
1957 return D3D_OK;
1960 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantB(IWineD3DVertexShader *iface, UINT StartRegister, CONST BOOL *pConstantData, UINT BoolCount) {
1961 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1962 if (StartRegister + BoolCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1963 ERR("(%p) : SetVertexShaderConstantB C[%u] invalid\n", This, StartRegister);
1964 return D3DERR_INVALIDCALL;
1966 if (NULL == pConstantData) {
1967 return D3DERR_INVALIDCALL;
1969 FIXME("(%p) : stub\n", This);
1970 return D3D_OK;
1973 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantB(IWineD3DVertexShader *iface, UINT StartRegister, BOOL *pConstantData, UINT BoolCount) {
1974 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl *)iface;
1975 FIXME("(%p) : stub\n", This);
1976 return D3D_OK;
1979 #endif
/* *******************************************
   IWineD3DVertexShader IUnknown parts follow
   ******************************************* */
1984 HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
1986 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1987 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
1988 if (IsEqualGUID(riid, &IID_IUnknown)
1989 || IsEqualGUID(riid, &IID_IWineD3DBase)
1990 || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
1991 IUnknown_AddRef(iface);
1992 *ppobj = This;
1993 return D3D_OK;
1995 return E_NOINTERFACE;
1998 ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
1999 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2000 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
2001 return InterlockedIncrement(&This->ref);
2004 ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
2005 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2006 ULONG ref;
2007 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
2008 ref = InterlockedDecrement(&This->ref);
2009 if (ref == 0) {
2010 if (This->vertexDeclaration) IWineD3DVertexDeclaration_Release(This->vertexDeclaration);
2011 HeapFree(GetProcessHeap(), 0, This);
2013 return ref;
/* *******************************************
   IWineD3DVertexShader IWineD3DVertexShader parts follow
   ******************************************* */
2020 HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
2021 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2023 *parent = This->parent;
2024 IUnknown_AddRef(*parent);
2025 TRACE("(%p) : returning %p\n", This, *parent);
2026 return D3D_OK;
2029 HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
2030 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2031 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
2032 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
2033 TRACE("(%p) returning %p\n", This, *pDevice);
2034 return D3D_OK;
2037 HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
2038 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
2039 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
2041 if (NULL == pData) {
2042 *pSizeOfData = This->functionLength;
2043 return D3D_OK;
2045 if (*pSizeOfData < This->functionLength) {
2046 *pSizeOfData = This->functionLength;
2047 return D3DERR_MOREDATA;
2049 if (NULL == This->function) { /* no function defined */
2050 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
2051 (*(DWORD **) pData) = NULL;
2052 } else {
2053 if(This->functionLength == 0){
2056 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
2057 memcpy(pData, This->function, This->functionLength);
2059 return D3D_OK;
2062 HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
2063 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
2064 const DWORD* pToken = pFunction;
2065 const SHADER_OPCODE* curOpcode = NULL;
2066 DWORD len = 0;
2067 DWORD i;
2068 TRACE("(%p) : Parsing programme\n", This);
2070 if (NULL != pToken) {
2071 while (D3DVS_END() != *pToken) {
2072 if (vshader_is_version_token(*pToken)) { /** version */
2073 TRACE("vs_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
2074 ++pToken;
2075 ++len;
2076 continue;
2078 if (vshader_is_comment_token(*pToken)) { /** comment */
2079 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2080 ++pToken;
2081 TRACE("//%s\n", (char*)pToken);
2082 pToken += comment_len;
2083 len += comment_len + 1;
2084 continue;
2086 curOpcode = vshader_program_get_opcode(*pToken);
2087 ++pToken;
2088 ++len;
2089 if (NULL == curOpcode) {
2090 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
2091 while (*pToken & 0x80000000) {
2092 /* unknown current opcode ... */
2093 FIXME("unrecognized opcode: %08lx", *pToken);
2094 ++pToken;
2095 ++len;
2096 TRACE("\n");
2099 } else {
2100 if (curOpcode->opcode == D3DSIO_DCL) {
2101 TRACE("dcl_");
2102 switch(*pToken & 0xFFFF) {
2103 case D3DDECLUSAGE_POSITION:
2104 TRACE("%s%ld ", "position",(*pToken & 0xF0000) >> 16);
2105 break;
2106 case D3DDECLUSAGE_BLENDINDICES:
2107 TRACE("%s ", "blend");
2108 break;
2109 case D3DDECLUSAGE_BLENDWEIGHT:
2110 TRACE("%s ", "weight");
2111 break;
2112 case D3DDECLUSAGE_NORMAL:
2113 TRACE("%s%ld ", "normal",(*pToken & 0xF0000) >> 16);
2114 break;
2115 case D3DDECLUSAGE_PSIZE:
2116 TRACE("%s ", "psize");
2117 break;
2118 case D3DDECLUSAGE_COLOR:
2119 if((*pToken & 0xF0000) >> 16 == 0) {
2120 TRACE("%s ", "color");
2121 } else {
2122 TRACE("%s ", "specular");
2124 break;
2125 case D3DDECLUSAGE_TEXCOORD:
2126 TRACE("%s%ld ", "texture", (*pToken & 0xF0000) >> 16);
2127 break;
2128 case D3DDECLUSAGE_TANGENT:
2129 TRACE("%s ", "tangent");
2130 break;
2131 case D3DDECLUSAGE_BINORMAL:
2132 TRACE("%s ", "binormal");
2133 break;
2134 case D3DDECLUSAGE_TESSFACTOR:
2135 TRACE("%s ", "tessfactor");
2136 break;
2137 case D3DDECLUSAGE_POSITIONT:
2138 TRACE("%s%ld ", "positionT",(*pToken & 0xF0000) >> 16);
2139 break;
2140 case D3DDECLUSAGE_FOG:
2141 TRACE("%s ", "fog");
2142 break;
2143 case D3DDECLUSAGE_DEPTH:
2144 TRACE("%s ", "depth");
2145 break;
2146 case D3DDECLUSAGE_SAMPLE:
2147 TRACE("%s ", "sample");
2148 break;
2149 default:
2150 FIXME("Unrecognised dcl %08lx", *pToken & 0xFFFF);
2152 ++pToken;
2153 ++len;
2154 vshader_program_dump_vs_param(*pToken, 0);
2155 ++pToken;
2156 ++len;
2157 } else
2158 if (curOpcode->opcode == D3DSIO_DEF) {
2159 TRACE("def c%lu = ", *pToken & 0xFF);
2160 ++pToken;
2161 ++len;
2162 TRACE("%f ,", *(float *)pToken);
2163 ++pToken;
2164 ++len;
2165 TRACE("%f ,", *(float *)pToken);
2166 ++pToken;
2167 ++len;
2168 TRACE("%f ,", *(float *)pToken);
2169 ++pToken;
2170 ++len;
2171 TRACE("%f", *(float *)pToken);
2172 ++pToken;
2173 ++len;
2174 } else {
2175 TRACE("%s ", curOpcode->name);
2176 if (curOpcode->num_params > 0) {
2177 vshader_program_dump_vs_param(*pToken, 0);
2178 ++pToken;
2179 ++len;
2180 for (i = 1; i < curOpcode->num_params; ++i) {
2181 TRACE(", ");
2182 vshader_program_dump_vs_param(*pToken, 1);
2183 ++pToken;
2184 ++len;
2188 TRACE("\n");
2191 This->functionLength = (len + 1) * sizeof(DWORD);
2192 } else {
2193 This->functionLength = 1; /* no Function defined use fixed function vertex processing */
2196 /* Generate HW shader in needed */
2197 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
2198 #if 1
2199 IWineD3DVertexShaderImpl_GenerateProgramArbHW(iface, pFunction);
2200 #endif
2203 /* copy the function ... because it will certainly be released by application */
2204 if (NULL != pFunction) {
2205 This->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->functionLength);
2206 memcpy((void *)This->function, pFunction, This->functionLength);
2207 } else {
2208 This->function = NULL;
2210 return D3D_OK;
2213 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
2215 /*** IUnknown methods ***/
2216 IWineD3DVertexShaderImpl_QueryInterface,
2217 IWineD3DVertexShaderImpl_AddRef,
2218 IWineD3DVertexShaderImpl_Release,
2219 /*** IWineD3DVertexShader methods ***/
2220 IWineD3DVertexShaderImpl_GetParent,
2221 IWineD3DVertexShaderImpl_GetDevice,
2222 IWineD3DVertexShaderImpl_GetFunction,
2223 IWineD3DVertexShaderImpl_SetFunction