Correct the matrix sizes in software shaders and remove an erroneous
[wine/wine64.git] / dlls / wined3d / vertexshader.c
blobce5258db0c04de8b057c553ff69d005f245595c0
1 /*
2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "config.h"
25 #include <math.h>
26 #include <stdio.h>
28 #include "wined3d_private.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
32 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
34 /* Shader debugging - Change the following line to enable debugging of software
35 vertex shaders */
36 #if 0 /* Must not be 1 in cvs version */
37 # define VSTRACE(A) TRACE A
38 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
39 #else
40 # define VSTRACE(A)
41 # define TRACE_VSVECTOR(name)
42 #endif
44 #if 1 /* FIXME : Needs sorting when vshader code moved in properly */
46 /**
47 * DirectX9 SDK download
48 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
50 * Exploring D3DX
51 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
53 * Using Vertex Shaders
54 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
56 * Dx9 New
57 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
59 * Dx9 Shaders
60 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
65 * Dx9 D3DX
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
68 * FVF
69 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
71 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72 * http://developer.nvidia.com/view.asp?IO=vstovp
74 * NVIDIA: Memory Management with VAR
75 * http://developer.nvidia.com/view.asp?IO=var_memory_management
78 /* TODO: Vertex and pixel shaders are almost identical; the only exception is the way some of the data is looked up, or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders.
79 Because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs could be cross-compiled using a large body of shared code */
/* Untyped pointer to a software implementation of one shader instruction.
 * The parameter list is left unspecified because the vshader_* routines
 * in this file take between zero and five arguments. */
81 typedef void (*shader_fct_t)();
/* One row of the instruction table (vshader_ins). */
83 typedef struct SHADER_OPCODE {
/* Instruction id; compared against the opcode bits of a shader token. */
84 unsigned int opcode;
/* Mnemonic string; a NULL name marks the end of the table. */
85 const char* name;
/* Name on the GL side: a string, NULL, or GLNAME_REQUIRE_GLSL. */
86 const char* glname;
/* Parameter count for the instruction.
 * NOTE(review): presumably the number of tokens following the opcode
 * token - confirm against the program parser. */
87 CONST UINT num_params;
/* Software VM routine executing the instruction. */
88 shader_fct_t soft_fct;
/* Shader version range for the entry; the table uses 0 or D3DPS_VERSION()
 * values here.  NOTE(review): whether 0 means "any version" is not visible
 * in this part of the file. */
89 DWORD min_version;
90 DWORD max_version;
91 } SHADER_OPCODE;
/* Sentinel glname value (an invalid pointer, never dereferenced as a string)
 * used by table entries that have no plain GL mnemonic. */
93 #define GLNAME_REQUIRE_GLSL ((const char *)1)
95 /*******************************
96 * vshader functions software VM
99 void vshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
100 d->x = s0->x + s1->x;
101 d->y = s0->y + s1->y;
102 d->z = s0->z + s1->z;
103 d->w = s0->w + s1->w;
104 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
105 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
108 void vshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
109 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
110 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
111 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
114 void vshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
115 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
116 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
117 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
120 void vshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
121 d->x = 1.0f;
122 d->y = s0->y * s1->y;
123 d->z = s0->z;
124 d->w = s1->w;
125 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
126 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
129 void vshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
130 union {
131 float f;
132 DWORD d;
133 } tmp;
135 tmp.f = floorf(s0->w);
136 d->x = powf(2.0f, tmp.f);
137 d->y = s0->w - tmp.f;
138 tmp.f = powf(2.0f, s0->w);
139 tmp.d &= 0xFFFFFF00U;
140 d->z = tmp.f;
141 d->w = 1.0f;
142 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
143 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
146 void vshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
147 d->x = 1.0f;
148 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
149 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
150 d->w = 1.0f;
151 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
152 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
155 void vshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
156 float tmp_f = fabsf(s0->w);
157 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
158 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
159 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
162 void vshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
163 d->x = s0->x * s1->x + s2->x;
164 d->y = s0->y * s1->y + s2->y;
165 d->z = s0->z * s1->z + s2->z;
166 d->w = s0->w * s1->w + s2->w;
167 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
168 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
171 void vshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
172 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
173 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
174 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
175 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
176 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
177 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
180 void vshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
181 d->x = (s0->x < s1->x) ? s0->x : s1->x;
182 d->y = (s0->y < s1->y) ? s0->y : s1->y;
183 d->z = (s0->z < s1->z) ? s0->z : s1->z;
184 d->w = (s0->w < s1->w) ? s0->w : s1->w;
185 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
186 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
189 void vshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
190 d->x = s0->x;
191 d->y = s0->y;
192 d->z = s0->z;
193 d->w = s0->w;
194 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
195 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
198 void vshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
199 d->x = s0->x * s1->x;
200 d->y = s0->y * s1->y;
201 d->z = s0->z * s1->z;
202 d->w = s0->w * s1->w;
203 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
204 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: does nothing apart from the optional trace line. */
void vshader_nop(void) {
    VSTRACE(("executing nop\n"));
}
212 void vshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
213 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
214 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
215 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
218 void vshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
219 float tmp_f = fabsf(s0->w);
220 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
221 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
222 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
225 void vshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
226 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
227 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
228 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
229 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
230 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
231 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
234 void vshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
235 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
236 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
237 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
238 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
239 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
240 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
243 void vshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
244 d->x = s0->x - s1->x;
245 d->y = s0->y - s1->y;
246 d->z = s0->z - s1->z;
247 d->w = s0->w - s1->w;
248 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
249 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
253 * Version 1.1 specific
256 void vshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
257 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
258 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
259 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
262 void vshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
263 float tmp_f = fabsf(s0->w);
264 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
265 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
266 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
269 void vshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
270 d->x = s0->x - floorf(s0->x);
271 d->y = s0->y - floorf(s0->y);
272 d->z = 0.0f;
273 d->w = 1.0f;
274 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
275 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Fixed-size float matrix types taken by the vshader_m*x* routines.
 * The two digits of each name are the array dimensions, D3DMATRIXrc being
 * FLOAT[r][c]: vshader_m4x4 takes D3DMATRIX44, vshader_m4x3 takes
 * D3DMATRIX34, vshader_m3x4 takes D3DMATRIX43, vshader_m3x3 takes
 * D3DMATRIX33 and vshader_m3x2 takes D3DMATRIX23.
 * NOTE(review): if callers pass a pointer into an array of four-float
 * constant registers, the three-column types would not match that layout -
 * confirm against the call sites. */
278 typedef FLOAT D3DMATRIX44[4][4];
279 typedef FLOAT D3DMATRIX43[4][3];
280 typedef FLOAT D3DMATRIX34[3][4];
281 typedef FLOAT D3DMATRIX33[3][3];
282 typedef FLOAT D3DMATRIX23[2][3];
284 void vshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
286 * Buggy CODE: here only if cast not work for copy/paste
287 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
288 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
289 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
290 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
291 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
292 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
293 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
295 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
296 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
297 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
298 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
299 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
300 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
301 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
302 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
305 void vshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
306 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
307 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
308 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
309 d->w = 1.0f;
310 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
311 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
312 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
313 VSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
316 void vshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
317 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
318 d->y = mat[2][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
319 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
320 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
321 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
322 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
323 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
324 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
327 void vshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
328 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
329 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
330 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
331 d->w = 1.0f;
332 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
333 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
334 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
335 VSTRACE(("executing m3x3(4): (%f) \n", d->w));
338 void vshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
339 FIXME("check\n");
340 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
341 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
342 d->z = 0.0f;
343 d->w = 1.0f;
347 * Version 2.0 specific
349 void vshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
350 d->x = s0->x * (s1->x - s2->x) + s2->x;
351 d->y = s0->y * (s1->y - s2->y) + s2->y;
352 d->z = s0->z * (s1->z - s2->z) + s2->z;
353 d->w = s0->w * (s1->w - s2->w) + s2->w;
356 void vshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
357 d->x = s0->y * s1->z - s0->z * s1->y;
358 d->y = s0->z * s1->x - s0->x * s1->z;
359 d->z = s0->x * s1->y - s0->y * s1->x;
360 d->w = 0.9f; /* w is undefined, so set it to something safeish */
362 VSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
363 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
366 void vshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
368 d->x = fabsf(s0->x);
369 d->y = fabsf(s0->y);
370 d->z = fabsf(s0->z);
371 d->w = fabsf(s0->w);
372 VSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
373 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
376 /* Stubs */
377 void vshader_texcoord(WINED3DSHADERVECTOR* d) {
378 FIXME(" : Stub\n");
381 void vshader_texkill(WINED3DSHADERVECTOR* d) {
382 FIXME(" : Stub\n");
385 void vshader_tex(WINED3DSHADERVECTOR* d) {
386 FIXME(" : Stub\n");
388 void vshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
389 FIXME(" : Stub\n");
392 void vshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
393 FIXME(" : Stub\n");
396 void vshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
397 FIXME(" : Stub\n");
400 void vshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
401 FIXME(" : Stub\n");
404 void vshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
405 FIXME(" : Stub\n");
408 void vshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
409 FIXME(" : Stub\n");
412 void vshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
413 FIXME(" : Stub\n");
416 void vshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
417 FIXME(" : Stub\n");
420 void vshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
421 FIXME(" : Stub\n");
424 void vshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
425 FIXME(" : Stub\n");
428 void vshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
429 FIXME(" : Stub\n");
432 void vshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
433 FIXME(" : Stub\n");
436 void vshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
437 FIXME(" : Stub\n");
440 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
441 void vshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
442 FIXME(" : Stub\n");
445 void vshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
446 FIXME(" : Stub\n");
449 void vshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
450 FIXME(" : Stub\n");
453 void vshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
454 FIXME(" : Stub\n");
457 void vshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
458 FIXME(" : Stub\n");
461 void vshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
462 FIXME(" : Stub\n");
465 void vshader_texdepth(WINED3DSHADERVECTOR* d) {
466 FIXME(" : Stub\n");
469 void vshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
470 FIXME(" : Stub\n");
473 void vshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
474 FIXME(" : Stub\n");
477 void vshader_call(WINED3DSHADERVECTOR* d) {
478 FIXME(" : Stub\n");
481 void vshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
482 FIXME(" : Stub\n");
485 void vshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
486 FIXME(" : Stub\n");
489 void vshader_ret(WINED3DSHADERVECTOR* d) {
490 FIXME(" : Stub\n");
493 void vshader_endloop(WINED3DSHADERVECTOR* d) {
494 FIXME(" : Stub\n");
497 void vshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
498 FIXME(" : Stub\n");
501 void vshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
502 FIXME(" : Stub\n");
505 void vshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
506 FIXME(" : Stub\n");
509 void vshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
510 FIXME(" : Stub\n");
513 void vshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
514 FIXME(" : Stub\n");
517 void vshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
518 FIXME(" : Stub\n");
521 void vshader_endrep(void) {
522 FIXME(" : Stub\n");
525 void vshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
526 FIXME(" : Stub\n");
529 void vshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
530 FIXME(" : Stub\n");
533 void vshader_else(WINED3DSHADERVECTOR* d) {
534 FIXME(" : Stub\n");
537 void vshader_label(WINED3DSHADERVECTOR* d) {
538 FIXME(" : Stub\n");
541 void vshader_endif(WINED3DSHADERVECTOR* d) {
542 FIXME(" : Stub\n");
545 void vshader_break(WINED3DSHADERVECTOR* d) {
546 FIXME(" : Stub\n");
549 void vshader_breakc(WINED3DSHADERVECTOR* d) {
550 FIXME(" : Stub\n");
553 void vshader_mova(WINED3DSHADERVECTOR* d) {
554 FIXME(" : Stub\n");
557 void vshader_defb(WINED3DSHADERVECTOR* d) {
558 FIXME(" : Stub\n");
561 void vshader_defi(WINED3DSHADERVECTOR* d) {
562 FIXME(" : Stub\n");
565 void vshader_dp2add(WINED3DSHADERVECTOR* d) {
566 FIXME(" : Stub\n");
569 void vshader_dsx(WINED3DSHADERVECTOR* d) {
570 FIXME(" : Stub\n");
573 void vshader_dsy(WINED3DSHADERVECTOR* d) {
574 FIXME(" : Stub\n");
577 void vshader_texldd(WINED3DSHADERVECTOR* d) {
578 FIXME(" : Stub\n");
581 void vshader_setp(WINED3DSHADERVECTOR* d) {
582 FIXME(" : Stub\n");
585 void vshader_texldl(WINED3DSHADERVECTOR* d) {
586 FIXME(" : Stub\n");
589 void vshader_breakp(WINED3DSHADERVECTOR* d) {
590 FIXME(" : Stub\n");
595 * log, exp, frc, m*x* seems to be macros ins ... to see
597 static CONST SHADER_OPCODE vshader_ins [] = {
598 {D3DSIO_NOP, "nop", "NOP", 0, vshader_nop, 0, 0},
599 {D3DSIO_MOV, "mov", "MOV", 2, vshader_mov, 0, 0},
600 {D3DSIO_ADD, "add", "ADD", 3, vshader_add, 0, 0},
601 {D3DSIO_SUB, "sub", "SUB", 3, vshader_sub, 0, 0},
602 {D3DSIO_MAD, "mad", "MAD", 4, vshader_mad, 0, 0},
603 {D3DSIO_MUL, "mul", "MUL", 3, vshader_mul, 0, 0},
604 {D3DSIO_RCP, "rcp", "RCP", 2, vshader_rcp, 0, 0},
605 {D3DSIO_RSQ, "rsq", "RSQ", 2, vshader_rsq, 0, 0},
606 {D3DSIO_DP3, "dp3", "DP3", 3, vshader_dp3, 0, 0},
607 {D3DSIO_DP4, "dp4", "DP4", 3, vshader_dp4, 0, 0},
608 {D3DSIO_MIN, "min", "MIN", 3, vshader_min, 0, 0},
609 {D3DSIO_MAX, "max", "MAX", 3, vshader_max, 0, 0},
610 {D3DSIO_SLT, "slt", "SLT", 3, vshader_slt, 0, 0},
611 {D3DSIO_SGE, "sge", "SGE", 3, vshader_sge, 0, 0},
612 {D3DSIO_ABS, "abs", "ABS", 2, vshader_abs, 0, 0},
613 {D3DSIO_EXP, "exp", "EX2", 2, vshader_exp, 0, 0},
614 {D3DSIO_LOG, "log", "LG2", 2, vshader_log, 0, 0},
615 {D3DSIO_LIT, "lit", "LIT", 2, vshader_lit, 0, 0},
616 {D3DSIO_DST, "dst", "DST", 3, vshader_dst, 0, 0},
617 {D3DSIO_LRP, "lrp", "LRP", 4, vshader_lrp, 0, 0},
618 {D3DSIO_FRC, "frc", "FRC", 2, vshader_frc, 0, 0},
619 {D3DSIO_M4x4, "m4x4", "undefined", 3, vshader_m4x4, 0, 0},
620 {D3DSIO_M4x3, "m4x3", "undefined", 3, vshader_m4x3, 0, 0},
621 {D3DSIO_M3x4, "m3x4", "undefined", 3, vshader_m3x4, 0, 0},
622 {D3DSIO_M3x3, "m3x3", "undefined", 3, vshader_m3x3, 0, 0},
623 {D3DSIO_M3x2, "m3x2", "undefined", 3, vshader_m3x2, 0, 0},
624 /** FIXME: use direct access so add the others opcodes as stubs */
625 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
626 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
627 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, vshader_call, 0, 0},
628 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, vshader_callnz, 0, 0},
629 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, vshader_loop, 0, 0},
630 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, vshader_ret, 0, 0},
631 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, vshader_endloop, 0, 0},
632 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, vshader_label, 0, 0},
633 /* DCL is a specil operation */
634 {D3DSIO_DCL, "dcl", NULL, 1, vshader_dcl, 0, 0},
635 {D3DSIO_POW, "pow", "POW", 3, vshader_pow, 0, 0},
636 {D3DSIO_CRS, "crs", "XPS", 3, vshader_crs, 0, 0},
637 /* TODO: sng can possibly be performed as
638 RCP tmp, vec
639 MUL out, tmp, vec*/
640 {D3DSIO_SGN, "sng", NULL, 2, vshader_sng, 0, 0},
641 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
642 DP3 tmp , vec, vec;
643 RSQ tmp, tmp.x;
644 MUL vec.xyz, vec, tmp;
645 but I think this is better because it accounts for w properly.
646 DP3 tmp , vec, vec;
647 RSQ tmp, tmp.x;
648 MUL vec, vec, tmp;
651 {D3DSIO_NRM, "nrm", NULL, 2, vshader_nrm, 0, 0},
652 {D3DSIO_SINCOS, "sincos", NULL, 2, vshader_sincos, 0, 0},
653 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, vshader_rep, 0, 0},
654 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, vshader_endrep, 0, 0},
655 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, vshader_if, 0, 0},
656 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, vshader_ifc, 0, 0},
657 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, vshader_else, 0, 0},
658 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, vshader_endif, 0, 0},
659 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, vshader_break, 0, 0},
660 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, vshader_breakc, 0, 0},
661 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, vshader_mova, 0, 0},
662 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, vshader_defb, 0, 0},
663 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, vshader_defi, 0, 0},
665 {D3DSIO_TEXCOORD, "texcoord", GLNAME_REQUIRE_GLSL, 1, vshader_texcoord, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
666 {D3DSIO_TEXCOORD, "texcrd", GLNAME_REQUIRE_GLSL, 2, vshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
667 {D3DSIO_TEXKILL, "texkill", GLNAME_REQUIRE_GLSL, 1, vshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
668 {D3DSIO_TEX, "tex", GLNAME_REQUIRE_GLSL, 1, vshader_tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
669 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, vshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
670 {D3DSIO_TEXBEM, "texbem", GLNAME_REQUIRE_GLSL, 2, vshader_texbem, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
671 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, vshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
672 {D3DSIO_TEXREG2AR,"texreg2ar",GLNAME_REQUIRE_GLSL, 2, vshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
673 {D3DSIO_TEXREG2GB,"texreg2gb",GLNAME_REQUIRE_GLSL, 2, vshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
674 {D3DSIO_TEXM3x2PAD, "texm3x2pad", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
675 {D3DSIO_TEXM3x2TEX, "texm3x2tex", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
676 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
677 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", GLNAME_REQUIRE_GLSL, 3, vshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
678 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
680 {D3DSIO_EXPP, "expp", "EXP", 2, vshader_expp, 0, 0},
681 {D3DSIO_LOGP, "logp", "LOG", 2, vshader_logp, 0, 0},
682 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, vshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
683 /* def is a special operation */
684 {D3DSIO_DEF, "def", NULL, 5, vshader_def, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
685 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, vshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
686 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, vshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
687 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
688 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, vshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
689 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
690 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, vshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
691 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, vshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
692 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, vshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
693 /* TODO: dp2add can be made out of multiple instuctions */
694 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, vshader_dp2add, 0, 0},
695 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, vshader_dsx, 0, 0},
696 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, vshader_dsy, 0, 0},
697 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, vshader_texldd, 0, 0},
698 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, vshader_setp, 0, 0},
699 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, vshader_texldl, 0, 0},
700 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, vshader_breakp, 0, 0},
701 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, vshader_nop, 0, 0},
702 {0, NULL, NULL, 0, NULL, 0, 0}
706 inline static const SHADER_OPCODE* vshader_program_get_opcode(const DWORD code) {
707 DWORD i = 0;
708 /** TODO: use dichotomic search or hash table */
709 while (NULL != vshader_ins[i].name) {
710 if ((code & D3DSI_OPCODE_MASK) == vshader_ins[i].opcode) {
711 return &vshader_ins[i];
713 ++i;
715 FIXME("Unsupported opcode %lx\n",code);
716 return NULL;
719 inline static void vshader_program_dump_param(const DWORD param, int input) {
720 static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" };
721 static const char swizzle_reg_chars[] = "xyzw";
723 DWORD reg = param & 0x00001FFF;
724 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
726 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) TRACE("-");
728 switch (regtype) {
729 case D3DSPR_TEMP:
730 TRACE("R[%lu]", reg);
731 break;
732 case D3DSPR_INPUT:
733 TRACE("v%lu", reg);
734 break;
735 case D3DSPR_CONST:
736 TRACE("C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
737 break;
738 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
739 TRACE("a[%lu]", reg);
740 break;
741 case D3DSPR_RASTOUT:
742 TRACE("%s", rastout_reg_names[reg]);
743 break;
744 case D3DSPR_ATTROUT:
745 TRACE("oD[%lu]", reg);
746 break;
747 case D3DSPR_TEXCRDOUT:
748 TRACE("oT[%lu]", reg);
749 break;
750 default:
751 FIXME("Unknown %lu %u reg %lu\n",regtype, D3DSPR_ATTROUT, reg);
752 break;
755 if (!input) {
756 /** operand output */
757 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
758 if (param & D3DSP_WRITEMASK_0) TRACE(".x");
759 if (param & D3DSP_WRITEMASK_1) TRACE(".y");
760 if (param & D3DSP_WRITEMASK_2) TRACE(".z");
761 if (param & D3DSP_WRITEMASK_3) TRACE(".w");
763 } else {
764 /** operand input */
765 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
766 DWORD swizzle_x = swizzle & 0x03;
767 DWORD swizzle_y = (swizzle >> 2) & 0x03;
768 DWORD swizzle_z = (swizzle >> 4) & 0x03;
769 DWORD swizzle_w = (swizzle >> 6) & 0x03;
771 * swizzle bits fields:
772 * WWZZYYXX
774 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
775 if (swizzle_x == swizzle_y &&
776 swizzle_x == swizzle_z &&
777 swizzle_x == swizzle_w) {
778 TRACE(".%c", swizzle_reg_chars[swizzle_x]);
779 } else {
780 TRACE(".%c%c%c%c",
781 swizzle_reg_chars[swizzle_x],
782 swizzle_reg_chars[swizzle_y],
783 swizzle_reg_chars[swizzle_z],
784 swizzle_reg_chars[swizzle_w]);
790 inline static void vshader_program_dump_vs_param(const DWORD param, int input) {
791 static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" };
792 static const char swizzle_reg_chars[] = "xyzw";
793 /* the unknown mask is for bits not yet accounted for by any other mask... */
794 #define UNKNOWN_MASK 0xC000
796 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
797 #define EXTENDED_REG 0x1800
799 DWORD reg = param & D3DSP_REGNUM_MASK; /* 0x00001FFF; isn't this D3DSP_REGNUM_MASK? */
800 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
802 if(param & UNKNOWN_MASK) { /* if this register has any of the unknown bits set then report them*/
803 FIXME("Unknown bits set regtype %lx , %lx, UK(%lx)\n", regtype, (param & EXTENDED_REG), param & UNKNOWN_MASK);
806 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) TRACE("-");
808 switch (regtype /*<< D3DSP_REGTYPE_SHIFT*/) {
809 case D3DSPR_TEMP:
810 TRACE("r%lu", reg);
811 break;
812 case D3DSPR_INPUT:
813 TRACE("v%lu", reg);
814 break;
815 case D3DSPR_CONST:
816 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
817 break;
818 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
819 TRACE("a%lu", reg);
820 break;
821 case D3DSPR_RASTOUT:
822 TRACE("%s", rastout_reg_names[reg]);
823 break;
824 case D3DSPR_ATTROUT:
825 TRACE("oD%lu", reg);
826 break;
827 case D3DSPR_TEXCRDOUT:
828 TRACE("oT%lu", reg);
829 break;
830 case D3DSPR_CONSTINT:
831 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
832 break;
833 case D3DSPR_CONSTBOOL:
834 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
835 break;
836 case D3DSPR_LABEL:
837 TRACE("l%lu", reg);
838 break;
839 case D3DSPR_LOOP:
840 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
841 break;
842 default:
843 FIXME("Unknown %lu reg %lu\n",regtype, reg);
844 break;
847 if (!input) {
848 /** operand output */
849 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
850 if (param & D3DSP_WRITEMASK_0) TRACE(".x");
851 if (param & D3DSP_WRITEMASK_1) TRACE(".y");
852 if (param & D3DSP_WRITEMASK_2) TRACE(".z");
853 if (param & D3DSP_WRITEMASK_3) TRACE(".w");
855 } else {
856 /** operand input */
857 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
858 DWORD swizzle_x = swizzle & 0x03;
859 DWORD swizzle_y = (swizzle >> 2) & 0x03;
860 DWORD swizzle_z = (swizzle >> 4) & 0x03;
861 DWORD swizzle_w = (swizzle >> 6) & 0x03;
863 * swizzle bits fields:
864 * WWZZYYXX
866 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
867 if (swizzle_x == swizzle_y &&
868 swizzle_x == swizzle_z &&
869 swizzle_x == swizzle_w) {
870 TRACE(".%c", swizzle_reg_chars[swizzle_x]);
871 } else {
872 TRACE(".%c%c%c%c",
873 swizzle_reg_chars[swizzle_x],
874 swizzle_reg_chars[swizzle_y],
875 swizzle_reg_chars[swizzle_z],
876 swizzle_reg_chars[swizzle_w]);
882 inline static BOOL vshader_is_version_token(DWORD token) {
883 return 0xFFFE0000 == (token & 0xFFFE0000);
886 inline static BOOL vshader_is_comment_token(DWORD token) {
887 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
890 inline static void vshader_program_add_output_param_swizzle(const DWORD param, int is_color, char *hwLine) {
891 /** operand output */
892 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
893 strcat(hwLine, ".");
894 if (param & D3DSP_WRITEMASK_0) { strcat(hwLine, "x"); }
895 if (param & D3DSP_WRITEMASK_1) { strcat(hwLine, "y"); }
896 if (param & D3DSP_WRITEMASK_2) { strcat(hwLine, "z"); }
897 if (param & D3DSP_WRITEMASK_3) { strcat(hwLine, "w"); }
901 inline static void vshader_program_add_input_param_swizzle(const DWORD param, int is_color, char *hwLine) {
902 static const char swizzle_reg_chars_color_fix[] = "zyxw";
903 static const char swizzle_reg_chars[] = "xyzw";
904 const char* swizzle_regs = NULL;
905 char tmpReg[255];
907 /** operand input */
908 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
909 DWORD swizzle_x = swizzle & 0x03;
910 DWORD swizzle_y = (swizzle >> 2) & 0x03;
911 DWORD swizzle_z = (swizzle >> 4) & 0x03;
912 DWORD swizzle_w = (swizzle >> 6) & 0x03;
914 if (is_color) {
915 swizzle_regs = swizzle_reg_chars_color_fix;
916 } else {
917 swizzle_regs = swizzle_reg_chars;
921 * swizzle bits fields:
922 * WWZZYYXX
924 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) { /* D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
925 if (is_color) {
926 sprintf(tmpReg, ".%c%c%c%c",
927 swizzle_regs[swizzle_x],
928 swizzle_regs[swizzle_y],
929 swizzle_regs[swizzle_z],
930 swizzle_regs[swizzle_w]);
931 strcat(hwLine, tmpReg);
933 return ;
935 if (swizzle_x == swizzle_y &&
936 swizzle_x == swizzle_z &&
937 swizzle_x == swizzle_w)
939 sprintf(tmpReg, ".%c", swizzle_regs[swizzle_x]);
940 strcat(hwLine, tmpReg);
941 } else {
942 sprintf(tmpReg, ".%c%c%c%c",
943 swizzle_regs[swizzle_x],
944 swizzle_regs[swizzle_y],
945 swizzle_regs[swizzle_z],
946 swizzle_regs[swizzle_w]);
947 strcat(hwLine, tmpReg);
951 inline static void vshader_program_add_param(const DWORD param, int input, int is_color, char *hwLine, BOOL namedArrays, CHAR constantsUsedBitmap[]) {
952 /*static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" }; */
953 static const char* hwrastout_reg_names[] = { "result.position", "result.fogcoord", "result.pointsize" };
955 DWORD reg = param & 0x00001FFF;
956 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
957 char tmpReg[255];
959 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) {
960 strcat(hwLine, " -");
961 } else {
962 strcat(hwLine, " ");
965 switch (regtype) {
966 case D3DSPR_TEMP:
967 sprintf(tmpReg, "T%lu", reg);
968 strcat(hwLine, tmpReg);
969 break;
970 case D3DSPR_INPUT:
971 /* if the attributes come in as named dcl's then use a named vertex (called namedVertexN) */
972 if (namedArrays) {
973 sprintf(tmpReg, "namedVertex%lu", reg);
974 } else {
975 /* otherwise the input is on a numbered attribute so use opengl numbered attributes */
976 sprintf(tmpReg, "vertex.attrib[%lu]", reg);
978 strcat(hwLine, tmpReg);
979 break;
980 case D3DSPR_CONST:
981 /* FIXME: some constants are named so we need a constants map*/
982 if (constantsUsedBitmap[reg] == VS_CONSTANT_CONSTANT) {
983 if (param & D3DVS_ADDRMODE_RELATIVE) {
984 FIXME("Relitive addressing not expected for a named constant %lu\n", reg);
986 sprintf(tmpReg, "const%lu", reg);
987 } else {
988 sprintf(tmpReg, "C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "A0.x + " : "", reg);
990 strcat(hwLine, tmpReg);
991 break;
992 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
993 sprintf(tmpReg, "A%lu", reg);
994 strcat(hwLine, tmpReg);
995 break;
996 case D3DSPR_RASTOUT:
997 sprintf(tmpReg, "%s", hwrastout_reg_names[reg]);
998 strcat(hwLine, tmpReg);
999 break;
1000 case D3DSPR_ATTROUT:
1001 if (reg==0) {
1002 strcat(hwLine, "result.color.primary");
1003 } else {
1004 strcat(hwLine, "result.color.secondary");
1006 break;
1007 case D3DSPR_TEXCRDOUT:
1008 sprintf(tmpReg, "result.texcoord[%lu]", reg);
1009 strcat(hwLine, tmpReg);
1010 break;
1011 default:
1012 FIXME("Unknown reg type %ld %ld\n", regtype, reg);
1013 break;
1016 if (!input) {
1017 vshader_program_add_output_param_swizzle(param, is_color, hwLine);
1018 } else {
1019 vshader_program_add_input_param_swizzle(param, is_color, hwLine);
1023 DWORD MacroExpansion[4*4];
1025 int ExpandMxMacro(DWORD macro_opcode, const DWORD* args) {
1026 int i;
1027 int nComponents = 0;
1028 DWORD opcode =0;
1029 switch(macro_opcode) {
1030 case D3DSIO_M4x4:
1031 nComponents = 4;
1032 opcode = D3DSIO_DP4;
1033 break;
1034 case D3DSIO_M4x3:
1035 nComponents = 3;
1036 opcode = D3DSIO_DP4;
1037 break;
1038 case D3DSIO_M3x4:
1039 nComponents = 4;
1040 opcode = D3DSIO_DP3;
1041 break;
1042 case D3DSIO_M3x3:
1043 nComponents = 3;
1044 opcode = D3DSIO_DP3;
1045 break;
1046 case D3DSIO_M3x2:
1047 nComponents = 2;
1048 opcode = D3DSIO_DP3;
1049 break;
1050 default:
1051 break;
1053 for (i = 0; i < nComponents; i++) {
1054 MacroExpansion[i*4+0] = opcode;
1055 MacroExpansion[i*4+1] = ((*args) & ~D3DSP_WRITEMASK_ALL)|(D3DSP_WRITEMASK_0<<i);
1056 MacroExpansion[i*4+2] = *(args+1);
1057 MacroExpansion[i*4+3] = (*(args+2))+i;
1059 return nComponents;
1063 * Function parser ...
1066 inline static VOID IWineD3DVertexShaderImpl_GenerateProgramArbHW(IWineD3DVertexShader *iface, CONST DWORD* pFunction) {
1067 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1068 const DWORD* pToken = pFunction;
1069 const DWORD* pSavedToken = NULL;
1070 const SHADER_OPCODE* curOpcode = NULL;
1071 int nRemInstr = -1;
1072 DWORD i;
1073 unsigned lineNum = 0;
1074 char *pgmStr = NULL;
1075 char tmpLine[255];
1076 DWORD nUseAddressRegister = 0;
1077 DWORD nUseTempRegister = 0;
1078 DWORD regtype;
1079 DWORD reg;
1080 BOOL tmpsUsed[32];
1081 #if 0 /* TODO: loope register (just another address register ) */
1082 BOOL hasLoops = FALSE;
1083 #endif
1085 #define PGMSIZE 65535
1086 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
1087 int pgmLength = 0;
1089 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
1090 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
1091 if (This->device->fixupVertexBufferSize < PGMSIZE) {
1092 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
1093 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
1094 This->fixupVertexBufferSize = PGMSIZE;
1095 This->fixupVertexBuffer[0] = 0;
1097 pgmStr = This->device->fixupVertexBuffer;
1098 #endif
1099 #define PNSTRCAT(_pgmStr, _tmpLine) { \
1100 int _tmpLineLen = strlen(_tmpLine); \
1101 if(_tmpLineLen + pgmLength > PGMSIZE) { \
1102 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, _tmpLineLen + pgmLength); \
1103 } else { \
1104 memcpy(_pgmStr + pgmLength, _tmpLine, _tmpLineLen); \
1106 pgmLength += _tmpLineLen; \
1109 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, 65535); /* 64kb should be enough */
1110 /* Initialise the shader */
1111 This->namedArrays = FALSE;
1112 This->declaredArrays = FALSE;
1113 for (i = 0; i < WINED3DSHADERDECLUSAGE_MAX_USAGE; i++) {
1114 This->arrayUsageMap[i] = -1;
1116 /* set all the tmpsUsed to not used */
1117 memset(tmpsUsed, FALSE , sizeof(tmpsUsed));
1119 /* TODO: renumbering of attributes if the values are higher than the highest supported attribute but the total number of attributes is less than the highest supported attribute */
1120 This->highestConstant = -1;
1124 * First pass to determine what we need to declare:
1125 * - Temporary variables
1126 * - Address variables
1128 if (NULL != pToken) {
1129 while (D3DVS_END() != *pToken) {
1130 if (vshader_is_version_token(*pToken)) {
1131 /** skip version */
1132 ++pToken;
1133 continue;
1135 if (vshader_is_comment_token(*pToken)) { /** comment */
1136 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1137 ++pToken;
1138 pToken += comment_len;
1139 continue;
1141 curOpcode = vshader_program_get_opcode(*pToken);
1142 ++pToken;
1143 /* TODO: dcl's */
1144 /* TODO: Consts */
1146 if (NULL == curOpcode) {
1147 while (*pToken & 0x80000000) {
1148 FIXME("unrecognized opcode: %08lx\n", *pToken);
1149 /* skip unrecognized opcode */
1150 ++pToken;
1152 } else {
1153 if (curOpcode->opcode == D3DSIO_DCL){
1154 INT usage = *pToken++;
1155 INT arrayNo = (*pToken++ & 0x00001FFF);
1156 switch(usage & 0xFFFF) {
1157 case D3DDECLUSAGE_POSITION:
1158 if((usage & 0xF0000) >> 16 == 0) { /* tween data */
1159 TRACE("Setting position to %d\n", arrayNo);
1160 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION] = arrayNo;
1161 This->namedArrays = TRUE;
1162 } else {
1163 /* TODO: position indexes go from 0-8!!*/
1164 TRACE("Setting position 2 to %d because usage = %d\n", arrayNo, (usage & 0xF0000) >> 16);
1165 /* robots uses positions up to 8, the position arrays are just packed.*/
1166 if ((usage & 0xF0000) >> 16 > 1) {
1167 TRACE("Loaded for position %d (greater than 2)\n", (usage & 0xF0000) >> 16);
1169 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION2 + ((usage & 0xF0000) >> 16) -1] = arrayNo;
1170 This->declaredArrays = TRUE;
1172 break;
1173 case D3DDECLUSAGE_BLENDINDICES:
1174 /* not supported by openGL */
1175 TRACE("Setting BLENDINDICES to %d\n", arrayNo);
1176 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_BLENDINDICES] = arrayNo;
1177 This->declaredArrays = TRUE;
1178 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended BLENDINDICES\n");
1179 break;
1180 case D3DDECLUSAGE_BLENDWEIGHT:
1181 TRACE("Setting BLENDWEIGHT to %d\n", arrayNo);
1182 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_BLENDWEIGHT] = arrayNo;
1183 This->namedArrays = TRUE;
1184 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended blend weights\n");
1185 break;
1186 case D3DDECLUSAGE_NORMAL:
1187 if((usage & 0xF0000) >> 16 == 0) { /* tween data */
1188 TRACE("Setting normal to %d\n", arrayNo);
1189 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_NORMAL] = arrayNo;
1190 This->namedArrays = TRUE;
1191 } else {
1192 TRACE("Setting normal 2 to %d because usage = %d\n", arrayNo, (usage & 0xF0000) >> 16);
1193 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_NORMAL2] = arrayNo;
1194 This->declaredArrays = TRUE;
1196 break;
1197 case D3DDECLUSAGE_PSIZE:
1198 TRACE("Setting PSIZE to %d\n", arrayNo);
1199 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_PSIZE] = arrayNo;
1200 This->namedArrays = TRUE;
1201 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended PSIZE\n");
1202 break;
1203 case D3DDECLUSAGE_COLOR:
1204 if((usage & 0xF0000) >> 16 == 0) {
1205 TRACE("Setting DIFFUSE to %d\n", arrayNo);
1206 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DIFFUSE] = arrayNo;
1207 This->namedArrays = TRUE;
1208 } else {
1209 TRACE("Setting SPECULAR to %d\n", arrayNo);
1210 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SPECULAR] = arrayNo;
1211 This->namedArrays = TRUE;
1213 break;
1214 case D3DDECLUSAGE_TEXCOORD:
1215 This->namedArrays = TRUE;
1216 /* only 7 texture coords have been designed for, so run a quick sanity check */
1217 if ((usage & 0xF0000) >> 16 > 7) {
1218 FIXME("(%p) : Program uses texture coordinate %d but only 0-7 have been implemented\n", This, (usage & 0xF0000) >> 16);
1219 } else {
1220 TRACE("Setting TEXCOORD %d to %d\n", ((usage & 0xF0000) >> 16), arrayNo);
1221 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_TEXCOORD0 + ((usage & 0xF0000) >> 16)] = arrayNo;
1223 break;
1224 /* The following aren't supported by openGL,
1225 if we get them then everything needs to be mapped to numbered attributes instead of named ones.
1226 this should be caught in the first pass */
1227 case D3DDECLUSAGE_TANGENT:
1228 TRACE("Setting TANGENT to %d\n", arrayNo);
1229 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_TANGENT] = arrayNo;
1230 This->declaredArrays = TRUE;
1231 break;
1232 case D3DDECLUSAGE_BINORMAL:
1233 TRACE("Setting BINORMAL to %d\n", arrayNo);
1234 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_BINORMAL] = arrayNo;
1235 This->declaredArrays = TRUE;
1236 break;
1237 case D3DDECLUSAGE_TESSFACTOR:
1238 TRACE("Setting TESSFACTOR to %d\n", arrayNo);
1239 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_TESSFACTOR] = arrayNo;
1240 This->declaredArrays = TRUE;
1241 break;
1242 case D3DDECLUSAGE_POSITIONT:
1243 if((usage & 0xF0000) >> 16 == 0) { /* tween data */
1244 FIXME("Setting positiont to %d\n", arrayNo);
1245 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITIONT] = arrayNo;
1246 This->namedArrays = TRUE;
1247 } else {
1248 FIXME("Setting positiont 2 to %d because usage = %d\n", arrayNo, (usage & 0xF0000) >> 16);
1249 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITIONT2] = arrayNo;
1250 This->declaredArrays = TRUE;
1251 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended positiont\n");
1253 break;
1254 case D3DDECLUSAGE_FOG:
1255 /* supported by OpenGL */
1256 TRACE("Setting FOG to %d\n", arrayNo);
1257 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_FOG] = arrayNo;
1258 This->namedArrays = TRUE;
1259 break;
1260 case D3DDECLUSAGE_DEPTH:
1261 TRACE("Setting DEPTH to %d\n", arrayNo);
1262 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DEPTH] = arrayNo;
1263 This->declaredArrays = TRUE;
1264 break;
1265 case D3DDECLUSAGE_SAMPLE:
1266 TRACE("Setting SAMPLE to %d\n", arrayNo);
1267 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SAMPLE] = arrayNo;
1268 This->declaredArrays = TRUE;
1269 break;
1270 default:
1271 FIXME("Unrecognised dcl %08x", usage & 0xFFFF);
1273 } else if(curOpcode->opcode == D3DSIO_DEF) {
1274 This->constantsUsedBitmap[*pToken & 0xFF] = VS_CONSTANT_CONSTANT;
1275 FIXME("Constant %ld\n", *pToken & 0xFF);
1276 ++pToken;
1277 ++pToken;
1278 ++pToken;
1279 ++pToken;
1280 ++pToken;
1282 } else {
1283 /* Check to see if and tmp or addressing redisters are used */
1284 if (curOpcode->num_params > 0) {
1285 regtype = ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT));
1286 reg = ((*pToken) & 0x00001FFF);
1287 if (D3DSPR_ADDR == regtype && nUseAddressRegister <= reg) nUseAddressRegister = reg + 1;
1288 if (D3DSPR_TEMP == regtype){
1289 tmpsUsed[reg] = TRUE;
1290 if(nUseTempRegister <= reg) nUseTempRegister = reg + 1;
1292 ++pToken;
1293 for (i = 1; i < curOpcode->num_params; ++i) {
1294 regtype = ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT));
1295 reg = ((*pToken) & 0x00001FFF);
1296 if (D3DSPR_ADDR == regtype && nUseAddressRegister <= reg) nUseAddressRegister = reg + 1;
1297 if (D3DSPR_TEMP == regtype){
1298 tmpsUsed[reg] = TRUE;
1299 if(nUseTempRegister <= reg) nUseTempRegister = reg + 1;
1301 ++pToken;
1305 #if 1 /* TODO: if the shaders uses calls or loops then we need to convert the shader into glsl */
1306 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1307 FIXME("This shader requires gl shader language support\n");
1308 #if 0
1309 This->shaderLanguage = GLSHADER_GLSL;
1310 #endif
1312 #endif
1316 #if 1
1317 #define VSHADER_ALWAYS_NUMBERED
1318 #endif
1320 #ifdef VSHADER_ALWAYS_NUMBERED /* handy for debugging using numbered arrays instead of named arrays */
1321 /* TODO: using numbered arrays for software shaders makes things easier */
1322 This->declaredArrays = TRUE;
1323 #endif
1325 /* named arrays and declared arrays are mutually exclusive */
1326 if (This->declaredArrays) {
1327 This->namedArrays = FALSE;
1329 /* TODO: validate
1330 nUseAddressRegister < = GL_MAX_PROGRAM_ADDRESS_REGISTERS_AR
1331 nUseTempRegister <= GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB
1334 /** second pass, now generate */
1335 pToken = pFunction;
1337 if (NULL != pToken) {
1338 while (1) {
1339 tmpLine[0] = 0;
1340 if ((nRemInstr >= 0) && (--nRemInstr == -1))
1341 /* Macro is finished, continue normal path */
1342 pToken = pSavedToken;
1343 if (D3DVS_END() == *pToken)
1344 break;
1346 if (vshader_is_version_token(*pToken)) { /** version */
1347 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1348 int version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1349 int numTemps;
1350 int numConstants;
1352 TRACE("found version token vs.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1354 /* Each release of vertex shaders has had different numbers of temp registers */
1355 switch (version) {
1356 case 10:
1357 case 11: numTemps=12;
1358 numConstants=96;/* min(GL_LIMITS(constants),96) */
1359 strcpy(tmpLine, "!!ARBvp1.0\n");
1360 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1361 break;
1362 /* FIXME: if there are no calls or loops then use ARBvp1 otherwise use GLSL instead
1363 TODO: see if there are any operations in vs2/3 that aren't supported by ARBvp
1364 TODO: only map the maximum possible number of constants supported by openGL and not the maximum required by d3d (even better only map the used constants)*/
1365 case 20: numTemps=12; /* min(GL_LIMITS(temps),12) */
1366 numConstants=96; /* min(GL_LIMITS(constants),256) */
1367 strcpy(tmpLine, "!!ARBvp1.0\n");
1368 FIXME("No work done yet to support vs2.0 in hw\n");
1369 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1370 break;
1371 case 21: numTemps=12; /* min(GL_LIMITS(temps),12) */
1372 numConstants=96; /* min(GL_LIMITS(constants),256) */
1373 strcpy(tmpLine, "!!ARBvp1.0\n");
1374 FIXME("No work done yet to support vs2.1 in hw\n");
1375 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1376 break;
1377 case 30: numTemps=32; /* min(GL_LIMITS(temps),32) */
1378 numConstants=96;/* min(GL_LIMITS(constants),256) */
1379 strcpy(tmpLine, "!!ARBvp3.0\n");
1380 FIXME("No work done yet to support vs3.0 in hw\n");
1381 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1382 break;
1383 default:
1384 numTemps=12;/* min(GL_LIMITS(temps),12) */
1385 numConstants=96;/* min(GL_LIMITS(constants),96) */
1386 strcpy(tmpLine, "!!ARBvp1.0\n");
1387 FIXME("Unrecognized vertex shader version %d!\n", version);
1389 PNSTRCAT(pgmStr, tmpLine);
1391 ++lineNum;
1393 /* This should be a bitmap so that only temp registers that are used are declared. */
1394 for (i = 0; i < nUseTempRegister /* we should check numTemps here */ ; i++) {
1395 if (tmpsUsed[i]) { /* only write out the temps if they are actually in use */
1396 sprintf(tmpLine, "TEMP T%ld;\n", i);
1397 ++lineNum;
1398 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1399 PNSTRCAT(pgmStr, tmpLine);
1403 /* TODO: loop register counts as an address register */
1404 for (i = 0; i < nUseAddressRegister; i++) {
1405 sprintf(tmpLine, "ADDRESS A%ld;\n", i);
1406 ++lineNum;
1407 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1408 PNSTRCAT(pgmStr, tmpLine);
1411 /* Due to the dynamic constants binding mechanism, we need to declare
1412 * all the constants for relative addressing. */
1413 /* Mesa supports only 95 constants for VS1.X although we should have at least 96. */
1414 if (GL_VEND(MESA) || GL_VEND(WINE)) {
1415 numConstants = 95;
1417 /* FIXME: We should be counting the number of constants in the first pass and then validating that many are supported
1418 Looking at some of the shaders in use by applications we'd need to create a list of all used env variables
1420 sprintf(tmpLine, "PARAM C[%d] = { program.env[0..%d] };\n", numConstants, numConstants - 1);
1421 TRACE("GL HW (%u,%u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1422 PNSTRCAT(pgmStr, tmpLine);
1424 ++lineNum;
1426 ++pToken;
1427 continue;
1429 if (vshader_is_comment_token(*pToken)) { /** comment */
1430 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1431 ++pToken;
1432 FIXME("#%s\n", (char*)pToken);
1433 pToken += comment_len;
1434 continue;
1437 curOpcode = vshader_program_get_opcode(*pToken);
1438 ++pToken;
1439 if (NULL == curOpcode) {
1440 /* unknown current opcode ... (shouldn't be any!) */
1441 while (*pToken & 0x80000000) {
1442 FIXME("unrecognized opcode: %08lx\n", *pToken);
1443 ++pToken;
1445 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1446 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1448 FIXME("Token %s requires greater functionality than Vertex_Progarm_ARB supports\n", curOpcode->name);
1449 pToken += curOpcode->num_params;
1450 } else {
1451 /* Build opcode for GL vertex_program */
1452 switch (curOpcode->opcode) {
1453 case D3DSIO_NOP:
1454 continue;
1455 case D3DSIO_MOV:
1456 /* Address registers must be loaded with the ARL instruction */
1457 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1458 if (((*pToken) & 0x00001FFF) < nUseAddressRegister) {
1459 strcpy(tmpLine, "ARL");
1460 break;
1461 } else
1462 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & 0x00001FFF));
1464 /* fall through */
1465 case D3DSIO_ADD:
1466 case D3DSIO_SUB:
1467 case D3DSIO_MAD:
1468 case D3DSIO_MUL:
1469 case D3DSIO_RCP:
1470 case D3DSIO_RSQ:
1471 case D3DSIO_DP3:
1472 case D3DSIO_DP4:
1473 case D3DSIO_MIN:
1474 case D3DSIO_MAX:
1475 case D3DSIO_SLT:
1476 case D3DSIO_SGE:
1477 case D3DSIO_LIT:
1478 case D3DSIO_DST:
1479 case D3DSIO_FRC:
1480 case D3DSIO_EXPP:
1481 case D3DSIO_LOGP:
1482 case D3DSIO_EXP:
1483 case D3DSIO_LOG:
1484 strcpy(tmpLine, curOpcode->glname);
1485 break;
1486 case D3DSIO_M4x4:
1487 case D3DSIO_M4x3:
1488 case D3DSIO_M3x4:
1489 case D3DSIO_M3x3:
1490 case D3DSIO_M3x2:
1491 /* Expand the macro and get nusprintf(tmpLine,mber of generated instruction */
1492 nRemInstr = ExpandMxMacro(curOpcode->opcode, pToken);
1493 /* Save point to next instruction */
1494 pSavedToken = pToken + 3;
1495 /* Execute expanded macro */
1496 pToken = MacroExpansion;
1497 continue;
1498 /* dcl and def are handeled in the first pass */
1499 case D3DSIO_DCL:
1500 if (This->namedArrays) {
1501 const char* attribName = "undefined";
1502 switch(*pToken & 0xFFFF) {
1503 case D3DDECLUSAGE_POSITION:
1504 attribName = "vertex.position";
1505 break;
1506 case D3DDECLUSAGE_BLENDINDICES:
1507 /* not supported by openGL */
1508 attribName = "vertex.blend";
1509 break;
1510 case D3DDECLUSAGE_BLENDWEIGHT:
1511 attribName = "vertex.weight";
1512 break;
1513 case D3DDECLUSAGE_NORMAL:
1514 attribName = "vertex.normal";
1515 break;
1516 case D3DDECLUSAGE_PSIZE:
1517 attribName = "vertex.psize";
1518 break;
1519 case D3DDECLUSAGE_COLOR:
1520 if((*pToken & 0xF0000) >> 16 == 0) {
1521 attribName = "vertex.color";
1522 } else {
1523 attribName = "vertex.color.secondary";
1525 break;
1526 case D3DDECLUSAGE_TEXCOORD:
1528 char tmpChar[100];
1529 tmpChar[0] = 0;
1530 sprintf(tmpChar,"vertex.texcoord[%lu]",(*pToken & 0xF0000) >> 16);
1531 attribName = tmpChar;
1532 break;
1534 /* The following aren't directly supported by openGL, so shouldn't come up using namedarrays. */
1535 case D3DDECLUSAGE_TANGENT:
1536 attribName = "vertex.tangent";
1537 break;
1538 case D3DDECLUSAGE_BINORMAL:
1539 attribName = "vertex.binormal";
1540 break;
1541 case D3DDECLUSAGE_TESSFACTOR:
1542 attribName = "vertex.tessfactor";
1543 break;
1544 case D3DDECLUSAGE_POSITIONT:
1545 attribName = "vertex.possitionT";
1546 break;
1547 case D3DDECLUSAGE_FOG:
1548 attribName = "vertex.fogcoord";
1549 break;
1550 case D3DDECLUSAGE_DEPTH:
1551 attribName = "vertex.depth";
1552 break;
1553 case D3DDECLUSAGE_SAMPLE:
1554 attribName = "vertex.sample";
1555 break;
1556 default:
1557 FIXME("Unrecognised dcl %08lx", *pToken & 0xFFFF);
1560 char tmpChar[80];
1561 ++pToken;
1562 sprintf(tmpLine, "ATTRIB ");
1563 vshader_program_add_param(*pToken, 0, 0, tmpLine, This->namedArrays, This->constantsUsedBitmap);
1564 sprintf(tmpChar," = %s", attribName);
1565 strcat(tmpLine, tmpChar);
1566 strcat(tmpLine,";\n");
1567 ++lineNum;
1568 if (This->namedArrays) {
1569 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine);
1570 PNSTRCAT(pgmStr, tmpLine);
1572 } else {
1573 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine);
1576 } else {
1577 /* eat the token so it doesn't generate a warning */
1578 ++pToken;
1580 ++pToken;
1581 continue;
1582 case D3DSIO_DEF:
1584 char tmpChar[80];
1585 sprintf(tmpLine, "PARAM const%lu = {", *pToken & 0xFF);
1586 ++pToken;
1587 sprintf(tmpChar,"%f ,", *(float *)pToken);
1588 strcat(tmpLine, tmpChar);
1589 ++pToken;
1590 sprintf(tmpChar,"%f ,", *(float *)pToken);
1591 strcat(tmpLine, tmpChar);
1592 ++pToken;
1593 sprintf(tmpChar,"%f ,", *(float *)pToken);
1594 strcat(tmpLine, tmpChar);
1595 ++pToken;
1596 sprintf(tmpChar,"%f}", *(float *)pToken);
1597 strcat(tmpLine, tmpChar);
1599 strcat(tmpLine,";\n");
1600 ++lineNum;
1601 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1602 PNSTRCAT(pgmStr, tmpLine);
1604 ++pToken;
1605 continue;
1607 default:
1608 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1609 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1610 } else {
1611 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1614 if (curOpcode->num_params > 0) {
1615 vshader_program_add_param(*pToken, 0, 0, tmpLine, This->namedArrays, This->constantsUsedBitmap);
1617 ++pToken;
1618 for (i = 1; i < curOpcode->num_params; ++i) {
1619 strcat(tmpLine, ",");
1620 vshader_program_add_param(*pToken, 1, 0, tmpLine, This->namedArrays, This->constantsUsedBitmap);
1621 ++pToken;
1624 strcat(tmpLine,";\n");
1625 ++lineNum;
1626 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1627 PNSTRCAT(pgmStr, tmpLine);
1631 strcpy(tmpLine, "END\n");
1632 ++lineNum;
1633 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1634 PNSTRCAT(pgmStr, tmpLine);
1637 /* finally null terminate the pgmStr*/
1638 pgmStr[pgmLength] = 0;
1640 /* Check that Vertex Shaders are supported */
1641 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1642 /* Create the hw shader */
1643 /* TODO: change to resource.glObjectHandel or something like that */
1644 GL_EXTCALL(glGenProgramsARB(1, &This->prgId));
1645 TRACE("Creating a hw vertex shader, prg=%d\n", This->prgId);
1646 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->prgId));
1648 /* Create the program and check for errors */
1649 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr)/*pgmLength*/, pgmStr));
1650 if (glGetError() == GL_INVALID_OPERATION) {
1651 GLint errPos;
1652 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1653 FIXME("HW VertexShader Error at position: %d\n%s\n", errPos, glGetString(GL_PROGRAM_ERROR_STRING_ARB));
1654 This->prgId = -1;
1657 #if 1 /* if were using the data buffer of device then we don't need to free it */
1658 HeapFree(GetProcessHeap(), 0, pgmStr);
1659 #endif
1660 #undef PNSTRCAT
1663 BOOL IWineD3DVertexShaderImpl_ExecuteHAL(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
1665 * TODO: use the NV_vertex_program (or 1_1) extension
1666 * and specifics vendors (ARB_vertex_program??) variants for it
1668 return TRUE;
1671 HRESULT WINAPI IWineD3DVertexShaderImpl_ExecuteSW(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
1672 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1674 /** Vertex Shader Temporary Registers */
1675 WINED3DSHADERVECTOR R[12];
1676 /*D3DSHADERSCALAR A0;*/
1677 WINED3DSHADERVECTOR A[1];
1678 /** temporary Vector for modifier management */
1679 WINED3DSHADERVECTOR d;
1680 WINED3DSHADERVECTOR s[3];
1681 /** parser datas */
1682 const DWORD* pToken = This->function;
1683 const SHADER_OPCODE* curOpcode = NULL;
1684 /** functions parameters */
1685 WINED3DSHADERVECTOR* p[4];
1686 WINED3DSHADERVECTOR* p_send[4];
1687 DWORD i;
1689 /** init temporary register */
1690 memset(R, 0, 12 * sizeof(WINED3DSHADERVECTOR));
1692 /* vshader_program_parse(vshader); */
1693 #if 0 /* Must not be 1 in cvs */
1694 TRACE("Input:\n");
1695 TRACE_VSVECTOR(This->data->C[0]);
1696 TRACE_VSVECTOR(This->data->C[1]);
1697 TRACE_VSVECTOR(This->data->C[2]);
1698 TRACE_VSVECTOR(This->data->C[3]);
1699 TRACE_VSVECTOR(This->data->C[4]);
1700 TRACE_VSVECTOR(This->data->C[5]);
1701 TRACE_VSVECTOR(This->data->C[6]);
1702 TRACE_VSVECTOR(This->data->C[7]);
1703 TRACE_VSVECTOR(This->data->C[8]);
1704 TRACE_VSVECTOR(This->data->C[64]);
1705 TRACE_VSVECTOR(input->V[D3DVSDE_POSITION]);
1706 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDWEIGHT]);
1707 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDINDICES]);
1708 TRACE_VSVECTOR(input->V[D3DVSDE_NORMAL]);
1709 TRACE_VSVECTOR(input->V[D3DVSDE_PSIZE]);
1710 TRACE_VSVECTOR(input->V[D3DVSDE_DIFFUSE]);
1711 TRACE_VSVECTOR(input->V[D3DVSDE_SPECULAR]);
1712 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD0]);
1713 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD1]);
1714 #endif
1716 TRACE_VSVECTOR(vshader->data->C[64]);
1717 /* TODO: Run through all the tokens and find and labels, if, endifs, loops etc...., and make a labels list */
1719 /* the first dword is the version tag */
1720 /* TODO: parse it */
1722 if (vshader_is_version_token(*pToken)) { /** version */
1723 ++pToken;
1725 while (D3DVS_END() != *pToken) {
1726 if (vshader_is_comment_token(*pToken)) { /** comment */
1727 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1728 ++pToken;
1729 pToken += comment_len;
1730 continue ;
1732 curOpcode = vshader_program_get_opcode(*pToken);
1733 ++pToken;
1734 if (NULL == curOpcode) {
1735 i = 0;
1736 /* unknown current opcode ... */
1737 /* TODO: Think of a name for 0x80000000 and replace its use with a constant */
1738 while (*pToken & 0x80000000) {
1739 if (i == 0) {
1740 FIXME("unrecognized opcode: pos=%d token=%08lX\n", (pToken - 1) - This->function, *(pToken - 1));
1742 FIXME("unrecognized opcode param: pos=%d token=%08lX what=", pToken - This->function, *pToken);
1743 vshader_program_dump_param(*pToken, i);
1744 TRACE("\n");
1745 ++i;
1746 ++pToken;
1748 /* return FALSE; */
1749 } else {
1750 if (curOpcode->num_params > 0) {
1751 /* TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken); */
1752 for (i = 0; i < curOpcode->num_params; ++i) {
1753 DWORD reg = pToken[i] & 0x00001FFF;
1754 DWORD regtype = ((pToken[i] & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
1756 switch (regtype << D3DSP_REGTYPE_SHIFT) {
1757 case D3DSPR_TEMP:
1758 /* TRACE("p[%d]=R[%d]\n", i, reg); */
1759 p[i] = &R[reg];
1760 break;
1761 case D3DSPR_INPUT:
1762 /* TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]); */
1763 p[i] = &input->V[reg];
1764 break;
1765 case D3DSPR_CONST:
1766 if (pToken[i] & D3DVS_ADDRMODE_RELATIVE) {
1767 p[i] = &This->data->C[(DWORD) A[0].x + reg];
1768 } else {
1769 p[i] = &This->data->C[reg];
1771 break;
1772 case D3DSPR_ADDR: /* case D3DSPR_TEXTURE: */
1773 if (0 != reg) {
1774 ERR("cannot handle address registers != a0, forcing use of a0\n");
1775 reg = 0;
1777 /* TRACE("p[%d]=A[%d]\n", i, reg); */
1778 p[i] = &A[reg];
1779 break;
1780 case D3DSPR_RASTOUT:
1781 switch (reg) {
1782 case D3DSRO_POSITION:
1783 p[i] = &output->oPos;
1784 break;
1785 case D3DSRO_FOG:
1786 p[i] = &output->oFog;
1787 break;
1788 case D3DSRO_POINT_SIZE:
1789 p[i] = &output->oPts;
1790 break;
1792 break;
1793 case D3DSPR_ATTROUT:
1794 /* TRACE("p[%d]=oD[%d]\n", i, reg); */
1795 p[i] = &output->oD[reg];
1796 break;
1797 case D3DSPR_TEXCRDOUT:
1798 /* TRACE("p[%d]=oT[%d]\n", i, reg); */
1799 p[i] = &output->oT[reg];
1800 break;
1801 /* TODO Decls and defs */
1802 #if 0
1803 case D3DSPR_DCL:
1804 case D3DSPR_DEF:
1805 #endif
1806 default:
1807 break;
1810 if (i > 0) { /* input reg */
1811 DWORD swizzle = (pToken[i] & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
1812 UINT isNegative = ((pToken[i] & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG);
1814 if (!isNegative && (D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) {
1815 /* TRACE("p[%d] not swizzled\n", i); */
1816 p_send[i] = p[i];
1817 } else {
1818 DWORD swizzle_x = swizzle & 0x03;
1819 DWORD swizzle_y = (swizzle >> 2) & 0x03;
1820 DWORD swizzle_z = (swizzle >> 4) & 0x03;
1821 DWORD swizzle_w = (swizzle >> 6) & 0x03;
1822 /* TRACE("p[%d] swizzled\n", i); */
1823 float* tt = (float*) p[i];
1824 s[i].x = (isNegative) ? -tt[swizzle_x] : tt[swizzle_x];
1825 s[i].y = (isNegative) ? -tt[swizzle_y] : tt[swizzle_y];
1826 s[i].z = (isNegative) ? -tt[swizzle_z] : tt[swizzle_z];
1827 s[i].w = (isNegative) ? -tt[swizzle_w] : tt[swizzle_w];
1828 p_send[i] = &s[i];
1830 } else { /* output reg */
1831 if ((pToken[i] & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) {
1832 p_send[i] = p[i];
1833 } else {
1834 p_send[i] = &d; /* to be post-processed for modifiers management */
1840 switch (curOpcode->num_params) {
1841 case 0:
1842 curOpcode->soft_fct();
1843 break;
1844 case 1:
1845 curOpcode->soft_fct(p_send[0]);
1846 break;
1847 case 2:
1848 curOpcode->soft_fct(p_send[0], p_send[1]);
1849 break;
1850 case 3:
1851 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2]);
1852 break;
1853 case 4:
1854 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3]);
1855 break;
1856 case 5:
1857 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4]);
1858 break;
1859 case 6:
1860 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4], p_send[5]);
1861 break;
1862 default:
1863 ERR("%s too many params: %u\n", curOpcode->name, curOpcode->num_params);
1866 /* check if output reg modifier post-process */
1867 if (curOpcode->num_params > 0 && (pToken[0] & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1868 if (pToken[0] & D3DSP_WRITEMASK_0) p[0]->x = d.x;
1869 if (pToken[0] & D3DSP_WRITEMASK_1) p[0]->y = d.y;
1870 if (pToken[0] & D3DSP_WRITEMASK_2) p[0]->z = d.z;
1871 if (pToken[0] & D3DSP_WRITEMASK_3) p[0]->w = d.w;
1873 #if 0
1874 TRACE_VSVECTOR(output->oPos);
1875 TRACE_VSVECTOR(output->oD[0]);
1876 TRACE_VSVECTOR(output->oD[1]);
1877 TRACE_VSVECTOR(output->oT[0]);
1878 TRACE_VSVECTOR(output->oT[1]);
1879 TRACE_VSVECTOR(R[0]);
1880 TRACE_VSVECTOR(R[1]);
1881 TRACE_VSVECTOR(R[2]);
1882 TRACE_VSVECTOR(R[3]);
1883 TRACE_VSVECTOR(R[4]);
1884 TRACE_VSVECTOR(R[5]);
1885 #endif
1887 /* to next opcode token */
1888 pToken += curOpcode->num_params;
1890 #if 0
1891 TRACE("End of current instruction:\n");
1892 TRACE_VSVECTOR(output->oPos);
1893 TRACE_VSVECTOR(output->oD[0]);
1894 TRACE_VSVECTOR(output->oD[1]);
1895 TRACE_VSVECTOR(output->oT[0]);
1896 TRACE_VSVECTOR(output->oT[1]);
1897 TRACE_VSVECTOR(R[0]);
1898 TRACE_VSVECTOR(R[1]);
1899 TRACE_VSVECTOR(R[2]);
1900 TRACE_VSVECTOR(R[3]);
1901 TRACE_VSVECTOR(R[4]);
1902 TRACE_VSVECTOR(R[5]);
1903 #endif
1905 #if 0 /* Must not be 1 in cvs */
1906 TRACE("Output:\n");
1907 TRACE_VSVECTOR(output->oPos);
1908 TRACE_VSVECTOR(output->oD[0]);
1909 TRACE_VSVECTOR(output->oD[1]);
1910 TRACE_VSVECTOR(output->oT[0]);
1911 TRACE_VSVECTOR(output->oT[1]);
1912 #endif
1913 return D3D_OK;
1916 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantF(IWineD3DVertexShader *iface, UINT StartRegister, CONST FLOAT *pConstantData, UINT Vector4fCount) {
1917 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1918 FIXME("(%p) : stub\n", This);
1919 return D3D_OK;
1922 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantF(IWineD3DVertexShader *iface, UINT StartRegister, FLOAT *pConstantData, UINT Vector4fCount) {
1923 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1924 FIXME("(%p) : stub\n", This);
1925 return D3D_OK;
1928 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantI(IWineD3DVertexShader *iface, UINT StartRegister, CONST int *pConstantData, UINT Vector4iCount) {
1929 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1930 if (StartRegister + Vector4iCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1931 ERR("(%p) : SetVertexShaderConstantI C[%u] invalid\n", This, StartRegister);
1932 return D3DERR_INVALIDCALL;
1934 if (NULL == pConstantData) {
1935 return D3DERR_INVALIDCALL;
1937 FIXME("(%p) : stub\n", This);
1938 return D3D_OK;
1941 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantI(IWineD3DVertexShader *iface, UINT StartRegister, int *pConstantData, UINT Vector4iCount) {
1942 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1943 TRACE("(%p) : C[%u] count=%u\n", This, StartRegister, Vector4iCount);
1944 if (StartRegister + Vector4iCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1945 return D3DERR_INVALIDCALL;
1947 if (NULL == pConstantData) {
1948 return D3DERR_INVALIDCALL;
1950 FIXME("(%p) : stub\n", This);
1951 return D3D_OK;
1954 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantB(IWineD3DVertexShader *iface, UINT StartRegister, CONST BOOL *pConstantData, UINT BoolCount) {
1955 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1956 if (StartRegister + BoolCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1957 ERR("(%p) : SetVertexShaderConstantB C[%u] invalid\n", This, StartRegister);
1958 return D3DERR_INVALIDCALL;
1960 if (NULL == pConstantData) {
1961 return D3DERR_INVALIDCALL;
1963 FIXME("(%p) : stub\n", This);
1964 return D3D_OK;
1967 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantB(IWineD3DVertexShader *iface, UINT StartRegister, BOOL *pConstantData, UINT BoolCount) {
1968 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl *)iface;
1969 FIXME("(%p) : stub\n", This);
1970 return D3D_OK;
1973 #endif
1975 /* *******************************************
1976 IWineD3DVertexShader IUnknown parts follow
1977 ******************************************* */
1978 HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
1980 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1981 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
1982 if (IsEqualGUID(riid, &IID_IUnknown)
1983 || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
1984 IUnknown_AddRef(iface);
1985 *ppobj = This;
1986 return D3D_OK;
1988 return E_NOINTERFACE;
1991 ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
1992 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1993 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
1994 return InterlockedIncrement(&This->ref);
1997 ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
1998 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1999 ULONG ref;
2000 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
2001 ref = InterlockedDecrement(&This->ref);
2002 if (ref == 0) {
2003 HeapFree(GetProcessHeap(), 0, This);
2005 return ref;
2008 /* *******************************************
2009 IWineD3DVertexShader IWineD3DVertexShader parts follow
2010 ******************************************* */
2012 HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
2013 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2015 *parent = (IUnknown*)This->parent;
2016 IUnknown_AddRef(*parent);
2017 TRACE("(%p) : returning %p\n", This, *parent);
2018 return D3D_OK;
2021 HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
2022 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2023 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
2024 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
2025 TRACE("(%p) returning %p\n", This, *pDevice);
2026 return D3D_OK;
2029 HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
2030 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
2031 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
2033 if (NULL == pData) {
2034 *pSizeOfData = This->functionLength;
2035 return D3D_OK;
2037 if (*pSizeOfData < This->functionLength) {
2038 *pSizeOfData = This->functionLength;
2039 return D3DERR_MOREDATA;
2041 if (NULL == This->function) { /* no function defined */
2042 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
2043 (*(DWORD **) pData) = NULL;
2044 } else {
2045 if(This->functionLength == 0){
2048 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
2049 memcpy(pData, This->function, This->functionLength);
2051 return D3D_OK;
2054 HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
2055 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
2056 const DWORD* pToken = pFunction;
2057 const SHADER_OPCODE* curOpcode = NULL;
2058 DWORD len = 0;
2059 DWORD i;
2060 TRACE("(%p) : Parsing programme\n", This);
2062 if (NULL != pToken) {
2063 while (D3DVS_END() != *pToken) {
2064 if (vshader_is_version_token(*pToken)) { /** version */
2065 TRACE("vs_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
2066 ++pToken;
2067 ++len;
2068 continue;
2070 if (vshader_is_comment_token(*pToken)) { /** comment */
2071 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2072 ++pToken;
2073 TRACE("//%s\n", (char*)pToken);
2074 pToken += comment_len;
2075 len += comment_len + 1;
2076 continue;
2078 curOpcode = vshader_program_get_opcode(*pToken);
2079 ++pToken;
2080 ++len;
2081 if (NULL == curOpcode) {
2082 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
2083 while (*pToken & 0x80000000) {
2084 /* unknown current opcode ... */
2085 FIXME("unrecognized opcode: %08lx", *pToken);
2086 ++pToken;
2087 ++len;
2088 TRACE("\n");
2091 } else {
2092 if (curOpcode->opcode == D3DSIO_DCL) {
2093 TRACE("dcl_");
2094 switch(*pToken & 0xFFFF) {
2095 case D3DDECLUSAGE_POSITION:
2096 TRACE("%s%ld ", "position",(*pToken & 0xF0000) >> 16);
2097 break;
2098 case D3DDECLUSAGE_BLENDINDICES:
2099 TRACE("%s ", "blend");
2100 break;
2101 case D3DDECLUSAGE_BLENDWEIGHT:
2102 TRACE("%s ", "weight");
2103 break;
2104 case D3DDECLUSAGE_NORMAL:
2105 TRACE("%s%ld ", "normal",(*pToken & 0xF0000) >> 16);
2106 break;
2107 case D3DDECLUSAGE_PSIZE:
2108 TRACE("%s ", "psize");
2109 break;
2110 case D3DDECLUSAGE_COLOR:
2111 if((*pToken & 0xF0000) >> 16 == 0) {
2112 TRACE("%s ", "color");
2113 } else {
2114 TRACE("%s ", "specular");
2116 break;
2117 case D3DDECLUSAGE_TEXCOORD:
2118 TRACE("%s%ld ", "texture", (*pToken & 0xF0000) >> 16);
2119 break;
2120 case D3DDECLUSAGE_TANGENT:
2121 TRACE("%s ", "tangent");
2122 break;
2123 case D3DDECLUSAGE_BINORMAL:
2124 TRACE("%s ", "binormal");
2125 break;
2126 case D3DDECLUSAGE_TESSFACTOR:
2127 TRACE("%s ", "tessfactor");
2128 break;
2129 case D3DDECLUSAGE_POSITIONT:
2130 TRACE("%s%ld ", "positionT",(*pToken & 0xF0000) >> 16);
2131 break;
2132 case D3DDECLUSAGE_FOG:
2133 TRACE("%s ", "fog");
2134 break;
2135 case D3DDECLUSAGE_DEPTH:
2136 TRACE("%s ", "depth");
2137 break;
2138 case D3DDECLUSAGE_SAMPLE:
2139 TRACE("%s ", "sample");
2140 break;
2141 default:
2142 FIXME("Unrecognised dcl %08lx", *pToken & 0xFFFF);
2144 ++pToken;
2145 ++len;
2146 vshader_program_dump_vs_param(*pToken, 0);
2147 ++pToken;
2148 ++len;
2149 } else
2150 if (curOpcode->opcode == D3DSIO_DEF) {
2151 TRACE("def c%lu = ", *pToken & 0xFF);
2152 ++pToken;
2153 ++len;
2154 TRACE("%f ,", *(float *)pToken);
2155 ++pToken;
2156 ++len;
2157 TRACE("%f ,", *(float *)pToken);
2158 ++pToken;
2159 ++len;
2160 TRACE("%f ,", *(float *)pToken);
2161 ++pToken;
2162 ++len;
2163 TRACE("%f", *(float *)pToken);
2164 ++pToken;
2165 ++len;
2166 } else {
2167 TRACE("%s ", curOpcode->name);
2168 if (curOpcode->num_params > 0) {
2169 vshader_program_dump_vs_param(*pToken, 0);
2170 ++pToken;
2171 ++len;
2172 for (i = 1; i < curOpcode->num_params; ++i) {
2173 TRACE(", ");
2174 vshader_program_dump_vs_param(*pToken, 1);
2175 ++pToken;
2176 ++len;
2180 TRACE("\n");
2183 This->functionLength = (len + 1) * sizeof(DWORD);
2184 } else {
2185 This->functionLength = 1; /* no Function defined use fixed function vertex processing */
2188 /* Generate HW shader in needed */
2189 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
2190 #if 1
2191 IWineD3DVertexShaderImpl_GenerateProgramArbHW(iface, pFunction);
2192 #endif
2195 /* copy the function ... because it will certainly be released by application */
2196 if (NULL != pFunction) {
2197 This->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->functionLength);
2198 memcpy((void *)This->function, pFunction, This->functionLength);
2199 } else {
2200 This->function = NULL;
2202 return D3D_OK;
2205 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
2207 /*** IUnknown methods ***/
2208 IWineD3DVertexShaderImpl_QueryInterface,
2209 IWineD3DVertexShaderImpl_AddRef,
2210 IWineD3DVertexShaderImpl_Release,
2211 /*** IWineD3DVertexShader methods ***/
2212 IWineD3DVertexShaderImpl_GetParent,
2213 IWineD3DVertexShaderImpl_GetDevice,
2214 IWineD3DVertexShaderImpl_GetFunction,
2215 IWineD3DVertexShaderImpl_SetFunction