wined3d: Move the SHADER_OPCODE definition to wined3d_private.h.
[wine/multimedia.git] / dlls / wined3d / vertexshader.c
blobcfa8f2e726ed81831d2de7831dd2f858522698e1
1 /*
2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "config.h"
25 #include <math.h>
26 #include <stdio.h>
28 #include "wined3d_private.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
32 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
34 /* Shader debugging - Change the following line to enable debugging of software
35 vertex shaders */
36 #if 0 /* Must not be 1 in cvs version */
37 # define VSTRACE(A) TRACE A
38 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
39 #else
40 # define VSTRACE(A)
41 # define TRACE_VSVECTOR(name)
42 #endif
44 #if 1 /* FIXME : Needs sorting when vshader code moved in properly */
46 /**
47 * DirectX9 SDK download
48 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
50 * Exploring D3DX
51 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
53 * Using Vertex Shaders
54 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
56 * Dx9 New
57 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
59 * Dx9 Shaders
60 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
65 * Dx9 D3DX
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
68 * FVF
69 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
71 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
72 * http://developer.nvidia.com/view.asp?IO=vstovp
74 * NVIDIA: Memory Management with VAR
75 * http://developer.nvidia.com/view.asp?IO=var_memory_management
78 /* TODO: Vertex and Pixel shaders are almost identical, the only exception being the way that some of the data is looked up or the availability of some of the data, i.e. some instructions are only valid for pshaders and some for vshaders
79 because of this the bulk of the software pipeline can be shared between pixel and vertex shaders... and it wouldn't surprise me if the programs can be cross compiled using a large body of shared code */
81 #define GLNAME_REQUIRE_GLSL ((const char *)1)
83 /*******************************
84 * vshader functions software VM
87 void vshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
88 d->x = s0->x + s1->x;
89 d->y = s0->y + s1->y;
90 d->z = s0->z + s1->z;
91 d->w = s0->w + s1->w;
92 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
93 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
96 void vshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
97 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
98 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
99 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
102 void vshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
103 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
104 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
105 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
108 void vshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
109 d->x = 1.0f;
110 d->y = s0->y * s1->y;
111 d->z = s0->z;
112 d->w = s1->w;
113 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
114 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
117 void vshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
118 union {
119 float f;
120 DWORD d;
121 } tmp;
123 tmp.f = floorf(s0->w);
124 d->x = powf(2.0f, tmp.f);
125 d->y = s0->w - tmp.f;
126 tmp.f = powf(2.0f, s0->w);
127 tmp.d &= 0xFFFFFF00U;
128 d->z = tmp.f;
129 d->w = 1.0f;
130 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
131 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
134 void vshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
135 d->x = 1.0f;
136 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
137 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
138 d->w = 1.0f;
139 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
140 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
143 void vshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
144 float tmp_f = fabsf(s0->w);
145 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
146 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
147 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
150 void vshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
151 d->x = s0->x * s1->x + s2->x;
152 d->y = s0->y * s1->y + s2->y;
153 d->z = s0->z * s1->z + s2->z;
154 d->w = s0->w * s1->w + s2->w;
155 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
156 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
159 void vshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
160 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
161 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
162 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
163 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
164 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
168 void vshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
169 d->x = (s0->x < s1->x) ? s0->x : s1->x;
170 d->y = (s0->y < s1->y) ? s0->y : s1->y;
171 d->z = (s0->z < s1->z) ? s0->z : s1->z;
172 d->w = (s0->w < s1->w) ? s0->w : s1->w;
173 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
174 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
177 void vshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
178 d->x = s0->x;
179 d->y = s0->y;
180 d->z = s0->z;
181 d->w = s0->w;
182 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
183 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
186 void vshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
187 d->x = s0->x * s1->x;
188 d->y = s0->y * s1->y;
189 d->z = s0->z * s1->z;
190 d->w = s0->w * s1->w;
191 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
192 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: does nothing apart from emitting a trace line when VSTRACE is enabled. */
void vshader_nop(void)
{
    VSTRACE(("executing nop\n"));
}
200 void vshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
201 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
202 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
203 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
206 void vshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
207 float tmp_f = fabsf(s0->w);
208 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
209 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
210 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
213 void vshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
214 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
215 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
216 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
217 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
218 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
219 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
222 void vshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
223 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
224 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
225 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
226 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
227 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
228 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
231 void vshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
232 d->x = s0->x - s1->x;
233 d->y = s0->y - s1->y;
234 d->z = s0->z - s1->z;
235 d->w = s0->w - s1->w;
236 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
237 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
241 * Version 1.1 specific
244 void vshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
245 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
246 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
247 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
250 void vshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
251 float tmp_f = fabsf(s0->w);
252 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
253 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
254 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
257 void vshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
258 d->x = s0->x - floorf(s0->x);
259 d->y = s0->y - floorf(s0->y);
260 d->z = 0.0f;
261 d->w = 1.0f;
262 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
263 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
/* Fixed-size FLOAT matrix types named D3DMATRIX<R><C>, declared as FLOAT[R][C].
 * They are the matrix operand types of the m4x4 / m4x3 / m3x4 / m3x3 / m3x2
 * helpers below; note that m4x3 takes a D3DMATRIX34 and m3x4 a D3DMATRIX43. */
266 typedef FLOAT D3DMATRIX44[4][4];
267 typedef FLOAT D3DMATRIX43[4][3];
268 typedef FLOAT D3DMATRIX34[3][4];
269 typedef FLOAT D3DMATRIX33[3][3];
270 typedef FLOAT D3DMATRIX23[2][3];
272 void vshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
274 * Buggy CODE: here only if cast not work for copy/paste
275 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
276 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
277 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
278 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
279 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
280 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
281 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
283 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
284 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
285 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
286 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
287 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
288 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
289 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
290 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
293 void vshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
294 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
295 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
296 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
297 d->w = 1.0f;
298 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
299 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
300 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
301 VSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
304 void vshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
305 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
306 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
307 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
308 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
309 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
310 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
311 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
312 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
315 void vshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
316 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
317 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
318 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
319 d->w = 1.0f;
320 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
321 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
322 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
323 VSTRACE(("executing m3x3(4): (%f) \n", d->w));
326 void vshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
327 FIXME("check\n");
328 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
329 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
330 d->z = 0.0f;
331 d->w = 1.0f;
335 * Version 2.0 specific
337 void vshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
338 d->x = s0->x * (s1->x - s2->x) + s2->x;
339 d->y = s0->y * (s1->y - s2->y) + s2->y;
340 d->z = s0->z * (s1->z - s2->z) + s2->z;
341 d->w = s0->w * (s1->w - s2->w) + s2->w;
344 void vshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
345 d->x = s0->y * s1->z - s0->z * s1->y;
346 d->y = s0->z * s1->x - s0->x * s1->z;
347 d->z = s0->x * s1->y - s0->y * s1->x;
348 d->w = 0.9f; /* w is undefined, so set it to something safeish */
350 VSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
351 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
354 void vshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
356 d->x = fabsf(s0->x);
357 d->y = fabsf(s0->y);
358 d->z = fabsf(s0->z);
359 d->w = fabsf(s0->w);
360 VSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
361 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
364 /* Stubs */
365 void vshader_texcoord(WINED3DSHADERVECTOR* d) {
366 FIXME(" : Stub\n");
369 void vshader_texkill(WINED3DSHADERVECTOR* d) {
370 FIXME(" : Stub\n");
373 void vshader_tex(WINED3DSHADERVECTOR* d) {
374 FIXME(" : Stub\n");
376 void vshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
377 FIXME(" : Stub\n");
380 void vshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
381 FIXME(" : Stub\n");
384 void vshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
385 FIXME(" : Stub\n");
388 void vshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
389 FIXME(" : Stub\n");
392 void vshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
393 FIXME(" : Stub\n");
396 void vshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
397 FIXME(" : Stub\n");
400 void vshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
401 FIXME(" : Stub\n");
404 void vshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
405 FIXME(" : Stub\n");
408 void vshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
409 FIXME(" : Stub\n");
412 void vshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
413 FIXME(" : Stub\n");
416 void vshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
417 FIXME(" : Stub\n");
420 void vshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
421 FIXME(" : Stub\n");
424 void vshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
425 FIXME(" : Stub\n");
428 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
429 void vshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
430 FIXME(" : Stub\n");
433 void vshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
434 FIXME(" : Stub\n");
437 void vshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
438 FIXME(" : Stub\n");
441 void vshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
442 FIXME(" : Stub\n");
445 void vshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
446 FIXME(" : Stub\n");
449 void vshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
450 FIXME(" : Stub\n");
453 void vshader_texdepth(WINED3DSHADERVECTOR* d) {
454 FIXME(" : Stub\n");
457 void vshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
458 FIXME(" : Stub\n");
461 void vshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
462 FIXME(" : Stub\n");
465 void vshader_call(WINED3DSHADERVECTOR* d) {
466 FIXME(" : Stub\n");
469 void vshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
470 FIXME(" : Stub\n");
473 void vshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
474 FIXME(" : Stub\n");
477 void vshader_ret(WINED3DSHADERVECTOR* d) {
478 FIXME(" : Stub\n");
481 void vshader_endloop(WINED3DSHADERVECTOR* d) {
482 FIXME(" : Stub\n");
485 void vshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
486 FIXME(" : Stub\n");
489 void vshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
490 FIXME(" : Stub\n");
493 void vshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
494 FIXME(" : Stub\n");
497 void vshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
498 FIXME(" : Stub\n");
501 void vshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
502 FIXME(" : Stub\n");
505 void vshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
506 FIXME(" : Stub\n");
509 void vshader_endrep(void) {
510 FIXME(" : Stub\n");
513 void vshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
514 FIXME(" : Stub\n");
517 void vshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
518 FIXME(" : Stub\n");
521 void vshader_else(WINED3DSHADERVECTOR* d) {
522 FIXME(" : Stub\n");
525 void vshader_label(WINED3DSHADERVECTOR* d) {
526 FIXME(" : Stub\n");
529 void vshader_endif(WINED3DSHADERVECTOR* d) {
530 FIXME(" : Stub\n");
533 void vshader_break(WINED3DSHADERVECTOR* d) {
534 FIXME(" : Stub\n");
537 void vshader_breakc(WINED3DSHADERVECTOR* d) {
538 FIXME(" : Stub\n");
541 void vshader_mova(WINED3DSHADERVECTOR* d) {
542 FIXME(" : Stub\n");
545 void vshader_defb(WINED3DSHADERVECTOR* d) {
546 FIXME(" : Stub\n");
549 void vshader_defi(WINED3DSHADERVECTOR* d) {
550 FIXME(" : Stub\n");
553 void vshader_dp2add(WINED3DSHADERVECTOR* d) {
554 FIXME(" : Stub\n");
557 void vshader_dsx(WINED3DSHADERVECTOR* d) {
558 FIXME(" : Stub\n");
561 void vshader_dsy(WINED3DSHADERVECTOR* d) {
562 FIXME(" : Stub\n");
565 void vshader_texldd(WINED3DSHADERVECTOR* d) {
566 FIXME(" : Stub\n");
569 void vshader_setp(WINED3DSHADERVECTOR* d) {
570 FIXME(" : Stub\n");
573 void vshader_texldl(WINED3DSHADERVECTOR* d) {
574 FIXME(" : Stub\n");
577 void vshader_breakp(WINED3DSHADERVECTOR* d) {
578 FIXME(" : Stub\n");
583 * log, exp, frc and m*x* appear to be macro instructions ... to be checked
585 static CONST SHADER_OPCODE vshader_ins [] = {
586 {D3DSIO_NOP, "nop", "NOP", 0, vshader_nop, 0, 0},
587 {D3DSIO_MOV, "mov", "MOV", 2, vshader_mov, 0, 0},
588 {D3DSIO_ADD, "add", "ADD", 3, vshader_add, 0, 0},
589 {D3DSIO_SUB, "sub", "SUB", 3, vshader_sub, 0, 0},
590 {D3DSIO_MAD, "mad", "MAD", 4, vshader_mad, 0, 0},
591 {D3DSIO_MUL, "mul", "MUL", 3, vshader_mul, 0, 0},
592 {D3DSIO_RCP, "rcp", "RCP", 2, vshader_rcp, 0, 0},
593 {D3DSIO_RSQ, "rsq", "RSQ", 2, vshader_rsq, 0, 0},
594 {D3DSIO_DP3, "dp3", "DP3", 3, vshader_dp3, 0, 0},
595 {D3DSIO_DP4, "dp4", "DP4", 3, vshader_dp4, 0, 0},
596 {D3DSIO_MIN, "min", "MIN", 3, vshader_min, 0, 0},
597 {D3DSIO_MAX, "max", "MAX", 3, vshader_max, 0, 0},
598 {D3DSIO_SLT, "slt", "SLT", 3, vshader_slt, 0, 0},
599 {D3DSIO_SGE, "sge", "SGE", 3, vshader_sge, 0, 0},
600 {D3DSIO_ABS, "abs", "ABS", 2, vshader_abs, 0, 0},
601 {D3DSIO_EXP, "exp", "EX2", 2, vshader_exp, 0, 0},
602 {D3DSIO_LOG, "log", "LG2", 2, vshader_log, 0, 0},
603 {D3DSIO_LIT, "lit", "LIT", 2, vshader_lit, 0, 0},
604 {D3DSIO_DST, "dst", "DST", 3, vshader_dst, 0, 0},
605 {D3DSIO_LRP, "lrp", "LRP", 4, vshader_lrp, 0, 0},
606 {D3DSIO_FRC, "frc", "FRC", 2, vshader_frc, 0, 0},
607 {D3DSIO_M4x4, "m4x4", "undefined", 3, vshader_m4x4, 0, 0},
608 {D3DSIO_M4x3, "m4x3", "undefined", 3, vshader_m4x3, 0, 0},
609 {D3DSIO_M3x4, "m3x4", "undefined", 3, vshader_m3x4, 0, 0},
610 {D3DSIO_M3x3, "m3x3", "undefined", 3, vshader_m3x3, 0, 0},
611 {D3DSIO_M3x2, "m3x2", "undefined", 3, vshader_m3x2, 0, 0},
612 /** FIXME: use direct access so add the others opcodes as stubs */
613 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
614 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
615 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, vshader_call, 0, 0},
616 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, vshader_callnz, 0, 0},
617 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, vshader_loop, 0, 0},
618 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, vshader_ret, 0, 0},
619 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, vshader_endloop, 0, 0},
620 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, vshader_label, 0, 0},
621 /* DCL is a specil operation */
622 {D3DSIO_DCL, "dcl", NULL, 1, vshader_dcl, 0, 0},
623 {D3DSIO_POW, "pow", "POW", 3, vshader_pow, 0, 0},
624 {D3DSIO_CRS, "crs", "XPS", 3, vshader_crs, 0, 0},
625 /* TODO: sng can possibly be performed as
626 RCP tmp, vec
627 MUL out, tmp, vec*/
628 {D3DSIO_SGN, "sng", NULL, 2, vshader_sng, 0, 0},
629 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
630 DP3 tmp , vec, vec;
631 RSQ tmp, tmp.x;
632 MUL vec.xyz, vec, tmp;
633 but I think this is better because it accounts for w properly.
634 DP3 tmp , vec, vec;
635 RSQ tmp, tmp.x;
636 MUL vec, vec, tmp;
639 {D3DSIO_NRM, "nrm", NULL, 2, vshader_nrm, 0, 0},
640 {D3DSIO_SINCOS, "sincos", NULL, 2, vshader_sincos, 0, 0},
641 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, vshader_rep, 0, 0},
642 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, vshader_endrep, 0, 0},
643 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, vshader_if, 0, 0},
644 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, vshader_ifc, 0, 0},
645 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, vshader_else, 0, 0},
646 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, vshader_endif, 0, 0},
647 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, vshader_break, 0, 0},
648 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, vshader_breakc, 0, 0},
649 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, vshader_mova, 0, 0},
650 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, vshader_defb, 0, 0},
651 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, vshader_defi, 0, 0},
653 {D3DSIO_TEXCOORD, "texcoord", GLNAME_REQUIRE_GLSL, 1, vshader_texcoord, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
654 {D3DSIO_TEXCOORD, "texcrd", GLNAME_REQUIRE_GLSL, 2, vshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
655 {D3DSIO_TEXKILL, "texkill", GLNAME_REQUIRE_GLSL, 1, vshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
656 {D3DSIO_TEX, "tex", GLNAME_REQUIRE_GLSL, 1, vshader_tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
657 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, vshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
658 {D3DSIO_TEXBEM, "texbem", GLNAME_REQUIRE_GLSL, 2, vshader_texbem, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
659 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, vshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
660 {D3DSIO_TEXREG2AR,"texreg2ar",GLNAME_REQUIRE_GLSL, 2, vshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
661 {D3DSIO_TEXREG2GB,"texreg2gb",GLNAME_REQUIRE_GLSL, 2, vshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
662 {D3DSIO_TEXM3x2PAD, "texm3x2pad", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
663 {D3DSIO_TEXM3x2TEX, "texm3x2tex", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
664 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
665 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", GLNAME_REQUIRE_GLSL, 3, vshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
666 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
668 {D3DSIO_EXPP, "expp", "EXP", 2, vshader_expp, 0, 0},
669 {D3DSIO_LOGP, "logp", "LOG", 2, vshader_logp, 0, 0},
670 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, vshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
671 /* def is a special operation */
672 {D3DSIO_DEF, "def", NULL, 5, vshader_def, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
673 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, vshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
674 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, vshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
675 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
676 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, vshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
677 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, vshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
678 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, vshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
679 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, vshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
680 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, vshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
681 /* TODO: dp2add can be made out of multiple instuctions */
682 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, vshader_dp2add, 0, 0},
683 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, vshader_dsx, 0, 0},
684 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, vshader_dsy, 0, 0},
685 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, vshader_texldd, 0, 0},
686 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, vshader_setp, 0, 0},
687 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, vshader_texldl, 0, 0},
688 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, vshader_breakp, 0, 0},
689 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, vshader_nop, 0, 0},
690 {0, NULL, NULL, 0, NULL, 0, 0}
694 inline static const SHADER_OPCODE* vshader_program_get_opcode(const DWORD code) {
695 DWORD i = 0;
696 /** TODO: use dichotomic search or hash table */
697 while (NULL != vshader_ins[i].name) {
698 if ((code & D3DSI_OPCODE_MASK) == vshader_ins[i].opcode) {
699 return &vshader_ins[i];
701 ++i;
703 FIXME("Unsupported opcode %lx\n",code);
704 return NULL;
707 inline static void vshader_program_dump_param(const DWORD param, int input) {
708 static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" };
709 static const char swizzle_reg_chars[] = "xyzw";
711 DWORD reg = param & 0x00001FFF;
712 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
714 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) TRACE("-");
716 switch (regtype) {
717 case D3DSPR_TEMP:
718 TRACE("R[%lu]", reg);
719 break;
720 case D3DSPR_INPUT:
721 TRACE("v%lu", reg);
722 break;
723 case D3DSPR_CONST:
724 TRACE("C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
725 break;
726 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
727 TRACE("a[%lu]", reg);
728 break;
729 case D3DSPR_RASTOUT:
730 TRACE("%s", rastout_reg_names[reg]);
731 break;
732 case D3DSPR_ATTROUT:
733 TRACE("oD[%lu]", reg);
734 break;
735 case D3DSPR_TEXCRDOUT:
736 TRACE("oT[%lu]", reg);
737 break;
738 default:
739 FIXME("Unknown %lu %u reg %lu\n",regtype, D3DSPR_ATTROUT, reg);
740 break;
743 if (!input) {
744 /** operand output */
745 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
746 if (param & D3DSP_WRITEMASK_0) TRACE(".x");
747 if (param & D3DSP_WRITEMASK_1) TRACE(".y");
748 if (param & D3DSP_WRITEMASK_2) TRACE(".z");
749 if (param & D3DSP_WRITEMASK_3) TRACE(".w");
751 } else {
752 /** operand input */
753 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
754 DWORD swizzle_x = swizzle & 0x03;
755 DWORD swizzle_y = (swizzle >> 2) & 0x03;
756 DWORD swizzle_z = (swizzle >> 4) & 0x03;
757 DWORD swizzle_w = (swizzle >> 6) & 0x03;
759 * swizzle bits fields:
760 * WWZZYYXX
762 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
763 if (swizzle_x == swizzle_y &&
764 swizzle_x == swizzle_z &&
765 swizzle_x == swizzle_w) {
766 TRACE(".%c", swizzle_reg_chars[swizzle_x]);
767 } else {
768 TRACE(".%c%c%c%c",
769 swizzle_reg_chars[swizzle_x],
770 swizzle_reg_chars[swizzle_y],
771 swizzle_reg_chars[swizzle_z],
772 swizzle_reg_chars[swizzle_w]);
778 inline static void vshader_program_dump_vs_param(const DWORD param, int input) {
779 static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" };
780 static const char swizzle_reg_chars[] = "xyzw";
781 /* the unknown mask is for bits not yet accounted for by any other mask... */
782 #define UNKNOWN_MASK 0xC000
784 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
785 #define EXTENDED_REG 0x1800
787 DWORD reg = param & D3DSP_REGNUM_MASK; /* 0x00001FFF; isn't this D3DSP_REGNUM_MASK? */
788 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
790 if(param & UNKNOWN_MASK) { /* if this register has any of the unknown bits set then report them*/
791 FIXME("Unknown bits set regtype %lx , %lx, UK(%lx)\n", regtype, (param & EXTENDED_REG), param & UNKNOWN_MASK);
794 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) TRACE("-");
796 switch (regtype /*<< D3DSP_REGTYPE_SHIFT*/) {
797 case D3DSPR_TEMP:
798 TRACE("r%lu", reg);
799 break;
800 case D3DSPR_INPUT:
801 TRACE("v%lu", reg);
802 break;
803 case D3DSPR_CONST:
804 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
805 break;
806 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
807 TRACE("a%lu", reg);
808 break;
809 case D3DSPR_RASTOUT:
810 TRACE("%s", rastout_reg_names[reg]);
811 break;
812 case D3DSPR_ATTROUT:
813 TRACE("oD%lu", reg);
814 break;
815 case D3DSPR_TEXCRDOUT:
816 TRACE("oT%lu", reg);
817 break;
818 case D3DSPR_CONSTINT:
819 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
820 break;
821 case D3DSPR_CONSTBOOL:
822 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
823 break;
824 case D3DSPR_LABEL:
825 TRACE("l%lu", reg);
826 break;
827 case D3DSPR_LOOP:
828 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
829 break;
830 default:
831 FIXME("Unknown %lu reg %lu\n",regtype, reg);
832 break;
835 if (!input) {
836 /** operand output */
837 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
838 if (param & D3DSP_WRITEMASK_0) TRACE(".x");
839 if (param & D3DSP_WRITEMASK_1) TRACE(".y");
840 if (param & D3DSP_WRITEMASK_2) TRACE(".z");
841 if (param & D3DSP_WRITEMASK_3) TRACE(".w");
843 } else {
844 /** operand input */
845 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
846 DWORD swizzle_x = swizzle & 0x03;
847 DWORD swizzle_y = (swizzle >> 2) & 0x03;
848 DWORD swizzle_z = (swizzle >> 4) & 0x03;
849 DWORD swizzle_w = (swizzle >> 6) & 0x03;
851 * swizzle bits fields:
852 * WWZZYYXX
854 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
855 if (swizzle_x == swizzle_y &&
856 swizzle_x == swizzle_z &&
857 swizzle_x == swizzle_w) {
858 TRACE(".%c", swizzle_reg_chars[swizzle_x]);
859 } else {
860 TRACE(".%c%c%c%c",
861 swizzle_reg_chars[swizzle_x],
862 swizzle_reg_chars[swizzle_y],
863 swizzle_reg_chars[swizzle_z],
864 swizzle_reg_chars[swizzle_w]);
870 inline static void vshader_program_dump_decl_usage(IWineD3DVertexShaderImpl *This, DWORD token) {
871 TRACE("dcl_");
872 switch(token & 0xFFFF) {
873 case D3DDECLUSAGE_POSITION:
874 TRACE("%s%ld ", "position",(token & 0xF0000) >> 16);
875 break;
876 case D3DDECLUSAGE_BLENDINDICES:
877 TRACE("%s ", "blend");
878 break;
879 case D3DDECLUSAGE_BLENDWEIGHT:
880 TRACE("%s ", "weight");
881 break;
882 case D3DDECLUSAGE_NORMAL:
883 TRACE("%s%ld ", "normal",(token & 0xF0000) >> 16);
884 break;
885 case D3DDECLUSAGE_PSIZE:
886 TRACE("%s ", "psize");
887 break;
888 case D3DDECLUSAGE_COLOR:
889 if((token & 0xF0000) >> 16 == 0) {
890 TRACE("%s ", "color");
891 } else {
892 TRACE("%s ", "specular");
894 break;
895 case D3DDECLUSAGE_TEXCOORD:
896 TRACE("%s%ld ", "texture", (token & 0xF0000) >> 16);
897 break;
898 case D3DDECLUSAGE_TANGENT:
899 TRACE("%s ", "tangent");
900 break;
901 case D3DDECLUSAGE_BINORMAL:
902 TRACE("%s ", "binormal");
903 break;
904 case D3DDECLUSAGE_TESSFACTOR:
905 TRACE("%s ", "tessfactor");
906 break;
907 case D3DDECLUSAGE_POSITIONT:
908 TRACE("%s%ld ", "positionT",(token & 0xF0000) >> 16);
909 break;
910 case D3DDECLUSAGE_FOG:
911 TRACE("%s ", "fog");
912 break;
913 case D3DDECLUSAGE_DEPTH:
914 TRACE("%s ", "depth");
915 break;
916 case D3DDECLUSAGE_SAMPLE:
917 TRACE("%s ", "sample");
918 break;
919 default:
920 FIXME("Unrecognised dcl %08lx", token & 0xFFFF);
924 inline static BOOL vshader_is_version_token(DWORD token) {
925 return 0xFFFE0000 == (token & 0xFFFE0000);
928 inline static BOOL vshader_is_comment_token(DWORD token) {
929 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
932 inline static void vshader_program_add_output_param_swizzle(const DWORD param, int is_color, char *hwLine) {
933 /** operand output */
934 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
935 strcat(hwLine, ".");
936 if (param & D3DSP_WRITEMASK_0) { strcat(hwLine, "x"); }
937 if (param & D3DSP_WRITEMASK_1) { strcat(hwLine, "y"); }
938 if (param & D3DSP_WRITEMASK_2) { strcat(hwLine, "z"); }
939 if (param & D3DSP_WRITEMASK_3) { strcat(hwLine, "w"); }
943 inline static void vshader_program_add_input_param_swizzle(const DWORD param, int is_color, char *hwLine) {
944 static const char swizzle_reg_chars_color_fix[] = "zyxw";
945 static const char swizzle_reg_chars[] = "xyzw";
946 const char* swizzle_regs = NULL;
947 char tmpReg[255];
949 /** operand input */
950 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
951 DWORD swizzle_x = swizzle & 0x03;
952 DWORD swizzle_y = (swizzle >> 2) & 0x03;
953 DWORD swizzle_z = (swizzle >> 4) & 0x03;
954 DWORD swizzle_w = (swizzle >> 6) & 0x03;
956 if (is_color) {
957 swizzle_regs = swizzle_reg_chars_color_fix;
958 } else {
959 swizzle_regs = swizzle_reg_chars;
963 * swizzle bits fields:
964 * WWZZYYXX
966 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) { /* D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
967 if (is_color) {
968 sprintf(tmpReg, ".%c%c%c%c",
969 swizzle_regs[swizzle_x],
970 swizzle_regs[swizzle_y],
971 swizzle_regs[swizzle_z],
972 swizzle_regs[swizzle_w]);
973 strcat(hwLine, tmpReg);
975 return ;
977 if (swizzle_x == swizzle_y &&
978 swizzle_x == swizzle_z &&
979 swizzle_x == swizzle_w)
981 sprintf(tmpReg, ".%c", swizzle_regs[swizzle_x]);
982 strcat(hwLine, tmpReg);
983 } else {
984 sprintf(tmpReg, ".%c%c%c%c",
985 swizzle_regs[swizzle_x],
986 swizzle_regs[swizzle_y],
987 swizzle_regs[swizzle_z],
988 swizzle_regs[swizzle_w]);
989 strcat(hwLine, tmpReg);
993 inline static void vshader_program_add_param(IWineD3DVertexShaderImpl *This, const DWORD param, BOOL is_input, char *hwLine) {
994 /* oPos, oFog and oPts in D3D */
995 static const char* hwrastout_reg_names[] = { "result.position", "result.fogcoord", "result.pointsize" };
997 DWORD reg = param & 0x00001FFF;
998 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
999 char tmpReg[255];
1000 BOOL is_color = FALSE;
1002 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) {
1003 strcat(hwLine, " -");
1004 } else {
1005 strcat(hwLine, " ");
1008 switch (regtype) {
1009 case D3DSPR_TEMP:
1010 sprintf(tmpReg, "T%lu", reg);
1011 strcat(hwLine, tmpReg);
1012 break;
1013 case D3DSPR_INPUT:
1014 if (reg == This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DIFFUSE]
1015 || reg == This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SPECULAR]) {
1016 is_color = TRUE;
1018 /* if the attributes come in as named dcl's then use a named vertex (called namedVertexN) */
1019 if (This->namedArrays) {
1020 sprintf(tmpReg, "namedVertex%lu", reg);
1021 } else {
1022 /* otherwise the input is on a numbered attribute so use opengl numbered attributes */
1023 sprintf(tmpReg, "vertex.attrib[%lu]", reg);
1025 strcat(hwLine, tmpReg);
1026 break;
1027 case D3DSPR_CONST:
1028 /* FIXME: some constants are named so we need a constants map*/
1029 if (This->constantsUsedBitmap[reg] == VS_CONSTANT_CONSTANT) {
1030 if (param & D3DVS_ADDRMODE_RELATIVE) {
1031 FIXME("Relative addressing not expected for a named constant %lu\n", reg);
1033 sprintf(tmpReg, "const%lu", reg);
1034 } else {
1035 sprintf(tmpReg, "C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "A0.x + " : "", reg);
1037 strcat(hwLine, tmpReg);
1038 break;
1039 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
1040 sprintf(tmpReg, "A%lu", reg);
1041 strcat(hwLine, tmpReg);
1042 break;
1043 case D3DSPR_RASTOUT:
1044 sprintf(tmpReg, "%s", hwrastout_reg_names[reg]);
1045 strcat(hwLine, tmpReg);
1046 break;
1047 case D3DSPR_ATTROUT:
1048 if (reg==0) {
1049 strcat(hwLine, "result.color.primary");
1050 } else {
1051 strcat(hwLine, "result.color.secondary");
1053 break;
1054 case D3DSPR_TEXCRDOUT:
1055 sprintf(tmpReg, "result.texcoord[%lu]", reg);
1056 strcat(hwLine, tmpReg);
1057 break;
1058 default:
1059 FIXME("Unknown reg type %ld %ld\n", regtype, reg);
1060 break;
1063 if (!is_input) {
1064 vshader_program_add_output_param_swizzle(param, is_color, hwLine);
1065 } else {
1066 vshader_program_add_input_param_swizzle(param, is_color, hwLine);
1070 DWORD MacroExpansion[4*4];
1072 int ExpandMxMacro(DWORD macro_opcode, const DWORD* args) {
1073 int i;
1074 int nComponents = 0;
1075 DWORD opcode =0;
1076 switch(macro_opcode) {
1077 case D3DSIO_M4x4:
1078 nComponents = 4;
1079 opcode = D3DSIO_DP4;
1080 break;
1081 case D3DSIO_M4x3:
1082 nComponents = 3;
1083 opcode = D3DSIO_DP4;
1084 break;
1085 case D3DSIO_M3x4:
1086 nComponents = 4;
1087 opcode = D3DSIO_DP3;
1088 break;
1089 case D3DSIO_M3x3:
1090 nComponents = 3;
1091 opcode = D3DSIO_DP3;
1092 break;
1093 case D3DSIO_M3x2:
1094 nComponents = 2;
1095 opcode = D3DSIO_DP3;
1096 break;
1097 default:
1098 break;
1100 for (i = 0; i < nComponents; i++) {
1101 MacroExpansion[i*4+0] = opcode;
1102 MacroExpansion[i*4+1] = ((*args) & ~D3DSP_WRITEMASK_ALL)|(D3DSP_WRITEMASK_0<<i);
1103 MacroExpansion[i*4+2] = *(args+1);
1104 MacroExpansion[i*4+3] = (*(args+2))+i;
1106 return nComponents;
1109 static void parse_decl_usage(IWineD3DVertexShaderImpl *This, INT usage, INT arrayNo)
1111 switch(usage & 0xFFFF) {
1112 case D3DDECLUSAGE_POSITION:
1113 if((usage & 0xF0000) >> 16 == 0) { /* tween data */
1114 TRACE("Setting position to %d\n", arrayNo);
1115 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION] = arrayNo;
1116 This->namedArrays = TRUE;
1117 } else {
1118 /* TODO: position indexes go from 0-8!!*/
1119 TRACE("Setting position 2 to %d because usage = %d\n", arrayNo, (usage & 0xF0000) >> 16);
1120 /* robots uses positions up to 8, the position arrays are just packed.*/
1121 if ((usage & 0xF0000) >> 16 > 1) {
1122 TRACE("Loaded for position %d (greater than 2)\n", (usage & 0xF0000) >> 16);
1124 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITION2 + ((usage & 0xF0000) >> 16) -1] = arrayNo;
1125 This->declaredArrays = TRUE;
1127 break;
1128 case D3DDECLUSAGE_BLENDINDICES:
1129 /* not supported by openGL */
1130 TRACE("Setting BLENDINDICES to %d\n", arrayNo);
1131 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_BLENDINDICES] = arrayNo;
1132 This->declaredArrays = TRUE;
1133 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended BLENDINDICES\n");
1134 break;
1135 case D3DDECLUSAGE_BLENDWEIGHT:
1136 TRACE("Setting BLENDWEIGHT to %d\n", arrayNo);
1137 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_BLENDWEIGHT] = arrayNo;
1138 This->namedArrays = TRUE;
1139 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended blend weights\n");
1140 break;
1141 case D3DDECLUSAGE_NORMAL:
1142 if((usage & 0xF0000) >> 16 == 0) { /* tween data */
1143 TRACE("Setting normal to %d\n", arrayNo);
1144 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_NORMAL] = arrayNo;
1145 This->namedArrays = TRUE;
1146 } else {
1147 TRACE("Setting normal 2 to %d because usage = %d\n", arrayNo, (usage & 0xF0000) >> 16);
1148 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_NORMAL2] = arrayNo;
1149 This->declaredArrays = TRUE;
1151 break;
1152 case D3DDECLUSAGE_PSIZE:
1153 TRACE("Setting PSIZE to %d\n", arrayNo);
1154 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_PSIZE] = arrayNo;
1155 This->namedArrays = TRUE;
1156 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended PSIZE\n");
1157 break;
1158 case D3DDECLUSAGE_COLOR:
1159 if((usage & 0xF0000) >> 16 == 0) {
1160 TRACE("Setting DIFFUSE to %d\n", arrayNo);
1161 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DIFFUSE] = arrayNo;
1162 This->namedArrays = TRUE;
1163 } else {
1164 TRACE("Setting SPECULAR to %d\n", arrayNo);
1165 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SPECULAR] = arrayNo;
1166 This->namedArrays = TRUE;
1168 break;
1169 case D3DDECLUSAGE_TEXCOORD:
1170 This->namedArrays = TRUE;
1171 /* only 7 texture coords have been designed for, so run a quick sanity check */
1172 if ((usage & 0xF0000) >> 16 > 7) {
1173 FIXME("(%p) : Program uses texture coordinate %d but only 0-7 have been implemented\n", This, (usage & 0xF0000) >> 16);
1174 } else {
1175 TRACE("Setting TEXCOORD %d to %d\n", ((usage & 0xF0000) >> 16), arrayNo);
1176 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_TEXCOORD0 + ((usage & 0xF0000) >> 16)] = arrayNo;
1178 break;
1179 /* The following aren't supported by openGL,
1180 if we get them then everything needs to be mapped to numbered attributes instead of named ones.
1181 this should be caught in the first pass */
1182 case D3DDECLUSAGE_TANGENT:
1183 TRACE("Setting TANGENT to %d\n", arrayNo);
1184 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_TANGENT] = arrayNo;
1185 This->declaredArrays = TRUE;
1186 break;
1187 case D3DDECLUSAGE_BINORMAL:
1188 TRACE("Setting BINORMAL to %d\n", arrayNo);
1189 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_BINORMAL] = arrayNo;
1190 This->declaredArrays = TRUE;
1191 break;
1192 case D3DDECLUSAGE_TESSFACTOR:
1193 TRACE("Setting TESSFACTOR to %d\n", arrayNo);
1194 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_TESSFACTOR] = arrayNo;
1195 This->declaredArrays = TRUE;
1196 break;
1197 case D3DDECLUSAGE_POSITIONT:
1198 if((usage & 0xF0000) >> 16 == 0) { /* tween data */
1199 FIXME("Setting positiont to %d\n", arrayNo);
1200 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITIONT] = arrayNo;
1201 This->namedArrays = TRUE;
1202 } else {
1203 FIXME("Setting positiont 2 to %d because usage = %d\n", arrayNo, (usage & 0xF0000) >> 16);
1204 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_POSITIONT2] = arrayNo;
1205 This->declaredArrays = TRUE;
1206 if ((usage & 0xF0000) >> 16 != 0) FIXME("Extended positiont\n");
1208 break;
1209 case D3DDECLUSAGE_FOG:
1210 /* supported by OpenGL */
1211 TRACE("Setting FOG to %d\n", arrayNo);
1212 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_FOG] = arrayNo;
1213 This->namedArrays = TRUE;
1214 break;
1215 case D3DDECLUSAGE_DEPTH:
1216 TRACE("Setting DEPTH to %d\n", arrayNo);
1217 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_DEPTH] = arrayNo;
1218 This->declaredArrays = TRUE;
1219 break;
1220 case D3DDECLUSAGE_SAMPLE:
1221 TRACE("Setting SAMPLE to %d\n", arrayNo);
1222 This->arrayUsageMap[WINED3DSHADERDECLUSAGE_SAMPLE] = arrayNo;
1223 This->declaredArrays = TRUE;
1224 break;
1225 default:
1226 FIXME("Unrecognised dcl %08x", usage & 0xFFFF);
1231 * Function parser ...
1234 inline static VOID IWineD3DVertexShaderImpl_GenerateProgramArbHW(IWineD3DVertexShader *iface, CONST DWORD* pFunction) {
1235 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1236 const DWORD* pToken = pFunction;
1237 const DWORD* pSavedToken = NULL;
1238 const SHADER_OPCODE* curOpcode = NULL;
1239 int nRemInstr = -1;
1240 DWORD i;
1241 unsigned lineNum = 0;
1242 char *pgmStr = NULL;
1243 char tmpLine[255];
1244 DWORD nUseAddressRegister = 0;
1245 DWORD nUseTempRegister = 0;
1246 DWORD regtype;
1247 DWORD reg;
1248 BOOL tmpsUsed[32];
1249 #if 0 /* TODO: loope register (just another address register ) */
1250 BOOL hasLoops = FALSE;
1251 #endif
1253 #define PGMSIZE 65535
1254 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
1255 int pgmLength = 0;
1257 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
1258 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
1259 if (This->device->fixupVertexBufferSize < PGMSIZE) {
1260 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
1261 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
1262 This->fixupVertexBufferSize = PGMSIZE;
1263 This->fixupVertexBuffer[0] = 0;
1265 pgmStr = This->device->fixupVertexBuffer;
1266 #endif
1267 #define PNSTRCAT(_pgmStr, _tmpLine) { \
1268 int _tmpLineLen = strlen(_tmpLine); \
1269 if(_tmpLineLen + pgmLength > PGMSIZE) { \
1270 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, _tmpLineLen + pgmLength); \
1271 } else { \
1272 memcpy(_pgmStr + pgmLength, _tmpLine, _tmpLineLen); \
1274 pgmLength += _tmpLineLen; \
1277 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, 65535); /* 64kb should be enough */
1278 /* Initialise the shader */
1279 This->namedArrays = FALSE;
1280 This->declaredArrays = FALSE;
1281 for (i = 0; i < WINED3DSHADERDECLUSAGE_MAX_USAGE; i++) {
1282 This->arrayUsageMap[i] = -1;
1284 /* set all the tmpsUsed to not used */
1285 memset(tmpsUsed, FALSE , sizeof(tmpsUsed));
1287 /* TODO: renumbering of attributes if the values are higher than the highest supported attribute but the total number of attributes is less than the highest supported attribute */
1288 This->highestConstant = -1;
1291 * First pass to determine what we need to declare:
1292 * - Temporary variables
1293 * - Address variables
1295 if (NULL != pToken) {
1296 while (D3DVS_END() != *pToken) {
1297 if (vshader_is_version_token(*pToken)) {
1298 /** skip version */
1299 ++pToken;
1300 continue;
1302 if (vshader_is_comment_token(*pToken)) { /** comment */
1303 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1304 ++pToken;
1305 pToken += comment_len;
1306 continue;
1308 curOpcode = vshader_program_get_opcode(*pToken);
1309 ++pToken;
1310 /* TODO: dcl's */
1311 /* TODO: Consts */
1313 if (NULL == curOpcode) {
1314 while (*pToken & 0x80000000) {
1315 FIXME("unrecognized opcode: %08lx\n", *pToken);
1316 /* skip unrecognized opcode */
1317 ++pToken;
1319 } else {
1320 if (curOpcode->opcode == D3DSIO_DCL){
1321 INT usage = *pToken++;
1322 INT arrayNo = (*pToken++ & 0x00001FFF);
1323 parse_decl_usage(This, usage, arrayNo);
1324 } else if(curOpcode->opcode == D3DSIO_DEF) {
1325 This->constantsUsedBitmap[*pToken & 0xFF] = VS_CONSTANT_CONSTANT;
1326 FIXME("Constant %ld\n", *pToken & 0xFF);
1327 ++pToken;
1328 ++pToken;
1329 ++pToken;
1330 ++pToken;
1331 ++pToken;
1333 } else {
1334 /* Check to see if and tmp or addressing redisters are used */
1335 if (curOpcode->num_params > 0) {
1336 regtype = ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT));
1337 reg = ((*pToken) & 0x00001FFF);
1338 if (D3DSPR_ADDR == regtype && nUseAddressRegister <= reg) nUseAddressRegister = reg + 1;
1339 if (D3DSPR_TEMP == regtype){
1340 tmpsUsed[reg] = TRUE;
1341 if(nUseTempRegister <= reg) nUseTempRegister = reg + 1;
1343 ++pToken;
1344 for (i = 1; i < curOpcode->num_params; ++i) {
1345 regtype = ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT));
1346 reg = ((*pToken) & 0x00001FFF);
1347 if (D3DSPR_ADDR == regtype && nUseAddressRegister <= reg) nUseAddressRegister = reg + 1;
1348 if (D3DSPR_TEMP == regtype){
1349 tmpsUsed[reg] = TRUE;
1350 if(nUseTempRegister <= reg) nUseTempRegister = reg + 1;
1352 ++pToken;
1356 #if 1 /* TODO: if the shaders uses calls or loops then we need to convert the shader into glsl */
1357 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1358 FIXME("This shader requires gl shader language support\n");
1359 #if 0
1360 This->shaderLanguage = GLSHADER_GLSL;
1361 #endif
1363 #endif
1367 #if 1
1368 #define VSHADER_ALWAYS_NUMBERED
1369 #endif
1371 #ifdef VSHADER_ALWAYS_NUMBERED /* handy for debugging using numbered arrays instead of named arrays */
1372 /* TODO: using numbered arrays for software shaders makes things easier */
1373 This->declaredArrays = TRUE;
1374 #endif
1376 /* named arrays and declared arrays are mutually exclusive */
1377 if (This->declaredArrays) {
1378 This->namedArrays = FALSE;
1380 /* TODO: validate
1381 nUseAddressRegister < = GL_MAX_PROGRAM_ADDRESS_REGISTERS_AR
1382 nUseTempRegister <= GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB
1385 /** second pass, now generate */
1386 pToken = pFunction;
1388 if (NULL != pToken) {
1389 while (1) {
1390 tmpLine[0] = 0;
1391 if ((nRemInstr >= 0) && (--nRemInstr == -1))
1392 /* Macro is finished, continue normal path */
1393 pToken = pSavedToken;
1394 if (D3DVS_END() == *pToken)
1395 break;
1397 if (vshader_is_version_token(*pToken)) { /** version */
1398 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1399 int version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1400 int numTemps;
1401 int numConstants;
1403 TRACE("found version token vs.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1405 /* Each release of vertex shaders has had different numbers of temp registers */
1406 switch (version) {
1407 case 10:
1408 case 11: numTemps=12;
1409 numConstants=96;/* min(GL_LIMITS(constants),96) */
1410 strcpy(tmpLine, "!!ARBvp1.0\n");
1411 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1412 break;
1413 /* FIXME: if there are no calls or loops then use ARBvp1 otherwise use GLSL instead
1414 TODO: see if there are any operations in vs2/3 that aren't supported by ARBvp
1415 TODO: only map the maximum possible number of constants supported by openGL and not the maximum required by d3d (even better only map the used constants)*/
1416 case 20: numTemps=12; /* min(GL_LIMITS(temps),12) */
1417 numConstants=96; /* min(GL_LIMITS(constants),256) */
1418 strcpy(tmpLine, "!!ARBvp1.0\n");
1419 FIXME("No work done yet to support vs2.0 in hw\n");
1420 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1421 break;
1422 case 21: numTemps=12; /* min(GL_LIMITS(temps),12) */
1423 numConstants=96; /* min(GL_LIMITS(constants),256) */
1424 strcpy(tmpLine, "!!ARBvp1.0\n");
1425 FIXME("No work done yet to support vs2.1 in hw\n");
1426 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1427 break;
1428 case 30: numTemps=32; /* min(GL_LIMITS(temps),32) */
1429 numConstants=96;/* min(GL_LIMITS(constants),256) */
1430 strcpy(tmpLine, "!!ARBvp3.0\n");
1431 FIXME("No work done yet to support vs3.0 in hw\n");
1432 TRACE("GL HW (%u) : %s", pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1433 break;
1434 default:
1435 numTemps=12;/* min(GL_LIMITS(temps),12) */
1436 numConstants=96;/* min(GL_LIMITS(constants),96) */
1437 strcpy(tmpLine, "!!ARBvp1.0\n");
1438 FIXME("Unrecognized vertex shader version %d!\n", version);
1440 PNSTRCAT(pgmStr, tmpLine);
1442 ++lineNum;
1444 /* This should be a bitmap so that only temp registers that are used are declared. */
1445 for (i = 0; i < nUseTempRegister /* we should check numTemps here */ ; i++) {
1446 if (tmpsUsed[i]) { /* only write out the temps if they are actually in use */
1447 sprintf(tmpLine, "TEMP T%ld;\n", i);
1448 ++lineNum;
1449 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1450 PNSTRCAT(pgmStr, tmpLine);
1454 /* TODO: loop register counts as an address register */
1455 for (i = 0; i < nUseAddressRegister; i++) {
1456 sprintf(tmpLine, "ADDRESS A%ld;\n", i);
1457 ++lineNum;
1458 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1459 PNSTRCAT(pgmStr, tmpLine);
1462 /* Due to the dynamic constants binding mechanism, we need to declare
1463 * all the constants for relative addressing. */
1464 /* Mesa supports only 95 constants for VS1.X although we should have at least 96. */
1465 if (GL_VEND(MESA) || GL_VEND(WINE)) {
1466 numConstants = 95;
1468 /* FIXME: We should be counting the number of constants in the first pass and then validating that many are supported
1469 Looking at some of the shaders in use by applications we'd need to create a list of all used env variables
1471 sprintf(tmpLine, "PARAM C[%d] = { program.env[0..%d] };\n", numConstants, numConstants - 1);
1472 TRACE("GL HW (%u,%u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1473 PNSTRCAT(pgmStr, tmpLine);
1475 ++lineNum;
1477 ++pToken;
1478 continue;
1480 if (vshader_is_comment_token(*pToken)) { /** comment */
1481 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1482 ++pToken;
1483 FIXME("#%s\n", (char*)pToken);
1484 pToken += comment_len;
1485 continue;
1488 curOpcode = vshader_program_get_opcode(*pToken);
1489 ++pToken;
1490 if (NULL == curOpcode) {
1491 /* unknown current opcode ... (shouldn't be any!) */
1492 while (*pToken & 0x80000000) {
1493 FIXME("unrecognized opcode: %08lx\n", *pToken);
1494 ++pToken;
1496 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1497 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1499 FIXME("Token %s requires greater functionality than Vertex_Progarm_ARB supports\n", curOpcode->name);
1500 pToken += curOpcode->num_params;
1501 } else {
1502 /* Build opcode for GL vertex_program */
1503 switch (curOpcode->opcode) {
1504 case D3DSIO_NOP:
1505 continue;
1506 case D3DSIO_MOV:
1507 /* Address registers must be loaded with the ARL instruction */
1508 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1509 if (((*pToken) & 0x00001FFF) < nUseAddressRegister) {
1510 strcpy(tmpLine, "ARL");
1511 break;
1512 } else
1513 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & 0x00001FFF));
1515 /* fall through */
1516 case D3DSIO_ADD:
1517 case D3DSIO_SUB:
1518 case D3DSIO_MAD:
1519 case D3DSIO_MUL:
1520 case D3DSIO_RCP:
1521 case D3DSIO_RSQ:
1522 case D3DSIO_DP3:
1523 case D3DSIO_DP4:
1524 case D3DSIO_MIN:
1525 case D3DSIO_MAX:
1526 case D3DSIO_SLT:
1527 case D3DSIO_SGE:
1528 case D3DSIO_LIT:
1529 case D3DSIO_DST:
1530 case D3DSIO_FRC:
1531 case D3DSIO_EXPP:
1532 case D3DSIO_LOGP:
1533 case D3DSIO_EXP:
1534 case D3DSIO_LOG:
1535 strcpy(tmpLine, curOpcode->glname);
1536 break;
1537 case D3DSIO_M4x4:
1538 case D3DSIO_M4x3:
1539 case D3DSIO_M3x4:
1540 case D3DSIO_M3x3:
1541 case D3DSIO_M3x2:
1542 /* Expand the macro and get nusprintf(tmpLine,mber of generated instruction */
1543 nRemInstr = ExpandMxMacro(curOpcode->opcode, pToken);
1544 /* Save point to next instruction */
1545 pSavedToken = pToken + 3;
1546 /* Execute expanded macro */
1547 pToken = MacroExpansion;
1548 continue;
1549 /* dcl and def are handeled in the first pass */
1550 case D3DSIO_DCL:
1551 if (This->namedArrays) {
1552 const char* attribName = "undefined";
1553 switch(*pToken & 0xFFFF) {
1554 case D3DDECLUSAGE_POSITION:
1555 attribName = "vertex.position";
1556 break;
1557 case D3DDECLUSAGE_BLENDINDICES:
1558 /* not supported by openGL */
1559 attribName = "vertex.blend";
1560 break;
1561 case D3DDECLUSAGE_BLENDWEIGHT:
1562 attribName = "vertex.weight";
1563 break;
1564 case D3DDECLUSAGE_NORMAL:
1565 attribName = "vertex.normal";
1566 break;
1567 case D3DDECLUSAGE_PSIZE:
1568 attribName = "vertex.psize";
1569 break;
1570 case D3DDECLUSAGE_COLOR:
1571 if((*pToken & 0xF0000) >> 16 == 0) {
1572 attribName = "vertex.color";
1573 } else {
1574 attribName = "vertex.color.secondary";
1576 break;
1577 case D3DDECLUSAGE_TEXCOORD:
1579 char tmpChar[100];
1580 tmpChar[0] = 0;
1581 sprintf(tmpChar,"vertex.texcoord[%lu]",(*pToken & 0xF0000) >> 16);
1582 attribName = tmpChar;
1583 break;
1585 /* The following aren't directly supported by openGL, so shouldn't come up using namedarrays. */
1586 case D3DDECLUSAGE_TANGENT:
1587 attribName = "vertex.tangent";
1588 break;
1589 case D3DDECLUSAGE_BINORMAL:
1590 attribName = "vertex.binormal";
1591 break;
1592 case D3DDECLUSAGE_TESSFACTOR:
1593 attribName = "vertex.tessfactor";
1594 break;
1595 case D3DDECLUSAGE_POSITIONT:
1596 attribName = "vertex.possitionT";
1597 break;
1598 case D3DDECLUSAGE_FOG:
1599 attribName = "vertex.fogcoord";
1600 break;
1601 case D3DDECLUSAGE_DEPTH:
1602 attribName = "vertex.depth";
1603 break;
1604 case D3DDECLUSAGE_SAMPLE:
1605 attribName = "vertex.sample";
1606 break;
1607 default:
1608 FIXME("Unrecognised dcl %08lx", *pToken & 0xFFFF);
1611 char tmpChar[80];
1612 ++pToken;
1613 sprintf(tmpLine, "ATTRIB ");
1614 vshader_program_add_param(This, *pToken, FALSE, tmpLine);
1615 sprintf(tmpChar," = %s", attribName);
1616 strcat(tmpLine, tmpChar);
1617 strcat(tmpLine,";\n");
1618 ++lineNum;
1619 if (This->namedArrays) {
1620 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine);
1621 PNSTRCAT(pgmStr, tmpLine);
1623 } else {
1624 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine);
1627 } else {
1628 /* eat the token so it doesn't generate a warning */
1629 ++pToken;
1631 ++pToken;
1632 continue;
1633 case D3DSIO_DEF:
1635 char tmpChar[80];
1636 sprintf(tmpLine, "PARAM const%lu = {", *pToken & 0xFF);
1637 ++pToken;
1638 sprintf(tmpChar,"%f ,", *(float *)pToken);
1639 strcat(tmpLine, tmpChar);
1640 ++pToken;
1641 sprintf(tmpChar,"%f ,", *(float *)pToken);
1642 strcat(tmpLine, tmpChar);
1643 ++pToken;
1644 sprintf(tmpChar,"%f ,", *(float *)pToken);
1645 strcat(tmpLine, tmpChar);
1646 ++pToken;
1647 sprintf(tmpChar,"%f}", *(float *)pToken);
1648 strcat(tmpLine, tmpChar);
1650 strcat(tmpLine,";\n");
1651 ++lineNum;
1652 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1653 PNSTRCAT(pgmStr, tmpLine);
1655 ++pToken;
1656 continue;
1658 default:
1659 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1660 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1661 } else {
1662 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1665 if (curOpcode->num_params > 0) {
1666 vshader_program_add_param(This, *pToken, FALSE, tmpLine);
1668 ++pToken;
1669 for (i = 1; i < curOpcode->num_params; ++i) {
1670 strcat(tmpLine, ",");
1671 vshader_program_add_param(This, *pToken, TRUE, tmpLine);
1672 ++pToken;
1675 strcat(tmpLine,";\n");
1676 ++lineNum;
1677 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1678 PNSTRCAT(pgmStr, tmpLine);
1682 strcpy(tmpLine, "END\n");
1683 ++lineNum;
1684 TRACE("GL HW (%u, %u) : %s", lineNum, pgmLength, tmpLine); /* Don't add \n to this line as already in tmpLine */
1685 PNSTRCAT(pgmStr, tmpLine);
1688 /* finally null terminate the pgmStr*/
1689 pgmStr[pgmLength] = 0;
1691 /* Check that Vertex Shaders are supported */
1692 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1693 /* Create the hw shader */
1694 /* TODO: change to resource.glObjectHandel or something like that */
1695 GL_EXTCALL(glGenProgramsARB(1, &This->prgId));
1696 TRACE("Creating a hw vertex shader, prg=%d\n", This->prgId);
1697 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, This->prgId));
1699 /* Create the program and check for errors */
1700 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr)/*pgmLength*/, pgmStr));
1701 if (glGetError() == GL_INVALID_OPERATION) {
1702 GLint errPos;
1703 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1704 FIXME("HW VertexShader Error at position %d: %s\n",
1705 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1706 This->prgId = -1;
1709 #if 1 /* if were using the data buffer of device then we don't need to free it */
1710 HeapFree(GetProcessHeap(), 0, pgmStr);
1711 #endif
1712 #undef PNSTRCAT
1715 BOOL IWineD3DVertexShaderImpl_ExecuteHAL(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
1717 * TODO: use the NV_vertex_program (or 1_1) extension
1718 * and specifics vendors (ARB_vertex_program??) variants for it
1720 return TRUE;
1723 HRESULT WINAPI IWineD3DVertexShaderImpl_ExecuteSW(IWineD3DVertexShader* iface, WINEVSHADERINPUTDATA* input, WINEVSHADEROUTPUTDATA* output) {
1724 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1726 /** Vertex Shader Temporary Registers */
1727 WINED3DSHADERVECTOR R[12];
1728 /*D3DSHADERSCALAR A0;*/
1729 WINED3DSHADERVECTOR A[1];
1730 /** temporary Vector for modifier management */
1731 WINED3DSHADERVECTOR d;
1732 WINED3DSHADERVECTOR s[3];
1733 /** parser datas */
1734 const DWORD* pToken = This->function;
1735 const SHADER_OPCODE* curOpcode = NULL;
1736 /** functions parameters */
1737 WINED3DSHADERVECTOR* p[4];
1738 WINED3DSHADERVECTOR* p_send[4];
1739 DWORD i;
1741 /** init temporary register */
1742 memset(R, 0, 12 * sizeof(WINED3DSHADERVECTOR));
1744 /* vshader_program_parse(vshader); */
1745 #if 0 /* Must not be 1 in cvs */
1746 TRACE("Input:\n");
1747 TRACE_VSVECTOR(This->data->C[0]);
1748 TRACE_VSVECTOR(This->data->C[1]);
1749 TRACE_VSVECTOR(This->data->C[2]);
1750 TRACE_VSVECTOR(This->data->C[3]);
1751 TRACE_VSVECTOR(This->data->C[4]);
1752 TRACE_VSVECTOR(This->data->C[5]);
1753 TRACE_VSVECTOR(This->data->C[6]);
1754 TRACE_VSVECTOR(This->data->C[7]);
1755 TRACE_VSVECTOR(This->data->C[8]);
1756 TRACE_VSVECTOR(This->data->C[64]);
1757 TRACE_VSVECTOR(input->V[D3DVSDE_POSITION]);
1758 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDWEIGHT]);
1759 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDINDICES]);
1760 TRACE_VSVECTOR(input->V[D3DVSDE_NORMAL]);
1761 TRACE_VSVECTOR(input->V[D3DVSDE_PSIZE]);
1762 TRACE_VSVECTOR(input->V[D3DVSDE_DIFFUSE]);
1763 TRACE_VSVECTOR(input->V[D3DVSDE_SPECULAR]);
1764 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD0]);
1765 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD1]);
1766 #endif
1768 TRACE_VSVECTOR(vshader->data->C[64]);
1769 /* TODO: Run through all the tokens and find and labels, if, endifs, loops etc...., and make a labels list */
1771 /* the first dword is the version tag */
1772 /* TODO: parse it */
1774 if (vshader_is_version_token(*pToken)) { /** version */
1775 ++pToken;
1777 while (D3DVS_END() != *pToken) {
1778 if (vshader_is_comment_token(*pToken)) { /** comment */
1779 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1780 ++pToken;
1781 pToken += comment_len;
1782 continue ;
1784 curOpcode = vshader_program_get_opcode(*pToken);
1785 ++pToken;
1786 if (NULL == curOpcode) {
1787 i = 0;
1788 /* unknown current opcode ... */
1789 /* TODO: Think of a name for 0x80000000 and replace its use with a constant */
1790 while (*pToken & 0x80000000) {
1791 if (i == 0) {
1792 FIXME("unrecognized opcode: pos=%d token=%08lX\n", (pToken - 1) - This->function, *(pToken - 1));
1794 FIXME("unrecognized opcode param: pos=%d token=%08lX what=", pToken - This->function, *pToken);
1795 vshader_program_dump_param(*pToken, i);
1796 TRACE("\n");
1797 ++i;
1798 ++pToken;
1800 /* return FALSE; */
1801 } else {
1802 if (curOpcode->num_params > 0) {
1803 /* TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken); */
1804 for (i = 0; i < curOpcode->num_params; ++i) {
1805 DWORD reg = pToken[i] & 0x00001FFF;
1806 DWORD regtype = ((pToken[i] & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
1808 switch (regtype << D3DSP_REGTYPE_SHIFT) {
1809 case D3DSPR_TEMP:
1810 /* TRACE("p[%d]=R[%d]\n", i, reg); */
1811 p[i] = &R[reg];
1812 break;
1813 case D3DSPR_INPUT:
1814 /* TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]); */
1815 p[i] = &input->V[reg];
1816 break;
1817 case D3DSPR_CONST:
1818 if (pToken[i] & D3DVS_ADDRMODE_RELATIVE) {
1819 p[i] = &This->data->C[(DWORD) A[0].x + reg];
1820 } else {
1821 p[i] = &This->data->C[reg];
1823 break;
1824 case D3DSPR_ADDR: /* case D3DSPR_TEXTURE: */
1825 if (0 != reg) {
1826 ERR("cannot handle address registers != a0, forcing use of a0\n");
1827 reg = 0;
1829 /* TRACE("p[%d]=A[%d]\n", i, reg); */
1830 p[i] = &A[reg];
1831 break;
1832 case D3DSPR_RASTOUT:
1833 switch (reg) {
1834 case D3DSRO_POSITION:
1835 p[i] = &output->oPos;
1836 break;
1837 case D3DSRO_FOG:
1838 p[i] = &output->oFog;
1839 break;
1840 case D3DSRO_POINT_SIZE:
1841 p[i] = &output->oPts;
1842 break;
1844 break;
1845 case D3DSPR_ATTROUT:
1846 /* TRACE("p[%d]=oD[%d]\n", i, reg); */
1847 p[i] = &output->oD[reg];
1848 break;
1849 case D3DSPR_TEXCRDOUT:
1850 /* TRACE("p[%d]=oT[%d]\n", i, reg); */
1851 p[i] = &output->oT[reg];
1852 break;
1853 /* TODO Decls and defs */
1854 #if 0
1855 case D3DSPR_DCL:
1856 case D3DSPR_DEF:
1857 #endif
1858 default:
1859 break;
1862 if (i > 0) { /* input reg */
1863 DWORD swizzle = (pToken[i] & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
1864 UINT isNegative = ((pToken[i] & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG);
1866 if (!isNegative && (D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) {
1867 /* TRACE("p[%d] not swizzled\n", i); */
1868 p_send[i] = p[i];
1869 } else {
1870 DWORD swizzle_x = swizzle & 0x03;
1871 DWORD swizzle_y = (swizzle >> 2) & 0x03;
1872 DWORD swizzle_z = (swizzle >> 4) & 0x03;
1873 DWORD swizzle_w = (swizzle >> 6) & 0x03;
1874 /* TRACE("p[%d] swizzled\n", i); */
1875 float* tt = (float*) p[i];
1876 s[i].x = (isNegative) ? -tt[swizzle_x] : tt[swizzle_x];
1877 s[i].y = (isNegative) ? -tt[swizzle_y] : tt[swizzle_y];
1878 s[i].z = (isNegative) ? -tt[swizzle_z] : tt[swizzle_z];
1879 s[i].w = (isNegative) ? -tt[swizzle_w] : tt[swizzle_w];
1880 p_send[i] = &s[i];
1882 } else { /* output reg */
1883 if ((pToken[i] & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) {
1884 p_send[i] = p[i];
1885 } else {
1886 p_send[i] = &d; /* to be post-processed for modifiers management */
1892 switch (curOpcode->num_params) {
1893 case 0:
1894 curOpcode->soft_fct();
1895 break;
1896 case 1:
1897 curOpcode->soft_fct(p_send[0]);
1898 break;
1899 case 2:
1900 curOpcode->soft_fct(p_send[0], p_send[1]);
1901 break;
1902 case 3:
1903 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2]);
1904 break;
1905 case 4:
1906 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3]);
1907 break;
1908 case 5:
1909 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4]);
1910 break;
1911 case 6:
1912 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4], p_send[5]);
1913 break;
1914 default:
1915 ERR("%s too many params: %u\n", curOpcode->name, curOpcode->num_params);
1918 /* check if output reg modifier post-process */
1919 if (curOpcode->num_params > 0 && (pToken[0] & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1920 if (pToken[0] & D3DSP_WRITEMASK_0) p[0]->x = d.x;
1921 if (pToken[0] & D3DSP_WRITEMASK_1) p[0]->y = d.y;
1922 if (pToken[0] & D3DSP_WRITEMASK_2) p[0]->z = d.z;
1923 if (pToken[0] & D3DSP_WRITEMASK_3) p[0]->w = d.w;
1925 #if 0
1926 TRACE_VSVECTOR(output->oPos);
1927 TRACE_VSVECTOR(output->oD[0]);
1928 TRACE_VSVECTOR(output->oD[1]);
1929 TRACE_VSVECTOR(output->oT[0]);
1930 TRACE_VSVECTOR(output->oT[1]);
1931 TRACE_VSVECTOR(R[0]);
1932 TRACE_VSVECTOR(R[1]);
1933 TRACE_VSVECTOR(R[2]);
1934 TRACE_VSVECTOR(R[3]);
1935 TRACE_VSVECTOR(R[4]);
1936 TRACE_VSVECTOR(R[5]);
1937 #endif
1939 /* to next opcode token */
1940 pToken += curOpcode->num_params;
1942 #if 0
1943 TRACE("End of current instruction:\n");
1944 TRACE_VSVECTOR(output->oPos);
1945 TRACE_VSVECTOR(output->oD[0]);
1946 TRACE_VSVECTOR(output->oD[1]);
1947 TRACE_VSVECTOR(output->oT[0]);
1948 TRACE_VSVECTOR(output->oT[1]);
1949 TRACE_VSVECTOR(R[0]);
1950 TRACE_VSVECTOR(R[1]);
1951 TRACE_VSVECTOR(R[2]);
1952 TRACE_VSVECTOR(R[3]);
1953 TRACE_VSVECTOR(R[4]);
1954 TRACE_VSVECTOR(R[5]);
1955 #endif
1957 #if 0 /* Must not be 1 in cvs */
1958 TRACE("Output:\n");
1959 TRACE_VSVECTOR(output->oPos);
1960 TRACE_VSVECTOR(output->oD[0]);
1961 TRACE_VSVECTOR(output->oD[1]);
1962 TRACE_VSVECTOR(output->oT[0]);
1963 TRACE_VSVECTOR(output->oT[1]);
1964 #endif
1965 return D3D_OK;
1968 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantF(IWineD3DVertexShader *iface, UINT StartRegister, CONST FLOAT *pConstantData, UINT Vector4fCount) {
1969 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1970 FIXME("(%p) : stub\n", This);
1971 return D3D_OK;
1974 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantF(IWineD3DVertexShader *iface, UINT StartRegister, FLOAT *pConstantData, UINT Vector4fCount) {
1975 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1976 FIXME("(%p) : stub\n", This);
1977 return D3D_OK;
1980 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantI(IWineD3DVertexShader *iface, UINT StartRegister, CONST int *pConstantData, UINT Vector4iCount) {
1981 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1982 if (StartRegister + Vector4iCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1983 ERR("(%p) : SetVertexShaderConstantI C[%u] invalid\n", This, StartRegister);
1984 return D3DERR_INVALIDCALL;
1986 if (NULL == pConstantData) {
1987 return D3DERR_INVALIDCALL;
1989 FIXME("(%p) : stub\n", This);
1990 return D3D_OK;
1993 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantI(IWineD3DVertexShader *iface, UINT StartRegister, int *pConstantData, UINT Vector4iCount) {
1994 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
1995 TRACE("(%p) : C[%u] count=%u\n", This, StartRegister, Vector4iCount);
1996 if (StartRegister + Vector4iCount > WINED3D_VSHADER_MAX_CONSTANTS) {
1997 return D3DERR_INVALIDCALL;
1999 if (NULL == pConstantData) {
2000 return D3DERR_INVALIDCALL;
2002 FIXME("(%p) : stub\n", This);
2003 return D3D_OK;
2006 HRESULT WINAPI IWineD3DVertexShaderImpl_SetConstantB(IWineD3DVertexShader *iface, UINT StartRegister, CONST BOOL *pConstantData, UINT BoolCount) {
2007 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2008 if (StartRegister + BoolCount > WINED3D_VSHADER_MAX_CONSTANTS) {
2009 ERR("(%p) : SetVertexShaderConstantB C[%u] invalid\n", This, StartRegister);
2010 return D3DERR_INVALIDCALL;
2012 if (NULL == pConstantData) {
2013 return D3DERR_INVALIDCALL;
2015 FIXME("(%p) : stub\n", This);
2016 return D3D_OK;
2019 HRESULT WINAPI IWineD3DVertexShaderImpl_GetConstantB(IWineD3DVertexShader *iface, UINT StartRegister, BOOL *pConstantData, UINT BoolCount) {
2020 IWineD3DVertexShaderImpl* This = (IWineD3DVertexShaderImpl *)iface;
2021 FIXME("(%p) : stub\n", This);
2022 return D3D_OK;
2025 #endif
2027 /* *******************************************
2028 IWineD3DVertexShader IUnknown parts follow
2029 ******************************************* */
2030 HRESULT WINAPI IWineD3DVertexShaderImpl_QueryInterface(IWineD3DVertexShader *iface, REFIID riid, LPVOID *ppobj)
2032 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2033 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
2034 if (IsEqualGUID(riid, &IID_IUnknown)
2035 || IsEqualGUID(riid, &IID_IWineD3DBase)
2036 || IsEqualGUID(riid, &IID_IWineD3DVertexShader)) {
2037 IUnknown_AddRef(iface);
2038 *ppobj = This;
2039 return D3D_OK;
2041 return E_NOINTERFACE;
2044 ULONG WINAPI IWineD3DVertexShaderImpl_AddRef(IWineD3DVertexShader *iface) {
2045 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2046 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
2047 return InterlockedIncrement(&This->ref);
2050 ULONG WINAPI IWineD3DVertexShaderImpl_Release(IWineD3DVertexShader *iface) {
2051 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2052 ULONG ref;
2053 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
2054 ref = InterlockedDecrement(&This->ref);
2055 if (ref == 0) {
2056 if (This->vertexDeclaration) IWineD3DVertexDeclaration_Release(This->vertexDeclaration);
2057 HeapFree(GetProcessHeap(), 0, This);
2059 return ref;
2062 /* *******************************************
2063 IWineD3DVertexShader IWineD3DVertexShader parts follow
2064 ******************************************* */
2066 HRESULT WINAPI IWineD3DVertexShaderImpl_GetParent(IWineD3DVertexShader *iface, IUnknown** parent){
2067 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2069 *parent = This->parent;
2070 IUnknown_AddRef(*parent);
2071 TRACE("(%p) : returning %p\n", This, *parent);
2072 return D3D_OK;
2075 HRESULT WINAPI IWineD3DVertexShaderImpl_GetDevice(IWineD3DVertexShader* iface, IWineD3DDevice **pDevice){
2076 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)iface;
2077 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
2078 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
2079 TRACE("(%p) returning %p\n", This, *pDevice);
2080 return D3D_OK;
2083 HRESULT WINAPI IWineD3DVertexShaderImpl_GetFunction(IWineD3DVertexShader* impl, VOID* pData, UINT* pSizeOfData) {
2084 IWineD3DVertexShaderImpl *This = (IWineD3DVertexShaderImpl *)impl;
2085 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
2087 if (NULL == pData) {
2088 *pSizeOfData = This->functionLength;
2089 return D3D_OK;
2091 if (*pSizeOfData < This->functionLength) {
2092 *pSizeOfData = This->functionLength;
2093 return D3DERR_MOREDATA;
2095 if (NULL == This->function) { /* no function defined */
2096 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
2097 (*(DWORD **) pData) = NULL;
2098 } else {
2099 if(This->functionLength == 0){
2102 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
2103 memcpy(pData, This->function, This->functionLength);
2105 return D3D_OK;
2108 HRESULT WINAPI IWineD3DVertexShaderImpl_SetFunction(IWineD3DVertexShader *iface, CONST DWORD *pFunction) {
2109 IWineD3DVertexShaderImpl *This =(IWineD3DVertexShaderImpl *)iface;
2110 const DWORD* pToken = pFunction;
2111 const SHADER_OPCODE* curOpcode = NULL;
2112 DWORD len = 0;
2113 DWORD i;
2114 TRACE("(%p) : Parsing programme\n", This);
2116 if (NULL != pToken) {
2117 while (D3DVS_END() != *pToken) {
2118 if (vshader_is_version_token(*pToken)) { /** version */
2119 TRACE("vs_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
2120 ++pToken;
2121 ++len;
2122 continue;
2124 if (vshader_is_comment_token(*pToken)) { /** comment */
2125 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2126 ++pToken;
2127 TRACE("//%s\n", (char*)pToken);
2128 pToken += comment_len;
2129 len += comment_len + 1;
2130 continue;
2132 curOpcode = vshader_program_get_opcode(*pToken);
2133 ++pToken;
2134 ++len;
2135 if (NULL == curOpcode) {
2136 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
2137 while (*pToken & 0x80000000) {
2138 /* unknown current opcode ... */
2139 FIXME("unrecognized opcode: %08lx", *pToken);
2140 ++pToken;
2141 ++len;
2142 TRACE("\n");
2145 } else {
2146 if (curOpcode->opcode == D3DSIO_DCL) {
2147 vshader_program_dump_decl_usage(This, *pToken);
2148 ++pToken;
2149 ++len;
2150 vshader_program_dump_vs_param(*pToken, 0);
2151 ++pToken;
2152 ++len;
2153 } else
2154 if (curOpcode->opcode == D3DSIO_DEF) {
2155 TRACE("def c%lu = ", *pToken & 0xFF);
2156 ++pToken;
2157 ++len;
2158 TRACE("%f ,", *(float *)pToken);
2159 ++pToken;
2160 ++len;
2161 TRACE("%f ,", *(float *)pToken);
2162 ++pToken;
2163 ++len;
2164 TRACE("%f ,", *(float *)pToken);
2165 ++pToken;
2166 ++len;
2167 TRACE("%f", *(float *)pToken);
2168 ++pToken;
2169 ++len;
2170 } else {
2171 TRACE("%s ", curOpcode->name);
2172 if (curOpcode->num_params > 0) {
2173 vshader_program_dump_vs_param(*pToken, 0);
2174 ++pToken;
2175 ++len;
2176 for (i = 1; i < curOpcode->num_params; ++i) {
2177 TRACE(", ");
2178 vshader_program_dump_vs_param(*pToken, 1);
2179 ++pToken;
2180 ++len;
2184 TRACE("\n");
2187 This->functionLength = (len + 1) * sizeof(DWORD);
2188 } else {
2189 This->functionLength = 1; /* no Function defined use fixed function vertex processing */
2192 /* Generate HW shader in needed */
2193 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
2194 #if 1
2195 IWineD3DVertexShaderImpl_GenerateProgramArbHW(iface, pFunction);
2196 #endif
2199 /* copy the function ... because it will certainly be released by application */
2200 if (NULL != pFunction) {
2201 This->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->functionLength);
2202 memcpy((void *)This->function, pFunction, This->functionLength);
2203 } else {
2204 This->function = NULL;
2206 return D3D_OK;
2209 const IWineD3DVertexShaderVtbl IWineD3DVertexShader_Vtbl =
2211 /*** IUnknown methods ***/
2212 IWineD3DVertexShaderImpl_QueryInterface,
2213 IWineD3DVertexShaderImpl_AddRef,
2214 IWineD3DVertexShaderImpl_Release,
2215 /*** IWineD3DVertexShader methods ***/
2216 IWineD3DVertexShaderImpl_GetParent,
2217 IWineD3DVertexShaderImpl_GetDevice,
2218 IWineD3DVertexShaderImpl_GetFunction,
2219 IWineD3DVertexShaderImpl_SetFunction