Limit usage of vertex arrays to correct states.
[wine/multimedia.git] / dlls / d3d8 / shader.c
blob426051dae7138f8e717d360158ab4fc29108e62d
1 /*
2 * shaders implementation
4 * Copyright 2002 Raphael Junqueira
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "config.h"
23 #include <math.h>
24 #include <stdarg.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "winuser.h"
29 #include "wingdi.h"
30 #include "wine/debug.h"
32 #include "d3d8_private.h"
34 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/*
 * Software vertex shader tracing.
 *
 * Switch the condition below to 1 locally to trace every instruction the
 * software shader VM executes. It must stay 0 in the committed version.
 *   VSTRACE(A)          - forwards the parenthesised argument list A to TRACE
 *   TRACE_VSVECTOR(name) - prints the x/y/z/w members of a shader vector
 * When tracing is off both macros expand to nothing.
 */
#if 0 /* Must not be 1 in cvs version */
# define VSTRACE(A) TRACE A
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
# define VSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif
47 /**
48 * DirectX9 SDK download
49 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
51 * Exploring D3DX
52 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
54 * Using Vertex Shaders
55 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
57 * Dx9 New
58 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
60 * Dx9 Shaders
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
62 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
63 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
64 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
66 * Dx9 D3DX
67 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
69 * FVF
70 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
72 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
73 * http://developer.nvidia.com/view.asp?IO=vstovp
75 * NVIDIA: Memory Management with VAR
76 * http://developer.nvidia.com/view.asp?IO=var_memory_management
79 typedef void (*shader_fct_t)();
81 typedef struct SHADER_OPCODE {
82 CONST BYTE opcode;
83 const char* name;
84 CONST UINT num_params;
85 shader_fct_t soft_fct;
86 DWORD min_version;
87 DWORD max_version;
88 } SHADER_OPCODE;
90 /*******************************
91 * vshader functions software VM
94 void vshader_add(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
95 d->x = s0->x + s1->x;
96 d->y = s0->y + s1->y;
97 d->z = s0->z + s1->z;
98 d->w = s0->w + s1->w;
99 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
100 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
103 void vshader_dp3(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
104 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
105 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
106 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
109 void vshader_dp4(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
110 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
111 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
112 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
115 void vshader_dst(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
116 d->x = 1.0f;
117 d->y = s0->y * s1->y;
118 d->z = s0->z;
119 d->w = s1->w;
120 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
121 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
124 void vshader_expp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
125 float tmp_f = floorf(s0->w);
126 DWORD tmp_d = 0;
127 tmp_f = powf(2.0f, s0->w);
128 tmp_d = *((DWORD*) &tmp_f) & 0xFFFFFF00;
130 d->x = powf(2.0f, tmp_f);
131 d->y = s0->w - tmp_f;
132 d->z = *((float*) &tmp_d);
133 d->w = 1.0f;
134 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
135 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
138 void vshader_lit(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
139 d->x = 1.0f;
140 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
141 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
142 d->w = 1.0f;
143 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
144 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
147 void vshader_logp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
148 float tmp_f = fabsf(s0->w);
149 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE;
150 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
151 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
154 void vshader_mad(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1, D3DSHADERVECTOR* s2) {
155 d->x = s0->x * s1->x + s2->x;
156 d->y = s0->y * s1->y + s2->y;
157 d->z = s0->z * s1->z + s2->z;
158 d->w = s0->w * s1->w + s2->w;
159 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
160 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
163 void vshader_max(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
164 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
165 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
166 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
167 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
168 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
169 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
172 void vshader_min(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
173 d->x = (s0->x < s1->x) ? s0->x : s1->x;
174 d->y = (s0->y < s1->y) ? s0->y : s1->y;
175 d->z = (s0->z < s1->z) ? s0->z : s1->z;
176 d->w = (s0->w < s1->w) ? s0->w : s1->w;
177 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
178 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
181 void vshader_mov(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
182 d->x = s0->x;
183 d->y = s0->y;
184 d->z = s0->z;
185 d->w = s0->w;
186 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
187 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
190 void vshader_mul(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
191 d->x = s0->x * s1->x;
192 d->y = s0->y * s1->y;
193 d->z = s0->z * s1->z;
194 d->w = s0->w * s1->w;
195 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
196 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/**
 * nop: takes no operands and changes no state.
 */
void vshader_nop(void) {
  /* intentionally empty */
}
203 void vshader_rcp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
204 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE : 1.0f / s0->w;
205 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
206 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
209 void vshader_rsq(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
210 float tmp_f = fabsf(s0->w);
211 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
212 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
213 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
216 void vshader_sge(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
217 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
218 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
219 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
220 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
221 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
222 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
225 void vshader_slt(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
226 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
227 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
228 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
229 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
230 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
231 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
234 void vshader_sub(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
235 d->x = s0->x - s1->x;
236 d->y = s0->y - s1->y;
237 d->z = s0->z - s1->z;
238 d->w = s0->w - s1->w;
239 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
240 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
244 * Version 1.1 specific
247 void vshader_exp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
248 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
249 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
250 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
253 void vshader_log(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
254 float tmp_f = fabsf(s0->w);
255 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE;
256 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
257 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
260 void vshader_frc(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
261 d->x = s0->x - floorf(s0->x);
262 d->y = s0->y - floorf(s0->y);
263 d->z = 0.0f;
264 d->w = 1.0f;
265 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
266 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
269 typedef FLOAT D3DMATRIX44[4][4];
270 typedef FLOAT D3DMATRIX43[4][3];
271 typedef FLOAT D3DMATRIX34[4][4];
272 typedef FLOAT D3DMATRIX33[4][3];
273 typedef FLOAT D3DMATRIX32[4][2];
275 void vshader_m4x4(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, /*D3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
277 * BuGGY CODE: here only if cast not work for copy/paste
278 D3DSHADERVECTOR* mat2 = mat1 + 1;
279 D3DSHADERVECTOR* mat3 = mat1 + 2;
280 D3DSHADERVECTOR* mat4 = mat1 + 3;
281 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
282 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
283 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
284 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
286 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
287 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
288 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
289 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
290 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
291 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
292 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
293 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
296 void vshader_m4x3(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
297 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
298 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
299 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
300 d->w = 1.0f;
301 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
302 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
303 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
304 VSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
307 void vshader_m3x4(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
308 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
309 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
310 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
311 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
312 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
313 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
314 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
315 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
318 void vshader_m3x3(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
319 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
320 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
321 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
322 d->w = 1.0f;
323 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
324 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
325 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
326 VSTRACE(("executing m3x3(4): (%f) \n", d->w));
329 void vshader_m3x2(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DMATRIX32 mat) {
330 FIXME("check\n");
331 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
332 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
333 d->z = 0.0f;
334 d->w = 1.0f;
338 * Version 2.0 specific
340 void vshader_lrp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1, D3DSHADERVECTOR* s2, D3DSHADERVECTOR* s3) {
341 d->x = s0->x * (s1->x - s2->x) + s2->x;
342 d->y = s0->y * (s1->y - s2->y) + s2->y;
343 d->z = s0->z * (s1->z - s2->z) + s2->z;
344 d->w = s0->w * (s1->w - s2->w) + s2->x;
348 * log, exp, frc and m*x* appear to be macro instructions; this still needs to be verified
350 static CONST SHADER_OPCODE vshader_ins [] = {
351 {D3DSIO_NOP, "nop", 0, vshader_nop, 0, 0},
352 {D3DSIO_MOV, "mov", 2, vshader_mov, 0, 0},
353 {D3DSIO_ADD, "add", 3, vshader_add, 0, 0},
354 {D3DSIO_SUB, "sub", 3, vshader_sub, 0, 0},
355 {D3DSIO_MAD, "mad", 4, vshader_mad, 0, 0},
356 {D3DSIO_MUL, "mul", 3, vshader_mul, 0, 0},
357 {D3DSIO_RCP, "rcp", 2, vshader_rcp, 0, 0},
358 {D3DSIO_RSQ, "rsq", 2, vshader_rsq, 0, 0},
359 {D3DSIO_DP3, "dp3", 3, vshader_dp3, 0, 0},
360 {D3DSIO_DP4, "dp4", 3, vshader_dp4, 0, 0},
361 {D3DSIO_MIN, "min", 3, vshader_min, 0, 0},
362 {D3DSIO_MAX, "max", 3, vshader_max, 0, 0},
363 {D3DSIO_SLT, "slt", 3, vshader_slt, 0, 0},
364 {D3DSIO_SGE, "sge", 3, vshader_sge, 0, 0},
365 {D3DSIO_EXP, "exp", 2, vshader_exp, 0, 0},
366 {D3DSIO_LOG, "log", 2, vshader_log, 0, 0},
367 {D3DSIO_LIT, "lit", 2, vshader_lit, 0, 0},
368 {D3DSIO_DST, "dst", 3, vshader_dst, 0, 0},
369 {D3DSIO_LRP, "lrp", 5, vshader_lrp, 0, 0},
370 {D3DSIO_FRC, "frc", 2, vshader_frc, 0, 0},
371 {D3DSIO_M4x4, "m4x4", 3, vshader_m4x4, 0, 0},
372 {D3DSIO_M4x3, "m4x3", 3, vshader_m4x3, 0, 0},
373 {D3DSIO_M3x4, "m3x4", 3, vshader_m3x4, 0, 0},
374 {D3DSIO_M3x3, "m3x3", 3, vshader_m3x3, 0, 0},
375 {D3DSIO_M3x2, "m3x2", 3, vshader_m3x2, 0, 0},
376 /** FIXME: use direct access so add the others opcodes as stubs */
377 {D3DSIO_EXPP, "expp", 2, vshader_expp, 0, 0},
378 {D3DSIO_LOGP, "logp", 2, vshader_logp, 0, 0},
380 {0, NULL, 0, NULL, 0, 0}
384 inline static const SHADER_OPCODE* vshader_program_get_opcode(const DWORD code) {
385 DWORD i = 0;
386 /** TODO: use dichotomic search */
387 while (NULL != vshader_ins[i].name) {
388 if ((code & D3DSI_OPCODE_MASK) == vshader_ins[i].opcode) {
389 return &vshader_ins[i];
391 ++i;
393 return NULL;
396 inline static void vshader_program_dump_param(const DWORD param, int input) {
397 static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" };
398 static const char swizzle_reg_chars[] = "xyzw";
400 DWORD reg = param & 0x00001FFF;
401 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
403 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) TRACE("-");
405 switch (regtype << D3DSP_REGTYPE_SHIFT) {
406 case D3DSPR_TEMP:
407 TRACE("R[%lu]", reg);
408 break;
409 case D3DSPR_INPUT:
410 TRACE("V[%lu]", reg);
411 break;
412 case D3DSPR_CONST:
413 TRACE("C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
414 break;
415 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
416 TRACE("a[%lu]", reg);
417 break;
418 case D3DSPR_RASTOUT:
419 TRACE("%s", rastout_reg_names[reg]);
420 break;
421 case D3DSPR_ATTROUT:
422 TRACE("oD[%lu]", reg);
423 break;
424 case D3DSPR_TEXCRDOUT:
425 TRACE("oT[%lu]", reg);
426 break;
427 default:
428 break;
431 if (!input) {
432 /** operand output */
433 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
434 if (param & D3DSP_WRITEMASK_0) TRACE(".x");
435 if (param & D3DSP_WRITEMASK_1) TRACE(".y");
436 if (param & D3DSP_WRITEMASK_2) TRACE(".z");
437 if (param & D3DSP_WRITEMASK_3) TRACE(".w");
439 } else {
440 /** operand input */
441 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
442 DWORD swizzle_x = swizzle & 0x03;
443 DWORD swizzle_y = (swizzle >> 2) & 0x03;
444 DWORD swizzle_z = (swizzle >> 4) & 0x03;
445 DWORD swizzle_w = (swizzle >> 6) & 0x03;
447 * swizzle bits fields:
448 * WWZZYYXX
450 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
451 if (swizzle_x == swizzle_y &&
452 swizzle_x == swizzle_z &&
453 swizzle_x == swizzle_w) {
454 TRACE(".%c", swizzle_reg_chars[swizzle_x]);
455 } else {
456 TRACE(".%c%c%c%c",
457 swizzle_reg_chars[swizzle_x],
458 swizzle_reg_chars[swizzle_y],
459 swizzle_reg_chars[swizzle_z],
460 swizzle_reg_chars[swizzle_w]);
466 inline static BOOL vshader_is_version_token(DWORD token) {
467 return 0xFFFE0000 == (token & 0xFFFE0000);
470 inline static BOOL vshader_is_comment_token(DWORD token) {
471 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
475 * Function parser ...
477 inline static VOID IDirect3DVertexShaderImpl_ParseProgram(IDirect3DVertexShaderImpl* vshader, CONST DWORD* pFunction) {
478 const DWORD* pToken = pFunction;
479 const SHADER_OPCODE* curOpcode = NULL;
480 DWORD len = 0;
481 DWORD i;
483 if (NULL != pToken) {
484 while (D3DVS_END() != *pToken) {
485 if (vshader_is_version_token(*pToken)) { /** version */
486 TRACE("vs.%lu.%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
487 ++pToken;
488 ++len;
489 continue;
491 if (vshader_is_comment_token(*pToken)) { /** comment */
492 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
493 ++pToken;
494 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
495 pToken += comment_len;
496 len += comment_len + 1;
497 continue;
499 curOpcode = vshader_program_get_opcode(*pToken);
500 ++pToken;
501 ++len;
502 if (NULL == curOpcode) {
503 /* unkown current opcode ... */
504 while (*pToken & 0x80000000) {
505 TRACE("unrecognized opcode: %08lx\n", *pToken);
506 ++pToken;
507 ++len;
509 } else {
510 TRACE("%s ", curOpcode->name);
511 if (curOpcode->num_params > 0) {
512 vshader_program_dump_param(*pToken, 0);
513 ++pToken;
514 ++len;
515 for (i = 1; i < curOpcode->num_params; ++i) {
516 TRACE(", ");
517 vshader_program_dump_param(*pToken, 1);
518 ++pToken;
519 ++len;
522 TRACE("\n");
525 vshader->functionLength = (len + 1) * sizeof(DWORD);
526 } else {
527 vshader->functionLength = 1; /* no Function defined use fixed function vertex processing */
529 /* copy the function ... because it will certainly be released by application */
531 if (NULL != pFunction) {
532 vshader->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, vshader->functionLength);
533 memcpy(vshader->function, pFunction, vshader->functionLength);
534 } else {
535 vshader->function = NULL;
539 HRESULT WINAPI IDirect3DDeviceImpl_CreateVertexShader(IDirect3DDevice8Impl* This, CONST DWORD* pFunction, DWORD Usage, IDirect3DVertexShaderImpl** ppVertexShader) {
540 IDirect3DVertexShaderImpl* object;
542 object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(IDirect3DVertexShaderImpl));
543 if (NULL == object) {
544 *ppVertexShader = NULL;
545 return D3DERR_OUTOFVIDEOMEMORY;
547 /*object->lpVtbl = &Direct3DVextexShader9_Vtbl;*/
548 object->device = This; /* FIXME: AddRef(This) */
549 object->ref = 1;
551 object->usage = Usage;
552 object->data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(VSHADERDATA8));
554 IDirect3DVertexShaderImpl_ParseProgram(object, pFunction);
556 *ppVertexShader = object;
557 return D3D_OK;
560 BOOL IDirect3DVertexShaderImpl_ExecuteHAL(IDirect3DVertexShaderImpl* vshader, VSHADERINPUTDATA8* input, VSHADEROUTPUTDATA8* output) {
561 /**
562 * TODO: use the NV_vertex_program (or 1_1) extension
563 * and specifics vendors (ARB_vertex_program??) variants for it
565 return TRUE;
568 HRESULT WINAPI IDirect3DVertexShaderImpl_ExecuteSW(IDirect3DVertexShaderImpl* vshader, VSHADERINPUTDATA8* input, VSHADEROUTPUTDATA8* output) {
569 /** Vertex Shader Temporary Registers */
570 D3DSHADERVECTOR R[12];
571 /*D3DSHADERSCALAR A0;*/
572 D3DSHADERVECTOR A[1];
573 /** temporary Vector for modifier management */
574 D3DSHADERVECTOR d;
575 D3DSHADERVECTOR s[3];
576 /** parser datas */
577 const DWORD* pToken = vshader->function;
578 const SHADER_OPCODE* curOpcode = NULL;
579 /** functions parameters */
580 D3DSHADERVECTOR* p[4];
581 D3DSHADERVECTOR* p_send[4];
582 DWORD i;
584 /** init temporary register */
585 memset(R, 0, 12 * sizeof(D3DSHADERVECTOR));
587 /* vshader_program_parse(vshader); */
588 #if 0 /* Must not be 1 in cvs */
589 TRACE("Input:\n");
590 TRACE_VSVECTOR(vshader->data->C[0]);
591 TRACE_VSVECTOR(vshader->data->C[1]);
592 TRACE_VSVECTOR(vshader->data->C[2]);
593 TRACE_VSVECTOR(vshader->data->C[3]);
594 TRACE_VSVECTOR(vshader->data->C[4]);
595 TRACE_VSVECTOR(vshader->data->C[5]);
596 TRACE_VSVECTOR(vshader->data->C[6]);
597 TRACE_VSVECTOR(vshader->data->C[7]);
598 TRACE_VSVECTOR(vshader->data->C[8]);
599 TRACE_VSVECTOR(vshader->data->C[64]);
600 TRACE_VSVECTOR(input->V[D3DVSDE_POSITION]);
601 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDWEIGHT]);
602 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDINDICES]);
603 TRACE_VSVECTOR(input->V[D3DVSDE_NORMAL]);
604 TRACE_VSVECTOR(input->V[D3DVSDE_PSIZE]);
605 TRACE_VSVECTOR(input->V[D3DVSDE_DIFFUSE]);
606 TRACE_VSVECTOR(input->V[D3DVSDE_SPECULAR]);
607 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD0]);
608 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD1]);
609 #endif
611 TRACE_VSVECTOR(vshader->data->C[64]);
613 /* the first dword is the version tag */
614 /* TODO: parse it */
616 if (vshader_is_version_token(*pToken)) { /** version */
617 ++pToken;
619 while (D3DVS_END() != *pToken) {
620 if (vshader_is_comment_token(*pToken)) { /** comment */
621 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
622 ++pToken;
623 pToken += comment_len;
624 continue ;
626 curOpcode = vshader_program_get_opcode(*pToken);
627 ++pToken;
628 if (NULL == curOpcode) {
629 i = 0;
630 /* unkown current opcode ... */
631 while (*pToken & 0x80000000) {
632 if (i == 0) {
633 TRACE("unrecognized opcode: pos=%d token=%08lX\n", (pToken - 1) - vshader->function, *(pToken - 1));
635 TRACE("unrecognized opcode param: pos=%d token=%08lX what=", pToken - vshader->function, *pToken);
636 vshader_program_dump_param(*pToken, i);
637 TRACE("\n");
638 ++i;
639 ++pToken;
641 /*return FALSE;*/
642 } else {
643 if (curOpcode->num_params > 0) {
644 /*TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken);*/
645 for (i = 0; i < curOpcode->num_params; ++i) {
646 DWORD reg = pToken[i] & 0x00001FFF;
647 DWORD regtype = ((pToken[i] & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
649 switch (regtype << D3DSP_REGTYPE_SHIFT) {
650 case D3DSPR_TEMP:
651 /*TRACE("p[%d]=R[%d]\n", i, reg);*/
652 p[i] = &R[reg];
653 break;
654 case D3DSPR_INPUT:
655 /*TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]);*/
656 p[i] = &input->V[reg];
657 break;
658 case D3DSPR_CONST:
659 if (pToken[i] & D3DVS_ADDRMODE_RELATIVE) {
660 p[i] = &vshader->data->C[(DWORD) A[0].x + reg];
661 } else {
662 p[i] = &vshader->data->C[reg];
664 break;
665 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
666 if (0 != reg) {
667 ERR("cannot handle address registers != a0, forcing use of a0\n");
668 reg = 0;
670 /*TRACE("p[%d]=A[%d]\n", i, reg);*/
671 p[i] = &A[reg];
672 break;
673 case D3DSPR_RASTOUT:
674 switch (reg) {
675 case D3DSRO_POSITION:
676 p[i] = &output->oPos;
677 break;
678 case D3DSRO_FOG:
679 p[i] = &output->oFog;
680 break;
681 case D3DSRO_POINT_SIZE:
682 p[i] = &output->oPts;
683 break;
685 break;
686 case D3DSPR_ATTROUT:
687 /*TRACE("p[%d]=oD[%d]\n", i, reg);*/
688 p[i] = &output->oD[reg];
689 break;
690 case D3DSPR_TEXCRDOUT:
691 /*TRACE("p[%d]=oT[%d]\n", i, reg);*/
692 p[i] = &output->oT[reg];
693 break;
694 default:
695 break;
698 if (i > 0) { /* input reg */
699 DWORD swizzle = (pToken[i] & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
700 UINT isNegative = ((pToken[i] & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG);
702 if (!isNegative && (D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) {
703 /*TRACE("p[%d] not swizzled\n", i);*/
704 p_send[i] = p[i];
705 } else {
706 DWORD swizzle_x = swizzle & 0x03;
707 DWORD swizzle_y = (swizzle >> 2) & 0x03;
708 DWORD swizzle_z = (swizzle >> 4) & 0x03;
709 DWORD swizzle_w = (swizzle >> 6) & 0x03;
710 /*TRACE("p[%d] swizzled\n", i);*/
711 float* tt = (float*) p[i];
712 s[i].x = (isNegative) ? -tt[swizzle_x] : tt[swizzle_x];
713 s[i].y = (isNegative) ? -tt[swizzle_y] : tt[swizzle_y];
714 s[i].z = (isNegative) ? -tt[swizzle_z] : tt[swizzle_z];
715 s[i].w = (isNegative) ? -tt[swizzle_w] : tt[swizzle_w];
716 p_send[i] = &s[i];
718 } else { /* output reg */
719 if ((pToken[i] & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) {
720 p_send[i] = p[i];
721 } else {
722 p_send[i] = &d; /* to be post-processed for modifiers management */
728 switch (curOpcode->num_params) {
729 case 0:
730 curOpcode->soft_fct();
731 break;
732 case 1:
733 curOpcode->soft_fct(p_send[0]);
734 break;
735 case 2:
736 curOpcode->soft_fct(p_send[0], p_send[1]);
737 break;
738 case 3:
739 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2]);
740 break;
741 case 4:
742 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3]);
743 break;
744 case 5:
745 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4]);
746 break;
747 default:
748 ERR("%s too many params: %u\n", curOpcode->name, curOpcode->num_params);
751 /* check if output reg modifier post-process */
752 if (curOpcode->num_params > 0 && (pToken[0] & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
753 if (pToken[0] & D3DSP_WRITEMASK_0) p[0]->x = d.x;
754 if (pToken[0] & D3DSP_WRITEMASK_1) p[0]->y = d.y;
755 if (pToken[0] & D3DSP_WRITEMASK_2) p[0]->z = d.z;
756 if (pToken[0] & D3DSP_WRITEMASK_3) p[0]->w = d.w;
759 #if 0
760 TRACE_VSVECTOR(output->oPos);
761 TRACE_VSVECTOR(output->oD[0]);
762 TRACE_VSVECTOR(output->oD[1]);
763 TRACE_VSVECTOR(output->oT[0]);
764 TRACE_VSVECTOR(output->oT[1]);
765 TRACE_VSVECTOR(R[0]);
766 TRACE_VSVECTOR(R[1]);
767 TRACE_VSVECTOR(R[2]);
768 TRACE_VSVECTOR(R[3]);
769 TRACE_VSVECTOR(R[4]);
770 TRACE_VSVECTOR(R[5]);
771 #endif
773 /* to next opcode token */
774 pToken += curOpcode->num_params;
776 #if 0
777 TRACE("End of current instruction:\n");
778 TRACE_VSVECTOR(output->oPos);
779 TRACE_VSVECTOR(output->oD[0]);
780 TRACE_VSVECTOR(output->oD[1]);
781 TRACE_VSVECTOR(output->oT[0]);
782 TRACE_VSVECTOR(output->oT[1]);
783 TRACE_VSVECTOR(R[0]);
784 TRACE_VSVECTOR(R[1]);
785 TRACE_VSVECTOR(R[2]);
786 TRACE_VSVECTOR(R[3]);
787 TRACE_VSVECTOR(R[4]);
788 TRACE_VSVECTOR(R[5]);
789 #endif
791 #if 0 /* Must not be 1 in cvs */
792 TRACE("Output:\n");
793 TRACE_VSVECTOR(output->oPos);
794 TRACE_VSVECTOR(output->oD[0]);
795 TRACE_VSVECTOR(output->oD[1]);
796 TRACE_VSVECTOR(output->oT[0]);
797 TRACE_VSVECTOR(output->oT[1]);
798 #endif
799 return D3D_OK;
802 HRESULT WINAPI IDirect3DVertexShaderImpl_GetFunction(IDirect3DVertexShaderImpl* This, VOID* pData, UINT* pSizeOfData) {
803 if (NULL == pData) {
804 *pSizeOfData = This->functionLength;
805 return D3D_OK;
807 if (*pSizeOfData < This->functionLength) {
808 *pSizeOfData = This->functionLength;
809 return D3DERR_MOREDATA;
811 if (NULL == This->function) { /* no function defined */
812 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
813 (*(DWORD **) pData) = NULL;
814 } else {
815 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
816 memcpy(pData, This->function, This->functionLength);
818 return D3D_OK;
821 HRESULT WINAPI IDirect3DVertexShaderImpl_SetConstantF(IDirect3DVertexShaderImpl* This, UINT StartRegister, CONST FLOAT* pConstantData, UINT Vector4fCount) {
822 if (StartRegister + Vector4fCount > D3D8_VSHADER_MAX_CONSTANTS) {
823 return D3DERR_INVALIDCALL;
825 if (NULL == This->data) { /* temporary while datas not supported */
826 FIXME("(%p) : VertexShader_SetConstant not fully supported yet\n", This);
827 return D3DERR_INVALIDCALL;
829 memcpy(&This->data->C[StartRegister], pConstantData, Vector4fCount * 4 * sizeof(FLOAT));
830 return D3D_OK;
833 HRESULT WINAPI IDirect3DVertexShaderImpl_GetConstantF(IDirect3DVertexShaderImpl* This, UINT StartRegister, FLOAT* pConstantData, UINT Vector4fCount) {
834 if (StartRegister + Vector4fCount > D3D8_VSHADER_MAX_CONSTANTS) {
835 return D3DERR_INVALIDCALL;
837 if (NULL == This->data) { /* temporary while datas not supported */
838 return D3DERR_INVALIDCALL;
840 memcpy(pConstantData, &This->data->C[StartRegister], Vector4fCount * 4 * sizeof(FLOAT));
841 return D3D_OK;
845 /**********************************************************************************************************************************************
846 **********************************************************************************************************************************************
847 **********************************************************************************************************************************************
848 **********************************************************************************************************************************************
849 **********************************************************************************************************************************************/
851 void pshader_texcoord(D3DSHADERVECTOR* d) {
854 void pshader_texkill(D3DSHADERVECTOR* d) {
857 void pshader_tex(D3DSHADERVECTOR* d) {
860 void pshader_texbem(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
863 void pshader_texbeml(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
866 void pshader_texreg2ar(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
869 void pshader_texreg2gb(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
872 void pshader_texm3x2pad(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
875 void pshader_texm3x2tex(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
878 void pshader_texm3x3pad(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
881 void pshader_texm3x3tex(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
884 void pshader_texm3x3diff(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
887 void pshader_texm3x3spec(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
890 void pshader_texm3x3vspec(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
893 void pshader_cnd(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1, D3DSHADERVECTOR* s2) {
896 void pshader_def(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1, D3DSHADERVECTOR* s2, D3DSHADERVECTOR* s3) {
899 void pshader_texreg2rgb(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
902 void pshader_texdp3tex(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
905 void pshader_texm3x2depth(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
908 void pshader_texdp3(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
911 void pshader_texm3x3(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
914 void pshader_texdepth(D3DSHADERVECTOR* d) {
917 void pshader_cmp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1, D3DSHADERVECTOR* s2) {
920 void pshader_bem(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
923 static CONST SHADER_OPCODE pshader_ins [] = {
924 {D3DSIO_NOP, "nop", 0, vshader_nop, 0, 0},
925 {D3DSIO_MOV, "mov", 2, vshader_mov, 0, 0},
926 {D3DSIO_ADD, "add", 3, vshader_add, 0, 0},
927 {D3DSIO_SUB, "sub", 3, vshader_sub, 0, 0},
928 {D3DSIO_MAD, "mad", 4, vshader_mad, 0, 0},
929 {D3DSIO_MUL, "mul", 3, vshader_mul, 0, 0},
930 {D3DSIO_RCP, "rcp", 2, vshader_rcp, 0, 0},
931 {D3DSIO_RSQ, "rsq", 2, vshader_rsq, 0, 0},
932 {D3DSIO_DP3, "dp3", 3, vshader_dp3, 0, 0},
933 {D3DSIO_DP4, "dp4", 3, vshader_dp4, 0, 0},
934 {D3DSIO_MIN, "min", 3, vshader_min, 0, 0},
935 {D3DSIO_MAX, "max", 3, vshader_max, 0, 0},
936 {D3DSIO_SLT, "slt", 3, vshader_slt, 0, 0},
937 {D3DSIO_SGE, "sge", 3, vshader_sge, 0, 0},
938 {D3DSIO_EXP, "exp", 2, vshader_exp, 0, 0},
939 {D3DSIO_LOG, "log", 2, vshader_log, 0, 0},
940 {D3DSIO_LIT, "lit", 2, vshader_lit, 0, 0},
941 {D3DSIO_DST, "dst", 3, vshader_dst, 0, 0},
942 {D3DSIO_LRP, "lrp", 5, vshader_lrp, 0, 0},
943 {D3DSIO_FRC, "frc", 2, vshader_frc, 0, 0},
944 {D3DSIO_M4x4, "m4x4", 3, vshader_m4x4, 0, 0},
945 {D3DSIO_M4x3, "m4x3", 3, vshader_m4x3, 0, 0},
946 {D3DSIO_M3x4, "m3x4", 3, vshader_m3x4, 0, 0},
947 {D3DSIO_M3x3, "m3x3", 3, vshader_m3x3, 0, 0},
948 {D3DSIO_M3x2, "m3x2", 3, vshader_m3x2, 0, 0},
950 {D3DSIO_TEXCOORD, "texcoord", 1, pshader_texcoord, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
951 {D3DSIO_TEXKILL, "texkill", 1, pshader_texkill, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
952 {D3DSIO_TEX, "tex", 1, pshader_tex, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
953 {D3DSIO_TEXBEM, "texbem", 2, pshader_texbem, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
954 {D3DSIO_TEXBEML, "texbeml", 2, pshader_texbeml, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
955 {D3DSIO_TEXREG2AR, "texreg2ar", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
956 {D3DSIO_TEXREG2GB, "texreg2gb", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
957 {D3DSIO_TEXM3x2PAD, "texm3x2pad", 2, pshader_texm3x2pad, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
958 {D3DSIO_TEXM3x2TEX, "texm3x2tex", 2, pshader_texm3x2tex, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
959 {D3DSIO_TEXM3x3PAD, "texm3x3pad", 2, pshader_texm3x3pad, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
960 {D3DSIO_TEXM3x3TEX, "texm3x3tex", 2, pshader_texm3x3tex, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
961 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
962 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", 3, pshader_texm3x3spec, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
963 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspec", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
965 {D3DSIO_EXPP, "expp", 2, vshader_expp, 0, 0},
966 {D3DSIO_LOGP, "logp", 2, vshader_logp, 0, 0},
968 {D3DSIO_CND, "cnd", 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
969 {D3DSIO_DEF, "def", 5, pshader_def, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
970 {D3DSIO_TEXREG2RGB, "texbreg2rgb", 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
972 {D3DSIO_TEXDP3TEX, "texdp3tex", 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
973 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", 2, pshader_texm3x2depth, D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
974 {D3DSIO_TEXDP3, "texdp3", 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
975 {D3DSIO_TEXM3x3, "texm3x3", 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
976 {D3DSIO_TEXDEPTH, "texdepth", 1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
977 {D3DSIO_CMP, "cmp", 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
978 {D3DSIO_BEM, "bem", 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
980 {0, NULL, 0, NULL}
983 inline static const SHADER_OPCODE* pshader_program_get_opcode(const DWORD code) {
984 DWORD i = 0;
985 /** TODO: use dichotomic search */
986 while (NULL != pshader_ins[i].name) {
987 if ((code & D3DSI_OPCODE_MASK) == pshader_ins[i].opcode) {
988 return &pshader_ins[i];
990 ++i;
992 return NULL;
995 inline static void pshader_program_dump_opcode(const SHADER_OPCODE* curOpcode, const DWORD code, const DWORD output) {
996 if (0 != (code & ~D3DSI_OPCODE_MASK)) {
997 DWORD mask = (code & ~D3DSI_OPCODE_MASK);
998 switch (mask) {
999 case 0x40000000: TRACE("+"); break;
1000 default:
1001 TRACE(" unhandled modifier(0x%08lx) ", mask);
1004 TRACE("%s", curOpcode->name);
1006 * normally this is a destination reg modifier
1007 * but in pixel shaders asm code its specified as:
1008 * dp3_x4 t1.rgba, r1, c1
1009 * or
1010 * dp3_x2_sat r0, t0_bx2, v0_bx2
1011 * so for better debbuging i use the same norm
1013 if (0 != (output & D3DSP_DSTSHIFT_MASK)) {
1014 DWORD shift = (output & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1015 if (shift > 0) {
1016 TRACE("_x%u", 1 << shift);
1020 * TODO: fix the divide shifts: d2, d4, d8
1021 * so i have to find a sample
1023 if (0 != (output & D3DSP_DSTMOD_MASK)) {
1024 DWORD mask = output & D3DSP_DSTMOD_MASK;
1025 switch (mask) {
1026 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1027 default:
1028 TRACE("_unhandled_modifier(0x%08lx)", mask);
1031 TRACE(" ");
1034 inline static void pshader_program_dump_param(const DWORD param, int input) {
1035 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1036 static const char swizzle_reg_chars[] = "rgba";
1038 DWORD reg = param & 0x00001FFF;
1039 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
1041 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) {
1042 TRACE("-");
1045 switch (regtype << D3DSP_REGTYPE_SHIFT) {
1046 case D3DSPR_TEMP:
1047 TRACE("R[%lu]", reg);
1048 break;
1049 case D3DSPR_INPUT:
1050 TRACE("V[%lu]", reg);
1051 break;
1052 case D3DSPR_CONST:
1053 TRACE("C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1054 break;
1055 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1056 TRACE("t[%lu]", reg);
1057 break;
1058 case D3DSPR_RASTOUT:
1059 TRACE("%s", rastout_reg_names[reg]);
1060 break;
1061 case D3DSPR_ATTROUT:
1062 TRACE("oD[%lu]", reg);
1063 break;
1064 case D3DSPR_TEXCRDOUT:
1065 TRACE("oT[%lu]", reg);
1066 break;
1067 default:
1068 break;
1071 if (!input) {
1072 /** operand output */
1074 * for better debugging traces it's done into opcode dump code
1075 * @see pshader_program_dump_opcode
1076 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1077 DWORD mask = param & D3DSP_DSTMOD_MASK;
1078 switch (mask) {
1079 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1080 default:
1081 TRACE("_unhandled_modifier(0x%08lx)", mask);
1084 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1085 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1086 if (shift > 0) {
1087 TRACE("_x%u", 1 << shift);
1091 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1092 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1093 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1094 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1095 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1097 } else {
1098 /** operand input */
1099 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1100 DWORD swizzle_x = swizzle & 0x03;
1101 DWORD swizzle_y = (swizzle >> 2) & 0x03;
1102 DWORD swizzle_z = (swizzle >> 4) & 0x03;
1103 DWORD swizzle_w = (swizzle >> 6) & 0x03;
1105 * swizzle bits fields:
1106 * WWZZYYXX
1108 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1109 if (swizzle_x == swizzle_y &&
1110 swizzle_x == swizzle_z &&
1111 swizzle_x == swizzle_w) {
1112 TRACE(".%c", swizzle_reg_chars[swizzle_x]);
1113 } else {
1114 TRACE(".%c%c%c%c",
1115 swizzle_reg_chars[swizzle_x],
1116 swizzle_reg_chars[swizzle_y],
1117 swizzle_reg_chars[swizzle_z],
1118 swizzle_reg_chars[swizzle_w]);
1121 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1122 DWORD mask = param & D3DSP_SRCMOD_MASK;
1123 /*TRACE("_modifier(0x%08lx) ", mask);*/
1124 switch (mask) {
1125 case D3DSPSM_NONE: break;
1126 case D3DSPSM_NEG: break;
1127 case D3DSPSM_BIAS: TRACE("_bias"); break;
1128 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1129 case D3DSPSM_SIGN: TRACE("_sign"); break;
1130 case D3DSPSM_SIGNNEG: TRACE("_sign"); break;
1131 case D3DSPSM_COMP: TRACE("_comp"); break;
1132 case D3DSPSM_X2: TRACE("_x2"); break;
1133 case D3DSPSM_X2NEG: TRACE("_bx2"); break;
1134 case D3DSPSM_DZ: TRACE("_dz"); break;
1135 case D3DSPSM_DW: TRACE("_dw"); break;
1136 default:
1137 TRACE("_unknown(0x%08lx)", mask);
1143 inline static BOOL pshader_is_version_token(DWORD token) {
1144 return 0xFFFF0000 == (token & 0xFFFF0000);
1147 inline static BOOL pshader_is_comment_token(DWORD token) {
1148 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
1154 * Pixel Shaders
1156 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/PixelShader1_X/modifiers/sourceregistermodifiers.asp
1157 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/PixelShader2_0/Registers/Registers.asp
1158 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/d3d/interfaces/IDirect3DPixelShader9/_IDirect3DPixelShader9.asp
1161 inline static VOID IDirect3DPixelShaderImpl_ParseProgram(IDirect3DPixelShaderImpl* pshader, CONST DWORD* pFunction) {
1162 const DWORD* pToken = pFunction;
1163 const SHADER_OPCODE* curOpcode = NULL;
1164 DWORD code;
1165 DWORD len = 0;
1166 DWORD i;
1168 if (NULL != pToken) {
1169 while (D3DPS_END() != *pToken) {
1170 if (pshader_is_version_token(*pToken)) { /** version */
1171 TRACE("ps.%lu.%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1172 ++pToken;
1173 ++len;
1174 continue;
1176 if (pshader_is_comment_token(*pToken)) { /** comment */
1177 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1178 ++pToken;
1179 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
1180 pToken += comment_len;
1181 len += comment_len + 1;
1182 continue;
1184 code = *pToken;
1185 curOpcode = pshader_program_get_opcode(code);
1186 ++pToken;
1187 ++len;
1188 if (NULL == curOpcode) {
1189 /* unkown current opcode ... */
1190 while (*pToken & 0x80000000) {
1191 TRACE("unrecognized opcode: %08lx\n", *pToken);
1192 ++pToken;
1193 ++len;
1195 } else {
1196 TRACE(" ");
1197 pshader_program_dump_opcode(curOpcode, code, *pToken);
1198 if (curOpcode->num_params > 0) {
1199 pshader_program_dump_param(*pToken, 0);
1200 ++pToken;
1201 ++len;
1202 for (i = 1; i < curOpcode->num_params; ++i) {
1203 TRACE(", ");
1204 if (D3DSIO_DEF != code) {
1205 pshader_program_dump_param(*pToken, 1);
1206 } else {
1207 TRACE("%f", *((float*) pToken));
1209 ++pToken;
1210 ++len;
1213 TRACE("\n");
1215 pshader->functionLength = (len + 1) * sizeof(DWORD);
1217 } else {
1218 pshader->functionLength = 1; /* no Function defined use fixed function vertex processing */
1220 if (NULL != pFunction) {
1221 pshader->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, pshader->functionLength);
1222 memcpy(pshader->function, pFunction, pshader->functionLength);
1223 } else {
1224 pshader->function = NULL;
1228 HRESULT WINAPI IDirect3DDeviceImpl_CreatePixelShader(IDirect3DDevice8Impl* This, CONST DWORD* pFunction, IDirect3DPixelShaderImpl** ppPixelShader) {
1229 IDirect3DPixelShaderImpl* object;
1231 object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(IDirect3DPixelShaderImpl));
1232 if (NULL == object) {
1233 *ppPixelShader = NULL;
1234 return D3DERR_OUTOFVIDEOMEMORY;
1236 /*object->lpVtbl = &Direct3DPixelShader9_Vtbl;*/
1237 object->device = This;
1238 object->ref = 1;
1240 object->data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(PSHADERDATA8));
1242 IDirect3DPixelShaderImpl_ParseProgram(object, pFunction);
1244 *ppPixelShader = object;
1245 return D3D_OK;
1248 HRESULT WINAPI IDirect3DPixelShaderImpl_GetFunction(IDirect3DPixelShaderImpl* This, VOID* pData, UINT* pSizeOfData) {
1249 if (NULL == pData) {
1250 *pSizeOfData = This->functionLength;
1251 return D3D_OK;
1253 if (*pSizeOfData < This->functionLength) {
1254 *pSizeOfData = This->functionLength;
1255 return D3DERR_MOREDATA;
1257 if (NULL == This->function) { /* no function defined */
1258 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
1259 (*(DWORD **) pData) = NULL;
1260 } else {
1261 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
1262 memcpy(pData, This->function, This->functionLength);
1264 return D3D_OK;
1267 HRESULT WINAPI IDirect3DPixelShaderImpl_SetConstantF(IDirect3DPixelShaderImpl* This, UINT StartRegister, CONST FLOAT* pConstantData, UINT Vector4fCount) {
1268 if (StartRegister + Vector4fCount > D3D8_VSHADER_MAX_CONSTANTS) {
1269 return D3DERR_INVALIDCALL;
1271 if (NULL == This->data) { /* temporary while datas not supported */
1272 FIXME("(%p) : VertexShader_SetConstant not fully supported yet\n", This);
1273 return D3DERR_INVALIDCALL;
1275 memcpy(&This->data->C[StartRegister], pConstantData, Vector4fCount * 4 * sizeof(FLOAT));
1276 return D3D_OK;
1279 HRESULT WINAPI IDirect3DPixelShaderImpl_GetConstantF(IDirect3DPixelShaderImpl* This, UINT StartRegister, FLOAT* pConstantData, UINT Vector4fCount) {
1280 if (StartRegister + Vector4fCount > D3D8_VSHADER_MAX_CONSTANTS) {
1281 return D3DERR_INVALIDCALL;
1283 if (NULL == This->data) { /* temporary while datas not supported */
1284 return D3DERR_INVALIDCALL;
1286 memcpy(pConstantData, &This->data->C[StartRegister], Vector4fCount * 4 * sizeof(FLOAT));
1287 return D3D_OK;
1291 /**********************************************************************************************************************************************
1292 **********************************************************************************************************************************************
1293 **********************************************************************************************************************************************
1294 **********************************************************************************************************************************************
1295 **********************************************************************************************************************************************/
1297 /***********************************************************************
1298 * ValidateVertexShader (D3D8.@)
1300 BOOL WINAPI ValidateVertexShader(LPVOID what, LPVOID toto) {
1301 FIXME("(void): stub: %p %p\n", what, toto);
1302 return TRUE;
1305 /***********************************************************************
1306 * ValidatePixelShader (D3D8.@)
1308 BOOL WINAPI ValidatePixelShader(LPVOID what, LPVOID toto) {
1309 FIXME("(void): stub: %p %p\n", what, toto);
1310 return TRUE;