winsock: Fix two Windows to Unix mapping issues.
[wine/hacks.git] / dlls / d3d8 / shader.c
blobf52a0ed16f18c26cc4ab829541d3868501709667
1 /*
2 * shaders implementation
4 * Copyright 2002-2004 Raphael Junqueira
5 * Copyright 2004 Jason Edmeades
6 * Copyright 2004 Christian Costa
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "config.h"
25 #include <math.h>
26 #include <stdarg.h>
27 #include <stdio.h>
29 #include "windef.h"
30 #include "winbase.h"
31 #include "winuser.h"
32 #include "wingdi.h"
33 #include "wine/debug.h"
35 #include "d3d8_private.h"
/* Debug channels: d3d_shader is this file's default channel; d3d_hw_shader is
 * declared additionally and used by the hardware shader generator traces. */
WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
WINE_DECLARE_DEBUG_CHANNEL(d3d_hw_shader);

/* Software vertex shader tracing.
 * Switch the condition below to 1 to have VSTRACE / TRACE_VSVECTOR expand to
 * real TRACE calls; with 0 they expand to nothing. */
#if 0 /* Must not be 1 in cvs version */
# define VSTRACE(A) TRACE A
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
# define VSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif
50 /**
51 * DirectX9 SDK download
52 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
54 * Exploring D3DX
55 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
57 * Using Vertex Shaders
58 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
60 * Dx9 New
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
63 * Dx9 Shaders
64 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
65 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
67 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
69 * Dx9 D3DX
70 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
72 * FVF
73 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
75 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
76 * http://developer.nvidia.com/view.asp?IO=vstovp
78 * NVIDIA: Memory Management with VAR
79 * http://developer.nvidia.com/view.asp?IO=var_memory_management
/* Pointer to a software opcode handler. The parameter list is left
 * unspecified so that handlers taking different numbers of arguments can all
 * be stored in the same opcode table slot. */
typedef void (*shader_fct_t)();
84 typedef struct SHADER_OPCODE {
85 CONST WORD opcode;
86 const char* name;
87 CONST UINT num_params;
88 shader_fct_t soft_fct;
89 DWORD min_version;
90 DWORD max_version;
91 } SHADER_OPCODE;
/*******************************
 * vshader functions software VM
 */
97 void vshader_add(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
98 d->x = s0->x + s1->x;
99 d->y = s0->y + s1->y;
100 d->z = s0->z + s1->z;
101 d->w = s0->w + s1->w;
102 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
103 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
106 void vshader_dp3(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
107 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
108 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
109 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
112 void vshader_dp4(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
113 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
114 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
115 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
118 void vshader_dst(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
119 d->x = 1.0f;
120 d->y = s0->y * s1->y;
121 d->z = s0->z;
122 d->w = s1->w;
123 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
124 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
127 void vshader_expp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
128 union {
129 float f;
130 DWORD d;
131 } tmp;
133 tmp.f = floorf(s0->w);
134 d->x = powf(2.0f, tmp.f);
135 d->y = s0->w - tmp.f;
137 tmp.f = powf(2.0f, s0->w);
138 tmp.d &= 0xFFFFFF00U;
139 d->z = tmp.f;
140 d->w = 1.0f;
141 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
142 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
145 void vshader_lit(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
146 d->x = 1.0f;
147 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
148 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
149 d->w = 1.0f;
150 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
151 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
154 void vshader_logp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
155 float tmp_f = fabsf(s0->w);
156 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
157 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
158 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
161 void vshader_mad(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1, D3DSHADERVECTOR* s2) {
162 d->x = s0->x * s1->x + s2->x;
163 d->y = s0->y * s1->y + s2->y;
164 d->z = s0->z * s1->z + s2->z;
165 d->w = s0->w * s1->w + s2->w;
166 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
167 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
170 void vshader_max(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
171 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
172 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
173 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
174 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
175 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
176 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
179 void vshader_min(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
180 d->x = (s0->x < s1->x) ? s0->x : s1->x;
181 d->y = (s0->y < s1->y) ? s0->y : s1->y;
182 d->z = (s0->z < s1->z) ? s0->z : s1->z;
183 d->w = (s0->w < s1->w) ? s0->w : s1->w;
184 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
185 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
188 void vshader_mov(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
189 d->x = s0->x;
190 d->y = s0->y;
191 d->z = s0->z;
192 d->w = s0->w;
193 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
194 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
197 void vshader_mul(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
198 d->x = s0->x * s1->x;
199 d->y = s0->y * s1->y;
200 d->z = s0->z * s1->z;
201 d->w = s0->w * s1->w;
202 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
203 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/**
 * nop: intentionally does nothing.
 */
void vshader_nop(void) {
  /* nothing to execute */
}
210 void vshader_rcp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
211 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
212 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
213 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
216 void vshader_rsq(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
217 float tmp_f = fabsf(s0->w);
218 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
219 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
220 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
223 void vshader_sge(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
224 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
225 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
226 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
227 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
228 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
229 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
232 void vshader_slt(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
233 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
234 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
235 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
236 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
237 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
238 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
241 void vshader_sub(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
242 d->x = s0->x - s1->x;
243 d->y = s0->y - s1->y;
244 d->z = s0->z - s1->z;
245 d->w = s0->w - s1->w;
246 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
247 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/**
 * Version 1.1 specific
 */
254 void vshader_exp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
255 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
256 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
257 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
260 void vshader_log(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
261 float tmp_f = fabsf(s0->w);
262 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
263 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
264 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
267 void vshader_frc(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
268 d->x = s0->x - floorf(s0->x);
269 d->y = s0->y - floorf(s0->y);
270 d->z = 0.0f;
271 d->w = 1.0f;
272 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
273 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
276 typedef FLOAT D3DMATRIX44[4][4];
277 typedef FLOAT D3DMATRIX43[3][4];
278 typedef FLOAT D3DMATRIX34[4][3];
279 typedef FLOAT D3DMATRIX33[3][3];
280 typedef FLOAT D3DMATRIX32[2][3];
282 void vshader_m4x4(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, /*D3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
284 * BuGGY CODE: here only if cast not work for copy/paste
285 D3DSHADERVECTOR* mat2 = mat1 + 1;
286 D3DSHADERVECTOR* mat3 = mat1 + 2;
287 D3DSHADERVECTOR* mat4 = mat1 + 3;
288 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
289 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
290 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
291 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
293 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
294 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
295 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
296 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
297 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f)\n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
298 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f)\n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
299 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f)\n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
300 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f)\n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
303 void vshader_m4x3(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
304 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
305 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
306 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
307 d->w = 1.0f;
308 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f)\n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
309 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f)\n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
310 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f)\n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
311 VSTRACE(("executing m4x3(4): (%f) (%f)\n", s0->w, d->w));
314 void vshader_m3x4(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
315 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
316 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
317 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
318 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
319 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
320 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
321 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
322 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
325 void vshader_m3x3(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
326 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
327 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
328 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
329 d->w = 1.0f;
330 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
331 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
332 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
333 VSTRACE(("executing m3x3(4): (%f) \n", d->w));
336 void vshader_m3x2(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DMATRIX32 mat) {
337 FIXME("check\n");
338 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
339 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
340 d->z = 0.0f;
341 d->w = 1.0f;
/**
 * Version 2.0 specific
 */
347 void vshader_lrp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1, D3DSHADERVECTOR* s2, D3DSHADERVECTOR* s3) {
348 d->x = s0->x * (s1->x - s2->x) + s2->x;
349 d->y = s0->y * (s1->y - s2->y) + s2->y;
350 d->z = s0->z * (s1->z - s2->z) + s2->z;
351 d->w = s0->w * (s1->w - s2->w) + s2->x;
/**
 * log, exp, frc and m*x* seem to be macro instructions ... to be checked
 * Note: opcode names must be in uppercase when they are mapped directly to GL hw shaders
 */
358 static CONST SHADER_OPCODE vshader_ins [] = {
359 {D3DSIO_NOP, "NOP", 0, vshader_nop, 0, 0},
360 {D3DSIO_MOV, "MOV", 2, vshader_mov, 0, 0},
361 {D3DSIO_ADD, "ADD", 3, vshader_add, 0, 0},
362 {D3DSIO_SUB, "SUB", 3, vshader_sub, 0, 0},
363 {D3DSIO_MAD, "MAD", 4, vshader_mad, 0, 0},
364 {D3DSIO_MUL, "MUL", 3, vshader_mul, 0, 0},
365 {D3DSIO_RCP, "RCP", 2, vshader_rcp, 0, 0},
366 {D3DSIO_RSQ, "RSQ", 2, vshader_rsq, 0, 0},
367 {D3DSIO_DP3, "DP3", 3, vshader_dp3, 0, 0},
368 {D3DSIO_DP4, "DP4", 3, vshader_dp4, 0, 0},
369 {D3DSIO_MIN, "MIN", 3, vshader_min, 0, 0},
370 {D3DSIO_MAX, "MAX", 3, vshader_max, 0, 0},
371 {D3DSIO_SLT, "SLT", 3, vshader_slt, 0, 0},
372 {D3DSIO_SGE, "SGE", 3, vshader_sge, 0, 0},
373 {D3DSIO_EXP, "EXP", 2, vshader_exp, 0, 0},
374 {D3DSIO_LOG, "LOG", 2, vshader_log, 0, 0},
375 {D3DSIO_LIT, "LIT", 2, vshader_lit, 0, 0},
376 {D3DSIO_DST, "DST", 3, vshader_dst, 0, 0},
377 {D3DSIO_LRP, "LRP", 5, vshader_lrp, 0, 0},
378 {D3DSIO_FRC, "FRC", 2, vshader_frc, 0, 0},
379 {D3DSIO_M4x4, "M4X4", 3, vshader_m4x4, 0, 0},
380 {D3DSIO_M4x3, "M4X3", 3, vshader_m4x3, 0, 0},
381 {D3DSIO_M3x4, "M3X4", 3, vshader_m3x4, 0, 0},
382 {D3DSIO_M3x3, "M3X3", 3, vshader_m3x3, 0, 0},
383 {D3DSIO_M3x2, "M3X2", 3, vshader_m3x2, 0, 0},
384 /** FIXME: use direct access so add the others opcodes as stubs */
385 {D3DSIO_EXPP, "EXPP", 2, vshader_expp, 0, 0},
386 {D3DSIO_LOGP, "LOGP", 2, vshader_logp, 0, 0},
388 {0, NULL, 0, NULL, 0, 0}
392 inline static const SHADER_OPCODE* vshader_program_get_opcode(const DWORD code) {
393 DWORD i = 0;
394 /** TODO: use dichotomic search */
395 while (NULL != vshader_ins[i].name) {
396 if ((code & D3DSI_OPCODE_MASK) == vshader_ins[i].opcode) {
397 return &vshader_ins[i];
399 ++i;
401 return NULL;
404 inline static BOOL vshader_is_version_token(DWORD token) {
405 return 0xFFFE0000 == (token & 0xFFFE0000);
408 inline static BOOL vshader_is_comment_token(DWORD token) {
409 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
412 inline static void vshader_program_dump_param(const DWORD param, int input) {
413 static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" };
414 static const char swizzle_reg_chars[] = "xyzw";
416 DWORD reg = param & 0x00001FFF;
417 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
419 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) TRACE("-");
421 switch (regtype << D3DSP_REGTYPE_SHIFT) {
422 case D3DSPR_TEMP:
423 TRACE("R[%lu]", reg);
424 break;
425 case D3DSPR_INPUT:
426 TRACE("V[%lu]", reg);
427 break;
428 case D3DSPR_CONST:
429 TRACE("C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
430 break;
431 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
432 TRACE("a[%lu]", reg);
433 break;
434 case D3DSPR_RASTOUT:
435 TRACE("%s", rastout_reg_names[reg]);
436 break;
437 case D3DSPR_ATTROUT:
438 TRACE("oD[%lu]", reg);
439 break;
440 case D3DSPR_TEXCRDOUT:
441 TRACE("oT[%lu]", reg);
442 break;
443 default:
444 break;
447 if (!input) {
448 /** operand output */
449 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
450 if (param & D3DSP_WRITEMASK_0) TRACE(".x");
451 if (param & D3DSP_WRITEMASK_1) TRACE(".y");
452 if (param & D3DSP_WRITEMASK_2) TRACE(".z");
453 if (param & D3DSP_WRITEMASK_3) TRACE(".w");
455 } else {
456 /** operand input */
457 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
458 DWORD swizzle_x = swizzle & 0x03;
459 DWORD swizzle_y = (swizzle >> 2) & 0x03;
460 DWORD swizzle_z = (swizzle >> 4) & 0x03;
461 DWORD swizzle_w = (swizzle >> 6) & 0x03;
463 * swizzle bits fields:
464 * WWZZYYXX
466 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
467 if (swizzle_x == swizzle_y &&
468 swizzle_x == swizzle_z &&
469 swizzle_x == swizzle_w) {
470 TRACE(".%c", swizzle_reg_chars[swizzle_x]);
471 } else {
472 TRACE(".%c%c%c%c",
473 swizzle_reg_chars[swizzle_x],
474 swizzle_reg_chars[swizzle_y],
475 swizzle_reg_chars[swizzle_z],
476 swizzle_reg_chars[swizzle_w]);
482 inline static void vshader_program_add_param(const DWORD param, int input, char *hwLine) {
483 /*static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" }; */
484 static const char* hwrastout_reg_names[] = { "result.position", "result.fogcoord", "result.pointsize" };
485 static const char swizzle_reg_chars[] = "xyzw";
487 DWORD reg = param & 0x00001FFF;
488 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
489 char tmpReg[255];
491 if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) {
492 strcat(hwLine, " -");
493 } else {
494 strcat(hwLine, " ");
497 switch (regtype << D3DSP_REGTYPE_SHIFT) {
498 case D3DSPR_TEMP:
499 sprintf(tmpReg, "T%lu", reg);
500 strcat(hwLine, tmpReg);
501 break;
502 case D3DSPR_INPUT:
503 sprintf(tmpReg, "vertex.attrib[%lu]", reg);
504 strcat(hwLine, tmpReg);
505 break;
506 case D3DSPR_CONST:
507 sprintf(tmpReg, "C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "A0.x + " : "", reg);
508 strcat(hwLine, tmpReg);
509 break;
510 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
511 sprintf(tmpReg, "A%lu", reg);
512 strcat(hwLine, tmpReg);
513 break;
514 case D3DSPR_RASTOUT:
515 sprintf(tmpReg, "%s", hwrastout_reg_names[reg]);
516 strcat(hwLine, tmpReg);
517 break;
518 case D3DSPR_ATTROUT:
519 if (reg==0) {
520 strcat(hwLine, "result.color.primary");
521 } else {
522 strcat(hwLine, "result.color.secondary");
524 break;
525 case D3DSPR_TEXCRDOUT:
526 sprintf(tmpReg, "result.texcoord[%lu]", reg);
527 strcat(hwLine, tmpReg);
528 break;
529 default:
530 break;
533 if (!input) {
534 /** operand output */
535 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
536 strcat(hwLine, ".");
537 if (param & D3DSP_WRITEMASK_0) {
538 strcat(hwLine, "x");
540 if (param & D3DSP_WRITEMASK_1) {
541 strcat(hwLine, "y");
543 if (param & D3DSP_WRITEMASK_2) {
544 strcat(hwLine, "z");
546 if (param & D3DSP_WRITEMASK_3) {
547 strcat(hwLine, "w");
550 } else {
551 /** operand input */
552 DWORD swizzle = (param & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
553 DWORD swizzle_x = swizzle & 0x03;
554 DWORD swizzle_y = (swizzle >> 2) & 0x03;
555 DWORD swizzle_z = (swizzle >> 4) & 0x03;
556 DWORD swizzle_w = (swizzle >> 6) & 0x03;
558 * swizzle bits fields:
559 * WWZZYYXX
561 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
562 if (swizzle_x == swizzle_y &&
563 swizzle_x == swizzle_z &&
564 swizzle_x == swizzle_w) {
565 sprintf(tmpReg, ".%c", swizzle_reg_chars[swizzle_x]);
566 strcat(hwLine, tmpReg);
567 } else {
568 sprintf(tmpReg, ".%c%c%c%c",
569 swizzle_reg_chars[swizzle_x],
570 swizzle_reg_chars[swizzle_y],
571 swizzle_reg_chars[swizzle_z],
572 swizzle_reg_chars[swizzle_w]);
573 strcat(hwLine, tmpReg);
579 DWORD MacroExpansion[4*4];
581 int ExpandMxMacro(DWORD macro_opcode, const DWORD* args) {
582 int i;
583 int nComponents = 0;
584 DWORD opcode =0;
585 switch(macro_opcode) {
586 case D3DSIO_M4x4:
587 nComponents = 4;
588 opcode = D3DSIO_DP4;
589 break;
590 case D3DSIO_M4x3:
591 nComponents = 3;
592 opcode = D3DSIO_DP4;
593 break;
594 case D3DSIO_M3x4:
595 nComponents = 4;
596 opcode = D3DSIO_DP3;
597 break;
598 case D3DSIO_M3x3:
599 nComponents = 3;
600 opcode = D3DSIO_DP3;
601 break;
602 case D3DSIO_M3x2:
603 nComponents = 2;
604 opcode = D3DSIO_DP3;
605 break;
606 default:
607 break;
609 for (i = 0; i < nComponents; i++) {
610 MacroExpansion[i*4+0] = opcode;
611 MacroExpansion[i*4+1] = ((*args) & ~D3DSP_WRITEMASK_ALL)|(D3DSP_WRITEMASK_0<<i);
612 MacroExpansion[i*4+2] = *(args+1);
613 MacroExpansion[i*4+3] = (*(args+2))+i;
615 return nComponents;
/**
 * Function parser ...
 */
621 inline static VOID IDirect3DVertexShaderImpl_GenerateProgramArbHW(IDirect3DVertexShaderImpl* vshader, CONST DWORD* pFunction) {
622 const DWORD* pToken = pFunction;
623 const DWORD* pSavedToken = NULL;
624 const SHADER_OPCODE* curOpcode = NULL;
625 int nRemInstr = -1;
626 DWORD i;
627 unsigned lineNum = 0;
628 char *pgmStr = NULL;
629 char tmpLine[255];
630 DWORD nUseAddressRegister = 0;
631 DWORD nUseTempRegister = 0;
632 DWORD regtype;
633 DWORD reg;
634 IDirect3DDevice8Impl* This = vshader->device;
636 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, 65535); /* 64kb should be enough */
639 * First pass to determine what we need to declare:
640 * - Temporary variables
641 * - Address variables
643 if (NULL != pToken) {
644 while (D3DVS_END() != *pToken) {
645 if (vshader_is_version_token(*pToken)) {
646 /** skip version */
647 ++pToken;
648 continue;
650 if (vshader_is_comment_token(*pToken)) { /** comment */
651 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
652 ++pToken;
653 pToken += comment_len;
654 continue;
656 curOpcode = vshader_program_get_opcode(*pToken);
657 ++pToken;
658 if (NULL == curOpcode) {
659 while (*pToken & 0x80000000) {
660 /* skip unrecognized opcode */
661 ++pToken;
663 } else {
664 if (curOpcode->num_params > 0) {
665 regtype = ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) << D3DSP_REGTYPE_SHIFT);
666 reg = ((*pToken) & 0x00001FFF);
667 /** we should validate GL_MAX_PROGRAM_ADDRESS_REGISTERS_AR limits here */
668 if (D3DSPR_ADDR == regtype && nUseAddressRegister <= reg) nUseAddressRegister = reg + 1;
669 /** we should validate GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB limits here */
670 if (D3DSPR_TEMP == regtype && nUseTempRegister <= reg) nUseTempRegister = reg + 1;
671 ++pToken;
672 for (i = 1; i < curOpcode->num_params; ++i) {
673 regtype = ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) << D3DSP_REGTYPE_SHIFT);
674 reg = ((*pToken) & 0x00001FFF);
675 /** we should validate GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB limits here */
676 if (D3DSPR_TEMP == regtype && nUseTempRegister <= reg) nUseTempRegister = reg + 1;
677 ++pToken;
684 /** second pass, now generate */
685 pToken = pFunction;
687 if (NULL != pToken) {
688 while (1) {
689 tmpLine[0] = 0;
691 if ((nRemInstr >= 0) && (--nRemInstr == -1))
692 /* Macro is finished, continue normal path */
693 pToken = pSavedToken;
695 if (D3DVS_END() == *pToken)
696 break;
698 if (vshader_is_version_token(*pToken)) { /** version */
700 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
701 int version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
702 int numTemps;
703 int numConstants;
705 TRACE_(d3d_hw_shader)("vs.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
707 /* Each release of vertex shaders has had different numbers of temp registers */
708 switch (version) {
709 case 10:
710 case 11: numTemps=12;
711 numConstants=96;
712 strcpy(tmpLine, "!!ARBvp1.0\n");
713 TRACE_(d3d_hw_shader)("GL HW (%u) : %s", strlen(pgmStr), tmpLine); /* Don't add \n to this line as already in tmpLine */
714 break;
715 case 20: numTemps=12;
716 numConstants=256;
717 strcpy(tmpLine, "!!ARBvp2.0\n");
718 FIXME_(d3d_hw_shader)("No work done yet to support vs2.0 in hw\n");
719 TRACE_(d3d_hw_shader)("GL HW (%u) : %s", strlen(pgmStr), tmpLine); /* Don't add \n to this line as already in tmpLine */
720 break;
721 case 30: numTemps=32;
722 numConstants=256;
723 strcpy(tmpLine, "!!ARBvp3.0\n");
724 FIXME_(d3d_hw_shader)("No work done yet to support vs3.0 in hw\n");
725 TRACE_(d3d_hw_shader)("GL HW (%u) : %s", strlen(pgmStr), tmpLine); /* Don't add \n to this line as already in tmpLine */
726 break;
727 default:
728 numTemps=12;
729 numConstants=96;
730 strcpy(tmpLine, "!!ARBvp1.0\n");
731 FIXME_(d3d_hw_shader)("Unrecognized vertex shader version!\n");
733 strcat(pgmStr,tmpLine);
734 ++lineNum;
736 for (i = 0; i < nUseTempRegister/*we should check numTemps here*/; i++) {
737 sprintf(tmpLine, "TEMP T%ld;\n", i);
738 ++lineNum;
739 TRACE_(d3d_hw_shader)("GL HW (%u, %u) : %s", lineNum, strlen(pgmStr), tmpLine); /* Don't add \n to this line as already in tmpLine */
740 strcat(pgmStr,tmpLine);
742 for (i = 0; i < nUseAddressRegister; i++) {
743 sprintf(tmpLine, "ADDRESS A%ld;\n", i);
744 ++lineNum;
745 TRACE_(d3d_hw_shader)("GL HW (%u, %u) : %s", lineNum, strlen(pgmStr), tmpLine); /* Don't add \n to this line as already in tmpLine */
746 strcat(pgmStr,tmpLine);
748 /* Due to the dynamic constants binding mechanism, we need to declare
749 * all the constants for relative addressing. */
750 /* Mesa supports nly 95 constants for VS1.X although we should have at least 96. */
751 if (GL_VENDOR_NAME(This) == VENDOR_MESA || GL_VENDOR_NAME(This) == VENDOR_WINE) {
752 numConstants = 95;
754 sprintf(tmpLine, "PARAM C[%d] = { program.env[0..%d] };\n", numConstants, numConstants-1);
755 TRACE_(d3d_hw_shader)("GL HW (%u,%u) : %s", lineNum, strlen(pgmStr), tmpLine); /* Don't add \n to this line as already in tmpLine */
756 strcat(pgmStr, tmpLine);
757 ++lineNum;
759 ++pToken;
760 continue;
762 if (vshader_is_comment_token(*pToken)) { /** comment */
763 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
764 ++pToken;
765 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
766 pToken += comment_len;
767 continue;
769 curOpcode = vshader_program_get_opcode(*pToken);
770 ++pToken;
771 if (NULL == curOpcode) {
772 /* unknown current opcode ... */
773 while (*pToken & 0x80000000) {
774 TRACE_(d3d_hw_shader)("unrecognized opcode: %08lx\n", *pToken);
775 ++pToken;
777 } else {
778 /* Build opcode for GL vertex_program */
779 switch (curOpcode->opcode) {
780 case D3DSIO_NOP:
781 continue;
782 case D3DSIO_MOV:
783 /* Address registers must be loaded with the ARL instruction */
784 if (((*pToken) & D3DSP_REGTYPE_MASK) == D3DSPR_ADDR) {
785 if (0 < nUseAddressRegister) {
786 strcpy(tmpLine, "ARL");
787 break;
788 } else
789 FIXME_(d3d_hw_shader)("Try to load an undeclared address register!\n");
791 /* fall through */
792 case D3DSIO_ADD:
793 case D3DSIO_SUB:
794 case D3DSIO_MAD:
795 case D3DSIO_MUL:
796 case D3DSIO_RCP:
797 case D3DSIO_RSQ:
798 case D3DSIO_DP3:
799 case D3DSIO_DP4:
800 case D3DSIO_MIN:
801 case D3DSIO_MAX:
802 case D3DSIO_SLT:
803 case D3DSIO_SGE:
804 case D3DSIO_LIT:
805 case D3DSIO_DST:
806 case D3DSIO_FRC:
807 strcpy(tmpLine, curOpcode->name);
808 break;
810 case D3DSIO_EXPP:
811 strcpy(tmpLine, "EXP");
812 break;
813 case D3DSIO_LOGP:
814 strcpy(tmpLine, "LOG");
815 break;
816 case D3DSIO_EXP:
817 strcpy(tmpLine, "EX2");
818 break;
819 case D3DSIO_LOG:
820 strcpy(tmpLine, "LG2");
821 break;
823 case D3DSIO_M4x4:
824 case D3DSIO_M4x3:
825 case D3DSIO_M3x4:
826 case D3DSIO_M3x3:
827 case D3DSIO_M3x2:
828 /* Expand the macro and get number of generated instruction */
829 nRemInstr = ExpandMxMacro(curOpcode->opcode, pToken);
830 /* Save point to next instruction */
831 pSavedToken = pToken + 3;
832 /* Execute expanded macro */
833 pToken = MacroExpansion;
834 continue;
836 default:
837 FIXME_(d3d_hw_shader)("Can't handle opcode %s in hwShader\n", curOpcode->name);
840 if (curOpcode->num_params > 0) {
841 vshader_program_add_param(*pToken, 0, tmpLine);
843 ++pToken;
844 for (i = 1; i < curOpcode->num_params; ++i) {
845 strcat(tmpLine, ",");
846 vshader_program_add_param(*pToken, 1, tmpLine);
847 ++pToken;
850 strcat(tmpLine,";\n");
851 ++lineNum;
852 TRACE_(d3d_hw_shader)("GL HW (%u, %u) : %s", lineNum, strlen(pgmStr), tmpLine); /* Don't add \n to this line as already in tmpLine */
853 strcat(pgmStr, tmpLine);
856 strcpy(tmpLine, "END\n");
857 ++lineNum;
858 TRACE_(d3d_hw_shader)("GL HW (%u, %u) : %s", lineNum, strlen(pgmStr), tmpLine); /* Don't add \n to this line as already in tmpLine */
859 strcat(pgmStr, tmpLine);
862 /* Create the hw shader */
863 GL_EXTCALL(glGenProgramsARB(1, &vshader->prgId));
864 TRACE_(d3d_hw_shader)("Creating a hw vertex shader, prg=%d\n", vshader->prgId);
866 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vshader->prgId));
868 /* Create the program and check for errors */
869 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
870 if (glGetError() == GL_INVALID_OPERATION) {
871 GLint errPos;
872 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
873 FIXME_(d3d_hw_shader)("HW VertexShader Error at position: %d\n%s\n", errPos, glGetString(GL_PROGRAM_ERROR_STRING_ARB));
874 vshader->prgId = -1;
877 HeapFree(GetProcessHeap(), 0, pgmStr);
880 inline static VOID IDirect3DVertexShaderImpl_ParseProgram(IDirect3DVertexShaderImpl* vshader, CONST DWORD* pFunction, int useHW) {
881 const DWORD* pToken = pFunction;
882 const SHADER_OPCODE* curOpcode = NULL;
883 DWORD len = 0;
884 DWORD i;
886 if (NULL != pToken) {
887 while (D3DVS_END() != *pToken) {
888 if (vshader_is_version_token(*pToken)) { /** version */
889 TRACE("vs.%lu.%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
890 ++pToken;
891 ++len;
892 continue;
894 if (vshader_is_comment_token(*pToken)) { /** comment */
895 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
896 ++pToken;
897 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
898 pToken += comment_len;
899 len += comment_len + 1;
900 continue;
902 curOpcode = vshader_program_get_opcode(*pToken);
903 ++pToken;
904 ++len;
905 if (NULL == curOpcode) {
906 /* unknown current opcode ... */
907 while (*pToken & 0x80000000) {
908 TRACE("unrecognized opcode: %08lx\n", *pToken);
909 ++pToken;
910 ++len;
912 } else {
913 TRACE("%s ", curOpcode->name);
914 if (curOpcode->num_params > 0) {
915 vshader_program_dump_param(*pToken, 0);
916 ++pToken;
917 ++len;
918 for (i = 1; i < curOpcode->num_params; ++i) {
919 TRACE(", ");
920 vshader_program_dump_param(*pToken, 1);
921 ++pToken;
922 ++len;
925 TRACE("\n");
928 vshader->functionLength = (len + 1) * sizeof(DWORD);
929 } else {
930 vshader->functionLength = 1; /* no Function defined use fixed function vertex processing */
933 /* Generate HW shader in needed */
934 if (useHW && NULL != pFunction) {
935 IDirect3DVertexShaderImpl_GenerateProgramArbHW(vshader, pFunction);
938 /* copy the function ... because it will certainly be released by application */
939 if (NULL != pFunction) {
940 vshader->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, vshader->functionLength);
941 memcpy(vshader->function, pFunction, vshader->functionLength);
942 } else {
943 vshader->function = NULL;
947 HRESULT WINAPI IDirect3DDeviceImpl_CreateVertexShader(IDirect3DDevice8Impl* This, CONST DWORD* pFunction, DWORD Usage, IDirect3DVertexShaderImpl** ppVertexShader) {
948 IDirect3DVertexShaderImpl* object;
949 int useHW;
951 object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(IDirect3DVertexShaderImpl));
952 if (NULL == object) {
953 *ppVertexShader = NULL;
954 return D3DERR_OUTOFVIDEOMEMORY;
956 /*object->lpVtbl = &Direct3DVextexShader9_Vtbl;*/
957 object->device = This; /* FIXME: AddRef(This) */
958 object->ref = 1;
960 object->usage = Usage;
961 object->data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(VSHADERDATA8));
963 useHW = (((vs_mode == VS_HW) && GL_SUPPORT(ARB_VERTEX_PROGRAM)) &&
964 This->devType != D3DDEVTYPE_REF &&
965 object->usage != D3DUSAGE_SOFTWAREPROCESSING);
967 IDirect3DVertexShaderImpl_ParseProgram(object, pFunction, useHW);
969 *ppVertexShader = object;
970 return D3D_OK;
973 BOOL IDirect3DVertexShaderImpl_ExecuteHAL(IDirect3DVertexShaderImpl* vshader, VSHADERINPUTDATA8* input, VSHADEROUTPUTDATA8* output) {
974 /**
975 * TODO: use the NV_vertex_program (or 1_1) extension
976 * and specifics vendors (ARB_vertex_program??) variants for it
978 return TRUE;
981 HRESULT WINAPI IDirect3DVertexShaderImpl_ExecuteSW(IDirect3DVertexShaderImpl* vshader, VSHADERINPUTDATA8* input, VSHADEROUTPUTDATA8* output) {
982 /** Vertex Shader Temporary Registers */
983 D3DSHADERVECTOR R[12];
984 /*D3DSHADERSCALAR A0;*/
985 D3DSHADERVECTOR A[1];
986 /** temporary Vector for modifier management */
987 D3DSHADERVECTOR d;
988 D3DSHADERVECTOR s[3];
989 /** parser datas */
990 const DWORD* pToken = vshader->function;
991 const SHADER_OPCODE* curOpcode = NULL;
992 /** functions parameters */
993 D3DSHADERVECTOR* p[5];
994 D3DSHADERVECTOR* p_send[5];
995 DWORD i;
997 /** init temporary register */
998 memset(R, 0, 12 * sizeof(D3DSHADERVECTOR));
1000 /* vshader_program_parse(vshader); */
1001 #if 0 /* Must not be 1 in cvs */
1002 TRACE("Input:\n");
1003 TRACE_VSVECTOR(vshader->data->C[0]);
1004 TRACE_VSVECTOR(vshader->data->C[1]);
1005 TRACE_VSVECTOR(vshader->data->C[2]);
1006 TRACE_VSVECTOR(vshader->data->C[3]);
1007 TRACE_VSVECTOR(vshader->data->C[4]);
1008 TRACE_VSVECTOR(vshader->data->C[5]);
1009 TRACE_VSVECTOR(vshader->data->C[6]);
1010 TRACE_VSVECTOR(vshader->data->C[7]);
1011 TRACE_VSVECTOR(vshader->data->C[8]);
1012 TRACE_VSVECTOR(vshader->data->C[64]);
1013 TRACE_VSVECTOR(input->V[D3DVSDE_POSITION]);
1014 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDWEIGHT]);
1015 TRACE_VSVECTOR(input->V[D3DVSDE_BLENDINDICES]);
1016 TRACE_VSVECTOR(input->V[D3DVSDE_NORMAL]);
1017 TRACE_VSVECTOR(input->V[D3DVSDE_PSIZE]);
1018 TRACE_VSVECTOR(input->V[D3DVSDE_DIFFUSE]);
1019 TRACE_VSVECTOR(input->V[D3DVSDE_SPECULAR]);
1020 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD0]);
1021 TRACE_VSVECTOR(input->V[D3DVSDE_TEXCOORD1]);
1022 #endif
1024 TRACE_VSVECTOR(vshader->data->C[64]);
1026 /* the first dword is the version tag */
1027 /* TODO: parse it */
1029 if (vshader_is_version_token(*pToken)) { /** version */
1030 ++pToken;
1032 while (D3DVS_END() != *pToken) {
1033 if (vshader_is_comment_token(*pToken)) { /** comment */
1034 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1035 ++pToken;
1036 pToken += comment_len;
1037 continue ;
1039 curOpcode = vshader_program_get_opcode(*pToken);
1040 ++pToken;
1041 if (NULL == curOpcode) {
1042 i = 0;
1043 /* unknown current opcode ... */
1044 while (*pToken & 0x80000000) {
1045 if (i == 0) {
1046 TRACE("unrecognized opcode: pos=%d token=%08lX\n", (pToken - 1) - vshader->function, *(pToken - 1));
1048 TRACE("unrecognized opcode param: pos=%d token=%08lX what=", pToken - vshader->function, *pToken);
1049 vshader_program_add_param(*pToken, i, NULL); /* Add function just used for trace error scenario */
1050 TRACE("\n");
1051 ++i;
1052 ++pToken;
1054 /*return FALSE;*/
1055 } else {
1056 if (curOpcode->num_params > 0) {
1057 /*TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken);*/
1058 for (i = 0; i < curOpcode->num_params; ++i) {
1059 DWORD reg = pToken[i] & 0x00001FFF;
1060 DWORD regtype = ((pToken[i] & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
1062 switch (regtype << D3DSP_REGTYPE_SHIFT) {
1063 case D3DSPR_TEMP:
1064 /*TRACE("p[%d]=R[%d]\n", i, reg);*/
1065 p[i] = &R[reg];
1066 break;
1067 case D3DSPR_INPUT:
1068 /*TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]);*/
1069 p[i] = &input->V[reg];
1070 break;
1071 case D3DSPR_CONST:
1072 if (pToken[i] & D3DVS_ADDRMODE_RELATIVE) {
1073 p[i] = &vshader->data->C[(DWORD) A[0].x + reg];
1074 } else {
1075 p[i] = &vshader->data->C[reg];
1077 break;
1078 case D3DSPR_ADDR: /*case D3DSPR_TEXTURE:*/
1079 if (0 != reg) {
1080 ERR("cannot handle address registers != a0, forcing use of a0\n");
1081 reg = 0;
1083 /*TRACE("p[%d]=A[%d]\n", i, reg);*/
1084 p[i] = &A[reg];
1085 break;
1086 case D3DSPR_RASTOUT:
1087 switch (reg) {
1088 case D3DSRO_POSITION:
1089 p[i] = &output->oPos;
1090 break;
1091 case D3DSRO_FOG:
1092 p[i] = &output->oFog;
1093 break;
1094 case D3DSRO_POINT_SIZE:
1095 p[i] = &output->oPts;
1096 break;
1098 break;
1099 case D3DSPR_ATTROUT:
1100 /*TRACE("p[%d]=oD[%d]\n", i, reg);*/
1101 p[i] = &output->oD[reg];
1102 break;
1103 case D3DSPR_TEXCRDOUT:
1104 /*TRACE("p[%d]=oT[%d]\n", i, reg);*/
1105 p[i] = &output->oT[reg];
1106 break;
1107 default:
1108 break;
1111 if (i > 0) { /* input reg */
1112 DWORD swizzle = (pToken[i] & D3DVS_SWIZZLE_MASK) >> D3DVS_SWIZZLE_SHIFT;
1113 UINT isNegative = ((pToken[i] & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG);
1115 if (!isNegative && (D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) == swizzle) {
1116 /*TRACE("p[%d] not swizzled\n", i);*/
1117 p_send[i] = p[i];
1118 } else {
1119 DWORD swizzle_x = swizzle & 0x03;
1120 DWORD swizzle_y = (swizzle >> 2) & 0x03;
1121 DWORD swizzle_z = (swizzle >> 4) & 0x03;
1122 DWORD swizzle_w = (swizzle >> 6) & 0x03;
1123 /*TRACE("p[%d] swizzled\n", i);*/
1124 float* tt = (float*) p[i];
1125 s[i].x = (isNegative) ? -tt[swizzle_x] : tt[swizzle_x];
1126 s[i].y = (isNegative) ? -tt[swizzle_y] : tt[swizzle_y];
1127 s[i].z = (isNegative) ? -tt[swizzle_z] : tt[swizzle_z];
1128 s[i].w = (isNegative) ? -tt[swizzle_w] : tt[swizzle_w];
1129 p_send[i] = &s[i];
1131 } else { /* output reg */
1132 if ((pToken[i] & D3DSP_WRITEMASK_ALL) == D3DSP_WRITEMASK_ALL) {
1133 p_send[i] = p[i];
1134 } else {
1135 p_send[i] = &d; /* to be post-processed for modifiers management */
1141 switch (curOpcode->num_params) {
1142 case 0:
1143 curOpcode->soft_fct();
1144 break;
1145 case 1:
1146 curOpcode->soft_fct(p_send[0]);
1147 break;
1148 case 2:
1149 curOpcode->soft_fct(p_send[0], p_send[1]);
1150 break;
1151 case 3:
1152 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2]);
1153 break;
1154 case 4:
1155 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3]);
1156 break;
1157 case 5:
1158 curOpcode->soft_fct(p_send[0], p_send[1], p_send[2], p_send[3], p_send[4]);
1159 break;
1160 default:
1161 ERR("%s too many params: %u\n", curOpcode->name, curOpcode->num_params);
1164 /* check if output reg modifier post-process */
1165 if (curOpcode->num_params > 0 && (pToken[0] & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1166 if (pToken[0] & D3DSP_WRITEMASK_0) p[0]->x = d.x;
1167 if (pToken[0] & D3DSP_WRITEMASK_1) p[0]->y = d.y;
1168 if (pToken[0] & D3DSP_WRITEMASK_2) p[0]->z = d.z;
1169 if (pToken[0] & D3DSP_WRITEMASK_3) p[0]->w = d.w;
1172 #if 0
1173 TRACE_VSVECTOR(output->oPos);
1174 TRACE_VSVECTOR(output->oD[0]);
1175 TRACE_VSVECTOR(output->oD[1]);
1176 TRACE_VSVECTOR(output->oT[0]);
1177 TRACE_VSVECTOR(output->oT[1]);
1178 TRACE_VSVECTOR(R[0]);
1179 TRACE_VSVECTOR(R[1]);
1180 TRACE_VSVECTOR(R[2]);
1181 TRACE_VSVECTOR(R[3]);
1182 TRACE_VSVECTOR(R[4]);
1183 TRACE_VSVECTOR(R[5]);
1184 #endif
1186 /* to next opcode token */
1187 pToken += curOpcode->num_params;
1189 #if 0
1190 TRACE("End of current instruction:\n");
1191 TRACE_VSVECTOR(output->oPos);
1192 TRACE_VSVECTOR(output->oD[0]);
1193 TRACE_VSVECTOR(output->oD[1]);
1194 TRACE_VSVECTOR(output->oT[0]);
1195 TRACE_VSVECTOR(output->oT[1]);
1196 TRACE_VSVECTOR(R[0]);
1197 TRACE_VSVECTOR(R[1]);
1198 TRACE_VSVECTOR(R[2]);
1199 TRACE_VSVECTOR(R[3]);
1200 TRACE_VSVECTOR(R[4]);
1201 TRACE_VSVECTOR(R[5]);
1202 #endif
1204 #if 0 /* Must not be 1 in cvs */
1205 TRACE("Output:\n");
1206 TRACE_VSVECTOR(output->oPos);
1207 TRACE_VSVECTOR(output->oD[0]);
1208 TRACE_VSVECTOR(output->oD[1]);
1209 TRACE_VSVECTOR(output->oT[0]);
1210 TRACE_VSVECTOR(output->oT[1]);
1211 #endif
1212 return D3D_OK;
1215 HRESULT WINAPI IDirect3DVertexShaderImpl_GetFunction(IDirect3DVertexShaderImpl* This, VOID* pData, UINT* pSizeOfData) {
1216 if (NULL == pData) {
1217 *pSizeOfData = This->functionLength;
1218 return D3D_OK;
1220 if (*pSizeOfData < This->functionLength) {
1221 *pSizeOfData = This->functionLength;
1222 return D3DERR_MOREDATA;
1224 if (NULL == This->function) { /* no function defined */
1225 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
1226 (*(DWORD **) pData) = NULL;
1227 } else {
1228 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
1229 memcpy(pData, This->function, This->functionLength);
1231 return D3D_OK;
1234 HRESULT WINAPI IDirect3DVertexShaderImpl_SetConstantF(IDirect3DVertexShaderImpl* This, UINT StartRegister, CONST FLOAT* pConstantData, UINT Vector4fCount) {
1235 if (StartRegister + Vector4fCount > D3D8_VSHADER_MAX_CONSTANTS) {
1236 return D3DERR_INVALIDCALL;
1238 if (NULL == This->data) { /* temporary while datas not supported */
1239 FIXME("(%p) : VertexShader_SetConstant not fully supported yet\n", This);
1240 return D3DERR_INVALIDCALL;
1242 memcpy(&This->data->C[StartRegister], pConstantData, Vector4fCount * 4 * sizeof(FLOAT));
1243 return D3D_OK;
1246 HRESULT WINAPI IDirect3DVertexShaderImpl_GetConstantF(IDirect3DVertexShaderImpl* This, UINT StartRegister, FLOAT* pConstantData, UINT Vector4fCount) {
1247 if (StartRegister + Vector4fCount > D3D8_VSHADER_MAX_CONSTANTS) {
1248 return D3DERR_INVALIDCALL;
1250 if (NULL == This->data) { /* temporary while datas not supported */
1251 return D3DERR_INVALIDCALL;
1253 memcpy(pConstantData, &This->data->C[StartRegister], Vector4fCount * 4 * sizeof(FLOAT));
1254 return D3D_OK;
1258 /**********************************************************************************************************************************************
1259 **********************************************************************************************************************************************
1260 **********************************************************************************************************************************************
1261 **********************************************************************************************************************************************
1262 **********************************************************************************************************************************************/
1264 void pshader_texcoord(D3DSHADERVECTOR* d) {
1267 void pshader_texkill(D3DSHADERVECTOR* d) {
1270 void pshader_tex(D3DSHADERVECTOR* d) {
1273 void pshader_texbem(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1276 void pshader_texbeml(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1279 void pshader_texreg2ar(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1282 void pshader_texreg2gb(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1285 void pshader_texm3x2pad(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1288 void pshader_texm3x2tex(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1291 void pshader_texm3x3pad(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1294 void pshader_texm3x3tex(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1297 void pshader_texm3x3diff(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1300 void pshader_texm3x3spec(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
1303 void pshader_texm3x3vspec(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1306 void pshader_cnd(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1, D3DSHADERVECTOR* s2) {
1309 void pshader_def(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1, D3DSHADERVECTOR* s2, D3DSHADERVECTOR* s3) {
1312 void pshader_texreg2rgb(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1315 void pshader_texdp3tex(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1318 void pshader_texm3x2depth(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1321 void pshader_texdp3(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1324 void pshader_texm3x3(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0) {
1327 void pshader_texdepth(D3DSHADERVECTOR* d) {
1330 void pshader_cmp(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1, D3DSHADERVECTOR* s2) {
1333 void pshader_bem(D3DSHADERVECTOR* d, D3DSHADERVECTOR* s0, D3DSHADERVECTOR* s1) {
1336 static CONST SHADER_OPCODE pshader_ins [] = {
1337 {D3DSIO_NOP, "nop", 0, vshader_nop, 0, 0},
1338 {D3DSIO_MOV, "mov", 2, vshader_mov, 0, 0},
1339 {D3DSIO_ADD, "add", 3, vshader_add, 0, 0},
1340 {D3DSIO_SUB, "sub", 3, vshader_sub, 0, 0},
1341 {D3DSIO_MAD, "mad", 4, vshader_mad, 0, 0},
1342 {D3DSIO_MUL, "mul", 3, vshader_mul, 0, 0},
1343 {D3DSIO_RCP, "rcp", 2, vshader_rcp, 0, 0},
1344 {D3DSIO_RSQ, "rsq", 2, vshader_rsq, 0, 0},
1345 {D3DSIO_DP3, "dp3", 3, vshader_dp3, 0, 0},
1346 {D3DSIO_DP4, "dp4", 3, vshader_dp4, 0, 0},
1347 {D3DSIO_MIN, "min", 3, vshader_min, 0, 0},
1348 {D3DSIO_MAX, "max", 3, vshader_max, 0, 0},
1349 {D3DSIO_SLT, "slt", 3, vshader_slt, 0, 0},
1350 {D3DSIO_SGE, "sge", 3, vshader_sge, 0, 0},
1351 {D3DSIO_EXP, "exp", 2, vshader_exp, 0, 0},
1352 {D3DSIO_LOG, "log", 2, vshader_log, 0, 0},
1353 {D3DSIO_LIT, "lit", 2, vshader_lit, 0, 0},
1354 {D3DSIO_DST, "dst", 3, vshader_dst, 0, 0},
1355 {D3DSIO_LRP, "lrp", 4, vshader_lrp, 0, 0},
1356 {D3DSIO_FRC, "frc", 2, vshader_frc, 0, 0},
1357 {D3DSIO_M4x4, "m4x4", 3, vshader_m4x4, 0, 0},
1358 {D3DSIO_M4x3, "m4x3", 3, vshader_m4x3, 0, 0},
1359 {D3DSIO_M3x4, "m3x4", 3, vshader_m3x4, 0, 0},
1360 {D3DSIO_M3x3, "m3x3", 3, vshader_m3x3, 0, 0},
1361 {D3DSIO_M3x2, "m3x2", 3, vshader_m3x2, 0, 0},
1363 {D3DSIO_TEXCOORD, "texcoord", 1, pshader_texcoord, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1364 {D3DSIO_TEXCOORD, "texcrd", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
1365 {D3DSIO_TEXKILL, "texkill", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
1366 {D3DSIO_TEX, "tex", 1, pshader_tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1367 {D3DSIO_TEX, "texld", 2, pshader_tex, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
1368 {D3DSIO_TEXBEM, "texbem", 2, pshader_texbem, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1369 {D3DSIO_TEXBEML, "texbeml", 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1370 {D3DSIO_TEXREG2AR, "texreg2ar", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
1371 {D3DSIO_TEXREG2GB, "texreg2gb", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
1372 {D3DSIO_TEXM3x2PAD, "texm3x2pad", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1373 {D3DSIO_TEXM3x2TEX, "texm3x2tex", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1374 {D3DSIO_TEXM3x3PAD, "texm3x3pad", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1375 {D3DSIO_TEXM3x3TEX, "texm3x3tex", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1376 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
1377 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1378 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspec", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1380 {D3DSIO_EXPP, "expp", 2, vshader_expp, 0, 0},
1381 {D3DSIO_LOGP, "logp", 2, vshader_logp, 0, 0},
1383 {D3DSIO_CND, "cnd", 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
1384 {D3DSIO_DEF, "def", 5, pshader_def, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
1385 {D3DSIO_TEXREG2RGB, "texbreg2rgb", 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
1387 {D3DSIO_TEXDP3TEX, "texdp3tex", 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
1388 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", 2, pshader_texm3x2depth, D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
1389 {D3DSIO_TEXDP3, "texdp3", 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
1390 {D3DSIO_TEXM3x3, "texm3x3", 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
1391 {D3DSIO_TEXDEPTH, "texdepth", 1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
1392 {D3DSIO_CMP, "cmp", 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
1393 {D3DSIO_BEM, "bem", 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
1395 {D3DSIO_PHASE, "phase", 0, vshader_nop, 0, 0},
1397 {0, NULL, 0, NULL}
1400 inline static const SHADER_OPCODE* pshader_program_get_opcode(const DWORD code, const int version) {
1401 DWORD i = 0;
1402 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
1403 /** TODO: use dichotomic search */
1404 while (NULL != pshader_ins[i].name) {
1405 if ( ( (code & D3DSI_OPCODE_MASK) == pshader_ins[i].opcode) &&
1406 ( ( (hex_version >= pshader_ins[i].min_version) && (hex_version <= pshader_ins[i].max_version)) ||
1407 ( (pshader_ins[i].min_version == 0) && (pshader_ins[i].max_version == 0) ) ) ) {
1408 return &pshader_ins[i];
1410 ++i;
1412 return NULL;
1415 inline static BOOL pshader_is_version_token(DWORD token) {
1416 return 0xFFFF0000 == (token & 0xFFFF0000);
1419 inline static BOOL pshader_is_comment_token(DWORD token) {
1420 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
1423 inline static void pshader_program_dump_opcode(const SHADER_OPCODE* curOpcode, const DWORD code, const DWORD output) {
1424 if (0 != (code & ~D3DSI_OPCODE_MASK)) {
1425 DWORD mask = (code & ~D3DSI_OPCODE_MASK);
1426 switch (mask) {
1427 case 0x40000000: TRACE("+"); break;
1428 default:
1429 TRACE(" unhandled modifier(0x%08lx) ", mask);
1432 TRACE("%s", curOpcode->name);
1434 * normally this is a destination reg modifier
1435 * but in pixel shaders asm code its specified as:
1436 * dp3_x4 t1.rgba, r1, c1
1437 * or
1438 * dp3_x2_sat r0, t0_bx2, v0_bx2
1439 * so for better debbuging i use the same norm
1441 if (0 != (output & D3DSP_DSTSHIFT_MASK)) {
1442 DWORD shift = (output & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1443 if (shift < 8) {
1444 TRACE("_x%u", 1 << shift);
1445 } else {
1446 TRACE("_d%u", 1 << (16-shift));
1449 if (0 != (output & D3DSP_DSTMOD_MASK)) {
1450 DWORD mask = output & D3DSP_DSTMOD_MASK;
1451 switch (mask) {
1452 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1453 default:
1454 TRACE("_unhandled_modifier(0x%08lx)", mask);
1457 TRACE(" ");
1460 inline static void pshader_program_dump_param(const DWORD param, int input) {
1461 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1462 static const char swizzle_reg_chars[] = "rgba";
1464 DWORD reg = param & 0x00001FFF;
1465 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
1467 if (input) {
1468 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1469 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1470 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1471 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1472 TRACE("-");
1473 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1474 TRACE("1-");
1477 switch (regtype << D3DSP_REGTYPE_SHIFT) {
1478 case D3DSPR_TEMP:
1479 TRACE("R[%lu]", reg);
1480 break;
1481 case D3DSPR_INPUT:
1482 TRACE("V[%lu]", reg);
1483 break;
1484 case D3DSPR_CONST:
1485 TRACE("C[%s%lu]", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1486 break;
1487 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1488 TRACE("t[%lu]", reg);
1489 break;
1490 case D3DSPR_RASTOUT:
1491 TRACE("%s", rastout_reg_names[reg]);
1492 break;
1493 case D3DSPR_ATTROUT:
1494 TRACE("oD[%lu]", reg);
1495 break;
1496 case D3DSPR_TEXCRDOUT:
1497 TRACE("oT[%lu]", reg);
1498 break;
1499 default:
1500 break;
1503 if (!input) {
1504 /** operand output */
1506 * for better debugging traces it's done into opcode dump code
1507 * @see pshader_program_dump_opcode
1508 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1509 DWORD mask = param & D3DSP_DSTMOD_MASK;
1510 switch (mask) {
1511 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1512 default:
1513 TRACE("_unhandled_modifier(0x%08lx)", mask);
1516 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1517 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1518 if (shift > 0) {
1519 TRACE("_x%u", 1 << shift);
1523 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1524 TRACE(".");
1525 if (param & D3DSP_WRITEMASK_0) TRACE("r");
1526 if (param & D3DSP_WRITEMASK_1) TRACE("g");
1527 if (param & D3DSP_WRITEMASK_2) TRACE("b");
1528 if (param & D3DSP_WRITEMASK_3) TRACE("a");
1530 } else {
1531 /** operand input */
1532 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1533 DWORD swizzle_x = swizzle & 0x03;
1534 DWORD swizzle_y = (swizzle >> 2) & 0x03;
1535 DWORD swizzle_z = (swizzle >> 4) & 0x03;
1536 DWORD swizzle_w = (swizzle >> 6) & 0x03;
1538 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1539 DWORD mask = param & D3DSP_SRCMOD_MASK;
1540 /*TRACE("_modifier(0x%08lx) ", mask);*/
1541 switch (mask) {
1542 case D3DSPSM_NONE: break;
1543 case D3DSPSM_NEG: break;
1544 case D3DSPSM_BIAS: TRACE("_bias"); break;
1545 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1546 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1547 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1548 case D3DSPSM_COMP: break;
1549 case D3DSPSM_X2: TRACE("_x2"); break;
1550 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1551 case D3DSPSM_DZ: TRACE("_dz"); break;
1552 case D3DSPSM_DW: TRACE("_dw"); break;
1553 default:
1554 TRACE("_unknown(0x%08lx)", mask);
1559 * swizzle bits fields:
1560 * WWZZYYXX
1562 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1563 if (swizzle_x == swizzle_y &&
1564 swizzle_x == swizzle_z &&
1565 swizzle_x == swizzle_w) {
1566 TRACE(".%c", swizzle_reg_chars[swizzle_x]);
1567 } else {
1568 TRACE(".%c%c%c%c",
1569 swizzle_reg_chars[swizzle_x],
1570 swizzle_reg_chars[swizzle_y],
1571 swizzle_reg_chars[swizzle_z],
1572 swizzle_reg_chars[swizzle_w]);
1578 static int constants[D3D8_PSHADER_MAX_CONSTANTS];
1580 inline static void get_register_name(const DWORD param, char* regstr)
1582 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1584 DWORD reg = param & 0x00001FFF;
1585 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
1587 switch (regtype << D3DSP_REGTYPE_SHIFT) {
1588 case D3DSPR_TEMP:
1589 sprintf(regstr, "R%lu", reg);
1590 break;
1591 case D3DSPR_INPUT:
1592 if (reg==0) {
1593 strcpy(regstr, "fragment.color.primary");
1594 } else {
1595 strcpy(regstr, "fragment.color.secondary");
1597 break;
1598 case D3DSPR_CONST:
1599 if (constants[reg])
1600 sprintf(regstr, "C%lu", reg);
1601 else
1602 sprintf(regstr, "program.env[%lu]", reg);
1603 break;
1604 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1605 sprintf(regstr,"T%lu", reg);
1606 break;
1607 case D3DSPR_RASTOUT:
1608 sprintf(regstr, "%s", rastout_reg_names[reg]);
1609 break;
1610 case D3DSPR_ATTROUT:
1611 sprintf(regstr, "oD[%lu]", reg);
1612 break;
1613 case D3DSPR_TEXCRDOUT:
1614 sprintf(regstr, "oT[%lu]", reg);
1615 break;
1616 default:
1617 break;
1621 inline static void addline(unsigned int* lineNum, char* pgm, char* line)
1623 ++(*lineNum);
1624 TRACE_(d3d_hw_shader)("GL HW (%u, %u) : %s\n", *lineNum, strlen(pgm), line);
1625 strcat(pgm, line);
1626 strcat(pgm, "\n");
/*
 * Operand text used to apply a destination shift, indexed by the 4 bit shift
 * value of the destination token. Shifts with no matching component in the
 * coefmul / coefdiv parameters map to "dummy".
 */
static const char* shift_tab[] = {
    /*  0: none */ "dummy",
    /*  1: x2   */ "coefmul.x",
    /*  2: x4   */ "coefmul.y",
    /*  3: x8   */ "coefmul.z",
    /*  4: x16  */ "coefmul.w",
    /*  5: x32  */ "dummy",
    /*  6: x64  */ "dummy",
    /*  7: x128 */ "dummy",
    /*  8: d256 */ "dummy",
    /*  9: d128 */ "dummy",
    /* 10: d64  */ "dummy",
    /* 11: d32  */ "dummy",
    /* 12: d16  */ "coefdiv.w",
    /* 13: d8   */ "coefdiv.z",
    /* 14: d4   */ "coefdiv.y",
    /* 15: d2   */ "coefdiv.x"
};
1648 inline static void get_write_mask(const DWORD output_reg, char* write_mask)
1650 *write_mask = 0;
1651 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1652 strcat(write_mask, ".");
1653 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
1654 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
1655 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
1656 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
1660 inline static void get_input_register_swizzle(const DWORD instr, char* swzstring)
1662 static const char swizzle_reg_chars[] = "rgba";
1663 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1664 DWORD swizzle_x = swizzle & 0x03;
1665 DWORD swizzle_y = (swizzle >> 2) & 0x03;
1666 DWORD swizzle_z = (swizzle >> 4) & 0x03;
1667 DWORD swizzle_w = (swizzle >> 6) & 0x03;
1669 * swizzle bits fields:
1670 * WWZZYYXX
1672 *swzstring = 0;
1673 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1674 if (swizzle_x == swizzle_y &&
1675 swizzle_x == swizzle_z &&
1676 swizzle_x == swizzle_w) {
1677 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
1678 } else {
1679 sprintf(swzstring, ".%c%c%c%c",
1680 swizzle_reg_chars[swizzle_x],
1681 swizzle_reg_chars[swizzle_y],
1682 swizzle_reg_chars[swizzle_z],
1683 swizzle_reg_chars[swizzle_w]);
1688 inline static void gen_output_modifier_line(int saturate, char* write_mask, int shift, char *regstr, char* line)
1690 /* Generate a line that does the output modifier computation */
1691 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
1694 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char* outregstr, char* line)
1696 /* Generate a line that does the input modifier computation and return the input register to use */
1697 static char regstr[256];
1698 static char tmpline[256];
1699 int insert_line;
1701 /* Assume a new line will be added */
1702 insert_line = 1;
1704 /* Get register name */
1705 get_register_name(instr, regstr);
1707 switch (instr & D3DSP_SRCMOD_MASK) {
1708 case D3DSPSM_NONE:
1709 strcpy(outregstr, regstr);
1710 insert_line = 0;
1711 break;
1712 case D3DSPSM_NEG:
1713 sprintf(outregstr, "-%s", regstr);
1714 insert_line = 0;
1715 break;
1716 case D3DSPSM_BIAS:
1717 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
1718 break;
1719 case D3DSPSM_BIASNEG:
1720 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
1721 break;
1722 case D3DSPSM_SIGN:
1723 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
1724 break;
1725 case D3DSPSM_SIGNNEG:
1726 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
1727 break;
1728 case D3DSPSM_COMP:
1729 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
1730 break;
1731 case D3DSPSM_X2:
1732 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
1733 break;
1734 case D3DSPSM_X2NEG:
1735 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
1736 break;
1737 case D3DSPSM_DZ:
1738 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
1739 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
1740 strcat(line, "\n"); /* Hack */
1741 strcat(line, tmpline);
1742 break;
1743 case D3DSPSM_DW:
1744 sprintf(line, "RCP T%c, %s;", 'A' + tmpreg, regstr);
1745 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
1746 strcat(line, "\n"); /* Hack */
1747 strcat(line, tmpline);
1748 break;
1749 default:
1750 strcpy(outregstr, regstr);
1751 insert_line = 0;
1754 if (insert_line) {
1755 /* Substitute the register name */
1756 sprintf(outregstr, "T%c", 'A' + tmpreg);
1759 return insert_line;
1763 * Pixel Shaders
1765 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/PixelShader1_X/modifiers/sourceregistermodifiers.asp
1766 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/PixelShader2_0/Registers/Registers.asp
1767 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/d3d/interfaces/IDirect3DPixelShader9/_IDirect3DPixelShader9.asp
1770 inline static VOID IDirect3DPixelShaderImpl_GenerateProgramArbHW(IDirect3DPixelShaderImpl* pshader, CONST DWORD* pFunction) {
1771 const DWORD* pToken = pFunction;
1772 const SHADER_OPCODE* curOpcode = NULL;
1773 const DWORD* pInstr;
1774 DWORD code;
1775 DWORD i;
1776 int autoparam;
1777 unsigned lineNum = 0;
1778 char *pgmStr = NULL;
1779 char tmpLine[255];
1780 BOOL saturate;
1781 int row = 0;
1782 DWORD tcw[2];
1783 IDirect3DDevice8Impl* This = pshader->device;
1784 int version = 0;
1786 for(i = 0; i < D3D8_PSHADER_MAX_CONSTANTS; i++)
1787 constants[i] = 0;
1789 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, 65535); /* 64kb should be enough */
1791 if (NULL != pToken) {
1792 while (D3DPS_END() != *pToken) {
1793 if (pshader_is_version_token(*pToken)) { /** version */
1794 int numTemps;
1795 int numConstants;
1797 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1798 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1800 TRACE_(d3d_hw_shader)("ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1802 /* Each release of pixel shaders has had different numbers of temp registers */
1803 switch (version) {
1804 case 10:
1805 case 11:
1806 case 12:
1807 case 13:
1808 case 14: numTemps=12;
1809 numConstants=8;
1810 strcpy(tmpLine, "!!ARBfp1.0");
1811 break;
1812 case 20: numTemps=12;
1813 numConstants=8;
1814 strcpy(tmpLine, "!!ARBfp2.0");
1815 FIXME_(d3d_hw_shader)("No work done yet to support ps2.0 in hw\n");
1816 break;
1817 case 30: numTemps=32;
1818 numConstants=8;
1819 strcpy(tmpLine, "!!ARBfp3.0");
1820 FIXME_(d3d_hw_shader)("No work done yet to support ps3.0 in hw\n");
1821 break;
1822 default:
1823 numTemps=12;
1824 numConstants=8;
1825 strcpy(tmpLine, "!!ARBfp1.0");
1826 FIXME_(d3d_hw_shader)("Unrecognized pixel shader version!\n");
1828 addline(&lineNum, pgmStr, tmpLine);
1830 for(i = 0; i < 6; i++) {
1831 sprintf(tmpLine, "TEMP T%lu;", i);
1832 addline(&lineNum, pgmStr, tmpLine);
1834 for(i = 0; i < 6; i++) {
1835 sprintf(tmpLine, "TEMP R%lu;", i);
1836 addline(&lineNum, pgmStr, tmpLine);
1839 sprintf(tmpLine, "TEMP TMP;");
1840 addline(&lineNum, pgmStr, tmpLine);
1841 sprintf(tmpLine, "TEMP TMP2;");
1842 addline(&lineNum, pgmStr, tmpLine);
1843 sprintf(tmpLine, "TEMP TA;");
1844 addline(&lineNum, pgmStr, tmpLine);
1845 sprintf(tmpLine, "TEMP TB;");
1846 addline(&lineNum, pgmStr, tmpLine);
1847 sprintf(tmpLine, "TEMP TC;");
1848 addline(&lineNum, pgmStr, tmpLine);
1850 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };");
1851 addline(&lineNum, pgmStr, tmpLine);
1852 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };");
1853 addline(&lineNum, pgmStr, tmpLine);
1854 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };");
1855 addline(&lineNum, pgmStr, tmpLine);
1857 for(i = 0; i < 4; i++) {
1858 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];", i, i);
1859 addline(&lineNum, pgmStr, tmpLine);
1862 ++pToken;
1863 continue;
1865 if (pshader_is_comment_token(*pToken)) { /** comment */
1866 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1867 ++pToken;
1868 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
1869 pToken += comment_len;
1870 continue;
1872 code = *pToken;
1873 pInstr = pToken;
1874 curOpcode = pshader_program_get_opcode(code, version);
1875 ++pToken;
1876 if (NULL == curOpcode) {
1877 /* unknown current opcode ... */
1878 while (*pToken & 0x80000000) {
1879 TRACE("unrecognized opcode: %08lx\n", *pToken);
1880 ++pToken;
1882 } else {
1883 autoparam = 1;
1884 saturate = FALSE;
1885 /* Build opcode for GL vertex_program */
1886 switch (curOpcode->opcode) {
1887 case D3DSIO_NOP:
1888 case D3DSIO_PHASE:
1889 continue;
1890 case D3DSIO_DEF:
1892 DWORD reg = *pToken & 0x00001FFF;
1893 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };", reg,
1894 *((const float*)(pToken+1)),
1895 *((const float*)(pToken+2)),
1896 *((const float*)(pToken+3)),
1897 *((const float*)(pToken+4)) );
1898 addline(&lineNum, pgmStr, tmpLine);
1899 constants[reg] = 1;
1900 autoparam = 0;
1901 pToken+=5;
1903 break;
1904 case D3DSIO_TEXKILL:
1905 strcpy(tmpLine, "KIL");
1906 break;
1907 case D3DSIO_TEX:
1909 char tmp[20];
1910 get_write_mask(*pToken, tmp);
1911 if (version != 14) {
1912 DWORD reg = *pToken & 0x00001FFF;
1913 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;", reg, tmp, reg, reg);
1914 addline(&lineNum, pgmStr, tmpLine);
1915 autoparam = 0;
1916 pToken++;
1917 } else {
1918 char line[256];
1919 char reg[20];
1920 DWORD reg1 = *pToken & 0x00001FFF;
1921 DWORD reg2 = *(pToken+1) & 0x00001FFF;
1922 if (gen_input_modifier_line(*(pToken+1), 0, reg, line))
1923 addline(&lineNum, pgmStr, line);
1924 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;", reg1, tmp, reg, reg2);
1925 addline(&lineNum, pgmStr, tmpLine);
1926 autoparam = 0;
1927 pToken += 2;
1930 break;
1931 case D3DSIO_TEXCOORD:
1933 char tmp[20];
1934 get_write_mask(*pToken, tmp);
1935 if (version != 14) {
1936 DWORD reg = *pToken & 0x00001FFF;
1937 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];", reg, tmp, reg);
1938 addline(&lineNum, pgmStr, tmpLine);
1939 autoparam = 0;
1940 pToken++;
1941 } else {
1942 DWORD reg1 = *pToken & 0x00001FFF;
1943 DWORD reg2 = *(pToken+1) & 0x00001FFF;
1944 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];", reg1, tmp, reg2);
1945 addline(&lineNum, pgmStr, tmpLine);
1946 autoparam = 0;
1947 pToken += 2;
1950 break;
1951 case D3DSIO_TEXM3x2PAD:
1953 DWORD reg = *pToken & 0x00001FFF;
1954 char buf[50];
1955 if (gen_input_modifier_line(*(pToken+1), 0, buf, tmpLine))
1956 addline(&lineNum, pgmStr, tmpLine);
1957 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;", reg, buf);
1958 addline(&lineNum, pgmStr, tmpLine);
1959 autoparam = 0;
1960 pToken += 2;
1962 break;
1963 case D3DSIO_TEXM3x2TEX:
1965 DWORD reg = *pToken & 0x00001FFF;
1966 char buf[50];
1967 if (gen_input_modifier_line(*(pToken+1), 0, buf, tmpLine))
1968 addline(&lineNum, pgmStr, tmpLine);
1969 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;", reg, buf);
1970 addline(&lineNum, pgmStr, tmpLine);
1971 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;", reg, reg);
1972 addline(&lineNum, pgmStr, tmpLine);
1973 autoparam = 0;
1974 pToken += 2;
1976 break;
1977 case D3DSIO_TEXREG2AR:
1979 DWORD reg1 = *pToken & 0x00001FFF;
1980 DWORD reg2 = *(pToken+1) & 0x00001FFF;
1981 sprintf(tmpLine, "MOV TMP.r, T%lu.a;", reg2);
1982 addline(&lineNum, pgmStr, tmpLine);
1983 sprintf(tmpLine, "MOV TMP.g, T%lu.r;", reg2);
1984 addline(&lineNum, pgmStr, tmpLine);
1985 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;", reg1, reg1);
1986 addline(&lineNum, pgmStr, tmpLine);
1987 autoparam = 0;
1988 pToken+=2;
1990 break;
1991 case D3DSIO_TEXREG2GB:
1993 DWORD reg1 = *pToken & 0x00001FFF;
1994 DWORD reg2 = *(pToken+1) & 0x00001FFF;
1995 sprintf(tmpLine, "MOV TMP.r, T%lu.g;", reg2);
1996 addline(&lineNum, pgmStr, tmpLine);
1997 sprintf(tmpLine, "MOV TMP.g, T%lu.b;", reg2);
1998 addline(&lineNum, pgmStr, tmpLine);
1999 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;", reg1, reg1);
2000 addline(&lineNum, pgmStr, tmpLine);
2001 autoparam = 0;
2002 pToken+=2;
2004 break;
2005 case D3DSIO_TEXBEM:
2007 DWORD reg1 = *pToken & 0x00001FFF;
2008 DWORD reg2 = *(pToken+1) & 0x00001FFF;
2009 /* FIXME: Should apply the BUMPMAPENV matrix */
2010 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;", reg1, reg2);
2011 addline(&lineNum, pgmStr, tmpLine);
2012 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;", reg1, reg1);
2013 addline(&lineNum, pgmStr, tmpLine);
2014 autoparam = 0;
2015 pToken+=2;
2017 break;
2018 case D3DSIO_TEXM3x3PAD:
2020 DWORD reg = *pToken & 0x00001FFF;
2021 char buf[50];
2022 if (gen_input_modifier_line(*(pToken+1), 0, buf, tmpLine))
2023 addline(&lineNum, pgmStr, tmpLine);
2024 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;", 'x'+row, reg, buf);
2025 addline(&lineNum, pgmStr, tmpLine);
2026 tcw[row++] = reg;
2027 autoparam = 0;
2028 pToken += 2;
2030 break;
2031 case D3DSIO_TEXM3x3TEX:
2033 DWORD reg = *pToken & 0x00001FFF;
2034 char buf[50];
2035 if (gen_input_modifier_line(*(pToken+1), 0, buf, tmpLine))
2036 addline(&lineNum, pgmStr, tmpLine);
2037 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;", reg, buf);
2038 addline(&lineNum, pgmStr, tmpLine);
2039 /* Cubemap textures will be more used than 3D ones. */
2040 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;", reg, reg);
2041 addline(&lineNum, pgmStr, tmpLine);
2042 row = 0;
2043 autoparam = 0;
2044 pToken += 2;
2046 case D3DSIO_TEXM3x3VSPEC:
2048 DWORD reg = *pToken & 0x00001FFF;
2049 char buf[50];
2050 if (gen_input_modifier_line(*(pToken+1), 0, buf, tmpLine))
2051 addline(&lineNum, pgmStr, tmpLine);
2052 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;", reg, buf);
2053 addline(&lineNum, pgmStr, tmpLine);
2054 /* Construct the eye-ray vector from w coordinates */
2055 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;", tcw[0]);
2056 addline(&lineNum, pgmStr, tmpLine);
2057 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;", tcw[1]);
2058 addline(&lineNum, pgmStr, tmpLine);
2059 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;", reg);
2060 addline(&lineNum, pgmStr, tmpLine);
2061 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
2062 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;");
2063 addline(&lineNum, pgmStr, tmpLine);
2064 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;");
2065 addline(&lineNum, pgmStr, tmpLine);
2066 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;");
2067 addline(&lineNum, pgmStr, tmpLine);
2068 /* Cubemap textures will be more used than 3D ones. */
2069 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;", reg, reg);
2070 addline(&lineNum, pgmStr, tmpLine);
2071 row = 0;
2072 autoparam = 0;
2073 pToken += 2;
2075 break;
2076 case D3DSIO_TEXM3x3SPEC:
2078 DWORD reg = *pToken & 0x00001FFF;
2079 DWORD reg3 = *(pToken+2) & 0x00001FFF;
2080 char buf[50];
2081 if (gen_input_modifier_line(*(pToken+1), 0, buf, tmpLine))
2082 addline(&lineNum, pgmStr, tmpLine);
2083 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;", reg, buf);
2084 addline(&lineNum, pgmStr, tmpLine);
2085 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
2086 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];", reg3);
2087 addline(&lineNum, pgmStr, tmpLine);
2088 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;");
2089 addline(&lineNum, pgmStr, tmpLine);
2090 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];", reg3);
2091 addline(&lineNum, pgmStr, tmpLine);
2092 /* Cubemap textures will be more used than 3D ones. */
2093 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;", reg, reg);
2094 addline(&lineNum, pgmStr, tmpLine);
2095 row = 0;
2096 autoparam = 0;
2097 pToken += 3;
2099 break;
2100 case D3DSIO_CND:
2101 break;
2102 case D3DSIO_CMP:
2103 break;
2104 case D3DSIO_MOV:
2105 strcpy(tmpLine, "MOV");
2106 break;
2107 case D3DSIO_MUL:
2108 strcpy(tmpLine, "MUL");
2109 break;
2110 case D3DSIO_DP3:
2111 strcpy(tmpLine, "DP3");
2112 break;
2113 case D3DSIO_MAD:
2114 strcpy(tmpLine, "MAD");
2115 break;
2116 case D3DSIO_ADD:
2117 strcpy(tmpLine, "ADD");
2118 break;
2119 case D3DSIO_SUB:
2120 strcpy(tmpLine, "SUB");
2121 break;
2122 case D3DSIO_LRP:
2123 strcpy(tmpLine, "LRP");
2124 break;
2125 default:
2126 FIXME_(d3d_hw_shader)("Can't handle opcode %s in hwShader\n", curOpcode->name);
2128 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
2129 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
2130 switch (mask) {
2131 case D3DSPDM_SATURATE: saturate = TRUE; break;
2132 default:
2133 TRACE("_unhandled_modifier(0x%08lx)", mask);
2136 if (autoparam && (curOpcode->num_params > 0)) {
2137 char regs[3][50];
2138 char operands[4][100];
2139 char tmp[256];
2140 char swzstring[20];
2141 int saturate = 0;
2142 /* Generate lines that handle input modifier computation */
2143 for (i = 1; i < curOpcode->num_params; i++) {
2144 if (gen_input_modifier_line(*(pToken+i), i-1, regs[i-1], tmp))
2145 addline(&lineNum, pgmStr, tmp);
2147 /* Handle saturation only when no shift is present in the output modifier */
2148 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
2149 saturate = 1;
2150 /* Handle output register */
2151 get_register_name(*pToken, tmp);
2152 strcpy(operands[0], tmp);
2153 get_write_mask(*pToken, tmp);
2154 strcat(operands[0], tmp);
2155 /* Handle input registers */
2156 for (i = 1; i < curOpcode->num_params; i++) {
2157 strcpy(operands[i], regs[i-1]);
2158 get_input_register_swizzle(*(pToken+i), swzstring);
2159 strcat(operands[i], swzstring);
2161 if (curOpcode->opcode == D3DSIO_CMP) {
2162 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
2163 } else if (curOpcode->opcode == D3DSIO_CND) {
2164 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
2165 addline(&lineNum, pgmStr, tmpLine);
2166 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
2167 } else {
2168 if (saturate)
2169 strcat(tmpLine, "_SAT");
2170 strcat(tmpLine, " ");
2171 strcat(tmpLine, operands[0]);
2172 for (i = 1; i < curOpcode->num_params; i++) {
2173 strcat(tmpLine, ", ");
2174 strcat(tmpLine, operands[i]);
2176 strcat(tmpLine,";");
2178 addline(&lineNum, pgmStr, tmpLine);
2179 pToken += curOpcode->num_params;
2181 if (curOpcode->num_params > 0) {
2182 DWORD param = *(pInstr+1);
2183 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
2184 /* Generate a line that handle the output modifier computation */
2185 char regstr[100];
2186 char write_mask[20];
2187 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
2188 get_register_name(param, regstr);
2189 get_write_mask(param, write_mask);
2190 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
2191 addline(&lineNum, pgmStr, tmpLine);
2196 strcpy(tmpLine, "MOV result.color, R0;");
2197 addline(&lineNum, pgmStr, tmpLine);
2199 strcpy(tmpLine, "END");
2200 addline(&lineNum, pgmStr, tmpLine);
2203 /* Create the hw shader */
2204 GL_EXTCALL(glGenProgramsARB(1, &pshader->prgId));
2205 TRACE_(d3d_hw_shader)("Creating a hw pixel shader, prg=%d\n", pshader->prgId);
2207 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, pshader->prgId));
2209 /* Create the program and check for errors */
2210 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
2211 if (glGetError() == GL_INVALID_OPERATION) {
2212 GLint errPos;
2213 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
2214 FIXME_(d3d_hw_shader)("HW PixelShader Error at position: %d\n%s\n", errPos, glGetString(GL_PROGRAM_ERROR_STRING_ARB));
2215 pshader->prgId = -1;
2218 HeapFree(GetProcessHeap(), 0, pgmStr);
2221 inline static VOID IDirect3DPixelShaderImpl_ParseProgram(IDirect3DPixelShaderImpl* pshader, CONST DWORD* pFunction) {
2222 const DWORD* pToken = pFunction;
2223 const SHADER_OPCODE* curOpcode = NULL;
2224 DWORD code;
2225 DWORD len = 0;
2226 DWORD i;
2227 int version = 0;
2229 if (NULL != pToken) {
2230 while (D3DPS_END() != *pToken) {
2231 if (pshader_is_version_token(*pToken)) { /** version */
2232 TRACE("ps.%lu.%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
2233 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
2234 ++pToken;
2235 ++len;
2236 continue;
2238 if (pshader_is_comment_token(*pToken)) { /** comment */
2239 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
2240 ++pToken;
2241 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
2242 pToken += comment_len;
2243 len += comment_len + 1;
2244 continue;
2246 code = *pToken;
2247 curOpcode = pshader_program_get_opcode(code, version);
2248 ++pToken;
2249 ++len;
2250 if (NULL == curOpcode) {
2251 /* unknown current opcode ... */
2252 while (*pToken & 0x80000000) {
2253 TRACE("unrecognized opcode: %08lx\n", *pToken);
2254 ++pToken;
2255 ++len;
2257 } else {
2258 TRACE(" ");
2259 pshader_program_dump_opcode(curOpcode, code, *pToken);
2260 if (curOpcode->num_params > 0) {
2261 pshader_program_dump_param(*pToken, 0);
2262 ++pToken;
2263 ++len;
2264 for (i = 1; i < curOpcode->num_params; ++i) {
2265 TRACE(", ");
2266 if (D3DSIO_DEF != code) {
2267 pshader_program_dump_param(*pToken, 1);
2268 } else {
2269 TRACE("%f", *((const float*) pToken));
2271 ++pToken;
2272 ++len;
2275 TRACE("\n");
2277 pshader->functionLength = (len + 1) * sizeof(DWORD);
2279 } else {
2280 pshader->functionLength = 1; /* no Function defined use fixed function vertex processing */
2283 if (NULL != pFunction) {
2284 IDirect3DPixelShaderImpl_GenerateProgramArbHW(pshader, pFunction);
2287 if (NULL != pFunction) {
2288 pshader->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, pshader->functionLength);
2289 memcpy(pshader->function, pFunction, pshader->functionLength);
2290 } else {
2291 pshader->function = NULL;
2295 HRESULT WINAPI IDirect3DDeviceImpl_CreatePixelShader(IDirect3DDevice8Impl* This, CONST DWORD* pFunction, IDirect3DPixelShaderImpl** ppPixelShader) {
2296 IDirect3DPixelShaderImpl* object;
2298 object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(IDirect3DPixelShaderImpl));
2299 if (NULL == object) {
2300 *ppPixelShader = NULL;
2301 return D3DERR_OUTOFVIDEOMEMORY;
2303 /*object->lpVtbl = &Direct3DPixelShader9_Vtbl;*/
2304 object->device = This;
2305 object->ref = 1;
2307 object->data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(PSHADERDATA8));
2309 IDirect3DPixelShaderImpl_ParseProgram(object, pFunction);
2311 *ppPixelShader = object;
2312 return D3D_OK;
2315 HRESULT WINAPI IDirect3DPixelShaderImpl_GetFunction(IDirect3DPixelShaderImpl* This, VOID* pData, UINT* pSizeOfData) {
2316 if (NULL == pData) {
2317 *pSizeOfData = This->functionLength;
2318 return D3D_OK;
2320 if (*pSizeOfData < This->functionLength) {
2321 *pSizeOfData = This->functionLength;
2322 return D3DERR_MOREDATA;
2324 if (NULL == This->function) { /* no function defined */
2325 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
2326 (*(DWORD **) pData) = NULL;
2327 } else {
2328 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
2329 memcpy(pData, This->function, This->functionLength);
2331 return D3D_OK;
2334 HRESULT WINAPI IDirect3DPixelShaderImpl_SetConstantF(IDirect3DPixelShaderImpl* This, UINT StartRegister, CONST FLOAT* pConstantData, UINT Vector4fCount) {
2335 if (StartRegister + Vector4fCount > D3D8_VSHADER_MAX_CONSTANTS) {
2336 return D3DERR_INVALIDCALL;
2338 if (NULL == This->data) { /* temporary while datas not supported */
2339 FIXME("(%p) : PixelShader_SetConstant not fully supported yet\n", This);
2340 return D3DERR_INVALIDCALL;
2342 memcpy(&This->data->C[StartRegister], pConstantData, Vector4fCount * 4 * sizeof(FLOAT));
2343 return D3D_OK;
2346 HRESULT WINAPI IDirect3DPixelShaderImpl_GetConstantF(IDirect3DPixelShaderImpl* This, UINT StartRegister, FLOAT* pConstantData, UINT Vector4fCount) {
2347 if (StartRegister + Vector4fCount > D3D8_VSHADER_MAX_CONSTANTS) {
2348 return D3DERR_INVALIDCALL;
2350 if (NULL == This->data) { /* temporary while datas not supported */
2351 return D3DERR_INVALIDCALL;
2353 memcpy(pConstantData, &This->data->C[StartRegister], Vector4fCount * 4 * sizeof(FLOAT));
2354 return D3D_OK;
2358 /**********************************************************************************************************************************************
2359 **********************************************************************************************************************************************
2360 **********************************************************************************************************************************************
2361 **********************************************************************************************************************************************
2362 **********************************************************************************************************************************************/
2364 /***********************************************************************
2365 * ValidateVertexShader (D3D8.@)
2367 * PARAMS
2368 * toto result?
2370 BOOL WINAPI ValidateVertexShader(LPVOID pFunction, int param1, int param2, LPVOID toto)
2372 FIXME("(void): stub: pFunction %p, param1 %d, param2 %d, result? %p\n", pFunction, param1, param2, toto);
2373 return 0;
2376 /***********************************************************************
2377 * ValidatePixelShader (D3D8.@)
2379 * PARAMS
2380 * toto result?
2382 BOOL WINAPI ValidatePixelShader(LPVOID pFunction, int param1, int param2, LPVOID toto)
2384 FIXME("(void): stub: pFunction %p, param1 %d, param2 %d, result? %p\n", pFunction, param1, param2, toto);
2385 return TRUE;