2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader
);
30 /* Shader debugging - Change the following line to enable debugging of software
#if 0 /* Must not be 1 in cvs version */
33 # define VSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
42 * DirectX9 SDK download
43 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
46 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
48 * Using Vertex Shaders
49 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
52 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
55 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
56 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
57 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
58 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
64 * http://msdn.microsoft.com/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
66 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
67 * http://developer.nvidia.com/view.asp?IO=vstovp
69 * NVIDIA: Memory Management with VAR
70 * http://developer.nvidia.com/view.asp?IO=var_memory_management
/**
 * Entry point of a software opcode handler.  The parameter list is left
 * unspecified on purpose: the interpreter calls handlers through this type
 * with a varying number of D3DSHADERVECTOR pointers.
 */
typedef void (*shader_fct_t)();
75 typedef struct SHADER_OPCODE
{
78 CONST UINT num_params
;
79 shader_fct_t soft_fct
;
84 /*******************************
85 * vshader functions software VM
88 void vshader_add(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
93 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
94 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
97 void vshader_dp3(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
98 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
;
99 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
100 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
103 void vshader_dp4(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
104 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
+ s0
->w
* s1
->w
;
105 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
106 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
109 void vshader_dst(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
111 d
->y
= s0
->y
* s1
->y
;
114 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
115 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
118 void vshader_expp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
124 tmp
.f
= floorf(s0
->w
);
125 d
->x
= powf(2.0f
, tmp
.f
);
126 d
->y
= s0
->w
- tmp
.f
;
127 tmp
.f
= powf(2.0f
, s0
->w
);
128 tmp
.d
&= 0xFFFFFF00U
;
131 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
132 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
135 void vshader_lit(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
137 d
->y
= (0.0f
< s0
->x
) ? s0
->x
: 0.0f
;
138 d
->z
= (0.0f
< s0
->x
&& 0.0f
< s0
->y
) ? powf(s0
->y
, s0
->w
) : 0.0f
;
140 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
141 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
144 void vshader_logp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
145 float tmp_f
= fabsf(s0
->w
);
146 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
147 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
148 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
151 void vshader_mad(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
, D3DSHADERVECTOR
* s2
) {
152 d
->x
= s0
->x
* s1
->x
+ s2
->x
;
153 d
->y
= s0
->y
* s1
->y
+ s2
->y
;
154 d
->z
= s0
->z
* s1
->z
+ s2
->z
;
155 d
->w
= s0
->w
* s1
->w
+ s2
->w
;
156 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
157 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, s2
->x
, s2
->y
, s2
->z
, s2
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
160 void vshader_max(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
161 d
->x
= (s0
->x
>= s1
->x
) ? s0
->x
: s1
->x
;
162 d
->y
= (s0
->y
>= s1
->y
) ? s0
->y
: s1
->y
;
163 d
->z
= (s0
->z
>= s1
->z
) ? s0
->z
: s1
->z
;
164 d
->w
= (s0
->w
>= s1
->w
) ? s0
->w
: s1
->w
;
165 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
166 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
169 void vshader_min(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
170 d
->x
= (s0
->x
< s1
->x
) ? s0
->x
: s1
->x
;
171 d
->y
= (s0
->y
< s1
->y
) ? s0
->y
: s1
->y
;
172 d
->z
= (s0
->z
< s1
->z
) ? s0
->z
: s1
->z
;
173 d
->w
= (s0
->w
< s1
->w
) ? s0
->w
: s1
->w
;
174 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
175 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
178 void vshader_mov(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
183 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
184 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
187 void vshader_mul(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
188 d
->x
= s0
->x
* s1
->x
;
189 d
->y
= s0
->y
* s1
->y
;
190 d
->z
= s0
->z
* s1
->z
;
191 d
->w
= s0
->w
* s1
->w
;
192 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
193 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
/**
 * nop: takes no operands and leaves every register untouched.
 */
void vshader_nop(void) {
  /* intentionally empty */
}
200 void vshader_rcp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
201 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== s0
->w
) ? HUGE_VAL
: 1.0f
/ s0
->w
;
202 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
203 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
206 void vshader_rsq(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
207 float tmp_f
= fabsf(s0
->w
);
208 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== tmp_f
) ? HUGE_VAL
: ((1.0f
!= tmp_f
) ? 1.0f
/ sqrtf(tmp_f
) : 1.0f
);
209 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
210 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
213 void vshader_sge(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
214 d
->x
= (s0
->x
>= s1
->x
) ? 1.0f
: 0.0f
;
215 d
->y
= (s0
->y
>= s1
->y
) ? 1.0f
: 0.0f
;
216 d
->z
= (s0
->z
>= s1
->z
) ? 1.0f
: 0.0f
;
217 d
->w
= (s0
->w
>= s1
->w
) ? 1.0f
: 0.0f
;
218 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
219 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
222 void vshader_slt(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
223 d
->x
= (s0
->x
< s1
->x
) ? 1.0f
: 0.0f
;
224 d
->y
= (s0
->y
< s1
->y
) ? 1.0f
: 0.0f
;
225 d
->z
= (s0
->z
< s1
->z
) ? 1.0f
: 0.0f
;
226 d
->w
= (s0
->w
< s1
->w
) ? 1.0f
: 0.0f
;
227 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
228 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
231 void vshader_sub(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
232 d
->x
= s0
->x
- s1
->x
;
233 d
->y
= s0
->y
- s1
->y
;
234 d
->z
= s0
->z
- s1
->z
;
235 d
->w
= s0
->w
- s1
->w
;
236 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
237 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
241 * Version 1.1 specific
244 void vshader_exp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
245 d
->x
= d
->y
= d
->z
= d
->w
= powf(2.0f
, s0
->w
);
246 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
247 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
250 void vshader_log(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
251 float tmp_f
= fabsf(s0
->w
);
252 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
253 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
254 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
257 void vshader_frc(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
258 d
->x
= s0
->x
- floorf(s0
->x
);
259 d
->y
= s0
->y
- floorf(s0
->y
);
262 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
263 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
266 typedef FLOAT D3DMATRIX44
[4][4];
267 typedef FLOAT D3DMATRIX43
[4][3];
268 typedef FLOAT D3DMATRIX34
[4][4];
269 typedef FLOAT D3DMATRIX33
[4][3];
270 typedef FLOAT D3DMATRIX32
[4][2];
272 void vshader_m4x4(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, /*D3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat
) {
274 * BuGGY CODE: here only if cast not work for copy/paste
275 D3DSHADERVECTOR* mat2 = mat1 + 1;
276 D3DSHADERVECTOR* mat3 = mat1 + 2;
277 D3DSHADERVECTOR* mat4 = mat1 + 3;
278 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
279 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
280 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
281 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
283 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
284 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
285 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
286 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
+ mat
[3][3] * s0
->w
;
287 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
288 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
289 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
290 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], mat
[3][3], s0
->w
, d
->w
));
293 void vshader_m4x3(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DMATRIX43 mat
) {
294 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
295 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
296 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
298 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
299 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
300 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
301 VSTRACE(("executing m4x3(4): (%f) (%f) \n", s0
->w
, d
->w
));
304 void vshader_m3x4(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DMATRIX34 mat
) {
305 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
306 d
->y
= mat
[2][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
307 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
308 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
;
309 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
310 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
311 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
312 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], s0
->w
, d
->w
));
315 void vshader_m3x3(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DMATRIX33 mat
) {
316 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[2][2] * s0
->z
;
317 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[2][2] * s0
->z
;
318 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
320 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
321 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
322 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
323 VSTRACE(("executing m3x3(4): (%f) \n", d
->w
));
326 void vshader_m3x2(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DMATRIX32 mat
) {
328 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
329 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
335 * Version 2.0 specific
337 void vshader_lrp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
, D3DSHADERVECTOR
* s2
, D3DSHADERVECTOR
* s3
) {
338 d
->x
= s0
->x
* (s1
->x
- s2
->x
) + s2
->x
;
339 d
->y
= s0
->y
* (s1
->y
- s2
->y
) + s2
->y
;
340 d
->z
= s0
->z
* (s1
->z
- s2
->z
) + s2
->z
;
341 d
->w
= s0
->w
* (s1
->w
- s2
->w
) + s2
->x
;
 * log, exp, frc and m*x* seem to be macro instructions ... to be verified
347 static CONST SHADER_OPCODE vshader_ins
[] = {
348 {D3DSIO_NOP
, "nop", 0, vshader_nop
, 0, 0},
349 {D3DSIO_MOV
, "mov", 2, vshader_mov
, 0, 0},
350 {D3DSIO_ADD
, "add", 3, vshader_add
, 0, 0},
351 {D3DSIO_SUB
, "sub", 3, vshader_sub
, 0, 0},
352 {D3DSIO_MAD
, "mad", 4, vshader_mad
, 0, 0},
353 {D3DSIO_MUL
, "mul", 3, vshader_mul
, 0, 0},
354 {D3DSIO_RCP
, "rcp", 2, vshader_rcp
, 0, 0},
355 {D3DSIO_RSQ
, "rsq", 2, vshader_rsq
, 0, 0},
356 {D3DSIO_DP3
, "dp3", 3, vshader_dp3
, 0, 0},
357 {D3DSIO_DP4
, "dp4", 3, vshader_dp4
, 0, 0},
358 {D3DSIO_MIN
, "min", 3, vshader_min
, 0, 0},
359 {D3DSIO_MAX
, "max", 3, vshader_max
, 0, 0},
360 {D3DSIO_SLT
, "slt", 3, vshader_slt
, 0, 0},
361 {D3DSIO_SGE
, "sge", 3, vshader_sge
, 0, 0},
362 {D3DSIO_EXP
, "exp", 2, vshader_exp
, 0, 0},
363 {D3DSIO_LOG
, "log", 2, vshader_log
, 0, 0},
364 {D3DSIO_LIT
, "lit", 2, vshader_lit
, 0, 0},
365 {D3DSIO_DST
, "dst", 3, vshader_dst
, 0, 0},
366 {D3DSIO_LRP
, "lrp", 5, vshader_lrp
, 0, 0},
367 {D3DSIO_FRC
, "frc", 2, vshader_frc
, 0, 0},
368 {D3DSIO_M4x4
, "m4x4", 3, vshader_m4x4
, 0, 0},
369 {D3DSIO_M4x3
, "m4x3", 3, vshader_m4x3
, 0, 0},
370 {D3DSIO_M3x4
, "m3x4", 3, vshader_m3x4
, 0, 0},
371 {D3DSIO_M3x3
, "m3x3", 3, vshader_m3x3
, 0, 0},
372 {D3DSIO_M3x2
, "m3x2", 3, vshader_m3x2
, 0, 0},
373 /** FIXME: use direct access so add the others opcodes as stubs */
374 {D3DSIO_EXPP
, "expp", 2, vshader_expp
, 0, 0},
375 {D3DSIO_LOGP
, "logp", 2, vshader_logp
, 0, 0},
377 {0, NULL
, 0, NULL
, 0, 0}
381 inline static const SHADER_OPCODE
* vshader_program_get_opcode(const DWORD code
) {
383 /** TODO: use dichotomic search */
384 while (NULL
!= vshader_ins
[i
].name
) {
385 if ((code
& D3DSI_OPCODE_MASK
) == vshader_ins
[i
].opcode
) {
386 return &vshader_ins
[i
];
393 inline static void vshader_program_dump_param(const DWORD param
, int input
) {
394 static const char* rastout_reg_names
[] = { "oPos", "oFog", "oPts" };
395 static const char swizzle_reg_chars
[] = "xyzw";
397 DWORD reg
= param
& 0x00001FFF;
398 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
400 if ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
) TRACE("-");
402 switch (regtype
<< D3DSP_REGTYPE_SHIFT
) {
404 TRACE("R[%lu]", reg
);
407 TRACE("V[%lu]", reg
);
410 TRACE("C[%s%lu]", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
412 case D3DSPR_ADDR
: /*case D3DSPR_TEXTURE:*/
413 TRACE("a[%lu]", reg
);
416 TRACE("%s", rastout_reg_names
[reg
]);
419 TRACE("oD[%lu]", reg
);
421 case D3DSPR_TEXCRDOUT
:
422 TRACE("oT[%lu]", reg
);
429 /** operand output */
430 if ((param
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
431 if (param
& D3DSP_WRITEMASK_0
) TRACE(".x");
432 if (param
& D3DSP_WRITEMASK_1
) TRACE(".y");
433 if (param
& D3DSP_WRITEMASK_2
) TRACE(".z");
434 if (param
& D3DSP_WRITEMASK_3
) TRACE(".w");
438 DWORD swizzle
= (param
& D3DVS_SWIZZLE_MASK
) >> D3DVS_SWIZZLE_SHIFT
;
439 DWORD swizzle_x
= swizzle
& 0x03;
440 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
441 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
442 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
444 * swizzle bits fields:
447 if ((D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
448 if (swizzle_x
== swizzle_y
&&
449 swizzle_x
== swizzle_z
&&
450 swizzle_x
== swizzle_w
) {
451 TRACE(".%c", swizzle_reg_chars
[swizzle_x
]);
454 swizzle_reg_chars
[swizzle_x
],
455 swizzle_reg_chars
[swizzle_y
],
456 swizzle_reg_chars
[swizzle_z
],
457 swizzle_reg_chars
[swizzle_w
]);
463 inline static BOOL
vshader_is_version_token(DWORD token
) {
464 return 0xFFFE0000 == (token
& 0xFFFE0000);
467 inline static BOOL
vshader_is_comment_token(DWORD token
) {
468 return D3DSIO_COMMENT
== (token
& D3DSI_OPCODE_MASK
);
472 * Function parser ...
474 HRESULT WINAPI
IDirect3DVertexShaderImpl_ParseProgram(IDirect3DVertexShaderImpl
* vshader
, CONST DWORD
* pFunction
) {
475 const DWORD
* pToken
= pFunction
;
476 const SHADER_OPCODE
* curOpcode
= NULL
;
480 if (NULL
!= pToken
) {
481 while (D3DVS_END() != *pToken
) {
482 if (vshader_is_version_token(*pToken
)) { /** version */
483 TRACE("vs.%lu.%lu\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
488 if (vshader_is_comment_token(*pToken
)) { /** comment */
489 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
491 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
492 pToken
+= comment_len
;
493 len
+= comment_len
+ 1;
496 curOpcode
= vshader_program_get_opcode(*pToken
);
499 if (NULL
== curOpcode
) {
500 /* unkown current opcode ... */
501 while (*pToken
& 0x80000000) {
502 TRACE("unrecognized opcode: %08lx\n", *pToken
);
507 TRACE("%s ", curOpcode
->name
);
508 if (curOpcode
->num_params
> 0) {
509 vshader_program_dump_param(*pToken
, 0);
512 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
514 vshader_program_dump_param(*pToken
, 1);
522 vshader
->functionLength
= (len
+ 1) * sizeof(DWORD
);
524 vshader
->functionLength
= 1; /* no Function defined use fixed function vertex processing */
526 /* copy the function ... because it will certainly be released by application */
528 if (NULL
!= pFunction
) {
529 vshader
->function
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, vshader
->functionLength
);
530 memcpy(vshader
->function
, pFunction
, vshader
->functionLength
);
532 vshader
->function
= NULL
;
537 BOOL
IDirect3DVertexShaderImpl_ExecuteHAL(IDirect3DVertexShaderImpl
* vshader
, VSHADERINPUTDATA
* input
, VSHADEROUTPUTDATA
* output
) {
539 * TODO: use the NV_vertex_program (or 1_1) extension
540 * and specifics vendors (ARB_vertex_program??) variants for it
545 HRESULT WINAPI
IDirect3DVertexShaderImpl_ExecuteSW(IDirect3DVertexShaderImpl
* vshader
, VSHADERINPUTDATA
* input
, VSHADEROUTPUTDATA
* output
) {
546 /** Vertex Shader Temporary Registers */
547 D3DSHADERVECTOR R
[12];
548 /*D3DSHADERSCALAR A0;*/
549 D3DSHADERVECTOR A
[1];
550 /** temporary Vector for modifier management */
552 D3DSHADERVECTOR s
[3];
554 const DWORD
* pToken
= vshader
->function
;
555 const SHADER_OPCODE
* curOpcode
= NULL
;
556 /** functions parameters */
557 D3DSHADERVECTOR
* p
[4];
558 D3DSHADERVECTOR
* p_send
[4];
561 /** init temporary register */
562 memset(R
, 0, 12 * sizeof(D3DSHADERVECTOR
));
564 /* vshader_program_parse(vshader); */
565 #if 0 /* Must not be 1 in cvs */
567 TRACE_VSVECTOR(vshader
->data
->C
[0]);
568 TRACE_VSVECTOR(vshader
->data
->C
[1]);
569 TRACE_VSVECTOR(vshader
->data
->C
[2]);
570 TRACE_VSVECTOR(vshader
->data
->C
[3]);
571 TRACE_VSVECTOR(vshader
->data
->C
[4]);
572 TRACE_VSVECTOR(vshader
->data
->C
[5]);
573 TRACE_VSVECTOR(vshader
->data
->C
[6]);
574 TRACE_VSVECTOR(vshader
->data
->C
[7]);
575 TRACE_VSVECTOR(vshader
->data
->C
[8]);
576 TRACE_VSVECTOR(vshader
->data
->C
[64]);
577 TRACE_VSVECTOR(input
->V
[D3DVSDE_POSITION
]);
578 TRACE_VSVECTOR(input
->V
[D3DVSDE_BLENDWEIGHT
]);
579 TRACE_VSVECTOR(input
->V
[D3DVSDE_BLENDINDICES
]);
580 TRACE_VSVECTOR(input
->V
[D3DVSDE_NORMAL
]);
581 TRACE_VSVECTOR(input
->V
[D3DVSDE_PSIZE
]);
582 TRACE_VSVECTOR(input
->V
[D3DVSDE_DIFFUSE
]);
583 TRACE_VSVECTOR(input
->V
[D3DVSDE_SPECULAR
]);
584 TRACE_VSVECTOR(input
->V
[D3DVSDE_TEXCOORD0
]);
585 TRACE_VSVECTOR(input
->V
[D3DVSDE_TEXCOORD1
]);
588 TRACE_VSVECTOR(vshader
->data
->C
[64]);
590 /* the first dword is the version tag */
593 if (vshader_is_version_token(*pToken
)) { /** version */
596 while (D3DVS_END() != *pToken
) {
597 if (vshader_is_comment_token(*pToken
)) { /** comment */
598 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
600 pToken
+= comment_len
;
603 curOpcode
= vshader_program_get_opcode(*pToken
);
605 if (NULL
== curOpcode
) {
607 /* unkown current opcode ... */
608 while (*pToken
& 0x80000000) {
610 TRACE("unrecognized opcode: pos=%d token=%08lX\n", (pToken
- 1) - vshader
->function
, *(pToken
- 1));
612 TRACE("unrecognized opcode param: pos=%d token=%08lX what=", pToken
- vshader
->function
, *pToken
);
613 vshader_program_dump_param(*pToken
, i
);
620 if (curOpcode
->num_params
> 0) {
621 /*TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken);*/
622 for (i
= 0; i
< curOpcode
->num_params
; ++i
) {
623 DWORD reg
= pToken
[i
] & 0x00001FFF;
624 DWORD regtype
= ((pToken
[i
] & D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
626 switch (regtype
<< D3DSP_REGTYPE_SHIFT
) {
628 /*TRACE("p[%d]=R[%d]\n", i, reg);*/
632 /*TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]);*/
633 p
[i
] = &input
->V
[reg
];
636 if (pToken
[i
] & D3DVS_ADDRMODE_RELATIVE
) {
637 p
[i
] = &vshader
->data
->C
[(DWORD
) A
[0].x
+ reg
];
639 p
[i
] = &vshader
->data
->C
[reg
];
642 case D3DSPR_ADDR
: /*case D3DSPR_TEXTURE:*/
644 ERR("cannot handle address registers != a0, forcing use of a0\n");
647 /*TRACE("p[%d]=A[%d]\n", i, reg);*/
652 case D3DSRO_POSITION
:
653 p
[i
] = &output
->oPos
;
656 p
[i
] = &output
->oFog
;
658 case D3DSRO_POINT_SIZE
:
659 p
[i
] = &output
->oPts
;
664 /*TRACE("p[%d]=oD[%d]\n", i, reg);*/
665 p
[i
] = &output
->oD
[reg
];
667 case D3DSPR_TEXCRDOUT
:
668 /*TRACE("p[%d]=oT[%d]\n", i, reg);*/
669 p
[i
] = &output
->oT
[reg
];
675 if (i
> 0) { /* input reg */
676 DWORD swizzle
= (pToken
[i
] & D3DVS_SWIZZLE_MASK
) >> D3DVS_SWIZZLE_SHIFT
;
677 UINT isNegative
= ((pToken
[i
] & D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
);
679 if (!isNegative
&& (D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) == swizzle
) {
680 /*TRACE("p[%d] not swizzled\n", i);*/
683 DWORD swizzle_x
= swizzle
& 0x03;
684 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
685 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
686 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
687 /*TRACE("p[%d] swizzled\n", i);*/
688 float* tt
= (float*) p
[i
];
689 s
[i
].x
= (isNegative
) ? -tt
[swizzle_x
] : tt
[swizzle_x
];
690 s
[i
].y
= (isNegative
) ? -tt
[swizzle_y
] : tt
[swizzle_y
];
691 s
[i
].z
= (isNegative
) ? -tt
[swizzle_z
] : tt
[swizzle_z
];
692 s
[i
].w
= (isNegative
) ? -tt
[swizzle_w
] : tt
[swizzle_w
];
695 } else { /* output reg */
696 if ((pToken
[i
] & D3DSP_WRITEMASK_ALL
) == D3DSP_WRITEMASK_ALL
) {
699 p_send
[i
] = &d
; /* to be post-processed for modifiers management */
705 switch (curOpcode
->num_params
) {
707 curOpcode
->soft_fct();
710 curOpcode
->soft_fct(p_send
[0]);
713 curOpcode
->soft_fct(p_send
[0], p_send
[1]);
716 curOpcode
->soft_fct(p_send
[0], p_send
[1], p_send
[2]);
719 curOpcode
->soft_fct(p_send
[0], p_send
[1], p_send
[2], p_send
[3]);
722 curOpcode
->soft_fct(p_send
[0], p_send
[1], p_send
[2], p_send
[3], p_send
[4]);
725 ERR("%s too many params: %u\n", curOpcode
->name
, curOpcode
->num_params
);
728 /* check if output reg modifier post-process */
729 if (curOpcode
->num_params
> 0 && (pToken
[0] & D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
730 if (pToken
[0] & D3DSP_WRITEMASK_0
) p
[0]->x
= d
.x
;
731 if (pToken
[0] & D3DSP_WRITEMASK_1
) p
[0]->y
= d
.y
;
732 if (pToken
[0] & D3DSP_WRITEMASK_2
) p
[0]->z
= d
.z
;
733 if (pToken
[0] & D3DSP_WRITEMASK_3
) p
[0]->w
= d
.w
;
737 TRACE_VSVECTOR(output
->oPos
);
738 TRACE_VSVECTOR(output
->oD
[0]);
739 TRACE_VSVECTOR(output
->oD
[1]);
740 TRACE_VSVECTOR(output
->oT
[0]);
741 TRACE_VSVECTOR(output
->oT
[1]);
742 TRACE_VSVECTOR(R
[0]);
743 TRACE_VSVECTOR(R
[1]);
744 TRACE_VSVECTOR(R
[2]);
745 TRACE_VSVECTOR(R
[3]);
746 TRACE_VSVECTOR(R
[4]);
747 TRACE_VSVECTOR(R
[5]);
750 /* to next opcode token */
751 pToken
+= curOpcode
->num_params
;
754 TRACE("End of current instruction:\n");
755 TRACE_VSVECTOR(output
->oPos
);
756 TRACE_VSVECTOR(output
->oD
[0]);
757 TRACE_VSVECTOR(output
->oD
[1]);
758 TRACE_VSVECTOR(output
->oT
[0]);
759 TRACE_VSVECTOR(output
->oT
[1]);
760 TRACE_VSVECTOR(R
[0]);
761 TRACE_VSVECTOR(R
[1]);
762 TRACE_VSVECTOR(R
[2]);
763 TRACE_VSVECTOR(R
[3]);
764 TRACE_VSVECTOR(R
[4]);
765 TRACE_VSVECTOR(R
[5]);
768 #if 0 /* Must not be 1 in cvs */
770 TRACE_VSVECTOR(output
->oPos
);
771 TRACE_VSVECTOR(output
->oD
[0]);
772 TRACE_VSVECTOR(output
->oD
[1]);
773 TRACE_VSVECTOR(output
->oT
[0]);
774 TRACE_VSVECTOR(output
->oT
[1]);
779 HRESULT WINAPI
IDirect3DVertexShaderImpl_GetFunction(IDirect3DVertexShaderImpl
* This
, VOID
* pData
, UINT
* pSizeOfData
) {
781 *pSizeOfData
= This
->functionLength
;
784 if (*pSizeOfData
< This
->functionLength
) {
785 *pSizeOfData
= This
->functionLength
;
786 return D3DERR_MOREDATA
;
788 if (NULL
== This
->function
) { /* no function defined */
789 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This
, pData
);
790 (*(DWORD
**) pData
) = NULL
;
792 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
793 memcpy(pData
, This
->function
, This
->functionLength
);
798 HRESULT WINAPI
IDirect3DVertexShaderImpl_SetConstantF(IDirect3DVertexShaderImpl
* This
, UINT StartRegister
, CONST FLOAT
* pConstantData
, UINT Vector4fCount
) {
799 FIXME("(%p) : stub\n", This
);
803 HRESULT WINAPI
IDirect3DVertexShaderImpl_GetConstantF(IDirect3DVertexShaderImpl
* This
, UINT StartRegister
, FLOAT
* pConstantData
, UINT Vector4fCount
) {
804 FIXME("(%p) : stub\n", This
);
808 HRESULT WINAPI
IDirect3DVertexShaderImpl_SetConstantI(IDirect3DVertexShaderImpl
* This
, UINT StartRegister
, CONST
int* pConstantData
, UINT Vector4iCount
) {
809 if (StartRegister
+ Vector4iCount
> D3D_VSHADER_MAX_CONSTANTS
) {
810 ERR("(%p) : SetVertexShaderConstantI C[%u] invalid\n", This
, StartRegister
);
811 return D3DERR_INVALIDCALL
;
813 if (NULL
== pConstantData
) {
814 return D3DERR_INVALIDCALL
;
816 FIXME("(%p) : stub\n", This
);
820 HRESULT WINAPI
IDirect3DVertexShaderImpl_GetConstantI(IDirect3DVertexShaderImpl
* This
, UINT StartRegister
, int* pConstantData
, UINT Vector4iCount
) {
821 TRACE("(%p) : C[%u] count=%u\n", This
, StartRegister
, Vector4iCount
);
822 if (StartRegister
+ Vector4iCount
> D3D_VSHADER_MAX_CONSTANTS
) {
823 return D3DERR_INVALIDCALL
;
825 if (NULL
== pConstantData
) {
826 return D3DERR_INVALIDCALL
;
828 FIXME("(%p) : stub\n", This
);
832 HRESULT WINAPI
IDirect3DVertexShaderImpl_SetConstantB(IDirect3DVertexShaderImpl
* This
, UINT StartRegister
, CONST BOOL
* pConstantData
, UINT BoolCount
) {
833 if (StartRegister
+ BoolCount
> D3D_VSHADER_MAX_CONSTANTS
) {
834 ERR("(%p) : SetVertexShaderConstantB C[%u] invalid\n", This
, StartRegister
);
835 return D3DERR_INVALIDCALL
;
837 if (NULL
== pConstantData
) {
838 return D3DERR_INVALIDCALL
;
840 FIXME("(%p) : stub\n", This
);
844 HRESULT WINAPI
IDirect3DVertexShaderImpl_GetConstantB(IDirect3DVertexShaderImpl
* This
, UINT StartRegister
, BOOL
* pConstantData
, UINT BoolCount
) {
845 FIXME("(%p) : stub\n", This
);