2 * shaders implementation
4 * Copyright 2002-2004 Raphael Junqueira
5 * Copyright 2004 Jason Edmeades
6 * Copyright 2004 Christian Costa
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 #include "wine/debug.h"
35 #include "d3d8_private.h"
37 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader
);
38 WINE_DECLARE_DEBUG_CHANNEL(d3d_hw_shader
);
40 /* Shader debugging - Change the following line to enable debugging of software
42 #if 0 /* Must not be 1 in cvs version */
43 # define VSTRACE(A) TRACE A
44 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
47 # define TRACE_VSVECTOR(name)
51 * DirectX9 SDK download
52 * http://msdn.microsoft.com/library/default.asp?url=/downloads/list/directx.asp
55 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx07162002.asp
57 * Using Vertex Shaders
58 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dndrive/html/directx02192001.asp
61 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/whatsnew.asp
64 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/VertexShader2_0.asp
65 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader2_0/Instructions/Instructions.asp
66 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexDeclaration/VertexDeclaration.asp
67 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/VertexShader3_0/VertexShader3_0.asp
70 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/advancedtopics/VertexPipe/matrixstack/matrixstack.asp
73 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/programmingguide/GettingStarted/VertexFormats/vformats.asp
75 * NVIDIA: DX8 Vertex Shader to NV Vertex Program
76 * http://developer.nvidia.com/view.asp?IO=vstovp
78 * NVIDIA: Memory Management with VAR
79 * http://developer.nvidia.com/view.asp?IO=var_memory_management
82 typedef void (*shader_fct_t
)();
84 typedef struct SHADER_OPCODE
{
87 CONST UINT num_params
;
88 shader_fct_t soft_fct
;
93 /*******************************
94 * vshader functions software VM
97 void vshader_add(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
100 d
->z
= s0
->z
+ s1
->z
;
101 d
->w
= s0
->w
+ s1
->w
;
102 VSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
103 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
106 void vshader_dp3(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
107 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
;
108 VSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
109 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
112 void vshader_dp4(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
113 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
+ s0
->w
* s1
->w
;
114 VSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
115 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
118 void vshader_dst(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
120 d
->y
= s0
->y
* s1
->y
;
123 VSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
124 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
127 void vshader_expp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
133 tmp
.f
= floorf(s0
->w
);
134 d
->x
= powf(2.0f
, tmp
.f
);
135 d
->y
= s0
->w
- tmp
.f
;
137 tmp
.f
= powf(2.0f
, s0
->w
);
138 tmp
.d
&= 0xFFFFFF00U
;
141 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
142 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
145 void vshader_lit(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
147 d
->y
= (0.0f
< s0
->x
) ? s0
->x
: 0.0f
;
148 d
->z
= (0.0f
< s0
->x
&& 0.0f
< s0
->y
) ? powf(s0
->y
, s0
->w
) : 0.0f
;
150 VSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
151 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
154 void vshader_logp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
155 float tmp_f
= fabsf(s0
->w
);
156 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
157 VSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
158 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
161 void vshader_mad(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
, D3DSHADERVECTOR
* s2
) {
162 d
->x
= s0
->x
* s1
->x
+ s2
->x
;
163 d
->y
= s0
->y
* s1
->y
+ s2
->y
;
164 d
->z
= s0
->z
* s1
->z
+ s2
->z
;
165 d
->w
= s0
->w
* s1
->w
+ s2
->w
;
166 VSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
167 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, s2
->x
, s2
->y
, s2
->z
, s2
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
170 void vshader_max(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
171 d
->x
= (s0
->x
>= s1
->x
) ? s0
->x
: s1
->x
;
172 d
->y
= (s0
->y
>= s1
->y
) ? s0
->y
: s1
->y
;
173 d
->z
= (s0
->z
>= s1
->z
) ? s0
->z
: s1
->z
;
174 d
->w
= (s0
->w
>= s1
->w
) ? s0
->w
: s1
->w
;
175 VSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
176 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
179 void vshader_min(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
180 d
->x
= (s0
->x
< s1
->x
) ? s0
->x
: s1
->x
;
181 d
->y
= (s0
->y
< s1
->y
) ? s0
->y
: s1
->y
;
182 d
->z
= (s0
->z
< s1
->z
) ? s0
->z
: s1
->z
;
183 d
->w
= (s0
->w
< s1
->w
) ? s0
->w
: s1
->w
;
184 VSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
185 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
188 void vshader_mov(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
193 VSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
194 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
197 void vshader_mul(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
198 d
->x
= s0
->x
* s1
->x
;
199 d
->y
= s0
->y
* s1
->y
;
200 d
->z
= s0
->z
* s1
->z
;
201 d
->w
= s0
->w
* s1
->w
;
202 VSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
203 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
/**
 * nop: software implementation of the no-operation instruction.
 * Takes no operands and performs no work.
 */
void vshader_nop(void) {
  /* intentionally empty */
}
210 void vshader_rcp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
211 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== s0
->w
) ? HUGE_VAL
: 1.0f
/ s0
->w
;
212 VSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
213 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
216 void vshader_rsq(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
217 float tmp_f
= fabsf(s0
->w
);
218 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== tmp_f
) ? HUGE_VAL
: ((1.0f
!= tmp_f
) ? 1.0f
/ sqrtf(tmp_f
) : 1.0f
);
219 VSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
220 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
223 void vshader_sge(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
224 d
->x
= (s0
->x
>= s1
->x
) ? 1.0f
: 0.0f
;
225 d
->y
= (s0
->y
>= s1
->y
) ? 1.0f
: 0.0f
;
226 d
->z
= (s0
->z
>= s1
->z
) ? 1.0f
: 0.0f
;
227 d
->w
= (s0
->w
>= s1
->w
) ? 1.0f
: 0.0f
;
228 VSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
229 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
232 void vshader_slt(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
233 d
->x
= (s0
->x
< s1
->x
) ? 1.0f
: 0.0f
;
234 d
->y
= (s0
->y
< s1
->y
) ? 1.0f
: 0.0f
;
235 d
->z
= (s0
->z
< s1
->z
) ? 1.0f
: 0.0f
;
236 d
->w
= (s0
->w
< s1
->w
) ? 1.0f
: 0.0f
;
237 VSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
238 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
241 void vshader_sub(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
242 d
->x
= s0
->x
- s1
->x
;
243 d
->y
= s0
->y
- s1
->y
;
244 d
->z
= s0
->z
- s1
->z
;
245 d
->w
= s0
->w
- s1
->w
;
246 VSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
247 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
251 * Version 1.1 specific
254 void vshader_exp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
255 d
->x
= d
->y
= d
->z
= d
->w
= powf(2.0f
, s0
->w
);
256 VSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
257 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
260 void vshader_log(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
261 float tmp_f
= fabsf(s0
->w
);
262 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
263 VSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
264 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
267 void vshader_frc(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
268 d
->x
= s0
->x
- floorf(s0
->x
);
269 d
->y
= s0
->y
- floorf(s0
->y
);
272 VSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
273 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
276 typedef FLOAT D3DMATRIX44
[4][4];
277 typedef FLOAT D3DMATRIX43
[3][4];
278 typedef FLOAT D3DMATRIX34
[4][3];
279 typedef FLOAT D3DMATRIX33
[3][3];
280 typedef FLOAT D3DMATRIX32
[2][3];
282 void vshader_m4x4(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, /*D3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat
) {
284 * BuGGY CODE: here only if cast not work for copy/paste
285 D3DSHADERVECTOR* mat2 = mat1 + 1;
286 D3DSHADERVECTOR* mat3 = mat1 + 2;
287 D3DSHADERVECTOR* mat4 = mat1 + 3;
288 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
289 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
290 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
291 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
293 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
294 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
295 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
296 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
+ mat
[3][3] * s0
->w
;
297 VSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f)\n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
298 VSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f)\n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
299 VSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f)\n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
300 VSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f)\n", mat
[3][0], mat
[3][1], mat
[3][2], mat
[3][3], s0
->w
, d
->w
));
303 void vshader_m4x3(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DMATRIX43 mat
) {
304 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
305 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
306 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
308 VSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f)\n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
309 VSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f)\n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
310 VSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f)\n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
311 VSTRACE(("executing m4x3(4): (%f) (%f)\n", s0
->w
, d
->w
));
314 void vshader_m3x4(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DMATRIX34 mat
) {
315 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
316 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
317 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
318 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
;
319 VSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
320 VSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
321 VSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
322 VSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], s0
->w
, d
->w
));
325 void vshader_m3x3(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DMATRIX33 mat
) {
326 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
327 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
328 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
330 VSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
331 VSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
332 VSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
333 VSTRACE(("executing m3x3(4): (%f) \n", d
->w
));
336 void vshader_m3x2(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DMATRIX32 mat
) {
338 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
339 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
345 * Version 2.0 specific
347 void vshader_lrp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
, D3DSHADERVECTOR
* s2
, D3DSHADERVECTOR
* s3
) {
348 d
->x
= s0
->x
* (s1
->x
- s2
->x
) + s2
->x
;
349 d
->y
= s0
->y
* (s1
->y
- s2
->y
) + s2
->y
;
350 d
->z
= s0
->z
* (s1
->z
- s2
->z
) + s2
->z
;
351 d
->w
= s0
->w
* (s1
->w
- s2
->w
) + s2
->x
;
355 * log, exp, frc, m*x* seem to be macro instructions ... to be checked
356 * Note opcode must be in uppercase if direct mapping to GL hw shaders
358 static CONST SHADER_OPCODE vshader_ins
[] = {
359 {D3DSIO_NOP
, "NOP", 0, vshader_nop
, 0, 0},
360 {D3DSIO_MOV
, "MOV", 2, vshader_mov
, 0, 0},
361 {D3DSIO_ADD
, "ADD", 3, vshader_add
, 0, 0},
362 {D3DSIO_SUB
, "SUB", 3, vshader_sub
, 0, 0},
363 {D3DSIO_MAD
, "MAD", 4, vshader_mad
, 0, 0},
364 {D3DSIO_MUL
, "MUL", 3, vshader_mul
, 0, 0},
365 {D3DSIO_RCP
, "RCP", 2, vshader_rcp
, 0, 0},
366 {D3DSIO_RSQ
, "RSQ", 2, vshader_rsq
, 0, 0},
367 {D3DSIO_DP3
, "DP3", 3, vshader_dp3
, 0, 0},
368 {D3DSIO_DP4
, "DP4", 3, vshader_dp4
, 0, 0},
369 {D3DSIO_MIN
, "MIN", 3, vshader_min
, 0, 0},
370 {D3DSIO_MAX
, "MAX", 3, vshader_max
, 0, 0},
371 {D3DSIO_SLT
, "SLT", 3, vshader_slt
, 0, 0},
372 {D3DSIO_SGE
, "SGE", 3, vshader_sge
, 0, 0},
373 {D3DSIO_EXP
, "EXP", 2, vshader_exp
, 0, 0},
374 {D3DSIO_LOG
, "LOG", 2, vshader_log
, 0, 0},
375 {D3DSIO_LIT
, "LIT", 2, vshader_lit
, 0, 0},
376 {D3DSIO_DST
, "DST", 3, vshader_dst
, 0, 0},
377 {D3DSIO_LRP
, "LRP", 5, vshader_lrp
, 0, 0},
378 {D3DSIO_FRC
, "FRC", 2, vshader_frc
, 0, 0},
379 {D3DSIO_M4x4
, "M4X4", 3, vshader_m4x4
, 0, 0},
380 {D3DSIO_M4x3
, "M4X3", 3, vshader_m4x3
, 0, 0},
381 {D3DSIO_M3x4
, "M3X4", 3, vshader_m3x4
, 0, 0},
382 {D3DSIO_M3x3
, "M3X3", 3, vshader_m3x3
, 0, 0},
383 {D3DSIO_M3x2
, "M3X2", 3, vshader_m3x2
, 0, 0},
384 /** FIXME: use direct access, so add the other opcodes as stubs */
385 {D3DSIO_EXPP
, "EXPP", 2, vshader_expp
, 0, 0},
386 {D3DSIO_LOGP
, "LOGP", 2, vshader_logp
, 0, 0},
388 {0, NULL
, 0, NULL
, 0, 0}
392 inline static const SHADER_OPCODE
* vshader_program_get_opcode(const DWORD code
) {
394 /** TODO: use dichotomic search */
395 while (NULL
!= vshader_ins
[i
].name
) {
396 if ((code
& D3DSI_OPCODE_MASK
) == vshader_ins
[i
].opcode
) {
397 return &vshader_ins
[i
];
404 inline static BOOL
vshader_is_version_token(DWORD token
) {
405 return 0xFFFE0000 == (token
& 0xFFFE0000);
408 inline static BOOL
vshader_is_comment_token(DWORD token
) {
409 return D3DSIO_COMMENT
== (token
& D3DSI_OPCODE_MASK
);
412 inline static void vshader_program_dump_param(const DWORD param
, int input
) {
413 static const char* rastout_reg_names
[] = { "oPos", "oFog", "oPts" };
414 static const char swizzle_reg_chars
[] = "xyzw";
416 DWORD reg
= param
& 0x00001FFF;
417 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
419 if ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
) TRACE("-");
421 switch (regtype
<< D3DSP_REGTYPE_SHIFT
) {
423 TRACE("R[%lu]", reg
);
426 TRACE("V[%lu]", reg
);
429 TRACE("C[%s%lu]", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
431 case D3DSPR_ADDR
: /*case D3DSPR_TEXTURE:*/
432 TRACE("a[%lu]", reg
);
435 TRACE("%s", rastout_reg_names
[reg
]);
438 TRACE("oD[%lu]", reg
);
440 case D3DSPR_TEXCRDOUT
:
441 TRACE("oT[%lu]", reg
);
448 /** operand output */
449 if ((param
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
450 if (param
& D3DSP_WRITEMASK_0
) TRACE(".x");
451 if (param
& D3DSP_WRITEMASK_1
) TRACE(".y");
452 if (param
& D3DSP_WRITEMASK_2
) TRACE(".z");
453 if (param
& D3DSP_WRITEMASK_3
) TRACE(".w");
457 DWORD swizzle
= (param
& D3DVS_SWIZZLE_MASK
) >> D3DVS_SWIZZLE_SHIFT
;
458 DWORD swizzle_x
= swizzle
& 0x03;
459 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
460 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
461 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
463 * swizzle bits fields:
466 if ((D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
467 if (swizzle_x
== swizzle_y
&&
468 swizzle_x
== swizzle_z
&&
469 swizzle_x
== swizzle_w
) {
470 TRACE(".%c", swizzle_reg_chars
[swizzle_x
]);
473 swizzle_reg_chars
[swizzle_x
],
474 swizzle_reg_chars
[swizzle_y
],
475 swizzle_reg_chars
[swizzle_z
],
476 swizzle_reg_chars
[swizzle_w
]);
482 inline static void vshader_program_add_param(const DWORD param
, int input
, char *hwLine
) {
483 /*static const char* rastout_reg_names[] = { "oPos", "oFog", "oPts" }; */
484 static const char* hwrastout_reg_names
[] = { "result.position", "result.fogcoord", "result.pointsize" };
485 static const char swizzle_reg_chars
[] = "xyzw";
487 DWORD reg
= param
& 0x00001FFF;
488 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
491 if ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
) {
492 strcat(hwLine
, " -");
497 switch (regtype
<< D3DSP_REGTYPE_SHIFT
) {
499 sprintf(tmpReg
, "T%lu", reg
);
500 strcat(hwLine
, tmpReg
);
503 sprintf(tmpReg
, "vertex.attrib[%lu]", reg
);
504 strcat(hwLine
, tmpReg
);
507 sprintf(tmpReg
, "C[%s%lu]", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "A0.x + " : "", reg
);
508 strcat(hwLine
, tmpReg
);
510 case D3DSPR_ADDR
: /*case D3DSPR_TEXTURE:*/
511 sprintf(tmpReg
, "A%lu", reg
);
512 strcat(hwLine
, tmpReg
);
515 sprintf(tmpReg
, "%s", hwrastout_reg_names
[reg
]);
516 strcat(hwLine
, tmpReg
);
520 strcat(hwLine
, "result.color.primary");
522 strcat(hwLine
, "result.color.secondary");
525 case D3DSPR_TEXCRDOUT
:
526 sprintf(tmpReg
, "result.texcoord[%lu]", reg
);
527 strcat(hwLine
, tmpReg
);
534 /** operand output */
535 if ((param
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
537 if (param
& D3DSP_WRITEMASK_0
) {
540 if (param
& D3DSP_WRITEMASK_1
) {
543 if (param
& D3DSP_WRITEMASK_2
) {
546 if (param
& D3DSP_WRITEMASK_3
) {
552 DWORD swizzle
= (param
& D3DVS_SWIZZLE_MASK
) >> D3DVS_SWIZZLE_SHIFT
;
553 DWORD swizzle_x
= swizzle
& 0x03;
554 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
555 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
556 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
558 * swizzle bits fields:
561 if ((D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
562 if (swizzle_x
== swizzle_y
&&
563 swizzle_x
== swizzle_z
&&
564 swizzle_x
== swizzle_w
) {
565 sprintf(tmpReg
, ".%c", swizzle_reg_chars
[swizzle_x
]);
566 strcat(hwLine
, tmpReg
);
568 sprintf(tmpReg
, ".%c%c%c%c",
569 swizzle_reg_chars
[swizzle_x
],
570 swizzle_reg_chars
[swizzle_y
],
571 swizzle_reg_chars
[swizzle_z
],
572 swizzle_reg_chars
[swizzle_w
]);
573 strcat(hwLine
, tmpReg
);
579 DWORD MacroExpansion
[4*4];
581 int ExpandMxMacro(DWORD macro_opcode
, const DWORD
* args
) {
585 switch(macro_opcode
) {
609 for (i
= 0; i
< nComponents
; i
++) {
610 MacroExpansion
[i
*4+0] = opcode
;
611 MacroExpansion
[i
*4+1] = ((*args
) & ~D3DSP_WRITEMASK_ALL
)|(D3DSP_WRITEMASK_0
<<i
);
612 MacroExpansion
[i
*4+2] = *(args
+1);
613 MacroExpansion
[i
*4+3] = (*(args
+2))+i
;
619 * Function parser ...
621 inline static VOID
IDirect3DVertexShaderImpl_GenerateProgramArbHW(IDirect3DVertexShaderImpl
* vshader
, CONST DWORD
* pFunction
) {
622 const DWORD
* pToken
= pFunction
;
623 const DWORD
* pSavedToken
= NULL
;
624 const SHADER_OPCODE
* curOpcode
= NULL
;
627 unsigned lineNum
= 0;
630 DWORD nUseAddressRegister
= 0;
631 DWORD nUseTempRegister
= 0;
634 IDirect3DDevice8Impl
* This
= vshader
->device
;
636 pgmStr
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, 65535); /* 64kb should be enough */
639 * First pass to determine what we need to declare:
640 * - Temporary variables
641 * - Address variables
643 if (NULL
!= pToken
) {
644 while (D3DVS_END() != *pToken
) {
645 if (vshader_is_version_token(*pToken
)) {
650 if (vshader_is_comment_token(*pToken
)) { /** comment */
651 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
653 pToken
+= comment_len
;
656 curOpcode
= vshader_program_get_opcode(*pToken
);
658 if (NULL
== curOpcode
) {
659 while (*pToken
& 0x80000000) {
660 /* skip unrecognized opcode */
664 if (curOpcode
->num_params
> 0) {
665 regtype
= ((((*pToken
) & D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) << D3DSP_REGTYPE_SHIFT
);
666 reg
= ((*pToken
) & 0x00001FFF);
667 /** we should validate GL_MAX_PROGRAM_ADDRESS_REGISTERS_AR limits here */
668 if (D3DSPR_ADDR
== regtype
&& nUseAddressRegister
<= reg
) nUseAddressRegister
= reg
+ 1;
669 /** we should validate GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB limits here */
670 if (D3DSPR_TEMP
== regtype
&& nUseTempRegister
<= reg
) nUseTempRegister
= reg
+ 1;
672 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
673 regtype
= ((((*pToken
) & D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) << D3DSP_REGTYPE_SHIFT
);
674 reg
= ((*pToken
) & 0x00001FFF);
675 /** we should validate GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB limits here */
676 if (D3DSPR_TEMP
== regtype
&& nUseTempRegister
<= reg
) nUseTempRegister
= reg
+ 1;
684 /** second pass, now generate */
687 if (NULL
!= pToken
) {
691 if ((nRemInstr
>= 0) && (--nRemInstr
== -1))
692 /* Macro is finished, continue normal path */
693 pToken
= pSavedToken
;
695 if (D3DVS_END() == *pToken
)
698 if (vshader_is_version_token(*pToken
)) { /** version */
700 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
701 int version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
705 TRACE_(d3d_hw_shader
)("vs.%lu.%lu;\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
707 /* Each release of vertex shaders has had different numbers of temp registers */
710 case 11: numTemps
=12;
712 strcpy(tmpLine
, "!!ARBvp1.0\n");
713 TRACE_(d3d_hw_shader
)("GL HW (%u) : %s", strlen(pgmStr
), tmpLine
); /* Don't add \n to this line as already in tmpLine */
715 case 20: numTemps
=12;
717 strcpy(tmpLine
, "!!ARBvp2.0\n");
718 FIXME_(d3d_hw_shader
)("No work done yet to support vs2.0 in hw\n");
719 TRACE_(d3d_hw_shader
)("GL HW (%u) : %s", strlen(pgmStr
), tmpLine
); /* Don't add \n to this line as already in tmpLine */
721 case 30: numTemps
=32;
723 strcpy(tmpLine
, "!!ARBvp3.0\n");
724 FIXME_(d3d_hw_shader
)("No work done yet to support vs3.0 in hw\n");
725 TRACE_(d3d_hw_shader
)("GL HW (%u) : %s", strlen(pgmStr
), tmpLine
); /* Don't add \n to this line as already in tmpLine */
730 strcpy(tmpLine
, "!!ARBvp1.0\n");
731 FIXME_(d3d_hw_shader
)("Unrecognized vertex shader version!\n");
733 strcat(pgmStr
,tmpLine
);
736 for (i
= 0; i
< nUseTempRegister
/*we should check numTemps here*/; i
++) {
737 sprintf(tmpLine
, "TEMP T%ld;\n", i
);
739 TRACE_(d3d_hw_shader
)("GL HW (%u, %u) : %s", lineNum
, strlen(pgmStr
), tmpLine
); /* Don't add \n to this line as already in tmpLine */
740 strcat(pgmStr
,tmpLine
);
742 for (i
= 0; i
< nUseAddressRegister
; i
++) {
743 sprintf(tmpLine
, "ADDRESS A%ld;\n", i
);
745 TRACE_(d3d_hw_shader
)("GL HW (%u, %u) : %s", lineNum
, strlen(pgmStr
), tmpLine
); /* Don't add \n to this line as already in tmpLine */
746 strcat(pgmStr
,tmpLine
);
748 /* Due to the dynamic constants binding mechanism, we need to declare
749 * all the constants for relative addressing. */
750 /* Mesa supports only 95 constants for VS1.X although we should have at least 96. */
751 if (GL_VENDOR_NAME(This
) == VENDOR_MESA
|| GL_VENDOR_NAME(This
) == VENDOR_WINE
) {
754 sprintf(tmpLine
, "PARAM C[%d] = { program.env[0..%d] };\n", numConstants
, numConstants
-1);
755 TRACE_(d3d_hw_shader
)("GL HW (%u,%u) : %s", lineNum
, strlen(pgmStr
), tmpLine
); /* Don't add \n to this line as already in tmpLine */
756 strcat(pgmStr
, tmpLine
);
762 if (vshader_is_comment_token(*pToken
)) { /** comment */
763 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
765 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
766 pToken
+= comment_len
;
769 curOpcode
= vshader_program_get_opcode(*pToken
);
771 if (NULL
== curOpcode
) {
772 /* unknown current opcode ... */
773 while (*pToken
& 0x80000000) {
774 TRACE_(d3d_hw_shader
)("unrecognized opcode: %08lx\n", *pToken
);
778 /* Build opcode for GL vertex_program */
779 switch (curOpcode
->opcode
) {
783 /* Address registers must be loaded with the ARL instruction */
784 if (((*pToken
) & D3DSP_REGTYPE_MASK
) == D3DSPR_ADDR
) {
785 if (0 < nUseAddressRegister
) {
786 strcpy(tmpLine
, "ARL");
789 FIXME_(d3d_hw_shader
)("Try to load an undeclared address register!\n");
807 strcpy(tmpLine
, curOpcode
->name
);
811 strcpy(tmpLine
, "EXP");
814 strcpy(tmpLine
, "LOG");
817 strcpy(tmpLine
, "EX2");
820 strcpy(tmpLine
, "LG2");
828 /* Expand the macro and get number of generated instruction */
829 nRemInstr
= ExpandMxMacro(curOpcode
->opcode
, pToken
);
830 /* Save point to next instruction */
831 pSavedToken
= pToken
+ 3;
832 /* Execute expanded macro */
833 pToken
= MacroExpansion
;
837 FIXME_(d3d_hw_shader
)("Can't handle opcode %s in hwShader\n", curOpcode
->name
);
840 if (curOpcode
->num_params
> 0) {
841 vshader_program_add_param(*pToken
, 0, tmpLine
);
844 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
845 strcat(tmpLine
, ",");
846 vshader_program_add_param(*pToken
, 1, tmpLine
);
850 strcat(tmpLine
,";\n");
852 TRACE_(d3d_hw_shader
)("GL HW (%u, %u) : %s", lineNum
, strlen(pgmStr
), tmpLine
); /* Don't add \n to this line as already in tmpLine */
853 strcat(pgmStr
, tmpLine
);
856 strcpy(tmpLine
, "END\n");
858 TRACE_(d3d_hw_shader
)("GL HW (%u, %u) : %s", lineNum
, strlen(pgmStr
), tmpLine
); /* Don't add \n to this line as already in tmpLine */
859 strcat(pgmStr
, tmpLine
);
862 /* Create the hw shader */
863 GL_EXTCALL(glGenProgramsARB(1, &vshader
->prgId
));
864 TRACE_(d3d_hw_shader
)("Creating a hw vertex shader, prg=%d\n", vshader
->prgId
);
866 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB
, vshader
->prgId
));
868 /* Create the program and check for errors */
869 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
, strlen(pgmStr
), pgmStr
));
870 if (glGetError() == GL_INVALID_OPERATION
) {
872 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &errPos
);
873 FIXME_(d3d_hw_shader
)("HW VertexShader Error at position: %d\n%s\n", errPos
, glGetString(GL_PROGRAM_ERROR_STRING_ARB
));
877 HeapFree(GetProcessHeap(), 0, pgmStr
);
880 inline static VOID
IDirect3DVertexShaderImpl_ParseProgram(IDirect3DVertexShaderImpl
* vshader
, CONST DWORD
* pFunction
, int useHW
) {
881 const DWORD
* pToken
= pFunction
;
882 const SHADER_OPCODE
* curOpcode
= NULL
;
886 if (NULL
!= pToken
) {
887 while (D3DVS_END() != *pToken
) {
888 if (vshader_is_version_token(*pToken
)) { /** version */
889 TRACE("vs.%lu.%lu\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
894 if (vshader_is_comment_token(*pToken
)) { /** comment */
895 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
897 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
898 pToken
+= comment_len
;
899 len
+= comment_len
+ 1;
902 curOpcode
= vshader_program_get_opcode(*pToken
);
905 if (NULL
== curOpcode
) {
906 /* unknown current opcode ... */
907 while (*pToken
& 0x80000000) {
908 TRACE("unrecognized opcode: %08lx\n", *pToken
);
913 TRACE("%s ", curOpcode
->name
);
914 if (curOpcode
->num_params
> 0) {
915 vshader_program_dump_param(*pToken
, 0);
918 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
920 vshader_program_dump_param(*pToken
, 1);
928 vshader
->functionLength
= (len
+ 1) * sizeof(DWORD
);
930 vshader
->functionLength
= 1; /* no Function defined use fixed function vertex processing */
933 /* Generate HW shader if needed */
934 if (useHW
&& NULL
!= pFunction
) {
935 IDirect3DVertexShaderImpl_GenerateProgramArbHW(vshader
, pFunction
);
938 /* copy the function ... because it will certainly be released by application */
939 if (NULL
!= pFunction
) {
940 vshader
->function
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, vshader
->functionLength
);
941 memcpy(vshader
->function
, pFunction
, vshader
->functionLength
);
943 vshader
->function
= NULL
;
/**
 * Create a vertex shader object for a device.
 *
 * Visible steps: allocate a zero-filled IDirect3DVertexShaderImpl, record the
 * owning device and the Usage value, allocate a zero-filled VSHADERDATA8 for
 * object->data, compute useHW, pass the token stream to
 * IDirect3DVertexShaderImpl_ParseProgram() and store the new object in
 * *ppVertexShader.
 *
 * If the object allocation fails, *ppVertexShader is set to NULL and
 * D3DERR_OUTOFVIDEOMEMORY is returned.
 *
 * NOTE(review): the result of the second HeapAlloc (object->data) is not
 * checked in the lines visible here - confirm how a NULL data pointer is
 * handled by the callers.
 * NOTE(review): the declaration of useHW and the final return statement are
 * not visible in this view.
 */
947 HRESULT WINAPI
IDirect3DDeviceImpl_CreateVertexShader(IDirect3DDevice8Impl
* This
, CONST DWORD
* pFunction
, DWORD Usage
, IDirect3DVertexShaderImpl
** ppVertexShader
) {
948 IDirect3DVertexShaderImpl
* object
;
951 object
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, sizeof(IDirect3DVertexShaderImpl
));
952 if (NULL
== object
) {
953 *ppVertexShader
= NULL
;
954 return D3DERR_OUTOFVIDEOMEMORY
;
956 /*object->lpVtbl = &Direct3DVextexShader9_Vtbl;*/
/* The device pointer is stored without taking a reference (see FIXME). */
957 object
->device
= This
; /* FIXME: AddRef(This) */
960 object
->usage
= Usage
;
961 object
->data
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, sizeof(VSHADERDATA8
));
/* The hardware path is chosen only when vs_mode is VS_HW, ARB_VERTEX_PROGRAM
 * is supported, the device type is not D3DDEVTYPE_REF and the usage value is
 * not equal to D3DUSAGE_SOFTWAREPROCESSING. */
963 useHW
= (((vs_mode
== VS_HW
) && GL_SUPPORT(ARB_VERTEX_PROGRAM
)) &&
964 This
->devType
!= D3DDEVTYPE_REF
&&
965 object
->usage
!= D3DUSAGE_SOFTWAREPROCESSING
);
967 IDirect3DVertexShaderImpl_ParseProgram(object
, pFunction
, useHW
);
969 *ppVertexShader
= object
;
/**
 * Hardware execution entry point for a vertex shader.
 *
 * Takes the shader plus its input and output data blocks. Only a TODO note is
 * visible in this view.
 * NOTE(review): the function body and its return value are not visible here -
 * confirm against the complete file.
 */
973 BOOL
IDirect3DVertexShaderImpl_ExecuteHAL(IDirect3DVertexShaderImpl
* vshader
, VSHADERINPUTDATA8
* input
, VSHADEROUTPUTDATA8
* output
) {
975 * TODO: use the NV_vertex_program (or 1_1) extension
976 * and specifics vendors (ARB_vertex_program??) variants for it
/**
 * Software interpreter for a vertex shader token stream.
 *
 * Walks vshader->function token by token until the D3DVS_END() token is
 * reached. Comment tokens are skipped using the length encoded in the token.
 * For every other token the opcode is looked up with
 * vshader_program_get_opcode(). Each parameter token is decoded into a
 * pointer to the register it names (p[]), source operands that are swizzled
 * or negated are copied into the scratch array s[], a partial destination
 * write mask is routed through the scratch vector d, and the opcode's
 * soft_fct is called with the prepared p_send[] pointers.
 *
 * NOTE(review): several lines of this function (some declarations, case
 * labels, else branches and the return) are not visible in this view; the
 * comments below describe only what is visible.
 * NOTE(review): s has 3 entries but is indexed with the parameter index i,
 * which runs up to num_params - 1, while the dispatch below handles up to 5
 * parameters - confirm that no opcode reaching this path needs s[3] or s[4].
 */
981 HRESULT WINAPI
IDirect3DVertexShaderImpl_ExecuteSW(IDirect3DVertexShaderImpl
* vshader
, VSHADERINPUTDATA8
* input
, VSHADEROUTPUTDATA8
* output
) {
982 /** Vertex Shader Temporary Registers */
983 D3DSHADERVECTOR R
[12];
984 /*D3DSHADERSCALAR A0;*/
985 D3DSHADERVECTOR A
[1];
986 /** temporary Vector for modifier management */
988 D3DSHADERVECTOR s
[3];
990 const DWORD
* pToken
= vshader
->function
;
991 const SHADER_OPCODE
* curOpcode
= NULL
;
992 /** functions parameters */
/* p[] holds the decoded register pointers, p_send[] the pointers actually
 * passed to the opcode function. */
993 D3DSHADERVECTOR
* p
[5];
994 D3DSHADERVECTOR
* p_send
[5];
997 /** init temporary register */
998 memset(R
, 0, 12 * sizeof(D3DSHADERVECTOR
));
1000 /* vshader_program_parse(vshader); */
1001 #if 0 /* Must not be 1 in cvs */
1003 TRACE_VSVECTOR(vshader
->data
->C
[0]);
1004 TRACE_VSVECTOR(vshader
->data
->C
[1]);
1005 TRACE_VSVECTOR(vshader
->data
->C
[2]);
1006 TRACE_VSVECTOR(vshader
->data
->C
[3]);
1007 TRACE_VSVECTOR(vshader
->data
->C
[4]);
1008 TRACE_VSVECTOR(vshader
->data
->C
[5]);
1009 TRACE_VSVECTOR(vshader
->data
->C
[6]);
1010 TRACE_VSVECTOR(vshader
->data
->C
[7]);
1011 TRACE_VSVECTOR(vshader
->data
->C
[8]);
1012 TRACE_VSVECTOR(vshader
->data
->C
[64]);
1013 TRACE_VSVECTOR(input
->V
[D3DVSDE_POSITION
]);
1014 TRACE_VSVECTOR(input
->V
[D3DVSDE_BLENDWEIGHT
]);
1015 TRACE_VSVECTOR(input
->V
[D3DVSDE_BLENDINDICES
]);
1016 TRACE_VSVECTOR(input
->V
[D3DVSDE_NORMAL
]);
1017 TRACE_VSVECTOR(input
->V
[D3DVSDE_PSIZE
]);
1018 TRACE_VSVECTOR(input
->V
[D3DVSDE_DIFFUSE
]);
1019 TRACE_VSVECTOR(input
->V
[D3DVSDE_SPECULAR
]);
1020 TRACE_VSVECTOR(input
->V
[D3DVSDE_TEXCOORD0
]);
1021 TRACE_VSVECTOR(input
->V
[D3DVSDE_TEXCOORD1
]);
1024 TRACE_VSVECTOR(vshader
->data
->C
[64]);
1026 /* the first dword is the version tag */
1027 /* TODO: parse it */
1029 if (vshader_is_version_token(*pToken
)) { /** version */
/* Main interpreter loop: one iteration per token until the end token. */
1032 while (D3DVS_END() != *pToken
) {
1033 if (vshader_is_comment_token(*pToken
)) { /** comment */
1034 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1036 pToken
+= comment_len
;
1039 curOpcode
= vshader_program_get_opcode(*pToken
);
1041 if (NULL
== curOpcode
) {
1043 /* unknown current opcode ... */
1044 while (*pToken
& 0x80000000) {
1046 TRACE("unrecognized opcode: pos=%d token=%08lX\n", (pToken
- 1) - vshader
->function
, *(pToken
- 1));
1048 TRACE("unrecognized opcode param: pos=%d token=%08lX what=", pToken
- vshader
->function
, *pToken
);
1049 vshader_program_add_param(*pToken
, i
, NULL
); /* Add function just used for trace error scenario */
1056 if (curOpcode
->num_params
> 0) {
1057 /*TRACE(">> execting opcode: pos=%d opcode_name=%s token=%08lX\n", pToken - vshader->function, curOpcode->name, *pToken);*/
/* Decode every parameter token: the low 13 bits are the register number,
 * the register type selects which register file p[i] points into. */
1058 for (i
= 0; i
< curOpcode
->num_params
; ++i
) {
1059 DWORD reg
= pToken
[i
] & 0x00001FFF;
1060 DWORD regtype
= ((pToken
[i
] & D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
1062 switch (regtype
<< D3DSP_REGTYPE_SHIFT
) {
1064 /*TRACE("p[%d]=R[%d]\n", i, reg);*/
1068 /*TRACE("p[%d]=V[%s]\n", i, VertexShaderDeclRegister[reg]);*/
1069 p
[i
] = &input
->V
[reg
];
/* Constant register: with relative addressing the index is offset by
 * A[0].x converted to DWORD. */
1072 if (pToken
[i
] & D3DVS_ADDRMODE_RELATIVE
) {
1073 p
[i
] = &vshader
->data
->C
[(DWORD
) A
[0].x
+ reg
];
1075 p
[i
] = &vshader
->data
->C
[reg
];
1078 case D3DSPR_ADDR
: /*case D3DSPR_TEXTURE:*/
1080 ERR("cannot handle address registers != a0, forcing use of a0\n");
1083 /*TRACE("p[%d]=A[%d]\n", i, reg);*/
1086 case D3DSPR_RASTOUT
:
1088 case D3DSRO_POSITION
:
1089 p
[i
] = &output
->oPos
;
1092 p
[i
] = &output
->oFog
;
1094 case D3DSRO_POINT_SIZE
:
1095 p
[i
] = &output
->oPts
;
1099 case D3DSPR_ATTROUT
:
1100 /*TRACE("p[%d]=oD[%d]\n", i, reg);*/
1101 p
[i
] = &output
->oD
[reg
];
1103 case D3DSPR_TEXCRDOUT
:
1104 /*TRACE("p[%d]=oT[%d]\n", i, reg);*/
1105 p
[i
] = &output
->oT
[reg
];
/* Parameters after the first are treated as sources: when negation or a
 * non-default swizzle is requested, the selected components are copied
 * into s[i], each picked by a 2-bit field of the swizzle value. */
1111 if (i
> 0) { /* input reg */
1112 DWORD swizzle
= (pToken
[i
] & D3DVS_SWIZZLE_MASK
) >> D3DVS_SWIZZLE_SHIFT
;
1113 UINT isNegative
= ((pToken
[i
] & D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
);
1115 if (!isNegative
&& (D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) == swizzle
) {
1116 /*TRACE("p[%d] not swizzled\n", i);*/
1119 DWORD swizzle_x
= swizzle
& 0x03;
1120 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
1121 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
1122 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
1123 /*TRACE("p[%d] swizzled\n", i);*/
1124 float* tt
= (float*) p
[i
];
1125 s
[i
].x
= (isNegative
) ? -tt
[swizzle_x
] : tt
[swizzle_x
];
1126 s
[i
].y
= (isNegative
) ? -tt
[swizzle_y
] : tt
[swizzle_y
];
1127 s
[i
].z
= (isNegative
) ? -tt
[swizzle_z
] : tt
[swizzle_z
];
1128 s
[i
].w
= (isNegative
) ? -tt
[swizzle_w
] : tt
[swizzle_w
];
1131 } else { /* output reg */
1132 if ((pToken
[i
] & D3DSP_WRITEMASK_ALL
) == D3DSP_WRITEMASK_ALL
) {
1135 p_send
[i
] = &d
; /* to be post-processed for modifiers management */
/* Call the opcode implementation with as many operands as it declares. */
1141 switch (curOpcode
->num_params
) {
1143 curOpcode
->soft_fct();
1146 curOpcode
->soft_fct(p_send
[0]);
1149 curOpcode
->soft_fct(p_send
[0], p_send
[1]);
1152 curOpcode
->soft_fct(p_send
[0], p_send
[1], p_send
[2]);
1155 curOpcode
->soft_fct(p_send
[0], p_send
[1], p_send
[2], p_send
[3]);
1158 curOpcode
->soft_fct(p_send
[0], p_send
[1], p_send
[2], p_send
[3], p_send
[4]);
1161 ERR("%s too many params: %u\n", curOpcode
->name
, curOpcode
->num_params
);
1164 /* check if output reg modifier post-process */
/* With a partial write mask only the enabled components are copied from
 * the scratch vector d into the real destination register. */
1165 if (curOpcode
->num_params
> 0 && (pToken
[0] & D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
1166 if (pToken
[0] & D3DSP_WRITEMASK_0
) p
[0]->x
= d
.x
;
1167 if (pToken
[0] & D3DSP_WRITEMASK_1
) p
[0]->y
= d
.y
;
1168 if (pToken
[0] & D3DSP_WRITEMASK_2
) p
[0]->z
= d
.z
;
1169 if (pToken
[0] & D3DSP_WRITEMASK_3
) p
[0]->w
= d
.w
;
1173 TRACE_VSVECTOR(output
->oPos
);
1174 TRACE_VSVECTOR(output
->oD
[0]);
1175 TRACE_VSVECTOR(output
->oD
[1]);
1176 TRACE_VSVECTOR(output
->oT
[0]);
1177 TRACE_VSVECTOR(output
->oT
[1]);
1178 TRACE_VSVECTOR(R
[0]);
1179 TRACE_VSVECTOR(R
[1]);
1180 TRACE_VSVECTOR(R
[2]);
1181 TRACE_VSVECTOR(R
[3]);
1182 TRACE_VSVECTOR(R
[4]);
1183 TRACE_VSVECTOR(R
[5]);
1186 /* to next opcode token */
1187 pToken
+= curOpcode
->num_params
;
1190 TRACE("End of current instruction:\n");
1191 TRACE_VSVECTOR(output
->oPos
);
1192 TRACE_VSVECTOR(output
->oD
[0]);
1193 TRACE_VSVECTOR(output
->oD
[1]);
1194 TRACE_VSVECTOR(output
->oT
[0]);
1195 TRACE_VSVECTOR(output
->oT
[1]);
1196 TRACE_VSVECTOR(R
[0]);
1197 TRACE_VSVECTOR(R
[1]);
1198 TRACE_VSVECTOR(R
[2]);
1199 TRACE_VSVECTOR(R
[3]);
1200 TRACE_VSVECTOR(R
[4]);
1201 TRACE_VSVECTOR(R
[5]);
1204 #if 0 /* Must not be 1 in cvs */
1206 TRACE_VSVECTOR(output
->oPos
);
1207 TRACE_VSVECTOR(output
->oD
[0]);
1208 TRACE_VSVECTOR(output
->oD
[1]);
1209 TRACE_VSVECTOR(output
->oT
[0]);
1210 TRACE_VSVECTOR(output
->oT
[1]);
1215 HRESULT WINAPI
IDirect3DVertexShaderImpl_GetFunction(IDirect3DVertexShaderImpl
* This
, VOID
* pData
, UINT
* pSizeOfData
) {
1216 if (NULL
== pData
) {
1217 *pSizeOfData
= This
->functionLength
;
1220 if (*pSizeOfData
< This
->functionLength
) {
1221 *pSizeOfData
= This
->functionLength
;
1222 return D3DERR_MOREDATA
;
1224 if (NULL
== This
->function
) { /* no function defined */
1225 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This
, pData
);
1226 (*(DWORD
**) pData
) = NULL
;
1228 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
1229 memcpy(pData
, This
->function
, This
->functionLength
);
1234 HRESULT WINAPI
IDirect3DVertexShaderImpl_SetConstantF(IDirect3DVertexShaderImpl
* This
, UINT StartRegister
, CONST FLOAT
* pConstantData
, UINT Vector4fCount
) {
1235 if (StartRegister
+ Vector4fCount
> D3D8_VSHADER_MAX_CONSTANTS
) {
1236 return D3DERR_INVALIDCALL
;
1238 if (NULL
== This
->data
) { /* temporary while datas not supported */
1239 FIXME("(%p) : VertexShader_SetConstant not fully supported yet\n", This
);
1240 return D3DERR_INVALIDCALL
;
1242 memcpy(&This
->data
->C
[StartRegister
], pConstantData
, Vector4fCount
* 4 * sizeof(FLOAT
));
1246 HRESULT WINAPI
IDirect3DVertexShaderImpl_GetConstantF(IDirect3DVertexShaderImpl
* This
, UINT StartRegister
, FLOAT
* pConstantData
, UINT Vector4fCount
) {
1247 if (StartRegister
+ Vector4fCount
> D3D8_VSHADER_MAX_CONSTANTS
) {
1248 return D3DERR_INVALIDCALL
;
1250 if (NULL
== This
->data
) { /* temporary while datas not supported */
1251 return D3DERR_INVALIDCALL
;
1253 memcpy(pConstantData
, &This
->data
->C
[StartRegister
], Vector4fCount
* 4 * sizeof(FLOAT
));
1258 /**********************************************************************************************************************************************
1259 **********************************************************************************************************************************************
1260 **********************************************************************************************************************************************
1261 **********************************************************************************************************************************************
1262 **********************************************************************************************************************************************/
/*
 * Pixel shader instruction handlers referenced by the pshader_ins table.
 * Each takes a destination vector d and zero to four source vectors (s0..s3).
 * NOTE(review): no body statements are visible for any of these definitions
 * in this view - presumably they are placeholders without a software
 * implementation; confirm against the complete file.
 */
1264 void pshader_texcoord(D3DSHADERVECTOR
* d
) {
1267 void pshader_texkill(D3DSHADERVECTOR
* d
) {
1270 void pshader_tex(D3DSHADERVECTOR
* d
) {
1273 void pshader_texbem(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1276 void pshader_texbeml(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1279 void pshader_texreg2ar(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1282 void pshader_texreg2gb(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1285 void pshader_texm3x2pad(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1288 void pshader_texm3x2tex(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1291 void pshader_texm3x3pad(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1294 void pshader_texm3x3tex(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1297 void pshader_texm3x3diff(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1300 void pshader_texm3x3spec(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
1303 void pshader_texm3x3vspec(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1306 void pshader_cnd(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
, D3DSHADERVECTOR
* s2
) {
1309 void pshader_def(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
, D3DSHADERVECTOR
* s2
, D3DSHADERVECTOR
* s3
) {
1312 void pshader_texreg2rgb(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1315 void pshader_texdp3tex(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1318 void pshader_texm3x2depth(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1321 void pshader_texdp3(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1324 void pshader_texm3x3(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
) {
1327 void pshader_texdepth(D3DSHADERVECTOR
* d
) {
1330 void pshader_cmp(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
, D3DSHADERVECTOR
* s2
) {
1333 void pshader_bem(D3DSHADERVECTOR
* d
, D3DSHADERVECTOR
* s0
, D3DSHADERVECTOR
* s1
) {
/*
 * Pixel shader opcode table searched by pshader_program_get_opcode().
 *
 * Each row appears to hold, in order: opcode, mnemonic, parameter count,
 * handler function, minimum version, maximum version. The lookup treats a
 * row whose minimum and maximum version are both 0 as valid for every shader
 * version. The same opcode may appear more than once with different version
 * ranges (for example D3DSIO_TEXCOORD and D3DSIO_TEX).
 *
 * NOTE(review): the mnemonic "texbreg2rgb" looks like a typo for
 * "texreg2rgb" - confirm before changing, it is emitted in traces.
 * NOTE(review): the texm3x3diff row uses D3DPS_VERSION(0,0) for both bounds
 * rather than plain 0 - confirm whether that row is meant to be reachable.
 * NOTE(review): the row that terminates the table is not visible in this
 * view; the lookup stops at the first row whose name is NULL.
 */
1336 static CONST SHADER_OPCODE pshader_ins
[] = {
1337 {D3DSIO_NOP
, "nop", 0, vshader_nop
, 0, 0},
1338 {D3DSIO_MOV
, "mov", 2, vshader_mov
, 0, 0},
1339 {D3DSIO_ADD
, "add", 3, vshader_add
, 0, 0},
1340 {D3DSIO_SUB
, "sub", 3, vshader_sub
, 0, 0},
1341 {D3DSIO_MAD
, "mad", 4, vshader_mad
, 0, 0},
1342 {D3DSIO_MUL
, "mul", 3, vshader_mul
, 0, 0},
1343 {D3DSIO_RCP
, "rcp", 2, vshader_rcp
, 0, 0},
1344 {D3DSIO_RSQ
, "rsq", 2, vshader_rsq
, 0, 0},
1345 {D3DSIO_DP3
, "dp3", 3, vshader_dp3
, 0, 0},
1346 {D3DSIO_DP4
, "dp4", 3, vshader_dp4
, 0, 0},
1347 {D3DSIO_MIN
, "min", 3, vshader_min
, 0, 0},
1348 {D3DSIO_MAX
, "max", 3, vshader_max
, 0, 0},
1349 {D3DSIO_SLT
, "slt", 3, vshader_slt
, 0, 0},
1350 {D3DSIO_SGE
, "sge", 3, vshader_sge
, 0, 0},
1351 {D3DSIO_EXP
, "exp", 2, vshader_exp
, 0, 0},
1352 {D3DSIO_LOG
, "log", 2, vshader_log
, 0, 0},
1353 {D3DSIO_LIT
, "lit", 2, vshader_lit
, 0, 0},
1354 {D3DSIO_DST
, "dst", 3, vshader_dst
, 0, 0},
1355 {D3DSIO_LRP
, "lrp", 4, vshader_lrp
, 0, 0},
1356 {D3DSIO_FRC
, "frc", 2, vshader_frc
, 0, 0},
1357 {D3DSIO_M4x4
, "m4x4", 3, vshader_m4x4
, 0, 0},
1358 {D3DSIO_M4x3
, "m4x3", 3, vshader_m4x3
, 0, 0},
1359 {D3DSIO_M3x4
, "m3x4", 3, vshader_m3x4
, 0, 0},
1360 {D3DSIO_M3x3
, "m3x3", 3, vshader_m3x3
, 0, 0},
1361 {D3DSIO_M3x2
, "m3x2", 3, vshader_m3x2
, 0, 0},
1363 {D3DSIO_TEXCOORD
, "texcoord", 1, pshader_texcoord
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1364 {D3DSIO_TEXCOORD
, "texcrd", 2, pshader_texcoord
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
1365 {D3DSIO_TEXKILL
, "texkill", 1, pshader_texkill
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
1366 {D3DSIO_TEX
, "tex", 1, pshader_tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1367 {D3DSIO_TEX
, "texld", 2, pshader_tex
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
1368 {D3DSIO_TEXBEM
, "texbem", 2, pshader_texbem
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1369 {D3DSIO_TEXBEML
, "texbeml", 2, pshader_texbeml
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1370 {D3DSIO_TEXREG2AR
, "texreg2ar", 2, pshader_texreg2ar
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
1371 {D3DSIO_TEXREG2GB
, "texreg2gb", 2, pshader_texreg2gb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
1372 {D3DSIO_TEXM3x2PAD
, "texm3x2pad", 2, pshader_texm3x2pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1373 {D3DSIO_TEXM3x2TEX
, "texm3x2tex", 2, pshader_texm3x2tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1374 {D3DSIO_TEXM3x3PAD
, "texm3x3pad", 2, pshader_texm3x3pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1375 {D3DSIO_TEXM3x3TEX
, "texm3x3tex", 2, pshader_texm3x3tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1376 {D3DSIO_TEXM3x3DIFF
, "texm3x3diff", 2, pshader_texm3x3diff
, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
1377 {D3DSIO_TEXM3x3SPEC
, "texm3x3spec", 3, pshader_texm3x3spec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1378 {D3DSIO_TEXM3x3VSPEC
, "texm3x3vspec", 2, pshader_texm3x3vspec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
1380 {D3DSIO_EXPP
, "expp", 2, vshader_expp
, 0, 0},
1381 {D3DSIO_LOGP
, "logp", 2, vshader_logp
, 0, 0},
1383 {D3DSIO_CND
, "cnd", 4, pshader_cnd
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
1384 {D3DSIO_DEF
, "def", 5, pshader_def
, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
1385 {D3DSIO_TEXREG2RGB
, "texbreg2rgb", 2, pshader_texreg2rgb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
1387 {D3DSIO_TEXDP3TEX
, "texdp3tex", 2, pshader_texdp3tex
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
1388 {D3DSIO_TEXM3x2DEPTH
, "texm3x2depth", 2, pshader_texm3x2depth
, D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
1389 {D3DSIO_TEXDP3
, "texdp3", 2, pshader_texdp3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
1390 {D3DSIO_TEXM3x3
, "texm3x3", 2, pshader_texm3x3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
1391 {D3DSIO_TEXDEPTH
, "texdepth", 1, pshader_texdepth
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
1392 {D3DSIO_CMP
, "cmp", 4, pshader_cmp
, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
1393 {D3DSIO_BEM
, "bem", 3, pshader_bem
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
1395 {D3DSIO_PHASE
, "phase", 0, vshader_nop
, 0, 0},
1400 inline static const SHADER_OPCODE
* pshader_program_get_opcode(const DWORD code
, const int version
) {
1402 DWORD hex_version
= D3DPS_VERSION(version
/10, version
%10);
1403 /** TODO: use dichotomic search */
1404 while (NULL
!= pshader_ins
[i
].name
) {
1405 if ( ( (code
& D3DSI_OPCODE_MASK
) == pshader_ins
[i
].opcode
) &&
1406 ( ( (hex_version
>= pshader_ins
[i
].min_version
) && (hex_version
<= pshader_ins
[i
].max_version
)) ||
1407 ( (pshader_ins
[i
].min_version
== 0) && (pshader_ins
[i
].max_version
== 0) ) ) ) {
1408 return &pshader_ins
[i
];
1415 inline static BOOL
pshader_is_version_token(DWORD token
) {
1416 return 0xFFFF0000 == (token
& 0xFFFF0000);
1419 inline static BOOL
pshader_is_comment_token(DWORD token
) {
1420 return D3DSIO_COMMENT
== (token
& D3DSI_OPCODE_MASK
);
/*
 * Trace an instruction mnemonic together with its modifiers.
 *
 * code is the instruction token: any bits outside D3DSI_OPCODE_MASK are
 * traced first ("+" for 0x40000000, otherwise an "unhandled modifier"
 * message). The mnemonic from curOpcode->name follows. output is the
 * destination parameter token: its shift field is traced as "_x<n>" or
 * "_d<n>" and its modifier field as "_sat" or an "unhandled" message.
 *
 * NOTE(review): the switch heads and the condition that selects between the
 * "_x" and "_d" forms are not visible in this view.
 */
1423 inline static void pshader_program_dump_opcode(const SHADER_OPCODE
* curOpcode
, const DWORD code
, const DWORD output
) {
1424 if (0 != (code
& ~D3DSI_OPCODE_MASK
)) {
1425 DWORD mask
= (code
& ~D3DSI_OPCODE_MASK
);
1427 case 0x40000000: TRACE("+"); break;
1429 TRACE(" unhandled modifier(0x%08lx) ", mask
);
1432 TRACE("%s", curOpcode
->name
);
1434 * normally this is a destination reg modifier
1435 * but in pixel shaders asm code its specified as:
1436 * dp3_x4 t1.rgba, r1, c1
1438 * dp3_x2_sat r0, t0_bx2, v0_bx2
1439 * so for better debbuging i use the same norm
/* Destination shift: traced as a multiplier (_x) or a divider (_d). */
1441 if (0 != (output
& D3DSP_DSTSHIFT_MASK
)) {
1442 DWORD shift
= (output
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
1444 TRACE("_x%u", 1 << shift
);
1446 TRACE("_d%u", 1 << (16-shift
));
/* Destination modifier such as saturate. */
1449 if (0 != (output
& D3DSP_DSTMOD_MASK
)) {
1450 DWORD mask
= output
& D3DSP_DSTMOD_MASK
;
1452 case D3DSPDM_SATURATE
: TRACE("_sat"); break;
1454 TRACE("_unhandled_modifier(0x%08lx)", mask
);
/*
 * Trace one instruction parameter token in a readable form.
 *
 * param is the parameter token; its low 13 bits are the register number and
 * the D3DSP_REGTYPE_MASK bits the register type. The register is traced as
 * R[n], V[n], C[n] (prefixed with "a0.x + " for relative addressing), t[n],
 * an entry of rastout_reg_names, oD[n] or oT[n]. The code after the
 * "operand output" comment traces a partial write mask with the letters
 * r/g/b/a; the code after the "operand input" comment traces a source
 * modifier suffix and a swizzle.
 *
 * NOTE(review): the line that selects between the output and input parts
 * (presumably a test of the input argument) is not visible in this view, nor
 * are the statements executed for the negating/complement source modifiers.
 * NOTE(review): rastout_reg_names has 5 entries and is indexed by the
 * register number without a visible bounds check.
 */
1460 inline static void pshader_program_dump_param(const DWORD param
, int input
) {
1461 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1462 static const char swizzle_reg_chars
[] = "rgba";
1464 DWORD reg
= param
& 0x00001FFF;
1465 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
/* Source modifiers that negate the operand, and the complement modifier,
 * are handled before the register name is traced. */
1468 if ( ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
) ||
1469 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_BIASNEG
) ||
1470 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_SIGNNEG
) ||
1471 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_X2NEG
) )
1473 else if ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_COMP
)
/* Register name, selected by register type. */
1477 switch (regtype
<< D3DSP_REGTYPE_SHIFT
) {
1479 TRACE("R[%lu]", reg
);
1482 TRACE("V[%lu]", reg
);
1485 TRACE("C[%s%lu]", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1487 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
1488 TRACE("t[%lu]", reg
);
1490 case D3DSPR_RASTOUT
:
1491 TRACE("%s", rastout_reg_names
[reg
]);
1493 case D3DSPR_ATTROUT
:
1494 TRACE("oD[%lu]", reg
);
1496 case D3DSPR_TEXCRDOUT
:
1497 TRACE("oT[%lu]", reg
);
1504 /** operand output */
1506 * for better debugging traces it's done into opcode dump code
1507 * @see pshader_program_dump_opcode
1508 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1509 DWORD mask = param & D3DSP_DSTMOD_MASK;
1511 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1513 TRACE("_unhandled_modifier(0x%08lx)", mask);
1516 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1517 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1519 TRACE("_x%u", 1 << shift);
/* Partial write mask: one letter per enabled component. */
1523 if ((param
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
1525 if (param
& D3DSP_WRITEMASK_0
) TRACE("r");
1526 if (param
& D3DSP_WRITEMASK_1
) TRACE("g");
1527 if (param
& D3DSP_WRITEMASK_2
) TRACE("b");
1528 if (param
& D3DSP_WRITEMASK_3
) TRACE("a");
1531 /** operand input */
/* The swizzle value holds four 2-bit component selectors, x in the lowest
 * bits and w in the highest. */
1532 DWORD swizzle
= (param
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
1533 DWORD swizzle_x
= swizzle
& 0x03;
1534 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
1535 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
1536 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
1538 if (0 != (param
& D3DSP_SRCMOD_MASK
)) {
1539 DWORD mask
= param
& D3DSP_SRCMOD_MASK
;
1540 /*TRACE("_modifier(0x%08lx) ", mask);*/
1542 case D3DSPSM_NONE
: break;
1543 case D3DSPSM_NEG
: break;
1544 case D3DSPSM_BIAS
: TRACE("_bias"); break;
1545 case D3DSPSM_BIASNEG
: TRACE("_bias"); break;
1546 case D3DSPSM_SIGN
: TRACE("_bx2"); break;
1547 case D3DSPSM_SIGNNEG
: TRACE("_bx2"); break;
1548 case D3DSPSM_COMP
: break;
1549 case D3DSPSM_X2
: TRACE("_x2"); break;
1550 case D3DSPSM_X2NEG
: TRACE("_x2"); break;
1551 case D3DSPSM_DZ
: TRACE("_dz"); break;
1552 case D3DSPSM_DW
: TRACE("_dw"); break;
1554 TRACE("_unknown(0x%08lx)", mask
);
1559 * swizzle bits fields:
/* A swizzle is traced only when it differs from the identity swizzle; a
 * single letter is used when all four selectors are equal. */
1562 if ((D3DSP_NOSWIZZLE
>> D3DSP_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1563 if (swizzle_x
== swizzle_y
&&
1564 swizzle_x
== swizzle_z
&&
1565 swizzle_x
== swizzle_w
) {
1566 TRACE(".%c", swizzle_reg_chars
[swizzle_x
]);
1569 swizzle_reg_chars
[swizzle_x
],
1570 swizzle_reg_chars
[swizzle_y
],
1571 swizzle_reg_chars
[swizzle_z
],
1572 swizzle_reg_chars
[swizzle_w
]);
/*
 * File-scope table with one int per pixel shader constant register
 * (D3D8_PSHADER_MAX_CONSTANTS entries).
 * NOTE(review): the statements that read and write this table are not
 * visible in this view - presumably it marks constants defined by the shader
 * itself; confirm against the complete file.
 */
1578 static int constants
[D3D8_PSHADER_MAX_CONSTANTS
];
/*
 * Write the generated-program name of the register encoded in a parameter
 * token into regstr.
 *
 * The low 13 bits of param are the register number and the
 * D3DSP_REGTYPE_MASK bits the register type. The visible outputs are "R<n>",
 * "fragment.color.primary", "fragment.color.secondary", "C<n>",
 * "program.env[<n>]", "T<n>", an entry of rastout_reg_names, "oD[<n>]" and
 * "oT[<n>]".
 *
 * regstr is written with sprintf/strcpy without a length limit, so the caller
 * must supply a buffer large enough for the longest name.
 *
 * NOTE(review): most case labels and the conditions that choose between the
 * two fragment.color names and between "C<n>" and "program.env[<n>]" are not
 * visible in this view.
 * NOTE(review): rastout_reg_names has 5 entries and is indexed by the
 * register number without a visible bounds check.
 */
1580 inline static void get_register_name(const DWORD param
, char* regstr
)
1582 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1584 DWORD reg
= param
& 0x00001FFF;
1585 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
1587 switch (regtype
<< D3DSP_REGTYPE_SHIFT
) {
1589 sprintf(regstr
, "R%lu", reg
);
1593 strcpy(regstr
, "fragment.color.primary");
1595 strcpy(regstr
, "fragment.color.secondary");
1600 sprintf(regstr
, "C%lu", reg
);
1602 sprintf(regstr
, "program.env[%lu]", reg
);
1604 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
1605 sprintf(regstr
,"T%lu", reg
);
1607 case D3DSPR_RASTOUT
:
1608 sprintf(regstr
, "%s", rastout_reg_names
[reg
]);
1610 case D3DSPR_ATTROUT
:
1611 sprintf(regstr
, "oD[%lu]", reg
);
1613 case D3DSPR_TEXCRDOUT
:
1614 sprintf(regstr
, "oT[%lu]", reg
);
/*
 * Helper used while building the generated program text: takes a line
 * counter, the program string built so far and the new line.
 *
 * The visible statement traces the line number, the current length of pgm
 * and the line on the d3d_hw_shader channel.
 *
 * NOTE(review): the statements that update *lineNum and append line to pgm
 * are not visible in this view.
 * NOTE(review): strlen() returns size_t but is printed with %u - confirm the
 * format is correct on all supported targets.
 */
1621 inline static void addline(unsigned int* lineNum
, char* pgm
, char* line
)
1624 TRACE_(d3d_hw_shader
)("GL HW (%u, %u) : %s\n", *lineNum
, strlen(pgm
), line
);
/*
 * Operand names indexed by the destination shift value (0..15), used by
 * gen_output_modifier_line() as the multiplier operand of a MUL instruction.
 * Shifts 1..4 select a component of coefmul and shifts 12..15 a component of
 * coefdiv; every other index maps to the placeholder name "dummy".
 */
1629 static const char* shift_tab
[] = {
1630 "dummy", /* 0 (none) */
1631 "coefmul.x", /* 1 (x2) */
1632 "coefmul.y", /* 2 (x4) */
1633 "coefmul.z", /* 3 (x8) */
1634 "coefmul.w", /* 4 (x16) */
1635 "dummy", /* 5 (x32) */
1636 "dummy", /* 6 (x64) */
1637 "dummy", /* 7 (x128) */
1638 "dummy", /* 8 (d256) */
1639 "dummy", /* 9 (d128) */
1640 "dummy", /* 10 (d64) */
1641 "dummy", /* 11 (d32) */
1642 "coefdiv.w", /* 12 (d16) */
1643 "coefdiv.z", /* 13 (d8) */
1644 "coefdiv.y", /* 14 (d4) */
1645 "coefdiv.x" /* 15 (d2) */
/*
 * Build the textual write mask of a destination parameter token.
 *
 * When the token does not enable all four components, "." followed by one
 * letter (r, g, b, a) per enabled component is appended to write_mask.
 * Nothing is appended for a full mask.
 *
 * The text is added with strcat, so write_mask must already hold a valid
 * string when these statements run and must have room for 5 more characters
 * plus the terminator.
 * NOTE(review): any initialization of write_mask at the top of the function
 * is not visible in this view.
 */
1648 inline static void get_write_mask(const DWORD output_reg
, char* write_mask
)
1651 if ((output_reg
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
1652 strcat(write_mask
, ".");
1653 if (output_reg
& D3DSP_WRITEMASK_0
) strcat(write_mask
, "r");
1654 if (output_reg
& D3DSP_WRITEMASK_1
) strcat(write_mask
, "g");
1655 if (output_reg
& D3DSP_WRITEMASK_2
) strcat(write_mask
, "b");
1656 if (output_reg
& D3DSP_WRITEMASK_3
) strcat(write_mask
, "a");
/*
 * Build the textual swizzle suffix of a source parameter token.
 *
 * The swizzle field holds four 2-bit component selectors (x in the lowest
 * bits, w in the highest), each mapped to one of the letters "rgba". When the
 * swizzle differs from the identity swizzle, swzstring receives "." plus a
 * single letter if all four selectors are equal, otherwise "." plus four
 * letters.
 *
 * NOTE(review): what swzstring contains for the identity swizzle depends on
 * lines that are not visible in this view.
 */
1660 inline static void get_input_register_swizzle(const DWORD instr
, char* swzstring
)
1662 static const char swizzle_reg_chars
[] = "rgba";
1663 DWORD swizzle
= (instr
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
1664 DWORD swizzle_x
= swizzle
& 0x03;
1665 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
1666 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
1667 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
1669 * swizzle bits fields:
1673 if ((D3DSP_NOSWIZZLE
>> D3DSP_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1674 if (swizzle_x
== swizzle_y
&&
1675 swizzle_x
== swizzle_z
&&
1676 swizzle_x
== swizzle_w
) {
1677 sprintf(swzstring
, ".%c", swizzle_reg_chars
[swizzle_x
]);
1679 sprintf(swzstring
, ".%c%c%c%c",
1680 swizzle_reg_chars
[swizzle_x
],
1681 swizzle_reg_chars
[swizzle_y
],
1682 swizzle_reg_chars
[swizzle_z
],
1683 swizzle_reg_chars
[swizzle_w
]);
1688 inline static void gen_output_modifier_line(int saturate
, char* write_mask
, int shift
, char *regstr
, char* line
)
1690 /* Generate a line that does the output modifier computation */
1691 sprintf(line
, "MUL%s %s%s, %s, %s;", saturate
? "_SAT" : "", regstr
, write_mask
, regstr
, shift_tab
[shift
]);
/*
 * Generate the instruction(s) that apply a source modifier to an operand.
 *
 * instr is the source parameter token, tmpreg selects the temporary used for
 * the result (named "T" followed by the letter 'A' + tmpreg), outregstr
 * receives the operand name the caller should use and line receives the
 * generated instruction text.
 *
 * Visible behavior: the register name is obtained with get_register_name().
 * Depending on the source modifier, outregstr is either the plain or the
 * negated register name, or an ADD, MAD, SUB or RCP+MUL sequence targeting
 * the temporary is written to line and outregstr is set to the temporary's
 * name. The two-instruction sequences are joined with "\n" inside line.
 *
 * regstr and tmpline have static storage, so their contents are shared by
 * all calls. line and outregstr are written without a length limit.
 *
 * NOTE(review): most case labels and the return statements are not visible
 * in this view; callers test the return value before adding line to the
 * program, so it presumably reports whether a line was generated - confirm.
 */
1694 inline static int gen_input_modifier_line(const DWORD instr
, int tmpreg
, char* outregstr
, char* line
)
1696 /* Generate a line that does the input modifier computation and return the input register to use */
1697 static char regstr
[256];
1698 static char tmpline
[256];
1701 /* Assume a new line will be added */
1704 /* Get register name */
1705 get_register_name(instr
, regstr
);
1707 switch (instr
& D3DSP_SRCMOD_MASK
) {
1709 strcpy(outregstr
, regstr
);
1713 sprintf(outregstr
, "-%s", regstr
);
1717 sprintf(line
, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg
, regstr
);
1719 case D3DSPSM_BIASNEG
:
1720 sprintf(line
, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg
, regstr
);
1723 sprintf(line
, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg
, regstr
);
1725 case D3DSPSM_SIGNNEG
:
1726 sprintf(line
, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg
, regstr
);
1729 sprintf(line
, "SUB T%c, one.x, %s;", 'A' + tmpreg
, regstr
);
1732 sprintf(line
, "ADD T%c, %s, %s;", 'A' + tmpreg
, regstr
, regstr
);
1735 sprintf(line
, "ADD T%c, -%s, -%s;", 'A' + tmpreg
, regstr
, regstr
);
/* Reciprocal followed by a multiply, emitted as two instructions in one
 * line buffer. */
1738 sprintf(line
, "RCP T%c, %s.z;", 'A' + tmpreg
, regstr
);
1739 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
1740 strcat(line
, "\n"); /* Hack */
1741 strcat(line
, tmpline
);
1744 sprintf(line
, "RCP T%c, %s;", 'A' + tmpreg
, regstr
);
1745 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
1746 strcat(line
, "\n"); /* Hack */
1747 strcat(line
, tmpline
);
1750 strcpy(outregstr
, regstr
);
1755 /* Substitute the register name */
1756 sprintf(outregstr
, "T%c", 'A' + tmpreg
);
1765 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/PixelShader1_X/modifiers/sourceregistermodifiers.asp
1766 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/Shaders/PixelShader2_0/Registers/Registers.asp
1767 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/d3d/interfaces/IDirect3DPixelShader9/_IDirect3DPixelShader9.asp
1770 inline static VOID
IDirect3DPixelShaderImpl_GenerateProgramArbHW(IDirect3DPixelShaderImpl
* pshader
, CONST DWORD
* pFunction
) {
1771 const DWORD
* pToken
= pFunction
;
1772 const SHADER_OPCODE
* curOpcode
= NULL
;
1773 const DWORD
* pInstr
;
1777 unsigned lineNum
= 0;
1778 char *pgmStr
= NULL
;
1783 IDirect3DDevice8Impl
* This
= pshader
->device
;
1786 for(i
= 0; i
< D3D8_PSHADER_MAX_CONSTANTS
; i
++)
1789 pgmStr
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, 65535); /* 64kb should be enough */
1791 if (NULL
!= pToken
) {
1792 while (D3DPS_END() != *pToken
) {
1793 if (pshader_is_version_token(*pToken
)) { /** version */
1797 /* Extract version * 10 into an integer value (i.e. 1.0 == 10, 1.1 == 11, etc.) */
1798 version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1800 TRACE_(d3d_hw_shader
)("ps.%lu.%lu;\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1802 /* Each release of pixel shaders has had different numbers of temp registers */
1808 case 14: numTemps
=12;
1810 strcpy(tmpLine
, "!!ARBfp1.0");
1812 case 20: numTemps
=12;
1814 strcpy(tmpLine
, "!!ARBfp2.0");
1815 FIXME_(d3d_hw_shader
)("No work done yet to support ps2.0 in hw\n");
1817 case 30: numTemps
=32;
1819 strcpy(tmpLine
, "!!ARBfp3.0");
1820 FIXME_(d3d_hw_shader
)("No work done yet to support ps3.0 in hw\n");
1825 strcpy(tmpLine
, "!!ARBfp1.0");
1826 FIXME_(d3d_hw_shader
)("Unrecognized pixel shader version!\n");
1828 addline(&lineNum
, pgmStr
, tmpLine
);
1830 for(i
= 0; i
< 6; i
++) {
1831 sprintf(tmpLine
, "TEMP T%lu;", i
);
1832 addline(&lineNum
, pgmStr
, tmpLine
);
1834 for(i
= 0; i
< 6; i
++) {
1835 sprintf(tmpLine
, "TEMP R%lu;", i
);
1836 addline(&lineNum
, pgmStr
, tmpLine
);
1839 sprintf(tmpLine
, "TEMP TMP;");
1840 addline(&lineNum
, pgmStr
, tmpLine
);
1841 sprintf(tmpLine
, "TEMP TMP2;");
1842 addline(&lineNum
, pgmStr
, tmpLine
);
1843 sprintf(tmpLine
, "TEMP TA;");
1844 addline(&lineNum
, pgmStr
, tmpLine
);
1845 sprintf(tmpLine
, "TEMP TB;");
1846 addline(&lineNum
, pgmStr
, tmpLine
);
1847 sprintf(tmpLine
, "TEMP TC;");
1848 addline(&lineNum
, pgmStr
, tmpLine
);
1850 strcpy(tmpLine
, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };");
1851 addline(&lineNum
, pgmStr
, tmpLine
);
1852 strcpy(tmpLine
, "PARAM coefmul = { 2, 4, 8, 16 };");
1853 addline(&lineNum
, pgmStr
, tmpLine
);
1854 strcpy(tmpLine
, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };");
1855 addline(&lineNum
, pgmStr
, tmpLine
);
1857 for(i
= 0; i
< 4; i
++) {
1858 sprintf(tmpLine
, "MOV T%lu, fragment.texcoord[%lu];", i
, i
);
1859 addline(&lineNum
, pgmStr
, tmpLine
);
1865 if (pshader_is_comment_token(*pToken
)) { /** comment */
1866 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1868 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
1869 pToken
+= comment_len
;
1874 curOpcode
= pshader_program_get_opcode(code
, version
);
1876 if (NULL
== curOpcode
) {
1877 /* unknown current opcode ... */
1878 while (*pToken
& 0x80000000) {
1879 TRACE("unrecognized opcode: %08lx\n", *pToken
);
1885 /* Build opcode for GL fragment_program */
1886 switch (curOpcode
->opcode
) {
1892 DWORD reg
= *pToken
& 0x00001FFF;
1893 sprintf(tmpLine
, "PARAM C%lu = { %f, %f, %f, %f };", reg
,
1894 *((const float*)(pToken
+1)),
1895 *((const float*)(pToken
+2)),
1896 *((const float*)(pToken
+3)),
1897 *((const float*)(pToken
+4)) );
1898 addline(&lineNum
, pgmStr
, tmpLine
);
1904 case D3DSIO_TEXKILL
:
1905 strcpy(tmpLine
, "KIL");
1910 get_write_mask(*pToken
, tmp
);
1911 if (version
!= 14) {
1912 DWORD reg
= *pToken
& 0x00001FFF;
1913 sprintf(tmpLine
,"TEX T%lu%s, T%lu, texture[%lu], 2D;", reg
, tmp
, reg
, reg
);
1914 addline(&lineNum
, pgmStr
, tmpLine
);
1920 DWORD reg1
= *pToken
& 0x00001FFF;
1921 DWORD reg2
= *(pToken
+1) & 0x00001FFF;
1922 if (gen_input_modifier_line(*(pToken
+1), 0, reg
, line
))
1923 addline(&lineNum
, pgmStr
, line
);
1924 sprintf(tmpLine
,"TEX R%lu%s, %s, texture[%lu], 2D;", reg1
, tmp
, reg
, reg2
);
1925 addline(&lineNum
, pgmStr
, tmpLine
);
1931 case D3DSIO_TEXCOORD
:
1934 get_write_mask(*pToken
, tmp
);
1935 if (version
!= 14) {
1936 DWORD reg
= *pToken
& 0x00001FFF;
1937 sprintf(tmpLine
, "MOV T%lu%s, fragment.texcoord[%lu];", reg
, tmp
, reg
);
1938 addline(&lineNum
, pgmStr
, tmpLine
);
1942 DWORD reg1
= *pToken
& 0x00001FFF;
1943 DWORD reg2
= *(pToken
+1) & 0x00001FFF;
1944 sprintf(tmpLine
, "MOV R%lu%s, fragment.texcoord[%lu];", reg1
, tmp
, reg2
);
1945 addline(&lineNum
, pgmStr
, tmpLine
);
1951 case D3DSIO_TEXM3x2PAD
:
1953 DWORD reg
= *pToken
& 0x00001FFF;
1955 if (gen_input_modifier_line(*(pToken
+1), 0, buf
, tmpLine
))
1956 addline(&lineNum
, pgmStr
, tmpLine
);
1957 sprintf(tmpLine
, "DP3 TMP.x, T%lu, %s;", reg
, buf
);
1958 addline(&lineNum
, pgmStr
, tmpLine
);
1963 case D3DSIO_TEXM3x2TEX
:
1965 DWORD reg
= *pToken
& 0x00001FFF;
1967 if (gen_input_modifier_line(*(pToken
+1), 0, buf
, tmpLine
))
1968 addline(&lineNum
, pgmStr
, tmpLine
);
1969 sprintf(tmpLine
, "DP3 TMP.y, T%lu, %s;", reg
, buf
);
1970 addline(&lineNum
, pgmStr
, tmpLine
);
1971 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;", reg
, reg
);
1972 addline(&lineNum
, pgmStr
, tmpLine
);
1977 case D3DSIO_TEXREG2AR
:
1979 DWORD reg1
= *pToken
& 0x00001FFF;
1980 DWORD reg2
= *(pToken
+1) & 0x00001FFF;
1981 sprintf(tmpLine
, "MOV TMP.r, T%lu.a;", reg2
);
1982 addline(&lineNum
, pgmStr
, tmpLine
);
1983 sprintf(tmpLine
, "MOV TMP.g, T%lu.r;", reg2
);
1984 addline(&lineNum
, pgmStr
, tmpLine
);
1985 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;", reg1
, reg1
);
1986 addline(&lineNum
, pgmStr
, tmpLine
);
1991 case D3DSIO_TEXREG2GB
:
1993 DWORD reg1
= *pToken
& 0x00001FFF;
1994 DWORD reg2
= *(pToken
+1) & 0x00001FFF;
1995 sprintf(tmpLine
, "MOV TMP.r, T%lu.g;", reg2
);
1996 addline(&lineNum
, pgmStr
, tmpLine
);
1997 sprintf(tmpLine
, "MOV TMP.g, T%lu.b;", reg2
);
1998 addline(&lineNum
, pgmStr
, tmpLine
);
1999 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;", reg1
, reg1
);
2000 addline(&lineNum
, pgmStr
, tmpLine
);
2007 DWORD reg1
= *pToken
& 0x00001FFF;
2008 DWORD reg2
= *(pToken
+1) & 0x00001FFF;
2009 /* FIXME: Should apply the BUMPMAPENV matrix */
2010 sprintf(tmpLine
, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;", reg1
, reg2
);
2011 addline(&lineNum
, pgmStr
, tmpLine
);
2012 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;", reg1
, reg1
);
2013 addline(&lineNum
, pgmStr
, tmpLine
);
2018 case D3DSIO_TEXM3x3PAD
:
2020 DWORD reg
= *pToken
& 0x00001FFF;
2022 if (gen_input_modifier_line(*(pToken
+1), 0, buf
, tmpLine
))
2023 addline(&lineNum
, pgmStr
, tmpLine
);
2024 sprintf(tmpLine
, "DP3 TMP.%c, T%lu, %s;", 'x'+row
, reg
, buf
);
2025 addline(&lineNum
, pgmStr
, tmpLine
);
2031 case D3DSIO_TEXM3x3TEX
:
2033 DWORD reg
= *pToken
& 0x00001FFF;
2035 if (gen_input_modifier_line(*(pToken
+1), 0, buf
, tmpLine
))
2036 addline(&lineNum
, pgmStr
, tmpLine
);
2037 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;", reg
, buf
);
2038 addline(&lineNum
, pgmStr
, tmpLine
);
2039 /* Cubemap textures will be more used than 3D ones. */
2040 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;", reg
, reg
);
2041 addline(&lineNum
, pgmStr
, tmpLine
);
2046 case D3DSIO_TEXM3x3VSPEC
:
2048 DWORD reg
= *pToken
& 0x00001FFF;
2050 if (gen_input_modifier_line(*(pToken
+1), 0, buf
, tmpLine
))
2051 addline(&lineNum
, pgmStr
, tmpLine
);
2052 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;", reg
, buf
);
2053 addline(&lineNum
, pgmStr
, tmpLine
);
2054 /* Construct the eye-ray vector from w coordinates */
2055 sprintf(tmpLine
, "MOV TMP2.x, fragment.texcoord[%lu].w;", tcw
[0]);
2056 addline(&lineNum
, pgmStr
, tmpLine
);
2057 sprintf(tmpLine
, "MOV TMP2.y, fragment.texcoord[%lu].w;", tcw
[1]);
2058 addline(&lineNum
, pgmStr
, tmpLine
);
2059 sprintf(tmpLine
, "MOV TMP2.z, fragment.texcoord[%lu].w;", reg
);
2060 addline(&lineNum
, pgmStr
, tmpLine
);
2061 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
2062 sprintf(tmpLine
, "DP3 TMP.w, TMP, TMP2;");
2063 addline(&lineNum
, pgmStr
, tmpLine
);
2064 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;");
2065 addline(&lineNum
, pgmStr
, tmpLine
);
2066 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -TMP2;");
2067 addline(&lineNum
, pgmStr
, tmpLine
);
2068 /* Cubemap textures will be more used than 3D ones. */
2069 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;", reg
, reg
);
2070 addline(&lineNum
, pgmStr
, tmpLine
);
2076 case D3DSIO_TEXM3x3SPEC
:
2078 DWORD reg
= *pToken
& 0x00001FFF;
2079 DWORD reg3
= *(pToken
+2) & 0x00001FFF;
2081 if (gen_input_modifier_line(*(pToken
+1), 0, buf
, tmpLine
))
2082 addline(&lineNum
, pgmStr
, tmpLine
);
2083 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;", reg
, buf
);
2084 addline(&lineNum
, pgmStr
, tmpLine
);
2085 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
2086 sprintf(tmpLine
, "DP3 TMP.w, TMP, C[%lu];", reg3
);
2087 addline(&lineNum
, pgmStr
, tmpLine
);
2088 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;");
2089 addline(&lineNum
, pgmStr
, tmpLine
);
2090 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -C[%lu];", reg3
);
2091 addline(&lineNum
, pgmStr
, tmpLine
);
2092 /* Cubemap textures will be more used than 3D ones. */
2093 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;", reg
, reg
);
2094 addline(&lineNum
, pgmStr
, tmpLine
);
2105 strcpy(tmpLine
, "MOV");
2108 strcpy(tmpLine
, "MUL");
2111 strcpy(tmpLine
, "DP3");
2114 strcpy(tmpLine
, "MAD");
2117 strcpy(tmpLine
, "ADD");
2120 strcpy(tmpLine
, "SUB");
2123 strcpy(tmpLine
, "LRP");
2126 FIXME_(d3d_hw_shader
)("Can't handle opcode %s in hwShader\n", curOpcode
->name
);
2128 if (0 != (*pToken
& D3DSP_DSTMOD_MASK
)) {
2129 DWORD mask
= *pToken
& D3DSP_DSTMOD_MASK
;
2131 case D3DSPDM_SATURATE
: saturate
= TRUE
; break;
2133 TRACE("_unhandled_modifier(0x%08lx)", mask
);
2136 if (autoparam
&& (curOpcode
->num_params
> 0)) {
2138 char operands
[4][100];
2142 /* Generate lines that handle input modifier computation */
2143 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
2144 if (gen_input_modifier_line(*(pToken
+i
), i
-1, regs
[i
-1], tmp
))
2145 addline(&lineNum
, pgmStr
, tmp
);
2147 /* Handle saturation only when no shift is present in the output modifier */
2148 if ((*pToken
& D3DSPDM_SATURATE
) && (0 == (*pToken
& D3DSP_DSTSHIFT_MASK
)))
2150 /* Handle output register */
2151 get_register_name(*pToken
, tmp
);
2152 strcpy(operands
[0], tmp
);
2153 get_write_mask(*pToken
, tmp
);
2154 strcat(operands
[0], tmp
);
2155 /* Handle input registers */
2156 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
2157 strcpy(operands
[i
], regs
[i
-1]);
2158 get_input_register_swizzle(*(pToken
+i
), swzstring
);
2159 strcat(operands
[i
], swzstring
);
2161 if (curOpcode
->opcode
== D3DSIO_CMP
) {
2162 sprintf(tmpLine
, "CMP%s %s, %s, %s, %s;", (saturate
? "_SAT" : ""), operands
[0], operands
[1], operands
[3], operands
[2]);
2163 } else if (curOpcode
->opcode
== D3DSIO_CND
) {
2164 sprintf(tmpLine
, "ADD TMP, -%s, coefdiv.x;", operands
[1]);
2165 addline(&lineNum
, pgmStr
, tmpLine
);
2166 sprintf(tmpLine
, "CMP%s %s, TMP, %s, %s;", (saturate
? "_SAT" : ""), operands
[0], operands
[2], operands
[3]);
2169 strcat(tmpLine
, "_SAT");
2170 strcat(tmpLine
, " ");
2171 strcat(tmpLine
, operands
[0]);
2172 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
2173 strcat(tmpLine
, ", ");
2174 strcat(tmpLine
, operands
[i
]);
2176 strcat(tmpLine
,";");
2178 addline(&lineNum
, pgmStr
, tmpLine
);
2179 pToken
+= curOpcode
->num_params
;
2181 if (curOpcode
->num_params
> 0) {
2182 DWORD param
= *(pInstr
+1);
2183 if (0 != (param
& D3DSP_DSTSHIFT_MASK
)) {
2184 /* Generate a line that handle the output modifier computation */
2186 char write_mask
[20];
2187 DWORD shift
= (param
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
2188 get_register_name(param
, regstr
);
2189 get_write_mask(param
, write_mask
);
2190 gen_output_modifier_line(saturate
, write_mask
, shift
, regstr
, tmpLine
);
2191 addline(&lineNum
, pgmStr
, tmpLine
);
2196 strcpy(tmpLine
, "MOV result.color, R0;");
2197 addline(&lineNum
, pgmStr
, tmpLine
);
2199 strcpy(tmpLine
, "END");
2200 addline(&lineNum
, pgmStr
, tmpLine
);
2203 /* Create the hw shader */
2204 GL_EXTCALL(glGenProgramsARB(1, &pshader
->prgId
));
2205 TRACE_(d3d_hw_shader
)("Creating a hw pixel shader, prg=%d\n", pshader
->prgId
);
2207 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
, pshader
->prgId
));
2209 /* Create the program and check for errors */
2210 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
, strlen(pgmStr
), pgmStr
));
2211 if (glGetError() == GL_INVALID_OPERATION
) {
2213 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &errPos
);
2214 FIXME_(d3d_hw_shader
)("HW PixelShader Error at position: %d\n%s\n", errPos
, glGetString(GL_PROGRAM_ERROR_STRING_ARB
));
2215 pshader
->prgId
= -1;
2218 HeapFree(GetProcessHeap(), 0, pgmStr
);
/*
 * Walk the pixel shader token stream starting at pFunction, tracing what is
 * found, and record the result on pshader:
 *  - pshader->functionLength is set to (len + 1) * sizeof(DWORD) once the
 *    stream has been walked, or to 1 on the branch marked "no Function
 *    defined";
 *  - when pFunction is non-NULL, IDirect3DPixelShaderImpl_GenerateProgramArbHW
 *    is called for it, and functionLength bytes of pFunction are copied into
 *    a zero-initialised process-heap buffer stored in pshader->function;
 *  - pshader->function is set to NULL on the remaining path (presumably the
 *    pFunction == NULL case -- confirm against the full file).
 */
2221 inline static VOID
IDirect3DPixelShaderImpl_ParseProgram(IDirect3DPixelShaderImpl
* pshader
, CONST DWORD
* pFunction
) {
2222 const DWORD
* pToken
= pFunction
;
2223 const SHADER_OPCODE
* curOpcode
= NULL
;
2229 if (NULL
!= pToken
) {
/* Consume tokens until the D3DPS_END() marker is reached. */
2230 while (D3DPS_END() != *pToken
) {
/* Version token: bits 8..11 and bits 0..3 are traced as "ps.<a>.<b>" and
 * folded into a single number, version = a * 10 + b. */
2231 if (pshader_is_version_token(*pToken
)) { /** version */
2232 TRACE("ps.%lu.%lu\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
2233 version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
/* Comment token: its payload size comes from the D3DSI_COMMENTSIZE field;
 * pToken is advanced by that many tokens and len grows by comment_len + 1. */
2238 if (pshader_is_comment_token(*pToken
)) { /** comment */
2239 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
2241 /*TRACE("comment[%ld] ;%s\n", comment_len, (char*)pToken);*/
2242 pToken
+= comment_len
;
2243 len
+= comment_len
+ 1;
/* Look up the opcode description for this code under the parsed version. */
2247 curOpcode
= pshader_program_get_opcode(code
, version
);
2250 if (NULL
== curOpcode
) {
2251 /* unknown current opcode ... */
/* Loops for as long as the current token has bit 31 set, tracing each one. */
2252 while (*pToken
& 0x80000000) {
2253 TRACE("unrecognized opcode: %08lx\n", *pToken
);
/* Known opcode: dump the opcode itself, then its parameters. */
2259 pshader_program_dump_opcode(curOpcode
, code
, *pToken
);
2260 if (curOpcode
->num_params
> 0) {
2261 pshader_program_dump_param(*pToken
, 0);
2264 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
/* Parameters of every opcode other than D3DSIO_DEF go through the regular
 * parameter dumper; the "%f" trace below reinterprets the token as a float
 * (presumably the D3DSIO_DEF case -- confirm against the full file). */
2266 if (D3DSIO_DEF
!= code
) {
2267 pshader_program_dump_param(*pToken
, 1);
2269 TRACE("%f", *((const float*) pToken
));
/* len counts DWORD tokens; the extra 1 presumably accounts for the end
 * token -- TODO confirm. */
2277 pshader
->functionLength
= (len
+ 1) * sizeof(DWORD
);
2280 pshader
->functionLength
= 1; /* no Function defined use fixed function vertex processing */
2283 if (NULL
!= pFunction
) {
2284 IDirect3DPixelShaderImpl_GenerateProgramArbHW(pshader
, pFunction
);
2287 if (NULL
!= pFunction
) {
/* NOTE(review): the HeapAlloc result is handed to memcpy on the very next
 * line without a NULL check. */
2288 pshader
->function
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, pshader
->functionLength
);
2289 memcpy(pshader
->function
, pFunction
, pshader
->functionLength
);
2291 pshader
->function
= NULL
;
/*
 * Create a pixel shader object for device This from the token stream
 * pFunction and hand it back through ppPixelShader.
 *
 * The object is allocated zero-initialised on the process heap. If that
 * allocation fails, *ppPixelShader is set to NULL and
 * D3DERR_OUTOFVIDEOMEMORY is returned. Otherwise the object's device is set
 * to This, a zero-initialised PSHADERDATA8 is allocated for object->data,
 * the program is processed by IDirect3DPixelShaderImpl_ParseProgram, and the
 * object is stored in *ppPixelShader.
 */
2295 HRESULT WINAPI
IDirect3DDeviceImpl_CreatePixelShader(IDirect3DDevice8Impl
* This
, CONST DWORD
* pFunction
, IDirect3DPixelShaderImpl
** ppPixelShader
) {
2296 IDirect3DPixelShaderImpl
* object
;
2298 object
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, sizeof(IDirect3DPixelShaderImpl
));
2299 if (NULL
== object
) {
2300 *ppPixelShader
= NULL
;
2301 return D3DERR_OUTOFVIDEOMEMORY
;
2303 /*object->lpVtbl = &Direct3DPixelShader9_Vtbl;*/
2304 object
->device
= This
;
/* NOTE(review): no check of this allocation is visible here; the constant
 * accessors IDirect3DPixelShaderImpl_SetConstantF/GetConstantF reject a NULL
 * data pointer with D3DERR_INVALIDCALL -- confirm a failure is acceptable. */
2307 object
->data
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, sizeof(PSHADERDATA8
));
2309 IDirect3DPixelShaderImpl_ParseProgram(object
, pFunction
);
2311 *ppPixelShader
= object
;
/*
 * Report the size of, or copy out, the token stream stored on the shader.
 *
 *  - pData == NULL: *pSizeOfData receives This->functionLength (size query).
 *  - *pSizeOfData < This->functionLength: *pSizeOfData receives the required
 *    size and D3DERR_MOREDATA is returned.
 *  - This->function == NULL: a NULL pointer is stored through pData.
 *  - The remaining path copies This->functionLength bytes from This->function
 *    into pData (presumably the else branch of the NULL test -- confirm).
 */
2315 HRESULT WINAPI
IDirect3DPixelShaderImpl_GetFunction(IDirect3DPixelShaderImpl
* This
, VOID
* pData
, UINT
* pSizeOfData
) {
2316 if (NULL
== pData
) {
2317 *pSizeOfData
= This
->functionLength
;
2320 if (*pSizeOfData
< This
->functionLength
) {
2321 *pSizeOfData
= This
->functionLength
;
2322 return D3DERR_MOREDATA
;
2324 if (NULL
== This
->function
) { /* no function defined */
2325 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This
, pData
);
/* NOTE(review): this writes a pointer-sized value into the caller's buffer.
 * IDirect3DPixelShaderImpl_ParseProgram sets functionLength to 1 on its
 * "no Function defined" path, so the size check above may only guarantee a
 * single byte of room -- confirm the buffer is large enough. */
2326 (*(DWORD
**) pData
) = NULL
;
2328 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
2329 memcpy(pData
, This
->function
, This
->functionLength
);
/*
 * Store Vector4fCount four-float constants from pConstantData into the
 * shader's constant array This->data->C, starting at index StartRegister.
 *
 * Returns D3DERR_INVALIDCALL when StartRegister + Vector4fCount exceeds
 * D3D8_VSHADER_MAX_CONSTANTS, or when This->data is NULL (a FIXME is logged
 * in the latter case).
 *
 * NOTE(review): the bound is the vertex shader constant limit although this
 * is the pixel shader path -- confirm that C[] is sized accordingly. The sum
 * of the two UINT arguments can also wrap around and slip past the check.
 */
2334 HRESULT WINAPI
IDirect3DPixelShaderImpl_SetConstantF(IDirect3DPixelShaderImpl
* This
, UINT StartRegister
, CONST FLOAT
* pConstantData
, UINT Vector4fCount
) {
2335 if (StartRegister
+ Vector4fCount
> D3D8_VSHADER_MAX_CONSTANTS
) {
2336 return D3DERR_INVALIDCALL
;
2338 if (NULL
== This
->data
) { /* temporary while datas not supported */
2339 FIXME("(%p) : PixelShader_SetConstant not fully supported yet\n", This
);
2340 return D3DERR_INVALIDCALL
;
/* Each constant register is copied as 4 FLOATs. */
2342 memcpy(&This
->data
->C
[StartRegister
], pConstantData
, Vector4fCount
* 4 * sizeof(FLOAT
));
/*
 * Copy Vector4fCount four-float constants out of the shader's constant array
 * This->data->C, starting at index StartRegister, into pConstantData.
 *
 * Returns D3DERR_INVALIDCALL when StartRegister + Vector4fCount exceeds
 * D3D8_VSHADER_MAX_CONSTANTS, or when This->data is NULL.
 *
 * NOTE(review): the bound is the vertex shader constant limit although this
 * is the pixel shader path -- confirm that C[] is sized accordingly. The sum
 * of the two UINT arguments can also wrap around and slip past the check.
 */
2346 HRESULT WINAPI
IDirect3DPixelShaderImpl_GetConstantF(IDirect3DPixelShaderImpl
* This
, UINT StartRegister
, FLOAT
* pConstantData
, UINT Vector4fCount
) {
2347 if (StartRegister
+ Vector4fCount
> D3D8_VSHADER_MAX_CONSTANTS
) {
2348 return D3DERR_INVALIDCALL
;
2350 if (NULL
== This
->data
) { /* temporary while datas not supported */
2351 return D3DERR_INVALIDCALL
;
/* Each constant register is copied as 4 FLOATs. */
2353 memcpy(pConstantData
, &This
->data
->C
[StartRegister
], Vector4fCount
* 4 * sizeof(FLOAT
));
2358 /**********************************************************************************************************************************************
2359 **********************************************************************************************************************************************
2360 **********************************************************************************************************************************************
2361 **********************************************************************************************************************************************
2362 **********************************************************************************************************************************************/
2364 /***********************************************************************
2365 * ValidateVertexShader (D3D8.@)
/* Stub: the only visible action is a FIXME that logs pFunction, param1,
 * param2 and toto; no validation of the shader is performed here. The
 * meaning of param1, param2 and toto is not established by this code
 * (the FIXME itself labels toto as "result?"). */
2370 BOOL WINAPI
ValidateVertexShader(LPVOID pFunction
, int param1
, int param2
, LPVOID toto
)
2372 FIXME("(void): stub: pFunction %p, param1 %d, param2 %d, result? %p\n", pFunction
, param1
, param2
, toto
);
2376 /***********************************************************************
2377 * ValidatePixelShader (D3D8.@)
2382 BOOL WINAPI
ValidatePixelShader(LPVOID pFunction
, int param1
, int param2
, LPVOID toto
)
2384 FIXME("(void): stub: pFunction %p, param1 %d, param2 %d, result? %p\n", pFunction
, param1
, param2
, toto
);