2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader
);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
40 /* The maximum size of the program */
43 #define REGMASK 0x00001FFF
44 typedef void (*shader_fct_t
)();
46 typedef struct SHADER_OPCODE
{
50 CONST UINT num_params
;
51 shader_fct_t soft_fct
;
56 #define GLNAME_REQUIRE_GLSL ((const char *)1)
57 /* *******************************************
58 IWineD3DPixelShader IUnknown parts follow
59 ******************************************* */
60 HRESULT WINAPI
IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader
*iface
, REFIID riid
, LPVOID
*ppobj
)
62 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
63 TRACE("(%p)->(%s,%p)\n",This
,debugstr_guid(riid
),ppobj
);
64 if (IsEqualGUID(riid
, &IID_IUnknown
)
65 || IsEqualGUID(riid
, &IID_IWineD3DBase
)
66 || IsEqualGUID(riid
, &IID_IWineD3DPixelShader
)) {
67 IUnknown_AddRef(iface
);
74 ULONG WINAPI
IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader
*iface
) {
75 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
76 TRACE("(%p) : AddRef increasing from %ld\n", This
, This
->ref
);
77 return InterlockedIncrement(&This
->ref
);
80 ULONG WINAPI
IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader
*iface
) {
81 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
83 TRACE("(%p) : Releasing from %ld\n", This
, This
->ref
);
84 ref
= InterlockedDecrement(&This
->ref
);
86 HeapFree(GetProcessHeap(), 0, This
);
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions, where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if the shader requires it).
   When that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
97 /* *******************************************
98 IWineD3DPixelShader IWineD3DPixelShader parts follow
99 ******************************************* */
101 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader
*iface
, IUnknown
** parent
){
102 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
104 *parent
= This
->parent
;
105 IUnknown_AddRef(*parent
);
106 TRACE("(%p) : returning %p\n", This
, *parent
);
110 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader
* iface
, IWineD3DDevice
**pDevice
){
111 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
112 IWineD3DDevice_AddRef((IWineD3DDevice
*)This
->wineD3DDevice
);
113 *pDevice
= (IWineD3DDevice
*)This
->wineD3DDevice
;
114 TRACE("(%p) returning %p\n", This
, *pDevice
);
119 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader
* impl
, VOID
* pData
, UINT
* pSizeOfData
) {
120 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)impl
;
121 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This
, pData
, pSizeOfData
);
124 *pSizeOfData
= This
->functionLength
;
127 if (*pSizeOfData
< This
->functionLength
) {
128 *pSizeOfData
= This
->functionLength
;
129 return D3DERR_MOREDATA
;
131 if (NULL
== This
->function
) { /* no function defined */
132 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This
, pData
);
133 (*(DWORD
**) pData
) = NULL
;
135 if (This
->functionLength
== 0) {
138 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
139 memcpy(pData
, This
->function
, This
->functionLength
);
144 /*******************************
145 * pshader functions software VM
148 void pshader_add(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
149 d
->x
= s0
->x
+ s1
->x
;
150 d
->y
= s0
->y
+ s1
->y
;
151 d
->z
= s0
->z
+ s1
->z
;
152 d
->w
= s0
->w
+ s1
->w
;
153 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
157 void pshader_dp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
158 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
;
159 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
160 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
163 void pshader_dp4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
164 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
+ s0
->w
* s1
->w
;
165 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
166 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
169 void pshader_dst(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
171 d
->y
= s0
->y
* s1
->y
;
174 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
175 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
178 void pshader_expp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
184 tmp
.f
= floorf(s0
->w
);
185 d
->x
= powf(2.0f
, tmp
.f
);
186 d
->y
= s0
->w
- tmp
.f
;
187 tmp
.f
= powf(2.0f
, s0
->w
);
188 tmp
.d
&= 0xFFFFFF00U
;
191 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
192 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
195 void pshader_lit(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
197 d
->y
= (0.0f
< s0
->x
) ? s0
->x
: 0.0f
;
198 d
->z
= (0.0f
< s0
->x
&& 0.0f
< s0
->y
) ? powf(s0
->y
, s0
->w
) : 0.0f
;
200 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
201 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
204 void pshader_logp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
205 float tmp_f
= fabsf(s0
->w
);
206 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
207 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
208 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
211 void pshader_mad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
212 d
->x
= s0
->x
* s1
->x
+ s2
->x
;
213 d
->y
= s0
->y
* s1
->y
+ s2
->y
;
214 d
->z
= s0
->z
* s1
->z
+ s2
->z
;
215 d
->w
= s0
->w
* s1
->w
+ s2
->w
;
216 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
217 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, s2
->x
, s2
->y
, s2
->z
, s2
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
220 void pshader_max(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
221 d
->x
= (s0
->x
>= s1
->x
) ? s0
->x
: s1
->x
;
222 d
->y
= (s0
->y
>= s1
->y
) ? s0
->y
: s1
->y
;
223 d
->z
= (s0
->z
>= s1
->z
) ? s0
->z
: s1
->z
;
224 d
->w
= (s0
->w
>= s1
->w
) ? s0
->w
: s1
->w
;
225 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
226 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
229 void pshader_min(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
230 d
->x
= (s0
->x
< s1
->x
) ? s0
->x
: s1
->x
;
231 d
->y
= (s0
->y
< s1
->y
) ? s0
->y
: s1
->y
;
232 d
->z
= (s0
->z
< s1
->z
) ? s0
->z
: s1
->z
;
233 d
->w
= (s0
->w
< s1
->w
) ? s0
->w
: s1
->w
;
234 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
235 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
238 void pshader_mov(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
243 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
244 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
247 void pshader_mul(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
248 d
->x
= s0
->x
* s1
->x
;
249 d
->y
= s0
->y
* s1
->y
;
250 d
->z
= s0
->z
* s1
->z
;
251 d
->w
= s0
->w
* s1
->w
;
252 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
253 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
/* Software VM "nop": performs no work; it only emits the optional trace line. */
void pshader_nop(void) {
    PSTRACE(("executing nop\n"));
}
261 void pshader_rcp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
262 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== s0
->w
) ? HUGE_VAL
: 1.0f
/ s0
->w
;
263 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
264 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
267 void pshader_rsq(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
268 float tmp_f
= fabsf(s0
->w
);
269 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== tmp_f
) ? HUGE_VAL
: ((1.0f
!= tmp_f
) ? 1.0f
/ sqrtf(tmp_f
) : 1.0f
);
270 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
271 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
274 void pshader_sge(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
275 d
->x
= (s0
->x
>= s1
->x
) ? 1.0f
: 0.0f
;
276 d
->y
= (s0
->y
>= s1
->y
) ? 1.0f
: 0.0f
;
277 d
->z
= (s0
->z
>= s1
->z
) ? 1.0f
: 0.0f
;
278 d
->w
= (s0
->w
>= s1
->w
) ? 1.0f
: 0.0f
;
279 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
280 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
283 void pshader_slt(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
284 d
->x
= (s0
->x
< s1
->x
) ? 1.0f
: 0.0f
;
285 d
->y
= (s0
->y
< s1
->y
) ? 1.0f
: 0.0f
;
286 d
->z
= (s0
->z
< s1
->z
) ? 1.0f
: 0.0f
;
287 d
->w
= (s0
->w
< s1
->w
) ? 1.0f
: 0.0f
;
288 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
289 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
292 void pshader_sub(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
293 d
->x
= s0
->x
- s1
->x
;
294 d
->y
= s0
->y
- s1
->y
;
295 d
->z
= s0
->z
- s1
->z
;
296 d
->w
= s0
->w
- s1
->w
;
297 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
298 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
302 * Version 1.1 specific
305 void pshader_exp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
306 d
->x
= d
->y
= d
->z
= d
->w
= powf(2.0f
, s0
->w
);
307 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
308 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
311 void pshader_log(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
312 float tmp_f
= fabsf(s0
->w
);
313 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
314 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
315 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
318 void pshader_frc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
319 d
->x
= s0
->x
- floorf(s0
->x
);
320 d
->y
= s0
->y
- floorf(s0
->y
);
323 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
324 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
327 typedef FLOAT D3DMATRIX44
[4][4];
328 typedef FLOAT D3DMATRIX43
[4][3];
329 typedef FLOAT D3DMATRIX34
[3][4];
330 typedef FLOAT D3DMATRIX33
[3][3];
331 typedef FLOAT D3DMATRIX23
[2][3];
333 void pshader_m4x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat
) {
335 * Buggy CODE: here only if cast not work for copy/paste
336 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
337 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
338 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
339 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
340 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
341 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
342 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
344 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
345 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
346 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
347 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
+ mat
[3][3] * s0
->w
;
348 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
349 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
350 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
351 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], mat
[3][3], s0
->w
, d
->w
));
354 void pshader_m4x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX34 mat
) {
355 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
356 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
357 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
359 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
360 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
361 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
362 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0
->w
, d
->w
));
365 void pshader_m3x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX43 mat
) {
366 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
367 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
368 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
369 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
;
370 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
371 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
372 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
373 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], s0
->w
, d
->w
));
376 void pshader_m3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX33 mat
) {
377 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
378 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
379 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
381 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
382 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
383 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
384 PSTRACE(("executing m3x3(4): (%f) \n", d
->w
));
387 void pshader_m3x2(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX23 mat
) {
389 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
390 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
396 * Version 2.0 specific
398 void pshader_lrp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
399 d
->x
= s0
->x
* (s1
->x
- s2
->x
) + s2
->x
;
400 d
->y
= s0
->y
* (s1
->y
- s2
->y
) + s2
->y
;
401 d
->z
= s0
->z
* (s1
->z
- s2
->z
) + s2
->z
;
402 d
->w
= s0
->w
* (s1
->w
- s2
->w
) + s2
->w
;
405 void pshader_crs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
406 d
->x
= s0
->y
* s1
->z
- s0
->z
* s1
->y
;
407 d
->y
= s0
->z
* s1
->x
- s0
->x
* s1
->z
;
408 d
->z
= s0
->x
* s1
->y
- s0
->y
* s1
->x
;
409 d
->w
= 0.9f
; /* w is undefined, so set it to something safeish */
411 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
412 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
415 void pshader_abs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
420 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
421 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
425 void pshader_texcoord(WINED3DSHADERVECTOR
* d
) {
429 void pshader_texkill(WINED3DSHADERVECTOR
* d
) {
433 void pshader_tex(WINED3DSHADERVECTOR
* d
) {
436 void pshader_texld(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
440 void pshader_texbem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
444 void pshader_texbeml(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
448 void pshader_texreg2ar(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
452 void pshader_texreg2gb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
456 void pshader_texm3x2pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
460 void pshader_texm3x2tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
464 void pshader_texm3x3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
468 void pshader_texm3x3pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
472 void pshader_texm3x3diff(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
476 void pshader_texm3x3spec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
480 void pshader_texm3x3vspec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
484 void pshader_cnd(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
488 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
489 void pshader_def(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
, WINED3DSHADERVECTOR
* s3
) {
493 void pshader_texreg2rgb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
497 void pshader_texdp3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
501 void pshader_texm3x2depth(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
505 void pshader_texdp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
509 void pshader_texm3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
513 void pshader_texdepth(WINED3DSHADERVECTOR
* d
) {
517 void pshader_cmp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
521 void pshader_bem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
525 void pshader_call(WINED3DSHADERVECTOR
* d
) {
529 void pshader_callnz(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
533 void pshader_loop(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
537 void pshader_ret(WINED3DSHADERVECTOR
* d
) {
541 void pshader_endloop(WINED3DSHADERVECTOR
* d
) {
545 void pshader_dcl(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
549 void pshader_pow(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
553 void pshader_sng(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
557 void pshader_nrm(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
561 void pshader_sincos(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
565 void pshader_rep(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
569 void pshader_endrep(void) {
573 void pshader_if(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
577 void pshader_ifc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
581 void pshader_else(WINED3DSHADERVECTOR
* d
) {
585 void pshader_label(WINED3DSHADERVECTOR
* d
) {
589 void pshader_endif(WINED3DSHADERVECTOR
* d
) {
593 void pshader_break(WINED3DSHADERVECTOR
* d
) {
597 void pshader_breakc(WINED3DSHADERVECTOR
* d
) {
601 void pshader_mova(WINED3DSHADERVECTOR
* d
) {
605 void pshader_defb(WINED3DSHADERVECTOR
* d
) {
609 void pshader_defi(WINED3DSHADERVECTOR
* d
) {
613 void pshader_dp2add(WINED3DSHADERVECTOR
* d
) {
617 void pshader_dsx(WINED3DSHADERVECTOR
* d
) {
621 void pshader_dsy(WINED3DSHADERVECTOR
* d
) {
625 void pshader_texldd(WINED3DSHADERVECTOR
* d
) {
629 void pshader_setp(WINED3DSHADERVECTOR
* d
) {
633 void pshader_texldl(WINED3DSHADERVECTOR
* d
) {
637 void pshader_breakp(WINED3DSHADERVECTOR
* d
) {
/* log, exp, frc and m*x* seem to be macro instructions ... this still needs to be checked */
643 static CONST SHADER_OPCODE pshader_ins
[] = {
644 {D3DSIO_NOP
, "nop", "NOP", 0, pshader_nop
, 0, 0},
645 {D3DSIO_MOV
, "mov", "MOV", 2, pshader_mov
, 0, 0},
646 {D3DSIO_ADD
, "add", "ADD", 3, pshader_add
, 0, 0},
647 {D3DSIO_SUB
, "sub", "SUB", 3, pshader_sub
, 0, 0},
648 {D3DSIO_MAD
, "mad", "MAD", 4, pshader_mad
, 0, 0},
649 {D3DSIO_MUL
, "mul", "MUL", 3, pshader_mul
, 0, 0},
650 {D3DSIO_RCP
, "rcp", "RCP", 2, pshader_rcp
, 0, 0},
651 {D3DSIO_RSQ
, "rsq", "RSQ", 2, pshader_rsq
, 0, 0},
652 {D3DSIO_DP3
, "dp3", "DP3", 3, pshader_dp3
, 0, 0},
653 {D3DSIO_DP4
, "dp4", "DP4", 3, pshader_dp4
, 0, 0},
654 {D3DSIO_MIN
, "min", "MIN", 3, pshader_min
, 0, 0},
655 {D3DSIO_MAX
, "max", "MAX", 3, pshader_max
, 0, 0},
656 {D3DSIO_SLT
, "slt", "SLT", 3, pshader_slt
, 0, 0},
657 {D3DSIO_SGE
, "sge", "SGE", 3, pshader_sge
, 0, 0},
658 {D3DSIO_ABS
, "abs", "ABS", 2, pshader_abs
, 0, 0},
659 {D3DSIO_EXP
, "exp", "EX2", 2, pshader_exp
, 0, 0},
660 {D3DSIO_LOG
, "log", "LG2", 2, pshader_log
, 0, 0},
661 {D3DSIO_LIT
, "lit", "LIT", 2, pshader_lit
, 0, 0},
662 {D3DSIO_DST
, "dst", "DST", 3, pshader_dst
, 0, 0},
663 {D3DSIO_LRP
, "lrp", "LRP", 4, pshader_lrp
, 0, 0},
664 {D3DSIO_FRC
, "frc", "FRC", 2, pshader_frc
, 0, 0},
665 {D3DSIO_M4x4
, "m4x4", "undefined", 3, pshader_m4x4
, 0, 0},
666 {D3DSIO_M4x3
, "m4x3", "undefined", 3, pshader_m4x3
, 0, 0},
667 {D3DSIO_M3x4
, "m3x4", "undefined", 3, pshader_m3x4
, 0, 0},
668 {D3DSIO_M3x3
, "m3x3", "undefined", 3, pshader_m3x3
, 0, 0},
669 {D3DSIO_M3x2
, "m3x2", "undefined", 3, pshader_m3x2
, 0, 0},
672 /** FIXME: use direct access so add the others opcodes as stubs */
673 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
674 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
675 {D3DSIO_CALL
, "call", GLNAME_REQUIRE_GLSL
, 1, pshader_call
, 0, 0},
676 {D3DSIO_CALLNZ
, "callnz", GLNAME_REQUIRE_GLSL
, 2, pshader_callnz
, 0, 0},
677 {D3DSIO_LOOP
, "loop", GLNAME_REQUIRE_GLSL
, 2, pshader_loop
, 0, 0},
678 {D3DSIO_RET
, "ret", GLNAME_REQUIRE_GLSL
, 0, pshader_ret
, 0, 0},
679 {D3DSIO_ENDLOOP
, "endloop", GLNAME_REQUIRE_GLSL
, 0, pshader_endloop
, 0, 0},
680 {D3DSIO_LABEL
, "label", GLNAME_REQUIRE_GLSL
, 1, pshader_label
, 0, 0},
681 /* DCL is a specil operation */
682 {D3DSIO_DCL
, "dcl", NULL
, 1, pshader_dcl
, 0, 0},
683 {D3DSIO_POW
, "pow", "POW", 3, pshader_pow
, 0, 0},
684 {D3DSIO_CRS
, "crs", "XPS", 3, pshader_crs
, 0, 0},
685 /* TODO: sng can possibly be performed as
688 {D3DSIO_SGN
, "sng", NULL
, 2, pshader_sng
, 0, 0},
689 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
692 MUL vec.xyz, vec, tmp;
693 but I think this is better because it accounts for w properly.
699 {D3DSIO_NRM
, "nrm", NULL
, 2, pshader_nrm
, 0, 0},
700 {D3DSIO_SINCOS
, "sincos", NULL
, 2, pshader_sincos
, 0, 0},
701 {D3DSIO_REP
, "rep", GLNAME_REQUIRE_GLSL
, 2, pshader_rep
, 0, 0},
702 {D3DSIO_ENDREP
, "endrep", GLNAME_REQUIRE_GLSL
, 0, pshader_endrep
, 0, 0},
703 {D3DSIO_IF
, "if", GLNAME_REQUIRE_GLSL
, 2, pshader_if
, 0, 0},
704 {D3DSIO_IFC
, "ifc", GLNAME_REQUIRE_GLSL
, 2, pshader_ifc
, 0, 0},
705 {D3DSIO_ELSE
, "else", GLNAME_REQUIRE_GLSL
, 2, pshader_else
, 0, 0},
706 {D3DSIO_ENDIF
, "endif", GLNAME_REQUIRE_GLSL
, 2, pshader_endif
, 0, 0},
707 {D3DSIO_BREAK
, "break", GLNAME_REQUIRE_GLSL
, 2, pshader_break
, 0, 0},
708 {D3DSIO_BREAKC
, "breakc", GLNAME_REQUIRE_GLSL
, 2, pshader_breakc
, 0, 0},
709 {D3DSIO_MOVA
, "mova", GLNAME_REQUIRE_GLSL
, 2, pshader_mova
, 0, 0},
710 {D3DSIO_DEFB
, "defb", GLNAME_REQUIRE_GLSL
, 2, pshader_defb
, 0, 0},
711 {D3DSIO_DEFI
, "defi", GLNAME_REQUIRE_GLSL
, 2, pshader_defi
, 0, 0},
713 {D3DSIO_TEXCOORD
, "texcoord", "undefined", 1, pshader_texcoord
, 0, D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXCOORD
, "texcrd", "undefined", 2, pshader_texcoord
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
715 {D3DSIO_TEXKILL
, "texkill", "KIL", 1, pshader_texkill
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
716 {D3DSIO_TEX
, "tex", "undefined", 1, pshader_tex
, 0, D3DPS_VERSION(1,3)},
717 {D3DSIO_TEX
, "texld", GLNAME_REQUIRE_GLSL
, 2, pshader_texld
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
718 {D3DSIO_TEXBEM
, "texbem", "undefined", 2, pshader_texbem
, 0, D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXBEML
, "texbeml", GLNAME_REQUIRE_GLSL
, 2, pshader_texbeml
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_TEXREG2AR
,"texreg2ar","undefined", 2, pshader_texreg2ar
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
721 {D3DSIO_TEXREG2GB
,"texreg2gb","undefined", 2, pshader_texreg2gb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
722 {D3DSIO_TEXM3x2PAD
, "texm3x2pad", "undefined", 2, pshader_texm3x2pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
723 {D3DSIO_TEXM3x2TEX
, "texm3x2tex", "undefined", 2, pshader_texm3x2tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
724 {D3DSIO_TEXM3x3PAD
, "texm3x3pad", "undefined", 2, pshader_texm3x3pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
725 {D3DSIO_TEXM3x3DIFF
, "texm3x3diff", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3diff
, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
726 {D3DSIO_TEXM3x3SPEC
, "texm3x3spec", "undefined", 3, pshader_texm3x3spec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXM3x3VSPEC
, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXM3x3TEX
, "texm3x3tex", "undefined", 2, pshader_texm3x3tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
729 {D3DSIO_EXPP
, "expp", "EXP", 2, pshader_expp
, 0, 0},
730 {D3DSIO_LOGP
, "logp", "LOG", 2, pshader_logp
, 0, 0},
731 {D3DSIO_CND
, "cnd", GLNAME_REQUIRE_GLSL
, 4, pshader_cnd
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
732 /* def is a special operation */
733 {D3DSIO_DEF
, "def", "undefined", 5, pshader_def
, 0, 0},
734 {D3DSIO_TEXREG2RGB
, "texreg2rgb", GLNAME_REQUIRE_GLSL
, 2, pshader_texreg2rgb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
735 {D3DSIO_TEXDP3TEX
, "texdp3tex", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3tex
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
736 {D3DSIO_TEXM3x2DEPTH
, "texm3x2depth", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x2depth
,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
737 {D3DSIO_TEXDP3
, "texdp3", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
738 {D3DSIO_TEXM3x3
, "texm3x3", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
739 {D3DSIO_TEXDEPTH
, "texdepth", GLNAME_REQUIRE_GLSL
,1, pshader_texdepth
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
740 {D3DSIO_CMP
, "cmp", GLNAME_REQUIRE_GLSL
, 4, pshader_cmp
, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
741 {D3DSIO_BEM
, "bem", GLNAME_REQUIRE_GLSL
, 3, pshader_bem
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
742 /* TODO: dp2add can be made out of multiple instuctions */
743 {D3DSIO_DP2ADD
, "dp2add", GLNAME_REQUIRE_GLSL
, 2, pshader_dp2add
, 0, 0},
744 {D3DSIO_DSX
, "dsx", GLNAME_REQUIRE_GLSL
, 2, pshader_dsx
, 0, 0},
745 {D3DSIO_DSY
, "dsy", GLNAME_REQUIRE_GLSL
, 2, pshader_dsy
, 0, 0},
746 {D3DSIO_TEXLDD
, "texldd", GLNAME_REQUIRE_GLSL
, 2, pshader_texldd
, 0, 0},
747 {D3DSIO_SETP
, "setp", GLNAME_REQUIRE_GLSL
, 2, pshader_setp
, 0, 0},
748 {D3DSIO_TEXLDL
, "texdl", GLNAME_REQUIRE_GLSL
, 2, pshader_texldl
, 0, 0},
749 {D3DSIO_BREAKP
, "breakp", GLNAME_REQUIRE_GLSL
, 2, pshader_breakp
, 0, 0},
750 {D3DSIO_PHASE
, "phase", GLNAME_REQUIRE_GLSL
, 0, pshader_nop
, 0, 0},
751 {0, NULL
, NULL
, 0, NULL
, 0, 0}
755 inline static const SHADER_OPCODE
* pshader_program_get_opcode(const DWORD code
, const int version
) {
757 DWORD hex_version
= D3DPS_VERSION(version
/10, version
%10);
758 /** TODO: use dichotomic search */
759 while (NULL
!= pshader_ins
[i
].name
) {
760 if (((code
& D3DSI_OPCODE_MASK
) == pshader_ins
[i
].opcode
) &&
761 (((hex_version
>= pshader_ins
[i
].min_version
) && (hex_version
<= pshader_ins
[i
].max_version
)) ||
762 ((pshader_ins
[i
].min_version
== 0) && (pshader_ins
[i
].max_version
== 0)))) {
763 return &pshader_ins
[i
];
767 FIXME("Unsupported opcode %lx(%ld) masked %lx version %d\n", code
, code
, code
& D3DSI_OPCODE_MASK
, version
);
771 inline static BOOL
pshader_is_version_token(DWORD token
) {
772 return 0xFFFF0000 == (token
& 0xFFFF0000);
775 inline static BOOL
pshader_is_comment_token(DWORD token
) {
776 return D3DSIO_COMMENT
== (token
& D3DSI_OPCODE_MASK
);
780 inline static void get_register_name(const DWORD param
, char* regstr
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
781 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
783 DWORD reg
= param
& REGMASK
;
784 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
788 sprintf(regstr
, "R%lu", reg
);
792 strcpy(regstr
, "fragment.color.primary");
794 strcpy(regstr
, "fragment.color.secondary");
799 sprintf(regstr
, "C%lu", reg
);
801 sprintf(regstr
, "program.env[%lu]", reg
);
803 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
804 sprintf(regstr
,"T%lu", reg
);
807 sprintf(regstr
, "%s", rastout_reg_names
[reg
]);
810 sprintf(regstr
, "oD[%lu]", reg
);
812 case D3DSPR_TEXCRDOUT
:
813 sprintf(regstr
, "oT[%lu]", reg
);
816 FIXME("Unhandled register name Type(%ld)\n", regtype
);
821 inline static void get_write_mask(const DWORD output_reg
, char *write_mask
) {
823 if ((output_reg
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
824 strcat(write_mask
, ".");
825 if (output_reg
& D3DSP_WRITEMASK_0
) strcat(write_mask
, "r");
826 if (output_reg
& D3DSP_WRITEMASK_1
) strcat(write_mask
, "g");
827 if (output_reg
& D3DSP_WRITEMASK_2
) strcat(write_mask
, "b");
828 if (output_reg
& D3DSP_WRITEMASK_3
) strcat(write_mask
, "a");
832 inline static void get_input_register_swizzle(const DWORD instr
, char *swzstring
) {
833 static const char swizzle_reg_chars
[] = "rgba";
834 DWORD swizzle
= (instr
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
835 DWORD swizzle_x
= swizzle
& 0x03;
836 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
837 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
838 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
840 * swizzle bits fields:
844 if ((D3DSP_NOSWIZZLE
>> D3DSP_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
845 if (swizzle_x
== swizzle_y
&&
846 swizzle_x
== swizzle_z
&&
847 swizzle_x
== swizzle_w
) {
848 sprintf(swzstring
, ".%c", swizzle_reg_chars
[swizzle_x
]);
850 sprintf(swzstring
, ".%c%c%c%c",
851 swizzle_reg_chars
[swizzle_x
],
852 swizzle_reg_chars
[swizzle_y
],
853 swizzle_reg_chars
[swizzle_z
],
854 swizzle_reg_chars
[swizzle_w
]);
/*
 * Append `line` to the program text being assembled in `pgm`.
 *
 *   lineNum   - line counter of the generated program; only read by the
 *               visible TRACE (NOTE(review): the statement that advances it
 *               is not visible in this excerpt).
 *   pgm       - destination buffer, PGMSIZE bytes as allocated by the caller.
 *   pgmLength - in/out running length of the text already in `pgm`.
 *   line      - NUL-terminated text to append.
 *
 * The copy uses memcpy() with the string length, so no terminator is written
 * here; the caller terminates the buffer once the program is complete.
 */
859 inline static void addline(unsigned int *lineNum
, char *pgm
, unsigned int *pgmLength
, char *line
) {
860 int lineLen
= strlen(line
);
/* Overflow guard: the appended text plus a trailing NUL must fit in PGMSIZE.
 * NOTE(review): the message says "vertex program" although this file builds
 * pixel shader programs. */
861 if(lineLen
+ *pgmLength
> PGMSIZE
- 1 /* - 1 to allow a NULL at the end */) {
862 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE
, lineLen
+ *pgmLength
);
865 memcpy(pgm
+ *pgmLength
, line
, lineLen
);
868 *pgmLength
+= lineLen
;
870 TRACE("GL HW (%u, %u) : %s", *lineNum
, *pgmLength
, line
);
/*
 * Operand names indexed by the destination shift value (0..15) and used by
 * gen_output_modifier_line() as the multiplier of the generated MUL.
 * "coefmul" and "coefdiv" are PARAMs that the program generator declares as
 * { 2, 4, 8, 16 } and { 0.5, 0.25, 0.125, 0.0625 }.
 * NOTE(review): no "dummy" operand is declared by the visible generator code,
 * so the entries marked "dummy" look like placeholders for shifts that are
 * not supported -- confirm before relying on them.
 */
873 static const char* shift_tab
[] = {
874 "dummy", /* 0 (none) */
875 "coefmul.x", /* 1 (x2) */
876 "coefmul.y", /* 2 (x4) */
877 "coefmul.z", /* 3 (x8) */
878 "coefmul.w", /* 4 (x16) */
879 "dummy", /* 5 (x32) */
880 "dummy", /* 6 (x64) */
881 "dummy", /* 7 (x128) */
882 "dummy", /* 8 (d256) */
883 "dummy", /* 9 (d128) */
884 "dummy", /* 10 (d64) */
885 "dummy", /* 11 (d32) */
886 "coefdiv.w", /* 12 (d16) */
887 "coefdiv.z", /* 13 (d8) */
888 "coefdiv.y", /* 14 (d4) */
889 "coefdiv.x" /* 15 (d2) */
892 inline static void gen_output_modifier_line(int saturate
, char *write_mask
, int shift
, char *regstr
, char* line
) {
893 /* Generate a line that does the output modifier computation */
894 sprintf(line
, "MUL%s %s%s, %s, %s;", saturate
? "_SAT" : "", regstr
, write_mask
, regstr
, shift_tab
[shift
]);
/*
 * Translate the source modifier of one source parameter token.
 *
 *   instr     - source parameter token; the modifier is the D3DSP_SRCMOD_MASK
 *               field and the register is resolved with get_register_name().
 *   tmpreg    - index of the scratch register to use; it is turned into the
 *               name "T<letter>" with letter = 'A' + tmpreg.
 *   outregstr - receives the operand the instruction should read: the plain
 *               (or negated) register name, or the scratch register name.
 *   line      - receives the extra instruction(s) computing the modified
 *               value when one is needed.
 *   constants - forwarded to get_register_name().
 *
 * Callers append `line` to the program only when the return value is
 * non-zero. NOTE(review): the return statements and most case labels are not
 * visible in this excerpt, so the mapping of modifiers to the lines below is
 * only confirmed for D3DSPSM_BIASNEG and D3DSPSM_SIGNNEG.
 *
 * `regstr` and `tmpline` are function-level static buffers, so their contents
 * are shared between calls.
 */
897 inline static int gen_input_modifier_line(const DWORD instr
, int tmpreg
, char *outregstr
, char *line
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
898 /* Generate a line that does the input modifier computation and return the input register to use */
899 static char regstr
[256];
900 static char tmpline
[256];
903 /* Assume a new line will be added */
906 /* Get register name */
907 get_register_name(instr
, regstr
, constants
);
909 TRACE(" Register name %s\n", regstr
);
910 switch (instr
& D3DSP_SRCMOD_MASK
) {
912 strcpy(outregstr
, regstr
);
916 sprintf(outregstr
, "-%s", regstr
);
/* T = reg - 0.5 (coefdiv.x is declared as 0.5 by the program generator) */
920 sprintf(line
, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg
, regstr
);
922 case D3DSPSM_BIASNEG
:
923 sprintf(line
, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg
, regstr
);
/* T = reg * 2 - 1 (coefmul.x is declared as 2, one.x as 1.0) */
926 sprintf(line
, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg
, regstr
);
928 case D3DSPSM_SIGNNEG
:
929 sprintf(line
, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg
, regstr
);
932 sprintf(line
, "SUB T%c, one.x, %s;", 'A' + tmpreg
, regstr
);
935 sprintf(line
, "ADD T%c, %s, %s;", 'A' + tmpreg
, regstr
, regstr
);
938 sprintf(line
, "ADD T%c, -%s, -%s;", 'A' + tmpreg
, regstr
, regstr
);
/* Two instructions in one `line`: a reciprocal of the .z component followed
 * by a multiply, joined with an embedded newline. */
941 sprintf(line
, "RCP T%c, %s.z;", 'A' + tmpreg
, regstr
);
942 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
943 strcat(line
, "\n"); /* Hack */
944 strcat(line
, tmpline
);
/* Same two-instruction pattern, but the reciprocal is taken of the register
 * without a component selector. */
947 sprintf(line
, "RCP T%c, %s;", 'A' + tmpreg
, regstr
);
948 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
949 strcat(line
, "\n"); /* Hack */
950 strcat(line
, tmpline
);
953 strcpy(outregstr
, regstr
);
958 /* Substitute the register name */
959 sprintf(outregstr
, "T%c", 'A' + tmpreg
);
964 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/*
 * Translate the D3D pixel shader token stream `pFunction` into the text of a
 * GL fragment program and hand it to the GL driver.
 *
 *   iface     - the pixel shader object; its `constants` flags are cleared
 *               and then set for every constant defined by the shader, and
 *               its `prgId` receives the generated GL program id.
 *   pFunction - token stream terminated by D3DPS_END(); may be NULL.
 *
 * The program text is assembled with addline() in a PGMSIZE byte heap buffer:
 * first a header selected by the shader version, then declarations of the
 * temporaries and of the coefdiv/coefmul/one PARAMs, then the translation of
 * each opcode, and finally "MOV result.color, R0;" and "END". The buffer is
 * NUL-terminated, passed to glProgramStringARB() and freed.
 *
 * NOTE(review): many lines of this function (declarations, case labels,
 * else branches, closing braces) are not visible in this excerpt.
 */
965 inline static VOID
IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
966 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
967 const DWORD
*pToken
= pFunction
;
968 const SHADER_OPCODE
*curOpcode
= NULL
;
971 unsigned lineNum
= 0; /* The line number of the generated program (for logging)*/
972 char *pgmStr
= NULL
; /* A pointer to the program data generated by this function */
974 DWORD nUseAddressRegister
= 0;
975 #if 0 /* TODO: loop register (just another address register ) */
976 BOOL hasLoops
= FALSE
;
979 BOOL saturate
; /* clamp to 0.0 -> 1.0*/
980 int row
= 0; /* not sure, something to do with macros? */
982 int version
= 0; /* The version of the shader */
984 /* Keep a running length for pgmStr so that we don't have to calculate strlen every time we concatenate */
985 unsigned int pgmLength
= 0;
987 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
988 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
989 if (This
->device
->fixupVertexBufferSize
< PGMSIZE
) {
990 HeapFree(GetProcessHeap(), 0, This
->fixupVertexBuffer
);
991 This
->fixupVertexBuffer
= HeapAlloc(GetProcessHeap() , 0, PGMSIZE
);
992 This
->fixupVertexBufferSize
= PGMSIZE
;
993 This
->fixupVertexBuffer
[0] = 0;
995 pgmStr
= This
->device
->fixupVertexBuffer
;
/* NOTE(review): no check of the HeapAlloc result appears in the visible
 * lines before pgmStr is written to. */
997 pgmStr
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, PGMSIZE
); /* 64kb should be enough */
1001 /* TODO: Think about using a first pass to work out what's required for the second pass. */
1002 for(i
= 0; i
< WINED3D_PSHADER_MAX_CONSTANTS
; i
++)
1003 This
->constants
[i
] = 0;
1005 if (NULL
!= pToken
) {
1006 while (D3DPS_END() != *pToken
) {
1007 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1009 instructionSize
= pToken
& SIZEBITS
>> 27;
1012 if (pshader_is_version_token(*pToken
)) { /** version */
1016 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1017 version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1019 TRACE("found version token ps.%lu.%lu;\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1021 /* Each release of pixel shaders has had different numbers of temp registers */
1027 case 14: numTemps
=12;
1029 strcpy(tmpLine
, "!!ARBfp1.0\n");
1031 case 20: numTemps
=12;
1033 strcpy(tmpLine
, "!!ARBfp2.0\n");
1034 FIXME("No work done yet to support ps2.0 in hw\n");
1036 case 30: numTemps
=32;
1038 strcpy(tmpLine
, "!!ARBfp3.0\n");
1039 FIXME("No work done yet to support ps3.0 in hw\n");
1044 strcpy(tmpLine
, "!!ARBfp1.0\n");
1045 FIXME("Unrecognized pixel shader version!\n");
1047 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1049 /* TODO: find out how many registers are really needed */
1050 for(i
= 0; i
< 6; i
++) {
1051 sprintf(tmpLine
, "TEMP T%lu;\n", i
);
1052 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1055 for(i
= 0; i
< 6; i
++) {
1056 sprintf(tmpLine
, "TEMP R%lu;\n", i
);
1057 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
/* Scratch registers: TMP/TMP2 are used by the texture opcodes below, and
 * TA/TB/TC are the names gen_input_modifier_line() produces ('A' + tmpreg). */
1060 sprintf(tmpLine
, "TEMP TMP;\n");
1061 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1062 sprintf(tmpLine
, "TEMP TMP2;\n");
1063 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1064 sprintf(tmpLine
, "TEMP TA;\n");
1065 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1066 sprintf(tmpLine
, "TEMP TB;\n");
1067 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1068 sprintf(tmpLine
, "TEMP TC;\n");
1069 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
/* Constant vectors referenced by the modifier code and by shift_tab. */
1071 strcpy(tmpLine
, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1072 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1073 strcpy(tmpLine
, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1074 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1075 strcpy(tmpLine
, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1076 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
/* Preload T0..T3 with the matching interpolated texture coordinates. */
1078 for(i
= 0; i
< 4; i
++) {
1079 sprintf(tmpLine
, "MOV T%lu, fragment.texcoord[%lu];\n", i
, i
);
1080 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1087 if (pshader_is_comment_token(*pToken
)) { /** comment */
1088 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1090 FIXME("#%s\n", (char*)pToken
);
1091 pToken
+= comment_len
;
1095 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1099 curOpcode
= pshader_program_get_opcode(*pToken
, version
);
1101 if (NULL
== curOpcode
) {
1102 /* unknown current opcode ... (shouldn't be any!) */
1103 while (*pToken
& 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1104 FIXME("unrecognized opcode: %08lx\n", *pToken
);
1107 } else if (GLNAME_REQUIRE_GLSL
== curOpcode
->glname
) {
1108 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1109 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode
->name
);
1110 pToken
+= curOpcode
->num_params
;
1112 TRACE("Found opcode %s %s\n", curOpcode
->name
, curOpcode
->glname
);
1115 /* Build opcode for GL vertex_program */
1116 switch (curOpcode
->opcode
) {
1121 /* Address registers must be loaded with the ARL instruction */
1122 if ((((*pToken
) & D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) == D3DSPR_ADDR
) {
1123 if (((*pToken
) & REGMASK
) < nUseAddressRegister
) {
1124 strcpy(tmpLine
, "ARL");
1127 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This
, ((*pToken
) & REGMASK
));
1152 case D3DSIO_TEXKILL
:
1153 TRACE("Appending glname %s to tmpLine\n", curOpcode
->glname
);
1154 strcpy(tmpLine
, curOpcode
->glname
);
/* Constant definition: emit a named PARAM C<n> from the four floats that
 * follow the destination token and flag the constant as locally defined. */
1158 DWORD reg
= *pToken
& REGMASK
;
1159 sprintf(tmpLine
, "PARAM C%lu = { %f, %f, %f, %f };\n", reg
,
1160 *((const float *)(pToken
+ 1)),
1161 *((const float *)(pToken
+ 2)),
1162 *((const float *)(pToken
+ 3)),
1163 *((const float *)(pToken
+ 4)) );
1165 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1167 This
->constants
[reg
] = 1;
1175 get_write_mask(*pToken
, tmp
);
1176 if (version
!= 14) {
1177 DWORD reg
= *pToken
& REGMASK
;
1178 sprintf(tmpLine
,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg
, tmp
, reg
, reg
);
1179 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1183 DWORD reg1
= *pToken
& REGMASK
;
1184 DWORD reg2
= *++pToken
& REGMASK
;
1185 if (gen_input_modifier_line(*pToken
, 0, reg
, tmpLine
, This
->constants
)) {
1186 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1188 sprintf(tmpLine
,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1
, tmp
, reg
, reg2
);
1189 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1195 case D3DSIO_TEXCOORD
:
1198 get_write_mask(*pToken
, tmp
);
1199 if (version
!= 14) {
1200 DWORD reg
= *pToken
& REGMASK
;
1201 sprintf(tmpLine
, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg
, tmp
, reg
);
1202 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1205 DWORD reg1
= *pToken
& REGMASK
;
1206 DWORD reg2
= *++pToken
& REGMASK
;
1207 sprintf(tmpLine
, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1
, tmp
, reg2
);
1208 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1214 case D3DSIO_TEXM3x2PAD
:
1216 DWORD reg
= *pToken
& REGMASK
;
1218 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1219 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1221 sprintf(tmpLine
, "DP3 TMP.x, T%lu, %s;\n", reg
, buf
);
1222 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1227 case D3DSIO_TEXM3x2TEX
:
1229 DWORD reg
= *pToken
& REGMASK
;
1231 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1232 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1234 sprintf(tmpLine
, "DP3 TMP.y, T%lu, %s;\n", reg
, buf
);
1235 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1236 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg
, reg
);
1237 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1242 case D3DSIO_TEXREG2AR
:
1244 DWORD reg1
= *pToken
& REGMASK
;
1245 DWORD reg2
= *++pToken
& REGMASK
;
1246 sprintf(tmpLine
, "MOV TMP.r, T%lu.a;\n", reg2
);
1247 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1248 sprintf(tmpLine
, "MOV TMP.g, T%lu.r;\n", reg2
);
1249 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1250 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1251 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1256 case D3DSIO_TEXREG2GB
:
1258 DWORD reg1
= *pToken
& REGMASK
;
1259 DWORD reg2
= *++pToken
& REGMASK
;
1260 sprintf(tmpLine
, "MOV TMP.r, T%lu.g;\n", reg2
);
1261 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1262 sprintf(tmpLine
, "MOV TMP.g, T%lu.b;\n", reg2
);
1263 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1264 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1265 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1272 DWORD reg1
= *pToken
& REGMASK
;
1273 DWORD reg2
= *++pToken
& REGMASK
;
1275 /* FIXME: Should apply the BUMPMAPENV matrix */
1276 sprintf(tmpLine
, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1
, reg2
);
1277 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1278 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1279 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1284 case D3DSIO_TEXM3x3PAD
:
1286 DWORD reg
= *pToken
& REGMASK
;
1288 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1289 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
/* `row` selects which TMP component receives this dot product ('x' + row). */
1291 sprintf(tmpLine
, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row
, reg
, buf
);
1292 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1298 case D3DSIO_TEXM3x3TEX
:
1300 DWORD reg
= *pToken
& REGMASK
;
1302 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1303 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1306 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1307 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1309 /* Cubemap textures will be more used than 3D ones. */
1310 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1311 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1316 case D3DSIO_TEXM3x3VSPEC
:
1318 DWORD reg
= *pToken
& REGMASK
;
1320 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1321 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1323 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1324 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1326 /* Construct the eye-ray vector from w coordinates */
/* NOTE(review): tcw[] is read here, but its declaration and the code that
 * fills it are not visible in this excerpt. */
1327 sprintf(tmpLine
, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw
[0]);
1328 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1329 sprintf(tmpLine
, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw
[1]);
1330 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1331 sprintf(tmpLine
, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg
);
1332 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1334 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1335 sprintf(tmpLine
, "DP3 TMP.w, TMP, TMP2;\n");
1336 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1337 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;\n");
1338 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1339 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1340 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1342 /* Cubemap textures will be more used than 3D ones. */
1343 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1344 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1350 case D3DSIO_TEXM3x3SPEC
:
1352 DWORD reg
= *pToken
& REGMASK
;
1353 DWORD reg3
= *(pToken
+ 2) & REGMASK
;
1355 if (gen_input_modifier_line(*(pToken
+ 1), 0, buf
, tmpLine
, This
->constants
)) {
1356 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1358 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1359 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1361 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1362 sprintf(tmpLine
, "DP3 TMP.w, TMP, C[%lu];\n", reg3
);
1363 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1365 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;\n");
1366 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1367 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3
);
1368 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1370 /* Cubemap textures will be more used than 3D ones. */
1371 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1372 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1380 if (curOpcode
->glname
== GLNAME_REQUIRE_GLSL
) {
1381 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode
->name
);
1383 FIXME("Can't handle opcode %s in hwShader\n", curOpcode
->name
);
1385 pToken
+= curOpcode
->num_params
; /* maybe + 1 */
1389 if (0 != (*pToken
& D3DSP_DSTMOD_MASK
)) {
1390 DWORD mask
= *pToken
& D3DSP_DSTMOD_MASK
;
1392 case D3DSPDM_SATURATE
: saturate
= TRUE
; break;
1393 #if 0 /* as yet unhandled modifiers */
1394 case D3DSPDM_CENTROID
: centroid
= TRUE
; break;
1395 case D3DSPDM_PP
: partialpresision
= TRUE
; break;
1396 case D3DSPDM_X2
: X2
= TRUE
; break;
1397 case D3DSPDM_X4
: X4
= TRUE
; break;
1398 case D3DSPDM_X8
: X8
= TRUE
; break;
1399 case D3DSPDM_D2
: D2
= TRUE
; break;
1400 case D3DSPDM_D4
: D4
= TRUE
; break;
1401 case D3DSPDM_D8
: D8
= TRUE
; break;
1404 TRACE("_unhandled_modifier(0x%08lx)\n", mask
);
1408 /* Generate input and output registers */
1409 if (curOpcode
->num_params
> 0) {
1411 char operands
[4][100];
1415 TRACE("(%p): Opcode has %d params\n", This
, curOpcode
->num_params
);
1417 /* Generate lines that handle input modifier computation */
1418 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1419 TRACE("(%p) : Param %ld token %lx\n", This
, i
, *(pToken
+ i
));
1420 if (gen_input_modifier_line(*(pToken
+ i
), i
- 1, regs
[i
- 1], tmpOp
, This
->constants
)) {
1421 addline(&lineNum
, pgmStr
, &pgmLength
, tmpOp
);
1425 /* Handle saturation only when no shift is present in the output modifier */
1426 if ((*pToken
& D3DSPDM_SATURATE
) && (0 == (*pToken
& D3DSP_DSTSHIFT_MASK
)))
1429 /* Handle output register */
/* operands[0] = destination register name followed by its write mask;
 * tmpOp is reused as scratch for both pieces. */
1430 get_register_name(*pToken
, tmpOp
, This
->constants
);
1431 strcpy(operands
[0], tmpOp
);
1432 get_write_mask(*pToken
, tmpOp
);
1433 strcat(operands
[0], tmpOp
);
1435 /* This function works because of side effects from gen_input_modifier_line */
1436 /* Handle input registers */
1437 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1438 TRACE("(%p) : Regs = %s\n", This
, regs
[i
- 1]);
1439 strcpy(operands
[i
], regs
[i
- 1]);
1440 get_input_register_swizzle(*(pToken
+ i
), swzstring
);
1441 strcat(operands
[i
], swzstring
);
1444 switch(curOpcode
->opcode
) {
/* Note the operand order: source 3 is passed before source 2 here. */
1446 sprintf(tmpLine
, "CMP%s %s, %s, %s, %s;\n", (saturate
? "_SAT" : ""), operands
[0], operands
[1], operands
[3], operands
[2]);
1449 sprintf(tmpLine
, "ADD TMP, -%s, coefdiv.x;", operands
[1]);
1450 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1451 sprintf(tmpLine
, "CMP%s %s, TMP, %s, %s;\n", (saturate
? "_SAT" : ""), operands
[0], operands
[2], operands
[3]);
1455 strcat(tmpLine
, "_SAT");
1456 strcat(tmpLine
, " ");
1457 strcat(tmpLine
, operands
[0]);
1458 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
1459 strcat(tmpLine
, ", ");
1460 strcat(tmpLine
, operands
[i
]);
1462 strcat(tmpLine
,";\n");
1464 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1465 pToken
+= curOpcode
->num_params
;
1467 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1468 if (curOpcode
->num_params
> 0) {
1469 DWORD param
= *(pInstr
+ 1);
1470 if (0 != (param
& D3DSP_DSTSHIFT_MASK
)) {
1472 /* Generate a line that handle the output modifier computation */
1474 char write_mask
[20];
1475 DWORD shift
= (param
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
1476 get_register_name(param
, regstr
, This
->constants
);
1477 get_write_mask(param
, write_mask
);
1478 gen_output_modifier_line(saturate
, write_mask
, shift
, regstr
, tmpLine
);
1479 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1485 /* TODO: What about result.depth? */
1486 strcpy(tmpLine
, "MOV result.color, R0;\n");
1487 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1489 strcpy(tmpLine
, "END\n");
1490 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1493 /* finally null terminate the pgmStr*/
1494 pgmStr
[pgmLength
] = 0;
/* NOTE(review): the guard tests ARB_VERTEX_PROGRAM although everything
 * below targets GL_FRAGMENT_PROGRAM_ARB -- confirm this is intended. */
1495 if (GL_SUPPORT(ARB_VERTEX_PROGRAM
)) {
1496 /* Create the hw shader */
1498 /* pgmStr sometimes gets too long for a normal TRACE */
1499 TRACE("Generated program:\n");
1500 if (TRACE_ON(d3d_shader
)) {
1501 fprintf(stderr
, "%s\n", pgmStr
);
1504 /* TODO: change to resource.glObjectHandel or something like that */
1505 GL_EXTCALL(glGenProgramsARB(1, &This
->prgId
));
1507 TRACE("Creating a hw pixel shader, prg=%d\n", This
->prgId
);
1508 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
, This
->prgId
));
1510 TRACE("Created hw pixel shader, prg=%d\n", This
->prgId
);
1511 /* Create the program and check for errors */
1512 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
, strlen(pgmStr
), pgmStr
));
1513 if (glGetError() == GL_INVALID_OPERATION
) {
1515 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &errPos
);
1516 FIXME("HW PixelShader Error at position %d: %s\n",
1517 errPos
, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB
)));
1521 #if 1 /* if we're using the data buffer of device then we don't need to free it */
1522 HeapFree(GetProcessHeap(), 0, pgmStr
);
/*
 * Emit, through TRACE, a textual rendering of one shader parameter token:
 * the register name, then either the write mask or the source modifier and
 * swizzle.
 *
 *   param - the parameter token to dump.
 *   input - callers pass 0 for the first parameter of an instruction and 1
 *           for the following ones; presumably this selects the
 *           "operand output" versus "operand input" part below (the test on
 *           `input` is not visible in this excerpt -- TODO confirm).
 *
 * NOTE(review): several case labels and branch bodies of this function are
 * not visible in this excerpt.
 */
1526 inline static void pshader_program_dump_ps_param(const DWORD param
, int input
) {
1527 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1528 static const char swizzle_reg_chars
[] = "rgba";
1530 /* the unknown mask is for bits not yet accounted for by any other mask... */
1531 #define UNKNOWN_MASK 0xC000
1533 /* for registers above 7 we have to add on bits 11 and 12 to get the correct register */
1534 #define EXTENDED_REG 0x1800
1536 DWORD reg
= param
& D3DSP_REGNUM_MASK
;
/* The register type combines the regular type field with the EXTENDED_REG
 * bits shifted down by 8. */
1537 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) | ((param
& EXTENDED_REG
) >> 8);
1540 if ( ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
) ||
1541 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_BIASNEG
) ||
1542 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_SIGNNEG
) ||
1543 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_X2NEG
) )
1545 else if ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_COMP
)
1549 switch (regtype
/* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1557 TRACE("c%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1560 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
1563 case D3DSPR_RASTOUT
:
/* NOTE(review): no bounds check on `reg` is visible before this lookup in
 * the five-entry rastout_reg_names table. */
1564 TRACE("%s", rastout_reg_names
[reg
]);
1566 case D3DSPR_ATTROUT
:
1567 TRACE("oD%lu", reg
);
1569 case D3DSPR_TEXCRDOUT
:
1570 TRACE("oT%lu", reg
);
1572 case D3DSPR_CONSTINT
:
1573 TRACE("i%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1575 case D3DSPR_CONSTBOOL
:
1576 TRACE("b%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1582 TRACE("aL%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1589 /** operand output */
1591 * for better debugging traces it's done into opcode dump code
1592 * @see pshader_program_dump_opcode
1593 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1594 DWORD mask = param & D3DSP_DSTMOD_MASK;
1596 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1598 TRACE("_unhandled_modifier(0x%08lx)", mask);
1601 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1602 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1604 TRACE("_x%u", 1 << shift);
1608 if ((param
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
1610 if (param
& D3DSP_WRITEMASK_0
) TRACE(".r");
1611 if (param
& D3DSP_WRITEMASK_1
) TRACE(".g");
1612 if (param
& D3DSP_WRITEMASK_2
) TRACE(".b");
1613 if (param
& D3DSP_WRITEMASK_3
) TRACE(".a");
1616 /** operand input */
/* The swizzle field holds four 2-bit component selectors, r in the lowest
 * pair. */
1617 DWORD swizzle
= (param
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
1618 DWORD swizzle_r
= swizzle
& 0x03;
1619 DWORD swizzle_g
= (swizzle
>> 2) & 0x03;
1620 DWORD swizzle_b
= (swizzle
>> 4) & 0x03;
1621 DWORD swizzle_a
= (swizzle
>> 6) & 0x03;
1623 if (0 != (param
& D3DSP_SRCMOD_MASK
)) {
1624 DWORD mask
= param
& D3DSP_SRCMOD_MASK
;
1625 /*TRACE("_modifier(0x%08lx) ", mask);*/
/* The negated variants print the same suffix as their positive form. */
1627 case D3DSPSM_NONE
: break;
1628 case D3DSPSM_NEG
: break;
1629 case D3DSPSM_BIAS
: TRACE("_bias"); break;
1630 case D3DSPSM_BIASNEG
: TRACE("_bias"); break;
1631 case D3DSPSM_SIGN
: TRACE("_bx2"); break;
1632 case D3DSPSM_SIGNNEG
: TRACE("_bx2"); break;
1633 case D3DSPSM_COMP
: break;
1634 case D3DSPSM_X2
: TRACE("_x2"); break;
1635 case D3DSPSM_X2NEG
: TRACE("_x2"); break;
1636 case D3DSPSM_DZ
: TRACE("_dz"); break;
1637 case D3DSPSM_DW
: TRACE("_dw"); break;
1639 TRACE("_unknown(0x%08lx)", mask
);
1644 * swizzle bits fields:
1647 if ((D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1648 if (swizzle_r
== swizzle_g
&&
1649 swizzle_r
== swizzle_b
&&
1650 swizzle_r
== swizzle_a
) {
1651 TRACE(".%c", swizzle_reg_chars
[swizzle_r
]);
1654 swizzle_reg_chars
[swizzle_r
],
1655 swizzle_reg_chars
[swizzle_g
],
1656 swizzle_reg_chars
[swizzle_b
],
1657 swizzle_reg_chars
[swizzle_a
]);
/*
 * Attach the token stream `pFunction` to the pixel shader.
 *
 * The stream is walked up to D3DPS_END() and traced in readable form
 * (version, comments, DCL/DEF and generic opcodes with their parameters)
 * while its length is accumulated; This->functionLength is then set from
 * that length. When a function is supplied and hardware mode is selected, a
 * GL program is generated with
 * IWineD3DPixelShaderImpl_GenerateProgramArbHW(). Finally the stream is
 * copied into a heap block stored in This->function (NULL when `pFunction`
 * is NULL).
 *
 * NOTE(review): the return statement and parts of the control flow are not
 * visible in this excerpt; the final TRACE reports D3D_OK.
 */
1663 HRESULT WINAPI
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
1664 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
1665 const DWORD
* pToken
= pFunction
;
1666 const SHADER_OPCODE
*curOpcode
= NULL
;
1670 TRACE("(%p) : Parsing programme\n", This
);
1672 if (NULL
!= pToken
) {
1673 while (D3DPS_END() != *pToken
) {
1674 if (pshader_is_version_token(*pToken
)) { /** version */
1675 version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1676 TRACE("ps_%lu_%lu\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1681 if (pshader_is_comment_token(*pToken
)) { /** comment */
1682 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1684 TRACE("//%s\n", (char*)pToken
);
/* Skip the comment payload; the extra 1 in `len` accounts for the comment
 * token itself. */
1685 pToken
+= comment_len
;
1686 len
+= comment_len
+ 1;
1690 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This
);
1692 curOpcode
= pshader_program_get_opcode(*pToken
, version
);
1695 if (NULL
== curOpcode
) {
1697 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1698 while (*pToken
& 0x80000000) {
1700 /* unknown current opcode ... */
1701 TRACE("unrecognized opcode: %08lx", *pToken
);
1708 if (curOpcode
->opcode
== D3DSIO_DCL
) {
/* The low 16 bits of the token select the declared usage; bits 16..19 are
 * printed as the usage index. */
1710 switch(*pToken
& 0xFFFF) {
1711 case D3DDECLUSAGE_POSITION
:
1712 TRACE("%s%ld ", "position",(*pToken
& 0xF0000) >> 16);
1714 case D3DDECLUSAGE_BLENDINDICES
:
1715 TRACE("%s ", "blend");
1717 case D3DDECLUSAGE_BLENDWEIGHT
:
1718 TRACE("%s ", "weight");
1720 case D3DDECLUSAGE_NORMAL
:
1721 TRACE("%s%ld ", "normal",(*pToken
& 0xF0000) >> 16);
1723 case D3DDECLUSAGE_PSIZE
:
1724 TRACE("%s ", "psize");
1726 case D3DDECLUSAGE_COLOR
:
1727 if((*pToken
& 0xF0000) >> 16 == 0) {
1728 TRACE("%s ", "color");
1730 TRACE("%s%ld ", "specular", ((*pToken
& 0xF0000) >> 16) - 1);
1733 case D3DDECLUSAGE_TEXCOORD
:
1734 TRACE("%s%ld ", "texture", (*pToken
& 0xF0000) >> 16);
1736 case D3DDECLUSAGE_TANGENT
:
1737 TRACE("%s ", "tangent");
1739 case D3DDECLUSAGE_BINORMAL
:
1740 TRACE("%s ", "binormal");
1742 case D3DDECLUSAGE_TESSFACTOR
:
1743 TRACE("%s ", "tessfactor");
1745 case D3DDECLUSAGE_POSITIONT
:
1746 TRACE("%s%ld ", "positionT",(*pToken
& 0xF0000) >> 16);
1748 case D3DDECLUSAGE_FOG
:
1749 TRACE("%s ", "fog");
1751 case D3DDECLUSAGE_DEPTH
:
1752 TRACE("%s ", "depth");
1754 case D3DDECLUSAGE_SAMPLE
:
1755 TRACE("%s ", "sample");
1758 FIXME("Unrecognised dcl %08lx", *pToken
& 0xFFFF);
1762 pshader_program_dump_ps_param(*pToken
, 0);
1766 if (curOpcode
->opcode
== D3DSIO_DEF
) {
1767 TRACE("def c%lu = ", *pToken
& 0xFF);
/* The four following tokens are traced as floats. */
1770 TRACE("%f ,", *(float *)pToken
);
1773 TRACE("%f ,", *(float *)pToken
);
1776 TRACE("%f ,", *(float *)pToken
);
1779 TRACE("%f", *(float *)pToken
);
1783 TRACE("%s ", curOpcode
->name
);
1784 if (curOpcode
->num_params
> 0) {
1785 pshader_program_dump_ps_param(*pToken
, 0);
1788 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1790 pshader_program_dump_ps_param(*pToken
, 1);
/* Length in bytes; the extra 1 presumably covers the end token -- TODO
 * confirm against the lines not visible here. */
1799 This
->functionLength
= (len
+ 1) * sizeof(DWORD
);
1801 This
->functionLength
= 1; /* no Function defined use fixed function vertex processing */
1804 /* Generate HW shader if needed */
/* NOTE(review): the hardware path is gated on wined3d_settings.vs_mode and
 * VS_HW, which by their names look like the vertex shader setting -- confirm
 * that this is the intended switch for pixel shaders. */
1805 if (NULL
!= pFunction
&& wined3d_settings
.vs_mode
== VS_HW
) {
1806 TRACE("(%p) : Generating hardware program\n", This
);
1808 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface
, pFunction
);
1812 TRACE("(%p) : Copying the function\n", This
);
1813 /* copy the function ... because it will certainly be released by application */
1814 if (NULL
!= pFunction
) {
/* NOTE(review): the HeapAlloc result is passed to memcpy on the next line
 * without a NULL check. */
1815 This
->function
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, This
->functionLength
);
1816 memcpy((void *)This
->function
, pFunction
, This
->functionLength
);
1818 This
->function
= NULL
;
1821 /* TODO: Some proper return values for failures */
1822 TRACE("(%p) : Returning D3D_OK\n", This
);
1826 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl
=
1828 /*** IUnknown methods ***/
1829 IWineD3DPixelShaderImpl_QueryInterface
,
1830 IWineD3DPixelShaderImpl_AddRef
,
1831 IWineD3DPixelShaderImpl_Release
,
1832 /*** IWineD3DPixelShader methods ***/
1833 IWineD3DPixelShaderImpl_GetParent
,
1834 IWineD3DPixelShaderImpl_GetDevice
,
1835 IWineD3DPixelShaderImpl_GetFunction
,
1836 /* not part of d3d */
1837 IWineD3DPixelShaderImpl_SetFunction