2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader
);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
40 /* The maximum size of the program */
43 #define REGMASK 0x00001FFF
45 #define GLNAME_REQUIRE_GLSL ((const char *)1)
46 /* *******************************************
47 IWineD3DPixelShader IUnknown parts follow
48 ******************************************* */
49 HRESULT WINAPI
IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader
*iface
, REFIID riid
, LPVOID
*ppobj
)
51 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
52 TRACE("(%p)->(%s,%p)\n",This
,debugstr_guid(riid
),ppobj
);
53 if (IsEqualGUID(riid
, &IID_IUnknown
)
54 || IsEqualGUID(riid
, &IID_IWineD3DBase
)
55 || IsEqualGUID(riid
, &IID_IWineD3DBaseShader
)
56 || IsEqualGUID(riid
, &IID_IWineD3DPixelShader
)) {
57 IUnknown_AddRef(iface
);
64 ULONG WINAPI
IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader
*iface
) {
65 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
66 TRACE("(%p) : AddRef increasing from %ld\n", This
, This
->ref
);
67 return InterlockedIncrement(&This
->ref
);
70 ULONG WINAPI
IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader
*iface
) {
71 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
73 TRACE("(%p) : Releasing from %ld\n", This
, This
->ref
);
74 ref
= InterlockedDecrement(&This
->ref
);
76 HeapFree(GetProcessHeap(), 0, This
);
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
   when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
87 /* *******************************************
88 IWineD3DPixelShader IWineD3DPixelShader parts follow
89 ******************************************* */
91 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader
*iface
, IUnknown
** parent
){
92 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
94 *parent
= This
->parent
;
95 IUnknown_AddRef(*parent
);
96 TRACE("(%p) : returning %p\n", This
, *parent
);
100 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader
* iface
, IWineD3DDevice
**pDevice
){
101 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
102 IWineD3DDevice_AddRef((IWineD3DDevice
*)This
->wineD3DDevice
);
103 *pDevice
= (IWineD3DDevice
*)This
->wineD3DDevice
;
104 TRACE("(%p) returning %p\n", This
, *pDevice
);
109 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader
* impl
, VOID
* pData
, UINT
* pSizeOfData
) {
110 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)impl
;
111 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This
, pData
, pSizeOfData
);
114 *pSizeOfData
= This
->baseShader
.functionLength
;
117 if (*pSizeOfData
< This
->baseShader
.functionLength
) {
118 *pSizeOfData
= This
->baseShader
.functionLength
;
119 return WINED3DERR_MOREDATA
;
121 if (NULL
== This
->baseShader
.function
) { /* no function defined */
122 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This
, pData
);
123 (*(DWORD
**) pData
) = NULL
;
125 if (This
->baseShader
.functionLength
== 0) {
128 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
129 memcpy(pData
, This
->baseShader
.function
, This
->baseShader
.functionLength
);
134 /*******************************
135 * pshader functions software VM
138 void pshader_add(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
139 d
->x
= s0
->x
+ s1
->x
;
140 d
->y
= s0
->y
+ s1
->y
;
141 d
->z
= s0
->z
+ s1
->z
;
142 d
->w
= s0
->w
+ s1
->w
;
143 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
144 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
147 void pshader_dp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
148 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
;
149 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
150 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
153 void pshader_dp4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
154 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
+ s0
->w
* s1
->w
;
155 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
156 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
159 void pshader_dst(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
161 d
->y
= s0
->y
* s1
->y
;
164 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
168 void pshader_expp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
174 tmp
.f
= floorf(s0
->w
);
175 d
->x
= powf(2.0f
, tmp
.f
);
176 d
->y
= s0
->w
- tmp
.f
;
177 tmp
.f
= powf(2.0f
, s0
->w
);
178 tmp
.d
&= 0xFFFFFF00U
;
181 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
182 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
185 void pshader_lit(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
187 d
->y
= (0.0f
< s0
->x
) ? s0
->x
: 0.0f
;
188 d
->z
= (0.0f
< s0
->x
&& 0.0f
< s0
->y
) ? powf(s0
->y
, s0
->w
) : 0.0f
;
190 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
191 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
194 void pshader_logp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
195 float tmp_f
= fabsf(s0
->w
);
196 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
197 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
198 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
201 void pshader_mad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
202 d
->x
= s0
->x
* s1
->x
+ s2
->x
;
203 d
->y
= s0
->y
* s1
->y
+ s2
->y
;
204 d
->z
= s0
->z
* s1
->z
+ s2
->z
;
205 d
->w
= s0
->w
* s1
->w
+ s2
->w
;
206 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
207 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, s2
->x
, s2
->y
, s2
->z
, s2
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
210 void pshader_max(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
211 d
->x
= (s0
->x
>= s1
->x
) ? s0
->x
: s1
->x
;
212 d
->y
= (s0
->y
>= s1
->y
) ? s0
->y
: s1
->y
;
213 d
->z
= (s0
->z
>= s1
->z
) ? s0
->z
: s1
->z
;
214 d
->w
= (s0
->w
>= s1
->w
) ? s0
->w
: s1
->w
;
215 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
216 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
219 void pshader_min(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
220 d
->x
= (s0
->x
< s1
->x
) ? s0
->x
: s1
->x
;
221 d
->y
= (s0
->y
< s1
->y
) ? s0
->y
: s1
->y
;
222 d
->z
= (s0
->z
< s1
->z
) ? s0
->z
: s1
->z
;
223 d
->w
= (s0
->w
< s1
->w
) ? s0
->w
: s1
->w
;
224 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
225 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
228 void pshader_mov(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
233 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
234 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
237 void pshader_mul(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
238 d
->x
= s0
->x
* s1
->x
;
239 d
->y
= s0
->y
* s1
->y
;
240 d
->z
= s0
->z
* s1
->z
;
241 d
->w
= s0
->w
* s1
->w
;
242 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
/* nop: does nothing apart from emitting a trace line. */
void pshader_nop(void)
{
    PSTRACE(("executing nop\n"));
}
251 void pshader_rcp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
252 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== s0
->w
) ? HUGE_VAL
: 1.0f
/ s0
->w
;
253 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
254 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
257 void pshader_rsq(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
258 float tmp_f
= fabsf(s0
->w
);
259 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== tmp_f
) ? HUGE_VAL
: ((1.0f
!= tmp_f
) ? 1.0f
/ sqrtf(tmp_f
) : 1.0f
);
260 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
261 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
264 void pshader_sge(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
265 d
->x
= (s0
->x
>= s1
->x
) ? 1.0f
: 0.0f
;
266 d
->y
= (s0
->y
>= s1
->y
) ? 1.0f
: 0.0f
;
267 d
->z
= (s0
->z
>= s1
->z
) ? 1.0f
: 0.0f
;
268 d
->w
= (s0
->w
>= s1
->w
) ? 1.0f
: 0.0f
;
269 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
270 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
273 void pshader_slt(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
274 d
->x
= (s0
->x
< s1
->x
) ? 1.0f
: 0.0f
;
275 d
->y
= (s0
->y
< s1
->y
) ? 1.0f
: 0.0f
;
276 d
->z
= (s0
->z
< s1
->z
) ? 1.0f
: 0.0f
;
277 d
->w
= (s0
->w
< s1
->w
) ? 1.0f
: 0.0f
;
278 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
279 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
282 void pshader_sub(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
283 d
->x
= s0
->x
- s1
->x
;
284 d
->y
= s0
->y
- s1
->y
;
285 d
->z
= s0
->z
- s1
->z
;
286 d
->w
= s0
->w
- s1
->w
;
287 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
288 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
292 * Version 1.1 specific
295 void pshader_exp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
296 d
->x
= d
->y
= d
->z
= d
->w
= powf(2.0f
, s0
->w
);
297 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
298 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
301 void pshader_log(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
302 float tmp_f
= fabsf(s0
->w
);
303 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
304 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
305 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
308 void pshader_frc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
309 d
->x
= s0
->x
- floorf(s0
->x
);
310 d
->y
= s0
->y
- floorf(s0
->y
);
313 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
314 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
317 typedef FLOAT D3DMATRIX44
[4][4];
318 typedef FLOAT D3DMATRIX43
[4][3];
319 typedef FLOAT D3DMATRIX34
[3][4];
320 typedef FLOAT D3DMATRIX33
[3][3];
321 typedef FLOAT D3DMATRIX23
[2][3];
323 void pshader_m4x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat
) {
325 * Buggy CODE: here only if cast not work for copy/paste
326 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
327 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
328 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
329 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
330 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
331 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
332 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
334 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
335 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
336 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
337 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
+ mat
[3][3] * s0
->w
;
338 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
339 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
340 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
341 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], mat
[3][3], s0
->w
, d
->w
));
344 void pshader_m4x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX34 mat
) {
345 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
346 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
347 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
349 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
350 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
351 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
352 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0
->w
, d
->w
));
355 void pshader_m3x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX43 mat
) {
356 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
357 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
358 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
359 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
;
360 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
361 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
362 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
363 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], s0
->w
, d
->w
));
366 void pshader_m3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX33 mat
) {
367 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
368 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
369 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
371 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
372 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
373 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
374 PSTRACE(("executing m3x3(4): (%f) \n", d
->w
));
377 void pshader_m3x2(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX23 mat
) {
379 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
380 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
386 * Version 2.0 specific
388 void pshader_lrp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
389 d
->x
= s0
->x
* (s1
->x
- s2
->x
) + s2
->x
;
390 d
->y
= s0
->y
* (s1
->y
- s2
->y
) + s2
->y
;
391 d
->z
= s0
->z
* (s1
->z
- s2
->z
) + s2
->z
;
392 d
->w
= s0
->w
* (s1
->w
- s2
->w
) + s2
->w
;
395 void pshader_crs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
396 d
->x
= s0
->y
* s1
->z
- s0
->z
* s1
->y
;
397 d
->y
= s0
->z
* s1
->x
- s0
->x
* s1
->z
;
398 d
->z
= s0
->x
* s1
->y
- s0
->y
* s1
->x
;
399 d
->w
= 0.9f
; /* w is undefined, so set it to something safeish */
401 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
402 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
405 void pshader_abs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
410 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
411 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
415 void pshader_texcoord(WINED3DSHADERVECTOR
* d
) {
419 void pshader_texkill(WINED3DSHADERVECTOR
* d
) {
423 void pshader_tex(WINED3DSHADERVECTOR
* d
) {
426 void pshader_texld(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
430 void pshader_texbem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
434 void pshader_texbeml(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
438 void pshader_texreg2ar(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
442 void pshader_texreg2gb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
446 void pshader_texm3x2pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
450 void pshader_texm3x2tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
454 void pshader_texm3x3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
458 void pshader_texm3x3pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
462 void pshader_texm3x3diff(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
466 void pshader_texm3x3spec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
470 void pshader_texm3x3vspec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
474 void pshader_cnd(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
478 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
479 void pshader_def(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
, WINED3DSHADERVECTOR
* s3
) {
483 void pshader_texreg2rgb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
487 void pshader_texdp3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
491 void pshader_texm3x2depth(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
495 void pshader_texdp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
499 void pshader_texm3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
503 void pshader_texdepth(WINED3DSHADERVECTOR
* d
) {
507 void pshader_cmp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
511 void pshader_bem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
515 void pshader_call(WINED3DSHADERVECTOR
* d
) {
519 void pshader_callnz(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
523 void pshader_loop(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
527 void pshader_ret(WINED3DSHADERVECTOR
* d
) {
531 void pshader_endloop(WINED3DSHADERVECTOR
* d
) {
535 void pshader_dcl(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
539 void pshader_pow(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
543 void pshader_sng(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
547 void pshader_nrm(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
551 void pshader_sincos(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
555 void pshader_rep(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
559 void pshader_endrep(void) {
563 void pshader_if(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
567 void pshader_ifc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
571 void pshader_else(WINED3DSHADERVECTOR
* d
) {
575 void pshader_label(WINED3DSHADERVECTOR
* d
) {
579 void pshader_endif(WINED3DSHADERVECTOR
* d
) {
583 void pshader_break(WINED3DSHADERVECTOR
* d
) {
587 void pshader_breakc(WINED3DSHADERVECTOR
* d
) {
591 void pshader_mova(WINED3DSHADERVECTOR
* d
) {
595 void pshader_defb(WINED3DSHADERVECTOR
* d
) {
599 void pshader_defi(WINED3DSHADERVECTOR
* d
) {
603 void pshader_dp2add(WINED3DSHADERVECTOR
* d
) {
607 void pshader_dsx(WINED3DSHADERVECTOR
* d
) {
611 void pshader_dsy(WINED3DSHADERVECTOR
* d
) {
615 void pshader_texldd(WINED3DSHADERVECTOR
* d
) {
619 void pshader_setp(WINED3DSHADERVECTOR
* d
) {
623 void pshader_texldl(WINED3DSHADERVECTOR
* d
) {
627 void pshader_breakp(WINED3DSHADERVECTOR
* d
) {
631 * log, exp, frc, m*x* seems to be macros ins ... to see
633 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins
[] = {
634 {D3DSIO_NOP
, "nop", "NOP", 0, pshader_nop
, 0, 0},
635 {D3DSIO_MOV
, "mov", "MOV", 2, pshader_mov
, 0, 0},
636 {D3DSIO_ADD
, "add", "ADD", 3, pshader_add
, 0, 0},
637 {D3DSIO_SUB
, "sub", "SUB", 3, pshader_sub
, 0, 0},
638 {D3DSIO_MAD
, "mad", "MAD", 4, pshader_mad
, 0, 0},
639 {D3DSIO_MUL
, "mul", "MUL", 3, pshader_mul
, 0, 0},
640 {D3DSIO_RCP
, "rcp", "RCP", 2, pshader_rcp
, 0, 0},
641 {D3DSIO_RSQ
, "rsq", "RSQ", 2, pshader_rsq
, 0, 0},
642 {D3DSIO_DP3
, "dp3", "DP3", 3, pshader_dp3
, 0, 0},
643 {D3DSIO_DP4
, "dp4", "DP4", 3, pshader_dp4
, 0, 0},
644 {D3DSIO_MIN
, "min", "MIN", 3, pshader_min
, 0, 0},
645 {D3DSIO_MAX
, "max", "MAX", 3, pshader_max
, 0, 0},
646 {D3DSIO_SLT
, "slt", "SLT", 3, pshader_slt
, 0, 0},
647 {D3DSIO_SGE
, "sge", "SGE", 3, pshader_sge
, 0, 0},
648 {D3DSIO_ABS
, "abs", "ABS", 2, pshader_abs
, 0, 0},
649 {D3DSIO_EXP
, "exp", "EX2", 2, pshader_exp
, 0, 0},
650 {D3DSIO_LOG
, "log", "LG2", 2, pshader_log
, 0, 0},
651 {D3DSIO_LIT
, "lit", "LIT", 2, pshader_lit
, 0, 0},
652 {D3DSIO_DST
, "dst", "DST", 3, pshader_dst
, 0, 0},
653 {D3DSIO_LRP
, "lrp", "LRP", 4, pshader_lrp
, 0, 0},
654 {D3DSIO_FRC
, "frc", "FRC", 2, pshader_frc
, 0, 0},
655 {D3DSIO_M4x4
, "m4x4", "undefined", 3, pshader_m4x4
, 0, 0},
656 {D3DSIO_M4x3
, "m4x3", "undefined", 3, pshader_m4x3
, 0, 0},
657 {D3DSIO_M3x4
, "m3x4", "undefined", 3, pshader_m3x4
, 0, 0},
658 {D3DSIO_M3x3
, "m3x3", "undefined", 3, pshader_m3x3
, 0, 0},
659 {D3DSIO_M3x2
, "m3x2", "undefined", 3, pshader_m3x2
, 0, 0},
/** FIXME: use direct access so add the other opcodes as stubs */
/* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl or NV_shader's */
665 {D3DSIO_CALL
, "call", GLNAME_REQUIRE_GLSL
, 1, pshader_call
, 0, 0},
666 {D3DSIO_CALLNZ
, "callnz", GLNAME_REQUIRE_GLSL
, 2, pshader_callnz
, 0, 0},
667 {D3DSIO_LOOP
, "loop", GLNAME_REQUIRE_GLSL
, 2, pshader_loop
, 0, 0},
668 {D3DSIO_RET
, "ret", GLNAME_REQUIRE_GLSL
, 0, pshader_ret
, 0, 0},
669 {D3DSIO_ENDLOOP
, "endloop", GLNAME_REQUIRE_GLSL
, 0, pshader_endloop
, 0, 0},
670 {D3DSIO_LABEL
, "label", GLNAME_REQUIRE_GLSL
, 1, pshader_label
, 0, 0},
/* DCL is a special operation */
672 {D3DSIO_DCL
, "dcl", NULL
, 1, pshader_dcl
, 0, 0},
673 {D3DSIO_POW
, "pow", "POW", 3, pshader_pow
, 0, 0},
674 {D3DSIO_CRS
, "crs", "XPS", 3, pshader_crs
, 0, 0},
675 /* TODO: sng can possibly be performed as
678 {D3DSIO_SGN
, "sng", NULL
, 2, pshader_sng
, 0, 0},
679 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
682 MUL vec.xyz, vec, tmp;
683 but I think this is better because it accounts for w properly.
689 {D3DSIO_NRM
, "nrm", NULL
, 2, pshader_nrm
, 0, 0},
690 {D3DSIO_SINCOS
, "sincos", NULL
, 2, pshader_sincos
, 0, 0},
691 {D3DSIO_REP
, "rep", GLNAME_REQUIRE_GLSL
, 2, pshader_rep
, 0, 0},
692 {D3DSIO_ENDREP
, "endrep", GLNAME_REQUIRE_GLSL
, 0, pshader_endrep
, 0, 0},
693 {D3DSIO_IF
, "if", GLNAME_REQUIRE_GLSL
, 2, pshader_if
, 0, 0},
694 {D3DSIO_IFC
, "ifc", GLNAME_REQUIRE_GLSL
, 2, pshader_ifc
, 0, 0},
695 {D3DSIO_ELSE
, "else", GLNAME_REQUIRE_GLSL
, 2, pshader_else
, 0, 0},
696 {D3DSIO_ENDIF
, "endif", GLNAME_REQUIRE_GLSL
, 2, pshader_endif
, 0, 0},
697 {D3DSIO_BREAK
, "break", GLNAME_REQUIRE_GLSL
, 2, pshader_break
, 0, 0},
698 {D3DSIO_BREAKC
, "breakc", GLNAME_REQUIRE_GLSL
, 2, pshader_breakc
, 0, 0},
699 {D3DSIO_MOVA
, "mova", GLNAME_REQUIRE_GLSL
, 2, pshader_mova
, 0, 0},
700 {D3DSIO_DEFB
, "defb", GLNAME_REQUIRE_GLSL
, 2, pshader_defb
, 0, 0},
701 {D3DSIO_DEFI
, "defi", GLNAME_REQUIRE_GLSL
, 2, pshader_defi
, 0, 0},
703 {D3DSIO_TEXCOORD
, "texcoord", "undefined", 1, pshader_texcoord
, 0, D3DPS_VERSION(1,3)},
704 {D3DSIO_TEXCOORD
, "texcrd", "undefined", 2, pshader_texcoord
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
705 {D3DSIO_TEXKILL
, "texkill", "KIL", 1, pshader_texkill
, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
706 {D3DSIO_TEX
, "tex", "undefined", 1, pshader_tex
, 0, D3DPS_VERSION(1,3)},
707 {D3DSIO_TEX
, "texld", "undefined", 2, pshader_texld
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
708 {D3DSIO_TEX
, "texld", GLNAME_REQUIRE_GLSL
, 3, pshader_texld
, D3DPS_VERSION(2,0), -1},
709 {D3DSIO_TEXBEM
, "texbem", "undefined", 2, pshader_texbem
, 0, D3DPS_VERSION(1,3)},
710 {D3DSIO_TEXBEML
, "texbeml", GLNAME_REQUIRE_GLSL
, 2, pshader_texbeml
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
711 {D3DSIO_TEXREG2AR
,"texreg2ar","undefined", 2, pshader_texreg2ar
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
712 {D3DSIO_TEXREG2GB
,"texreg2gb","undefined", 2, pshader_texreg2gb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXM3x2PAD
, "texm3x2pad", "undefined", 2, pshader_texm3x2pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXM3x2TEX
, "texm3x2tex", "undefined", 2, pshader_texm3x2tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
715 {D3DSIO_TEXM3x3PAD
, "texm3x3pad", "undefined", 2, pshader_texm3x3pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
716 {D3DSIO_TEXM3x3DIFF
, "texm3x3diff", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3diff
, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
717 {D3DSIO_TEXM3x3SPEC
, "texm3x3spec", "undefined", 3, pshader_texm3x3spec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
718 {D3DSIO_TEXM3x3VSPEC
, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXM3x3TEX
, "texm3x3tex", "undefined", 2, pshader_texm3x3tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_EXPP
, "expp", "EXP", 2, pshader_expp
, 0, 0},
721 {D3DSIO_LOGP
, "logp", "LOG", 2, pshader_logp
, 0, 0},
722 {D3DSIO_CND
, "cnd", GLNAME_REQUIRE_GLSL
, 4, pshader_cnd
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
723 /* def is a special operation */
724 {D3DSIO_DEF
, "def", "undefined", 5, pshader_def
, 0, 0},
725 {D3DSIO_TEXREG2RGB
, "texreg2rgb", GLNAME_REQUIRE_GLSL
, 2, pshader_texreg2rgb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
726 {D3DSIO_TEXDP3TEX
, "texdp3tex", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3tex
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXM3x2DEPTH
, "texm3x2depth", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x2depth
,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXDP3
, "texdp3", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
729 {D3DSIO_TEXM3x3
, "texm3x3", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
730 {D3DSIO_TEXDEPTH
, "texdepth", GLNAME_REQUIRE_GLSL
,1, pshader_texdepth
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
731 {D3DSIO_CMP
, "cmp", GLNAME_REQUIRE_GLSL
, 4, pshader_cmp
, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
732 {D3DSIO_BEM
, "bem", GLNAME_REQUIRE_GLSL
, 3, pshader_bem
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
/* TODO: dp2add can be made out of multiple instructions */
734 {D3DSIO_DP2ADD
, "dp2add", GLNAME_REQUIRE_GLSL
, 2, pshader_dp2add
, 0, 0},
735 {D3DSIO_DSX
, "dsx", GLNAME_REQUIRE_GLSL
, 2, pshader_dsx
, 0, 0},
736 {D3DSIO_DSY
, "dsy", GLNAME_REQUIRE_GLSL
, 2, pshader_dsy
, 0, 0},
737 {D3DSIO_TEXLDD
, "texldd", GLNAME_REQUIRE_GLSL
, 2, pshader_texldd
, 0, 0},
738 {D3DSIO_SETP
, "setp", GLNAME_REQUIRE_GLSL
, 2, pshader_setp
, 0, 0},
739 {D3DSIO_TEXLDL
, "texdl", GLNAME_REQUIRE_GLSL
, 2, pshader_texldl
, 0, 0},
740 {D3DSIO_BREAKP
, "breakp", GLNAME_REQUIRE_GLSL
, 2, pshader_breakp
, 0, 0},
741 {D3DSIO_PHASE
, "phase", GLNAME_REQUIRE_GLSL
, 0, pshader_nop
, 0, 0},
742 {0, NULL
, NULL
, 0, NULL
, 0, 0}
746 inline static const SHADER_OPCODE
* pshader_program_get_opcode(IWineD3DPixelShaderImpl
*This
, const DWORD code
) {
748 DWORD version
= This
->baseShader
.version
;
749 DWORD hex_version
= D3DPS_VERSION(version
/10, version
%10);
750 const SHADER_OPCODE
*shader_ins
= This
->baseShader
.shader_ins
;
752 /** TODO: use dichotomic search */
753 while (NULL
!= shader_ins
[i
].name
) {
754 if (((code
& D3DSI_OPCODE_MASK
) == shader_ins
[i
].opcode
) &&
755 (((hex_version
>= shader_ins
[i
].min_version
) && (hex_version
<= shader_ins
[i
].max_version
)) ||
756 ((shader_ins
[i
].min_version
== 0) && (shader_ins
[i
].max_version
== 0)))) {
757 return &shader_ins
[i
];
761 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code
, code
, code
& D3DSI_OPCODE_MASK
, version
);
765 inline static BOOL
pshader_is_version_token(DWORD token
) {
766 return 0xFFFF0000 == (token
& 0xFFFF0000);
769 inline static BOOL
pshader_is_comment_token(DWORD token
) {
770 return D3DSIO_COMMENT
== (token
& D3DSI_OPCODE_MASK
);
774 inline static void get_register_name(const DWORD param
, char* regstr
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
775 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
777 DWORD reg
= param
& REGMASK
;
778 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
782 sprintf(regstr
, "R%lu", reg
);
786 strcpy(regstr
, "fragment.color.primary");
788 strcpy(regstr
, "fragment.color.secondary");
793 sprintf(regstr
, "C%lu", reg
);
795 sprintf(regstr
, "program.env[%lu]", reg
);
797 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
798 sprintf(regstr
,"T%lu", reg
);
801 sprintf(regstr
, "%s", rastout_reg_names
[reg
]);
804 sprintf(regstr
, "oD[%lu]", reg
);
806 case D3DSPR_TEXCRDOUT
:
807 sprintf(regstr
, "oT[%lu]", reg
);
810 FIXME("Unhandled register name Type(%ld)\n", regtype
);
815 inline static void get_write_mask(const DWORD output_reg
, char *write_mask
) {
817 if ((output_reg
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
818 strcat(write_mask
, ".");
819 if (output_reg
& D3DSP_WRITEMASK_0
) strcat(write_mask
, "r");
820 if (output_reg
& D3DSP_WRITEMASK_1
) strcat(write_mask
, "g");
821 if (output_reg
& D3DSP_WRITEMASK_2
) strcat(write_mask
, "b");
822 if (output_reg
& D3DSP_WRITEMASK_3
) strcat(write_mask
, "a");
826 inline static void get_input_register_swizzle(const DWORD instr
, char *swzstring
) {
827 static const char swizzle_reg_chars
[] = "rgba";
828 DWORD swizzle
= (instr
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
829 DWORD swizzle_x
= swizzle
& 0x03;
830 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
831 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
832 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
834 * swizzle bits fields:
838 if ((D3DSP_NOSWIZZLE
>> D3DSP_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
839 if (swizzle_x
== swizzle_y
&&
840 swizzle_x
== swizzle_z
&&
841 swizzle_x
== swizzle_w
) {
842 sprintf(swzstring
, ".%c", swizzle_reg_chars
[swizzle_x
]);
844 sprintf(swzstring
, ".%c%c%c%c",
845 swizzle_reg_chars
[swizzle_x
],
846 swizzle_reg_chars
[swizzle_y
],
847 swizzle_reg_chars
[swizzle_z
],
848 swizzle_reg_chars
[swizzle_w
]);
853 inline static void addline(unsigned int *lineNum
, char *pgm
, unsigned int *pgmLength
, char *line
) {
854 int lineLen
= strlen(line
);
855 if(lineLen
+ *pgmLength
> PGMSIZE
- 1 /* - 1 to allow a NULL at the end */) {
856 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE
, lineLen
+ *pgmLength
);
859 memcpy(pgm
+ *pgmLength
, line
, lineLen
);
862 *pgmLength
+= lineLen
;
864 TRACE("GL HW (%u, %u) : %s", *lineNum
, *pgmLength
, line
);
/* Multiplier operand used for each destination shift code, indexed by the
 * raw shift value. "dummy" marks the codes that have no matching component
 * in the coefmul / coefdiv program parameters. */
static const char* shift_tab[] = {
    /*  0  (none) */ "dummy",
    /*  1  (x2)   */ "coefmul.x",
    /*  2  (x4)   */ "coefmul.y",
    /*  3  (x8)   */ "coefmul.z",
    /*  4  (x16)  */ "coefmul.w",
    /*  5  (x32)  */ "dummy",
    /*  6  (x64)  */ "dummy",
    /*  7  (x128) */ "dummy",
    /*  8  (d256) */ "dummy",
    /*  9  (d128) */ "dummy",
    /* 10  (d64)  */ "dummy",
    /* 11  (d32)  */ "dummy",
    /* 12  (d16)  */ "coefdiv.w",
    /* 13  (d8)   */ "coefdiv.z",
    /* 14  (d4)   */ "coefdiv.y",
    /* 15  (d2)   */ "coefdiv.x"
};
886 inline static void gen_output_modifier_line(int saturate
, char *write_mask
, int shift
, char *regstr
, char* line
) {
887 /* Generate a line that does the output modifier computation */
888 sprintf(line
, "MUL%s %s%s, %s, %s;", saturate
? "_SAT" : "", regstr
, write_mask
, regstr
, shift_tab
[shift
]);
891 inline static int gen_input_modifier_line(const DWORD instr
, int tmpreg
, char *outregstr
, char *line
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
892 /* Generate a line that does the input modifier computation and return the input register to use */
893 static char regstr
[256];
894 static char tmpline
[256];
897 /* Assume a new line will be added */
900 /* Get register name */
901 get_register_name(instr
, regstr
, constants
);
903 TRACE(" Register name %s\n", regstr
);
904 switch (instr
& D3DSP_SRCMOD_MASK
) {
906 strcpy(outregstr
, regstr
);
910 sprintf(outregstr
, "-%s", regstr
);
914 sprintf(line
, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg
, regstr
);
916 case D3DSPSM_BIASNEG
:
917 sprintf(line
, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg
, regstr
);
920 sprintf(line
, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg
, regstr
);
922 case D3DSPSM_SIGNNEG
:
923 sprintf(line
, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg
, regstr
);
926 sprintf(line
, "SUB T%c, one.x, %s;", 'A' + tmpreg
, regstr
);
929 sprintf(line
, "ADD T%c, %s, %s;", 'A' + tmpreg
, regstr
, regstr
);
932 sprintf(line
, "ADD T%c, -%s, -%s;", 'A' + tmpreg
, regstr
, regstr
);
935 sprintf(line
, "RCP T%c, %s.z;", 'A' + tmpreg
, regstr
);
936 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
937 strcat(line
, "\n"); /* Hack */
938 strcat(line
, tmpline
);
941 sprintf(line
, "RCP T%c, %s;", 'A' + tmpreg
, regstr
);
942 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
943 strcat(line
, "\n"); /* Hack */
944 strcat(line
, tmpline
);
947 strcpy(outregstr
, regstr
);
952 /* Substitute the register name */
953 sprintf(outregstr
, "T%c", 'A' + tmpreg
);
958 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/*
 * Translates the D3D pixel shader token stream at pFunction into the text
 * of an ARB fragment program and hands that text to GL.
 *
 * The text is assembled with addline() in a heap buffer of PGMSIZE bytes:
 *  - a version token produces the "!!ARBfp..." header, the TEMP and PARAM
 *    declarations (coefdiv, coefmul, one) and MOVs from
 *    fragment.texcoord[n] into T<n>;
 *  - comment tokens are skipped using their encoded length;
 *  - D3DSIO_DEF produces a "PARAM C<n>" line and marks
 *    This->constants[n], which get_register_name() consults;
 *  - the remaining opcodes are emitted per instruction, with source
 *    modifiers resolved through gen_input_modifier_line();
 *  - the program is closed with "MOV result.color, R0;" and "END".
 *
 * The finished text is uploaded with glProgramStringARB() for
 * GL_FRAGMENT_PROGRAM_ARB under the id stored in This->baseShader.prgId,
 * and the text buffer is released with HeapFree().
 *
 * NOTE(review): the upload is guarded by GL_SUPPORT(ARB_VERTEX_PROGRAM)
 * although a fragment program is created - confirm this is intended.
 * NOTE(review): no NULL check of the HeapAlloc() result is visible before
 * pgmStr is written to - confirm.
 */
959 inline static VOID
IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
960 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
961 const DWORD
*pToken
= pFunction
;
962 const SHADER_OPCODE
*curOpcode
= NULL
;
965 unsigned lineNum
= 0; /* The line number of the generated program (for loging)*/
966 char *pgmStr
= NULL
; /* A pointer to the program data generated by this function */
968 #if 0 /* TODO: loop register (just another address register ) */
969 BOOL hasLoops
= FALSE
;
972 BOOL saturate
; /* clamp to 0.0 -> 1.0*/
973 int row
= 0; /* not sure, something to do with macros? */
975 int version
= 0; /* The version of the shader */
977 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
978 unsigned int pgmLength
= 0;
980 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
981 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
982 if (This
->device
->fixupVertexBufferSize
< PGMSIZE
) {
983 HeapFree(GetProcessHeap(), 0, This
->fixupVertexBuffer
);
984 This
->fixupVertexBuffer
= HeapAlloc(GetProcessHeap() , 0, PGMSIZE
);
985 This
->fixupVertexBufferSize
= PGMSIZE
;
986 This
->fixupVertexBuffer
[0] = 0;
988 pgmStr
= This
->device
->fixupVertexBuffer
;
990 pgmStr
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, PGMSIZE
); /* 64kb should be enough */
994 /* TODO: Think about using a first pass to work out what's required for the second pass. */
/* Clear the per-register flags; they are set again below for every
 * constant that the shader defines itself with D3DSIO_DEF. */
995 for(i
= 0; i
< WINED3D_PSHADER_MAX_CONSTANTS
; i
++)
996 This
->constants
[i
] = 0;
998 if (NULL
!= pToken
) {
999 while (D3DPS_END() != *pToken
) {
1000 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1002 instructionSize
= pToken
& SIZEBITS
>> 27;
1005 if (pshader_is_version_token(*pToken
)) { /** version */
1009 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1010 version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1012 TRACE("found version token ps.%lu.%lu;\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1014 /* Each release of pixel shaders has had different numbers of temp registers */
1020 case 14: numTemps
=12;
1022 strcpy(tmpLine
, "!!ARBfp1.0\n");
1024 case 20: numTemps
=12;
1026 strcpy(tmpLine
, "!!ARBfp2.0\n");
1027 FIXME("No work done yet to support ps2.0 in hw\n");
1029 case 30: numTemps
=32;
1031 strcpy(tmpLine
, "!!ARBfp3.0\n");
1032 FIXME("No work done yet to support ps3.0 in hw\n");
1037 strcpy(tmpLine
, "!!ARBfp1.0\n");
1038 FIXME("Unrecognized pixel shader version!\n");
1040 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1042 /* TODO: find out how many registers are really needed */
1043 for(i
= 0; i
< 6; i
++) {
1044 sprintf(tmpLine
, "TEMP T%lu;\n", i
);
1045 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1048 for(i
= 0; i
< 6; i
++) {
1049 sprintf(tmpLine
, "TEMP R%lu;\n", i
);
1050 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1053 sprintf(tmpLine
, "TEMP TMP;\n");
1054 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1055 sprintf(tmpLine
, "TEMP TMP2;\n");
1056 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1057 sprintf(tmpLine
, "TEMP TA;\n");
1058 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1059 sprintf(tmpLine
, "TEMP TB;\n");
1060 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1061 sprintf(tmpLine
, "TEMP TC;\n");
1062 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1064 strcpy(tmpLine
, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1065 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1066 strcpy(tmpLine
, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1067 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1068 strcpy(tmpLine
, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1069 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1071 for(i
= 0; i
< 4; i
++) {
1072 sprintf(tmpLine
, "MOV T%lu, fragment.texcoord[%lu];\n", i
, i
);
1073 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1080 if (pshader_is_comment_token(*pToken
)) { /** comment */
1081 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1083 TRACE("#%s\n", (char*)pToken
);
1084 pToken
+= comment_len
;
1088 #if 0 /* Not sure what these are here for, they're not required for vshaders */
/* Look up the opcode table entry for this instruction token. */
1092 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
1094 if (NULL
== curOpcode
) {
1095 /* unknown current opcode ... (shouldn't be any!) */
1096 while (*pToken
& 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1097 FIXME("unrecognized opcode: %08lx\n", *pToken
);
1100 } else if (GLNAME_REQUIRE_GLSL
== curOpcode
->glname
) {
1101 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1102 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode
->name
);
1103 pToken
+= curOpcode
->num_params
;
1105 } else if (D3DSIO_DEF
== curOpcode
->opcode
) {
1107 /* Handle definitions here, they don't fit well with the
1108 * other instructions below [for now ] */
1110 DWORD reg
= *pToken
& REGMASK
;
1112 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1113 curOpcode
->name
, curOpcode
->glname
, curOpcode
->num_params
);
1115 sprintf(tmpLine
, "PARAM C%lu = { %f, %f, %f, %f };\n", reg
,
1116 *((const float *)(pToken
+ 1)),
1117 *((const float *)(pToken
+ 2)),
1118 *((const float *)(pToken
+ 3)),
1119 *((const float *)(pToken
+ 4)) );
1121 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1123 This
->constants
[reg
] = 1;
1129 /* Common processing: [inst] [dst] [src]* */
1132 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1133 curOpcode
->name
, curOpcode
->glname
, curOpcode
->num_params
);
1137 /* Build opcode for GL vertex_program */
1138 switch (curOpcode
->opcode
) {
1165 case D3DSIO_TEXKILL
:
1166 TRACE("Appending glname %s to tmpLine\n", curOpcode
->glname
);
1167 strcpy(tmpLine
, curOpcode
->glname
);
1172 get_write_mask(*pToken
, tmp
);
1173 if (version
!= 14) {
1174 DWORD reg
= *pToken
& REGMASK
;
1175 sprintf(tmpLine
,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg
, tmp
, reg
, reg
);
1176 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1180 DWORD reg1
= *pToken
& REGMASK
;
1181 if (gen_input_modifier_line(*++pToken
, 0, reg2
, tmpLine
, This
->constants
)) {
1182 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1184 sprintf(tmpLine
,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1
, tmp
, reg2
, reg1
);
1185 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1191 case D3DSIO_TEXCOORD
:
1194 get_write_mask(*pToken
, tmp
);
1195 if (version
!= 14) {
1196 DWORD reg
= *pToken
& REGMASK
;
1197 sprintf(tmpLine
, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg
, tmp
, reg
);
1198 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1201 DWORD reg1
= *pToken
& REGMASK
;
1202 DWORD reg2
= *++pToken
& REGMASK
;
1203 sprintf(tmpLine
, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1
, tmp
, reg2
);
1204 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1210 case D3DSIO_TEXM3x2PAD
:
1212 DWORD reg
= *pToken
& REGMASK
;
1214 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1215 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1217 sprintf(tmpLine
, "DP3 TMP.x, T%lu, %s;\n", reg
, buf
);
1218 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1223 case D3DSIO_TEXM3x2TEX
:
1225 DWORD reg
= *pToken
& REGMASK
;
1227 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1228 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1230 sprintf(tmpLine
, "DP3 TMP.y, T%lu, %s;\n", reg
, buf
);
1231 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1232 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg
, reg
);
1233 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1238 case D3DSIO_TEXREG2AR
:
1240 DWORD reg1
= *pToken
& REGMASK
;
1241 DWORD reg2
= *++pToken
& REGMASK
;
1242 sprintf(tmpLine
, "MOV TMP.r, T%lu.a;\n", reg2
);
1243 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1244 sprintf(tmpLine
, "MOV TMP.g, T%lu.r;\n", reg2
);
1245 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1246 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1247 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1252 case D3DSIO_TEXREG2GB
:
1254 DWORD reg1
= *pToken
& REGMASK
;
1255 DWORD reg2
= *++pToken
& REGMASK
;
1256 sprintf(tmpLine
, "MOV TMP.r, T%lu.g;\n", reg2
);
1257 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1258 sprintf(tmpLine
, "MOV TMP.g, T%lu.b;\n", reg2
);
1259 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1260 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1261 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1268 DWORD reg1
= *pToken
& REGMASK
;
1269 DWORD reg2
= *++pToken
& REGMASK
;
1271 /* FIXME: Should apply the BUMPMAPENV matrix */
1272 sprintf(tmpLine
, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1
, reg2
);
1273 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1274 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1275 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1280 case D3DSIO_TEXM3x3PAD
:
1282 DWORD reg
= *pToken
& REGMASK
;
1284 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1285 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1287 sprintf(tmpLine
, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row
, reg
, buf
);
1288 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1294 case D3DSIO_TEXM3x3TEX
:
1296 DWORD reg
= *pToken
& REGMASK
;
1298 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1299 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1302 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1303 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1305 /* Cubemap textures will be more used than 3D ones. */
1306 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1307 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1312 case D3DSIO_TEXM3x3VSPEC
:
1314 DWORD reg
= *pToken
& REGMASK
;
1316 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1317 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1319 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1320 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1322 /* Construct the eye-ray vector from w coordinates */
1323 sprintf(tmpLine
, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw
[0]);
1324 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1325 sprintf(tmpLine
, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw
[1]);
1326 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1327 sprintf(tmpLine
, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg
);
1328 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1330 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1331 sprintf(tmpLine
, "DP3 TMP.w, TMP, TMP2;\n");
1332 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1333 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;\n");
1334 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1335 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1336 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1338 /* Cubemap textures will be more used than 3D ones. */
1339 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1340 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1346 case D3DSIO_TEXM3x3SPEC
:
1348 DWORD reg
= *pToken
& REGMASK
;
1349 DWORD reg3
= *(pToken
+ 2) & REGMASK
;
1351 if (gen_input_modifier_line(*(pToken
+ 1), 0, buf
, tmpLine
, This
->constants
)) {
1352 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1354 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1355 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1357 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1358 sprintf(tmpLine
, "DP3 TMP.w, TMP, C[%lu];\n", reg3
);
1359 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1361 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;\n");
1362 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1363 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3
);
1364 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1366 /* Cubemap textures will be more used than 3D ones. */
1367 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1368 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1376 if (curOpcode
->glname
== GLNAME_REQUIRE_GLSL
) {
1377 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode
->name
);
1379 FIXME("Can't handle opcode %s in hwShader\n", curOpcode
->name
);
1381 pToken
+= curOpcode
->num_params
;
1385 if (0 != (*pToken
& D3DSP_DSTMOD_MASK
)) {
1386 DWORD mask
= *pToken
& D3DSP_DSTMOD_MASK
;
1388 case D3DSPDM_SATURATE
: saturate
= TRUE
; break;
1389 #if 0 /* as yet unhandled modifiers */
1390 case D3DSPDM_CENTROID
: centroid
= TRUE
; break;
1391 case D3DSPDM_PP
: partialpresision
= TRUE
; break;
1392 case D3DSPDM_X2
: X2
= TRUE
; break;
1393 case D3DSPDM_X4
: X4
= TRUE
; break;
1394 case D3DSPDM_X8
: X8
= TRUE
; break;
1395 case D3DSPDM_D2
: D2
= TRUE
; break;
1396 case D3DSPDM_D4
: D4
= TRUE
; break;
1397 case D3DSPDM_D8
: D8
= TRUE
; break;
1400 TRACE("_unhandled_modifier(0x%08lx)\n", mask
);
1404 /* Generate input and output registers */
1405 if (curOpcode
->num_params
> 0) {
1407 char operands
[4][100];
1412 /* Generate lines that handle input modifier computation */
1413 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1414 TRACE("(%p) : Param %ld token %lx\n", This
, i
, *(pToken
+ i
));
1415 if (gen_input_modifier_line(*(pToken
+ i
), i
- 1, regs
[i
- 1], tmpOp
, This
->constants
)) {
1416 addline(&lineNum
, pgmStr
, &pgmLength
, tmpOp
);
1420 /* Handle saturation only when no shift is present in the output modifier */
1421 if ((*pToken
& D3DSPDM_SATURATE
) && (0 == (*pToken
& D3DSP_DSTSHIFT_MASK
)))
1424 /* Handle output register */
1425 get_register_name(*pToken
, tmpOp
, This
->constants
);
1426 strcpy(operands
[0], tmpOp
);
1427 get_write_mask(*pToken
, tmpOp
);
1428 strcat(operands
[0], tmpOp
);
1430 /* This function works because of side effects from gen_input_modifier_line */
1431 /* Handle input registers */
1432 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1433 TRACE("(%p) : Regs = %s\n", This
, regs
[i
- 1]);
1434 strcpy(operands
[i
], regs
[i
- 1]);
1435 get_input_register_swizzle(*(pToken
+ i
), swzstring
);
1436 strcat(operands
[i
], swzstring
);
1439 switch(curOpcode
->opcode
) {
1441 sprintf(tmpLine
, "CMP%s %s, %s, %s, %s;\n", (saturate
? "_SAT" : ""), operands
[0], operands
[1], operands
[3], operands
[2]);
1444 sprintf(tmpLine
, "ADD TMP, -%s, coefdiv.x;", operands
[1]);
1445 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1446 sprintf(tmpLine
, "CMP%s %s, TMP, %s, %s;\n", (saturate
? "_SAT" : ""), operands
[0], operands
[2], operands
[3]);
1450 strcat(tmpLine
, "_SAT");
1451 strcat(tmpLine
, " ");
1452 strcat(tmpLine
, operands
[0]);
1453 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
1454 strcat(tmpLine
, ", ");
1455 strcat(tmpLine
, operands
[i
]);
1457 strcat(tmpLine
,";\n");
1459 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1460 pToken
+= curOpcode
->num_params
;
1462 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1463 if (curOpcode
->num_params
> 0) {
1464 DWORD param
= *(pInstr
+ 1);
1465 if (0 != (param
& D3DSP_DSTSHIFT_MASK
)) {
1467 /* Generate a line that handle the output modifier computation */
1469 char write_mask
[20];
1470 DWORD shift
= (param
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
1471 get_register_name(param
, regstr
, This
->constants
);
1472 get_write_mask(param
, write_mask
);
1473 gen_output_modifier_line(saturate
, write_mask
, shift
, regstr
, tmpLine
);
1474 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1480 /* TODO: What about result.depth? */
/* The shader result is taken from temporary R0. */
1481 strcpy(tmpLine
, "MOV result.color, R0;\n");
1482 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1484 strcpy(tmpLine
, "END\n");
1485 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1488 /* finally null terminate the pgmStr*/
1489 pgmStr
[pgmLength
] = 0;
1490 if (GL_SUPPORT(ARB_VERTEX_PROGRAM
)) {
1491 /* Create the hw shader */
1493 /* pgmStr sometimes gets too long for a normal TRACE */
1494 TRACE("Generated program:\n");
1495 if (TRACE_ON(d3d_shader
)) {
1496 fprintf(stderr
, "%s\n", pgmStr
);
1499 /* TODO: change to resource.glObjectHandel or something like that */
1500 GL_EXTCALL(glGenProgramsARB(1, &This
->baseShader
.prgId
));
1502 TRACE("Creating a hw pixel shader, prg=%d\n", This
->baseShader
.prgId
);
1503 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
, This
->baseShader
.prgId
));
1505 TRACE("Created hw pixel shader, prg=%d\n", This
->baseShader
.prgId
);
1506 /* Create the program and check for errors */
1507 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
, strlen(pgmStr
), pgmStr
));
/* If GL rejected the program text, log where and why, and record -1 as
 * the program id. */
1508 if (glGetError() == GL_INVALID_OPERATION
) {
1510 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &errPos
);
1511 FIXME("HW PixelShader Error at position %d: %s\n",
1512 errPos
, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB
)));
1513 This
->baseShader
.prgId
= -1;
1516 #if 1 /* if were using the data buffer of device then we don't need to free it */
1517 HeapFree(GetProcessHeap(), 0, pgmStr
);
/*
 * Traces a human readable form of one shader parameter token: the
 * register it names, followed by either its write mask or its source
 * modifier and swizzle.
 *
 *   param - the parameter token.
 *   input - callers in this file pass 0 for the first (destination)
 *           parameter of an instruction and 1 for the following (source)
 *           parameters.
 *
 * NOTE(review): rastout_reg_names has 5 entries and is indexed with the
 * register number taken from the token; no range check is visible here.
 */
1521 inline static void pshader_program_dump_ps_param(const DWORD param
, int input
) {
1522 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1523 static const char swizzle_reg_chars
[] = "rgba";
1525 /* the unknown mask is for bits not yet accounted for by any other mask... */
1526 #define UNKNOWN_MASK 0xC000
1528 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
1529 #define EXTENDED_REG 0x1800
1531 DWORD reg
= param
& D3DSP_REGNUM_MASK
;
/* Register type field, combined with the EXTENDED_REG bits of the token
 * shifted down by 8. */
1532 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) | ((param
& EXTENDED_REG
) >> 8);
1535 if ( ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
) ||
1536 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_BIASNEG
) ||
1537 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_SIGNNEG
) ||
1538 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_X2NEG
) )
1540 else if ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_COMP
)
1544 switch (regtype
/* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1552 TRACE("c%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1555 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
1558 case D3DSPR_RASTOUT
:
1559 TRACE("%s", rastout_reg_names
[reg
]);
1561 case D3DSPR_ATTROUT
:
1562 TRACE("oD%lu", reg
);
1564 case D3DSPR_TEXCRDOUT
:
1565 TRACE("oT%lu", reg
);
1567 case D3DSPR_CONSTINT
:
1568 TRACE("i%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1570 case D3DSPR_CONSTBOOL
:
1571 TRACE("b%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1577 TRACE("aL%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1584 /** operand output */
1586 * for better debugging traces it's done into opcode dump code
1587 * @see pshader_program_dump_opcode
1588 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1589 DWORD mask = param & D3DSP_DSTMOD_MASK;
1591 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1593 TRACE("_unhandled_modifier(0x%08lx)", mask);
1596 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1597 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1599 TRACE("_x%u", 1 << shift);
1603 if ((param
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
1605 if (param
& D3DSP_WRITEMASK_0
) TRACE(".r");
1606 if (param
& D3DSP_WRITEMASK_1
) TRACE(".g");
1607 if (param
& D3DSP_WRITEMASK_2
) TRACE(".b");
1608 if (param
& D3DSP_WRITEMASK_3
) TRACE(".a");
1611 /** operand input */
1612 DWORD swizzle
= (param
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
1613 DWORD swizzle_r
= swizzle
& 0x03;
1614 DWORD swizzle_g
= (swizzle
>> 2) & 0x03;
1615 DWORD swizzle_b
= (swizzle
>> 4) & 0x03;
1616 DWORD swizzle_a
= (swizzle
>> 6) & 0x03;
1618 if (0 != (param
& D3DSP_SRCMOD_MASK
)) {
1619 DWORD mask
= param
& D3DSP_SRCMOD_MASK
;
1620 /*TRACE("_modifier(0x%08lx) ", mask);*/
1622 case D3DSPSM_NONE
: break;
1623 case D3DSPSM_NEG
: break;
1624 case D3DSPSM_BIAS
: TRACE("_bias"); break;
1625 case D3DSPSM_BIASNEG
: TRACE("_bias"); break;
1626 case D3DSPSM_SIGN
: TRACE("_bx2"); break;
1627 case D3DSPSM_SIGNNEG
: TRACE("_bx2"); break;
1628 case D3DSPSM_COMP
: break;
1629 case D3DSPSM_X2
: TRACE("_x2"); break;
1630 case D3DSPSM_X2NEG
: TRACE("_x2"); break;
1631 case D3DSPSM_DZ
: TRACE("_dz"); break;
1632 case D3DSPSM_DW
: TRACE("_dw"); break;
1634 TRACE("_unknown(0x%08lx)", mask
);
1639 * swizzle bits fields:
1642 if ((D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1643 if (swizzle_r
== swizzle_g
&&
1644 swizzle_r
== swizzle_b
&&
1645 swizzle_r
== swizzle_a
) {
1646 TRACE(".%c", swizzle_reg_chars
[swizzle_r
]);
1649 swizzle_reg_chars
[swizzle_r
],
1650 swizzle_reg_chars
[swizzle_g
],
1651 swizzle_reg_chars
[swizzle_b
],
1652 swizzle_reg_chars
[swizzle_a
]);
/*
 * Traces the usage named by a declaration token.
 *
 * The low 16 bits of token select the D3DDECLUSAGE_* value. Bits 16-19
 * hold a usage index that is printed for position, normal, texture and
 * positionT; for colour usage, index 0 is printed as "color" and any other
 * index as "specular" followed by the index minus one. An unknown usage
 * value is reported with FIXME.
 *
 * This is not referenced by the statements visible here.
 */
1658 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl
*This
, DWORD token
) {
1660 switch(token
& 0xFFFF) {
1661 case D3DDECLUSAGE_POSITION
:
1662 TRACE("%s%ld ", "position",(token
& 0xF0000) >> 16);
1664 case D3DDECLUSAGE_BLENDINDICES
:
1665 TRACE("%s ", "blend");
1667 case D3DDECLUSAGE_BLENDWEIGHT
:
1668 TRACE("%s ", "weight");
1670 case D3DDECLUSAGE_NORMAL
:
1671 TRACE("%s%ld ", "normal",(token
& 0xF0000) >> 16);
1673 case D3DDECLUSAGE_PSIZE
:
1674 TRACE("%s ", "psize");
1676 case D3DDECLUSAGE_COLOR
:
1677 if((token
& 0xF0000) >> 16 == 0) {
1678 TRACE("%s ", "color");
1680 TRACE("%s%ld ", "specular", ((token
& 0xF0000) >> 16) - 1);
1683 case D3DDECLUSAGE_TEXCOORD
:
1684 TRACE("%s%ld ", "texture", (token
& 0xF0000) >> 16);
1686 case D3DDECLUSAGE_TANGENT
:
1687 TRACE("%s ", "tangent");
1689 case D3DDECLUSAGE_BINORMAL
:
1690 TRACE("%s ", "binormal");
1692 case D3DDECLUSAGE_TESSFACTOR
:
1693 TRACE("%s ", "tessfactor");
1695 case D3DDECLUSAGE_POSITIONT
:
1696 TRACE("%s%ld ", "positionT",(token
& 0xF0000) >> 16);
1698 case D3DDECLUSAGE_FOG
:
1699 TRACE("%s ", "fog");
1701 case D3DDECLUSAGE_DEPTH
:
1702 TRACE("%s ", "depth");
1704 case D3DDECLUSAGE_SAMPLE
:
1705 TRACE("%s ", "sample");
1708 FIXME("Unrecognised dcl %08lx", token
& 0xFFFF);
/*
 * Sets the token stream (byte code) of the pixel shader.
 *
 * The stream at pFunction is walked up to the D3DPS_END() token while a
 * disassembly is written to the trace log. Along the way the shader
 * version is stored in baseShader.version (major * 10 + minor) and the
 * walked length is accumulated so that baseShader.functionLength can be
 * set. When a function is supplied it is then optionally translated into
 * a hardware program with IWineD3DPixelShaderImpl_GenerateProgramArbHW()
 * and copied into a heap block kept in baseShader.function; otherwise
 * baseShader.function is set to NULL.
 *
 * The return statement is not visible here; the final trace announces
 * WINED3D_OK.
 *
 * NOTE(review): the HeapAlloc() result is handed to memcpy() without a
 * visible NULL check - confirm.
 */
1712 HRESULT WINAPI
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
1713 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
1714 const DWORD
* pToken
= pFunction
;
1715 const SHADER_OPCODE
*curOpcode
= NULL
;
1718 TRACE("(%p) : Parsing programme\n", This
);
1720 if (NULL
!= pToken
) {
1721 while (D3DPS_END() != *pToken
) {
1722 if (pshader_is_version_token(*pToken
)) { /** version */
1723 This
->baseShader
.version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1724 TRACE("ps_%lu_%lu\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1729 if (pshader_is_comment_token(*pToken
)) { /** comment */
1730 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1732 TRACE("//%s\n", (char*)pToken
);
1733 pToken
+= comment_len
;
1734 len
+= comment_len
+ 1;
1737 if (!This
->baseShader
.version
) {
1738 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This
);
1740 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
1743 if (NULL
== curOpcode
) {
1745 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1746 while (*pToken
& 0x80000000) {
1748 /* unknown current opcode ... */
1749 TRACE("unrecognized opcode: %08lx", *pToken
);
1756 if (curOpcode
->opcode
== D3DSIO_DCL
) {
1757 pshader_program_dump_decl_usage(This
, *pToken
);
1760 pshader_program_dump_ps_param(*pToken
, 0);
1764 if (curOpcode
->opcode
== D3DSIO_DEF
) {
1765 TRACE("def c%lu = ", *pToken
& 0xFF);
1768 TRACE("%f ,", *(float *)pToken
);
1771 TRACE("%f ,", *(float *)pToken
);
1774 TRACE("%f ,", *(float *)pToken
);
1777 TRACE("%f", *(float *)pToken
);
1781 TRACE("%s ", curOpcode
->name
);
1782 if (curOpcode
->num_params
> 0) {
1783 pshader_program_dump_ps_param(*pToken
, 0);
1786 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1788 pshader_program_dump_ps_param(*pToken
, 1);
/* len is counted in DWORD tokens; the extra one presumably accounts for
 * the end token - TODO confirm. */
1797 This
->baseShader
.functionLength
= (len
+ 1) * sizeof(DWORD
);
1799 This
->baseShader
.functionLength
= 1; /* no Function defined use fixed function vertex processing */
1802 /* Generate HW shader in needed */
/* NOTE(review): the test below reads the vs_mode setting although this is
 * the pixel shader path - confirm this is intended. */
1803 if (NULL
!= pFunction
&& wined3d_settings
.vs_mode
== VS_HW
) {
1804 TRACE("(%p) : Generating hardware program\n", This
);
1806 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface
, pFunction
);
1810 TRACE("(%p) : Copying the function\n", This
);
1811 /* copy the function ... because it will certainly be released by application */
1812 if (NULL
!= pFunction
) {
1813 This
->baseShader
.function
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, This
->baseShader
.functionLength
);
1814 memcpy((void *)This
->baseShader
.function
, pFunction
, This
->baseShader
.functionLength
);
1816 This
->baseShader
.function
= NULL
;
1819 /* TODO: Some proper return values for failures */
1820 TRACE("(%p) : Returning WINED3D_OK\n", This
);
1824 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl
=
1826 /*** IUnknown methods ***/
1827 IWineD3DPixelShaderImpl_QueryInterface
,
1828 IWineD3DPixelShaderImpl_AddRef
,
1829 IWineD3DPixelShaderImpl_Release
,
1830 /*** IWineD3DBase methods ***/
1831 IWineD3DPixelShaderImpl_GetParent
,
1832 /*** IWineD3DBaseShader methods ***/
1833 IWineD3DPixelShaderImpl_SetFunction
,
1834 /*** IWineD3DPixelShader methods ***/
1835 IWineD3DPixelShaderImpl_GetDevice
,
1836 IWineD3DPixelShaderImpl_GetFunction