2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 #include "wined3d_private.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader
);
32 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
34 #if 0 /* Must not be 1 in cvs version */
35 # define PSTRACE(A) TRACE A
36 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
39 # define TRACE_VSVECTOR(name)
42 #define GLNAME_REQUIRE_GLSL ((const char *)1)
43 /* *******************************************
44 IWineD3DPixelShader IUnknown parts follow
45 ******************************************* */
46 HRESULT WINAPI
IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader
*iface
, REFIID riid
, LPVOID
*ppobj
)
48 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
49 TRACE("(%p)->(%s,%p)\n",This
,debugstr_guid(riid
),ppobj
);
50 if (IsEqualGUID(riid
, &IID_IUnknown
)
51 || IsEqualGUID(riid
, &IID_IWineD3DBase
)
52 || IsEqualGUID(riid
, &IID_IWineD3DBaseShader
)
53 || IsEqualGUID(riid
, &IID_IWineD3DPixelShader
)) {
54 IUnknown_AddRef(iface
);
62 ULONG WINAPI
IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader
*iface
) {
63 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
64 TRACE("(%p) : AddRef increasing from %ld\n", This
, This
->ref
);
65 return InterlockedIncrement(&This
->ref
);
68 ULONG WINAPI
IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader
*iface
) {
69 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
71 TRACE("(%p) : Releasing from %ld\n", This
, This
->ref
);
72 ref
= InterlockedDecrement(&This
->ref
);
74 HeapFree(GetProcessHeap(), 0, This
);
79 /* TODO: At the moment the function parser is single pass; it achieves this
80 by passing constants to a couple of functions where they are then modified.
81 At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
82 when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
85 /* *******************************************
86 IWineD3DPixelShader IWineD3DPixelShader parts follow
87 ******************************************* */
89 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader
*iface
, IUnknown
** parent
){
90 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
92 *parent
= This
->parent
;
93 IUnknown_AddRef(*parent
);
94 TRACE("(%p) : returning %p\n", This
, *parent
);
98 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader
* iface
, IWineD3DDevice
**pDevice
){
99 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
100 IWineD3DDevice_AddRef((IWineD3DDevice
*)This
->wineD3DDevice
);
101 *pDevice
= (IWineD3DDevice
*)This
->wineD3DDevice
;
102 TRACE("(%p) returning %p\n", This
, *pDevice
);
107 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader
* impl
, VOID
* pData
, UINT
* pSizeOfData
) {
108 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)impl
;
109 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This
, pData
, pSizeOfData
);
112 *pSizeOfData
= This
->baseShader
.functionLength
;
115 if (*pSizeOfData
< This
->baseShader
.functionLength
) {
116 *pSizeOfData
= This
->baseShader
.functionLength
;
117 return WINED3DERR_MOREDATA
;
119 if (NULL
== This
->baseShader
.function
) { /* no function defined */
120 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This
, pData
);
121 (*(DWORD
**) pData
) = NULL
;
123 if (This
->baseShader
.functionLength
== 0) {
126 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
127 memcpy(pData
, This
->baseShader
.function
, This
->baseShader
.functionLength
);
132 /*******************************
133 * pshader functions software VM
136 void pshader_add(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
137 d
->x
= s0
->x
+ s1
->x
;
138 d
->y
= s0
->y
+ s1
->y
;
139 d
->z
= s0
->z
+ s1
->z
;
140 d
->w
= s0
->w
+ s1
->w
;
141 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
142 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
145 void pshader_dp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
146 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
;
147 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
148 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
151 void pshader_dp4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
152 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
+ s0
->w
* s1
->w
;
153 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
157 void pshader_dst(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
159 d
->y
= s0
->y
* s1
->y
;
162 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
163 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
166 void pshader_expp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
172 tmp
.f
= floorf(s0
->w
);
173 d
->x
= powf(2.0f
, tmp
.f
);
174 d
->y
= s0
->w
- tmp
.f
;
175 tmp
.f
= powf(2.0f
, s0
->w
);
176 tmp
.d
&= 0xFFFFFF00U
;
179 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
180 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
183 void pshader_logp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
184 float tmp_f
= fabsf(s0
->w
);
185 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
186 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
187 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
190 void pshader_mad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
191 d
->x
= s0
->x
* s1
->x
+ s2
->x
;
192 d
->y
= s0
->y
* s1
->y
+ s2
->y
;
193 d
->z
= s0
->z
* s1
->z
+ s2
->z
;
194 d
->w
= s0
->w
* s1
->w
+ s2
->w
;
195 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
196 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, s2
->x
, s2
->y
, s2
->z
, s2
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
199 void pshader_max(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
200 d
->x
= (s0
->x
>= s1
->x
) ? s0
->x
: s1
->x
;
201 d
->y
= (s0
->y
>= s1
->y
) ? s0
->y
: s1
->y
;
202 d
->z
= (s0
->z
>= s1
->z
) ? s0
->z
: s1
->z
;
203 d
->w
= (s0
->w
>= s1
->w
) ? s0
->w
: s1
->w
;
204 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
205 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
208 void pshader_min(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
209 d
->x
= (s0
->x
< s1
->x
) ? s0
->x
: s1
->x
;
210 d
->y
= (s0
->y
< s1
->y
) ? s0
->y
: s1
->y
;
211 d
->z
= (s0
->z
< s1
->z
) ? s0
->z
: s1
->z
;
212 d
->w
= (s0
->w
< s1
->w
) ? s0
->w
: s1
->w
;
213 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
214 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
217 void pshader_mov(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
222 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
223 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
226 void pshader_mul(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
227 d
->x
= s0
->x
* s1
->x
;
228 d
->y
= s0
->y
* s1
->y
;
229 d
->z
= s0
->z
* s1
->z
;
230 d
->w
= s0
->w
* s1
->w
;
231 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
232 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
/* nop: no operation; only emits the optional debug trace. */
void pshader_nop(void) {
    PSTRACE(("executing nop\n"));
}
240 void pshader_rcp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
241 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== s0
->w
) ? HUGE_VAL
: 1.0f
/ s0
->w
;
242 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
246 void pshader_rsq(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
247 float tmp_f
= fabsf(s0
->w
);
248 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== tmp_f
) ? HUGE_VAL
: ((1.0f
!= tmp_f
) ? 1.0f
/ sqrtf(tmp_f
) : 1.0f
);
249 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
250 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
253 void pshader_sge(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
254 d
->x
= (s0
->x
>= s1
->x
) ? 1.0f
: 0.0f
;
255 d
->y
= (s0
->y
>= s1
->y
) ? 1.0f
: 0.0f
;
256 d
->z
= (s0
->z
>= s1
->z
) ? 1.0f
: 0.0f
;
257 d
->w
= (s0
->w
>= s1
->w
) ? 1.0f
: 0.0f
;
258 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
259 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
262 void pshader_slt(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
263 d
->x
= (s0
->x
< s1
->x
) ? 1.0f
: 0.0f
;
264 d
->y
= (s0
->y
< s1
->y
) ? 1.0f
: 0.0f
;
265 d
->z
= (s0
->z
< s1
->z
) ? 1.0f
: 0.0f
;
266 d
->w
= (s0
->w
< s1
->w
) ? 1.0f
: 0.0f
;
267 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
268 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
271 void pshader_sub(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
272 d
->x
= s0
->x
- s1
->x
;
273 d
->y
= s0
->y
- s1
->y
;
274 d
->z
= s0
->z
- s1
->z
;
275 d
->w
= s0
->w
- s1
->w
;
276 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
277 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
281 * Version 1.1 specific
284 void pshader_exp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
285 d
->x
= d
->y
= d
->z
= d
->w
= powf(2.0f
, s0
->w
);
286 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
287 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
290 void pshader_log(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
291 float tmp_f
= fabsf(s0
->w
);
292 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
293 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
294 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
297 void pshader_frc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
298 d
->x
= s0
->x
- floorf(s0
->x
);
299 d
->y
= s0
->y
- floorf(s0
->y
);
302 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
303 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
306 typedef FLOAT D3DMATRIX44
[4][4];
307 typedef FLOAT D3DMATRIX43
[4][3];
308 typedef FLOAT D3DMATRIX34
[3][4];
309 typedef FLOAT D3DMATRIX33
[3][3];
310 typedef FLOAT D3DMATRIX23
[2][3];
312 void pshader_m4x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat
) {
314 * Buggy CODE: here only if cast not work for copy/paste
315 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
316 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
317 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
318 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
319 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
320 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
321 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
323 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
324 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
325 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
326 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
+ mat
[3][3] * s0
->w
;
327 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
328 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
329 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
330 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], mat
[3][3], s0
->w
, d
->w
));
333 void pshader_m4x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX34 mat
) {
334 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
335 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
336 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
338 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
339 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
340 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
341 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0
->w
, d
->w
));
344 void pshader_m3x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX43 mat
) {
345 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
346 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
347 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
348 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
;
349 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
350 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
351 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
352 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], s0
->w
, d
->w
));
355 void pshader_m3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX33 mat
) {
356 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
357 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
358 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
360 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
361 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
362 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
363 PSTRACE(("executing m3x3(4): (%f) \n", d
->w
));
366 void pshader_m3x2(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX23 mat
) {
368 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
369 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
375 * Version 2.0 specific
377 void pshader_lrp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
378 d
->x
= s0
->x
* (s1
->x
- s2
->x
) + s2
->x
;
379 d
->y
= s0
->y
* (s1
->y
- s2
->y
) + s2
->y
;
380 d
->z
= s0
->z
* (s1
->z
- s2
->z
) + s2
->z
;
381 d
->w
= s0
->w
* (s1
->w
- s2
->w
) + s2
->w
;
384 void pshader_crs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
385 d
->x
= s0
->y
* s1
->z
- s0
->z
* s1
->y
;
386 d
->y
= s0
->z
* s1
->x
- s0
->x
* s1
->z
;
387 d
->z
= s0
->x
* s1
->y
- s0
->y
* s1
->x
;
388 d
->w
= 0.9f
; /* w is undefined, so set it to something safeish */
390 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
391 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
394 void pshader_abs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
399 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
400 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
404 void pshader_texcoord(WINED3DSHADERVECTOR
* d
) {
408 void pshader_texkill(WINED3DSHADERVECTOR
* d
) {
412 void pshader_tex(WINED3DSHADERVECTOR
* d
) {
415 void pshader_texld(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
419 void pshader_texbem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
423 void pshader_texbeml(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
427 void pshader_texreg2ar(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
431 void pshader_texreg2gb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
435 void pshader_texm3x2pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
439 void pshader_texm3x2tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
443 void pshader_texm3x3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
447 void pshader_texm3x3pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
451 void pshader_texm3x3diff(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
455 void pshader_texm3x3spec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
459 void pshader_texm3x3vspec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
463 void pshader_cnd(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
467 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
468 void pshader_def(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
, WINED3DSHADERVECTOR
* s3
) {
472 void pshader_texreg2rgb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
476 void pshader_texdp3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
480 void pshader_texm3x2depth(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
484 void pshader_texdp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
488 void pshader_texm3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
492 void pshader_texdepth(WINED3DSHADERVECTOR
* d
) {
496 void pshader_cmp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
500 void pshader_bem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
504 void pshader_call(WINED3DSHADERVECTOR
* d
) {
508 void pshader_callnz(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
512 void pshader_loop(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
516 void pshader_ret(void) {
520 void pshader_endloop(void) {
524 void pshader_dcl(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
528 void pshader_pow(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
532 void pshader_nrm(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
536 void pshader_sincos(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
540 void pshader_rep(WINED3DSHADERVECTOR
* d
) {
544 void pshader_endrep(void) {
548 void pshader_if(WINED3DSHADERVECTOR
* d
) {
552 void pshader_ifc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
556 void pshader_else(void) {
560 void pshader_label(WINED3DSHADERVECTOR
* d
) {
564 void pshader_endif(void) {
568 void pshader_break(void) {
572 void pshader_breakc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
576 void pshader_breakp(WINED3DSHADERVECTOR
* d
) {
580 void pshader_defb(WINED3DSHADERVECTOR
* d
) {
584 void pshader_defi(WINED3DSHADERVECTOR
* d
) {
588 void pshader_dp2add(WINED3DSHADERVECTOR
* d
) {
592 void pshader_dsx(WINED3DSHADERVECTOR
* d
) {
596 void pshader_dsy(WINED3DSHADERVECTOR
* d
) {
600 void pshader_texldd(WINED3DSHADERVECTOR
* d
) {
604 void pshader_setp(WINED3DSHADERVECTOR
* d
) {
608 void pshader_texldl(WINED3DSHADERVECTOR
* d
) {
613 * log, exp, frc, m*x* seems to be macros ins ... to see
615 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins
[] = {
616 {D3DSIO_NOP
, "nop", "NOP", 0, pshader_nop
, 0, 0},
617 {D3DSIO_MOV
, "mov", "MOV", 2, pshader_mov
, 0, 0},
618 {D3DSIO_ADD
, "add", "ADD", 3, pshader_add
, 0, 0},
619 {D3DSIO_SUB
, "sub", "SUB", 3, pshader_sub
, 0, 0},
620 {D3DSIO_MAD
, "mad", "MAD", 4, pshader_mad
, 0, 0},
621 {D3DSIO_MUL
, "mul", "MUL", 3, pshader_mul
, 0, 0},
622 {D3DSIO_RCP
, "rcp", "RCP", 2, pshader_rcp
, 0, 0},
623 {D3DSIO_RSQ
, "rsq", "RSQ", 2, pshader_rsq
, 0, 0},
624 {D3DSIO_DP3
, "dp3", "DP3", 3, pshader_dp3
, 0, 0},
625 {D3DSIO_DP4
, "dp4", "DP4", 3, pshader_dp4
, 0, 0},
626 {D3DSIO_MIN
, "min", "MIN", 3, pshader_min
, 0, 0},
627 {D3DSIO_MAX
, "max", "MAX", 3, pshader_max
, 0, 0},
628 {D3DSIO_SLT
, "slt", "SLT", 3, pshader_slt
, 0, 0},
629 {D3DSIO_SGE
, "sge", "SGE", 3, pshader_sge
, 0, 0},
630 {D3DSIO_ABS
, "abs", "ABS", 2, pshader_abs
, 0, 0},
631 {D3DSIO_EXP
, "exp", "EX2", 2, pshader_exp
, 0, 0},
632 {D3DSIO_LOG
, "log", "LG2", 2, pshader_log
, 0, 0},
633 {D3DSIO_DST
, "dst", "DST", 3, pshader_dst
, 0, 0},
634 {D3DSIO_LRP
, "lrp", "LRP", 4, pshader_lrp
, 0, 0},
635 {D3DSIO_FRC
, "frc", "FRC", 2, pshader_frc
, 0, 0},
636 {D3DSIO_M4x4
, "m4x4", "undefined", 3, pshader_m4x4
, 0, 0},
637 {D3DSIO_M4x3
, "m4x3", "undefined", 3, pshader_m4x3
, 0, 0},
638 {D3DSIO_M3x4
, "m3x4", "undefined", 3, pshader_m3x4
, 0, 0},
639 {D3DSIO_M3x3
, "m3x3", "undefined", 3, pshader_m3x3
, 0, 0},
640 {D3DSIO_M3x2
, "m3x2", "undefined", 3, pshader_m3x2
, 0, 0},
643 /** FIXME: use direct access so add the others opcodes as stubs */
644 /* DCL is a specil operation */
645 {D3DSIO_DCL
, "dcl", NULL
, 2, pshader_dcl
, 0, 0},
646 {D3DSIO_POW
, "pow", "POW", 3, pshader_pow
, 0, 0},
647 {D3DSIO_CRS
, "crs", "XPS", 3, pshader_crs
, 0, 0},
648 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
651 MUL vec.xyz, vec, tmp;
652 but I think this is better because it accounts for w properly.
658 {D3DSIO_NRM
, "nrm", NULL
, 2, pshader_nrm
, 0, 0},
659 {D3DSIO_SINCOS
, "sincos", NULL
, 2, pshader_sincos
, 0, 0},
661 /* Flow control - requires GLSL or software shaders */
662 {D3DSIO_REP
, "rep", GLNAME_REQUIRE_GLSL
, 1, pshader_rep
, 0, 0},
663 {D3DSIO_ENDREP
, "endrep", GLNAME_REQUIRE_GLSL
, 0, pshader_endrep
, 0, 0},
664 {D3DSIO_IF
, "if", GLNAME_REQUIRE_GLSL
, 1, pshader_if
, 0, 0},
665 {D3DSIO_IFC
, "ifc", GLNAME_REQUIRE_GLSL
, 2, pshader_ifc
, 0, 0},
666 {D3DSIO_ELSE
, "else", GLNAME_REQUIRE_GLSL
, 0, pshader_else
, 0, 0},
667 {D3DSIO_ENDIF
, "endif", GLNAME_REQUIRE_GLSL
, 0, pshader_endif
, 0, 0},
668 {D3DSIO_BREAK
, "break", GLNAME_REQUIRE_GLSL
, 0, pshader_break
, 0, 0},
669 {D3DSIO_BREAKC
, "breakc", GLNAME_REQUIRE_GLSL
, 2, pshader_breakc
, 0, 0},
670 {D3DSIO_BREAKP
, "breakp", GLNAME_REQUIRE_GLSL
, 1, pshader_breakp
, 0, 0},
671 {D3DSIO_CALL
, "call", GLNAME_REQUIRE_GLSL
, 1, pshader_call
, 0, 0},
672 {D3DSIO_CALLNZ
, "callnz", GLNAME_REQUIRE_GLSL
, 2, pshader_callnz
, 0, 0},
673 {D3DSIO_LOOP
, "loop", GLNAME_REQUIRE_GLSL
, 2, pshader_loop
, 0, 0},
674 {D3DSIO_RET
, "ret", GLNAME_REQUIRE_GLSL
, 0, pshader_ret
, 0, 0},
675 {D3DSIO_ENDLOOP
, "endloop", GLNAME_REQUIRE_GLSL
, 0, pshader_endloop
, 0, 0},
676 {D3DSIO_LABEL
, "label", GLNAME_REQUIRE_GLSL
, 1, pshader_label
, 0, 0},
678 {D3DSIO_DEFB
, "defb", GLNAME_REQUIRE_GLSL
, 2, pshader_defb
, 0, 0},
679 {D3DSIO_DEFI
, "defi", GLNAME_REQUIRE_GLSL
, 2, pshader_defi
, 0, 0},
681 {D3DSIO_TEXCOORD
, "texcoord", "undefined", 1, pshader_texcoord
, 0, D3DPS_VERSION(1,3)},
682 {D3DSIO_TEXCOORD
, "texcrd", "undefined", 2, pshader_texcoord
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
683 {D3DSIO_TEXKILL
, "texkill", "KIL", 1, pshader_texkill
, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
684 {D3DSIO_TEX
, "tex", "undefined", 1, pshader_tex
, 0, D3DPS_VERSION(1,3)},
685 {D3DSIO_TEX
, "texld", "undefined", 2, pshader_texld
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
686 {D3DSIO_TEX
, "texld", "undefined", 3, pshader_texld
, D3DPS_VERSION(2,0), -1},
687 {D3DSIO_TEXBEM
, "texbem", "undefined", 2, pshader_texbem
, 0, D3DPS_VERSION(1,3)},
688 {D3DSIO_TEXBEML
, "texbeml", GLNAME_REQUIRE_GLSL
, 2, pshader_texbeml
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
689 {D3DSIO_TEXREG2AR
,"texreg2ar","undefined", 2, pshader_texreg2ar
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
690 {D3DSIO_TEXREG2GB
,"texreg2gb","undefined", 2, pshader_texreg2gb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
691 {D3DSIO_TEXM3x2PAD
, "texm3x2pad", "undefined", 2, pshader_texm3x2pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
692 {D3DSIO_TEXM3x2TEX
, "texm3x2tex", "undefined", 2, pshader_texm3x2tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
693 {D3DSIO_TEXM3x3PAD
, "texm3x3pad", "undefined", 2, pshader_texm3x3pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
694 {D3DSIO_TEXM3x3DIFF
, "texm3x3diff", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3diff
, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
695 {D3DSIO_TEXM3x3SPEC
, "texm3x3spec", "undefined", 3, pshader_texm3x3spec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
696 {D3DSIO_TEXM3x3VSPEC
, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
697 {D3DSIO_TEXM3x3TEX
, "texm3x3tex", "undefined", 2, pshader_texm3x3tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
698 {D3DSIO_EXPP
, "expp", "EXP", 2, pshader_expp
, 0, 0},
699 {D3DSIO_LOGP
, "logp", "LOG", 2, pshader_logp
, 0, 0},
700 {D3DSIO_CND
, "cnd", GLNAME_REQUIRE_GLSL
, 4, pshader_cnd
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
701 /* def is a special operation */
702 {D3DSIO_DEF
, "def", "undefined", 5, pshader_def
, 0, 0},
703 {D3DSIO_TEXREG2RGB
, "texreg2rgb", GLNAME_REQUIRE_GLSL
, 2, pshader_texreg2rgb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
704 {D3DSIO_TEXDP3TEX
, "texdp3tex", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3tex
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
705 {D3DSIO_TEXM3x2DEPTH
, "texm3x2depth", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x2depth
,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
706 {D3DSIO_TEXDP3
, "texdp3", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
707 {D3DSIO_TEXM3x3
, "texm3x3", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
708 {D3DSIO_TEXDEPTH
, "texdepth", GLNAME_REQUIRE_GLSL
,1, pshader_texdepth
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
709 {D3DSIO_CMP
, "cmp", GLNAME_REQUIRE_GLSL
, 4, pshader_cmp
, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
710 {D3DSIO_BEM
, "bem", GLNAME_REQUIRE_GLSL
, 3, pshader_bem
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
711 /* TODO: dp2add can be made out of multiple instuctions */
712 {D3DSIO_DP2ADD
, "dp2add", GLNAME_REQUIRE_GLSL
, 2, pshader_dp2add
, 0, 0},
713 {D3DSIO_DSX
, "dsx", GLNAME_REQUIRE_GLSL
, 2, pshader_dsx
, 0, 0},
714 {D3DSIO_DSY
, "dsy", GLNAME_REQUIRE_GLSL
, 2, pshader_dsy
, 0, 0},
715 {D3DSIO_TEXLDD
, "texldd", GLNAME_REQUIRE_GLSL
, 2, pshader_texldd
, 0, 0},
716 {D3DSIO_SETP
, "setp", GLNAME_REQUIRE_GLSL
, 2, pshader_setp
, 0, 0},
717 {D3DSIO_TEXLDL
, "texdl", GLNAME_REQUIRE_GLSL
, 2, pshader_texldl
, 0, 0},
718 {D3DSIO_PHASE
, "phase", GLNAME_REQUIRE_GLSL
, 0, pshader_nop
, 0, 0},
719 {0, NULL
, NULL
, 0, NULL
, 0, 0}
722 inline static const SHADER_OPCODE
* pshader_program_get_opcode(IWineD3DPixelShaderImpl
*This
, const DWORD code
) {
724 DWORD version
= This
->baseShader
.version
;
725 DWORD hex_version
= D3DPS_VERSION(version
/10, version
%10);
726 const SHADER_OPCODE
*shader_ins
= This
->baseShader
.shader_ins
;
728 /** TODO: use dichotomic search */
729 while (NULL
!= shader_ins
[i
].name
) {
730 if (((code
& D3DSI_OPCODE_MASK
) == shader_ins
[i
].opcode
) &&
731 (((hex_version
>= shader_ins
[i
].min_version
) && (hex_version
<= shader_ins
[i
].max_version
)) ||
732 ((shader_ins
[i
].min_version
== 0) && (shader_ins
[i
].max_version
== 0)))) {
733 return &shader_ins
[i
];
737 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code
, code
, code
& D3DSI_OPCODE_MASK
, version
);
741 inline static BOOL
pshader_is_version_token(DWORD token
) {
742 return 0xFFFF0000 == (token
& 0xFFFF0000);
745 inline static BOOL
pshader_is_comment_token(DWORD token
) {
746 return D3DSIO_COMMENT
== (token
& D3DSI_OPCODE_MASK
);
750 inline static void get_register_name(const DWORD param
, char* regstr
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
751 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
753 DWORD reg
= param
& D3DSP_REGNUM_MASK
;
754 DWORD regtype
= shader_get_regtype(param
);
758 sprintf(regstr
, "R%lu", reg
);
762 strcpy(regstr
, "fragment.color.primary");
764 strcpy(regstr
, "fragment.color.secondary");
769 sprintf(regstr
, "C%lu", reg
);
771 sprintf(regstr
, "program.env[%lu]", reg
);
773 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
774 sprintf(regstr
,"T%lu", reg
);
777 sprintf(regstr
, "%s", rastout_reg_names
[reg
]);
780 sprintf(regstr
, "oD[%lu]", reg
);
782 case D3DSPR_TEXCRDOUT
:
783 sprintf(regstr
, "oT[%lu]", reg
);
786 FIXME("Unhandled register name Type(%ld)\n", regtype
);
791 inline static void get_write_mask(const DWORD output_reg
, char *write_mask
) {
793 if ((output_reg
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
794 strcat(write_mask
, ".");
795 if (output_reg
& D3DSP_WRITEMASK_0
) strcat(write_mask
, "r");
796 if (output_reg
& D3DSP_WRITEMASK_1
) strcat(write_mask
, "g");
797 if (output_reg
& D3DSP_WRITEMASK_2
) strcat(write_mask
, "b");
798 if (output_reg
& D3DSP_WRITEMASK_3
) strcat(write_mask
, "a");
802 inline static void get_input_register_swizzle(const DWORD instr
, char *swzstring
) {
803 static const char swizzle_reg_chars
[] = "rgba";
804 DWORD swizzle
= (instr
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
805 DWORD swizzle_x
= swizzle
& 0x03;
806 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
807 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
808 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
810 * swizzle bits fields:
814 if ((D3DSP_NOSWIZZLE
>> D3DSP_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
815 if (swizzle_x
== swizzle_y
&&
816 swizzle_x
== swizzle_z
&&
817 swizzle_x
== swizzle_w
) {
818 sprintf(swzstring
, ".%c", swizzle_reg_chars
[swizzle_x
]);
820 sprintf(swzstring
, ".%c%c%c%c",
821 swizzle_reg_chars
[swizzle_x
],
822 swizzle_reg_chars
[swizzle_y
],
823 swizzle_reg_chars
[swizzle_z
],
824 swizzle_reg_chars
[swizzle_w
]);
/*
 * Scale factor operand for each value of the 4-bit destination shift field.
 * Entries 1..4 select a component of the "coefmul" constant and entries
 * 12..15 a component of the "coefdiv" constant; every other slot holds the
 * placeholder name "dummy".
 */
static const char* shift_tab[] = {
    "dummy",     /*  0 (none) */
    "coefmul.x", /*  1 (x2)   */
    "coefmul.y", /*  2 (x4)   */
    "coefmul.z", /*  3 (x8)   */
    "coefmul.w", /*  4 (x16)  */
    "dummy",     /*  5 (x32)  */
    "dummy",     /*  6 (x64)  */
    "dummy",     /*  7 (x128) */
    "dummy",     /*  8 (d256) */
    "dummy",     /*  9 (d128) */
    "dummy",     /* 10 (d64)  */
    "dummy",     /* 11 (d32)  */
    "coefdiv.w", /* 12 (d16)  */
    "coefdiv.z", /* 13 (d8)   */
    "coefdiv.y", /* 14 (d4)   */
    "coefdiv.x"  /* 15 (d2)   */
};

/*
 * gen_output_modifier_line
 *
 * Formats into `line` the MUL instruction that applies a destination shift:
 * the register regstr (restricted by write_mask) is multiplied in place by
 * the factor shift_tab[shift]. A non-zero `saturate` selects the _SAT form
 * of the instruction.
 *
 * `shift` is used directly as the table index, and `line` is written with
 * sprintf, so the caller supplies both a valid index and a large enough
 * buffer.
 */
inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
    const char *sat_suffix = saturate ? "_SAT" : "";
    const char *factor = shift_tab[shift];

    sprintf(line, "MUL%s %s%s, %s, %s;", sat_suffix, regstr, write_mask, regstr, factor);
}
/*
 * gen_input_modifier_line
 *
 * Handles the source modifier bits (instr & D3DSP_SRCMOD_MASK) of an input
 * parameter token. The register name is looked up with get_register_name().
 * Depending on the modifier, outregstr receives either the register name
 * (optionally prefixed with '-') or the scratch register name "T<c>" with
 * c = 'A' + tmpreg; in the scratch case `line` receives the ARB
 * instruction(s) that compute the modified value into that scratch register.
 *
 * regstr and tmpline are function-local static buffers, so their contents
 * are shared between calls.
 *
 * NOTE(review): most case labels and the return statement are not present
 * in this listing. Callers test the return value before adding `line` to
 * the program, so presumably non-zero means `line` was filled in; confirm.
 */
853 inline static int gen_input_modifier_line(const DWORD instr
, int tmpreg
, char *outregstr
, char *line
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
854 /* Generate a line that does the input modifier computation and return the input register to use */
855 static char regstr
[256];
856 static char tmpline
[256];
859 /* Assume a new line will be added */
862 /* Get register name */
863 get_register_name(instr
, regstr
, constants
);
865 TRACE(" Register name %s\n", regstr
);
866 switch (instr
& D3DSP_SRCMOD_MASK
) {
/* No computation needed: the name is used as is. */
868 strcpy(outregstr
, regstr
);
/* Negation is expressed directly on the operand. */
872 sprintf(outregstr
, "-%s", regstr
);
/* reg - 0.5 (coefdiv.x is declared as 0.5 by the program generator) */
876 sprintf(line
, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg
, regstr
);
878 case D3DSPSM_BIASNEG
:
879 sprintf(line
, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg
, regstr
);
/* reg * 2 - 1 (coefmul.x is declared as 2, one.x as 1.0) */
882 sprintf(line
, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg
, regstr
);
884 case D3DSPSM_SIGNNEG
:
885 sprintf(line
, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg
, regstr
);
/* 1 - reg */
888 sprintf(line
, "SUB T%c, one.x, %s;", 'A' + tmpreg
, regstr
);
/* reg + reg */
891 sprintf(line
, "ADD T%c, %s, %s;", 'A' + tmpreg
, regstr
, regstr
);
/* -reg - reg */
894 sprintf(line
, "ADD T%c, -%s, -%s;", 'A' + tmpreg
, regstr
, regstr
);
/* Two instructions joined by a newline: reciprocal of the .z component,
 * then the register multiplied by that reciprocal. */
897 sprintf(line
, "RCP T%c, %s.z;", 'A' + tmpreg
, regstr
);
898 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
899 strcat(line
, "\n"); /* Hack */
900 strcat(line
, tmpline
);
/* Same as above, using the .w component. */
903 sprintf(line
, "RCP T%c, %s.w;", 'A' + tmpreg
, regstr
);
904 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
905 strcat(line
, "\n"); /* Hack */
906 strcat(line
, tmpline
);
909 strcpy(outregstr
, regstr
);
914 /* Substitute the register name */
915 sprintf(outregstr
, "T%c", 'A' + tmpreg
);
/*
 * pshader_program_get_registers_used
 *
 * Walks the shader token stream starting at pToken until the end token and
 * records which registers the instructions reference:
 *   bit <reg> of *texUsed   is set for every D3DSPR_TEXTURE parameter,
 *   bit <reg> of *tempsUsed is set for every D3DSPR_TEMP parameter.
 * Comment tokens are stepped over by their encoded length, and the
 * parameters of D3DSIO_DCL and D3DSIO_DEF instructions are stepped over
 * without being inspected.
 *
 * NOTE(review): the initialisation of *tempsUsed / *texUsed, the handling of
 * the version token and the pointer increments inside the parameter loop are
 * not present in this listing.
 */
921 inline static void pshader_program_get_registers_used(
922 IWineD3DPixelShaderImpl
*This
,
923 CONST DWORD
* pToken
, DWORD
* tempsUsed
, DWORD
* texUsed
) {
/* NOTE(review): this loop tests D3DVS_END() while the other token loops in
 * this file test D3DPS_END(); confirm both macros have the same value. */
931 while (D3DVS_END() != *pToken
) {
932 CONST SHADER_OPCODE
* curOpcode
;
935 if (pshader_is_version_token(*pToken
)) {
940 } else if (pshader_is_comment_token(*pToken
)) {
941 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
943 pToken
+= comment_len
;
/* NOTE(review): no NULL check of curOpcode is visible before it is
 * dereferenced below. */
948 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
951 /* Skip declarations (for now) */
952 if (D3DSIO_DCL
== curOpcode
->opcode
) {
953 pToken
+= curOpcode
->num_params
;
956 /* Skip definitions (for now) */
957 } else if (D3DSIO_DEF
== curOpcode
->opcode
) {
958 pToken
+= curOpcode
->num_params
;
961 /* Set texture registers, and temporary registers */
965 for (i
= 0; i
< curOpcode
->num_params
; ++i
) {
966 DWORD regtype
= shader_get_regtype(*pToken
);
967 DWORD reg
= (*pToken
) & D3DSP_REGNUM_MASK
;
/* NOTE(review): reg is limited only by D3DSP_REGNUM_MASK; (1 << reg) is
 * undefined when reg is not smaller than the width of int. Confirm that
 * the register limits guarantee a small enough value. */
968 if (D3DSPR_TEXTURE
== regtype
)
969 *texUsed
|= (1 << reg
);
970 if (D3DSPR_TEMP
== regtype
)
971 *tempsUsed
|= (1 << reg
);
/*
 * pshader_set_version
 *
 * Records the shader version and the matching register limits in
 * This->baseShader.
 *
 *   major       = bits 8..11 of the version token
 *   minor       = bits 0..3 of the version token
 *   hex_version = the raw version token
 *   version     = major * 10 + minor (e.g. 14 for ps_1_4)
 *
 * limits.address is always set to 0. The remaining limits visible here:
 *
 *   version   temporary  constant_float  constant_int  constant_bool  texture
 *   13            2            8              0              0           4
 *   14            6            8              0              0           6
 *   20           32           32             16             16           8
 *   30           32          224             16             16           0
 *   default      32            8              0              0           8
 *
 * The default branch also logs a FIXME for the unrecognised version.
 *
 * NOTE(review): part of the signature and any case labels preceding
 * "case 13" are not present in this listing.
 */
978 void pshader_set_version(
979 IWineD3DPixelShaderImpl
*This
,
982 DWORD major
= (version
>> 8) & 0x0F;
983 DWORD minor
= version
& 0x0F;
985 This
->baseShader
.hex_version
= version
;
986 This
->baseShader
.version
= major
* 10 + minor
;
987 TRACE("ps_%lu_%lu\n", major
, minor
);
989 This
->baseShader
.limits
.address
= 0;
991 switch (This
->baseShader
.version
) {
995 case 13: This
->baseShader
.limits
.temporary
= 2;
996 This
->baseShader
.limits
.constant_float
= 8;
997 This
->baseShader
.limits
.constant_int
= 0;
998 This
->baseShader
.limits
.constant_bool
= 0;
999 This
->baseShader
.limits
.texture
= 4;
1002 case 14: This
->baseShader
.limits
.temporary
= 6;
1003 This
->baseShader
.limits
.constant_float
= 8;
1004 This
->baseShader
.limits
.constant_int
= 0;
1005 This
->baseShader
.limits
.constant_bool
= 0;
1006 This
->baseShader
.limits
.texture
= 6;
1009 /* FIXME: temporaries must match D3DPSHADERCAPS2_0.NumTemps */
1010 case 20: This
->baseShader
.limits
.temporary
= 32;
1011 This
->baseShader
.limits
.constant_float
= 32;
1012 This
->baseShader
.limits
.constant_int
= 16;
1013 This
->baseShader
.limits
.constant_bool
= 16;
1014 This
->baseShader
.limits
.texture
= 8;
1017 case 30: This
->baseShader
.limits
.temporary
= 32;
1018 This
->baseShader
.limits
.constant_float
= 224;
1019 This
->baseShader
.limits
.constant_int
= 16;
1020 This
->baseShader
.limits
.constant_bool
= 16;
1021 This
->baseShader
.limits
.texture
= 0;
1024 default: This
->baseShader
.limits
.temporary
= 32;
1025 This
->baseShader
.limits
.constant_float
= 8;
1026 This
->baseShader
.limits
.constant_int
= 0;
1027 This
->baseShader
.limits
.constant_bool
= 0;
1028 This
->baseShader
.limits
.texture
= 8;
1029 FIXME("Unrecognized pixel shader version %lu!\n", version
);
1033 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/*
 * IWineD3DPixelShaderImpl_GenerateProgramArbHW
 *
 * Translates the D3D pixel shader token stream pFunction into the text of
 * an ARB fragment program ("!!ARBfp1.0" ... "END") and hands that text to
 * OpenGL.
 *
 * Steps visible in this listing:
 *   1. Allocate a zeroed text buffer of SHADER_PGMSIZE bytes.
 *   2. Clear This->constants (one flag per locally defined constant).
 *   3. First pass: pshader_program_get_registers_used() collects the bit
 *      masks of referenced texture and temporary registers.
 *   4. Declare a TEMP for every used T<n>/R<n> register plus the helper
 *      temporaries TMP, TMP2, TA, TB, TC and the constants coefdiv,
 *      coefmul and one.
 *   5. Load every used T<n> from fragment.texcoord[n].
 *   6. Second pass: walk the tokens, emit a PARAM for each D3DSIO_DEF
 *      (marking This->constants[reg]), emit dedicated sequences for the
 *      texture opcodes, and emit "<op> dst, src..." lines for the generic
 *      opcodes, with extra lines for input modifiers and for a destination
 *      shift.
 *   7. Append "MOV result.color, R0;" and "END".
 *   8. Generate and bind a program id, upload the text with
 *      glProgramStringARB, and on GL_INVALID_OPERATION log the error
 *      position/string and set prgId to -1.
 *   9. Free the text buffer.
 *
 * NOTE(review): many lines (declarations, braces, case labels, increments)
 * are not present in this listing; the notes below cover only what is
 * visible.
 * NOTE(review): tmpLine / tmpOp hold generated text and are passed as the
 * format argument of shader_addline(); this is only safe while that text
 * contains no '%' characters.
 */
1034 inline static VOID
IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
1035 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
1036 const DWORD
*pToken
= pFunction
;
1037 const SHADER_OPCODE
*curOpcode
= NULL
;
1038 const DWORD
*pInstr
;
1041 #if 0 /* TODO: loop register (just another address register ) */
1042 BOOL hasLoops
= FALSE
;
1044 SHADER_BUFFER buffer
;
1046 BOOL saturate
; /* clamp to 0.0 -> 1.0*/
1047 int row
= 0; /* not sure, something to do with macros? */
1049 int version
= This
->baseShader
.version
;
1051 /* Keep bitmaps of used temporary and texture registers */
1052 DWORD tempsUsed
, texUsed
;
1054 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
1055 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
1056 if (This
->device
->fixupVertexBufferSize
< SHADER_PGMSIZE
) {
1057 HeapFree(GetProcessHeap(), 0, This
->fixupVertexBuffer
);
1058 This
->fixupVertexBuffer
= HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE
);
1059 This
->fixupVertexBufferSize
= SHADER_PGMSIZE
;
1060 This
->fixupVertexBuffer
[0] = 0;
1062 buffer
.buffer
= This
->device
->fixupVertexBuffer
;
/* NOTE(review): no NULL check of this allocation is visible before the
 * buffer is written to below. */
1064 buffer
.buffer
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, SHADER_PGMSIZE
);
1069 /* FIXME: if jumps are used, use GLSL, else use ARB_fragment_program */
1070 shader_addline(&buffer
, "!!ARBfp1.0\n");
1072 /* TODO: Think about using a first pass to work out what's required for the second pass. */
1073 for(i
= 0; i
< WINED3D_PSHADER_MAX_CONSTANTS
; i
++)
1074 This
->constants
[i
] = 0;
1076 /* First pass: figure out which temporary and texture registers are used */
1077 pshader_program_get_registers_used(This
, pToken
, &tempsUsed
, &texUsed
);
1078 TRACE("Texture registers used: %#lx, Temp registers used %#lx\n", texUsed
, tempsUsed
);
1080 /* TODO: check register usage against GL/Directx limits, and fail if they're exceeded */
1082 /* Pre-declare registers */
1083 for(i
= 0; i
< This
->baseShader
.limits
.texture
; i
++) {
1084 if (texUsed
& (1 << i
))
1085 shader_addline(&buffer
,"TEMP T%lu;\n", i
);
1088 for(i
= 0; i
< This
->baseShader
.limits
.temporary
; i
++) {
1089 if (tempsUsed
& (1 << i
))
1090 shader_addline(&buffer
, "TEMP R%lu;\n", i
);
1093 /* Necessary for internal operations */
1094 shader_addline(&buffer
, "TEMP TMP;\n");
1095 shader_addline(&buffer
, "TEMP TMP2;\n");
1096 shader_addline(&buffer
, "TEMP TA;\n");
1097 shader_addline(&buffer
, "TEMP TB;\n");
1098 shader_addline(&buffer
, "TEMP TC;\n");
1099 shader_addline(&buffer
, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1100 shader_addline(&buffer
, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1101 shader_addline(&buffer
, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1103 /* Texture coordinate registers must be pre-loaded */
1104 for (i
= 0; i
< This
->baseShader
.limits
.texture
; i
++) {
1105 if (texUsed
& (1 << i
))
1106 shader_addline(&buffer
, "MOV T%lu, fragment.texcoord[%lu];\n", i
, i
);
1109 /* Second pass, process opcodes */
1110 if (NULL
!= pToken
) {
1111 while (D3DPS_END() != *pToken
) {
1112 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1114 instructionSize
= pToken
& SIZEBITS
>> 27;
1118 /* Skip version token */
1119 if (pshader_is_version_token(*pToken
)) {
1124 /* Skip comment tokens */
1125 if (pshader_is_comment_token(*pToken
)) {
1126 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1128 TRACE("#%s\n", (char*)pToken
);
1129 pToken
+= comment_len
;
1133 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1137 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
1139 if (NULL
== curOpcode
) {
1140 /* unknown current opcode ... (shouldn't be any!) */
1141 while (*pToken
& 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1142 FIXME("unrecognized opcode: %08lx\n", *pToken
);
1145 } else if (GLNAME_REQUIRE_GLSL
== curOpcode
->glname
) {
1146 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1147 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode
->name
);
1148 pToken
+= curOpcode
->num_params
;
1150 } else if (D3DSIO_DEF
== curOpcode
->opcode
) {
1152 /* Handle definitions here, they don't fit well with the
1153 * other instructions below [for now ] */
1155 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1157 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1158 curOpcode
->name
, curOpcode
->glname
, curOpcode
->num_params
);
/* The four tokens after the register token are reinterpreted as floats and
 * written out as a local constant C<reg>. */
1160 shader_addline(&buffer
,
1161 "PARAM C%lu = { %f, %f, %f, %f };\n", reg
,
1162 *((const float *)(pToken
+ 1)),
1163 *((const float *)(pToken
+ 2)),
1164 *((const float *)(pToken
+ 3)),
1165 *((const float *)(pToken
+ 4)) );
/* Remember that C<reg> is defined locally (get_register_name receives this
 * array as its constants argument). */
1167 This
->constants
[reg
] = 1;
1173 /* Common processing: [inst] [dst]* [src]* */
1175 char output_rname
[256];
1176 char output_wmask
[20];
1178 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1179 curOpcode
->name
, curOpcode
->glname
, curOpcode
->num_params
);
1183 /* Build opcode for GL fragment_program */
1184 switch (curOpcode
->opcode
) {
1210 case D3DSIO_TEXKILL
:
1211 TRACE("Appending glname %s to tmpLine\n", curOpcode
->glname
);
1212 strcpy(tmpLine
, curOpcode
->glname
);
1218 char reg_coord_swz
[20] = "";
1219 DWORD reg_dest_code
;
1220 DWORD reg_sampler_code
;
1222 /* All versions have a destination register */
1223 reg_dest_code
= *pToken
& D3DSP_REGNUM_MASK
;
1224 get_register_name(*pToken
++, reg_dest
, This
->constants
);
1226 /* 1.0-1.3: Use destination register as coordinate source. No modifiers.
1227 1.4: Use provided coordinate source register. _dw, _dz, swizzle allowed.
1228 2.0+: Use provided coordinate source register. No modifiers.
1229 3.0+: Use provided coordinate source register. Swizzle allowed */
1231 strcpy(reg_coord
, reg_dest
);
1233 else if (version
== 14) {
1234 if (gen_input_modifier_line(*pToken
, 0, reg_coord
, tmpLine
, This
->constants
))
1235 shader_addline(&buffer
, tmpLine
);
1236 get_input_register_swizzle(*pToken
, reg_coord_swz
);
1239 else if (version
> 14 && version
< 30) {
1240 get_register_name(*pToken
, reg_coord
, This
->constants
);
1243 else if (version
>= 30) {
1244 get_input_register_swizzle(*pToken
, reg_coord_swz
);
1245 get_register_name(*pToken
, reg_coord
, This
->constants
);
1249 /* 1.0-1.4: Use destination register number as texture code.
1250 2.0+: Use provided sampler number as texture code. */
1252 reg_sampler_code
= reg_dest_code
;
1255 reg_sampler_code
= *pToken
& D3DSP_REGNUM_MASK
;
1259 shader_addline(&buffer
, "TEX %s, %s%s, texture[%lu], 2D;\n",
1260 reg_dest
, reg_coord
, reg_coord_swz
, reg_sampler_code
);
1264 case D3DSIO_TEXCOORD
:
1267 get_write_mask(*pToken
, tmp
);
1268 if (version
!= 14) {
1269 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1270 shader_addline(&buffer
, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg
, tmp
, reg
);
1273 DWORD reg1
= *pToken
& D3DSP_REGNUM_MASK
;
1274 DWORD reg2
= *++pToken
& D3DSP_REGNUM_MASK
;
1275 shader_addline(&buffer
, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1
, tmp
, reg2
);
1281 case D3DSIO_TEXM3x2PAD
:
1283 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1285 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
))
1286 shader_addline(&buffer
, tmpLine
);
1287 shader_addline(&buffer
, "DP3 TMP.x, T%lu, %s;\n", reg
, buf
);
1292 case D3DSIO_TEXM3x2TEX
:
1294 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1296 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
))
1297 shader_addline(&buffer
, tmpLine
);
1298 shader_addline(&buffer
, "DP3 TMP.y, T%lu, %s;\n", reg
, buf
);
1299 shader_addline(&buffer
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg
, reg
);
1304 case D3DSIO_TEXREG2AR
:
1306 DWORD reg1
= *pToken
& D3DSP_REGNUM_MASK
;
1307 DWORD reg2
= *++pToken
& D3DSP_REGNUM_MASK
;
1308 shader_addline(&buffer
, "MOV TMP.r, T%lu.a;\n", reg2
);
1309 shader_addline(&buffer
, "MOV TMP.g, T%lu.r;\n", reg2
);
1310 shader_addline(&buffer
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1315 case D3DSIO_TEXREG2GB
:
1317 DWORD reg1
= *pToken
& D3DSP_REGNUM_MASK
;
1318 DWORD reg2
= *++pToken
& D3DSP_REGNUM_MASK
;
1319 shader_addline(&buffer
, "MOV TMP.r, T%lu.g;\n", reg2
);
1320 shader_addline(&buffer
, "MOV TMP.g, T%lu.b;\n", reg2
);
1321 shader_addline(&buffer
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1328 DWORD reg1
= *pToken
& D3DSP_REGNUM_MASK
;
1329 DWORD reg2
= *++pToken
& D3DSP_REGNUM_MASK
;
1331 /* FIXME: Should apply the BUMPMAPENV matrix */
1332 shader_addline(&buffer
, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1
, reg2
);
1333 shader_addline(&buffer
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1338 case D3DSIO_TEXM3x3PAD
:
1340 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1342 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
))
1343 shader_addline(&buffer
, tmpLine
);
/* The component of TMP that is written is chosen by the row counter. */
1344 shader_addline(&buffer
, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row
, reg
, buf
);
1350 case D3DSIO_TEXM3x3TEX
:
1352 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1354 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
))
1355 shader_addline(&buffer
, tmpLine
);
1356 shader_addline(&buffer
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1358 /* Cubemap textures will be more used than 3D ones. */
1359 shader_addline(&buffer
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1364 case D3DSIO_TEXM3x3VSPEC
:
1366 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1368 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
))
1369 shader_addline(&buffer
, tmpLine
);
1370 shader_addline(&buffer
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1372 /* Construct the eye-ray vector from w coordinates */
1373 shader_addline(&buffer
, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw
[0]);
1374 shader_addline(&buffer
, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw
[1]);
1375 shader_addline(&buffer
, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg
);
1377 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1378 shader_addline(&buffer
, "DP3 TMP.w, TMP, TMP2;\n");
1379 shader_addline(&buffer
, "MUL TMP, TMP.w, TMP;\n");
1380 shader_addline(&buffer
, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1382 /* Cubemap textures will be more used than 3D ones. */
1383 shader_addline(&buffer
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1389 case D3DSIO_TEXM3x3SPEC
:
1391 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1392 DWORD reg3
= *(pToken
+ 2) & D3DSP_REGNUM_MASK
;
1394 if (gen_input_modifier_line(*(pToken
+ 1), 0, buf
, tmpLine
, This
->constants
))
1395 shader_addline(&buffer
, tmpLine
);
1396 shader_addline(&buffer
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1398 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
/* NOTE(review): the constant is written as "C[<n>]" here, whereas
 * get_register_name() in this file produces "C<n>" or "program.env[<n>]";
 * confirm the generated program declares a matching name. */
1399 shader_addline(&buffer
, "DP3 TMP.w, TMP, C[%lu];\n", reg3
);
1400 shader_addline(&buffer
, "MUL TMP, TMP.w, TMP;\n");
1401 shader_addline(&buffer
, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3
);
1403 /* Cubemap textures will be more used than 3D ones. */
1404 shader_addline(&buffer
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1412 if (curOpcode
->glname
== GLNAME_REQUIRE_GLSL
) {
1413 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode
->name
);
1415 FIXME("Can't handle opcode %s in hwShader\n", curOpcode
->name
);
1417 pToken
+= curOpcode
->num_params
;
1421 /* Process modifiers */
1422 if (0 != (*pToken
& D3DSP_DSTMOD_MASK
)) {
1423 DWORD mask
= *pToken
& D3DSP_DSTMOD_MASK
;
1425 case D3DSPDM_SATURATE
: saturate
= TRUE
; break;
1426 #if 0 /* as yet unhandled modifiers */
1427 case D3DSPDM_CENTROID
: centroid
= TRUE
; break;
1428 case D3DSPDM_PP
: partialpresision
= TRUE
; break;
1431 TRACE("_unhandled_modifier(0x%08lx)\n", mask
);
1434 shift
= (*pToken
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
1436 /* Generate input and output registers */
1437 if (curOpcode
->num_params
> 0) {
1439 char operands
[4][100];
1443 /* Generate lines that handle input modifier computation */
1444 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1445 TRACE("(%p) : Param %ld token %lx\n", This
, i
, *(pToken
+ i
));
1446 if (gen_input_modifier_line(*(pToken
+ i
), i
- 1, regs
[i
- 1], tmpOp
, This
->constants
)) {
1447 shader_addline(&buffer
, tmpOp
);
1451 /* Handle output register */
1452 get_register_name(*pToken
, output_rname
, This
->constants
);
1453 strcpy(operands
[0], output_rname
);
1454 get_write_mask(*pToken
, output_wmask
);
1455 strcat(operands
[0], output_wmask
);
1457 /* This function works because of side effects from gen_input_modifier_line */
1458 /* Handle input registers */
1459 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1460 TRACE("(%p) : Regs = %s\n", This
, regs
[i
- 1]);
1461 strcpy(operands
[i
], regs
[i
- 1]);
1462 get_input_register_swizzle(*(pToken
+ i
), swzstring
);
1463 strcat(operands
[i
], swzstring
);
1466 switch(curOpcode
->opcode
) {
/* Note the operand order: source 3 is passed before source 2. */
1468 sprintf(tmpLine
, "CMP%s %s, %s, %s, %s;\n", (saturate
? "_SAT" : ""),
1469 operands
[0], operands
[1], operands
[3], operands
[2]);
/* TMP = 0.5 - source 1, then CMP on TMP selects between sources 2 and 3. */
1472 shader_addline(&buffer
, "ADD TMP, -%s, coefdiv.x;\n", operands
[1]);
1473 sprintf(tmpLine
, "CMP%s %s, TMP, %s, %s;\n", (saturate
? "_SAT" : ""),
1474 operands
[0], operands
[2], operands
[3]);
/* With a shift pending, _SAT is left to the extra MUL line generated by
 * gen_output_modifier_line() below. */
1477 if (saturate
&& (shift
== 0))
1478 strcat(tmpLine
, "_SAT");
1479 strcat(tmpLine
, " ");
1480 strcat(tmpLine
, operands
[0]);
1481 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
1482 strcat(tmpLine
, ", ");
1483 strcat(tmpLine
, operands
[i
]);
1485 strcat(tmpLine
,";\n");
1487 shader_addline(&buffer
, tmpLine
);
1489 /* A shift requires another line. */
1491 gen_output_modifier_line(saturate
, output_wmask
, shift
, output_rname
, tmpLine
);
1492 shader_addline(&buffer
, tmpLine
);
1494 pToken
+= curOpcode
->num_params
;
1498 /* TODO: What about result.depth? */
1499 shader_addline(&buffer
, "MOV result.color, R0;\n");
1500 shader_addline(&buffer
, "END\n");
1503 /* finally null terminate the buffer */
1504 buffer
.buffer
[buffer
.bsize
] = 0;
/* NOTE(review): the capability tested here is ARB_VERTEX_PROGRAM although
 * the program created below is bound to GL_FRAGMENT_PROGRAM_ARB; confirm
 * whether the fragment program capability should be tested instead. */
1505 if (GL_SUPPORT(ARB_VERTEX_PROGRAM
)) {
1506 /* Create the hw shader */
1508 /* The program string sometimes gets too long for a normal TRACE */
1509 TRACE("Generated program:\n");
1510 if (TRACE_ON(d3d_shader
)) {
1511 fprintf(stderr
, "%s\n", buffer
.buffer
);
1514 /* TODO: change to resource.glObjectHandel or something like that */
1515 GL_EXTCALL(glGenProgramsARB(1, &This
->baseShader
.prgId
));
1517 TRACE("Creating a hw pixel shader, prg=%d\n", This
->baseShader
.prgId
);
1518 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
, This
->baseShader
.prgId
));
1520 TRACE("Created hw pixel shader, prg=%d\n", This
->baseShader
.prgId
);
1521 /* Create the program and check for errors */
1522 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
,
1523 buffer
.bsize
, buffer
.buffer
));
/* On failure the error position and string are logged and prgId is set
 * to -1. */
1525 if (glGetError() == GL_INVALID_OPERATION
) {
1527 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &errPos
);
1528 FIXME("HW PixelShader Error at position %d: %s\n",
1529 errPos
, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB
)));
1530 This
->baseShader
.prgId
= -1;
1533 #if 1 /* if we're using the data buffer of device then we don't need to free it */
1534 HeapFree(GetProcessHeap(), 0, buffer
.buffer
);
/*
 * pshader_program_dump_ins_modifiers
 *
 * Traces the instruction suffixes encoded in a destination parameter token:
 * first the shift field ((output & D3DSP_DSTSHIFT_MASK) >>
 * D3DSP_DSTSHIFT_SHIFT), printed as _x2/_x4/_x8 for 1/2/3 and _d8/_d4/_d2
 * for 13/14/15, then the modifier field (output & D3DSP_DSTMOD_MASK),
 * printed as _sat, _pp or _centroid. Values without a dedicated case are
 * traced as "_unhandled_shift" / "_unhandled_modifier".
 *
 * NOTE(review): the switch headers and any case preceding "case 13" are not
 * present in this listing.
 */
1538 inline static void pshader_program_dump_ins_modifiers(const DWORD output
) {
1540 DWORD shift
= (output
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
1541 DWORD mmask
= output
& D3DSP_DSTMOD_MASK
;
1545 case 13: TRACE("_d8"); break;
1546 case 14: TRACE("_d4"); break;
1547 case 15: TRACE("_d2"); break;
1548 case 1: TRACE("_x2"); break;
1549 case 2: TRACE("_x4"); break;
1550 case 3: TRACE("_x8"); break;
1551 default: TRACE("_unhandled_shift(%ld)", shift
); break;
1555 case D3DSPDM_NONE
: break;
1556 case D3DSPDM_SATURATE
: TRACE("_sat"); break;
1557 case D3DSPDM_PARTIALPRECISION
: TRACE("_pp"); break;
1558 case D3DSPDM_MSAMPCENTROID
: TRACE("_centroid"); break;
1559 default: TRACE("_unhandled_modifier(%#lx)", mmask
); break;
/*
 * pshader_program_dump_ps_param
 *
 * Traces one parameter token in assembler-like notation: the register
 * (type from shader_get_regtype(param), number from
 * param & D3DSP_REGNUM_MASK), followed either by the write mask or by the
 * source modifier suffix and swizzle.
 *
 * NOTE(review): the statements that select between the write-mask part and
 * the swizzle part are not present in this listing; presumably the `input`
 * argument makes that choice (callers pass 0 for the first parameter and 1
 * for the following ones). Several case labels and TRACE calls are missing
 * as well.
 */
1563 inline static void pshader_program_dump_ps_param(const DWORD param
, int input
) {
1564 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1565 static const char swizzle_reg_chars
[] = "rgba";
1567 DWORD reg
= param
& D3DSP_REGNUM_MASK
;
1568 DWORD regtype
= shader_get_regtype(param
);
/* The negating modifiers and the complement modifier are tested before the
 * register name is printed. */
1571 if ( ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
) ||
1572 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_BIASNEG
) ||
1573 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_SIGNNEG
) ||
1574 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_X2NEG
) )
1576 else if ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_COMP
)
1588 TRACE("c%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1591 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
1594 case D3DSPR_RASTOUT
:
/* NOTE(review): reg indexes the five-entry rastout_reg_names table and no
 * bounds check is visible here. */
1595 TRACE("%s", rastout_reg_names
[reg
]);
1597 case D3DSPR_ATTROUT
:
1598 TRACE("oD%lu", reg
);
1600 case D3DSPR_TEXCRDOUT
:
1601 TRACE("oT%lu", reg
);
1603 case D3DSPR_CONSTINT
:
1604 TRACE("i%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1606 case D3DSPR_CONSTBOOL
:
1607 TRACE("b%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1613 TRACE("aL%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1615 case D3DSPR_SAMPLER
:
1619 TRACE("unhandled_rtype(%lx)", regtype
);
1624 /* operand output (for modifiers and shift, see dump_ins_modifiers) */
1626 if ((param
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
1628 if (param
& D3DSP_WRITEMASK_0
) TRACE(".r");
1629 if (param
& D3DSP_WRITEMASK_1
) TRACE(".g");
1630 if (param
& D3DSP_WRITEMASK_2
) TRACE(".b");
1631 if (param
& D3DSP_WRITEMASK_3
) TRACE(".a");
1634 /** operand input */
/* Two bits per component; the lowest pair is the first (r) selector. */
1635 DWORD swizzle
= (param
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
1636 DWORD swizzle_r
= swizzle
& 0x03;
1637 DWORD swizzle_g
= (swizzle
>> 2) & 0x03;
1638 DWORD swizzle_b
= (swizzle
>> 4) & 0x03;
1639 DWORD swizzle_a
= (swizzle
>> 6) & 0x03;
1641 if (0 != (param
& D3DSP_SRCMOD_MASK
)) {
1642 DWORD mask
= param
& D3DSP_SRCMOD_MASK
;
1643 /*TRACE("_modifier(0x%08lx) ", mask);*/
/* The negated variants print the same suffix as their plain counterparts. */
1645 case D3DSPSM_NONE
: break;
1646 case D3DSPSM_NEG
: break;
1647 case D3DSPSM_BIAS
: TRACE("_bias"); break;
1648 case D3DSPSM_BIASNEG
: TRACE("_bias"); break;
1649 case D3DSPSM_SIGN
: TRACE("_bx2"); break;
1650 case D3DSPSM_SIGNNEG
: TRACE("_bx2"); break;
1651 case D3DSPSM_COMP
: break;
1652 case D3DSPSM_X2
: TRACE("_x2"); break;
1653 case D3DSPSM_X2NEG
: TRACE("_x2"); break;
1654 case D3DSPSM_DZ
: TRACE("_dz"); break;
1655 case D3DSPSM_DW
: TRACE("_dw"); break;
1657 TRACE("_unknown(0x%08lx)", mask
);
1662 * swizzle bits fields:
1665 if ((D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1666 if (swizzle_r
== swizzle_g
&&
1667 swizzle_r
== swizzle_b
&&
1668 swizzle_r
== swizzle_a
) {
1669 TRACE(".%c", swizzle_reg_chars
[swizzle_r
]);
1672 swizzle_reg_chars
[swizzle_r
],
1673 swizzle_reg_chars
[swizzle_g
],
1674 swizzle_reg_chars
[swizzle_b
],
1675 swizzle_reg_chars
[swizzle_a
]);
/*
 * pshader_program_dump_decl_usage
 *
 * Traces the usage part of a declaration instruction.
 *
 * decl:  the declaration token.
 * param: the register parameter token that follows it; only its register
 *        type (shader_get_regtype(param)) is used here.
 *
 * For a sampler register the texture type (decl & D3DSP_TEXTURETYPE_MASK)
 * is printed as "2d", "cube" or "volume". Otherwise the usage
 * (decl & D3DSP_DCL_USAGE_MASK) is printed by name, followed by the usage
 * index for position, normal, texture and positionT; unknown usages are
 * reported with FIXME.
 *
 * NOTE(review): the switch headers and the condition that separates
 * "color" from "specular" are not present in this listing.
 */
1681 inline static void pshader_program_dump_decl_usage(
1682 IWineD3DPixelShaderImpl
*This
, DWORD decl
, DWORD param
) {
1684 DWORD regtype
= shader_get_regtype(param
);
1687 if (regtype
== D3DSPR_SAMPLER
) {
1688 DWORD ttype
= decl
& D3DSP_TEXTURETYPE_MASK
;
1691 case D3DSTT_2D
: TRACE("2d "); break;
1692 case D3DSTT_CUBE
: TRACE("cube "); break;
1693 case D3DSTT_VOLUME
: TRACE("volume "); break;
1694 default: TRACE("unknown_ttype(%08lx) ", ttype
);
1699 DWORD usage
= decl
& D3DSP_DCL_USAGE_MASK
;
1700 DWORD idx
= (decl
& D3DSP_DCL_USAGEINDEX_MASK
) >> D3DSP_DCL_USAGEINDEX_SHIFT
;
1703 case D3DDECLUSAGE_POSITION
:
1704 TRACE("%s%ld ", "position", idx
);
1706 case D3DDECLUSAGE_BLENDINDICES
:
1707 TRACE("%s ", "blend");
1709 case D3DDECLUSAGE_BLENDWEIGHT
:
1710 TRACE("%s ", "weight");
1712 case D3DDECLUSAGE_NORMAL
:
1713 TRACE("%s%ld ", "normal", idx
);
1715 case D3DDECLUSAGE_PSIZE
:
1716 TRACE("%s ", "psize");
1718 case D3DDECLUSAGE_COLOR
:
1720 TRACE("%s ", "color");
/* The specular name is printed with the usage index reduced by one. */
1722 TRACE("%s%ld ", "specular", (idx
- 1));
1725 case D3DDECLUSAGE_TEXCOORD
:
1726 TRACE("%s%ld ", "texture", idx
);
1728 case D3DDECLUSAGE_TANGENT
:
1729 TRACE("%s ", "tangent");
1731 case D3DDECLUSAGE_BINORMAL
:
1732 TRACE("%s ", "binormal");
1734 case D3DDECLUSAGE_TESSFACTOR
:
1735 TRACE("%s ", "tessfactor");
1737 case D3DDECLUSAGE_POSITIONT
:
1738 TRACE("%s%ld ", "positionT", idx
);
1740 case D3DDECLUSAGE_FOG
:
1741 TRACE("%s ", "fog");
1743 case D3DDECLUSAGE_DEPTH
:
1744 TRACE("%s ", "depth");
1746 case D3DDECLUSAGE_SAMPLE
:
1747 TRACE("%s ", "sample");
1750 FIXME("Unrecognised dcl %08lx", usage
);
/*
 * IWineD3DPixelShaderImpl_SetFunction
 *
 * Attaches the token stream pFunction to the pixel shader object.
 *
 * Steps visible in this listing:
 *   1. If pFunction is not NULL, walk the tokens up to the end token,
 *      recording the version via pshader_set_version(), tracing comments,
 *      declarations, definitions and instructions, and counting tokens in
 *      `len`. baseShader.functionLength becomes (len + 1) * sizeof(DWORD).
 *      Otherwise functionLength is set to 1.
 *   2. If pFunction is not NULL and the hardware mode setting is selected,
 *      generate the ARB program with
 *      IWineD3DPixelShaderImpl_GenerateProgramArbHW().
 *   3. Store a private copy of the token stream in baseShader.function
 *      (NULL when no function was supplied).
 *
 * NOTE(review): the `len` declaration, the pointer/len increments and the
 * return statement are not present in this listing; the final TRACE says
 * WINED3D_OK is returned.
 */
1755 HRESULT WINAPI
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
1756 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
1757 const DWORD
* pToken
= pFunction
;
1758 const SHADER_OPCODE
*curOpcode
= NULL
;
1761 TRACE("(%p) : Parsing programme\n", This
);
1763 if (NULL
!= pToken
) {
1764 while (D3DPS_END() != *pToken
) {
1765 if (pshader_is_version_token(*pToken
)) { /** version */
1766 pshader_set_version(This
, *pToken
);
1771 if (pshader_is_comment_token(*pToken
)) { /** comment */
1772 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1774 TRACE("//%s\n", (char*)pToken
);
1775 pToken
+= comment_len
;
1776 len
+= comment_len
+ 1;
1779 if (!This
->baseShader
.version
) {
1780 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This
);
1782 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
1785 if (NULL
== curOpcode
) {
1787 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1788 while (*pToken
& 0x80000000) {
1790 /* unknown current opcode ... */
1791 TRACE("unrecognized opcode: %08lx", *pToken
);
1798 if (curOpcode
->opcode
== D3DSIO_DCL
) {
1799 pshader_program_dump_decl_usage(This
, *pToken
, *(pToken
+ 1));
1802 pshader_program_dump_ps_param(*pToken
, 0);
1806 if (curOpcode
->opcode
== D3DSIO_DEF
) {
1807 TRACE("def c%lu = ", *pToken
& 0xFF);
/* The following tokens are reinterpreted as floats for tracing. */
1810 TRACE("%f ,", *(float *)pToken
);
1813 TRACE("%f ,", *(float *)pToken
);
1816 TRACE("%f ,", *(float *)pToken
);
1819 TRACE("%f", *(float *)pToken
);
1823 TRACE("%s", curOpcode
->name
);
1824 if (curOpcode
->num_params
> 0) {
1825 pshader_program_dump_ins_modifiers(*pToken
);
1827 pshader_program_dump_ps_param(*pToken
, 0);
1830 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1832 pshader_program_dump_ps_param(*pToken
, 1);
1841 This
->baseShader
.functionLength
= (len
+ 1) * sizeof(DWORD
);
1843 This
->baseShader
.functionLength
= 1; /* no Function defined use fixed function vertex processing */
1846 /* Generate HW shader if needed */
/* NOTE(review): this pixel shader path tests wined3d_settings.vs_mode
 * against VS_HW; confirm whether a pixel shader specific setting was
 * intended. */
1847 if (NULL
!= pFunction
&& wined3d_settings
.vs_mode
== VS_HW
) {
1848 TRACE("(%p) : Generating hardware program\n", This
);
1850 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface
, pFunction
);
1854 TRACE("(%p) : Copying the function\n", This
);
1855 /* copy the function ... because it will certainly be released by application */
1856 if (NULL
!= pFunction
) {
/* NOTE(review): no NULL check of this allocation is visible before the
 * memcpy below. */
1857 This
->baseShader
.function
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, This
->baseShader
.functionLength
);
1858 memcpy((void *)This
->baseShader
.function
, pFunction
, This
->baseShader
.functionLength
);
1860 This
->baseShader
.function
= NULL
;
1863 /* TODO: Some proper return values for failures */
1864 TRACE("(%p) : Returning WINED3D_OK\n", This
);
1868 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl
=
1870 /*** IUnknown methods ***/
1871 IWineD3DPixelShaderImpl_QueryInterface
,
1872 IWineD3DPixelShaderImpl_AddRef
,
1873 IWineD3DPixelShaderImpl_Release
,
1874 /*** IWineD3DBase methods ***/
1875 IWineD3DPixelShaderImpl_GetParent
,
1876 /*** IWineD3DBaseShader methods ***/
1877 IWineD3DPixelShaderImpl_SetFunction
,
1878 /*** IWineD3DPixelShader methods ***/
1879 IWineD3DPixelShaderImpl_GetDevice
,
1880 IWineD3DPixelShaderImpl_GetFunction