2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader
);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
40 /* The maximum size of the program */
43 #define GLNAME_REQUIRE_GLSL ((const char *)1)
44 /* *******************************************
45 IWineD3DPixelShader IUnknown parts follow
46 ******************************************* */
47 HRESULT WINAPI
IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader
*iface
, REFIID riid
, LPVOID
*ppobj
)
49 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
50 TRACE("(%p)->(%s,%p)\n",This
,debugstr_guid(riid
),ppobj
);
51 if (IsEqualGUID(riid
, &IID_IUnknown
)
52 || IsEqualGUID(riid
, &IID_IWineD3DBase
)
53 || IsEqualGUID(riid
, &IID_IWineD3DBaseShader
)
54 || IsEqualGUID(riid
, &IID_IWineD3DPixelShader
)) {
55 IUnknown_AddRef(iface
);
63 ULONG WINAPI
IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader
*iface
) {
64 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
65 TRACE("(%p) : AddRef increasing from %ld\n", This
, This
->ref
);
66 return InterlockedIncrement(&This
->ref
);
69 ULONG WINAPI
IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader
*iface
) {
70 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
72 TRACE("(%p) : Releasing from %ld\n", This
, This
->ref
);
73 ref
= InterlockedDecrement(&This
->ref
);
75 HeapFree(GetProcessHeap(), 0, This
);
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader).
   When that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
86 /* *******************************************
87 IWineD3DPixelShader IWineD3DPixelShader parts follow
88 ******************************************* */
90 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader
*iface
, IUnknown
** parent
){
91 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
93 *parent
= This
->parent
;
94 IUnknown_AddRef(*parent
);
95 TRACE("(%p) : returning %p\n", This
, *parent
);
99 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader
* iface
, IWineD3DDevice
**pDevice
){
100 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
101 IWineD3DDevice_AddRef((IWineD3DDevice
*)This
->wineD3DDevice
);
102 *pDevice
= (IWineD3DDevice
*)This
->wineD3DDevice
;
103 TRACE("(%p) returning %p\n", This
, *pDevice
);
108 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader
* impl
, VOID
* pData
, UINT
* pSizeOfData
) {
109 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)impl
;
110 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This
, pData
, pSizeOfData
);
113 *pSizeOfData
= This
->baseShader
.functionLength
;
116 if (*pSizeOfData
< This
->baseShader
.functionLength
) {
117 *pSizeOfData
= This
->baseShader
.functionLength
;
118 return WINED3DERR_MOREDATA
;
120 if (NULL
== This
->baseShader
.function
) { /* no function defined */
121 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This
, pData
);
122 (*(DWORD
**) pData
) = NULL
;
124 if (This
->baseShader
.functionLength
== 0) {
127 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
128 memcpy(pData
, This
->baseShader
.function
, This
->baseShader
.functionLength
);
133 /*******************************
134 * pshader functions software VM
137 void pshader_add(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
138 d
->x
= s0
->x
+ s1
->x
;
139 d
->y
= s0
->y
+ s1
->y
;
140 d
->z
= s0
->z
+ s1
->z
;
141 d
->w
= s0
->w
+ s1
->w
;
142 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
143 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
146 void pshader_dp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
147 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
;
148 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
149 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
152 void pshader_dp4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
153 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
+ s0
->w
* s1
->w
;
154 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
155 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
158 void pshader_dst(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
160 d
->y
= s0
->y
* s1
->y
;
163 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
164 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
167 void pshader_expp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
173 tmp
.f
= floorf(s0
->w
);
174 d
->x
= powf(2.0f
, tmp
.f
);
175 d
->y
= s0
->w
- tmp
.f
;
176 tmp
.f
= powf(2.0f
, s0
->w
);
177 tmp
.d
&= 0xFFFFFF00U
;
180 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
181 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
184 void pshader_logp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
185 float tmp_f
= fabsf(s0
->w
);
186 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
187 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
188 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
191 void pshader_mad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
192 d
->x
= s0
->x
* s1
->x
+ s2
->x
;
193 d
->y
= s0
->y
* s1
->y
+ s2
->y
;
194 d
->z
= s0
->z
* s1
->z
+ s2
->z
;
195 d
->w
= s0
->w
* s1
->w
+ s2
->w
;
196 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
197 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, s2
->x
, s2
->y
, s2
->z
, s2
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
200 void pshader_max(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
201 d
->x
= (s0
->x
>= s1
->x
) ? s0
->x
: s1
->x
;
202 d
->y
= (s0
->y
>= s1
->y
) ? s0
->y
: s1
->y
;
203 d
->z
= (s0
->z
>= s1
->z
) ? s0
->z
: s1
->z
;
204 d
->w
= (s0
->w
>= s1
->w
) ? s0
->w
: s1
->w
;
205 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
206 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
209 void pshader_min(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
210 d
->x
= (s0
->x
< s1
->x
) ? s0
->x
: s1
->x
;
211 d
->y
= (s0
->y
< s1
->y
) ? s0
->y
: s1
->y
;
212 d
->z
= (s0
->z
< s1
->z
) ? s0
->z
: s1
->z
;
213 d
->w
= (s0
->w
< s1
->w
) ? s0
->w
: s1
->w
;
214 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
215 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
218 void pshader_mov(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
223 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
224 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
227 void pshader_mul(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
228 d
->x
= s0
->x
* s1
->x
;
229 d
->y
= s0
->y
* s1
->y
;
230 d
->z
= s0
->z
* s1
->z
;
231 d
->w
= s0
->w
* s1
->w
;
232 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
233 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
/* nop: performs no computation; only emits a trace line via PSTRACE. */
void pshader_nop(void) {
    /* NOPPPP ahhh too easy ;) */
    PSTRACE(("executing nop\n"));
}
241 void pshader_rcp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
242 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== s0
->w
) ? HUGE_VAL
: 1.0f
/ s0
->w
;
243 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
244 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
247 void pshader_rsq(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
248 float tmp_f
= fabsf(s0
->w
);
249 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== tmp_f
) ? HUGE_VAL
: ((1.0f
!= tmp_f
) ? 1.0f
/ sqrtf(tmp_f
) : 1.0f
);
250 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
251 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
254 void pshader_sge(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
255 d
->x
= (s0
->x
>= s1
->x
) ? 1.0f
: 0.0f
;
256 d
->y
= (s0
->y
>= s1
->y
) ? 1.0f
: 0.0f
;
257 d
->z
= (s0
->z
>= s1
->z
) ? 1.0f
: 0.0f
;
258 d
->w
= (s0
->w
>= s1
->w
) ? 1.0f
: 0.0f
;
259 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
260 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
263 void pshader_slt(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
264 d
->x
= (s0
->x
< s1
->x
) ? 1.0f
: 0.0f
;
265 d
->y
= (s0
->y
< s1
->y
) ? 1.0f
: 0.0f
;
266 d
->z
= (s0
->z
< s1
->z
) ? 1.0f
: 0.0f
;
267 d
->w
= (s0
->w
< s1
->w
) ? 1.0f
: 0.0f
;
268 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
269 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
272 void pshader_sub(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
273 d
->x
= s0
->x
- s1
->x
;
274 d
->y
= s0
->y
- s1
->y
;
275 d
->z
= s0
->z
- s1
->z
;
276 d
->w
= s0
->w
- s1
->w
;
277 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
278 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
282 * Version 1.1 specific
285 void pshader_exp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
286 d
->x
= d
->y
= d
->z
= d
->w
= powf(2.0f
, s0
->w
);
287 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
288 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
291 void pshader_log(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
292 float tmp_f
= fabsf(s0
->w
);
293 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
294 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
295 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
298 void pshader_frc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
299 d
->x
= s0
->x
- floorf(s0
->x
);
300 d
->y
= s0
->y
- floorf(s0
->y
);
303 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
304 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
307 typedef FLOAT D3DMATRIX44
[4][4];
308 typedef FLOAT D3DMATRIX43
[4][3];
309 typedef FLOAT D3DMATRIX34
[3][4];
310 typedef FLOAT D3DMATRIX33
[3][3];
311 typedef FLOAT D3DMATRIX23
[2][3];
313 void pshader_m4x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat
) {
315 * Buggy CODE: here only if cast not work for copy/paste
316 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
317 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
318 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
319 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
320 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
321 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
322 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
324 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
325 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
326 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
327 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
+ mat
[3][3] * s0
->w
;
328 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
329 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
330 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
331 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], mat
[3][3], s0
->w
, d
->w
));
334 void pshader_m4x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX34 mat
) {
335 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
336 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
337 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
339 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
340 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
341 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
342 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0
->w
, d
->w
));
345 void pshader_m3x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX43 mat
) {
346 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
347 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
348 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
349 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
;
350 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
351 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
352 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
353 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], s0
->w
, d
->w
));
356 void pshader_m3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX33 mat
) {
357 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
358 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
359 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
361 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
362 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
363 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
364 PSTRACE(("executing m3x3(4): (%f) \n", d
->w
));
367 void pshader_m3x2(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX23 mat
) {
369 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
370 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
376 * Version 2.0 specific
378 void pshader_lrp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
379 d
->x
= s0
->x
* (s1
->x
- s2
->x
) + s2
->x
;
380 d
->y
= s0
->y
* (s1
->y
- s2
->y
) + s2
->y
;
381 d
->z
= s0
->z
* (s1
->z
- s2
->z
) + s2
->z
;
382 d
->w
= s0
->w
* (s1
->w
- s2
->w
) + s2
->w
;
385 void pshader_crs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
386 d
->x
= s0
->y
* s1
->z
- s0
->z
* s1
->y
;
387 d
->y
= s0
->z
* s1
->x
- s0
->x
* s1
->z
;
388 d
->z
= s0
->x
* s1
->y
- s0
->y
* s1
->x
;
389 d
->w
= 0.9f
; /* w is undefined, so set it to something safeish */
391 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
392 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
395 void pshader_abs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
400 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
401 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
405 void pshader_texcoord(WINED3DSHADERVECTOR
* d
) {
409 void pshader_texkill(WINED3DSHADERVECTOR
* d
) {
413 void pshader_tex(WINED3DSHADERVECTOR
* d
) {
416 void pshader_texld(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
420 void pshader_texbem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
424 void pshader_texbeml(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
428 void pshader_texreg2ar(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
432 void pshader_texreg2gb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
436 void pshader_texm3x2pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
440 void pshader_texm3x2tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
444 void pshader_texm3x3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
448 void pshader_texm3x3pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
452 void pshader_texm3x3diff(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
456 void pshader_texm3x3spec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
460 void pshader_texm3x3vspec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
464 void pshader_cnd(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
468 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
469 void pshader_def(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
, WINED3DSHADERVECTOR
* s3
) {
473 void pshader_texreg2rgb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
477 void pshader_texdp3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
481 void pshader_texm3x2depth(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
485 void pshader_texdp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
489 void pshader_texm3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
493 void pshader_texdepth(WINED3DSHADERVECTOR
* d
) {
497 void pshader_cmp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
501 void pshader_bem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
505 void pshader_call(WINED3DSHADERVECTOR
* d
) {
509 void pshader_callnz(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
513 void pshader_loop(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
517 void pshader_ret(void) {
521 void pshader_endloop(void) {
525 void pshader_dcl(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
529 void pshader_pow(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
533 void pshader_nrm(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
537 void pshader_sincos(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
541 void pshader_rep(WINED3DSHADERVECTOR
* d
) {
545 void pshader_endrep(void) {
549 void pshader_if(WINED3DSHADERVECTOR
* d
) {
553 void pshader_ifc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
557 void pshader_else(void) {
561 void pshader_label(WINED3DSHADERVECTOR
* d
) {
565 void pshader_endif(void) {
569 void pshader_break(void) {
573 void pshader_breakc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
577 void pshader_breakp(WINED3DSHADERVECTOR
* d
) {
581 void pshader_defb(WINED3DSHADERVECTOR
* d
) {
585 void pshader_defi(WINED3DSHADERVECTOR
* d
) {
589 void pshader_dp2add(WINED3DSHADERVECTOR
* d
) {
593 void pshader_dsx(WINED3DSHADERVECTOR
* d
) {
597 void pshader_dsy(WINED3DSHADERVECTOR
* d
) {
601 void pshader_texldd(WINED3DSHADERVECTOR
* d
) {
605 void pshader_setp(WINED3DSHADERVECTOR
* d
) {
609 void pshader_texldl(WINED3DSHADERVECTOR
* d
) {
/* log, exp, frc and m*x* seem to be macro instructions ... to be checked */
616 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins
[] = {
617 {D3DSIO_NOP
, "nop", "NOP", 0, pshader_nop
, 0, 0},
618 {D3DSIO_MOV
, "mov", "MOV", 2, pshader_mov
, 0, 0},
619 {D3DSIO_ADD
, "add", "ADD", 3, pshader_add
, 0, 0},
620 {D3DSIO_SUB
, "sub", "SUB", 3, pshader_sub
, 0, 0},
621 {D3DSIO_MAD
, "mad", "MAD", 4, pshader_mad
, 0, 0},
622 {D3DSIO_MUL
, "mul", "MUL", 3, pshader_mul
, 0, 0},
623 {D3DSIO_RCP
, "rcp", "RCP", 2, pshader_rcp
, 0, 0},
624 {D3DSIO_RSQ
, "rsq", "RSQ", 2, pshader_rsq
, 0, 0},
625 {D3DSIO_DP3
, "dp3", "DP3", 3, pshader_dp3
, 0, 0},
626 {D3DSIO_DP4
, "dp4", "DP4", 3, pshader_dp4
, 0, 0},
627 {D3DSIO_MIN
, "min", "MIN", 3, pshader_min
, 0, 0},
628 {D3DSIO_MAX
, "max", "MAX", 3, pshader_max
, 0, 0},
629 {D3DSIO_SLT
, "slt", "SLT", 3, pshader_slt
, 0, 0},
630 {D3DSIO_SGE
, "sge", "SGE", 3, pshader_sge
, 0, 0},
631 {D3DSIO_ABS
, "abs", "ABS", 2, pshader_abs
, 0, 0},
632 {D3DSIO_EXP
, "exp", "EX2", 2, pshader_exp
, 0, 0},
633 {D3DSIO_LOG
, "log", "LG2", 2, pshader_log
, 0, 0},
634 {D3DSIO_DST
, "dst", "DST", 3, pshader_dst
, 0, 0},
635 {D3DSIO_LRP
, "lrp", "LRP", 4, pshader_lrp
, 0, 0},
636 {D3DSIO_FRC
, "frc", "FRC", 2, pshader_frc
, 0, 0},
637 {D3DSIO_M4x4
, "m4x4", "undefined", 3, pshader_m4x4
, 0, 0},
638 {D3DSIO_M4x3
, "m4x3", "undefined", 3, pshader_m4x3
, 0, 0},
639 {D3DSIO_M3x4
, "m3x4", "undefined", 3, pshader_m3x4
, 0, 0},
640 {D3DSIO_M3x3
, "m3x3", "undefined", 3, pshader_m3x3
, 0, 0},
641 {D3DSIO_M3x2
, "m3x2", "undefined", 3, pshader_m3x2
, 0, 0},
644 /** FIXME: use direct access so add the others opcodes as stubs */
645 /* DCL is a specil operation */
646 {D3DSIO_DCL
, "dcl", NULL
, 2, pshader_dcl
, 0, 0},
647 {D3DSIO_POW
, "pow", "POW", 3, pshader_pow
, 0, 0},
648 {D3DSIO_CRS
, "crs", "XPS", 3, pshader_crs
, 0, 0},
649 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
652 MUL vec.xyz, vec, tmp;
653 but I think this is better because it accounts for w properly.
659 {D3DSIO_NRM
, "nrm", NULL
, 2, pshader_nrm
, 0, 0},
660 {D3DSIO_SINCOS
, "sincos", NULL
, 2, pshader_sincos
, 0, 0},
662 /* Flow control - requires GLSL or software shaders */
663 {D3DSIO_REP
, "rep", GLNAME_REQUIRE_GLSL
, 1, pshader_rep
, 0, 0},
664 {D3DSIO_ENDREP
, "endrep", GLNAME_REQUIRE_GLSL
, 0, pshader_endrep
, 0, 0},
665 {D3DSIO_IF
, "if", GLNAME_REQUIRE_GLSL
, 1, pshader_if
, 0, 0},
666 {D3DSIO_IFC
, "ifc", GLNAME_REQUIRE_GLSL
, 2, pshader_ifc
, 0, 0},
667 {D3DSIO_ELSE
, "else", GLNAME_REQUIRE_GLSL
, 0, pshader_else
, 0, 0},
668 {D3DSIO_ENDIF
, "endif", GLNAME_REQUIRE_GLSL
, 0, pshader_endif
, 0, 0},
669 {D3DSIO_BREAK
, "break", GLNAME_REQUIRE_GLSL
, 0, pshader_break
, 0, 0},
670 {D3DSIO_BREAKC
, "breakc", GLNAME_REQUIRE_GLSL
, 2, pshader_breakc
, 0, 0},
671 {D3DSIO_BREAKP
, "breakp", GLNAME_REQUIRE_GLSL
, 1, pshader_breakp
, 0, 0},
672 {D3DSIO_CALL
, "call", GLNAME_REQUIRE_GLSL
, 1, pshader_call
, 0, 0},
673 {D3DSIO_CALLNZ
, "callnz", GLNAME_REQUIRE_GLSL
, 2, pshader_callnz
, 0, 0},
674 {D3DSIO_LOOP
, "loop", GLNAME_REQUIRE_GLSL
, 2, pshader_loop
, 0, 0},
675 {D3DSIO_RET
, "ret", GLNAME_REQUIRE_GLSL
, 0, pshader_ret
, 0, 0},
676 {D3DSIO_ENDLOOP
, "endloop", GLNAME_REQUIRE_GLSL
, 0, pshader_endloop
, 0, 0},
677 {D3DSIO_LABEL
, "label", GLNAME_REQUIRE_GLSL
, 1, pshader_label
, 0, 0},
679 {D3DSIO_DEFB
, "defb", GLNAME_REQUIRE_GLSL
, 2, pshader_defb
, 0, 0},
680 {D3DSIO_DEFI
, "defi", GLNAME_REQUIRE_GLSL
, 2, pshader_defi
, 0, 0},
682 {D3DSIO_TEXCOORD
, "texcoord", "undefined", 1, pshader_texcoord
, 0, D3DPS_VERSION(1,3)},
683 {D3DSIO_TEXCOORD
, "texcrd", "undefined", 2, pshader_texcoord
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
684 {D3DSIO_TEXKILL
, "texkill", "KIL", 1, pshader_texkill
, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
685 {D3DSIO_TEX
, "tex", "undefined", 1, pshader_tex
, 0, D3DPS_VERSION(1,3)},
686 {D3DSIO_TEX
, "texld", "undefined", 2, pshader_texld
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
687 {D3DSIO_TEX
, "texld", "undefined", 3, pshader_texld
, D3DPS_VERSION(2,0), -1},
688 {D3DSIO_TEXBEM
, "texbem", "undefined", 2, pshader_texbem
, 0, D3DPS_VERSION(1,3)},
689 {D3DSIO_TEXBEML
, "texbeml", GLNAME_REQUIRE_GLSL
, 2, pshader_texbeml
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
690 {D3DSIO_TEXREG2AR
,"texreg2ar","undefined", 2, pshader_texreg2ar
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
691 {D3DSIO_TEXREG2GB
,"texreg2gb","undefined", 2, pshader_texreg2gb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
692 {D3DSIO_TEXM3x2PAD
, "texm3x2pad", "undefined", 2, pshader_texm3x2pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
693 {D3DSIO_TEXM3x2TEX
, "texm3x2tex", "undefined", 2, pshader_texm3x2tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
694 {D3DSIO_TEXM3x3PAD
, "texm3x3pad", "undefined", 2, pshader_texm3x3pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
695 {D3DSIO_TEXM3x3DIFF
, "texm3x3diff", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3diff
, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
696 {D3DSIO_TEXM3x3SPEC
, "texm3x3spec", "undefined", 3, pshader_texm3x3spec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
697 {D3DSIO_TEXM3x3VSPEC
, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
698 {D3DSIO_TEXM3x3TEX
, "texm3x3tex", "undefined", 2, pshader_texm3x3tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
699 {D3DSIO_EXPP
, "expp", "EXP", 2, pshader_expp
, 0, 0},
700 {D3DSIO_LOGP
, "logp", "LOG", 2, pshader_logp
, 0, 0},
701 {D3DSIO_CND
, "cnd", GLNAME_REQUIRE_GLSL
, 4, pshader_cnd
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
702 /* def is a special operation */
703 {D3DSIO_DEF
, "def", "undefined", 5, pshader_def
, 0, 0},
704 {D3DSIO_TEXREG2RGB
, "texreg2rgb", GLNAME_REQUIRE_GLSL
, 2, pshader_texreg2rgb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
705 {D3DSIO_TEXDP3TEX
, "texdp3tex", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3tex
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
706 {D3DSIO_TEXM3x2DEPTH
, "texm3x2depth", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x2depth
,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
707 {D3DSIO_TEXDP3
, "texdp3", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
708 {D3DSIO_TEXM3x3
, "texm3x3", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
709 {D3DSIO_TEXDEPTH
, "texdepth", GLNAME_REQUIRE_GLSL
,1, pshader_texdepth
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
710 {D3DSIO_CMP
, "cmp", GLNAME_REQUIRE_GLSL
, 4, pshader_cmp
, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
711 {D3DSIO_BEM
, "bem", GLNAME_REQUIRE_GLSL
, 3, pshader_bem
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
712 /* TODO: dp2add can be made out of multiple instuctions */
713 {D3DSIO_DP2ADD
, "dp2add", GLNAME_REQUIRE_GLSL
, 2, pshader_dp2add
, 0, 0},
714 {D3DSIO_DSX
, "dsx", GLNAME_REQUIRE_GLSL
, 2, pshader_dsx
, 0, 0},
715 {D3DSIO_DSY
, "dsy", GLNAME_REQUIRE_GLSL
, 2, pshader_dsy
, 0, 0},
716 {D3DSIO_TEXLDD
, "texldd", GLNAME_REQUIRE_GLSL
, 2, pshader_texldd
, 0, 0},
717 {D3DSIO_SETP
, "setp", GLNAME_REQUIRE_GLSL
, 2, pshader_setp
, 0, 0},
718 {D3DSIO_TEXLDL
, "texdl", GLNAME_REQUIRE_GLSL
, 2, pshader_texldl
, 0, 0},
719 {D3DSIO_PHASE
, "phase", GLNAME_REQUIRE_GLSL
, 0, pshader_nop
, 0, 0},
720 {0, NULL
, NULL
, 0, NULL
, 0, 0}
723 inline static const SHADER_OPCODE
* pshader_program_get_opcode(IWineD3DPixelShaderImpl
*This
, const DWORD code
) {
725 DWORD version
= This
->baseShader
.version
;
726 DWORD hex_version
= D3DPS_VERSION(version
/10, version
%10);
727 const SHADER_OPCODE
*shader_ins
= This
->baseShader
.shader_ins
;
729 /** TODO: use dichotomic search */
730 while (NULL
!= shader_ins
[i
].name
) {
731 if (((code
& D3DSI_OPCODE_MASK
) == shader_ins
[i
].opcode
) &&
732 (((hex_version
>= shader_ins
[i
].min_version
) && (hex_version
<= shader_ins
[i
].max_version
)) ||
733 ((shader_ins
[i
].min_version
== 0) && (shader_ins
[i
].max_version
== 0)))) {
734 return &shader_ins
[i
];
738 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code
, code
, code
& D3DSI_OPCODE_MASK
, version
);
742 inline static BOOL
pshader_is_version_token(DWORD token
) {
743 return 0xFFFF0000 == (token
& 0xFFFF0000);
746 inline static BOOL
pshader_is_comment_token(DWORD token
) {
747 return D3DSIO_COMMENT
== (token
& D3DSI_OPCODE_MASK
);
751 inline static void get_register_name(const DWORD param
, char* regstr
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
752 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
754 DWORD reg
= param
& D3DSP_REGNUM_MASK
;
755 DWORD regtype
= shader_get_regtype(param
);
759 sprintf(regstr
, "R%lu", reg
);
763 strcpy(regstr
, "fragment.color.primary");
765 strcpy(regstr
, "fragment.color.secondary");
770 sprintf(regstr
, "C%lu", reg
);
772 sprintf(regstr
, "program.env[%lu]", reg
);
774 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
775 sprintf(regstr
,"T%lu", reg
);
778 sprintf(regstr
, "%s", rastout_reg_names
[reg
]);
781 sprintf(regstr
, "oD[%lu]", reg
);
783 case D3DSPR_TEXCRDOUT
:
784 sprintf(regstr
, "oT[%lu]", reg
);
787 FIXME("Unhandled register name Type(%ld)\n", regtype
);
792 inline static void get_write_mask(const DWORD output_reg
, char *write_mask
) {
794 if ((output_reg
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
795 strcat(write_mask
, ".");
796 if (output_reg
& D3DSP_WRITEMASK_0
) strcat(write_mask
, "r");
797 if (output_reg
& D3DSP_WRITEMASK_1
) strcat(write_mask
, "g");
798 if (output_reg
& D3DSP_WRITEMASK_2
) strcat(write_mask
, "b");
799 if (output_reg
& D3DSP_WRITEMASK_3
) strcat(write_mask
, "a");
803 inline static void get_input_register_swizzle(const DWORD instr
, char *swzstring
) {
804 static const char swizzle_reg_chars
[] = "rgba";
805 DWORD swizzle
= (instr
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
806 DWORD swizzle_x
= swizzle
& 0x03;
807 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
808 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
809 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
811 * swizzle bits fields:
815 if ((D3DSP_NOSWIZZLE
>> D3DSP_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
816 if (swizzle_x
== swizzle_y
&&
817 swizzle_x
== swizzle_z
&&
818 swizzle_x
== swizzle_w
) {
819 sprintf(swzstring
, ".%c", swizzle_reg_chars
[swizzle_x
]);
821 sprintf(swzstring
, ".%c%c%c%c",
822 swizzle_reg_chars
[swizzle_x
],
823 swizzle_reg_chars
[swizzle_y
],
824 swizzle_reg_chars
[swizzle_z
],
825 swizzle_reg_chars
[swizzle_w
]);
830 inline static void addline(unsigned int *lineNum
, char *pgm
, unsigned int *pgmLength
, char *line
) {
831 int lineLen
= strlen(line
);
832 if(lineLen
+ *pgmLength
> PGMSIZE
- 1 /* - 1 to allow a NULL at the end */) {
833 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE
, lineLen
+ *pgmLength
);
836 memcpy(pgm
+ *pgmLength
, line
, lineLen
);
839 *pgmLength
+= lineLen
;
841 TRACE("GL HW (%u, %u) : %s", *lineNum
, *pgmLength
, line
);
/*
 * Scale operand used for each value of the 4-bit destination shift field,
 * indexed by the raw field value. Values 1..4 multiply by a component of
 * the generated program's "coefmul" PARAM, values 12..15 by a component of
 * "coefdiv". The remaining slots hold the placeholder name "dummy"; the
 * visible program prologue declares no PARAM of that name, so those slots
 * are not usable operands.
 */
static const char* shift_tab[] = {
    "dummy",        /*  0: no shift   */
    "coefmul.x",    /*  1: times 2    */
    "coefmul.y",    /*  2: times 4    */
    "coefmul.z",    /*  3: times 8    */
    "coefmul.w",    /*  4: times 16   */
    "dummy",        /*  5: times 32   */
    "dummy",        /*  6: times 64   */
    "dummy",        /*  7: times 128  */
    "dummy",        /*  8: divide 256 */
    "dummy",        /*  9: divide 128 */
    "dummy",        /* 10: divide 64  */
    "dummy",        /* 11: divide 32  */
    "coefdiv.w",    /* 12: divide 16  */
    "coefdiv.z",    /* 13: divide 8   */
    "coefdiv.y",    /* 14: divide 4   */
    "coefdiv.x"     /* 15: divide 2   */
};
863 inline static void gen_output_modifier_line(int saturate
, char *write_mask
, int shift
, char *regstr
, char* line
) {
864 /* Generate a line that does the output modifier computation */
865 sprintf(line
, "MUL%s %s%s, %s, %s;", saturate
? "_SAT" : "", regstr
, write_mask
, regstr
, shift_tab
[shift
]);
868 inline static int gen_input_modifier_line(const DWORD instr
, int tmpreg
, char *outregstr
, char *line
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
869 /* Generate a line that does the input modifier computation and return the input register to use */
870 static char regstr
[256];
871 static char tmpline
[256];
874 /* Assume a new line will be added */
877 /* Get register name */
878 get_register_name(instr
, regstr
, constants
);
880 TRACE(" Register name %s\n", regstr
);
881 switch (instr
& D3DSP_SRCMOD_MASK
) {
883 strcpy(outregstr
, regstr
);
887 sprintf(outregstr
, "-%s", regstr
);
891 sprintf(line
, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg
, regstr
);
893 case D3DSPSM_BIASNEG
:
894 sprintf(line
, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg
, regstr
);
897 sprintf(line
, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg
, regstr
);
899 case D3DSPSM_SIGNNEG
:
900 sprintf(line
, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg
, regstr
);
903 sprintf(line
, "SUB T%c, one.x, %s;", 'A' + tmpreg
, regstr
);
906 sprintf(line
, "ADD T%c, %s, %s;", 'A' + tmpreg
, regstr
, regstr
);
909 sprintf(line
, "ADD T%c, -%s, -%s;", 'A' + tmpreg
, regstr
, regstr
);
912 sprintf(line
, "RCP T%c, %s.z;", 'A' + tmpreg
, regstr
);
913 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
914 strcat(line
, "\n"); /* Hack */
915 strcat(line
, tmpline
);
918 sprintf(line
, "RCP T%c, %s.w;", 'A' + tmpreg
, regstr
);
919 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
920 strcat(line
, "\n"); /* Hack */
921 strcat(line
, tmpline
);
924 strcpy(outregstr
, regstr
);
929 /* Substitute the register name */
930 sprintf(outregstr
, "T%c", 'A' + tmpreg
);
936 inline static void pshader_program_get_registers_used(
937 IWineD3DPixelShaderImpl
*This
,
938 CONST DWORD
* pToken
, DWORD
* tempsUsed
, DWORD
* texUsed
) {
946 while (D3DVS_END() != *pToken
) {
947 CONST SHADER_OPCODE
* curOpcode
;
950 if (pshader_is_version_token(*pToken
)) {
955 } else if (pshader_is_comment_token(*pToken
)) {
956 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
958 pToken
+= comment_len
;
963 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
966 /* Skip declarations (for now) */
967 if (D3DSIO_DCL
== curOpcode
->opcode
) {
968 pToken
+= curOpcode
->num_params
;
971 /* Skip definitions (for now) */
972 } else if (D3DSIO_DEF
== curOpcode
->opcode
) {
973 pToken
+= curOpcode
->num_params
;
976 /* Set texture registers, and temporary registers */
980 for (i
= 0; i
< curOpcode
->num_params
; ++i
) {
981 DWORD regtype
= shader_get_regtype(*pToken
);
982 DWORD reg
= (*pToken
) & D3DSP_REGNUM_MASK
;
983 if (D3DSPR_TEXTURE
== regtype
)
984 *texUsed
|= (1 << reg
);
985 if (D3DSPR_TEMP
== regtype
)
986 *tempsUsed
|= (1 << reg
);
/*
 * Translate the D3D pixel shader token stream at pFunction into an
 * ARB fragment program string and load it into a GL program object.
 *
 * The program text is assembled with addline() in a PGMSIZE-byte heap
 * buffer that is freed before returning. The function first clears
 * This->constants and runs pshader_program_get_registers_used() to learn
 * which temporary and texture registers need TEMP declarations. On the
 * version token it emits the "!!ARBfp1.0" header, the TEMP declarations and
 * the coefdiv / coefmul / one PARAMs. Each following opcode is translated in
 * turn, and the program ends with "MOV result.color, R0;" and "END".
 *
 * The finished string is passed to glProgramStringARB for a program id
 * stored in This->baseShader.prgId; when GL reports GL_INVALID_OPERATION
 * the error position and string are logged and prgId is set to -1.
 *
 * NOTE(review): the upload is guarded by GL_SUPPORT(ARB_VERTEX_PROGRAM)
 * although a fragment program is created - confirm the intended extension.
 * NOTE(review): the HeapAlloc() result is used without a NULL check.
 * NOTE(review): pshader_program_get_registers_used() is called before the
 * "NULL != pToken" test that guards the second pass.
 */
993 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
994 inline static VOID
IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
995 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
996 const DWORD
*pToken
= pFunction
;
997 const SHADER_OPCODE
*curOpcode
= NULL
;
1000 unsigned lineNum
= 0; /* The line number of the generated program (for loging)*/
1001 char *pgmStr
= NULL
; /* A pointer to the program data generated by this function */
1003 #if 0 /* TODO: loop register (just another address register ) */
1004 BOOL hasLoops
= FALSE
;
1007 BOOL saturate
; /* clamp to 0.0 -> 1.0*/
1008 int row
= 0; /* not sure, something to do with macros? */
1010 int version
= 0; /* The version of the shader */
1012 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
1013 unsigned int pgmLength
= 0;
1015 /* Keep bitmaps of used temporary and texture registers */
1016 DWORD tempsUsed
, texUsed
;
1018 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
1019 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
1020 if (This
->device
->fixupVertexBufferSize
< PGMSIZE
) {
1021 HeapFree(GetProcessHeap(), 0, This
->fixupVertexBuffer
);
1022 This
->fixupVertexBuffer
= HeapAlloc(GetProcessHeap() , 0, PGMSIZE
);
1023 This
->fixupVertexBufferSize
= PGMSIZE
;
1024 This
->fixupVertexBuffer
[0] = 0;
1026 pgmStr
= This
->device
->fixupVertexBuffer
;
1028 pgmStr
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, PGMSIZE
); /* 64kb should be enough */
1031 /* TODO: Think about using a first pass to work out what's required for the second pass. */
1032 for(i
= 0; i
< WINED3D_PSHADER_MAX_CONSTANTS
; i
++)
1033 This
->constants
[i
] = 0;
1035 /* First pass: figure out which temporary and texture registers are used */
1036 pshader_program_get_registers_used(This
, pToken
, &tempsUsed
, &texUsed
);
1037 TRACE("Texture registers used: %#lx, Temp registers used %#lx\n", texUsed
, tempsUsed
);
1039 /* TODO: check register usage against GL/Directx limits, and fail if they're exceeded */
1041 /* Second pass, process opcodes */
1042 if (NULL
!= pToken
) {
1043 while (D3DPS_END() != *pToken
) {
1044 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1046 instructionSize
= pToken
& SIZEBITS
>> 27;
1049 if (pshader_is_version_token(*pToken
)) { /** version */
1054 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1055 version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1057 TRACE("found version token ps.%lu.%lu;\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1059 /* Each release of pixel shaders has had different numbers of temp registers */
1064 case 13:numTemps
=12;
1068 case 14: numTemps
=12;
1072 case 20: numTemps
=12;
1075 FIXME("No work done yet to support ps2.0 in hw\n");
1077 case 30: numTemps
=32;
1080 FIXME("No work done yet to support ps3.0 in hw\n");
1086 FIXME("Unrecognized pixel shader version!\n");
1089 /* FIXME: if jumps are used, use GLSL, else use ARB_fragment_program */
1090 strcpy(tmpLine
, "!!ARBfp1.0\n");
1091 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1093 for(i
= 0; i
< numTex
; i
++) {
1094 if (texUsed
& (1 << i
)) {
1095 sprintf(tmpLine
, "TEMP T%lu;\n", i
);
1096 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1100 for(i
= 0; i
< numTemps
; i
++) {
1101 if (tempsUsed
& (1 << i
)) {
1102 sprintf(tmpLine
, "TEMP R%lu;\n", i
);
1103 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1107 sprintf(tmpLine
, "TEMP TMP;\n");
1108 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1109 sprintf(tmpLine
, "TEMP TMP2;\n");
1110 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1111 sprintf(tmpLine
, "TEMP TA;\n");
1112 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1113 sprintf(tmpLine
, "TEMP TB;\n");
1114 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1115 sprintf(tmpLine
, "TEMP TC;\n");
1116 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1118 strcpy(tmpLine
, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1119 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1120 strcpy(tmpLine
, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1121 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1122 strcpy(tmpLine
, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1123 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1125 for(i
= 0; i
< numTex
; i
++) {
1126 if (texUsed
& (1 << i
)) {
1127 sprintf(tmpLine
, "MOV T%lu, fragment.texcoord[%lu];\n", i
, i
);
1128 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1136 if (pshader_is_comment_token(*pToken
)) { /** comment */
1137 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1139 TRACE("#%s\n", (char*)pToken
);
1140 pToken
+= comment_len
;
1144 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1148 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
1150 if (NULL
== curOpcode
) {
1151 /* unknown current opcode ... (shouldn't be any!) */
1152 while (*pToken
& 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1153 FIXME("unrecognized opcode: %08lx\n", *pToken
);
1156 } else if (GLNAME_REQUIRE_GLSL
== curOpcode
->glname
) {
1157 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1158 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode
->name
);
1159 pToken
+= curOpcode
->num_params
;
1161 } else if (D3DSIO_DEF
== curOpcode
->opcode
) {
1163 /* Handle definitions here, they don't fit well with the
1164 * other instructions below [for now ] */
1166 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1168 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1169 curOpcode
->name
, curOpcode
->glname
, curOpcode
->num_params
);
1171 sprintf(tmpLine
, "PARAM C%lu = { %f, %f, %f, %f };\n", reg
,
1172 *((const float *)(pToken
+ 1)),
1173 *((const float *)(pToken
+ 2)),
1174 *((const float *)(pToken
+ 3)),
1175 *((const float *)(pToken
+ 4)) );
1177 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1179 This
->constants
[reg
] = 1;
1185 /* Common processing: [inst] [dst]* [src]* */
1187 char output_rname
[256];
1188 char output_wmask
[20];
1190 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1191 curOpcode
->name
, curOpcode
->glname
, curOpcode
->num_params
);
1195 /* Build opcode for GL vertex_program */
1196 switch (curOpcode
->opcode
) {
1222 case D3DSIO_TEXKILL
:
1223 TRACE("Appending glname %s to tmpLine\n", curOpcode
->glname
);
1224 strcpy(tmpLine
, curOpcode
->glname
);
1230 char reg_coord_swz
[20] = "";
1231 DWORD reg_dest_code
;
1232 DWORD reg_sampler_code
;
1234 /* All versions have a destination register */
1235 reg_dest_code
= *pToken
& D3DSP_REGNUM_MASK
;
1236 get_register_name(*pToken
++, reg_dest
, This
->constants
);
1238 /* 1.0-1.3: Use destination register as coordinate source. No modifiers.
1239 1.4: Use provided coordinate source register. _dw, _dz, swizzle allowed.
1240 2.0+: Use provided coordinate source register. No modifiers.
1241 3.0+: Use provided coordinate source register. Swizzle allowed */
1243 strcpy(reg_coord
, reg_dest
);
1245 else if (version
== 14) {
1246 if (gen_input_modifier_line(*pToken
, 0, reg_coord
, tmpLine
, This
->constants
))
1247 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1248 get_input_register_swizzle(*pToken
, reg_coord_swz
);
1251 else if (version
> 14 && version
< 30) {
1252 get_register_name(*pToken
, reg_coord
, This
->constants
);
1255 else if (version
>= 30) {
1256 get_input_register_swizzle(*pToken
, reg_coord_swz
);
1257 get_register_name(*pToken
, reg_coord
, This
->constants
);
1261 /* 1.0-1.4: Use destination register number as texture code.
1262 2.0+: Use provided sampler number as texure code. */
1264 reg_sampler_code
= reg_dest_code
;
1267 reg_sampler_code
= *pToken
& D3DSP_REGNUM_MASK
;
1271 sprintf(tmpLine
, "TEX %s, %s%s, texture[%lu], 2D;\n",
1272 reg_dest
, reg_coord
, reg_coord_swz
, reg_sampler_code
);
1273 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1277 case D3DSIO_TEXCOORD
:
1280 get_write_mask(*pToken
, tmp
);
1281 if (version
!= 14) {
1282 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1283 sprintf(tmpLine
, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg
, tmp
, reg
);
1284 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1287 DWORD reg1
= *pToken
& D3DSP_REGNUM_MASK
;
1288 DWORD reg2
= *++pToken
& D3DSP_REGNUM_MASK
;
1289 sprintf(tmpLine
, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1
, tmp
, reg2
);
1290 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1296 case D3DSIO_TEXM3x2PAD
:
1298 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1300 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1301 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1303 sprintf(tmpLine
, "DP3 TMP.x, T%lu, %s;\n", reg
, buf
);
1304 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1309 case D3DSIO_TEXM3x2TEX
:
1311 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1313 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1314 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1316 sprintf(tmpLine
, "DP3 TMP.y, T%lu, %s;\n", reg
, buf
);
1317 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1318 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg
, reg
);
1319 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1324 case D3DSIO_TEXREG2AR
:
1326 DWORD reg1
= *pToken
& D3DSP_REGNUM_MASK
;
1327 DWORD reg2
= *++pToken
& D3DSP_REGNUM_MASK
;
1328 sprintf(tmpLine
, "MOV TMP.r, T%lu.a;\n", reg2
);
1329 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1330 sprintf(tmpLine
, "MOV TMP.g, T%lu.r;\n", reg2
);
1331 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1332 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1333 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1338 case D3DSIO_TEXREG2GB
:
1340 DWORD reg1
= *pToken
& D3DSP_REGNUM_MASK
;
1341 DWORD reg2
= *++pToken
& D3DSP_REGNUM_MASK
;
1342 sprintf(tmpLine
, "MOV TMP.r, T%lu.g;\n", reg2
);
1343 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1344 sprintf(tmpLine
, "MOV TMP.g, T%lu.b;\n", reg2
);
1345 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1346 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1347 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1354 DWORD reg1
= *pToken
& D3DSP_REGNUM_MASK
;
1355 DWORD reg2
= *++pToken
& D3DSP_REGNUM_MASK
;
1357 /* FIXME: Should apply the BUMPMAPENV matrix */
1358 sprintf(tmpLine
, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1
, reg2
);
1359 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1360 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1361 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1366 case D3DSIO_TEXM3x3PAD
:
1368 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1370 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1371 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1373 sprintf(tmpLine
, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row
, reg
, buf
);
1374 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1380 case D3DSIO_TEXM3x3TEX
:
1382 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1384 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1385 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1388 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1389 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1391 /* Cubemap textures will be more used than 3D ones. */
1392 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1393 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1398 case D3DSIO_TEXM3x3VSPEC
:
1400 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1402 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1403 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1405 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1406 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1408 /* Construct the eye-ray vector from w coordinates */
1409 sprintf(tmpLine
, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw
[0]);
1410 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1411 sprintf(tmpLine
, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw
[1]);
1412 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1413 sprintf(tmpLine
, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg
);
1414 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1416 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1417 sprintf(tmpLine
, "DP3 TMP.w, TMP, TMP2;\n");
1418 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1419 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;\n");
1420 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1421 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1422 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1424 /* Cubemap textures will be more used than 3D ones. */
1425 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1426 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1432 case D3DSIO_TEXM3x3SPEC
:
1434 DWORD reg
= *pToken
& D3DSP_REGNUM_MASK
;
1435 DWORD reg3
= *(pToken
+ 2) & D3DSP_REGNUM_MASK
;
1437 if (gen_input_modifier_line(*(pToken
+ 1), 0, buf
, tmpLine
, This
->constants
)) {
1438 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1440 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1441 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1443 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1444 sprintf(tmpLine
, "DP3 TMP.w, TMP, C[%lu];\n", reg3
);
1445 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1447 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;\n");
1448 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1449 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3
);
1450 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1452 /* Cubemap textures will be more used than 3D ones. */
1453 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1454 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1462 if (curOpcode
->glname
== GLNAME_REQUIRE_GLSL
) {
1463 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode
->name
);
1465 FIXME("Can't handle opcode %s in hwShader\n", curOpcode
->name
);
1467 pToken
+= curOpcode
->num_params
;
1471 /* Process modifiers */
1472 if (0 != (*pToken
& D3DSP_DSTMOD_MASK
)) {
1473 DWORD mask
= *pToken
& D3DSP_DSTMOD_MASK
;
1475 case D3DSPDM_SATURATE
: saturate
= TRUE
; break;
1476 #if 0 /* as yet unhandled modifiers */
1477 case D3DSPDM_CENTROID
: centroid
= TRUE
; break;
1478 case D3DSPDM_PP
: partialpresision
= TRUE
; break;
1481 TRACE("_unhandled_modifier(0x%08lx)\n", mask
);
1484 shift
= (*pToken
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
1486 /* Generate input and output registers */
1487 if (curOpcode
->num_params
> 0) {
1489 char operands
[4][100];
1493 /* Generate lines that handle input modifier computation */
1494 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1495 TRACE("(%p) : Param %ld token %lx\n", This
, i
, *(pToken
+ i
));
1496 if (gen_input_modifier_line(*(pToken
+ i
), i
- 1, regs
[i
- 1], tmpOp
, This
->constants
)) {
1497 addline(&lineNum
, pgmStr
, &pgmLength
, tmpOp
);
1501 /* Handle output register */
1502 get_register_name(*pToken
, output_rname
, This
->constants
);
1503 strcpy(operands
[0], output_rname
);
1504 get_write_mask(*pToken
, output_wmask
);
1505 strcat(operands
[0], output_wmask
);
1507 /* This function works because of side effects from gen_input_modifier_line */
1508 /* Handle input registers */
1509 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1510 TRACE("(%p) : Regs = %s\n", This
, regs
[i
- 1]);
1511 strcpy(operands
[i
], regs
[i
- 1]);
1512 get_input_register_swizzle(*(pToken
+ i
), swzstring
);
1513 strcat(operands
[i
], swzstring
);
1516 switch(curOpcode
->opcode
) {
1518 sprintf(tmpLine
, "CMP%s %s, %s, %s, %s;\n", (saturate
? "_SAT" : ""), operands
[0], operands
[1], operands
[3], operands
[2]);
1521 sprintf(tmpLine
, "ADD TMP, -%s, coefdiv.x;", operands
[1]);
1522 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1523 sprintf(tmpLine
, "CMP%s %s, TMP, %s, %s;\n", (saturate
? "_SAT" : ""), operands
[0], operands
[2], operands
[3]);
1526 if (saturate
&& (shift
== 0))
1527 strcat(tmpLine
, "_SAT");
1528 strcat(tmpLine
, " ");
1529 strcat(tmpLine
, operands
[0]);
1530 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
1531 strcat(tmpLine
, ", ");
1532 strcat(tmpLine
, operands
[i
]);
1534 strcat(tmpLine
,";\n");
1536 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1538 /* A shift requires another line. */
1540 gen_output_modifier_line(saturate
, output_wmask
, shift
, output_rname
, tmpLine
);
1541 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1543 pToken
+= curOpcode
->num_params
;
1547 /* TODO: What about result.depth? */
1548 strcpy(tmpLine
, "MOV result.color, R0;\n");
1549 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1551 strcpy(tmpLine
, "END\n");
1552 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1555 /* finally null terminate the pgmStr*/
1556 pgmStr
[pgmLength
] = 0;
1557 if (GL_SUPPORT(ARB_VERTEX_PROGRAM
)) {
1558 /* Create the hw shader */
1560 /* pgmStr sometimes gets too long for a normal TRACE */
1561 TRACE("Generated program:\n");
1562 if (TRACE_ON(d3d_shader
)) {
1563 fprintf(stderr
, "%s\n", pgmStr
);
1566 /* TODO: change to resource.glObjectHandel or something like that */
1567 GL_EXTCALL(glGenProgramsARB(1, &This
->baseShader
.prgId
));
1569 TRACE("Creating a hw pixel shader, prg=%d\n", This
->baseShader
.prgId
);
1570 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
, This
->baseShader
.prgId
));
1572 TRACE("Created hw pixel shader, prg=%d\n", This
->baseShader
.prgId
);
1573 /* Create the program and check for errors */
1574 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
, strlen(pgmStr
), pgmStr
));
1575 if (glGetError() == GL_INVALID_OPERATION
) {
1577 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &errPos
);
1578 FIXME("HW PixelShader Error at position %d: %s\n",
1579 errPos
, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB
)));
1580 This
->baseShader
.prgId
= -1;
1583 #if 1 /* if were using the data buffer of device then we don't need to free it */
1584 HeapFree(GetProcessHeap(), 0, pgmStr
);
1588 inline static void pshader_program_dump_ins_modifiers(const DWORD output
) {
1590 DWORD shift
= (output
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
1591 DWORD mmask
= output
& D3DSP_DSTMOD_MASK
;
1595 case 13: TRACE("_d8"); break;
1596 case 14: TRACE("_d4"); break;
1597 case 15: TRACE("_d2"); break;
1598 case 1: TRACE("_x2"); break;
1599 case 2: TRACE("_x4"); break;
1600 case 3: TRACE("_x8"); break;
1601 default: TRACE("_unhandled_shift(%ld)", shift
); break;
1605 case D3DSPDM_NONE
: break;
1606 case D3DSPDM_SATURATE
: TRACE("_sat"); break;
1607 case D3DSPDM_PARTIALPRECISION
: TRACE("_pp"); break;
1608 case D3DSPDM_MSAMPCENTROID
: TRACE("_centroid"); break;
1609 default: TRACE("_unhandled_modifier(%#lx)", mmask
); break;
/*
 * Trace a human-readable form of one pixel shader parameter token.
 *
 * param: the parameter token; its register number, register type, source
 *        modifier, write mask and swizzle fields are decoded.
 * input: callers in this file pass 0 for an instruction's first
 *        (destination) parameter and 1 for the following ones; presumably
 *        it selects between the write-mask and the modifier/swizzle output
 *        below - the selecting statement is not visible here, confirm.
 *
 * NOTE(review): rastout_reg_names has five entries and is indexed with the
 * raw register number without a range check.
 */
1613 inline static void pshader_program_dump_ps_param(const DWORD param
, int input
) {
1614 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1615 static const char swizzle_reg_chars
[] = "rgba";
1617 DWORD reg
= param
& D3DSP_REGNUM_MASK
;
1618 DWORD regtype
= shader_get_regtype(param
);
1621 if ( ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
) ||
1622 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_BIASNEG
) ||
1623 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_SIGNNEG
) ||
1624 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_X2NEG
) )
1626 else if ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_COMP
)
1638 TRACE("c%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1641 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
1644 case D3DSPR_RASTOUT
:
1645 TRACE("%s", rastout_reg_names
[reg
]);
1647 case D3DSPR_ATTROUT
:
1648 TRACE("oD%lu", reg
);
1650 case D3DSPR_TEXCRDOUT
:
1651 TRACE("oT%lu", reg
);
1653 case D3DSPR_CONSTINT
:
1654 TRACE("i%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1656 case D3DSPR_CONSTBOOL
:
1657 TRACE("b%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1663 TRACE("aL%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1665 case D3DSPR_SAMPLER
:
1669 TRACE("unhandled_rtype(%lx)", regtype
);
1674 /* operand output (for modifiers and shift, see dump_ins_modifiers) */
1676 if ((param
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
1678 if (param
& D3DSP_WRITEMASK_0
) TRACE(".r");
1679 if (param
& D3DSP_WRITEMASK_1
) TRACE(".g");
1680 if (param
& D3DSP_WRITEMASK_2
) TRACE(".b");
1681 if (param
& D3DSP_WRITEMASK_3
) TRACE(".a");
1684 /** operand input */
1685 DWORD swizzle
= (param
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
1686 DWORD swizzle_r
= swizzle
& 0x03;
1687 DWORD swizzle_g
= (swizzle
>> 2) & 0x03;
1688 DWORD swizzle_b
= (swizzle
>> 4) & 0x03;
1689 DWORD swizzle_a
= (swizzle
>> 6) & 0x03;
1691 if (0 != (param
& D3DSP_SRCMOD_MASK
)) {
1692 DWORD mask
= param
& D3DSP_SRCMOD_MASK
;
1693 /*TRACE("_modifier(0x%08lx) ", mask);*/
1695 case D3DSPSM_NONE
: break;
1696 case D3DSPSM_NEG
: break;
1697 case D3DSPSM_BIAS
: TRACE("_bias"); break;
1698 case D3DSPSM_BIASNEG
: TRACE("_bias"); break;
1699 case D3DSPSM_SIGN
: TRACE("_bx2"); break;
1700 case D3DSPSM_SIGNNEG
: TRACE("_bx2"); break;
1701 case D3DSPSM_COMP
: break;
1702 case D3DSPSM_X2
: TRACE("_x2"); break;
1703 case D3DSPSM_X2NEG
: TRACE("_x2"); break;
1704 case D3DSPSM_DZ
: TRACE("_dz"); break;
1705 case D3DSPSM_DW
: TRACE("_dw"); break;
1707 TRACE("_unknown(0x%08lx)", mask
);
1712 * swizzle bits fields:
1715 if ((D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1716 if (swizzle_r
== swizzle_g
&&
1717 swizzle_r
== swizzle_b
&&
1718 swizzle_r
== swizzle_a
) {
1719 TRACE(".%c", swizzle_reg_chars
[swizzle_r
]);
1722 swizzle_reg_chars
[swizzle_r
],
1723 swizzle_reg_chars
[swizzle_g
],
1724 swizzle_reg_chars
[swizzle_b
],
1725 swizzle_reg_chars
[swizzle_a
]);
/*
 * Trace the usage part of a dcl instruction.
 *
 * This:  the pixel shader being dumped.
 * decl:  the declaration token; supplies the texture type for samplers, or
 *        the usage and usage index otherwise.
 * param: the register token being declared; only its register type is read.
 *
 * For a D3DSPR_SAMPLER register the texture type (2d, cube, volume) is
 * traced; for other register types the usage name is traced, with the usage
 * index appended for some usages. An unrecognised usage is reported with
 * FIXME.
 */
1731 inline static void pshader_program_dump_decl_usage(
1732 IWineD3DPixelShaderImpl
*This
, DWORD decl
, DWORD param
) {
1734 DWORD regtype
= shader_get_regtype(param
);
1737 if (regtype
== D3DSPR_SAMPLER
) {
1738 DWORD ttype
= decl
& D3DSP_TEXTURETYPE_MASK
;
1741 case D3DSTT_2D
: TRACE("2d "); break;
1742 case D3DSTT_CUBE
: TRACE("cube "); break;
1743 case D3DSTT_VOLUME
: TRACE("volume "); break;
1744 default: TRACE("unknown_ttype(%08lx) ", ttype
);
1749 DWORD usage
= decl
& D3DSP_DCL_USAGE_MASK
;
1750 DWORD idx
= (decl
& D3DSP_DCL_USAGEINDEX_MASK
) >> D3DSP_DCL_USAGEINDEX_SHIFT
;
1753 case D3DDECLUSAGE_POSITION
:
1754 TRACE("%s%ld ", "position", idx
);
1756 case D3DDECLUSAGE_BLENDINDICES
:
1757 TRACE("%s ", "blend");
1759 case D3DDECLUSAGE_BLENDWEIGHT
:
1760 TRACE("%s ", "weight");
1762 case D3DDECLUSAGE_NORMAL
:
1763 TRACE("%s%ld ", "normal", idx
);
1765 case D3DDECLUSAGE_PSIZE
:
1766 TRACE("%s ", "psize");
1768 case D3DDECLUSAGE_COLOR
:
1770 TRACE("%s ", "color");
1772 TRACE("%s%ld ", "specular", (idx
- 1));
1775 case D3DDECLUSAGE_TEXCOORD
:
1776 TRACE("%s%ld ", "texture", idx
);
1778 case D3DDECLUSAGE_TANGENT
:
1779 TRACE("%s ", "tangent");
1781 case D3DDECLUSAGE_BINORMAL
:
1782 TRACE("%s ", "binormal");
1784 case D3DDECLUSAGE_TESSFACTOR
:
1785 TRACE("%s ", "tessfactor");
1787 case D3DDECLUSAGE_POSITIONT
:
1788 TRACE("%s%ld ", "positionT", idx
);
1790 case D3DDECLUSAGE_FOG
:
1791 TRACE("%s ", "fog");
1793 case D3DDECLUSAGE_DEPTH
:
1794 TRACE("%s ", "depth");
1796 case D3DDECLUSAGE_SAMPLE
:
1797 TRACE("%s ", "sample");
1800 FIXME("Unrecognised dcl %08lx", usage
);
/*
 * Attach a pixel shader function (token stream) to the shader object.
 *
 * iface:     the pixel shader.
 * pFunction: D3D pixel shader token stream, or NULL.
 *
 * For a non-NULL stream the tokens are walked up to D3DPS_END and traced as
 * a disassembly. The version token is stored in This->baseShader.version,
 * and the stream length in bytes is stored in
 * This->baseShader.functionLength. When wined3d_settings.vs_mode is VS_HW
 * the ARB program is generated with
 * IWineD3DPixelShaderImpl_GenerateProgramArbHW(). Finally the token stream
 * is copied into a heap buffer kept in This->baseShader.function; with no
 * function that pointer is set to NULL.
 *
 * The final trace reports WINED3D_OK; the code marks proper failure return
 * values as still to be done.
 *
 * NOTE(review): the hardware path is selected by the vertex shader setting
 * (vs_mode / VS_HW) - confirm whether a pixel shader setting was intended.
 * NOTE(review): the HeapAlloc() result is passed to memcpy() without a NULL
 * check.
 */
1805 HRESULT WINAPI
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
1806 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
1807 const DWORD
* pToken
= pFunction
;
1808 const SHADER_OPCODE
*curOpcode
= NULL
;
1811 TRACE("(%p) : Parsing programme\n", This
);
1813 if (NULL
!= pToken
) {
1814 while (D3DPS_END() != *pToken
) {
1815 if (pshader_is_version_token(*pToken
)) { /** version */
1816 This
->baseShader
.version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1817 TRACE("ps_%lu_%lu\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1822 if (pshader_is_comment_token(*pToken
)) { /** comment */
1823 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1825 TRACE("//%s\n", (char*)pToken
);
1826 pToken
+= comment_len
;
1827 len
+= comment_len
+ 1;
1830 if (!This
->baseShader
.version
) {
1831 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This
);
1833 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
1836 if (NULL
== curOpcode
) {
1838 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1839 while (*pToken
& 0x80000000) {
1841 /* unknown current opcode ... */
1842 TRACE("unrecognized opcode: %08lx", *pToken
);
1849 if (curOpcode
->opcode
== D3DSIO_DCL
) {
1850 pshader_program_dump_decl_usage(This
, *pToken
, *(pToken
+ 1));
1853 pshader_program_dump_ps_param(*pToken
, 0);
1857 if (curOpcode
->opcode
== D3DSIO_DEF
) {
1858 TRACE("def c%lu = ", *pToken
& 0xFF);
1861 TRACE("%f ,", *(float *)pToken
);
1864 TRACE("%f ,", *(float *)pToken
);
1867 TRACE("%f ,", *(float *)pToken
);
1870 TRACE("%f", *(float *)pToken
);
1874 TRACE("%s", curOpcode
->name
);
1875 if (curOpcode
->num_params
> 0) {
1876 pshader_program_dump_ins_modifiers(*pToken
);
1878 pshader_program_dump_ps_param(*pToken
, 0);
1881 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1883 pshader_program_dump_ps_param(*pToken
, 1);
1892 This
->baseShader
.functionLength
= (len
+ 1) * sizeof(DWORD
);
1894 This
->baseShader
.functionLength
= 1; /* no Function defined use fixed function vertex processing */
1897 /* Generate HW shader in needed */
1898 if (NULL
!= pFunction
&& wined3d_settings
.vs_mode
== VS_HW
) {
1899 TRACE("(%p) : Generating hardware program\n", This
);
1901 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface
, pFunction
);
1905 TRACE("(%p) : Copying the function\n", This
);
1906 /* copy the function ... because it will certainly be released by application */
1907 if (NULL
!= pFunction
) {
1908 This
->baseShader
.function
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, This
->baseShader
.functionLength
);
1909 memcpy((void *)This
->baseShader
.function
, pFunction
, This
->baseShader
.functionLength
);
1911 This
->baseShader
.function
= NULL
;
1914 /* TODO: Some proper return values for failures */
1915 TRACE("(%p) : Returning WINED3D_OK\n", This
);
1919 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl
=
1921 /*** IUnknown methods ***/
1922 IWineD3DPixelShaderImpl_QueryInterface
,
1923 IWineD3DPixelShaderImpl_AddRef
,
1924 IWineD3DPixelShaderImpl_Release
,
1925 /*** IWineD3DBase methods ***/
1926 IWineD3DPixelShaderImpl_GetParent
,
1927 /*** IWineD3DBaseShader methods ***/
1928 IWineD3DPixelShaderImpl_SetFunction
,
1929 /*** IWineD3DPixelShader methods ***/
1930 IWineD3DPixelShaderImpl_GetDevice
,
1931 IWineD3DPixelShaderImpl_GetFunction