2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader
);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
40 /* The maximum size of the program */
43 #define REGMASK 0x00001FFF
45 #define GLNAME_REQUIRE_GLSL ((const char *)1)
46 /* *******************************************
47 IWineD3DPixelShader IUnknown parts follow
48 ******************************************* */
49 HRESULT WINAPI
IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader
*iface
, REFIID riid
, LPVOID
*ppobj
)
51 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
52 TRACE("(%p)->(%s,%p)\n",This
,debugstr_guid(riid
),ppobj
);
53 if (IsEqualGUID(riid
, &IID_IUnknown
)
54 || IsEqualGUID(riid
, &IID_IWineD3DBase
)
55 || IsEqualGUID(riid
, &IID_IWineD3DPixelShader
)) {
56 IUnknown_AddRef(iface
);
63 ULONG WINAPI
IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader
*iface
) {
64 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
65 TRACE("(%p) : AddRef increasing from %ld\n", This
, This
->ref
);
66 return InterlockedIncrement(&This
->ref
);
69 ULONG WINAPI
IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader
*iface
) {
70 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
72 TRACE("(%p) : Releasing from %ld\n", This
, This
->ref
);
73 ref
= InterlockedDecrement(&This
->ref
);
75 HeapFree(GetProcessHeap(), 0, This
);
80 /* TODO: At the moment the function parser is single pass; it achieves this
81 by passing constants to a couple of functions where they are then modified.
82 At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
83 when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
86 /* *******************************************
87 IWineD3DPixelShader IWineD3DPixelShader parts follow
88 ******************************************* */
90 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader
*iface
, IUnknown
** parent
){
91 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
93 *parent
= This
->parent
;
94 IUnknown_AddRef(*parent
);
95 TRACE("(%p) : returning %p\n", This
, *parent
);
99 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader
* iface
, IWineD3DDevice
**pDevice
){
100 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
101 IWineD3DDevice_AddRef((IWineD3DDevice
*)This
->wineD3DDevice
);
102 *pDevice
= (IWineD3DDevice
*)This
->wineD3DDevice
;
103 TRACE("(%p) returning %p\n", This
, *pDevice
);
108 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader
* impl
, VOID
* pData
, UINT
* pSizeOfData
) {
109 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)impl
;
110 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This
, pData
, pSizeOfData
);
113 *pSizeOfData
= This
->functionLength
;
116 if (*pSizeOfData
< This
->functionLength
) {
117 *pSizeOfData
= This
->functionLength
;
118 return D3DERR_MOREDATA
;
120 if (NULL
== This
->function
) { /* no function defined */
121 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This
, pData
);
122 (*(DWORD
**) pData
) = NULL
;
124 if (This
->functionLength
== 0) {
127 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
128 memcpy(pData
, This
->function
, This
->functionLength
);
133 /*******************************
134 * pshader functions software VM
137 void pshader_add(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
138 d
->x
= s0
->x
+ s1
->x
;
139 d
->y
= s0
->y
+ s1
->y
;
140 d
->z
= s0
->z
+ s1
->z
;
141 d
->w
= s0
->w
+ s1
->w
;
142 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
143 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
146 void pshader_dp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
147 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
;
148 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
149 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
152 void pshader_dp4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
153 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
+ s0
->w
* s1
->w
;
154 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
155 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
158 void pshader_dst(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
160 d
->y
= s0
->y
* s1
->y
;
163 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
164 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
167 void pshader_expp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
173 tmp
.f
= floorf(s0
->w
);
174 d
->x
= powf(2.0f
, tmp
.f
);
175 d
->y
= s0
->w
- tmp
.f
;
176 tmp
.f
= powf(2.0f
, s0
->w
);
177 tmp
.d
&= 0xFFFFFF00U
;
180 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
181 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
184 void pshader_lit(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
186 d
->y
= (0.0f
< s0
->x
) ? s0
->x
: 0.0f
;
187 d
->z
= (0.0f
< s0
->x
&& 0.0f
< s0
->y
) ? powf(s0
->y
, s0
->w
) : 0.0f
;
189 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
190 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
193 void pshader_logp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
194 float tmp_f
= fabsf(s0
->w
);
195 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
196 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
197 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
200 void pshader_mad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
201 d
->x
= s0
->x
* s1
->x
+ s2
->x
;
202 d
->y
= s0
->y
* s1
->y
+ s2
->y
;
203 d
->z
= s0
->z
* s1
->z
+ s2
->z
;
204 d
->w
= s0
->w
* s1
->w
+ s2
->w
;
205 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
206 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, s2
->x
, s2
->y
, s2
->z
, s2
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
209 void pshader_max(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
210 d
->x
= (s0
->x
>= s1
->x
) ? s0
->x
: s1
->x
;
211 d
->y
= (s0
->y
>= s1
->y
) ? s0
->y
: s1
->y
;
212 d
->z
= (s0
->z
>= s1
->z
) ? s0
->z
: s1
->z
;
213 d
->w
= (s0
->w
>= s1
->w
) ? s0
->w
: s1
->w
;
214 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
215 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
218 void pshader_min(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
219 d
->x
= (s0
->x
< s1
->x
) ? s0
->x
: s1
->x
;
220 d
->y
= (s0
->y
< s1
->y
) ? s0
->y
: s1
->y
;
221 d
->z
= (s0
->z
< s1
->z
) ? s0
->z
: s1
->z
;
222 d
->w
= (s0
->w
< s1
->w
) ? s0
->w
: s1
->w
;
223 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
224 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
227 void pshader_mov(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
232 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
233 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
236 void pshader_mul(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
237 d
->x
= s0
->x
* s1
->x
;
238 d
->y
= s0
->y
* s1
->y
;
239 d
->z
= s0
->z
* s1
->z
;
240 d
->w
= s0
->w
* s1
->w
;
241 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
242 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
/* Software VM 'nop': has no operands and no effect other than the optional
 * PSTRACE message. */
void pshader_nop(void) {
    PSTRACE(("executing nop\n"));
}
250 void pshader_rcp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
251 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== s0
->w
) ? HUGE_VAL
: 1.0f
/ s0
->w
;
252 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
253 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
256 void pshader_rsq(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
257 float tmp_f
= fabsf(s0
->w
);
258 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== tmp_f
) ? HUGE_VAL
: ((1.0f
!= tmp_f
) ? 1.0f
/ sqrtf(tmp_f
) : 1.0f
);
259 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
260 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
263 void pshader_sge(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
264 d
->x
= (s0
->x
>= s1
->x
) ? 1.0f
: 0.0f
;
265 d
->y
= (s0
->y
>= s1
->y
) ? 1.0f
: 0.0f
;
266 d
->z
= (s0
->z
>= s1
->z
) ? 1.0f
: 0.0f
;
267 d
->w
= (s0
->w
>= s1
->w
) ? 1.0f
: 0.0f
;
268 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
269 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
272 void pshader_slt(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
273 d
->x
= (s0
->x
< s1
->x
) ? 1.0f
: 0.0f
;
274 d
->y
= (s0
->y
< s1
->y
) ? 1.0f
: 0.0f
;
275 d
->z
= (s0
->z
< s1
->z
) ? 1.0f
: 0.0f
;
276 d
->w
= (s0
->w
< s1
->w
) ? 1.0f
: 0.0f
;
277 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
278 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
281 void pshader_sub(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
282 d
->x
= s0
->x
- s1
->x
;
283 d
->y
= s0
->y
- s1
->y
;
284 d
->z
= s0
->z
- s1
->z
;
285 d
->w
= s0
->w
- s1
->w
;
286 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
287 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
291 * Version 1.1 specific
294 void pshader_exp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
295 d
->x
= d
->y
= d
->z
= d
->w
= powf(2.0f
, s0
->w
);
296 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
297 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
300 void pshader_log(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
301 float tmp_f
= fabsf(s0
->w
);
302 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
303 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
304 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
307 void pshader_frc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
308 d
->x
= s0
->x
- floorf(s0
->x
);
309 d
->y
= s0
->y
- floorf(s0
->y
);
312 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
313 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
316 typedef FLOAT D3DMATRIX44
[4][4];
317 typedef FLOAT D3DMATRIX43
[4][3];
318 typedef FLOAT D3DMATRIX34
[3][4];
319 typedef FLOAT D3DMATRIX33
[3][3];
320 typedef FLOAT D3DMATRIX23
[2][3];
322 void pshader_m4x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat
) {
324 * Buggy CODE: here only if cast not work for copy/paste
325 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
326 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
327 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
328 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
329 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
330 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
331 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
333 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
334 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
335 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
336 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
+ mat
[3][3] * s0
->w
;
337 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
338 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
339 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
340 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], mat
[3][3], s0
->w
, d
->w
));
343 void pshader_m4x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX34 mat
) {
344 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
345 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
346 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
348 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
349 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
350 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
351 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0
->w
, d
->w
));
354 void pshader_m3x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX43 mat
) {
355 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
356 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
357 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
358 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
;
359 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
360 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
361 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
362 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], s0
->w
, d
->w
));
365 void pshader_m3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX33 mat
) {
366 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
367 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
368 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
370 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
371 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
372 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
373 PSTRACE(("executing m3x3(4): (%f) \n", d
->w
));
376 void pshader_m3x2(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX23 mat
) {
378 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
379 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
385 * Version 2.0 specific
387 void pshader_lrp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
388 d
->x
= s0
->x
* (s1
->x
- s2
->x
) + s2
->x
;
389 d
->y
= s0
->y
* (s1
->y
- s2
->y
) + s2
->y
;
390 d
->z
= s0
->z
* (s1
->z
- s2
->z
) + s2
->z
;
391 d
->w
= s0
->w
* (s1
->w
- s2
->w
) + s2
->w
;
394 void pshader_crs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
395 d
->x
= s0
->y
* s1
->z
- s0
->z
* s1
->y
;
396 d
->y
= s0
->z
* s1
->x
- s0
->x
* s1
->z
;
397 d
->z
= s0
->x
* s1
->y
- s0
->y
* s1
->x
;
398 d
->w
= 0.9f
; /* w is undefined, so set it to something safeish */
400 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
401 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
404 void pshader_abs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
409 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
410 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
414 void pshader_texcoord(WINED3DSHADERVECTOR
* d
) {
418 void pshader_texkill(WINED3DSHADERVECTOR
* d
) {
422 void pshader_tex(WINED3DSHADERVECTOR
* d
) {
425 void pshader_texld(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
429 void pshader_texbem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
433 void pshader_texbeml(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
437 void pshader_texreg2ar(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
441 void pshader_texreg2gb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
445 void pshader_texm3x2pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
449 void pshader_texm3x2tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
453 void pshader_texm3x3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
457 void pshader_texm3x3pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
461 void pshader_texm3x3diff(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
465 void pshader_texm3x3spec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
469 void pshader_texm3x3vspec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
473 void pshader_cnd(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
477 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
478 void pshader_def(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
, WINED3DSHADERVECTOR
* s3
) {
482 void pshader_texreg2rgb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
486 void pshader_texdp3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
490 void pshader_texm3x2depth(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
494 void pshader_texdp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
498 void pshader_texm3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
502 void pshader_texdepth(WINED3DSHADERVECTOR
* d
) {
506 void pshader_cmp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
510 void pshader_bem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
514 void pshader_call(WINED3DSHADERVECTOR
* d
) {
518 void pshader_callnz(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
522 void pshader_loop(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
526 void pshader_ret(WINED3DSHADERVECTOR
* d
) {
530 void pshader_endloop(WINED3DSHADERVECTOR
* d
) {
534 void pshader_dcl(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
538 void pshader_pow(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
542 void pshader_sng(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
546 void pshader_nrm(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
550 void pshader_sincos(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
554 void pshader_rep(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
558 void pshader_endrep(void) {
562 void pshader_if(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
566 void pshader_ifc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
570 void pshader_else(WINED3DSHADERVECTOR
* d
) {
574 void pshader_label(WINED3DSHADERVECTOR
* d
) {
578 void pshader_endif(WINED3DSHADERVECTOR
* d
) {
582 void pshader_break(WINED3DSHADERVECTOR
* d
) {
586 void pshader_breakc(WINED3DSHADERVECTOR
* d
) {
590 void pshader_mova(WINED3DSHADERVECTOR
* d
) {
594 void pshader_defb(WINED3DSHADERVECTOR
* d
) {
598 void pshader_defi(WINED3DSHADERVECTOR
* d
) {
602 void pshader_dp2add(WINED3DSHADERVECTOR
* d
) {
606 void pshader_dsx(WINED3DSHADERVECTOR
* d
) {
610 void pshader_dsy(WINED3DSHADERVECTOR
* d
) {
614 void pshader_texldd(WINED3DSHADERVECTOR
* d
) {
618 void pshader_setp(WINED3DSHADERVECTOR
* d
) {
622 void pshader_texldl(WINED3DSHADERVECTOR
* d
) {
626 void pshader_breakp(WINED3DSHADERVECTOR
* d
) {
630 * log, exp, frc, m*x* seems to be macros ins ... to see
632 static CONST SHADER_OPCODE pshader_ins
[] = {
633 {D3DSIO_NOP
, "nop", "NOP", 0, pshader_nop
, 0, 0},
634 {D3DSIO_MOV
, "mov", "MOV", 2, pshader_mov
, 0, 0},
635 {D3DSIO_ADD
, "add", "ADD", 3, pshader_add
, 0, 0},
636 {D3DSIO_SUB
, "sub", "SUB", 3, pshader_sub
, 0, 0},
637 {D3DSIO_MAD
, "mad", "MAD", 4, pshader_mad
, 0, 0},
638 {D3DSIO_MUL
, "mul", "MUL", 3, pshader_mul
, 0, 0},
639 {D3DSIO_RCP
, "rcp", "RCP", 2, pshader_rcp
, 0, 0},
640 {D3DSIO_RSQ
, "rsq", "RSQ", 2, pshader_rsq
, 0, 0},
641 {D3DSIO_DP3
, "dp3", "DP3", 3, pshader_dp3
, 0, 0},
642 {D3DSIO_DP4
, "dp4", "DP4", 3, pshader_dp4
, 0, 0},
643 {D3DSIO_MIN
, "min", "MIN", 3, pshader_min
, 0, 0},
644 {D3DSIO_MAX
, "max", "MAX", 3, pshader_max
, 0, 0},
645 {D3DSIO_SLT
, "slt", "SLT", 3, pshader_slt
, 0, 0},
646 {D3DSIO_SGE
, "sge", "SGE", 3, pshader_sge
, 0, 0},
647 {D3DSIO_ABS
, "abs", "ABS", 2, pshader_abs
, 0, 0},
648 {D3DSIO_EXP
, "exp", "EX2", 2, pshader_exp
, 0, 0},
649 {D3DSIO_LOG
, "log", "LG2", 2, pshader_log
, 0, 0},
650 {D3DSIO_LIT
, "lit", "LIT", 2, pshader_lit
, 0, 0},
651 {D3DSIO_DST
, "dst", "DST", 3, pshader_dst
, 0, 0},
652 {D3DSIO_LRP
, "lrp", "LRP", 4, pshader_lrp
, 0, 0},
653 {D3DSIO_FRC
, "frc", "FRC", 2, pshader_frc
, 0, 0},
654 {D3DSIO_M4x4
, "m4x4", "undefined", 3, pshader_m4x4
, 0, 0},
655 {D3DSIO_M4x3
, "m4x3", "undefined", 3, pshader_m4x3
, 0, 0},
656 {D3DSIO_M3x4
, "m3x4", "undefined", 3, pshader_m3x4
, 0, 0},
657 {D3DSIO_M3x3
, "m3x3", "undefined", 3, pshader_m3x3
, 0, 0},
658 {D3DSIO_M3x2
, "m3x2", "undefined", 3, pshader_m3x2
, 0, 0},
661 /** FIXME: use direct access so add the others opcodes as stubs */
662 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported.
663 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl or NV_shader's */
664 {D3DSIO_CALL
, "call", GLNAME_REQUIRE_GLSL
, 1, pshader_call
, 0, 0},
665 {D3DSIO_CALLNZ
, "callnz", GLNAME_REQUIRE_GLSL
, 2, pshader_callnz
, 0, 0},
666 {D3DSIO_LOOP
, "loop", GLNAME_REQUIRE_GLSL
, 2, pshader_loop
, 0, 0},
667 {D3DSIO_RET
, "ret", GLNAME_REQUIRE_GLSL
, 0, pshader_ret
, 0, 0},
668 {D3DSIO_ENDLOOP
, "endloop", GLNAME_REQUIRE_GLSL
, 0, pshader_endloop
, 0, 0},
669 {D3DSIO_LABEL
, "label", GLNAME_REQUIRE_GLSL
, 1, pshader_label
, 0, 0},
670 /* DCL is a special operation */
671 {D3DSIO_DCL
, "dcl", NULL
, 1, pshader_dcl
, 0, 0},
672 {D3DSIO_POW
, "pow", "POW", 3, pshader_pow
, 0, 0},
673 {D3DSIO_CRS
, "crs", "XPS", 3, pshader_crs
, 0, 0},
674 /* TODO: sng can possibly be performed as
677 {D3DSIO_SGN
, "sng", NULL
, 2, pshader_sng
, 0, 0},
678 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
681 MUL vec.xyz, vec, tmp;
682 but I think this is better because it accounts for w properly.
688 {D3DSIO_NRM
, "nrm", NULL
, 2, pshader_nrm
, 0, 0},
689 {D3DSIO_SINCOS
, "sincos", NULL
, 2, pshader_sincos
, 0, 0},
690 {D3DSIO_REP
, "rep", GLNAME_REQUIRE_GLSL
, 2, pshader_rep
, 0, 0},
691 {D3DSIO_ENDREP
, "endrep", GLNAME_REQUIRE_GLSL
, 0, pshader_endrep
, 0, 0},
692 {D3DSIO_IF
, "if", GLNAME_REQUIRE_GLSL
, 2, pshader_if
, 0, 0},
693 {D3DSIO_IFC
, "ifc", GLNAME_REQUIRE_GLSL
, 2, pshader_ifc
, 0, 0},
694 {D3DSIO_ELSE
, "else", GLNAME_REQUIRE_GLSL
, 2, pshader_else
, 0, 0},
695 {D3DSIO_ENDIF
, "endif", GLNAME_REQUIRE_GLSL
, 2, pshader_endif
, 0, 0},
696 {D3DSIO_BREAK
, "break", GLNAME_REQUIRE_GLSL
, 2, pshader_break
, 0, 0},
697 {D3DSIO_BREAKC
, "breakc", GLNAME_REQUIRE_GLSL
, 2, pshader_breakc
, 0, 0},
698 {D3DSIO_MOVA
, "mova", GLNAME_REQUIRE_GLSL
, 2, pshader_mova
, 0, 0},
699 {D3DSIO_DEFB
, "defb", GLNAME_REQUIRE_GLSL
, 2, pshader_defb
, 0, 0},
700 {D3DSIO_DEFI
, "defi", GLNAME_REQUIRE_GLSL
, 2, pshader_defi
, 0, 0},
702 {D3DSIO_TEXCOORD
, "texcoord", "undefined", 1, pshader_texcoord
, 0, D3DPS_VERSION(1,3)},
703 {D3DSIO_TEXCOORD
, "texcrd", "undefined", 2, pshader_texcoord
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
704 {D3DSIO_TEXKILL
, "texkill", "KIL", 1, pshader_texkill
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
705 {D3DSIO_TEX
, "tex", "undefined", 1, pshader_tex
, 0, D3DPS_VERSION(1,3)},
706 {D3DSIO_TEX
, "texld", GLNAME_REQUIRE_GLSL
, 2, pshader_texld
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
707 {D3DSIO_TEX
, "texld", GLNAME_REQUIRE_GLSL
, 3, pshader_texld
, D3DPS_VERSION(2,0), -1},
708 {D3DSIO_TEXBEM
, "texbem", "undefined", 2, pshader_texbem
, 0, D3DPS_VERSION(1,3)},
709 {D3DSIO_TEXBEML
, "texbeml", GLNAME_REQUIRE_GLSL
, 2, pshader_texbeml
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
710 {D3DSIO_TEXREG2AR
,"texreg2ar","undefined", 2, pshader_texreg2ar
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
711 {D3DSIO_TEXREG2GB
,"texreg2gb","undefined", 2, pshader_texreg2gb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
712 {D3DSIO_TEXM3x2PAD
, "texm3x2pad", "undefined", 2, pshader_texm3x2pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXM3x2TEX
, "texm3x2tex", "undefined", 2, pshader_texm3x2tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXM3x3PAD
, "texm3x3pad", "undefined", 2, pshader_texm3x3pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
715 {D3DSIO_TEXM3x3DIFF
, "texm3x3diff", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3diff
, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
716 {D3DSIO_TEXM3x3SPEC
, "texm3x3spec", "undefined", 3, pshader_texm3x3spec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
717 {D3DSIO_TEXM3x3VSPEC
, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
718 {D3DSIO_TEXM3x3TEX
, "texm3x3tex", "undefined", 2, pshader_texm3x3tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_EXPP
, "expp", "EXP", 2, pshader_expp
, 0, 0},
720 {D3DSIO_LOGP
, "logp", "LOG", 2, pshader_logp
, 0, 0},
721 {D3DSIO_CND
, "cnd", GLNAME_REQUIRE_GLSL
, 4, pshader_cnd
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
722 /* def is a special operation */
723 {D3DSIO_DEF
, "def", "undefined", 5, pshader_def
, 0, 0},
724 {D3DSIO_TEXREG2RGB
, "texreg2rgb", GLNAME_REQUIRE_GLSL
, 2, pshader_texreg2rgb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
725 {D3DSIO_TEXDP3TEX
, "texdp3tex", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3tex
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
726 {D3DSIO_TEXM3x2DEPTH
, "texm3x2depth", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x2depth
,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXDP3
, "texdp3", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXM3x3
, "texm3x3", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
729 {D3DSIO_TEXDEPTH
, "texdepth", GLNAME_REQUIRE_GLSL
,1, pshader_texdepth
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
730 {D3DSIO_CMP
, "cmp", GLNAME_REQUIRE_GLSL
, 4, pshader_cmp
, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
731 {D3DSIO_BEM
, "bem", GLNAME_REQUIRE_GLSL
, 3, pshader_bem
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
732 /* TODO: dp2add can be made out of multiple instructions */
733 {D3DSIO_DP2ADD
, "dp2add", GLNAME_REQUIRE_GLSL
, 2, pshader_dp2add
, 0, 0},
734 {D3DSIO_DSX
, "dsx", GLNAME_REQUIRE_GLSL
, 2, pshader_dsx
, 0, 0},
735 {D3DSIO_DSY
, "dsy", GLNAME_REQUIRE_GLSL
, 2, pshader_dsy
, 0, 0},
736 {D3DSIO_TEXLDD
, "texldd", GLNAME_REQUIRE_GLSL
, 2, pshader_texldd
, 0, 0},
737 {D3DSIO_SETP
, "setp", GLNAME_REQUIRE_GLSL
, 2, pshader_setp
, 0, 0},
738 {D3DSIO_TEXLDL
, "texdl", GLNAME_REQUIRE_GLSL
, 2, pshader_texldl
, 0, 0},
739 {D3DSIO_BREAKP
, "breakp", GLNAME_REQUIRE_GLSL
, 2, pshader_breakp
, 0, 0},
740 {D3DSIO_PHASE
, "phase", GLNAME_REQUIRE_GLSL
, 0, pshader_nop
, 0, 0},
741 {0, NULL
, NULL
, 0, NULL
, 0, 0}
745 inline static const SHADER_OPCODE
* pshader_program_get_opcode(IWineD3DPixelShaderImpl
*This
, const DWORD code
) {
747 DWORD version
= This
->version
;
748 DWORD hex_version
= D3DPS_VERSION(version
/10, version
%10);
749 /** TODO: use dichotomic search */
750 while (NULL
!= pshader_ins
[i
].name
) {
751 if (((code
& D3DSI_OPCODE_MASK
) == pshader_ins
[i
].opcode
) &&
752 (((hex_version
>= pshader_ins
[i
].min_version
) && (hex_version
<= pshader_ins
[i
].max_version
)) ||
753 ((pshader_ins
[i
].min_version
== 0) && (pshader_ins
[i
].max_version
== 0)))) {
754 return &pshader_ins
[i
];
758 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code
, code
, code
& D3DSI_OPCODE_MASK
, version
);
762 inline static BOOL
pshader_is_version_token(DWORD token
) {
763 return 0xFFFF0000 == (token
& 0xFFFF0000);
766 inline static BOOL
pshader_is_comment_token(DWORD token
) {
767 return D3DSIO_COMMENT
== (token
& D3DSI_OPCODE_MASK
);
771 inline static void get_register_name(const DWORD param
, char* regstr
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
772 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
774 DWORD reg
= param
& REGMASK
;
775 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
779 sprintf(regstr
, "R%lu", reg
);
783 strcpy(regstr
, "fragment.color.primary");
785 strcpy(regstr
, "fragment.color.secondary");
790 sprintf(regstr
, "C%lu", reg
);
792 sprintf(regstr
, "program.env[%lu]", reg
);
794 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
795 sprintf(regstr
,"T%lu", reg
);
798 sprintf(regstr
, "%s", rastout_reg_names
[reg
]);
801 sprintf(regstr
, "oD[%lu]", reg
);
803 case D3DSPR_TEXCRDOUT
:
804 sprintf(regstr
, "oT[%lu]", reg
);
807 FIXME("Unhandled register name Type(%ld)\n", regtype
);
812 inline static void get_write_mask(const DWORD output_reg
, char *write_mask
) {
814 if ((output_reg
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
815 strcat(write_mask
, ".");
816 if (output_reg
& D3DSP_WRITEMASK_0
) strcat(write_mask
, "r");
817 if (output_reg
& D3DSP_WRITEMASK_1
) strcat(write_mask
, "g");
818 if (output_reg
& D3DSP_WRITEMASK_2
) strcat(write_mask
, "b");
819 if (output_reg
& D3DSP_WRITEMASK_3
) strcat(write_mask
, "a");
823 inline static void get_input_register_swizzle(const DWORD instr
, char *swzstring
) {
824 static const char swizzle_reg_chars
[] = "rgba";
825 DWORD swizzle
= (instr
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
826 DWORD swizzle_x
= swizzle
& 0x03;
827 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
828 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
829 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
831 * swizzle bits fields:
835 if ((D3DSP_NOSWIZZLE
>> D3DSP_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
836 if (swizzle_x
== swizzle_y
&&
837 swizzle_x
== swizzle_z
&&
838 swizzle_x
== swizzle_w
) {
839 sprintf(swzstring
, ".%c", swizzle_reg_chars
[swizzle_x
]);
841 sprintf(swzstring
, ".%c%c%c%c",
842 swizzle_reg_chars
[swizzle_x
],
843 swizzle_reg_chars
[swizzle_y
],
844 swizzle_reg_chars
[swizzle_z
],
845 swizzle_reg_chars
[swizzle_w
]);
850 inline static void addline(unsigned int *lineNum
, char *pgm
, unsigned int *pgmLength
, char *line
) {
851 int lineLen
= strlen(line
);
852 if(lineLen
+ *pgmLength
> PGMSIZE
- 1 /* - 1 to allow a NULL at the end */) {
853 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE
, lineLen
+ *pgmLength
);
856 memcpy(pgm
+ *pgmLength
, line
, lineLen
);
859 *pgmLength
+= lineLen
;
861 TRACE("GL HW (%u, %u) : %s", *lineNum
, *pgmLength
, line
);
/* Multiplier operand used by gen_output_modifier_line, indexed by its shift
 * argument.  The entries name components of the coefmul / coefdiv program
 * parameters; shifts without a matching component map to "dummy". */
864 static const char* shift_tab
[] = {
865 "dummy", /* 0 (none) */
866 "coefmul.x", /* 1 (x2) */
867 "coefmul.y", /* 2 (x4) */
868 "coefmul.z", /* 3 (x8) */
869 "coefmul.w", /* 4 (x16) */
870 "dummy", /* 5 (x32) */
871 "dummy", /* 6 (x64) */
872 "dummy", /* 7 (x128) */
873 "dummy", /* 8 (d256) */
874 "dummy", /* 9 (d128) */
875 "dummy", /* 10 (d64) */
876 "dummy", /* 11 (d32) */
877 "coefdiv.w", /* 12 (d16) */
878 "coefdiv.z", /* 13 (d8) */
879 "coefdiv.y", /* 14 (d4) */
880 "coefdiv.x" /* 15 (d2) */
883 inline static void gen_output_modifier_line(int saturate
, char *write_mask
, int shift
, char *regstr
, char* line
) {
884 /* Generate a line that does the output modifier computation */
885 sprintf(line
, "MUL%s %s%s, %s, %s;", saturate
? "_SAT" : "", regstr
, write_mask
, regstr
, shift_tab
[shift
]);
/*
 * Resolves the source register of a parameter token and applies its source
 * modifier (the D3DSP_SRCMOD_MASK bits of instr).
 *
 * instr     - source parameter token.
 * tmpreg    - selects the scratch register: 'A' + tmpreg gives the letter
 *             that follows "T" in its name.
 * outregstr - receives the operand text the caller should use.
 * line      - receives the instruction(s) that compute the modified value,
 *             for the modifiers that need one.
 * constants - forwarded to get_register_name.
 *
 * Callers in this file emit line through addline when the return value is
 * non-zero.  The register name is built in function-static scratch buffers.
 */
888 inline static int gen_input_modifier_line(const DWORD instr
, int tmpreg
, char *outregstr
, char *line
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
889 /* Generate a line that does the input modifier computation and return the input register to use */
890 static char regstr
[256];
891 static char tmpline
[256];
894 /* Assume a new line will be added */
897 /* Get register name */
898 get_register_name(instr
, regstr
, constants
);
900 TRACE(" Register name %s\n", regstr
);
901 switch (instr
& D3DSP_SRCMOD_MASK
) {
/* Plain and negated reads only rewrite the operand text */
903 strcpy(outregstr
, regstr
);
907 sprintf(outregstr
, "-%s", regstr
);
/* The remaining modifiers compute their result into the scratch register */
911 sprintf(line
, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg
, regstr
);
913 case D3DSPSM_BIASNEG
:
914 sprintf(line
, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg
, regstr
);
917 sprintf(line
, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg
, regstr
);
919 case D3DSPSM_SIGNNEG
:
920 sprintf(line
, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg
, regstr
);
923 sprintf(line
, "SUB T%c, one.x, %s;", 'A' + tmpreg
, regstr
);
926 sprintf(line
, "ADD T%c, %s, %s;", 'A' + tmpreg
, regstr
, regstr
);
929 sprintf(line
, "ADD T%c, -%s, -%s;", 'A' + tmpreg
, regstr
, regstr
);
/* Two instructions are packed into one line here: a reciprocal, then a multiply */
932 sprintf(line
, "RCP T%c, %s.z;", 'A' + tmpreg
, regstr
);
933 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
934 strcat(line
, "\n"); /* Hack */
935 strcat(line
, tmpline
);
938 sprintf(line
, "RCP T%c, %s;", 'A' + tmpreg
, regstr
);
939 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
940 strcat(line
, "\n"); /* Hack */
941 strcat(line
, tmpline
);
944 strcpy(outregstr
, regstr
);
949 /* Substitute the register name */
950 sprintf(outregstr
, "T%c", 'A' + tmpreg
);
955 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/*
 * Translates a D3D pixel shader token stream into the text of an ARB
 * fragment program and loads it into GL.
 *
 * iface     - the pixel shader; This->constants records which constants were
 *             declared by the shader itself and This->prgId receives the GL
 *             program name.
 * pFunction - token stream, read until the D3DPS_END() token.
 *
 * The program text is accumulated line by line with addline() in a heap
 * buffer of PGMSIZE bytes, which is NUL-terminated after the final "END"
 * line, handed to glProgramStringARB and then released.  A GL error after
 * the upload is reported through FIXME together with the error position.
 */
956 inline static VOID
IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
957 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
958 const DWORD
*pToken
= pFunction
;
959 const SHADER_OPCODE
*curOpcode
= NULL
;
962 unsigned lineNum
= 0; /* The line number of the generated program (for logging)*/
963 char *pgmStr
= NULL
; /* A pointer to the program data generated by this function */
965 DWORD nUseAddressRegister
= 0;
966 #if 0 /* TODO: loop register (just another address register ) */
967 BOOL hasLoops
= FALSE
;
970 BOOL saturate
; /* clamp to 0.0 -> 1.0*/
971 int row
= 0; /* not sure, something to do with macros? */
973 int version
= 0; /* The version of the shader */
975 /* Keep a running length for pgmStr so that we don't have to calculate strlen every time we concatenate */
976 unsigned int pgmLength
= 0;
978 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
979 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
980 if (This
->device
->fixupVertexBufferSize
< PGMSIZE
) {
981 HeapFree(GetProcessHeap(), 0, This
->fixupVertexBuffer
);
982 This
->fixupVertexBuffer
= HeapAlloc(GetProcessHeap() , 0, PGMSIZE
);
983 This
->fixupVertexBufferSize
= PGMSIZE
;
984 This
->fixupVertexBuffer
[0] = 0;
986 pgmStr
= This
->device
->fixupVertexBuffer
;
/* Zero-filled scratch buffer for the program text; freed at the end of this function */
988 pgmStr
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, PGMSIZE
); /* 64kb should be enough */
992 /* TODO: Think about using a first pass to work out what's required for the second pass. */
993 for(i
= 0; i
< WINED3D_PSHADER_MAX_CONSTANTS
; i
++)
994 This
->constants
[i
] = 0;
996 if (NULL
!= pToken
) {
997 while (D3DPS_END() != *pToken
) {
998 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1000 instructionSize
= pToken
& SIZEBITS
>> 27;
1003 if (pshader_is_version_token(*pToken
)) { /** version */
1007 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1008 version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1010 TRACE("found version token ps.%lu.%lu;\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1012 /* Each release of pixel shaders has had different numbers of temp registers */
1018 case 14: numTemps
=12;
1020 strcpy(tmpLine
, "!!ARBfp1.0\n");
1022 case 20: numTemps
=12;
1024 strcpy(tmpLine
, "!!ARBfp2.0\n");
1025 FIXME("No work done yet to support ps2.0 in hw\n");
1027 case 30: numTemps
=32;
1029 strcpy(tmpLine
, "!!ARBfp3.0\n");
1030 FIXME("No work done yet to support ps3.0 in hw\n");
1035 strcpy(tmpLine
, "!!ARBfp1.0\n");
1036 FIXME("Unrecognized pixel shader version!\n");
1038 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1040 /* TODO: find out how many registers are really needed */
1041 for(i
= 0; i
< 6; i
++) {
1042 sprintf(tmpLine
, "TEMP T%lu;\n", i
);
1043 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1046 for(i
= 0; i
< 6; i
++) {
1047 sprintf(tmpLine
, "TEMP R%lu;\n", i
);
1048 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1051 sprintf(tmpLine
, "TEMP TMP;\n");
1052 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1053 sprintf(tmpLine
, "TEMP TMP2;\n");
1054 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1055 sprintf(tmpLine
, "TEMP TA;\n");
1056 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1057 sprintf(tmpLine
, "TEMP TB;\n");
1058 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1059 sprintf(tmpLine
, "TEMP TC;\n");
1060 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1062 strcpy(tmpLine
, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1063 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1064 strcpy(tmpLine
, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1065 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1066 strcpy(tmpLine
, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1067 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1069 for(i
= 0; i
< 4; i
++) {
1070 sprintf(tmpLine
, "MOV T%lu, fragment.texcoord[%lu];\n", i
, i
);
1071 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1078 if (pshader_is_comment_token(*pToken
)) { /** comment */
1079 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1081 FIXME("#%s\n", (char*)pToken
);
1082 pToken
+= comment_len
;
1086 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1090 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
1092 if (NULL
== curOpcode
) {
1093 /* unknown current opcode ... (shouldn't be any!) */
1094 while (*pToken
& 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1095 FIXME("unrecognized opcode: %08lx\n", *pToken
);
1098 } else if (GLNAME_REQUIRE_GLSL
== curOpcode
->glname
) {
1099 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1100 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode
->name
);
1101 pToken
+= curOpcode
->num_params
;
1103 TRACE("Found opcode %s %s\n", curOpcode
->name
, curOpcode
->glname
);
1106 /* Build opcode for GL vertex_program */
1107 switch (curOpcode
->opcode
) {
1112 /* Address registers must be loaded with the ARL instruction */
1113 if ((((*pToken
) & D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) == D3DSPR_ADDR
) {
1114 if (((*pToken
) & REGMASK
) < nUseAddressRegister
) {
1115 strcpy(tmpLine
, "ARL");
1118 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This
, ((*pToken
) & REGMASK
));
1143 case D3DSIO_TEXKILL
:
1144 TRACE("Appending glname %s to tmpLine\n", curOpcode
->glname
);
1145 strcpy(tmpLine
, curOpcode
->glname
);
1149 DWORD reg
= *pToken
& REGMASK
;
1150 sprintf(tmpLine
, "PARAM C%lu = { %f, %f, %f, %f };\n", reg
,
1151 *((const float *)(pToken
+ 1)),
1152 *((const float *)(pToken
+ 2)),
1153 *((const float *)(pToken
+ 3)),
1154 *((const float *)(pToken
+ 4)) );
1156 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1158 This
->constants
[reg
] = 1;
1166 get_write_mask(*pToken
, tmp
);
1167 if (version
!= 14) {
1168 DWORD reg
= *pToken
& REGMASK
;
1169 sprintf(tmpLine
,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg
, tmp
, reg
, reg
);
1170 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1174 DWORD reg1
= *pToken
& REGMASK
;
1175 DWORD reg2
= *++pToken
& REGMASK
;
1176 if (gen_input_modifier_line(*pToken
, 0, reg
, tmpLine
, This
->constants
)) {
1177 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1179 sprintf(tmpLine
,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1
, tmp
, reg
, reg2
);
1180 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1186 case D3DSIO_TEXCOORD
:
1189 get_write_mask(*pToken
, tmp
);
1190 if (version
!= 14) {
1191 DWORD reg
= *pToken
& REGMASK
;
1192 sprintf(tmpLine
, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg
, tmp
, reg
);
1193 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1196 DWORD reg1
= *pToken
& REGMASK
;
1197 DWORD reg2
= *++pToken
& REGMASK
;
1198 sprintf(tmpLine
, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1
, tmp
, reg2
);
1199 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1205 case D3DSIO_TEXM3x2PAD
:
1207 DWORD reg
= *pToken
& REGMASK
;
1209 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1210 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1212 sprintf(tmpLine
, "DP3 TMP.x, T%lu, %s;\n", reg
, buf
);
1213 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1218 case D3DSIO_TEXM3x2TEX
:
1220 DWORD reg
= *pToken
& REGMASK
;
1222 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1223 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1225 sprintf(tmpLine
, "DP3 TMP.y, T%lu, %s;\n", reg
, buf
);
1226 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1227 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg
, reg
);
1228 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1233 case D3DSIO_TEXREG2AR
:
1235 DWORD reg1
= *pToken
& REGMASK
;
1236 DWORD reg2
= *++pToken
& REGMASK
;
1237 sprintf(tmpLine
, "MOV TMP.r, T%lu.a;\n", reg2
);
1238 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1239 sprintf(tmpLine
, "MOV TMP.g, T%lu.r;\n", reg2
);
1240 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1241 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1242 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1247 case D3DSIO_TEXREG2GB
:
1249 DWORD reg1
= *pToken
& REGMASK
;
1250 DWORD reg2
= *++pToken
& REGMASK
;
1251 sprintf(tmpLine
, "MOV TMP.r, T%lu.g;\n", reg2
);
1252 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1253 sprintf(tmpLine
, "MOV TMP.g, T%lu.b;\n", reg2
);
1254 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1255 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1256 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1263 DWORD reg1
= *pToken
& REGMASK
;
1264 DWORD reg2
= *++pToken
& REGMASK
;
1266 /* FIXME: Should apply the BUMPMAPENV matrix */
1267 sprintf(tmpLine
, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1
, reg2
);
1268 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1269 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1270 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1275 case D3DSIO_TEXM3x3PAD
:
1277 DWORD reg
= *pToken
& REGMASK
;
1279 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1280 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1282 sprintf(tmpLine
, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row
, reg
, buf
);
1283 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1289 case D3DSIO_TEXM3x3TEX
:
1291 DWORD reg
= *pToken
& REGMASK
;
1293 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1294 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1297 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1298 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1300 /* Cubemap textures will be more used than 3D ones. */
1301 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1302 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1307 case D3DSIO_TEXM3x3VSPEC
:
1309 DWORD reg
= *pToken
& REGMASK
;
1311 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1312 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1314 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1315 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1317 /* Construct the eye-ray vector from w coordinates */
1318 sprintf(tmpLine
, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw
[0]);
1319 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1320 sprintf(tmpLine
, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw
[1]);
1321 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1322 sprintf(tmpLine
, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg
);
1323 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1325 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1326 sprintf(tmpLine
, "DP3 TMP.w, TMP, TMP2;\n");
1327 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1328 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;\n");
1329 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1330 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1331 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1333 /* Cubemap textures will be more used than 3D ones. */
1334 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1335 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1341 case D3DSIO_TEXM3x3SPEC
:
1343 DWORD reg
= *pToken
& REGMASK
;
1344 DWORD reg3
= *(pToken
+ 2) & REGMASK
;
1346 if (gen_input_modifier_line(*(pToken
+ 1), 0, buf
, tmpLine
, This
->constants
)) {
1347 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1349 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1350 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1352 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1353 sprintf(tmpLine
, "DP3 TMP.w, TMP, C[%lu];\n", reg3
);
1354 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1356 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;\n");
1357 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1358 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3
);
1359 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1361 /* Cubemap textures will be more used than 3D ones. */
1362 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1363 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1371 if (curOpcode
->glname
== GLNAME_REQUIRE_GLSL
) {
1372 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode
->name
);
1374 FIXME("Can't handle opcode %s in hwShader\n", curOpcode
->name
);
1376 pToken
+= curOpcode
->num_params
; /* maybe + 1 */
1380 if (0 != (*pToken
& D3DSP_DSTMOD_MASK
)) {
1381 DWORD mask
= *pToken
& D3DSP_DSTMOD_MASK
;
1383 case D3DSPDM_SATURATE
: saturate
= TRUE
; break;
1384 #if 0 /* as yet unhandled modifiers */
1385 case D3DSPDM_CENTROID
: centroid
= TRUE
; break;
1386 case D3DSPDM_PP
: partialpresision
= TRUE
; break;
1387 case D3DSPDM_X2
: X2
= TRUE
; break;
1388 case D3DSPDM_X4
: X4
= TRUE
; break;
1389 case D3DSPDM_X8
: X8
= TRUE
; break;
1390 case D3DSPDM_D2
: D2
= TRUE
; break;
1391 case D3DSPDM_D4
: D4
= TRUE
; break;
1392 case D3DSPDM_D8
: D8
= TRUE
; break;
1395 TRACE("_unhandled_modifier(0x%08lx)\n", mask
);
1399 /* Generate input and output registers */
1400 if (curOpcode
->num_params
> 0) {
1402 char operands
[4][100];
1406 TRACE("(%p): Opcode has %d params\n", This
, curOpcode
->num_params
);
1408 /* Generate lines that handle input modifier computation */
1409 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1410 TRACE("(%p) : Param %ld token %lx\n", This
, i
, *(pToken
+ i
));
1411 if (gen_input_modifier_line(*(pToken
+ i
), i
- 1, regs
[i
- 1], tmpOp
, This
->constants
)) {
1412 addline(&lineNum
, pgmStr
, &pgmLength
, tmpOp
);
1416 /* Handle saturation only when no shift is present in the output modifier */
1417 if ((*pToken
& D3DSPDM_SATURATE
) && (0 == (*pToken
& D3DSP_DSTSHIFT_MASK
)))
1420 /* Handle output register */
1421 get_register_name(*pToken
, tmpOp
, This
->constants
);
1422 strcpy(operands
[0], tmpOp
);
1423 get_write_mask(*pToken
, tmpOp
);
1424 strcat(operands
[0], tmpOp
);
1426 /* This function works because of side effects from gen_input_modifier_line */
1427 /* Handle input registers */
1428 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1429 TRACE("(%p) : Regs = %s\n", This
, regs
[i
- 1]);
1430 strcpy(operands
[i
], regs
[i
- 1]);
1431 get_input_register_swizzle(*(pToken
+ i
), swzstring
);
1432 strcat(operands
[i
], swzstring
);
1435 switch(curOpcode
->opcode
) {
1437 sprintf(tmpLine
, "CMP%s %s, %s, %s, %s;\n", (saturate
? "_SAT" : ""), operands
[0], operands
[1], operands
[3], operands
[2]);
1440 sprintf(tmpLine
, "ADD TMP, -%s, coefdiv.x;", operands
[1]);
1441 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1442 sprintf(tmpLine
, "CMP%s %s, TMP, %s, %s;\n", (saturate
? "_SAT" : ""), operands
[0], operands
[2], operands
[3]);
1446 strcat(tmpLine
, "_SAT");
1447 strcat(tmpLine
, " ");
1448 strcat(tmpLine
, operands
[0]);
1449 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
1450 strcat(tmpLine
, ", ");
1451 strcat(tmpLine
, operands
[i
]);
1453 strcat(tmpLine
,";\n");
1455 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1456 pToken
+= curOpcode
->num_params
;
1458 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1459 if (curOpcode
->num_params
> 0) {
1460 DWORD param
= *(pInstr
+ 1);
1461 if (0 != (param
& D3DSP_DSTSHIFT_MASK
)) {
1463 /* Generate a line that handle the output modifier computation */
1465 char write_mask
[20];
1466 DWORD shift
= (param
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
1467 get_register_name(param
, regstr
, This
->constants
);
1468 get_write_mask(param
, write_mask
);
1469 gen_output_modifier_line(saturate
, write_mask
, shift
, regstr
, tmpLine
);
1470 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1476 /* TODO: What about result.depth? */
1477 strcpy(tmpLine
, "MOV result.color, R0;\n");
1478 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1480 strcpy(tmpLine
, "END\n");
1481 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1484 /* finally null terminate the pgmStr*/
1485 pgmStr
[pgmLength
] = 0;
/* NOTE(review): the capability tested here is ARB_VERTEX_PROGRAM although the calls below target GL_FRAGMENT_PROGRAM_ARB - confirm this is intended */
1486 if (GL_SUPPORT(ARB_VERTEX_PROGRAM
)) {
1487 /* Create the hw shader */
1489 /* pgmStr sometimes gets too long for a normal TRACE */
1490 TRACE("Generated program:\n");
1491 if (TRACE_ON(d3d_shader
)) {
1492 fprintf(stderr
, "%s\n", pgmStr
);
1495 /* TODO: change to resource.glObjectHandel or something like that */
1496 GL_EXTCALL(glGenProgramsARB(1, &This
->prgId
));
1498 TRACE("Creating a hw pixel shader, prg=%d\n", This
->prgId
);
1499 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
, This
->prgId
));
1501 TRACE("Created hw pixel shader, prg=%d\n", This
->prgId
);
1502 /* Create the program and check for errors */
1503 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
, strlen(pgmStr
), pgmStr
));
1504 if (glGetError() == GL_INVALID_OPERATION
) {
1506 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &errPos
);
1507 FIXME("HW PixelShader Error at position %d: %s\n",
1508 errPos
, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB
)));
1512 #if 1 /* if were using the data buffer of device then we don't need to free it */
1513 HeapFree(GetProcessHeap(), 0, pgmStr
);
/*
 * Traces a textual form of one pixel shader parameter token.
 *
 * param - the parameter token.  The register number comes from the
 *         D3DSP_REGNUM_MASK bits; the register type combines the
 *         D3DSP_REGTYPE_MASK bits with the EXTENDED_REG bits.
 * input - selects between the "operand output" part (write mask) and the
 *         "operand input" part (source modifier suffix and swizzle).
 *         NOTE(review): callers in this file pass 0 for the first parameter
 *         of an instruction and 1 for the following ones; the test on this
 *         argument is not among the lines shown here - confirm.
 */
1517 inline static void pshader_program_dump_ps_param(const DWORD param
, int input
) {
1518 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1519 static const char swizzle_reg_chars
[] = "rgba";
1521 /* the unknown mask is for bits not yet accounted for by any other mask... */
1522 #define UNKNOWN_MASK 0xC000
1524 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
1525 #define EXTENDED_REG 0x1800
1527 DWORD reg
= param
& D3DSP_REGNUM_MASK
;
1528 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) | ((param
& EXTENDED_REG
) >> 8);
1531 if ( ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
) ||
1532 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_BIASNEG
) ||
1533 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_SIGNNEG
) ||
1534 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_X2NEG
) )
1536 else if ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_COMP
)
1540 switch (regtype
/* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1548 TRACE("c%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1551 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
1554 case D3DSPR_RASTOUT
:
1555 TRACE("%s", rastout_reg_names
[reg
]);
1557 case D3DSPR_ATTROUT
:
1558 TRACE("oD%lu", reg
);
1560 case D3DSPR_TEXCRDOUT
:
1561 TRACE("oT%lu", reg
);
1563 case D3DSPR_CONSTINT
:
1564 TRACE("i%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1566 case D3DSPR_CONSTBOOL
:
1567 TRACE("b%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1573 TRACE("aL%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1580 /** operand output */
1582 * for better debugging traces it's done into opcode dump code
1583 * @see pshader_program_dump_opcode
1584 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1585 DWORD mask = param & D3DSP_DSTMOD_MASK;
1587 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1589 TRACE("_unhandled_modifier(0x%08lx)", mask);
1592 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1593 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1595 TRACE("_x%u", 1 << shift);
1599 if ((param
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
1601 if (param
& D3DSP_WRITEMASK_0
) TRACE(".r");
1602 if (param
& D3DSP_WRITEMASK_1
) TRACE(".g");
1603 if (param
& D3DSP_WRITEMASK_2
) TRACE(".b");
1604 if (param
& D3DSP_WRITEMASK_3
) TRACE(".a");
1607 /** operand input */
1608 DWORD swizzle
= (param
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
1609 DWORD swizzle_r
= swizzle
& 0x03;
1610 DWORD swizzle_g
= (swizzle
>> 2) & 0x03;
1611 DWORD swizzle_b
= (swizzle
>> 4) & 0x03;
1612 DWORD swizzle_a
= (swizzle
>> 6) & 0x03;
1614 if (0 != (param
& D3DSP_SRCMOD_MASK
)) {
1615 DWORD mask
= param
& D3DSP_SRCMOD_MASK
;
1616 /*TRACE("_modifier(0x%08lx) ", mask);*/
1618 case D3DSPSM_NONE
: break;
1619 case D3DSPSM_NEG
: break;
1620 case D3DSPSM_BIAS
: TRACE("_bias"); break;
1621 case D3DSPSM_BIASNEG
: TRACE("_bias"); break;
1622 case D3DSPSM_SIGN
: TRACE("_bx2"); break;
1623 case D3DSPSM_SIGNNEG
: TRACE("_bx2"); break;
1624 case D3DSPSM_COMP
: break;
1625 case D3DSPSM_X2
: TRACE("_x2"); break;
1626 case D3DSPSM_X2NEG
: TRACE("_x2"); break;
1627 case D3DSPSM_DZ
: TRACE("_dz"); break;
1628 case D3DSPSM_DW
: TRACE("_dw"); break;
1630 TRACE("_unknown(0x%08lx)", mask
);
1635 * swizzle bits fields:
1638 if ((D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1639 if (swizzle_r
== swizzle_g
&&
1640 swizzle_r
== swizzle_b
&&
1641 swizzle_r
== swizzle_a
) {
1642 TRACE(".%c", swizzle_reg_chars
[swizzle_r
]);
1645 swizzle_reg_chars
[swizzle_r
],
1646 swizzle_reg_chars
[swizzle_g
],
1647 swizzle_reg_chars
[swizzle_b
],
1648 swizzle_reg_chars
[swizzle_a
]);
/*
 * Traces the usage named by a declaration token.
 *
 * This  - the pixel shader (not referenced by the statements shown here).
 * token - declaration token: the low 16 bits select the D3DDECLUSAGE_*
 *         value; for some usages the value in bits 16-19 is traced as an
 *         index after the name.  Unrecognised usages are reported through
 *         FIXME.
 */
1654 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl
*This
, DWORD token
) {
1656 switch(token
& 0xFFFF) {
1657 case D3DDECLUSAGE_POSITION
:
1658 TRACE("%s%ld ", "position",(token
& 0xF0000) >> 16);
1660 case D3DDECLUSAGE_BLENDINDICES
:
1661 TRACE("%s ", "blend");
1663 case D3DDECLUSAGE_BLENDWEIGHT
:
1664 TRACE("%s ", "weight");
1666 case D3DDECLUSAGE_NORMAL
:
1667 TRACE("%s%ld ", "normal",(token
& 0xF0000) >> 16);
1669 case D3DDECLUSAGE_PSIZE
:
1670 TRACE("%s ", "psize");
1672 case D3DDECLUSAGE_COLOR
:
/* Index 0 is traced as "color"; higher indices as "specular" with the index reduced by one */
1673 if((token
& 0xF0000) >> 16 == 0) {
1674 TRACE("%s ", "color");
1676 TRACE("%s%ld ", "specular", ((token
& 0xF0000) >> 16) - 1);
1679 case D3DDECLUSAGE_TEXCOORD
:
1680 TRACE("%s%ld ", "texture", (token
& 0xF0000) >> 16);
1682 case D3DDECLUSAGE_TANGENT
:
1683 TRACE("%s ", "tangent");
1685 case D3DDECLUSAGE_BINORMAL
:
1686 TRACE("%s ", "binormal");
1688 case D3DDECLUSAGE_TESSFACTOR
:
1689 TRACE("%s ", "tessfactor");
1691 case D3DDECLUSAGE_POSITIONT
:
1692 TRACE("%s%ld ", "positionT",(token
& 0xF0000) >> 16);
1694 case D3DDECLUSAGE_FOG
:
1695 TRACE("%s ", "fog");
1697 case D3DDECLUSAGE_DEPTH
:
1698 TRACE("%s ", "depth");
1700 case D3DDECLUSAGE_SAMPLE
:
1701 TRACE("%s ", "sample");
1704 FIXME("Unrecognised dcl %08lx", token
& 0xFFFF);
/*
 * Assigns the token stream of a pixel shader.
 *
 * iface     - the pixel shader object.
 * pFunction - token stream ending with D3DPS_END(), or NULL.
 *
 * The stream is walked once to trace a disassembly (version, comments,
 * declarations, constant definitions, opcodes and their parameters) and to
 * work out This->functionLength.  A hardware program is then generated when
 * the settings ask for it, and a private copy of the stream is stored in
 * This->function (NULL when pFunction is NULL).  The function traces that
 * it returns D3D_OK.
 */
1708 HRESULT WINAPI
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
1709 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
1710 const DWORD
* pToken
= pFunction
;
1711 const SHADER_OPCODE
*curOpcode
= NULL
;
1714 TRACE("(%p) : Parsing programme\n", This
);
1716 if (NULL
!= pToken
) {
1717 while (D3DPS_END() != *pToken
) {
1718 if (pshader_is_version_token(*pToken
)) { /** version */
1719 This
->version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1720 TRACE("ps_%lu_%lu\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1725 if (pshader_is_comment_token(*pToken
)) { /** comment */
1726 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1728 TRACE("//%s\n", (char*)pToken
);
1729 pToken
+= comment_len
;
1730 len
+= comment_len
+ 1;
1733 if (!This
->version
) {
1734 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This
);
1736 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
1739 if (NULL
== curOpcode
) {
1741 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1742 while (*pToken
& 0x80000000) {
1744 /* unknown current opcode ... */
1745 TRACE("unrecognized opcode: %08lx", *pToken
);
1752 if (curOpcode
->opcode
== D3DSIO_DCL
) {
1753 pshader_program_dump_decl_usage(This
, *pToken
);
1756 pshader_program_dump_ps_param(*pToken
, 0);
1760 if (curOpcode
->opcode
== D3DSIO_DEF
) {
1761 TRACE("def c%lu = ", *pToken
& 0xFF);
1764 TRACE("%f ,", *(float *)pToken
);
1767 TRACE("%f ,", *(float *)pToken
);
1770 TRACE("%f ,", *(float *)pToken
);
1773 TRACE("%f", *(float *)pToken
);
1777 TRACE("%s ", curOpcode
->name
);
1778 if (curOpcode
->num_params
> 0) {
1779 pshader_program_dump_ps_param(*pToken
, 0);
1782 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1784 pshader_program_dump_ps_param(*pToken
, 1);
/* len counts DWORD tokens; one more is added before converting to bytes */
1793 This
->functionLength
= (len
+ 1) * sizeof(DWORD
);
1795 This
->functionLength
= 1; /* no Function defined use fixed function vertex processing */
1798 /* Generate HW shader in needed */
/* NOTE(review): this is gated on wined3d_settings.vs_mode == VS_HW, which by its name is the vertex shader setting, although this is pixel shader code - confirm */
1799 if (NULL
!= pFunction
&& wined3d_settings
.vs_mode
== VS_HW
) {
1800 TRACE("(%p) : Generating hardware program\n", This
);
1802 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface
, pFunction
);
1806 TRACE("(%p) : Copying the function\n", This
);
1807 /* copy the function ... because it will certainly be released by application */
1808 if (NULL
!= pFunction
) {
/* NOTE(review): the HeapAlloc result is passed to memcpy on the next statement without a NULL check */
1809 This
->function
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, This
->functionLength
);
1810 memcpy((void *)This
->function
, pFunction
, This
->functionLength
);
1812 This
->function
= NULL
;
1815 /* TODO: Some proper return values for failures */
1816 TRACE("(%p) : Returning D3D_OK\n", This
);
1820 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl
=
1822 /*** IUnknown methods ***/
1823 IWineD3DPixelShaderImpl_QueryInterface
,
1824 IWineD3DPixelShaderImpl_AddRef
,
1825 IWineD3DPixelShaderImpl_Release
,
1826 /*** IWineD3DPixelShader methods ***/
1827 IWineD3DPixelShaderImpl_GetParent
,
1828 IWineD3DPixelShaderImpl_GetDevice
,
1829 IWineD3DPixelShaderImpl_GetFunction
,
1830 /* not part of d3d */
1831 IWineD3DPixelShaderImpl_SetFunction