2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 #include "wined3d_private.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader
);
32 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
34 #if 0 /* Must not be 1 in cvs version */
35 # define PSTRACE(A) TRACE A
36 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
39 # define TRACE_VSVECTOR(name)
42 #define GLNAME_REQUIRE_GLSL ((const char *)1)
43 /* *******************************************
44 IWineD3DPixelShader IUnknown parts follow
45 ******************************************* */
46 HRESULT WINAPI
IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader
*iface
, REFIID riid
, LPVOID
*ppobj
)
48 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
49 TRACE("(%p)->(%s,%p)\n",This
,debugstr_guid(riid
),ppobj
);
50 if (IsEqualGUID(riid
, &IID_IUnknown
)
51 || IsEqualGUID(riid
, &IID_IWineD3DBase
)
52 || IsEqualGUID(riid
, &IID_IWineD3DBaseShader
)
53 || IsEqualGUID(riid
, &IID_IWineD3DPixelShader
)) {
54 IUnknown_AddRef(iface
);
62 ULONG WINAPI
IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader
*iface
) {
63 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
64 TRACE("(%p) : AddRef increasing from %ld\n", This
, This
->ref
);
65 return InterlockedIncrement(&This
->ref
);
68 ULONG WINAPI
IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader
*iface
) {
69 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
71 TRACE("(%p) : Releasing from %ld\n", This
, This
->ref
);
72 ref
= InterlockedDecrement(&This
->ref
);
74 HeapFree(GetProcessHeap(), 0, This
);
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
   when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
85 /* *******************************************
86 IWineD3DPixelShader IWineD3DPixelShader parts follow
87 ******************************************* */
89 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader
*iface
, IUnknown
** parent
){
90 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
92 *parent
= This
->parent
;
93 IUnknown_AddRef(*parent
);
94 TRACE("(%p) : returning %p\n", This
, *parent
);
98 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader
* iface
, IWineD3DDevice
**pDevice
){
99 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
100 IWineD3DDevice_AddRef((IWineD3DDevice
*)This
->wineD3DDevice
);
101 *pDevice
= (IWineD3DDevice
*)This
->wineD3DDevice
;
102 TRACE("(%p) returning %p\n", This
, *pDevice
);
107 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader
* impl
, VOID
* pData
, UINT
* pSizeOfData
) {
108 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)impl
;
109 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This
, pData
, pSizeOfData
);
112 *pSizeOfData
= This
->baseShader
.functionLength
;
115 if (*pSizeOfData
< This
->baseShader
.functionLength
) {
116 *pSizeOfData
= This
->baseShader
.functionLength
;
117 return WINED3DERR_MOREDATA
;
119 if (NULL
== This
->baseShader
.function
) { /* no function defined */
120 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This
, pData
);
121 (*(DWORD
**) pData
) = NULL
;
123 if (This
->baseShader
.functionLength
== 0) {
126 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
127 memcpy(pData
, This
->baseShader
.function
, This
->baseShader
.functionLength
);
132 /*******************************
133 * pshader functions software VM
136 void pshader_add(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
137 d
->x
= s0
->x
+ s1
->x
;
138 d
->y
= s0
->y
+ s1
->y
;
139 d
->z
= s0
->z
+ s1
->z
;
140 d
->w
= s0
->w
+ s1
->w
;
141 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
142 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
145 void pshader_dp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
146 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
;
147 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
148 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
151 void pshader_dp4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
152 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
+ s0
->w
* s1
->w
;
153 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
157 void pshader_dst(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
159 d
->y
= s0
->y
* s1
->y
;
162 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
163 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
166 void pshader_expp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
172 tmp
.f
= floorf(s0
->w
);
173 d
->x
= powf(2.0f
, tmp
.f
);
174 d
->y
= s0
->w
- tmp
.f
;
175 tmp
.f
= powf(2.0f
, s0
->w
);
176 tmp
.d
&= 0xFFFFFF00U
;
179 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
180 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
183 void pshader_logp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
184 float tmp_f
= fabsf(s0
->w
);
185 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
186 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
187 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
190 void pshader_mad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
191 d
->x
= s0
->x
* s1
->x
+ s2
->x
;
192 d
->y
= s0
->y
* s1
->y
+ s2
->y
;
193 d
->z
= s0
->z
* s1
->z
+ s2
->z
;
194 d
->w
= s0
->w
* s1
->w
+ s2
->w
;
195 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
196 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, s2
->x
, s2
->y
, s2
->z
, s2
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
199 void pshader_max(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
200 d
->x
= (s0
->x
>= s1
->x
) ? s0
->x
: s1
->x
;
201 d
->y
= (s0
->y
>= s1
->y
) ? s0
->y
: s1
->y
;
202 d
->z
= (s0
->z
>= s1
->z
) ? s0
->z
: s1
->z
;
203 d
->w
= (s0
->w
>= s1
->w
) ? s0
->w
: s1
->w
;
204 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
205 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
208 void pshader_min(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
209 d
->x
= (s0
->x
< s1
->x
) ? s0
->x
: s1
->x
;
210 d
->y
= (s0
->y
< s1
->y
) ? s0
->y
: s1
->y
;
211 d
->z
= (s0
->z
< s1
->z
) ? s0
->z
: s1
->z
;
212 d
->w
= (s0
->w
< s1
->w
) ? s0
->w
: s1
->w
;
213 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
214 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
217 void pshader_mov(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
222 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
223 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
226 void pshader_mul(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
227 d
->x
= s0
->x
* s1
->x
;
228 d
->y
= s0
->y
* s1
->y
;
229 d
->z
= s0
->z
* s1
->z
;
230 d
->w
= s0
->w
* s1
->w
;
231 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
232 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
/* nop: does no work; its only statement is the PSTRACE call. */
void pshader_nop(void)
{
    PSTRACE(("executing nop\n"));
}
240 void pshader_rcp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
241 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== s0
->w
) ? HUGE_VAL
: 1.0f
/ s0
->w
;
242 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
246 void pshader_rsq(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
247 float tmp_f
= fabsf(s0
->w
);
248 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== tmp_f
) ? HUGE_VAL
: ((1.0f
!= tmp_f
) ? 1.0f
/ sqrtf(tmp_f
) : 1.0f
);
249 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
250 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
253 void pshader_sge(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
254 d
->x
= (s0
->x
>= s1
->x
) ? 1.0f
: 0.0f
;
255 d
->y
= (s0
->y
>= s1
->y
) ? 1.0f
: 0.0f
;
256 d
->z
= (s0
->z
>= s1
->z
) ? 1.0f
: 0.0f
;
257 d
->w
= (s0
->w
>= s1
->w
) ? 1.0f
: 0.0f
;
258 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
259 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
262 void pshader_slt(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
263 d
->x
= (s0
->x
< s1
->x
) ? 1.0f
: 0.0f
;
264 d
->y
= (s0
->y
< s1
->y
) ? 1.0f
: 0.0f
;
265 d
->z
= (s0
->z
< s1
->z
) ? 1.0f
: 0.0f
;
266 d
->w
= (s0
->w
< s1
->w
) ? 1.0f
: 0.0f
;
267 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
268 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
271 void pshader_sub(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
272 d
->x
= s0
->x
- s1
->x
;
273 d
->y
= s0
->y
- s1
->y
;
274 d
->z
= s0
->z
- s1
->z
;
275 d
->w
= s0
->w
- s1
->w
;
276 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
277 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
281 * Version 1.1 specific
284 void pshader_exp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
285 d
->x
= d
->y
= d
->z
= d
->w
= powf(2.0f
, s0
->w
);
286 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
287 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
290 void pshader_log(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
291 float tmp_f
= fabsf(s0
->w
);
292 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
293 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
294 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
297 void pshader_frc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
298 d
->x
= s0
->x
- floorf(s0
->x
);
299 d
->y
= s0
->y
- floorf(s0
->y
);
302 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
303 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
306 typedef FLOAT D3DMATRIX44
[4][4];
307 typedef FLOAT D3DMATRIX43
[4][3];
308 typedef FLOAT D3DMATRIX34
[3][4];
309 typedef FLOAT D3DMATRIX33
[3][3];
310 typedef FLOAT D3DMATRIX23
[2][3];
312 void pshader_m4x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat
) {
314 * Buggy CODE: here only if cast not work for copy/paste
315 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
316 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
317 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
318 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
319 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
320 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
321 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
323 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
324 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
325 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
326 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
+ mat
[3][3] * s0
->w
;
327 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
328 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
329 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
330 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], mat
[3][3], s0
->w
, d
->w
));
333 void pshader_m4x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX34 mat
) {
334 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
335 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
336 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
338 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
339 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
340 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
341 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0
->w
, d
->w
));
344 void pshader_m3x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX43 mat
) {
345 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
346 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
347 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
348 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
;
349 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
350 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
351 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
352 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], s0
->w
, d
->w
));
355 void pshader_m3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX33 mat
) {
356 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
357 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
358 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
360 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
361 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
362 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
363 PSTRACE(("executing m3x3(4): (%f) \n", d
->w
));
366 void pshader_m3x2(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX23 mat
) {
368 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
369 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
375 * Version 2.0 specific
377 void pshader_lrp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
378 d
->x
= s0
->x
* (s1
->x
- s2
->x
) + s2
->x
;
379 d
->y
= s0
->y
* (s1
->y
- s2
->y
) + s2
->y
;
380 d
->z
= s0
->z
* (s1
->z
- s2
->z
) + s2
->z
;
381 d
->w
= s0
->w
* (s1
->w
- s2
->w
) + s2
->w
;
384 void pshader_crs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
385 d
->x
= s0
->y
* s1
->z
- s0
->z
* s1
->y
;
386 d
->y
= s0
->z
* s1
->x
- s0
->x
* s1
->z
;
387 d
->z
= s0
->x
* s1
->y
- s0
->y
* s1
->x
;
388 d
->w
= 0.9f
; /* w is undefined, so set it to something safeish */
390 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
391 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
394 void pshader_abs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
399 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
400 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
404 void pshader_texcoord(WINED3DSHADERVECTOR
* d
) {
408 void pshader_texkill(WINED3DSHADERVECTOR
* d
) {
412 void pshader_tex(WINED3DSHADERVECTOR
* d
) {
415 void pshader_texld(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
419 void pshader_texbem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
423 void pshader_texbeml(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
427 void pshader_texreg2ar(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
431 void pshader_texreg2gb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
435 void pshader_texm3x2pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
439 void pshader_texm3x2tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
443 void pshader_texm3x3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
447 void pshader_texm3x3pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
451 void pshader_texm3x3diff(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
455 void pshader_texm3x3spec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
459 void pshader_texm3x3vspec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
463 void pshader_cnd(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
467 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
468 void pshader_def(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
, WINED3DSHADERVECTOR
* s3
) {
472 void pshader_texreg2rgb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
476 void pshader_texdp3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
480 void pshader_texm3x2depth(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
484 void pshader_texdp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
488 void pshader_texm3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
492 void pshader_texdepth(WINED3DSHADERVECTOR
* d
) {
496 void pshader_cmp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
500 void pshader_bem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
504 void pshader_call(WINED3DSHADERVECTOR
* d
) {
508 void pshader_callnz(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
512 void pshader_loop(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
516 void pshader_ret(void) {
520 void pshader_endloop(void) {
524 void pshader_dcl(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
528 void pshader_pow(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
532 void pshader_nrm(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
536 void pshader_sincos3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
540 void pshader_sincos2(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
544 void pshader_rep(WINED3DSHADERVECTOR
* d
) {
548 void pshader_endrep(void) {
552 void pshader_if(WINED3DSHADERVECTOR
* d
) {
556 void pshader_ifc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
560 void pshader_else(void) {
564 void pshader_label(WINED3DSHADERVECTOR
* d
) {
568 void pshader_endif(void) {
572 void pshader_break(void) {
576 void pshader_breakc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
580 void pshader_breakp(WINED3DSHADERVECTOR
* d
) {
584 void pshader_defb(WINED3DSHADERVECTOR
* d
) {
588 void pshader_defi(WINED3DSHADERVECTOR
* d
) {
592 void pshader_dp2add(WINED3DSHADERVECTOR
* d
) {
596 void pshader_dsx(WINED3DSHADERVECTOR
* d
) {
600 void pshader_dsy(WINED3DSHADERVECTOR
* d
) {
604 void pshader_texldd(WINED3DSHADERVECTOR
* d
) {
608 void pshader_setp(WINED3DSHADERVECTOR
* d
) {
612 void pshader_texldl(WINED3DSHADERVECTOR
* d
) {
617 void pshader_hw_map2gl(SHADER_OPCODE_ARG
* arg
);
618 void pshader_hw_tex(SHADER_OPCODE_ARG
* arg
);
619 void pshader_hw_texcoord(SHADER_OPCODE_ARG
* arg
);
620 void pshader_hw_texreg2ar(SHADER_OPCODE_ARG
* arg
);
621 void pshader_hw_texreg2gb(SHADER_OPCODE_ARG
* arg
);
622 void pshader_hw_texbem(SHADER_OPCODE_ARG
* arg
);
623 void pshader_hw_def(SHADER_OPCODE_ARG
* arg
);
624 void pshader_hw_texm3x2pad(SHADER_OPCODE_ARG
* arg
);
625 void pshader_hw_texm3x2tex(SHADER_OPCODE_ARG
* arg
);
626 void pshader_hw_texm3x3pad(SHADER_OPCODE_ARG
* arg
);
627 void pshader_hw_texm3x3tex(SHADER_OPCODE_ARG
* arg
);
628 void pshader_hw_texm3x3spec(SHADER_OPCODE_ARG
* arg
);
629 void pshader_hw_texm3x3vspec(SHADER_OPCODE_ARG
* arg
);
632 * log, exp, frc, m*x* seems to be macros ins ... to see
634 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins
[] = {
637 {D3DSIO_NOP
, "nop", "NOP", 0, pshader_nop
, pshader_hw_map2gl
, NULL
, 0, 0},
638 {D3DSIO_MOV
, "mov", "MOV", 2, pshader_mov
, pshader_hw_map2gl
, NULL
, 0, 0},
639 {D3DSIO_ADD
, "add", "ADD", 3, pshader_add
, pshader_hw_map2gl
, NULL
, 0, 0},
640 {D3DSIO_SUB
, "sub", "SUB", 3, pshader_sub
, pshader_hw_map2gl
, NULL
, 0, 0},
641 {D3DSIO_MAD
, "mad", "MAD", 4, pshader_mad
, pshader_hw_map2gl
, NULL
, 0, 0},
642 {D3DSIO_MUL
, "mul", "MUL", 3, pshader_mul
, pshader_hw_map2gl
, NULL
, 0, 0},
643 {D3DSIO_RCP
, "rcp", "RCP", 2, pshader_rcp
, pshader_hw_map2gl
, NULL
, 0, 0},
644 {D3DSIO_RSQ
, "rsq", "RSQ", 2, pshader_rsq
, pshader_hw_map2gl
, NULL
, 0, 0},
645 {D3DSIO_DP3
, "dp3", "DP3", 3, pshader_dp3
, pshader_hw_map2gl
, NULL
, 0, 0},
646 {D3DSIO_DP4
, "dp4", "DP4", 3, pshader_dp4
, pshader_hw_map2gl
, NULL
, 0, 0},
647 {D3DSIO_MIN
, "min", "MIN", 3, pshader_min
, pshader_hw_map2gl
, NULL
, 0, 0},
648 {D3DSIO_MAX
, "max", "MAX", 3, pshader_max
, pshader_hw_map2gl
, NULL
, 0, 0},
649 {D3DSIO_SLT
, "slt", "SLT", 3, pshader_slt
, pshader_hw_map2gl
, NULL
, 0, 0},
650 {D3DSIO_SGE
, "sge", "SGE", 3, pshader_sge
, pshader_hw_map2gl
, NULL
, 0, 0},
651 {D3DSIO_ABS
, "abs", "ABS", 2, pshader_abs
, pshader_hw_map2gl
, NULL
, 0, 0},
652 {D3DSIO_EXP
, "exp", "EX2", 2, pshader_exp
, pshader_hw_map2gl
, NULL
, 0, 0},
653 {D3DSIO_LOG
, "log", "LG2", 2, pshader_log
, pshader_hw_map2gl
, NULL
, 0, 0},
654 {D3DSIO_EXPP
, "expp", "EXP", 2, pshader_expp
, pshader_hw_map2gl
, NULL
, 0, 0},
655 {D3DSIO_LOGP
, "logp", "LOG", 2, pshader_logp
, pshader_hw_map2gl
, NULL
, 0, 0},
656 {D3DSIO_DST
, "dst", "DST", 3, pshader_dst
, pshader_hw_map2gl
, NULL
, 0, 0},
657 {D3DSIO_LRP
, "lrp", "LRP", 4, pshader_lrp
, pshader_hw_map2gl
, NULL
, 0, 0},
658 {D3DSIO_FRC
, "frc", "FRC", 2, pshader_frc
, pshader_hw_map2gl
, NULL
, 0, 0},
659 {D3DSIO_CND
, "cnd", GLNAME_REQUIRE_GLSL
, 4, pshader_cnd
, NULL
, NULL
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
660 {D3DSIO_CMP
, "cmp", GLNAME_REQUIRE_GLSL
, 4, pshader_cmp
, NULL
, NULL
, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
661 {D3DSIO_POW
, "pow", "POW", 3, pshader_pow
, NULL
, NULL
, 0, 0},
662 {D3DSIO_CRS
, "crs", "XPS", 3, pshader_crs
, NULL
, NULL
, 0, 0},
663 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
666 MUL vec.xyz, vec, tmp;
667 but I think this is better because it accounts for w properly.
673 {D3DSIO_NRM
, "nrm", NULL
, 2, pshader_nrm
, NULL
, NULL
, 0, 0},
674 {D3DSIO_SINCOS
, "sincos", NULL
, 4, pshader_sincos2
, NULL
, NULL
, D3DPS_VERSION(2,0), D3DPS_VERSION(2,0)},
675 {D3DSIO_SINCOS
, "sincos", NULL
, 2, pshader_sincos3
, NULL
, NULL
, D3DPS_VERSION(3,0), -1},
/* TODO: dp2add can be made out of multiple instructions */
677 {D3DSIO_DP2ADD
, "dp2add", GLNAME_REQUIRE_GLSL
, 2, pshader_dp2add
, NULL
, NULL
, 0, 0},
680 {D3DSIO_M4x4
, "m4x4", "undefined", 3, pshader_m4x4
, NULL
, NULL
, 0, 0},
681 {D3DSIO_M4x3
, "m4x3", "undefined", 3, pshader_m4x3
, NULL
, NULL
, 0, 0},
682 {D3DSIO_M3x4
, "m3x4", "undefined", 3, pshader_m3x4
, NULL
, NULL
, 0, 0},
683 {D3DSIO_M3x3
, "m3x3", "undefined", 3, pshader_m3x3
, NULL
, NULL
, 0, 0},
684 {D3DSIO_M3x2
, "m3x2", "undefined", 3, pshader_m3x2
, NULL
, NULL
, 0, 0},
686 /* Register declarations */
687 {D3DSIO_DCL
, "dcl", NULL
, 2, pshader_dcl
, NULL
, NULL
, 0, 0},
689 /* Flow control - requires GLSL or software shaders */
690 {D3DSIO_REP
, "rep", GLNAME_REQUIRE_GLSL
, 1, pshader_rep
, NULL
, NULL
, 0, 0},
691 {D3DSIO_ENDREP
, "endrep", GLNAME_REQUIRE_GLSL
, 0, pshader_endrep
, NULL
, NULL
, 0, 0},
692 {D3DSIO_IF
, "if", GLNAME_REQUIRE_GLSL
, 1, pshader_if
, NULL
, NULL
, 0, 0},
693 {D3DSIO_IFC
, "ifc", GLNAME_REQUIRE_GLSL
, 2, pshader_ifc
, NULL
, NULL
, 0, 0},
694 {D3DSIO_ELSE
, "else", GLNAME_REQUIRE_GLSL
, 0, pshader_else
, NULL
, NULL
, 0, 0},
695 {D3DSIO_ENDIF
, "endif", GLNAME_REQUIRE_GLSL
, 0, pshader_endif
, NULL
, NULL
, 0, 0},
696 {D3DSIO_BREAK
, "break", GLNAME_REQUIRE_GLSL
, 0, pshader_break
, NULL
, NULL
, 0, 0},
697 {D3DSIO_BREAKC
, "breakc", GLNAME_REQUIRE_GLSL
, 2, pshader_breakc
, NULL
, NULL
, 0, 0},
698 {D3DSIO_BREAKP
, "breakp", GLNAME_REQUIRE_GLSL
, 1, pshader_breakp
, NULL
, NULL
, 0, 0},
699 {D3DSIO_CALL
, "call", GLNAME_REQUIRE_GLSL
, 1, pshader_call
, NULL
, NULL
, 0, 0},
700 {D3DSIO_CALLNZ
, "callnz", GLNAME_REQUIRE_GLSL
, 2, pshader_callnz
, NULL
, NULL
, 0, 0},
701 {D3DSIO_LOOP
, "loop", GLNAME_REQUIRE_GLSL
, 2, pshader_loop
, NULL
, NULL
, 0, 0},
702 {D3DSIO_RET
, "ret", GLNAME_REQUIRE_GLSL
, 0, pshader_ret
, NULL
, NULL
, 0, 0},
703 {D3DSIO_ENDLOOP
, "endloop", GLNAME_REQUIRE_GLSL
, 0, pshader_endloop
, NULL
, NULL
, 0, 0},
704 {D3DSIO_LABEL
, "label", GLNAME_REQUIRE_GLSL
, 1, pshader_label
, NULL
, NULL
, 0, 0},
706 /* Constant definitions */
707 {D3DSIO_DEF
, "def", "undefined", 5, pshader_def
, pshader_hw_def
, NULL
, 0, 0},
708 {D3DSIO_DEFB
, "defb", GLNAME_REQUIRE_GLSL
, 2, pshader_defb
, NULL
, NULL
, 0, 0},
709 {D3DSIO_DEFI
, "defi", GLNAME_REQUIRE_GLSL
, 2, pshader_defi
, NULL
, NULL
, 0, 0},
712 {D3DSIO_TEXCOORD
, "texcoord", "undefined", 1, pshader_texcoord
, pshader_hw_texcoord
, NULL
, 0, D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXCOORD
, "texcrd", "undefined", 2, pshader_texcoord
, pshader_hw_texcoord
, NULL
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
714 {D3DSIO_TEXKILL
, "texkill", "KIL", 1, pshader_texkill
, pshader_hw_map2gl
, NULL
, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
715 {D3DSIO_TEX
, "tex", "undefined", 1, pshader_tex
, pshader_hw_tex
, NULL
, 0, D3DPS_VERSION(1,3)},
716 {D3DSIO_TEX
, "texld", "undefined", 2, pshader_texld
, pshader_hw_tex
, NULL
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
717 {D3DSIO_TEX
, "texld", "undefined", 3, pshader_texld
, pshader_hw_tex
, NULL
, D3DPS_VERSION(2,0), -1},
718 {D3DSIO_TEXBEM
, "texbem", "undefined", 2, pshader_texbem
, pshader_hw_texbem
, NULL
, 0, D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXBEML
, "texbeml", GLNAME_REQUIRE_GLSL
, 2, pshader_texbeml
, NULL
, NULL
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_TEXREG2AR
,"texreg2ar","undefined", 2, pshader_texreg2ar
, pshader_hw_texreg2ar
, NULL
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
721 {D3DSIO_TEXREG2GB
,"texreg2gb","undefined", 2, pshader_texreg2gb
, pshader_hw_texreg2gb
, NULL
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
722 {D3DSIO_TEXREG2RGB
, "texreg2rgb", GLNAME_REQUIRE_GLSL
, 2, pshader_texreg2rgb
, NULL
, NULL
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
723 {D3DSIO_TEXM3x2PAD
, "texm3x2pad", "undefined", 2, pshader_texm3x2pad
, pshader_hw_texm3x2pad
, NULL
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
724 {D3DSIO_TEXM3x2TEX
, "texm3x2tex", "undefined", 2, pshader_texm3x2tex
, pshader_hw_texm3x2tex
, NULL
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
725 {D3DSIO_TEXM3x3PAD
, "texm3x3pad", "undefined", 2, pshader_texm3x3pad
, pshader_hw_texm3x3pad
, NULL
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
726 {D3DSIO_TEXM3x3DIFF
, "texm3x3diff", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3diff
, NULL
, NULL
, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
727 {D3DSIO_TEXM3x3SPEC
, "texm3x3spec", "undefined", 3, pshader_texm3x3spec
, pshader_hw_texm3x3spec
, NULL
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXM3x3VSPEC
, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec
, pshader_hw_texm3x3vspec
, NULL
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
729 {D3DSIO_TEXM3x3TEX
, "texm3x3tex", "undefined", 2, pshader_texm3x3tex
, pshader_hw_texm3x3tex
, NULL
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
730 {D3DSIO_TEXDP3TEX
, "texdp3tex", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3tex
, NULL
, NULL
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
731 {D3DSIO_TEXM3x2DEPTH
, "texm3x2depth", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x2depth
, NULL
, NULL
, D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
732 {D3DSIO_TEXDP3
, "texdp3", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3
, NULL
, NULL
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
733 {D3DSIO_TEXM3x3
, "texm3x3", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3
, NULL
, NULL
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
734 {D3DSIO_TEXDEPTH
, "texdepth", GLNAME_REQUIRE_GLSL
, 1, pshader_texdepth
, NULL
, NULL
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
735 {D3DSIO_BEM
, "bem", GLNAME_REQUIRE_GLSL
, 3, pshader_bem
, NULL
, NULL
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
/* TODO: dp2add can be made out of multiple instructions */
737 {D3DSIO_DSX
, "dsx", GLNAME_REQUIRE_GLSL
, 2, pshader_dsx
, NULL
, NULL
, 0, 0},
738 {D3DSIO_DSY
, "dsy", GLNAME_REQUIRE_GLSL
, 2, pshader_dsy
, NULL
, NULL
, 0, 0},
739 {D3DSIO_TEXLDD
, "texldd", GLNAME_REQUIRE_GLSL
, 2, pshader_texldd
, NULL
, NULL
, 0, 0},
740 {D3DSIO_SETP
, "setp", GLNAME_REQUIRE_GLSL
, 2, pshader_setp
, NULL
, NULL
, 0, 0},
741 {D3DSIO_TEXLDL
, "texdl", GLNAME_REQUIRE_GLSL
, 2, pshader_texldl
, NULL
, NULL
, 0, 0},
742 {D3DSIO_PHASE
, "phase", GLNAME_REQUIRE_GLSL
, 0, pshader_nop
, NULL
, NULL
, 0, 0},
743 {0, NULL
, NULL
, 0, NULL
, NULL
, 0, 0}
746 inline static void get_register_name(const DWORD param
, char* regstr
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
747 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
749 DWORD reg
= param
& D3DSP_REGNUM_MASK
;
750 DWORD regtype
= shader_get_regtype(param
);
754 sprintf(regstr
, "R%lu", reg
);
758 strcpy(regstr
, "fragment.color.primary");
760 strcpy(regstr
, "fragment.color.secondary");
765 sprintf(regstr
, "C%lu", reg
);
767 sprintf(regstr
, "program.env[%lu]", reg
);
769 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
770 sprintf(regstr
,"T%lu", reg
);
773 sprintf(regstr
, "%s", rastout_reg_names
[reg
]);
776 sprintf(regstr
, "oD[%lu]", reg
);
778 case D3DSPR_TEXCRDOUT
:
779 sprintf(regstr
, "oT[%lu]", reg
);
782 FIXME("Unhandled register name Type(%ld)\n", regtype
);
787 inline static void get_write_mask(const DWORD output_reg
, char *write_mask
) {
789 if ((output_reg
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
790 strcat(write_mask
, ".");
791 if (output_reg
& D3DSP_WRITEMASK_0
) strcat(write_mask
, "r");
792 if (output_reg
& D3DSP_WRITEMASK_1
) strcat(write_mask
, "g");
793 if (output_reg
& D3DSP_WRITEMASK_2
) strcat(write_mask
, "b");
794 if (output_reg
& D3DSP_WRITEMASK_3
) strcat(write_mask
, "a");
798 inline static void get_input_register_swizzle(const DWORD instr
, char *swzstring
) {
799 static const char swizzle_reg_chars
[] = "rgba";
800 DWORD swizzle
= (instr
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
801 DWORD swizzle_x
= swizzle
& 0x03;
802 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
803 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
804 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
806 * swizzle bits fields:
810 if ((D3DSP_NOSWIZZLE
>> D3DSP_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
811 if (swizzle_x
== swizzle_y
&&
812 swizzle_x
== swizzle_z
&&
813 swizzle_x
== swizzle_w
) {
814 sprintf(swzstring
, ".%c", swizzle_reg_chars
[swizzle_x
]);
816 sprintf(swzstring
, ".%c%c%c%c",
817 swizzle_reg_chars
[swizzle_x
],
818 swizzle_reg_chars
[swizzle_y
],
819 swizzle_reg_chars
[swizzle_z
],
820 swizzle_reg_chars
[swizzle_w
]);
/* Multiplier operand for each value of the 4-bit destination shift field.
 * The names refer to components of the coefmul / coefdiv parameters; shifts
 * with no matching component map to the placeholder "dummy".
 * Both the strings and the table itself are read-only. */
static const char* const shift_tab[] = {
    "dummy",     /*  0 (none) */
    "coefmul.x", /*  1 (x2)   */
    "coefmul.y", /*  2 (x4)   */
    "coefmul.z", /*  3 (x8)   */
    "coefmul.w", /*  4 (x16)  */
    "dummy",     /*  5 (x32)  */
    "dummy",     /*  6 (x64)  */
    "dummy",     /*  7 (x128) */
    "dummy",     /*  8 (d256) */
    "dummy",     /*  9 (d128) */
    "dummy",     /* 10 (d64)  */
    "dummy",     /* 11 (d32)  */
    "coefdiv.w", /* 12 (d16)  */
    "coefdiv.z", /* 13 (d8)   */
    "coefdiv.y", /* 14 (d4)   */
    "coefdiv.x"  /* 15 (d2)   */
};
844 inline static void gen_output_modifier_line(int saturate
, char *write_mask
, int shift
, char *regstr
, char* line
) {
845 /* Generate a line that does the output modifier computation */
846 sprintf(line
, "MUL%s %s%s, %s, %s;", saturate
? "_SAT" : "", regstr
, write_mask
, regstr
, shift_tab
[shift
]);
849 inline static int gen_input_modifier_line(const DWORD instr
, int tmpreg
, char *outregstr
, char *line
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
850 /* Generate a line that does the input modifier computation and return the input register to use */
851 static char regstr
[256];
852 static char tmpline
[256];
855 /* Assume a new line will be added */
858 /* Get register name */
859 get_register_name(instr
, regstr
, constants
);
861 TRACE(" Register name %s\n", regstr
);
862 switch (instr
& D3DSP_SRCMOD_MASK
) {
864 strcpy(outregstr
, regstr
);
868 sprintf(outregstr
, "-%s", regstr
);
872 sprintf(line
, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg
, regstr
);
874 case D3DSPSM_BIASNEG
:
875 sprintf(line
, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg
, regstr
);
878 sprintf(line
, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg
, regstr
);
880 case D3DSPSM_SIGNNEG
:
881 sprintf(line
, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg
, regstr
);
884 sprintf(line
, "SUB T%c, one.x, %s;", 'A' + tmpreg
, regstr
);
887 sprintf(line
, "ADD T%c, %s, %s;", 'A' + tmpreg
, regstr
, regstr
);
890 sprintf(line
, "ADD T%c, -%s, -%s;", 'A' + tmpreg
, regstr
, regstr
);
893 sprintf(line
, "RCP T%c, %s.z;", 'A' + tmpreg
, regstr
);
894 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
895 strcat(line
, "\n"); /* Hack */
896 strcat(line
, tmpline
);
899 sprintf(line
, "RCP T%c, %s.w;", 'A' + tmpreg
, regstr
);
900 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
901 strcat(line
, "\n"); /* Hack */
902 strcat(line
, tmpline
);
905 strcpy(outregstr
, regstr
);
910 /* Substitute the register name */
911 sprintf(outregstr
, "T%c", 'A' + tmpreg
);
917 void pshader_set_version(
918 IWineD3DPixelShaderImpl
*This
,
921 DWORD major
= (version
>> 8) & 0x0F;
922 DWORD minor
= version
& 0x0F;
924 This
->baseShader
.hex_version
= version
;
925 This
->baseShader
.version
= major
* 10 + minor
;
926 TRACE("ps_%lu_%lu\n", major
, minor
);
928 This
->baseShader
.limits
.address
= 0;
930 switch (This
->baseShader
.version
) {
934 case 13: This
->baseShader
.limits
.temporary
= 2;
935 This
->baseShader
.limits
.constant_float
= 8;
936 This
->baseShader
.limits
.constant_int
= 0;
937 This
->baseShader
.limits
.constant_bool
= 0;
938 This
->baseShader
.limits
.texture
= 4;
941 case 14: This
->baseShader
.limits
.temporary
= 6;
942 This
->baseShader
.limits
.constant_float
= 8;
943 This
->baseShader
.limits
.constant_int
= 0;
944 This
->baseShader
.limits
.constant_bool
= 0;
945 This
->baseShader
.limits
.texture
= 6;
948 /* FIXME: temporaries must match D3DPSHADERCAPS2_0.NumTemps */
949 case 20: This
->baseShader
.limits
.temporary
= 32;
950 This
->baseShader
.limits
.constant_float
= 32;
951 This
->baseShader
.limits
.constant_int
= 16;
952 This
->baseShader
.limits
.constant_bool
= 16;
953 This
->baseShader
.limits
.texture
= 8;
956 case 30: This
->baseShader
.limits
.temporary
= 32;
957 This
->baseShader
.limits
.constant_float
= 224;
958 This
->baseShader
.limits
.constant_int
= 16;
959 This
->baseShader
.limits
.constant_bool
= 16;
960 This
->baseShader
.limits
.texture
= 0;
963 default: This
->baseShader
.limits
.temporary
= 32;
964 This
->baseShader
.limits
.constant_float
= 8;
965 This
->baseShader
.limits
.constant_int
= 0;
966 This
->baseShader
.limits
.constant_bool
= 0;
967 This
->baseShader
.limits
.texture
= 8;
968 FIXME("Unrecognized pixel shader version %lx!\n", version
);
972 /* Map the opcode 1-to-1 to the GL code */
973 /* FIXME: fix CMP/CND, get rid of this switch */
974 void pshader_hw_map2gl(SHADER_OPCODE_ARG
* arg
) {
976 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
977 CONST SHADER_OPCODE
* curOpcode
= arg
->opcode
;
978 SHADER_BUFFER
* buffer
= arg
->buffer
;
979 DWORD dst
= arg
->dst
;
980 DWORD
* src
= arg
->src
;
984 char output_rname
[256];
985 char output_wmask
[20];
986 BOOL saturate
= FALSE
;
989 TRACE("Appending glname %s to tmpLine\n", curOpcode
->glname
);
990 strcpy(tmpLine
, curOpcode
->glname
);
992 /* Process modifiers */
993 if (0 != (dst
& D3DSP_DSTMOD_MASK
)) {
994 DWORD mask
= dst
& D3DSP_DSTMOD_MASK
;
996 case D3DSPDM_SATURATE
: saturate
= TRUE
; break;
997 #if 0 /* as yet unhandled modifiers */
998 case D3DSPDM_CENTROID
: centroid
= TRUE
; break;
999 case D3DSPDM_PP
: partialpresision
= TRUE
; break;
1002 TRACE("_unhandled_modifier(0x%08lx)\n", mask
);
1005 shift
= (dst
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
1007 /* Generate input and output registers */
1008 if (curOpcode
->num_params
> 0) {
1010 char operands
[4][100];
1014 /* Generate lines that handle input modifier computation */
1015 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1016 TRACE("(%p) : Param %u token %lx\n", This
, i
, src
[i
- 1]);
1017 if (gen_input_modifier_line(src
[i
- 1], i
- 1, regs
[i
- 1], tmpOp
, This
->constants
))
1018 shader_addline(buffer
, tmpOp
);
1021 /* Handle output register */
1022 get_register_name(dst
, output_rname
, This
->constants
);
1023 strcpy(operands
[0], output_rname
);
1024 get_write_mask(dst
, output_wmask
);
1025 strcat(operands
[0], output_wmask
);
1027 /* This function works because of side effects from gen_input_modifier_line */
1028 /* Handle input registers */
1029 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1030 TRACE("(%p) : Regs = %s\n", This
, regs
[i
- 1]);
1031 strcpy(operands
[i
], regs
[i
- 1]);
1032 get_input_register_swizzle(src
[i
- 1], swzstring
);
1033 strcat(operands
[i
], swzstring
);
1036 switch(curOpcode
->opcode
) {
1038 sprintf(tmpLine
, "CMP%s %s, %s, %s, %s;\n", (saturate
? "_SAT" : ""),
1039 operands
[0], operands
[1], operands
[3], operands
[2]);
1042 shader_addline(buffer
, "ADD TMP, -%s, coefdiv.x;\n", operands
[1]);
1043 sprintf(tmpLine
, "CMP%s %s, TMP, %s, %s;\n", (saturate
? "_SAT" : ""),
1044 operands
[0], operands
[2], operands
[3]);
1048 if (saturate
&& (shift
== 0))
1049 strcat(tmpLine
, "_SAT");
1050 strcat(tmpLine
, " ");
1051 strcat(tmpLine
, operands
[0]);
1052 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
1053 strcat(tmpLine
, ", ");
1054 strcat(tmpLine
, operands
[i
]);
1056 strcat(tmpLine
,";\n");
1058 shader_addline(buffer
, tmpLine
);
1060 /* A shift requires another line. */
1062 gen_output_modifier_line(saturate
, output_wmask
, shift
, output_rname
, tmpLine
);
1063 shader_addline(buffer
, tmpLine
);
1068 void pshader_hw_tex(SHADER_OPCODE_ARG
* arg
) {
1070 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
1071 DWORD dst
= arg
->dst
;
1072 DWORD
* src
= arg
->src
;
1073 SHADER_BUFFER
* buffer
= arg
->buffer
;
1074 DWORD version
= This
->baseShader
.version
;
1079 char reg_coord_swz
[20] = "";
1080 DWORD reg_dest_code
;
1081 DWORD reg_sampler_code
;
1083 /* All versions have a destination register */
1084 reg_dest_code
= dst
& D3DSP_REGNUM_MASK
;
1085 get_register_name(dst
, reg_dest
, This
->constants
);
1087 /* 1.0-1.3: Use destination register as coordinate source. No modifiers.
1088 1.4: Use provided coordinate source register. _dw, _dz, swizzle allowed.
1089 2.0+: Use provided coordinate source register. No modifiers.
1090 3.0+: Use provided coordinate source register. Swizzle allowed */
1092 strcpy(reg_coord
, reg_dest
);
1093 else if (version
== 14) {
1094 if (gen_input_modifier_line(src
[0], 0, reg_coord
, tmpLine
, This
->constants
))
1095 shader_addline(buffer
, tmpLine
);
1096 get_input_register_swizzle(src
[0], reg_coord_swz
);
1098 else if (version
> 14 && version
< 30)
1099 get_register_name(src
[0], reg_coord
, This
->constants
);
1100 else if (version
>= 30) {
1101 get_input_register_swizzle(src
[0], reg_coord_swz
);
1102 get_register_name(src
[0], reg_coord
, This
->constants
);
1105 /* 1.0-1.4: Use destination register number as texture code.
1106 2.0+: Use provided sampler number as texure code. */
1108 reg_sampler_code
= reg_dest_code
;
1110 reg_sampler_code
= src
[1] & D3DSP_REGNUM_MASK
;
1112 shader_addline(buffer
, "TEX %s, %s%s, texture[%lu], 2D;\n",
1113 reg_dest
, reg_coord
, reg_coord_swz
, reg_sampler_code
);
1116 void pshader_hw_texcoord(SHADER_OPCODE_ARG
* arg
) {
1118 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
1119 DWORD dst
= arg
->dst
;
1120 DWORD
* src
= arg
->src
;
1121 SHADER_BUFFER
* buffer
= arg
->buffer
;
1122 DWORD version
= This
->baseShader
.version
;
1125 get_write_mask(dst
, tmp
);
1126 if (version
!= 14) {
1127 DWORD reg
= dst
& D3DSP_REGNUM_MASK
;
1128 shader_addline(buffer
, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg
, tmp
, reg
);
1130 DWORD reg1
= dst
& D3DSP_REGNUM_MASK
;
1131 DWORD reg2
= src
[0] & D3DSP_REGNUM_MASK
;
1132 shader_addline(buffer
, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1
, tmp
, reg2
);
1136 void pshader_hw_texreg2ar(SHADER_OPCODE_ARG
* arg
) {
1138 SHADER_BUFFER
* buffer
= arg
->buffer
;
1140 DWORD reg1
= arg
->dst
& D3DSP_REGNUM_MASK
;
1141 DWORD reg2
= arg
->src
[0] & D3DSP_REGNUM_MASK
;
1142 shader_addline(buffer
, "MOV TMP.r, T%lu.a;\n", reg2
);
1143 shader_addline(buffer
, "MOV TMP.g, T%lu.r;\n", reg2
);
1144 shader_addline(buffer
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1147 void pshader_hw_texreg2gb(SHADER_OPCODE_ARG
* arg
) {
1149 SHADER_BUFFER
* buffer
= arg
->buffer
;
1151 DWORD reg1
= arg
->dst
& D3DSP_REGNUM_MASK
;
1152 DWORD reg2
= arg
->src
[0] & D3DSP_REGNUM_MASK
;
1153 shader_addline(buffer
, "MOV TMP.r, T%lu.g;\n", reg2
);
1154 shader_addline(buffer
, "MOV TMP.g, T%lu.b;\n", reg2
);
1155 shader_addline(buffer
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1158 void pshader_hw_texbem(SHADER_OPCODE_ARG
* arg
) {
1160 SHADER_BUFFER
* buffer
= arg
->buffer
;
1162 DWORD reg1
= arg
->dst
& D3DSP_REGNUM_MASK
;
1163 DWORD reg2
= arg
->src
[0] & D3DSP_REGNUM_MASK
;
1165 /* FIXME: Should apply the BUMPMAPENV matrix */
1166 shader_addline(buffer
, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1
, reg2
);
1167 shader_addline(buffer
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1170 void pshader_hw_def(SHADER_OPCODE_ARG
* arg
) {
1172 IWineD3DPixelShaderImpl
* shader
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
1173 DWORD reg
= arg
->dst
& D3DSP_REGNUM_MASK
;
1174 SHADER_BUFFER
* buffer
= arg
->buffer
;
1176 shader_addline(buffer
,
1177 "PARAM C%lu = { %f, %f, %f, %f };\n", reg
,
1178 *((float*) (arg
->src
+ 0)),
1179 *((float*) (arg
->src
+ 1)),
1180 *((float*) (arg
->src
+ 2)),
1181 *((float*) (arg
->src
+ 3)) );
1183 shader
->constants
[reg
] = 1;
1186 void pshader_hw_texm3x2pad(SHADER_OPCODE_ARG
* arg
) {
1188 IWineD3DPixelShaderImpl
* shader
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
1189 DWORD reg
= arg
->dst
& D3DSP_REGNUM_MASK
;
1190 SHADER_BUFFER
* buffer
= arg
->buffer
;
1194 if (gen_input_modifier_line(arg
->src
[0], 0, buf
, tmpLine
, shader
->constants
))
1195 shader_addline(buffer
, tmpLine
);
1196 shader_addline(buffer
, "DP3 TMP.x, T%lu, %s;\n", reg
, buf
);
1199 void pshader_hw_texm3x2tex(SHADER_OPCODE_ARG
* arg
) {
1201 IWineD3DPixelShaderImpl
* shader
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
1202 DWORD reg
= arg
->dst
& D3DSP_REGNUM_MASK
;
1203 SHADER_BUFFER
* buffer
= arg
->buffer
;
1207 if (gen_input_modifier_line(arg
->src
[0], 0, buf
, tmpLine
, shader
->constants
))
1208 shader_addline(buffer
, tmpLine
);
1209 shader_addline(buffer
, "DP3 TMP.y, T%lu, %s;\n", reg
, buf
);
1210 shader_addline(buffer
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg
, reg
);
1213 void pshader_hw_texm3x3pad(SHADER_OPCODE_ARG
* arg
) {
1215 IWineD3DPixelShaderImpl
* shader
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
1216 DWORD reg
= arg
->dst
& D3DSP_REGNUM_MASK
;
1217 SHADER_BUFFER
* buffer
= arg
->buffer
;
1218 SHADER_PARSE_STATE current_state
= shader
->baseShader
.parse_state
;
1222 if (gen_input_modifier_line(arg
->src
[0], 0, buf
, tmpLine
, shader
->constants
))
1223 shader_addline(buffer
, tmpLine
);
1224 shader_addline(buffer
, "DP3 TMP.%c, T%lu, %s;\n", 'x' + current_state
.current_row
, reg
, buf
);
1225 current_state
.texcoord_w
[current_state
.current_row
++] = reg
;
1228 void pshader_hw_texm3x3tex(SHADER_OPCODE_ARG
* arg
) {
1230 IWineD3DPixelShaderImpl
* shader
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
1231 DWORD reg
= arg
->dst
& D3DSP_REGNUM_MASK
;
1232 SHADER_BUFFER
* buffer
= arg
->buffer
;
1233 SHADER_PARSE_STATE current_state
= shader
->baseShader
.parse_state
;
1237 if (gen_input_modifier_line(arg
->src
[0], 0, buf
, tmpLine
, shader
->constants
))
1238 shader_addline(buffer
, tmpLine
);
1239 shader_addline(buffer
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1241 /* Cubemap textures will be more used than 3D ones. */
1242 shader_addline(buffer
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1243 current_state
.current_row
= 0;
1246 void pshader_hw_texm3x3vspec(SHADER_OPCODE_ARG
* arg
) {
1248 IWineD3DPixelShaderImpl
* shader
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
1249 DWORD reg
= arg
->dst
& D3DSP_REGNUM_MASK
;
1250 SHADER_BUFFER
* buffer
= arg
->buffer
;
1251 SHADER_PARSE_STATE current_state
= shader
->baseShader
.parse_state
;
1255 if (gen_input_modifier_line(arg
->src
[0], 0, buf
, tmpLine
, shader
->constants
))
1256 shader_addline(buffer
, tmpLine
);
1257 shader_addline(buffer
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1259 /* Construct the eye-ray vector from w coordinates */
1260 shader_addline(buffer
, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", current_state
.texcoord_w
[0]);
1261 shader_addline(buffer
, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", current_state
.texcoord_w
[1]);
1262 shader_addline(buffer
, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg
);
1264 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1265 shader_addline(buffer
, "DP3 TMP.w, TMP, TMP2;\n");
1266 shader_addline(buffer
, "MUL TMP, TMP.w, TMP;\n");
1267 shader_addline(buffer
, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1269 /* Cubemap textures will be more used than 3D ones. */
1270 shader_addline(buffer
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1271 current_state
.current_row
= 0;
1274 void pshader_hw_texm3x3spec(SHADER_OPCODE_ARG
* arg
) {
1276 IWineD3DPixelShaderImpl
* shader
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
1277 DWORD reg
= arg
->dst
& D3DSP_REGNUM_MASK
;
1278 DWORD reg3
= arg
->src
[1] & D3DSP_REGNUM_MASK
;
1279 SHADER_PARSE_STATE current_state
= shader
->baseShader
.parse_state
;
1280 SHADER_BUFFER
* buffer
= arg
->buffer
;
1284 if (gen_input_modifier_line(arg
->src
[0], 0, buf
, tmpLine
, shader
->constants
))
1285 shader_addline(buffer
, tmpLine
);
1286 shader_addline(buffer
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1288 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1289 shader_addline(buffer
, "DP3 TMP.w, TMP, C[%lu];\n", reg3
);
1290 shader_addline(buffer
, "MUL TMP, TMP.w, TMP;\n");
1291 shader_addline(buffer
, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3
);
1293 /* Cubemap textures will be more used than 3D ones. */
1294 shader_addline(buffer
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1295 current_state
.current_row
= 0;
1298 /** Generate a pixel shader string using either GL_FRAGMENT_PROGRAM_ARB
1299 or GLSL and send it to the card */
1300 inline static VOID
IWineD3DPixelShaderImpl_GenerateShader(
1301 IWineD3DPixelShader
*iface
,
1302 CONST DWORD
*pFunction
) {
1304 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
1305 SHADER_BUFFER buffer
;
1307 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
1308 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
1309 if (This
->device
->fixupVertexBufferSize
< SHADER_PGMSIZE
) {
1310 HeapFree(GetProcessHeap(), 0, This
->fixupVertexBuffer
);
1311 This
->fixupVertexBuffer
= HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE
);
1312 This
->fixupVertexBufferSize
= PGMSIZE
;
1313 This
->fixupVertexBuffer
[0] = 0;
1315 buffer
.buffer
= This
->device
->fixupVertexBuffer
;
1317 buffer
.buffer
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, SHADER_PGMSIZE
);
1322 /* TODO: Optionally, generate the GLSL shader instead */
1323 if (GL_SUPPORT(ARB_VERTEX_PROGRAM
)) {
1324 /* Create the hw ARB shader */
1325 shader_addline(&buffer
, "!!ARBfp1.0\n");
1327 shader_addline(&buffer
, "TEMP TMP;\n"); /* Used in matrix ops */
1328 shader_addline(&buffer
, "TEMP TMP2;\n"); /* Used in matrix ops */
1329 shader_addline(&buffer
, "TEMP TA;\n"); /* Used for modifiers */
1330 shader_addline(&buffer
, "TEMP TB;\n"); /* Used for modifiers */
1331 shader_addline(&buffer
, "TEMP TC;\n"); /* Used for modifiers */
1332 shader_addline(&buffer
, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1333 shader_addline(&buffer
, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1334 shader_addline(&buffer
, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1336 /** Call the base shader generation routine to generate most
1337 of the pixel shader string for us */
1338 generate_base_shader( (IWineD3DBaseShader
*) This
, &buffer
, pFunction
);
1340 /*FIXME: This next line isn't valid for certain pixel shader versions */
1341 shader_addline(&buffer
, "MOV result.color, R0;\n");
1342 shader_addline(&buffer
, "END\n\0");
1344 /* TODO: change to resource.glObjectHandle or something like that */
1345 GL_EXTCALL(glGenProgramsARB(1, &This
->baseShader
.prgId
));
1347 TRACE("Creating a hw pixel shader, prg=%d\n", This
->baseShader
.prgId
);
1348 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
, This
->baseShader
.prgId
));
1350 TRACE("Created hw pixel shader, prg=%d\n", This
->baseShader
.prgId
);
1351 /* Create the program and check for errors */
1352 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
,
1353 buffer
.bsize
, buffer
.buffer
));
1355 if (glGetError() == GL_INVALID_OPERATION
) {
1357 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &errPos
);
1358 FIXME("HW PixelShader Error at position %d: %s\n",
1359 errPos
, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB
)));
1360 This
->baseShader
.prgId
= -1;
1364 #if 1 /* if were using the data buffer of device then we don't need to free it */
1365 HeapFree(GetProcessHeap(), 0, buffer
.buffer
);
1369 HRESULT WINAPI
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
1370 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
1371 const DWORD
* pToken
= pFunction
;
1372 const SHADER_OPCODE
*curOpcode
= NULL
;
1375 TRACE("(%p) : Parsing programme\n", This
);
1377 if (NULL
!= pToken
) {
1378 while (D3DPS_END() != *pToken
) {
1379 if (shader_is_pshader_version(*pToken
)) { /** version */
1380 pshader_set_version(This
, *pToken
);
1385 if (shader_is_comment(*pToken
)) { /** comment */
1386 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1388 TRACE("//%s\n", (char*)pToken
);
1389 pToken
+= comment_len
;
1390 len
+= comment_len
+ 1;
1393 if (!This
->baseShader
.version
) {
1394 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This
);
1396 curOpcode
= shader_get_opcode((IWineD3DBaseShader
*) This
, *pToken
);
1399 if (NULL
== curOpcode
) {
1401 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1402 while (*pToken
& 0x80000000) {
1404 /* unknown current opcode ... */
1405 TRACE("unrecognized opcode: %08lx", *pToken
);
1412 if (curOpcode
->opcode
== D3DSIO_DCL
) {
1413 DWORD usage
= *pToken
;
1414 DWORD param
= *(pToken
+ 1);
1415 DWORD regtype
= shader_get_regtype(param
);
1417 /* Only print extended declaration for samplers or 3.0 input registers */
1418 if (regtype
== D3DSPR_SAMPLER
||
1419 (This
->baseShader
.version
>= 30 && regtype
== D3DSPR_INPUT
))
1420 shader_program_dump_decl_usage(usage
, param
);
1424 shader_dump_ins_modifiers(param
);
1426 shader_dump_param((IWineD3DBaseShader
*) This
, param
, 0);
1431 if (curOpcode
->opcode
== D3DSIO_DEF
) {
1432 TRACE("def c%lu = ", *pToken
& 0xFF);
1435 TRACE("%f ,", *(float *)pToken
);
1438 TRACE("%f ,", *(float *)pToken
);
1441 TRACE("%f ,", *(float *)pToken
);
1444 TRACE("%f", *(float *)pToken
);
1448 TRACE("%s", curOpcode
->name
);
1449 if (curOpcode
->num_params
> 0) {
1450 shader_dump_ins_modifiers(*pToken
);
1452 shader_dump_param((IWineD3DBaseShader
*) This
, *pToken
, 0);
1455 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1457 shader_dump_param((IWineD3DBaseShader
*) This
, *pToken
, 1);
1466 This
->baseShader
.functionLength
= (len
+ 1) * sizeof(DWORD
);
1468 This
->baseShader
.functionLength
= 1; /* no Function defined use fixed function vertex processing */
1471 /* Generate HW shader in needed */
1472 if (NULL
!= pFunction
&& wined3d_settings
.vs_mode
== VS_HW
) {
1473 TRACE("(%p) : Generating hardware program\n", This
);
1475 IWineD3DPixelShaderImpl_GenerateShader(iface
, pFunction
);
1479 TRACE("(%p) : Copying the function\n", This
);
1480 /* copy the function ... because it will certainly be released by application */
1481 if (NULL
!= pFunction
) {
1482 This
->baseShader
.function
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, This
->baseShader
.functionLength
);
1483 memcpy((void *)This
->baseShader
.function
, pFunction
, This
->baseShader
.functionLength
);
1485 This
->baseShader
.function
= NULL
;
1488 /* TODO: Some proper return values for failures */
1489 TRACE("(%p) : Returning WINED3D_OK\n", This
);
/* Method table of the pixel shader object. The entries are grouped by the
 * interface that declares them and must stay in this order. */
const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
{
    /*** IUnknown methods ***/
    IWineD3DPixelShaderImpl_QueryInterface,
    IWineD3DPixelShaderImpl_AddRef,
    IWineD3DPixelShaderImpl_Release,
    /*** IWineD3DBase methods ***/
    IWineD3DPixelShaderImpl_GetParent,
    /*** IWineD3DBaseShader methods ***/
    IWineD3DPixelShaderImpl_SetFunction,
    /*** IWineD3DPixelShader methods ***/
    IWineD3DPixelShaderImpl_GetDevice,
    IWineD3DPixelShaderImpl_GetFunction
};