/*
 * shaders implementation
 *
 * Copyright 2005 Oliver Stieber
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader
);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 # define TRACE_VSVECTOR(name)
40 /* The maximum size of the program */
43 #define REGMASK 0x00001FFF
44 typedef void (*shader_fct_t
)();
46 typedef struct SHADER_OPCODE
{
50 CONST UINT num_params
;
51 shader_fct_t soft_fct
;
56 #define GLNAME_REQUIRE_GLSL ((const char *)1)
57 /* *******************************************
58 IWineD3DPixelShader IUnknown parts follow
59 ******************************************* */
60 HRESULT WINAPI
IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader
*iface
, REFIID riid
, LPVOID
*ppobj
)
62 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
63 TRACE("(%p)->(%s,%p)\n",This
,debugstr_guid(riid
),ppobj
);
64 if (IsEqualGUID(riid
, &IID_IUnknown
)
65 || IsEqualGUID(riid
, &IID_IWineD3DBase
)
66 || IsEqualGUID(riid
, &IID_IWineD3DPixelShader
)) {
67 IUnknown_AddRef(iface
);
74 ULONG WINAPI
IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader
*iface
) {
75 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
76 TRACE("(%p) : AddRef increasing from %ld\n", This
, This
->ref
);
77 return InterlockedIncrement(&This
->ref
);
80 ULONG WINAPI
IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader
*iface
) {
81 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
83 TRACE("(%p) : Releasing from %ld\n", This
, This
->ref
);
84 ref
= InterlockedDecrement(&This
->ref
);
86 HeapFree(GetProcessHeap(), 0, This
);
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
   when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
97 /* *******************************************
98 IWineD3DPixelShader IWineD3DPixelShader parts follow
99 ******************************************* */
101 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader
*iface
, IUnknown
** parent
){
102 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
104 *parent
= This
->parent
;
105 IUnknown_AddRef(*parent
);
106 TRACE("(%p) : returning %p\n", This
, *parent
);
110 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader
* iface
, IWineD3DDevice
**pDevice
){
111 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
112 IWineD3DDevice_AddRef((IWineD3DDevice
*)This
->wineD3DDevice
);
113 *pDevice
= (IWineD3DDevice
*)This
->wineD3DDevice
;
114 TRACE("(%p) returning %p\n", This
, *pDevice
);
119 HRESULT WINAPI
IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader
* impl
, VOID
* pData
, UINT
* pSizeOfData
) {
120 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)impl
;
121 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This
, pData
, pSizeOfData
);
124 *pSizeOfData
= This
->functionLength
;
127 if (*pSizeOfData
< This
->functionLength
) {
128 *pSizeOfData
= This
->functionLength
;
129 return D3DERR_MOREDATA
;
131 if (NULL
== This
->function
) { /* no function defined */
132 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This
, pData
);
133 (*(DWORD
**) pData
) = NULL
;
135 if (This
->functionLength
== 0) {
138 TRACE("(%p) : GetFunction copying to %p\n", This
, pData
);
139 memcpy(pData
, This
->function
, This
->functionLength
);
/*******************************
 * pshader functions software VM
 */
148 void pshader_add(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
149 d
->x
= s0
->x
+ s1
->x
;
150 d
->y
= s0
->y
+ s1
->y
;
151 d
->z
= s0
->z
+ s1
->z
;
152 d
->w
= s0
->w
+ s1
->w
;
153 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
157 void pshader_dp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
158 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
;
159 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
160 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
163 void pshader_dp4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
164 d
->x
= d
->y
= d
->z
= d
->w
= s0
->x
* s1
->x
+ s0
->y
* s1
->y
+ s0
->z
* s1
->z
+ s0
->w
* s1
->w
;
165 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
166 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
169 void pshader_dst(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
171 d
->y
= s0
->y
* s1
->y
;
174 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
175 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
178 void pshader_expp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
184 tmp
.f
= floorf(s0
->w
);
185 d
->x
= powf(2.0f
, tmp
.f
);
186 d
->y
= s0
->w
- tmp
.f
;
187 tmp
.f
= powf(2.0f
, s0
->w
);
188 tmp
.d
&= 0xFFFFFF00U
;
191 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
192 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
195 void pshader_lit(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
197 d
->y
= (0.0f
< s0
->x
) ? s0
->x
: 0.0f
;
198 d
->z
= (0.0f
< s0
->x
&& 0.0f
< s0
->y
) ? powf(s0
->y
, s0
->w
) : 0.0f
;
200 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
201 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
204 void pshader_logp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
205 float tmp_f
= fabsf(s0
->w
);
206 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
207 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
208 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
211 void pshader_mad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
212 d
->x
= s0
->x
* s1
->x
+ s2
->x
;
213 d
->y
= s0
->y
* s1
->y
+ s2
->y
;
214 d
->z
= s0
->z
* s1
->z
+ s2
->z
;
215 d
->w
= s0
->w
* s1
->w
+ s2
->w
;
216 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
217 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, s2
->x
, s2
->y
, s2
->z
, s2
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
220 void pshader_max(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
221 d
->x
= (s0
->x
>= s1
->x
) ? s0
->x
: s1
->x
;
222 d
->y
= (s0
->y
>= s1
->y
) ? s0
->y
: s1
->y
;
223 d
->z
= (s0
->z
>= s1
->z
) ? s0
->z
: s1
->z
;
224 d
->w
= (s0
->w
>= s1
->w
) ? s0
->w
: s1
->w
;
225 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
226 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
229 void pshader_min(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
230 d
->x
= (s0
->x
< s1
->x
) ? s0
->x
: s1
->x
;
231 d
->y
= (s0
->y
< s1
->y
) ? s0
->y
: s1
->y
;
232 d
->z
= (s0
->z
< s1
->z
) ? s0
->z
: s1
->z
;
233 d
->w
= (s0
->w
< s1
->w
) ? s0
->w
: s1
->w
;
234 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
235 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
238 void pshader_mov(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
243 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
244 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
247 void pshader_mul(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
248 d
->x
= s0
->x
* s1
->x
;
249 d
->y
= s0
->y
* s1
->y
;
250 d
->z
= s0
->z
* s1
->z
;
251 d
->w
= s0
->w
* s1
->w
;
252 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
253 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
/*
 * nop: performs no work; it only emits the trace line.
 */
void pshader_nop(void) {
    PSTRACE(("executing nop\n"));
}
261 void pshader_rcp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
262 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== s0
->w
) ? HUGE_VAL
: 1.0f
/ s0
->w
;
263 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
264 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
267 void pshader_rsq(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
268 float tmp_f
= fabsf(s0
->w
);
269 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
== tmp_f
) ? HUGE_VAL
: ((1.0f
!= tmp_f
) ? 1.0f
/ sqrtf(tmp_f
) : 1.0f
);
270 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
271 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
274 void pshader_sge(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
275 d
->x
= (s0
->x
>= s1
->x
) ? 1.0f
: 0.0f
;
276 d
->y
= (s0
->y
>= s1
->y
) ? 1.0f
: 0.0f
;
277 d
->z
= (s0
->z
>= s1
->z
) ? 1.0f
: 0.0f
;
278 d
->w
= (s0
->w
>= s1
->w
) ? 1.0f
: 0.0f
;
279 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
280 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
283 void pshader_slt(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
284 d
->x
= (s0
->x
< s1
->x
) ? 1.0f
: 0.0f
;
285 d
->y
= (s0
->y
< s1
->y
) ? 1.0f
: 0.0f
;
286 d
->z
= (s0
->z
< s1
->z
) ? 1.0f
: 0.0f
;
287 d
->w
= (s0
->w
< s1
->w
) ? 1.0f
: 0.0f
;
288 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
289 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
292 void pshader_sub(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
293 d
->x
= s0
->x
- s1
->x
;
294 d
->y
= s0
->y
- s1
->y
;
295 d
->z
= s0
->z
- s1
->z
;
296 d
->w
= s0
->w
- s1
->w
;
297 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
298 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
/**
 * Version 1.1 specific
 */
305 void pshader_exp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
306 d
->x
= d
->y
= d
->z
= d
->w
= powf(2.0f
, s0
->w
);
307 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
308 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
311 void pshader_log(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
312 float tmp_f
= fabsf(s0
->w
);
313 d
->x
= d
->y
= d
->z
= d
->w
= (0.0f
!= tmp_f
) ? logf(tmp_f
) / logf(2.0f
) : -HUGE_VAL
;
314 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
315 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
318 void pshader_frc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
319 d
->x
= s0
->x
- floorf(s0
->x
);
320 d
->y
= s0
->y
- floorf(s0
->y
);
323 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
324 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
327 typedef FLOAT D3DMATRIX44
[4][4];
328 typedef FLOAT D3DMATRIX43
[4][3];
329 typedef FLOAT D3DMATRIX34
[3][4];
330 typedef FLOAT D3DMATRIX33
[3][3];
331 typedef FLOAT D3DMATRIX23
[2][3];
333 void pshader_m4x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat
) {
335 * Buggy CODE: here only if cast not work for copy/paste
336 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
337 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
338 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
339 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
340 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
341 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
342 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
344 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
345 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
346 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
347 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
+ mat
[3][3] * s0
->w
;
348 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
349 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
350 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
351 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], mat
[3][3], s0
->w
, d
->w
));
354 void pshader_m4x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX34 mat
) {
355 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
+ mat
[0][3] * s0
->w
;
356 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
+ mat
[1][3] * s0
->w
;
357 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
+ mat
[2][3] * s0
->w
;
359 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], mat
[0][3], s0
->x
, d
->x
));
360 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], mat
[1][3], s0
->y
, d
->y
));
361 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], mat
[2][3], s0
->z
, d
->z
));
362 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0
->w
, d
->w
));
365 void pshader_m3x4(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX43 mat
) {
366 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
367 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
368 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
369 d
->w
= mat
[3][0] * s0
->x
+ mat
[3][1] * s0
->y
+ mat
[3][2] * s0
->z
;
370 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
371 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
372 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
373 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat
[3][0], mat
[3][1], mat
[3][2], s0
->w
, d
->w
));
376 void pshader_m3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX33 mat
) {
377 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
378 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
379 d
->z
= mat
[2][0] * s0
->x
+ mat
[2][1] * s0
->y
+ mat
[2][2] * s0
->z
;
381 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat
[0][0], mat
[0][1], mat
[0][2], s0
->x
, d
->x
));
382 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat
[1][0], mat
[1][1], mat
[1][2], s0
->y
, d
->y
));
383 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat
[2][0], mat
[2][1], mat
[2][2], s0
->z
, d
->z
));
384 PSTRACE(("executing m3x3(4): (%f) \n", d
->w
));
387 void pshader_m3x2(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, D3DMATRIX23 mat
) {
389 d
->x
= mat
[0][0] * s0
->x
+ mat
[0][1] * s0
->y
+ mat
[0][2] * s0
->z
;
390 d
->y
= mat
[1][0] * s0
->x
+ mat
[1][1] * s0
->y
+ mat
[1][2] * s0
->z
;
/**
 * Version 2.0 specific
 */
398 void pshader_lrp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
399 d
->x
= s0
->x
* (s1
->x
- s2
->x
) + s2
->x
;
400 d
->y
= s0
->y
* (s1
->y
- s2
->y
) + s2
->y
;
401 d
->z
= s0
->z
* (s1
->z
- s2
->z
) + s2
->z
;
402 d
->w
= s0
->w
* (s1
->w
- s2
->w
) + s2
->w
;
405 void pshader_crs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
406 d
->x
= s0
->y
* s1
->z
- s0
->z
* s1
->y
;
407 d
->y
= s0
->z
* s1
->x
- s0
->x
* s1
->z
;
408 d
->z
= s0
->x
* s1
->y
- s0
->y
* s1
->x
;
409 d
->w
= 0.9f
; /* w is undefined, so set it to something safeish */
411 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
412 s0
->x
, s0
->y
, s0
->z
, s0
->w
, s1
->x
, s1
->y
, s1
->z
, s1
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
415 void pshader_abs(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
420 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
421 s0
->x
, s0
->y
, s0
->z
, s0
->w
, d
->x
, d
->y
, d
->z
, d
->w
));
425 void pshader_texcoord(WINED3DSHADERVECTOR
* d
) {
429 void pshader_texkill(WINED3DSHADERVECTOR
* d
) {
433 void pshader_tex(WINED3DSHADERVECTOR
* d
) {
436 void pshader_texld(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
440 void pshader_texbem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
444 void pshader_texbeml(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
448 void pshader_texreg2ar(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
452 void pshader_texreg2gb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
456 void pshader_texm3x2pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
460 void pshader_texm3x2tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
464 void pshader_texm3x3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
468 void pshader_texm3x3pad(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
472 void pshader_texm3x3diff(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
476 void pshader_texm3x3spec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
480 void pshader_texm3x3vspec(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
484 void pshader_cnd(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
488 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
489 void pshader_def(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
, WINED3DSHADERVECTOR
* s3
) {
493 void pshader_texreg2rgb(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
497 void pshader_texdp3tex(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
501 void pshader_texm3x2depth(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
505 void pshader_texdp3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
509 void pshader_texm3x3(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
513 void pshader_texdepth(WINED3DSHADERVECTOR
* d
) {
517 void pshader_cmp(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
, WINED3DSHADERVECTOR
* s2
) {
521 void pshader_bem(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
525 void pshader_call(WINED3DSHADERVECTOR
* d
) {
529 void pshader_callnz(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
533 void pshader_loop(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
537 void pshader_ret(WINED3DSHADERVECTOR
* d
) {
541 void pshader_endloop(WINED3DSHADERVECTOR
* d
) {
545 void pshader_dcl(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
549 void pshader_pow(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
, WINED3DSHADERVECTOR
* s1
) {
553 void pshader_sng(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
557 void pshader_nrm(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
561 void pshader_sincos(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
565 void pshader_rep(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
569 void pshader_endrep(void) {
573 void pshader_if(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
577 void pshader_ifc(WINED3DSHADERVECTOR
* d
, WINED3DSHADERVECTOR
* s0
) {
581 void pshader_else(WINED3DSHADERVECTOR
* d
) {
585 void pshader_label(WINED3DSHADERVECTOR
* d
) {
589 void pshader_endif(WINED3DSHADERVECTOR
* d
) {
593 void pshader_break(WINED3DSHADERVECTOR
* d
) {
597 void pshader_breakc(WINED3DSHADERVECTOR
* d
) {
601 void pshader_mova(WINED3DSHADERVECTOR
* d
) {
605 void pshader_defb(WINED3DSHADERVECTOR
* d
) {
609 void pshader_defi(WINED3DSHADERVECTOR
* d
) {
613 void pshader_dp2add(WINED3DSHADERVECTOR
* d
) {
617 void pshader_dsx(WINED3DSHADERVECTOR
* d
) {
621 void pshader_dsy(WINED3DSHADERVECTOR
* d
) {
625 void pshader_texldd(WINED3DSHADERVECTOR
* d
) {
629 void pshader_setp(WINED3DSHADERVECTOR
* d
) {
633 void pshader_texldl(WINED3DSHADERVECTOR
* d
) {
637 void pshader_breakp(WINED3DSHADERVECTOR
* d
) {
/**
 * log, exp, frc, m*x* seem to be macro instructions ... to be checked
 */
643 static CONST SHADER_OPCODE pshader_ins
[] = {
644 {D3DSIO_NOP
, "nop", "NOP", 0, pshader_nop
, 0, 0},
645 {D3DSIO_MOV
, "mov", "MOV", 2, pshader_mov
, 0, 0},
646 {D3DSIO_ADD
, "add", "ADD", 3, pshader_add
, 0, 0},
647 {D3DSIO_SUB
, "sub", "SUB", 3, pshader_sub
, 0, 0},
648 {D3DSIO_MAD
, "mad", "MAD", 4, pshader_mad
, 0, 0},
649 {D3DSIO_MUL
, "mul", "MUL", 3, pshader_mul
, 0, 0},
650 {D3DSIO_RCP
, "rcp", "RCP", 2, pshader_rcp
, 0, 0},
651 {D3DSIO_RSQ
, "rsq", "RSQ", 2, pshader_rsq
, 0, 0},
652 {D3DSIO_DP3
, "dp3", "DP3", 3, pshader_dp3
, 0, 0},
653 {D3DSIO_DP4
, "dp4", "DP4", 3, pshader_dp4
, 0, 0},
654 {D3DSIO_MIN
, "min", "MIN", 3, pshader_min
, 0, 0},
655 {D3DSIO_MAX
, "max", "MAX", 3, pshader_max
, 0, 0},
656 {D3DSIO_SLT
, "slt", "SLT", 3, pshader_slt
, 0, 0},
657 {D3DSIO_SGE
, "sge", "SGE", 3, pshader_sge
, 0, 0},
658 {D3DSIO_ABS
, "abs", "ABS", 2, pshader_abs
, 0, 0},
659 {D3DSIO_EXP
, "exp", "EX2", 2, pshader_exp
, 0, 0},
660 {D3DSIO_LOG
, "log", "LG2", 2, pshader_log
, 0, 0},
661 {D3DSIO_LIT
, "lit", "LIT", 2, pshader_lit
, 0, 0},
662 {D3DSIO_DST
, "dst", "DST", 3, pshader_dst
, 0, 0},
663 {D3DSIO_LRP
, "lrp", "LRP", 4, pshader_lrp
, 0, 0},
664 {D3DSIO_FRC
, "frc", "FRC", 2, pshader_frc
, 0, 0},
665 {D3DSIO_M4x4
, "m4x4", "undefined", 3, pshader_m4x4
, 0, 0},
666 {D3DSIO_M4x3
, "m4x3", "undefined", 3, pshader_m4x3
, 0, 0},
667 {D3DSIO_M3x4
, "m3x4", "undefined", 3, pshader_m3x4
, 0, 0},
668 {D3DSIO_M3x3
, "m3x3", "undefined", 3, pshader_m3x3
, 0, 0},
669 {D3DSIO_M3x2
, "m3x2", "undefined", 3, pshader_m3x2
, 0, 0},
672 /** FIXME: use direct access so add the others opcodes as stubs */
/* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
   They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl or NV_shader's */
675 {D3DSIO_CALL
, "call", GLNAME_REQUIRE_GLSL
, 1, pshader_call
, 0, 0},
676 {D3DSIO_CALLNZ
, "callnz", GLNAME_REQUIRE_GLSL
, 2, pshader_callnz
, 0, 0},
677 {D3DSIO_LOOP
, "loop", GLNAME_REQUIRE_GLSL
, 2, pshader_loop
, 0, 0},
678 {D3DSIO_RET
, "ret", GLNAME_REQUIRE_GLSL
, 0, pshader_ret
, 0, 0},
679 {D3DSIO_ENDLOOP
, "endloop", GLNAME_REQUIRE_GLSL
, 0, pshader_endloop
, 0, 0},
680 {D3DSIO_LABEL
, "label", GLNAME_REQUIRE_GLSL
, 1, pshader_label
, 0, 0},
/* DCL is a special operation */
682 {D3DSIO_DCL
, "dcl", NULL
, 1, pshader_dcl
, 0, 0},
683 {D3DSIO_POW
, "pow", "POW", 3, pshader_pow
, 0, 0},
684 {D3DSIO_CRS
, "crs", "XPS", 3, pshader_crs
, 0, 0},
685 /* TODO: sng can possibly be performed as
688 {D3DSIO_SGN
, "sng", NULL
, 2, pshader_sng
, 0, 0},
689 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
692 MUL vec.xyz, vec, tmp;
693 but I think this is better because it accounts for w properly.
699 {D3DSIO_NRM
, "nrm", NULL
, 2, pshader_nrm
, 0, 0},
700 {D3DSIO_SINCOS
, "sincos", NULL
, 2, pshader_sincos
, 0, 0},
701 {D3DSIO_REP
, "rep", GLNAME_REQUIRE_GLSL
, 2, pshader_rep
, 0, 0},
702 {D3DSIO_ENDREP
, "endrep", GLNAME_REQUIRE_GLSL
, 0, pshader_endrep
, 0, 0},
703 {D3DSIO_IF
, "if", GLNAME_REQUIRE_GLSL
, 2, pshader_if
, 0, 0},
704 {D3DSIO_IFC
, "ifc", GLNAME_REQUIRE_GLSL
, 2, pshader_ifc
, 0, 0},
705 {D3DSIO_ELSE
, "else", GLNAME_REQUIRE_GLSL
, 2, pshader_else
, 0, 0},
706 {D3DSIO_ENDIF
, "endif", GLNAME_REQUIRE_GLSL
, 2, pshader_endif
, 0, 0},
707 {D3DSIO_BREAK
, "break", GLNAME_REQUIRE_GLSL
, 2, pshader_break
, 0, 0},
708 {D3DSIO_BREAKC
, "breakc", GLNAME_REQUIRE_GLSL
, 2, pshader_breakc
, 0, 0},
709 {D3DSIO_MOVA
, "mova", GLNAME_REQUIRE_GLSL
, 2, pshader_mova
, 0, 0},
710 {D3DSIO_DEFB
, "defb", GLNAME_REQUIRE_GLSL
, 2, pshader_defb
, 0, 0},
711 {D3DSIO_DEFI
, "defi", GLNAME_REQUIRE_GLSL
, 2, pshader_defi
, 0, 0},
713 {D3DSIO_TEXCOORD
, "texcoord", "undefined", 1, pshader_texcoord
, 0, D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXCOORD
, "texcrd", "undefined", 2, pshader_texcoord
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
715 {D3DSIO_TEXKILL
, "texkill", "KIL", 1, pshader_texkill
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
716 {D3DSIO_TEX
, "tex", "undefined", 1, pshader_tex
, 0, D3DPS_VERSION(1,3)},
717 {D3DSIO_TEX
, "texld", GLNAME_REQUIRE_GLSL
, 2, pshader_texld
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
718 {D3DSIO_TEXBEM
, "texbem", "undefined", 2, pshader_texbem
, 0, D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXBEML
, "texbeml", GLNAME_REQUIRE_GLSL
, 2, pshader_texbeml
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_TEXREG2AR
,"texreg2ar","undefined", 2, pshader_texreg2ar
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
721 {D3DSIO_TEXREG2GB
,"texreg2gb","undefined", 2, pshader_texreg2gb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
722 {D3DSIO_TEXM3x2PAD
, "texm3x2pad", "undefined", 2, pshader_texm3x2pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
723 {D3DSIO_TEXM3x2TEX
, "texm3x2tex", "undefined", 2, pshader_texm3x2tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
724 {D3DSIO_TEXM3x3PAD
, "texm3x3pad", "undefined", 2, pshader_texm3x3pad
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
725 {D3DSIO_TEXM3x3DIFF
, "texm3x3diff", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3diff
, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
726 {D3DSIO_TEXM3x3SPEC
, "texm3x3spec", "undefined", 3, pshader_texm3x3spec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXM3x3VSPEC
, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXM3x3TEX
, "texm3x3tex", "undefined", 2, pshader_texm3x3tex
, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
729 {D3DSIO_EXPP
, "expp", "EXP", 2, pshader_expp
, 0, 0},
730 {D3DSIO_LOGP
, "logp", "LOG", 2, pshader_logp
, 0, 0},
731 {D3DSIO_CND
, "cnd", GLNAME_REQUIRE_GLSL
, 4, pshader_cnd
, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
732 /* def is a special operation */
733 {D3DSIO_DEF
, "def", "undefined", 5, pshader_def
, 0, 0},
734 {D3DSIO_TEXREG2RGB
, "texreg2rgb", GLNAME_REQUIRE_GLSL
, 2, pshader_texreg2rgb
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
735 {D3DSIO_TEXDP3TEX
, "texdp3tex", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3tex
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
736 {D3DSIO_TEXM3x2DEPTH
, "texm3x2depth", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x2depth
,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
737 {D3DSIO_TEXDP3
, "texdp3", GLNAME_REQUIRE_GLSL
, 2, pshader_texdp3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
738 {D3DSIO_TEXM3x3
, "texm3x3", GLNAME_REQUIRE_GLSL
, 2, pshader_texm3x3
, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
739 {D3DSIO_TEXDEPTH
, "texdepth", GLNAME_REQUIRE_GLSL
,1, pshader_texdepth
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
740 {D3DSIO_CMP
, "cmp", GLNAME_REQUIRE_GLSL
, 4, pshader_cmp
, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
741 {D3DSIO_BEM
, "bem", GLNAME_REQUIRE_GLSL
, 3, pshader_bem
, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
/* TODO: dp2add can be made out of multiple instructions */
743 {D3DSIO_DP2ADD
, "dp2add", GLNAME_REQUIRE_GLSL
, 2, pshader_dp2add
, 0, 0},
744 {D3DSIO_DSX
, "dsx", GLNAME_REQUIRE_GLSL
, 2, pshader_dsx
, 0, 0},
745 {D3DSIO_DSY
, "dsy", GLNAME_REQUIRE_GLSL
, 2, pshader_dsy
, 0, 0},
746 {D3DSIO_TEXLDD
, "texldd", GLNAME_REQUIRE_GLSL
, 2, pshader_texldd
, 0, 0},
747 {D3DSIO_SETP
, "setp", GLNAME_REQUIRE_GLSL
, 2, pshader_setp
, 0, 0},
748 {D3DSIO_TEXLDL
, "texdl", GLNAME_REQUIRE_GLSL
, 2, pshader_texldl
, 0, 0},
749 {D3DSIO_BREAKP
, "breakp", GLNAME_REQUIRE_GLSL
, 2, pshader_breakp
, 0, 0},
750 {D3DSIO_PHASE
, "phase", GLNAME_REQUIRE_GLSL
, 0, pshader_nop
, 0, 0},
751 {0, NULL
, NULL
, 0, NULL
, 0, 0}
755 inline static const SHADER_OPCODE
* pshader_program_get_opcode(IWineD3DPixelShaderImpl
*This
, const DWORD code
) {
757 DWORD version
= This
->version
;
758 DWORD hex_version
= D3DPS_VERSION(version
/10, version
%10);
759 /** TODO: use dichotomic search */
760 while (NULL
!= pshader_ins
[i
].name
) {
761 if (((code
& D3DSI_OPCODE_MASK
) == pshader_ins
[i
].opcode
) &&
762 (((hex_version
>= pshader_ins
[i
].min_version
) && (hex_version
<= pshader_ins
[i
].max_version
)) ||
763 ((pshader_ins
[i
].min_version
== 0) && (pshader_ins
[i
].max_version
== 0)))) {
764 return &pshader_ins
[i
];
768 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code
, code
, code
& D3DSI_OPCODE_MASK
, version
);
772 inline static BOOL
pshader_is_version_token(DWORD token
) {
773 return 0xFFFF0000 == (token
& 0xFFFF0000);
776 inline static BOOL
pshader_is_comment_token(DWORD token
) {
777 return D3DSIO_COMMENT
== (token
& D3DSI_OPCODE_MASK
);
781 inline static void get_register_name(const DWORD param
, char* regstr
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
782 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
784 DWORD reg
= param
& REGMASK
;
785 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
);
789 sprintf(regstr
, "R%lu", reg
);
793 strcpy(regstr
, "fragment.color.primary");
795 strcpy(regstr
, "fragment.color.secondary");
800 sprintf(regstr
, "C%lu", reg
);
802 sprintf(regstr
, "program.env[%lu]", reg
);
804 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
805 sprintf(regstr
,"T%lu", reg
);
808 sprintf(regstr
, "%s", rastout_reg_names
[reg
]);
811 sprintf(regstr
, "oD[%lu]", reg
);
813 case D3DSPR_TEXCRDOUT
:
814 sprintf(regstr
, "oT[%lu]", reg
);
817 FIXME("Unhandled register name Type(%ld)\n", regtype
);
822 inline static void get_write_mask(const DWORD output_reg
, char *write_mask
) {
824 if ((output_reg
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
825 strcat(write_mask
, ".");
826 if (output_reg
& D3DSP_WRITEMASK_0
) strcat(write_mask
, "r");
827 if (output_reg
& D3DSP_WRITEMASK_1
) strcat(write_mask
, "g");
828 if (output_reg
& D3DSP_WRITEMASK_2
) strcat(write_mask
, "b");
829 if (output_reg
& D3DSP_WRITEMASK_3
) strcat(write_mask
, "a");
833 inline static void get_input_register_swizzle(const DWORD instr
, char *swzstring
) {
834 static const char swizzle_reg_chars
[] = "rgba";
835 DWORD swizzle
= (instr
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
836 DWORD swizzle_x
= swizzle
& 0x03;
837 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
838 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
839 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
841 * swizzle bits fields:
845 if ((D3DSP_NOSWIZZLE
>> D3DSP_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
846 if (swizzle_x
== swizzle_y
&&
847 swizzle_x
== swizzle_z
&&
848 swizzle_x
== swizzle_w
) {
849 sprintf(swzstring
, ".%c", swizzle_reg_chars
[swizzle_x
]);
851 sprintf(swzstring
, ".%c%c%c%c",
852 swizzle_reg_chars
[swizzle_x
],
853 swizzle_reg_chars
[swizzle_y
],
854 swizzle_reg_chars
[swizzle_z
],
855 swizzle_reg_chars
[swizzle_w
]);
/*
 * Append one piece of generated program text to the program buffer.
 *
 * lineNum    counter of generated lines; the visible code only reads it for
 *            the TRACE output
 * pgm        program buffer (PGMSIZE bytes are assumed by the size check)
 * pgmLength  running length of the text already stored in pgm; increased by
 *            strlen(line)
 * line       NUL terminated text to append; copied with memcpy, so no
 *            terminator is written by the visible code
 *
 * When the new text would not leave room for a terminator an ERR is logged.
 *
 * NOTE(review): the statements that follow the ERR call and the ones around
 * the length update are not visible in this extract - confirm that the
 * overflow path skips the memcpy and where *lineNum is advanced.
 * NOTE(review): the ERR text talks about a "vertex program" although this
 * file builds pixel shader programs.
 */
860 inline static void addline(unsigned int *lineNum
, char *pgm
, unsigned int *pgmLength
, char *line
) {
861 int lineLen
= strlen(line
);
862 if(lineLen
+ *pgmLength
> PGMSIZE
- 1 /* - 1 to allow a NULL at the end */) {
863 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE
, lineLen
+ *pgmLength
);
866 memcpy(pgm
+ *pgmLength
, line
, lineLen
);
869 *pgmLength
+= lineLen
;
871 TRACE("GL HW (%u, %u) : %s", *lineNum
, *pgmLength
, line
);
/*
 * Operand text for the destination shift modifier, indexed by the 4-bit shift
 * value. "coefmul" and "coefdiv" are the PARAM vectors declared in the
 * generated program ({ 2, 4, 8, 16 } and { 0.5, 0.25, 0.125, 0.0625 }).
 * Shift values without a matching component map to the placeholder "dummy".
 */
static const char* shift_tab[] = {
    /*  0: none */ "dummy",
    /*  1: x2   */ "coefmul.x",
    /*  2: x4   */ "coefmul.y",
    /*  3: x8   */ "coefmul.z",
    /*  4: x16  */ "coefmul.w",
    /*  5: x32  */ "dummy",
    /*  6: x64  */ "dummy",
    /*  7: x128 */ "dummy",
    /*  8: d256 */ "dummy",
    /*  9: d128 */ "dummy",
    /* 10: d64  */ "dummy",
    /* 11: d32  */ "dummy",
    /* 12: d16  */ "coefdiv.w",
    /* 13: d8   */ "coefdiv.z",
    /* 14: d4   */ "coefdiv.y",
    /* 15: d2   */ "coefdiv.x"
};
893 inline static void gen_output_modifier_line(int saturate
, char *write_mask
, int shift
, char *regstr
, char* line
) {
894 /* Generate a line that does the output modifier computation */
895 sprintf(line
, "MUL%s %s%s, %s, %s;", saturate
? "_SAT" : "", regstr
, write_mask
, regstr
, shift_tab
[shift
]);
/*
 * Prepare one source operand, emitting an extra instruction when the operand
 * carries a source modifier.
 *
 * instr      source parameter token; the modifier is (instr & D3DSP_SRCMOD_MASK)
 * tmpreg     selects the scratch register: the name is "T" followed by the
 *            character 'A' + tmpreg
 * outregstr  receives the operand text the caller should use
 * line       receives the extra instruction text, when one is generated
 * constants  passed through to get_register_name()
 *
 * The register name is resolved with get_register_name() into a function-local
 * static buffer. Depending on the modifier the visible code either writes the
 * name (optionally prefixed with '-') straight into outregstr, or formats an
 * ADD/MAD/SUB/RCP+MUL sequence into line that targets the scratch register and
 * then sets outregstr to that scratch register.
 *
 * NOTE(review): most case labels, the break statements and the return
 * statements are not visible in this extract. Callers add `line` to the
 * program only when the return value is non-zero - confirm the exact return
 * values against the complete file.
 */
898 inline static int gen_input_modifier_line(const DWORD instr
, int tmpreg
, char *outregstr
, char *line
, char constants
[WINED3D_PSHADER_MAX_CONSTANTS
]) {
899 /* Generate a line that does the input modifier computation and return the input register to use */
/* Both scratch buffers are static, so their contents persist between calls */
900 static char regstr
[256];
901 static char tmpline
[256];
904 /* Assume a new line will be added */
907 /* Get register name */
908 get_register_name(instr
, regstr
, constants
);
910 TRACE(" Register name %s\n", regstr
);
911 switch (instr
& D3DSP_SRCMOD_MASK
) {
913 strcpy(outregstr
, regstr
);
917 sprintf(outregstr
, "-%s", regstr
);
921 sprintf(line
, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg
, regstr
);
923 case D3DSPSM_BIASNEG
:
924 sprintf(line
, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg
, regstr
);
927 sprintf(line
, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg
, regstr
);
929 case D3DSPSM_SIGNNEG
:
930 sprintf(line
, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg
, regstr
);
933 sprintf(line
, "SUB T%c, one.x, %s;", 'A' + tmpreg
, regstr
);
936 sprintf(line
, "ADD T%c, %s, %s;", 'A' + tmpreg
, regstr
, regstr
);
939 sprintf(line
, "ADD T%c, -%s, -%s;", 'A' + tmpreg
, regstr
, regstr
);
/* Two instructions are packed into one line here, separated by an embedded newline */
942 sprintf(line
, "RCP T%c, %s.z;", 'A' + tmpreg
, regstr
);
943 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
944 strcat(line
, "\n"); /* Hack */
945 strcat(line
, tmpline
);
948 sprintf(line
, "RCP T%c, %s;", 'A' + tmpreg
, regstr
);
949 sprintf(tmpline
, "MUL T%c, %s, T%c;", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
950 strcat(line
, "\n"); /* Hack */
951 strcat(line
, tmpline
);
954 strcpy(outregstr
, regstr
);
959 /* Substitute the register name */
960 sprintf(outregstr
, "T%c", 'A' + tmpreg
);
965 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/*
 * Translate a D3D pixel shader token stream into the text of an ARB fragment
 * program and hand it to OpenGL.
 *
 * iface      the pixel shader object; This->constants is reset and then marked
 *            for every constant defined inside the shader, and This->prgId
 *            receives the GL program id
 * pFunction  shader byte code, walked token by token until D3DPS_END()
 *
 * The program text is accumulated with addline() in a PGMSIZE byte heap buffer
 * that is released before returning. After the token loop the visible code
 * appends "MOV result.color, R0;" and "END", terminates the string and, inside
 * the GL_SUPPORT() guard, calls glGenProgramsARB / glBindProgramARB /
 * glProgramStringARB and reports GL_INVALID_OPERATION through FIXME.
 *
 * NOTE(review): several local declarations, case labels and closing braces
 * are not visible in this extract; comments below only describe what the
 * visible statements do.
 */
966 inline static VOID
IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
967 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
968 const DWORD
*pToken
= pFunction
;
969 const SHADER_OPCODE
*curOpcode
= NULL
;
972 unsigned lineNum
= 0; /* The line number of the generated program (for logging)*/
973 char *pgmStr
= NULL
; /* A pointer to the program data generated by this function */
975 DWORD nUseAddressRegister
= 0;
976 #if 0 /* TODO: loop register (just another address register ) */
977 BOOL hasLoops
= FALSE
;
980 BOOL saturate
; /* clamp to 0.0 -> 1.0*/
981 int row
= 0; /* not sure, something to do with macros? */
983 int version
= 0; /* The version of the shader */
985 /* Keep a running length for pgmStr so that we don't have to calculate strlen every time we concatenate */
986 unsigned int pgmLength
= 0;
988 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
989 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
990 if (This
->device
->fixupVertexBufferSize
< PGMSIZE
) {
991 HeapFree(GetProcessHeap(), 0, This
->fixupVertexBuffer
);
992 This
->fixupVertexBuffer
= HeapAlloc(GetProcessHeap() , 0, PGMSIZE
);
993 This
->fixupVertexBufferSize
= PGMSIZE
;
994 This
->fixupVertexBuffer
[0] = 0;
996 pgmStr
= This
->device
->fixupVertexBuffer
;
/* NOTE(review): no NULL check on this allocation is visible in this extract */
998 pgmStr
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, PGMSIZE
); /* 64kb should be enough */
1002 /* TODO: Think about using a first pass to work out what's required for the second pass. */
1003 for(i
= 0; i
< WINED3D_PSHADER_MAX_CONSTANTS
; i
++)
1004 This
->constants
[i
] = 0;
1006 if (NULL
!= pToken
) {
1007 while (D3DPS_END() != *pToken
) {
1008 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1010 instructionSize
= pToken
& SIZEBITS
>> 27;
1013 if (pshader_is_version_token(*pToken
)) { /** version */
1017 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1018 version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1020 TRACE("found version token ps.%lu.%lu;\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1022 /* Each release of pixel shaders has had different numbers of temp registers */
1028 case 14: numTemps
=12;
1030 strcpy(tmpLine
, "!!ARBfp1.0\n");
1032 case 20: numTemps
=12;
1034 strcpy(tmpLine
, "!!ARBfp2.0\n");
1035 FIXME("No work done yet to support ps2.0 in hw\n");
1037 case 30: numTemps
=32;
1039 strcpy(tmpLine
, "!!ARBfp3.0\n");
1040 FIXME("No work done yet to support ps3.0 in hw\n");
1045 strcpy(tmpLine
, "!!ARBfp1.0\n");
1046 FIXME("Unrecognized pixel shader version!\n");
1048 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1050 /* TODO: find out how many registers are really needed */
1051 for(i
= 0; i
< 6; i
++) {
1052 sprintf(tmpLine
, "TEMP T%lu;\n", i
);
1053 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1056 for(i
= 0; i
< 6; i
++) {
1057 sprintf(tmpLine
, "TEMP R%lu;\n", i
);
1058 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1061 sprintf(tmpLine
, "TEMP TMP;\n");
1062 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1063 sprintf(tmpLine
, "TEMP TMP2;\n");
1064 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1065 sprintf(tmpLine
, "TEMP TA;\n");
1066 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1067 sprintf(tmpLine
, "TEMP TB;\n");
1068 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1069 sprintf(tmpLine
, "TEMP TC;\n");
1070 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1072 strcpy(tmpLine
, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1073 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1074 strcpy(tmpLine
, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1075 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1076 strcpy(tmpLine
, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1077 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1079 for(i
= 0; i
< 4; i
++) {
1080 sprintf(tmpLine
, "MOV T%lu, fragment.texcoord[%lu];\n", i
, i
);
1081 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1088 if (pshader_is_comment_token(*pToken
)) { /** comment */
1089 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1091 FIXME("#%s\n", (char*)pToken
);
1092 pToken
+= comment_len
;
1096 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1100 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
1102 if (NULL
== curOpcode
) {
1103 /* unknown current opcode ... (shouldn't be any!) */
1104 while (*pToken
& 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1105 FIXME("unrecognized opcode: %08lx\n", *pToken
);
1108 } else if (GLNAME_REQUIRE_GLSL
== curOpcode
->glname
) {
1109 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1110 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode
->name
);
1111 pToken
+= curOpcode
->num_params
;
1113 TRACE("Found opcode %s %s\n", curOpcode
->name
, curOpcode
->glname
);
1116 /* Build opcode for GL vertex_program */
1117 switch (curOpcode
->opcode
) {
1122 /* Address registers must be loaded with the ARL instruction */
1123 if ((((*pToken
) & D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) == D3DSPR_ADDR
) {
1124 if (((*pToken
) & REGMASK
) < nUseAddressRegister
) {
1125 strcpy(tmpLine
, "ARL");
1128 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This
, ((*pToken
) & REGMASK
));
1153 case D3DSIO_TEXKILL
:
1154 TRACE("Appending glname %s to tmpLine\n", curOpcode
->glname
);
1155 strcpy(tmpLine
, curOpcode
->glname
);
/* A constant definition: the register token is followed by four float values, emitted as a PARAM line */
1159 DWORD reg
= *pToken
& REGMASK
;
1160 sprintf(tmpLine
, "PARAM C%lu = { %f, %f, %f, %f };\n", reg
,
1161 *((const float *)(pToken
+ 1)),
1162 *((const float *)(pToken
+ 2)),
1163 *((const float *)(pToken
+ 3)),
1164 *((const float *)(pToken
+ 4)) );
1166 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
/* Remember that this constant is defined by the program itself; get_register_name() receives this table */
1168 This
->constants
[reg
] = 1;
1176 get_write_mask(*pToken
, tmp
);
1177 if (version
!= 14) {
1178 DWORD reg
= *pToken
& REGMASK
;
1179 sprintf(tmpLine
,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg
, tmp
, reg
, reg
);
1180 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1184 DWORD reg1
= *pToken
& REGMASK
;
1185 DWORD reg2
= *++pToken
& REGMASK
;
1186 if (gen_input_modifier_line(*pToken
, 0, reg
, tmpLine
, This
->constants
)) {
1187 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1189 sprintf(tmpLine
,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1
, tmp
, reg
, reg2
);
1190 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1196 case D3DSIO_TEXCOORD
:
1199 get_write_mask(*pToken
, tmp
);
1200 if (version
!= 14) {
1201 DWORD reg
= *pToken
& REGMASK
;
1202 sprintf(tmpLine
, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg
, tmp
, reg
);
1203 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1206 DWORD reg1
= *pToken
& REGMASK
;
1207 DWORD reg2
= *++pToken
& REGMASK
;
1208 sprintf(tmpLine
, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1
, tmp
, reg2
);
1209 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1215 case D3DSIO_TEXM3x2PAD
:
1217 DWORD reg
= *pToken
& REGMASK
;
1219 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1220 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1222 sprintf(tmpLine
, "DP3 TMP.x, T%lu, %s;\n", reg
, buf
);
1223 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1228 case D3DSIO_TEXM3x2TEX
:
1230 DWORD reg
= *pToken
& REGMASK
;
1232 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1233 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1235 sprintf(tmpLine
, "DP3 TMP.y, T%lu, %s;\n", reg
, buf
);
1236 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1237 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg
, reg
);
1238 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1243 case D3DSIO_TEXREG2AR
:
1245 DWORD reg1
= *pToken
& REGMASK
;
1246 DWORD reg2
= *++pToken
& REGMASK
;
1247 sprintf(tmpLine
, "MOV TMP.r, T%lu.a;\n", reg2
);
1248 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1249 sprintf(tmpLine
, "MOV TMP.g, T%lu.r;\n", reg2
);
1250 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1251 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1252 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1257 case D3DSIO_TEXREG2GB
:
1259 DWORD reg1
= *pToken
& REGMASK
;
1260 DWORD reg2
= *++pToken
& REGMASK
;
1261 sprintf(tmpLine
, "MOV TMP.r, T%lu.g;\n", reg2
);
1262 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1263 sprintf(tmpLine
, "MOV TMP.g, T%lu.b;\n", reg2
);
1264 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1265 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1266 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1273 DWORD reg1
= *pToken
& REGMASK
;
1274 DWORD reg2
= *++pToken
& REGMASK
;
1276 /* FIXME: Should apply the BUMPMAPENV matrix */
1277 sprintf(tmpLine
, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1
, reg2
);
1278 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1279 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1
, reg1
);
1280 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1285 case D3DSIO_TEXM3x3PAD
:
1287 DWORD reg
= *pToken
& REGMASK
;
1289 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1290 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1292 sprintf(tmpLine
, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row
, reg
, buf
);
1293 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1299 case D3DSIO_TEXM3x3TEX
:
1301 DWORD reg
= *pToken
& REGMASK
;
1303 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1304 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1307 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1308 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1310 /* Cubemap textures will be more used than 3D ones. */
1311 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1312 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1317 case D3DSIO_TEXM3x3VSPEC
:
1319 DWORD reg
= *pToken
& REGMASK
;
1321 if (gen_input_modifier_line(*++pToken
, 0, buf
, tmpLine
, This
->constants
)) {
1322 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1324 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1325 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1327 /* Construct the eye-ray vector from w coordinates */
1328 sprintf(tmpLine
, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw
[0]);
1329 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1330 sprintf(tmpLine
, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw
[1]);
1331 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1332 sprintf(tmpLine
, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg
);
1333 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1335 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1336 sprintf(tmpLine
, "DP3 TMP.w, TMP, TMP2;\n");
1337 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1338 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;\n");
1339 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1340 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1341 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1343 /* Cubemap textures will be more used than 3D ones. */
1344 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1345 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1351 case D3DSIO_TEXM3x3SPEC
:
1353 DWORD reg
= *pToken
& REGMASK
;
1354 DWORD reg3
= *(pToken
+ 2) & REGMASK
;
1356 if (gen_input_modifier_line(*(pToken
+ 1), 0, buf
, tmpLine
, This
->constants
)) {
1357 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1359 sprintf(tmpLine
, "DP3 TMP.z, T%lu, %s;\n", reg
, buf
);
1360 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1362 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1363 sprintf(tmpLine
, "DP3 TMP.w, TMP, C[%lu];\n", reg3
);
1364 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1366 sprintf(tmpLine
, "MUL TMP, TMP.w, TMP;\n");
1367 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1368 sprintf(tmpLine
, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3
);
1369 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1371 /* Cubemap textures will be more used than 3D ones. */
1372 sprintf(tmpLine
, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg
, reg
);
1373 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1381 if (curOpcode
->glname
== GLNAME_REQUIRE_GLSL
) {
1382 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode
->name
);
1384 FIXME("Can't handle opcode %s in hwShader\n", curOpcode
->name
);
1386 pToken
+= curOpcode
->num_params
; /* maybe + 1 */
1390 if (0 != (*pToken
& D3DSP_DSTMOD_MASK
)) {
1391 DWORD mask
= *pToken
& D3DSP_DSTMOD_MASK
;
1393 case D3DSPDM_SATURATE
: saturate
= TRUE
; break;
1394 #if 0 /* as yet unhandled modifiers */
1395 case D3DSPDM_CENTROID
: centroid
= TRUE
; break;
1396 case D3DSPDM_PP
: partialpresision
= TRUE
; break;
1397 case D3DSPDM_X2
: X2
= TRUE
; break;
1398 case D3DSPDM_X4
: X4
= TRUE
; break;
1399 case D3DSPDM_X8
: X8
= TRUE
; break;
1400 case D3DSPDM_D2
: D2
= TRUE
; break;
1401 case D3DSPDM_D4
: D4
= TRUE
; break;
1402 case D3DSPDM_D8
: D8
= TRUE
; break;
1405 TRACE("_unhandled_modifier(0x%08lx)\n", mask
);
1409 /* Generate input and output registers */
1410 if (curOpcode
->num_params
> 0) {
/* NOTE(review): operands has room for four entries; no check of num_params against that bound is visible here */
1412 char operands
[4][100];
1416 TRACE("(%p): Opcode has %d params\n", This
, curOpcode
->num_params
);
1418 /* Generate lines that handle input modifier computation */
1419 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1420 TRACE("(%p) : Param %ld token %lx\n", This
, i
, *(pToken
+ i
));
1421 if (gen_input_modifier_line(*(pToken
+ i
), i
- 1, regs
[i
- 1], tmpOp
, This
->constants
)) {
1422 addline(&lineNum
, pgmStr
, &pgmLength
, tmpOp
);
1426 /* Handle saturation only when no shift is present in the output modifier */
1427 if ((*pToken
& D3DSPDM_SATURATE
) && (0 == (*pToken
& D3DSP_DSTSHIFT_MASK
)))
1430 /* Handle output register */
1431 get_register_name(*pToken
, tmpOp
, This
->constants
);
1432 strcpy(operands
[0], tmpOp
);
1433 get_write_mask(*pToken
, tmpOp
);
1434 strcat(operands
[0], tmpOp
);
1436 /* This function works because of side effects from gen_input_modifier_line */
1437 /* Handle input registers */
1438 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1439 TRACE("(%p) : Regs = %s\n", This
, regs
[i
- 1]);
1440 strcpy(operands
[i
], regs
[i
- 1]);
1441 get_input_register_swizzle(*(pToken
+ i
), swzstring
);
1442 strcat(operands
[i
], swzstring
);
1445 switch(curOpcode
->opcode
) {
1447 sprintf(tmpLine
, "CMP%s %s, %s, %s, %s;\n", (saturate
? "_SAT" : ""), operands
[0], operands
[1], operands
[3], operands
[2]);
1450 sprintf(tmpLine
, "ADD TMP, -%s, coefdiv.x;", operands
[1]);
1451 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1452 sprintf(tmpLine
, "CMP%s %s, TMP, %s, %s;\n", (saturate
? "_SAT" : ""), operands
[0], operands
[2], operands
[3]);
1456 strcat(tmpLine
, "_SAT");
1457 strcat(tmpLine
, " ");
1458 strcat(tmpLine
, operands
[0]);
1459 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
1460 strcat(tmpLine
, ", ");
1461 strcat(tmpLine
, operands
[i
]);
1463 strcat(tmpLine
,";\n");
1465 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1466 pToken
+= curOpcode
->num_params
;
1468 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1469 if (curOpcode
->num_params
> 0) {
1470 DWORD param
= *(pInstr
+ 1);
1471 if (0 != (param
& D3DSP_DSTSHIFT_MASK
)) {
1473 /* Generate a line that handle the output modifier computation */
1475 char write_mask
[20];
1476 DWORD shift
= (param
& D3DSP_DSTSHIFT_MASK
) >> D3DSP_DSTSHIFT_SHIFT
;
1477 get_register_name(param
, regstr
, This
->constants
);
1478 get_write_mask(param
, write_mask
);
1479 gen_output_modifier_line(saturate
, write_mask
, shift
, regstr
, tmpLine
);
1480 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1486 /* TODO: What about result.depth? */
1487 strcpy(tmpLine
, "MOV result.color, R0;\n");
1488 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1490 strcpy(tmpLine
, "END\n");
1491 addline(&lineNum
, pgmStr
, &pgmLength
, tmpLine
);
1494 /* finally null terminate the pgmStr*/
1495 pgmStr
[pgmLength
] = 0;
/* NOTE(review): the guard tests ARB_VERTEX_PROGRAM although the calls below target GL_FRAGMENT_PROGRAM_ARB - confirm this is intended */
1496 if (GL_SUPPORT(ARB_VERTEX_PROGRAM
)) {
1497 /* Create the hw shader */
1499 /* pgmStr sometimes gets too long for a normal TRACE */
1500 TRACE("Generated program:\n");
1501 if (TRACE_ON(d3d_shader
)) {
1502 fprintf(stderr
, "%s\n", pgmStr
);
1505 /* TODO: change to resource.glObjectHandel or something like that */
1506 GL_EXTCALL(glGenProgramsARB(1, &This
->prgId
));
1508 TRACE("Creating a hw pixel shader, prg=%d\n", This
->prgId
);
1509 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
, This
->prgId
));
1511 TRACE("Created hw pixel shader, prg=%d\n", This
->prgId
);
1512 /* Create the program and check for errors */
1513 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
, strlen(pgmStr
), pgmStr
));
1514 if (glGetError() == GL_INVALID_OPERATION
) {
1516 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &errPos
);
1517 FIXME("HW PixelShader Error at position %d: %s\n",
1518 errPos
, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB
)));
1522 #if 1 /* if we're using the data buffer of device then we don't need to free it */
1523 HeapFree(GetProcessHeap(), 0, pgmStr
);
/*
 * Write a readable form of one shader parameter token to the TRACE channel.
 *
 * param  the parameter token
 * input  flag supplied by the caller (callers in this file pass 0 for the
 *        first parameter of an instruction and 1 for the following ones)
 *
 * The register number is (param & D3DSP_REGNUM_MASK); the register type is
 * the D3DSP_REGTYPE_MASK field combined with the EXTENDED_REG bits shifted
 * down by 8. The visible code prints a register name, then either the write
 * mask components or the source modifier suffix and swizzle.
 *
 * NOTE(review): the statements that test `input`, several case labels and
 * the text printed for negated/complemented operands are not visible in this
 * extract - presumably `input` selects between the write-mask and the
 * swizzle output; confirm against the complete file.
 */
1527 inline static void pshader_program_dump_ps_param(const DWORD param
, int input
) {
1528 static const char* rastout_reg_names
[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1529 static const char swizzle_reg_chars
[] = "rgba";
1531 /* the unknown mask is for bits not yet accounted for by any other mask... */
1532 #define UNKNOWN_MASK 0xC000
1534 /* for registers above 7 we have to add on bits 11 and 12 to get the correct register */
1535 #define EXTENDED_REG 0x1800
1537 DWORD reg
= param
& D3DSP_REGNUM_MASK
;
1538 DWORD regtype
= ((param
& D3DSP_REGTYPE_MASK
) >> D3DSP_REGTYPE_SHIFT
) | ((param
& EXTENDED_REG
) >> 8);
1541 if ( ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_NEG
) ||
1542 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_BIASNEG
) ||
1543 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_SIGNNEG
) ||
1544 ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_X2NEG
) )
1546 else if ((param
& D3DSP_SRCMOD_MASK
) == D3DSPSM_COMP
)
1550 switch (regtype
/* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1558 TRACE("c%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1561 case D3DSPR_TEXTURE
: /* case D3DSPR_ADDR: */
1564 case D3DSPR_RASTOUT
:
/* NOTE(review): reg indexes the five-entry rastout_reg_names table; no range check is visible here */
1565 TRACE("%s", rastout_reg_names
[reg
]);
1567 case D3DSPR_ATTROUT
:
1568 TRACE("oD%lu", reg
);
1570 case D3DSPR_TEXCRDOUT
:
1571 TRACE("oT%lu", reg
);
1573 case D3DSPR_CONSTINT
:
1574 TRACE("i%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1576 case D3DSPR_CONSTBOOL
:
1577 TRACE("b%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1583 TRACE("aL%s%lu", (param
& D3DVS_ADDRMODE_RELATIVE
) ? "a0.x + " : "", reg
);
1590 /** operand output */
1592 * for better debugging traces it's done into opcode dump code
1593 * @see pshader_program_dump_opcode
1594 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1595 DWORD mask = param & D3DSP_DSTMOD_MASK;
1597 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1599 TRACE("_unhandled_modifier(0x%08lx)", mask);
1602 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1603 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1605 TRACE("_x%u", 1 << shift);
1609 if ((param
& D3DSP_WRITEMASK_ALL
) != D3DSP_WRITEMASK_ALL
) {
1611 if (param
& D3DSP_WRITEMASK_0
) TRACE(".r");
1612 if (param
& D3DSP_WRITEMASK_1
) TRACE(".g");
1613 if (param
& D3DSP_WRITEMASK_2
) TRACE(".b");
1614 if (param
& D3DSP_WRITEMASK_3
) TRACE(".a");
1617 /** operand input */
1618 DWORD swizzle
= (param
& D3DSP_SWIZZLE_MASK
) >> D3DSP_SWIZZLE_SHIFT
;
1619 DWORD swizzle_r
= swizzle
& 0x03;
1620 DWORD swizzle_g
= (swizzle
>> 2) & 0x03;
1621 DWORD swizzle_b
= (swizzle
>> 4) & 0x03;
1622 DWORD swizzle_a
= (swizzle
>> 6) & 0x03;
1624 if (0 != (param
& D3DSP_SRCMOD_MASK
)) {
1625 DWORD mask
= param
& D3DSP_SRCMOD_MASK
;
1626 /*TRACE("_modifier(0x%08lx) ", mask);*/
1628 case D3DSPSM_NONE
: break;
1629 case D3DSPSM_NEG
: break;
1630 case D3DSPSM_BIAS
: TRACE("_bias"); break;
1631 case D3DSPSM_BIASNEG
: TRACE("_bias"); break;
1632 case D3DSPSM_SIGN
: TRACE("_bx2"); break;
1633 case D3DSPSM_SIGNNEG
: TRACE("_bx2"); break;
1634 case D3DSPSM_COMP
: break;
1635 case D3DSPSM_X2
: TRACE("_x2"); break;
1636 case D3DSPSM_X2NEG
: TRACE("_x2"); break;
1637 case D3DSPSM_DZ
: TRACE("_dz"); break;
1638 case D3DSPSM_DW
: TRACE("_dw"); break;
1640 TRACE("_unknown(0x%08lx)", mask
);
1645 * swizzle bits fields:
1648 if ((D3DVS_NOSWIZZLE
>> D3DVS_SWIZZLE_SHIFT
) != swizzle
) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1649 if (swizzle_r
== swizzle_g
&&
1650 swizzle_r
== swizzle_b
&&
1651 swizzle_r
== swizzle_a
) {
1652 TRACE(".%c", swizzle_reg_chars
[swizzle_r
]);
1655 swizzle_reg_chars
[swizzle_r
],
1656 swizzle_reg_chars
[swizzle_g
],
1657 swizzle_reg_chars
[swizzle_b
],
1658 swizzle_reg_chars
[swizzle_a
]);
/*
 * Write the usage named by a declaration token to the TRACE channel.
 *
 * This   the pixel shader object (not referenced by the visible statements)
 * token  declaration token; the usage is (token & 0xFFFF) and the usage index
 *        is ((token & 0xF0000) >> 16)
 *
 * Usages printed with their index: position, normal, texture, positionT and,
 * for a non-zero index, "specular" with index - 1. The other usages print a
 * fixed name. An unknown usage is reported with FIXME.
 *
 * NOTE(review): the break statements and the default label are not visible in
 * this extract.
 */
1664 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl
*This
, DWORD token
) {
1666 switch(token
& 0xFFFF) {
1667 case D3DDECLUSAGE_POSITION
:
1668 TRACE("%s%ld ", "position",(token
& 0xF0000) >> 16);
1670 case D3DDECLUSAGE_BLENDINDICES
:
1671 TRACE("%s ", "blend");
1673 case D3DDECLUSAGE_BLENDWEIGHT
:
1674 TRACE("%s ", "weight");
1676 case D3DDECLUSAGE_NORMAL
:
1677 TRACE("%s%ld ", "normal",(token
& 0xF0000) >> 16);
1679 case D3DDECLUSAGE_PSIZE
:
1680 TRACE("%s ", "psize");
1682 case D3DDECLUSAGE_COLOR
:
/* Colour index 0 is printed as "color"; higher indices are printed as "specular" numbered from 0 */
1683 if((token
& 0xF0000) >> 16 == 0) {
1684 TRACE("%s ", "color");
1686 TRACE("%s%ld ", "specular", ((token
& 0xF0000) >> 16) - 1);
1689 case D3DDECLUSAGE_TEXCOORD
:
1690 TRACE("%s%ld ", "texture", (token
& 0xF0000) >> 16);
1692 case D3DDECLUSAGE_TANGENT
:
1693 TRACE("%s ", "tangent");
1695 case D3DDECLUSAGE_BINORMAL
:
1696 TRACE("%s ", "binormal");
1698 case D3DDECLUSAGE_TESSFACTOR
:
1699 TRACE("%s ", "tessfactor");
1701 case D3DDECLUSAGE_POSITIONT
:
1702 TRACE("%s%ld ", "positionT",(token
& 0xF0000) >> 16);
1704 case D3DDECLUSAGE_FOG
:
1705 TRACE("%s ", "fog");
1707 case D3DDECLUSAGE_DEPTH
:
1708 TRACE("%s ", "depth");
1710 case D3DDECLUSAGE_SAMPLE
:
1711 TRACE("%s ", "sample");
1714 FIXME("Unrecognised dcl %08lx", token
& 0xFFFF);
/*
 * Attach shader byte code to a pixel shader object.
 *
 * iface      the pixel shader object
 * pFunction  shader byte code terminated by D3DPS_END(), or NULL
 *
 * Visible steps:
 *   1. Walk the token stream, recording the shader version in This->version
 *      and tracing a disassembly of every instruction.
 *   2. Store the size of the byte code in This->functionLength, computed as
 *      (len + 1) * sizeof(DWORD), or 1 when no function was supplied.
 *   3. When pFunction is not NULL and the hardware mode check passes, build
 *      the hardware program with IWineD3DPixelShaderImpl_GenerateProgramArbHW().
 *   4. Keep a private heap copy of the byte code in This->function (NULL when
 *      pFunction is NULL).
 *
 * NOTE(review): the declaration and most updates of `len`, the pointer
 * advances between the TRACE calls and the return statement are not visible
 * in this extract.
 */
1718 HRESULT WINAPI
IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader
*iface
, CONST DWORD
*pFunction
) {
1719 IWineD3DPixelShaderImpl
*This
= (IWineD3DPixelShaderImpl
*)iface
;
1720 const DWORD
* pToken
= pFunction
;
1721 const SHADER_OPCODE
*curOpcode
= NULL
;
1724 TRACE("(%p) : Parsing programme\n", This
);
1726 if (NULL
!= pToken
) {
1727 while (D3DPS_END() != *pToken
) {
1728 if (pshader_is_version_token(*pToken
)) { /** version */
1729 This
->version
= (((*pToken
>> 8) & 0x0F) * 10) + (*pToken
& 0x0F);
1730 TRACE("ps_%lu_%lu\n", (*pToken
>> 8) & 0x0F, (*pToken
& 0x0F));
1735 if (pshader_is_comment_token(*pToken
)) { /** comment */
1736 DWORD comment_len
= (*pToken
& D3DSI_COMMENTSIZE_MASK
) >> D3DSI_COMMENTSIZE_SHIFT
;
1738 TRACE("//%s\n", (char*)pToken
);
1739 pToken
+= comment_len
;
1740 len
+= comment_len
+ 1;
1743 if (!This
->version
) {
1744 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This
);
1746 curOpcode
= pshader_program_get_opcode(This
, *pToken
);
1749 if (NULL
== curOpcode
) {
1751 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1752 while (*pToken
& 0x80000000) {
1754 /* unknown current opcode ... */
1755 TRACE("unrecognized opcode: %08lx", *pToken
);
1762 if (curOpcode
->opcode
== D3DSIO_DCL
) {
1763 pshader_program_dump_decl_usage(This
, *pToken
);
1766 pshader_program_dump_ps_param(*pToken
, 0);
1770 if (curOpcode
->opcode
== D3DSIO_DEF
) {
1771 TRACE("def c%lu = ", *pToken
& 0xFF);
1774 TRACE("%f ,", *(float *)pToken
);
1777 TRACE("%f ,", *(float *)pToken
);
1780 TRACE("%f ,", *(float *)pToken
);
1783 TRACE("%f", *(float *)pToken
);
1787 TRACE("%s ", curOpcode
->name
);
1788 if (curOpcode
->num_params
> 0) {
1789 pshader_program_dump_ps_param(*pToken
, 0);
1792 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
1794 pshader_program_dump_ps_param(*pToken
, 1);
1803 This
->functionLength
= (len
+ 1) * sizeof(DWORD
);
1805 This
->functionLength
= 1; /* no Function defined use fixed function vertex processing */
1808 /* Generate HW shader if needed */
/* NOTE(review): the hardware path is gated on vs_mode == VS_HW, which by name is the vertex shader setting - confirm this is intended for pixel shaders */
1809 if (NULL
!= pFunction
&& wined3d_settings
.vs_mode
== VS_HW
) {
1810 TRACE("(%p) : Generating hardware program\n", This
);
1812 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface
, pFunction
);
1816 TRACE("(%p) : Copying the function\n", This
);
1817 /* copy the function ... because it will certainly be released by application */
1818 if (NULL
!= pFunction
) {
/* NOTE(review): the allocation result is passed to memcpy without a NULL check in the visible code */
1819 This
->function
= HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY
, This
->functionLength
);
1820 memcpy((void *)This
->function
, pFunction
, This
->functionLength
);
1822 This
->function
= NULL
;
1825 /* TODO: Some proper return values for failures */
1826 TRACE("(%p) : Returning D3D_OK\n", This
);
1830 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl
=
1832 /*** IUnknown methods ***/
1833 IWineD3DPixelShaderImpl_QueryInterface
,
1834 IWineD3DPixelShaderImpl_AddRef
,
1835 IWineD3DPixelShaderImpl_Release
,
1836 /*** IWineD3DPixelShader methods ***/
1837 IWineD3DPixelShaderImpl_GetParent
,
1838 IWineD3DPixelShaderImpl_GetDevice
,
1839 IWineD3DPixelShaderImpl_GetFunction
,
1840 /* not part of d3d */
1841 IWineD3DPixelShaderImpl_SetFunction