wined3d: Skip unhandled opcodes in vshaders.
[wine/wine-kai.git] / dlls / wined3d / pixelshader.c
blob08d194a024f921f4b9d400cf66992d081ea0b773
1 /*
2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "config.h"
23 #include <math.h>
24 #include <stdio.h>
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
35 #else
36 # define PSTRACE(A)
37 # define TRACE_VSVECTOR(name)
38 #endif
40 /* The maximum size of the program */
41 #define PGMSIZE 65535
43 #define REGMASK 0x00001FFF
45 #define GLNAME_REQUIRE_GLSL ((const char *)1)
46 /* *******************************************
47 IWineD3DPixelShader IUnknown parts follow
48 ******************************************* */
49 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
51 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
52 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
53 if (IsEqualGUID(riid, &IID_IUnknown)
54 || IsEqualGUID(riid, &IID_IWineD3DBase)
55 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
56 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
57 IUnknown_AddRef(iface);
58 *ppobj = This;
59 return WINED3D_OK;
61 return E_NOINTERFACE;
64 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
65 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
66 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
67 return InterlockedIncrement(&This->ref);
70 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
71 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
72 ULONG ref;
73 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
74 ref = InterlockedDecrement(&This->ref);
75 if (ref == 0) {
76 HeapFree(GetProcessHeap(), 0, This);
78 return ref;
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader).
   When that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
87 /* *******************************************
88 IWineD3DPixelShader IWineD3DPixelShader parts follow
89 ******************************************* */
91 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
92 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
94 *parent = This->parent;
95 IUnknown_AddRef(*parent);
96 TRACE("(%p) : returning %p\n", This, *parent);
97 return WINED3D_OK;
100 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
101 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
102 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
103 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
104 TRACE("(%p) returning %p\n", This, *pDevice);
105 return WINED3D_OK;
109 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
110 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
111 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
113 if (NULL == pData) {
114 *pSizeOfData = This->baseShader.functionLength;
115 return WINED3D_OK;
117 if (*pSizeOfData < This->baseShader.functionLength) {
118 *pSizeOfData = This->baseShader.functionLength;
119 return WINED3DERR_MOREDATA;
121 if (NULL == This->baseShader.function) { /* no function defined */
122 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
123 (*(DWORD **) pData) = NULL;
124 } else {
125 if (This->baseShader.functionLength == 0) {
128 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
129 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
131 return WINED3D_OK;
/*******************************
 * pshader functions software VM
 */
138 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
139 d->x = s0->x + s1->x;
140 d->y = s0->y + s1->y;
141 d->z = s0->z + s1->z;
142 d->w = s0->w + s1->w;
143 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
144 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
147 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
148 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
149 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
150 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
153 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
154 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
155 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
156 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
159 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
160 d->x = 1.0f;
161 d->y = s0->y * s1->y;
162 d->z = s0->z;
163 d->w = s1->w;
164 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
168 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
169 union {
170 float f;
171 DWORD d;
172 } tmp;
174 tmp.f = floorf(s0->w);
175 d->x = powf(2.0f, tmp.f);
176 d->y = s0->w - tmp.f;
177 tmp.f = powf(2.0f, s0->w);
178 tmp.d &= 0xFFFFFF00U;
179 d->z = tmp.f;
180 d->w = 1.0f;
181 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
182 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
185 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
186 d->x = 1.0f;
187 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
188 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
189 d->w = 1.0f;
190 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
191 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
194 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
195 float tmp_f = fabsf(s0->w);
196 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
197 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
198 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
201 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
202 d->x = s0->x * s1->x + s2->x;
203 d->y = s0->y * s1->y + s2->y;
204 d->z = s0->z * s1->z + s2->z;
205 d->w = s0->w * s1->w + s2->w;
206 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
207 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
210 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
211 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
212 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
213 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
214 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
215 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
216 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
219 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
220 d->x = (s0->x < s1->x) ? s0->x : s1->x;
221 d->y = (s0->y < s1->y) ? s0->y : s1->y;
222 d->z = (s0->z < s1->z) ? s0->z : s1->z;
223 d->w = (s0->w < s1->w) ? s0->w : s1->w;
224 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
225 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
228 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
229 d->x = s0->x;
230 d->y = s0->y;
231 d->z = s0->z;
232 d->w = s0->w;
233 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
234 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
237 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
238 d->x = s0->x * s1->x;
239 d->y = s0->y * s1->y;
240 d->z = s0->z * s1->z;
241 d->w = s0->w * s1->w;
242 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: does nothing apart from the optional trace. */
void pshader_nop(void)
{
    PSTRACE(("executing nop\n"));
}
251 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
252 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
253 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
254 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
257 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
258 float tmp_f = fabsf(s0->w);
259 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
260 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
261 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
264 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
265 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
266 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
267 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
268 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
269 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
270 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
273 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
274 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
275 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
276 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
277 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
278 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
279 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
282 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
283 d->x = s0->x - s1->x;
284 d->y = s0->y - s1->y;
285 d->z = s0->z - s1->z;
286 d->w = s0->w - s1->w;
287 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
288 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/**
 * Version 1.1 specific
 */
295 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
296 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
297 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
298 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
301 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
302 float tmp_f = fabsf(s0->w);
303 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
304 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
305 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
308 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
309 d->x = s0->x - floorf(s0->x);
310 d->y = s0->y - floorf(s0->y);
311 d->z = 0.0f;
312 d->w = 1.0f;
313 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
314 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
317 typedef FLOAT D3DMATRIX44[4][4];
318 typedef FLOAT D3DMATRIX43[4][3];
319 typedef FLOAT D3DMATRIX34[3][4];
320 typedef FLOAT D3DMATRIX33[3][3];
321 typedef FLOAT D3DMATRIX23[2][3];
323 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
325 * Buggy CODE: here only if cast not work for copy/paste
326 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
327 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
328 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
329 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
330 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
331 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
332 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
334 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
335 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
336 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
337 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
338 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
339 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
340 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
341 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
344 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
345 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
346 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
347 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
348 d->w = 1.0f;
349 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
350 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
351 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
352 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
355 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
356 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
357 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
358 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
359 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
360 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
361 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
362 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
363 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
366 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
367 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
368 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
369 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
370 d->w = 1.0f;
371 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
372 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
373 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
374 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
377 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
378 FIXME("check\n");
379 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
380 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
381 d->z = 0.0f;
382 d->w = 1.0f;
/**
 * Version 2.0 specific
 */
388 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
389 d->x = s0->x * (s1->x - s2->x) + s2->x;
390 d->y = s0->y * (s1->y - s2->y) + s2->y;
391 d->z = s0->z * (s1->z - s2->z) + s2->z;
392 d->w = s0->w * (s1->w - s2->w) + s2->w;
395 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
396 d->x = s0->y * s1->z - s0->z * s1->y;
397 d->y = s0->z * s1->x - s0->x * s1->z;
398 d->z = s0->x * s1->y - s0->y * s1->x;
399 d->w = 0.9f; /* w is undefined, so set it to something safeish */
401 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
402 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
405 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
406 d->x = fabsf(s0->x);
407 d->y = fabsf(s0->y);
408 d->z = fabsf(s0->z);
409 d->w = fabsf(s0->w);
410 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
411 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
414 /* Stubs */
415 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
416 FIXME(" : Stub\n");
419 void pshader_texkill(WINED3DSHADERVECTOR* d) {
420 FIXME(" : Stub\n");
423 void pshader_tex(WINED3DSHADERVECTOR* d) {
424 FIXME(" : Stub\n");
426 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
427 FIXME(" : Stub\n");
430 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
431 FIXME(" : Stub\n");
434 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
435 FIXME(" : Stub\n");
438 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
439 FIXME(" : Stub\n");
442 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
443 FIXME(" : Stub\n");
446 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
447 FIXME(" : Stub\n");
450 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
451 FIXME(" : Stub\n");
454 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
455 FIXME(" : Stub\n");
458 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
459 FIXME(" : Stub\n");
462 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
463 FIXME(" : Stub\n");
466 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
467 FIXME(" : Stub\n");
470 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
471 FIXME(" : Stub\n");
474 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
475 FIXME(" : Stub\n");
478 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
479 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
480 FIXME(" : Stub\n");
483 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
484 FIXME(" : Stub\n");
487 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
488 FIXME(" : Stub\n");
491 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
492 FIXME(" : Stub\n");
495 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
496 FIXME(" : Stub\n");
499 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
500 FIXME(" : Stub\n");
503 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
504 FIXME(" : Stub\n");
507 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
508 FIXME(" : Stub\n");
511 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
512 FIXME(" : Stub\n");
515 void pshader_call(WINED3DSHADERVECTOR* d) {
516 FIXME(" : Stub\n");
519 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
520 FIXME(" : Stub\n");
523 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
524 FIXME(" : Stub\n");
527 void pshader_ret(WINED3DSHADERVECTOR* d) {
528 FIXME(" : Stub\n");
531 void pshader_endloop(WINED3DSHADERVECTOR* d) {
532 FIXME(" : Stub\n");
535 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
536 FIXME(" : Stub\n");
539 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
540 FIXME(" : Stub\n");
543 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
544 FIXME(" : Stub\n");
547 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
548 FIXME(" : Stub\n");
551 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
552 FIXME(" : Stub\n");
555 void pshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
556 FIXME(" : Stub\n");
559 void pshader_endrep(void) {
560 FIXME(" : Stub\n");
563 void pshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
564 FIXME(" : Stub\n");
567 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
568 FIXME(" : Stub\n");
571 void pshader_else(WINED3DSHADERVECTOR* d) {
572 FIXME(" : Stub\n");
575 void pshader_label(WINED3DSHADERVECTOR* d) {
576 FIXME(" : Stub\n");
579 void pshader_endif(WINED3DSHADERVECTOR* d) {
580 FIXME(" : Stub\n");
583 void pshader_break(WINED3DSHADERVECTOR* d) {
584 FIXME(" : Stub\n");
587 void pshader_breakc(WINED3DSHADERVECTOR* d) {
588 FIXME(" : Stub\n");
591 void pshader_mova(WINED3DSHADERVECTOR* d) {
592 FIXME(" : Stub\n");
595 void pshader_defb(WINED3DSHADERVECTOR* d) {
596 FIXME(" : Stub\n");
599 void pshader_defi(WINED3DSHADERVECTOR* d) {
600 FIXME(" : Stub\n");
603 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
604 FIXME(" : Stub\n");
607 void pshader_dsx(WINED3DSHADERVECTOR* d) {
608 FIXME(" : Stub\n");
611 void pshader_dsy(WINED3DSHADERVECTOR* d) {
612 FIXME(" : Stub\n");
615 void pshader_texldd(WINED3DSHADERVECTOR* d) {
616 FIXME(" : Stub\n");
619 void pshader_setp(WINED3DSHADERVECTOR* d) {
620 FIXME(" : Stub\n");
623 void pshader_texldl(WINED3DSHADERVECTOR* d) {
624 FIXME(" : Stub\n");
627 void pshader_breakp(WINED3DSHADERVECTOR* d) {
628 FIXME(" : Stub\n");
/**
 * log, exp, frc, m*x* seem to be macro instructions ... to be checked
 */
633 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
634 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
635 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
636 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
637 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
638 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
639 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
640 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
641 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
642 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
643 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
644 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
645 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
646 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
647 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
648 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
649 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
650 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
651 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
652 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
653 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
654 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
655 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
656 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
657 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
658 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
659 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
662 /** FIXME: use direct access so add the others opcodes as stubs */
663 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
664 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
665 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
666 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
667 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
668 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
669 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
670 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
671 /* DCL is a specil operation */
672 {D3DSIO_DCL, "dcl", NULL, 1, pshader_dcl, 0, 0},
673 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
674 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
675 /* TODO: sng can possibly be performed as
676 RCP tmp, vec
677 MUL out, tmp, vec*/
678 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
679 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
680 DP3 tmp , vec, vec;
681 RSQ tmp, tmp.x;
682 MUL vec.xyz, vec, tmp;
683 but I think this is better because it accounts for w properly.
684 DP3 tmp , vec, vec;
685 RSQ tmp, tmp.x;
686 MUL vec, vec, tmp;
689 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
690 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
691 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, pshader_rep, 0, 0},
692 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
693 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, pshader_if, 0, 0},
694 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
695 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, pshader_else, 0, 0},
696 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, pshader_endif, 0, 0},
697 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, pshader_break, 0, 0},
698 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
699 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
700 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
701 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
703 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
704 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
705 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
706 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
707 {D3DSIO_TEX, "texld", "undefined", 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
708 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 3, pshader_texld, D3DPS_VERSION(2,0), -1},
709 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
710 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
711 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
712 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
715 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
716 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
717 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
718 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
721 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
722 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
723 /* def is a special operation */
724 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
725 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
726 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
729 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
730 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
731 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
732 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
733 /* TODO: dp2add can be made out of multiple instuctions */
734 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
735 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
736 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
737 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
738 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
739 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
740 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, pshader_breakp, 0, 0},
741 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
742 {0, NULL, NULL, 0, NULL, 0, 0}
746 inline static const SHADER_OPCODE* pshader_program_get_opcode(IWineD3DPixelShaderImpl *This, const DWORD code) {
747 DWORD i = 0;
748 DWORD version = This->baseShader.version;
749 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
750 const SHADER_OPCODE *shader_ins = This->baseShader.shader_ins;
752 /** TODO: use dichotomic search */
753 while (NULL != shader_ins[i].name) {
754 if (((code & D3DSI_OPCODE_MASK) == shader_ins[i].opcode) &&
755 (((hex_version >= shader_ins[i].min_version) && (hex_version <= shader_ins[i].max_version)) ||
756 ((shader_ins[i].min_version == 0) && (shader_ins[i].max_version == 0)))) {
757 return &shader_ins[i];
759 ++i;
761 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code, code, code & D3DSI_OPCODE_MASK, version);
762 return NULL;
765 inline static BOOL pshader_is_version_token(DWORD token) {
766 return 0xFFFF0000 == (token & 0xFFFF0000);
769 inline static BOOL pshader_is_comment_token(DWORD token) {
770 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
774 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
775 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
777 DWORD reg = param & REGMASK;
778 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
780 switch (regtype) {
781 case D3DSPR_TEMP:
782 sprintf(regstr, "R%lu", reg);
783 break;
784 case D3DSPR_INPUT:
785 if (reg==0) {
786 strcpy(regstr, "fragment.color.primary");
787 } else {
788 strcpy(regstr, "fragment.color.secondary");
790 break;
791 case D3DSPR_CONST:
792 if (constants[reg])
793 sprintf(regstr, "C%lu", reg);
794 else
795 sprintf(regstr, "program.env[%lu]", reg);
796 break;
797 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
798 sprintf(regstr,"T%lu", reg);
799 break;
800 case D3DSPR_RASTOUT:
801 sprintf(regstr, "%s", rastout_reg_names[reg]);
802 break;
803 case D3DSPR_ATTROUT:
804 sprintf(regstr, "oD[%lu]", reg);
805 break;
806 case D3DSPR_TEXCRDOUT:
807 sprintf(regstr, "oT[%lu]", reg);
808 break;
809 default:
810 FIXME("Unhandled register name Type(%ld)\n", regtype);
811 break;
815 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
816 *write_mask = 0;
817 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
818 strcat(write_mask, ".");
819 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
820 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
821 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
822 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
826 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
827 static const char swizzle_reg_chars[] = "rgba";
828 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
829 DWORD swizzle_x = swizzle & 0x03;
830 DWORD swizzle_y = (swizzle >> 2) & 0x03;
831 DWORD swizzle_z = (swizzle >> 4) & 0x03;
832 DWORD swizzle_w = (swizzle >> 6) & 0x03;
834 * swizzle bits fields:
835 * WWZZYYXX
837 *swzstring = 0;
838 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
839 if (swizzle_x == swizzle_y &&
840 swizzle_x == swizzle_z &&
841 swizzle_x == swizzle_w) {
842 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
843 } else {
844 sprintf(swzstring, ".%c%c%c%c",
845 swizzle_reg_chars[swizzle_x],
846 swizzle_reg_chars[swizzle_y],
847 swizzle_reg_chars[swizzle_z],
848 swizzle_reg_chars[swizzle_w]);
853 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
854 int lineLen = strlen(line);
855 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
856 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
857 return;
858 } else {
859 memcpy(pgm + *pgmLength, line, lineLen);
862 *pgmLength += lineLen;
863 ++(*lineNum);
864 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/* Multiplier operand for each value of the 4-bit destination shift field.
 * Only the x2..x16 and d2..d16 shifts have a real operand; every other slot
 * holds the placeholder "dummy". */
static const char* const shift_tab[] = {
    "dummy",     /*  0 (none) */
    "coefmul.x", /*  1 (x2)   */
    "coefmul.y", /*  2 (x4)   */
    "coefmul.z", /*  3 (x8)   */
    "coefmul.w", /*  4 (x16)  */
    "dummy",     /*  5 (x32)  */
    "dummy",     /*  6 (x64)  */
    "dummy",     /*  7 (x128) */
    "dummy",     /*  8 (d256) */
    "dummy",     /*  9 (d128) */
    "dummy",     /* 10 (d64)  */
    "dummy",     /* 11 (d32)  */
    "coefdiv.w", /* 12 (d16)  */
    "coefdiv.z", /* 13 (d8)   */
    "coefdiv.y", /* 14 (d4)   */
    "coefdiv.x"  /* 15 (d2)   */
};

/*
 * Generate a line that does the output modifier computation.
 *
 * saturate   - non-zero to emit MUL_SAT instead of MUL.
 * write_mask - write mask suffix for the destination (may be empty).
 * shift      - index into shift_tab; a value outside the table falls back to
 *              entry 0 instead of reading past the end of the table.
 * regstr     - register that is scaled in place.
 * line       - output buffer for the generated instruction.
 */
inline static void gen_output_modifier_line(int saturate, const char *write_mask, int shift, const char *regstr, char* line) {
    const int entries = (int)(sizeof(shift_tab) / sizeof(shift_tab[0]));
    const char *factor = (shift >= 0 && shift < entries) ? shift_tab[shift] : shift_tab[0];

    sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, factor);
}
891 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
892 /* Generate a line that does the input modifier computation and return the input register to use */
893 static char regstr[256];
894 static char tmpline[256];
895 int insert_line;
897 /* Assume a new line will be added */
898 insert_line = 1;
900 /* Get register name */
901 get_register_name(instr, regstr, constants);
903 TRACE(" Register name %s\n", regstr);
904 switch (instr & D3DSP_SRCMOD_MASK) {
905 case D3DSPSM_NONE:
906 strcpy(outregstr, regstr);
907 insert_line = 0;
908 break;
909 case D3DSPSM_NEG:
910 sprintf(outregstr, "-%s", regstr);
911 insert_line = 0;
912 break;
913 case D3DSPSM_BIAS:
914 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
915 break;
916 case D3DSPSM_BIASNEG:
917 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
918 break;
919 case D3DSPSM_SIGN:
920 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
921 break;
922 case D3DSPSM_SIGNNEG:
923 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
924 break;
925 case D3DSPSM_COMP:
926 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
927 break;
928 case D3DSPSM_X2:
929 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
930 break;
931 case D3DSPSM_X2NEG:
932 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
933 break;
934 case D3DSPSM_DZ:
935 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
936 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
937 strcat(line, "\n"); /* Hack */
938 strcat(line, tmpline);
939 break;
940 case D3DSPSM_DW:
941 sprintf(line, "RCP T%c, %s;", 'A' + tmpreg, regstr);
942 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
943 strcat(line, "\n"); /* Hack */
944 strcat(line, tmpline);
945 break;
946 default:
947 strcpy(outregstr, regstr);
948 insert_line = 0;
951 if (insert_line) {
952 /* Substitute the register name */
953 sprintf(outregstr, "T%c", 'A' + tmpreg);
956 return insert_line;
958 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
959 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
960 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
961 const DWORD *pToken = pFunction;
962 const SHADER_OPCODE *curOpcode = NULL;
963 const DWORD *pInstr;
964 DWORD i;
965 unsigned lineNum = 0; /* The line number of the generated program (for loging)*/
966 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
967 char tmpLine[255];
968 #if 0 /* TODO: loop register (just another address register ) */
969 BOOL hasLoops = FALSE;
970 #endif
972 BOOL saturate; /* clamp to 0.0 -> 1.0*/
973 int row = 0; /* not sure, something to do with macros? */
974 DWORD tcw[2];
975 int version = 0; /* The version of the shader */
977 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
978 unsigned int pgmLength = 0;
980 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
981 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
982 if (This->device->fixupVertexBufferSize < PGMSIZE) {
983 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
984 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
985 This->fixupVertexBufferSize = PGMSIZE;
986 This->fixupVertexBuffer[0] = 0;
988 pgmStr = This->device->fixupVertexBuffer;
989 #else
990 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
991 #endif
994 /* TODO: Think about using a first pass to work out what's required for the second pass. */
995 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
996 This->constants[i] = 0;
998 if (NULL != pToken) {
999 while (D3DPS_END() != *pToken) {
1000 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1001 if (version >= 2) {
1002 instructionSize = pToken & SIZEBITS >> 27;
1004 #endif
1005 if (pshader_is_version_token(*pToken)) { /** version */
1006 int numTemps;
1007 int numConstants;
1009 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1010 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1012 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1014 /* Each release of pixel shaders has had different numbers of temp registers */
1015 switch (version) {
1016 case 10:
1017 case 11:
1018 case 12:
1019 case 13:
1020 case 14: numTemps=12;
1021 numConstants=8;
1022 strcpy(tmpLine, "!!ARBfp1.0\n");
1023 break;
1024 case 20: numTemps=12;
1025 numConstants=8;
1026 strcpy(tmpLine, "!!ARBfp2.0\n");
1027 FIXME("No work done yet to support ps2.0 in hw\n");
1028 break;
1029 case 30: numTemps=32;
1030 numConstants=8;
1031 strcpy(tmpLine, "!!ARBfp3.0\n");
1032 FIXME("No work done yet to support ps3.0 in hw\n");
1033 break;
1034 default:
1035 numTemps=12;
1036 numConstants=8;
1037 strcpy(tmpLine, "!!ARBfp1.0\n");
1038 FIXME("Unrecognized pixel shader version!\n");
1040 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1042 /* TODO: find out how many registers are really needed */
1043 for(i = 0; i < 6; i++) {
1044 sprintf(tmpLine, "TEMP T%lu;\n", i);
1045 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1048 for(i = 0; i < 6; i++) {
1049 sprintf(tmpLine, "TEMP R%lu;\n", i);
1050 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1053 sprintf(tmpLine, "TEMP TMP;\n");
1054 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1055 sprintf(tmpLine, "TEMP TMP2;\n");
1056 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1057 sprintf(tmpLine, "TEMP TA;\n");
1058 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1059 sprintf(tmpLine, "TEMP TB;\n");
1060 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1061 sprintf(tmpLine, "TEMP TC;\n");
1062 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1064 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1065 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1066 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1067 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1068 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1069 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1071 for(i = 0; i < 4; i++) {
1072 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1073 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1076 ++pToken;
1077 continue;
1080 if (pshader_is_comment_token(*pToken)) { /** comment */
1081 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1082 ++pToken;
1083 TRACE("#%s\n", (char*)pToken);
1084 pToken += comment_len;
1085 continue;
1087 /* here */
1088 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1089 code = *pToken;
1090 #endif
1091 pInstr = pToken;
1092 curOpcode = pshader_program_get_opcode(This, *pToken);
1093 ++pToken;
1094 if (NULL == curOpcode) {
1095 /* unknown current opcode ... (shouldn't be any!) */
1096 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1097 FIXME("unrecognized opcode: %08lx\n", *pToken);
1098 ++pToken;
1100 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1101 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1102 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1103 pToken += curOpcode->num_params;
1105 } else if (D3DSIO_DEF == curOpcode->opcode) {
1107 /* Handle definitions here, they don't fit well with the
1108 * other instructions below [for now ] */
1110 DWORD reg = *pToken & REGMASK;
1112 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1113 curOpcode->name, curOpcode->glname, curOpcode->num_params);
1115 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1116 *((const float *)(pToken + 1)),
1117 *((const float *)(pToken + 2)),
1118 *((const float *)(pToken + 3)),
1119 *((const float *)(pToken + 4)) );
1121 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1123 This->constants[reg] = 1;
1124 pToken += 5;
1125 continue;
1127 } else {
1129 /* Common processing: [inst] [dst] [src]* */
1132 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1133 curOpcode->name, curOpcode->glname, curOpcode->num_params);
1135 saturate = FALSE;
1137 /* Build opcode for GL vertex_program */
1138 switch (curOpcode->opcode) {
1139 case D3DSIO_NOP:
1140 case D3DSIO_PHASE:
1141 continue;
1142 case D3DSIO_MOV:
1143 case D3DSIO_CND:
1144 case D3DSIO_CMP:
1145 case D3DSIO_ADD:
1146 case D3DSIO_SUB:
1147 case D3DSIO_MAD:
1148 case D3DSIO_MUL:
1149 case D3DSIO_RCP:
1150 case D3DSIO_RSQ:
1151 case D3DSIO_DP3:
1152 case D3DSIO_DP4:
1153 case D3DSIO_MIN:
1154 case D3DSIO_MAX:
1155 case D3DSIO_SLT:
1156 case D3DSIO_SGE:
1157 case D3DSIO_LIT:
1158 case D3DSIO_DST:
1159 case D3DSIO_FRC:
1160 case D3DSIO_EXPP:
1161 case D3DSIO_LOGP:
1162 case D3DSIO_EXP:
1163 case D3DSIO_LOG:
1164 case D3DSIO_LRP:
1165 case D3DSIO_TEXKILL:
1166 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1167 strcpy(tmpLine, curOpcode->glname);
1168 break;
1169 case D3DSIO_TEX:
1171 char tmp[20];
1172 get_write_mask(*pToken, tmp);
1173 if (version != 14) {
1174 DWORD reg = *pToken & REGMASK;
1175 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1176 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1177 ++pToken;
1178 } else {
1179 char reg2[20];
1180 DWORD reg1 = *pToken & REGMASK;
1181 if (gen_input_modifier_line(*++pToken, 0, reg2, tmpLine, This->constants)) {
1182 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1184 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg2, reg1);
1185 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1186 ++pToken;
1188 continue;
1190 break;
1191 case D3DSIO_TEXCOORD:
1193 char tmp[20];
1194 get_write_mask(*pToken, tmp);
1195 if (version != 14) {
1196 DWORD reg = *pToken & REGMASK;
1197 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1198 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1199 ++pToken;
1200 } else {
1201 DWORD reg1 = *pToken & REGMASK;
1202 DWORD reg2 = *++pToken & REGMASK;
1203 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1204 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1205 ++pToken;
1207 continue;
1209 break;
1210 case D3DSIO_TEXM3x2PAD:
1212 DWORD reg = *pToken & REGMASK;
1213 char buf[50];
1214 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1215 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1217 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1218 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1219 ++pToken;
1220 continue;
1222 break;
1223 case D3DSIO_TEXM3x2TEX:
1225 DWORD reg = *pToken & REGMASK;
1226 char buf[50];
1227 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1228 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1230 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1231 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1232 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1233 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1234 ++pToken;
1235 continue;
1237 break;
1238 case D3DSIO_TEXREG2AR:
1240 DWORD reg1 = *pToken & REGMASK;
1241 DWORD reg2 = *++pToken & REGMASK;
1242 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1243 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1244 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1245 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1246 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1247 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1248 ++pToken;
1249 continue;
1251 break;
1252 case D3DSIO_TEXREG2GB:
1254 DWORD reg1 = *pToken & REGMASK;
1255 DWORD reg2 = *++pToken & REGMASK;
1256 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1257 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1258 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1259 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1260 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1261 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1262 ++pToken;
1263 continue;
1265 break;
1266 case D3DSIO_TEXBEM:
1268 DWORD reg1 = *pToken & REGMASK;
1269 DWORD reg2 = *++pToken & REGMASK;
1271 /* FIXME: Should apply the BUMPMAPENV matrix */
1272 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1273 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1274 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1275 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1276 ++pToken;
1277 continue;
1279 break;
1280 case D3DSIO_TEXM3x3PAD:
1282 DWORD reg = *pToken & REGMASK;
1283 char buf[50];
1284 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1285 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1287 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1288 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1289 tcw[row++] = reg;
1290 ++pToken;
1291 continue;
1293 break;
1294 case D3DSIO_TEXM3x3TEX:
1296 DWORD reg = *pToken & REGMASK;
1297 char buf[50];
1298 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1299 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1302 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1303 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1305 /* Cubemap textures will be more used than 3D ones. */
1306 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1307 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1308 row = 0;
1309 ++pToken;
1310 continue;
1312 case D3DSIO_TEXM3x3VSPEC:
1314 DWORD reg = *pToken & REGMASK;
1315 char buf[50];
1316 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1317 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1319 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1320 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1322 /* Construct the eye-ray vector from w coordinates */
1323 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1324 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1325 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1326 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1327 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1328 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1330 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1331 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1332 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1333 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1334 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1335 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1336 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1338 /* Cubemap textures will be more used than 3D ones. */
1339 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1340 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1341 row = 0;
1342 ++pToken;
1343 continue;
1345 break;
1346 case D3DSIO_TEXM3x3SPEC:
1348 DWORD reg = *pToken & REGMASK;
1349 DWORD reg3 = *(pToken + 2) & REGMASK;
1350 char buf[50];
1351 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1352 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1354 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1355 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1357 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1358 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1359 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1361 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1362 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1363 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1364 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1366 /* Cubemap textures will be more used than 3D ones. */
1367 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1368 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1369 row = 0;
1370 pToken += 3;
1371 continue;
1373 break;
1375 default:
1376 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1377 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1378 } else {
1379 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1381 pToken += curOpcode->num_params;
1382 continue;
1385 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1386 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1387 switch (mask) {
1388 case D3DSPDM_SATURATE: saturate = TRUE; break;
1389 #if 0 /* as yet unhandled modifiers */
1390 case D3DSPDM_CENTROID: centroid = TRUE; break;
1391 case D3DSPDM_PP: partialpresision = TRUE; break;
1392 case D3DSPDM_X2: X2 = TRUE; break;
1393 case D3DSPDM_X4: X4 = TRUE; break;
1394 case D3DSPDM_X8: X8 = TRUE; break;
1395 case D3DSPDM_D2: D2 = TRUE; break;
1396 case D3DSPDM_D4: D4 = TRUE; break;
1397 case D3DSPDM_D8: D8 = TRUE; break;
1398 #endif
1399 default:
1400 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1404 /* Generate input and output registers */
1405 if (curOpcode->num_params > 0) {
1406 char regs[5][50];
1407 char operands[4][100];
1408 char swzstring[20];
1409 int saturate = 0;
1410 char tmpOp[256];
1412 /* Generate lines that handle input modifier computation */
1413 for (i = 1; i < curOpcode->num_params; ++i) {
1414 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1415 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1416 addline(&lineNum, pgmStr, &pgmLength, tmpOp);
1420 /* Handle saturation only when no shift is present in the output modifier */
1421 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1422 saturate = 1;
1424 /* Handle output register */
1425 get_register_name(*pToken, tmpOp, This->constants);
1426 strcpy(operands[0], tmpOp);
1427 get_write_mask(*pToken, tmpOp);
1428 strcat(operands[0], tmpOp);
1430 /* This function works because of side effects from gen_input_modifier_line */
1431 /* Handle input registers */
1432 for (i = 1; i < curOpcode->num_params; ++i) {
1433 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1434 strcpy(operands[i], regs[i - 1]);
1435 get_input_register_swizzle(*(pToken + i), swzstring);
1436 strcat(operands[i], swzstring);
1439 switch(curOpcode->opcode) {
1440 case D3DSIO_CMP:
1441 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
1442 break;
1443 case D3DSIO_CND:
1444 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1445 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1446 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
1447 break;
1448 default:
1449 if (saturate)
1450 strcat(tmpLine, "_SAT");
1451 strcat(tmpLine, " ");
1452 strcat(tmpLine, operands[0]);
1453 for (i = 1; i < curOpcode->num_params; i++) {
1454 strcat(tmpLine, ", ");
1455 strcat(tmpLine, operands[i]);
1457 strcat(tmpLine,";\n");
1459 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1460 pToken += curOpcode->num_params;
1462 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1463 if (curOpcode->num_params > 0) {
1464 DWORD param = *(pInstr + 1);
1465 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1467 /* Generate a line that handle the output modifier computation */
1468 char regstr[100];
1469 char write_mask[20];
1470 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1471 get_register_name(param, regstr, This->constants);
1472 get_write_mask(param, write_mask);
1473 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1474 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1477 #endif
1480 /* TODO: What about result.depth? */
1481 strcpy(tmpLine, "MOV result.color, R0;\n");
1482 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1484 strcpy(tmpLine, "END\n");
1485 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1488 /* finally null terminate the pgmStr*/
1489 pgmStr[pgmLength] = 0;
1490 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1491 /* Create the hw shader */
1493 /* pgmStr sometimes gets too long for a normal TRACE */
1494 TRACE("Generated program:\n");
1495 if (TRACE_ON(d3d_shader)) {
1496 fprintf(stderr, "%s\n", pgmStr);
1499 /* TODO: change to resource.glObjectHandel or something like that */
1500 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
1502 TRACE("Creating a hw pixel shader, prg=%d\n", This->baseShader.prgId);
1503 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->baseShader.prgId));
1505 TRACE("Created hw pixel shader, prg=%d\n", This->baseShader.prgId);
1506 /* Create the program and check for errors */
1507 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
1508 if (glGetError() == GL_INVALID_OPERATION) {
1509 GLint errPos;
1510 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1511 FIXME("HW PixelShader Error at position %d: %s\n",
1512 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1513 This->baseShader.prgId = -1;
1516 #if 1 /* if were using the data buffer of device then we don't need to free it */
1517 HeapFree(GetProcessHeap(), 0, pgmStr);
1518 #endif
1521 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1522 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1523 static const char swizzle_reg_chars[] = "rgba";
1525 /* the unknown mask is for bits not yet accounted for by any other mask... */
1526 #define UNKNOWN_MASK 0xC000
1528 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
1529 #define EXTENDED_REG 0x1800
1531 DWORD reg = param & D3DSP_REGNUM_MASK;
1532 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
1534 if (input) {
1535 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1536 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1537 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1538 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1539 TRACE("-");
1540 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1541 TRACE("1-");
1544 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1545 case D3DSPR_TEMP:
1546 TRACE("r%lu", reg);
1547 break;
1548 case D3DSPR_INPUT:
1549 TRACE("v%lu", reg);
1550 break;
1551 case D3DSPR_CONST:
1552 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1553 break;
1555 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1556 TRACE("t%lu", reg);
1557 break;
1558 case D3DSPR_RASTOUT:
1559 TRACE("%s", rastout_reg_names[reg]);
1560 break;
1561 case D3DSPR_ATTROUT:
1562 TRACE("oD%lu", reg);
1563 break;
1564 case D3DSPR_TEXCRDOUT:
1565 TRACE("oT%lu", reg);
1566 break;
1567 case D3DSPR_CONSTINT:
1568 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1569 break;
1570 case D3DSPR_CONSTBOOL:
1571 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1572 break;
1573 case D3DSPR_LABEL:
1574 TRACE("l%lu", reg);
1575 break;
1576 case D3DSPR_LOOP:
1577 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1578 break;
1579 default:
1580 break;
1583 if (!input) {
1584 /** operand output */
1586 * for better debugging traces it's done into opcode dump code
1587 * @see pshader_program_dump_opcode
1588 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1589 DWORD mask = param & D3DSP_DSTMOD_MASK;
1590 switch (mask) {
1591 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1592 default:
1593 TRACE("_unhandled_modifier(0x%08lx)", mask);
1596 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1597 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1598 if (shift > 0) {
1599 TRACE("_x%u", 1 << shift);
1603 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1604 TRACE(".");
1605 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1606 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1607 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1608 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1610 } else {
1611 /** operand input */
1612 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1613 DWORD swizzle_r = swizzle & 0x03;
1614 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1615 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1616 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1618 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1619 DWORD mask = param & D3DSP_SRCMOD_MASK;
1620 /*TRACE("_modifier(0x%08lx) ", mask);*/
1621 switch (mask) {
1622 case D3DSPSM_NONE: break;
1623 case D3DSPSM_NEG: break;
1624 case D3DSPSM_BIAS: TRACE("_bias"); break;
1625 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1626 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1627 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1628 case D3DSPSM_COMP: break;
1629 case D3DSPSM_X2: TRACE("_x2"); break;
1630 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1631 case D3DSPSM_DZ: TRACE("_dz"); break;
1632 case D3DSPSM_DW: TRACE("_dw"); break;
1633 default:
1634 TRACE("_unknown(0x%08lx)", mask);
1639 * swizzle bits fields:
1640 * RRGGBBAA
1642 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1643 if (swizzle_r == swizzle_g &&
1644 swizzle_r == swizzle_b &&
1645 swizzle_r == swizzle_a) {
1646 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1647 } else {
1648 TRACE(".%c%c%c%c",
1649 swizzle_reg_chars[swizzle_r],
1650 swizzle_reg_chars[swizzle_g],
1651 swizzle_reg_chars[swizzle_b],
1652 swizzle_reg_chars[swizzle_a]);
1658 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl *This, DWORD token) {
1659 TRACE("dcl_");
1660 switch(token & 0xFFFF) {
1661 case D3DDECLUSAGE_POSITION:
1662 TRACE("%s%ld ", "position",(token & 0xF0000) >> 16);
1663 break;
1664 case D3DDECLUSAGE_BLENDINDICES:
1665 TRACE("%s ", "blend");
1666 break;
1667 case D3DDECLUSAGE_BLENDWEIGHT:
1668 TRACE("%s ", "weight");
1669 break;
1670 case D3DDECLUSAGE_NORMAL:
1671 TRACE("%s%ld ", "normal",(token & 0xF0000) >> 16);
1672 break;
1673 case D3DDECLUSAGE_PSIZE:
1674 TRACE("%s ", "psize");
1675 break;
1676 case D3DDECLUSAGE_COLOR:
1677 if((token & 0xF0000) >> 16 == 0) {
1678 TRACE("%s ", "color");
1679 } else {
1680 TRACE("%s%ld ", "specular", ((token & 0xF0000) >> 16) - 1);
1682 break;
1683 case D3DDECLUSAGE_TEXCOORD:
1684 TRACE("%s%ld ", "texture", (token & 0xF0000) >> 16);
1685 break;
1686 case D3DDECLUSAGE_TANGENT:
1687 TRACE("%s ", "tangent");
1688 break;
1689 case D3DDECLUSAGE_BINORMAL:
1690 TRACE("%s ", "binormal");
1691 break;
1692 case D3DDECLUSAGE_TESSFACTOR:
1693 TRACE("%s ", "tessfactor");
1694 break;
1695 case D3DDECLUSAGE_POSITIONT:
1696 TRACE("%s%ld ", "positionT",(token & 0xF0000) >> 16);
1697 break;
1698 case D3DDECLUSAGE_FOG:
1699 TRACE("%s ", "fog");
1700 break;
1701 case D3DDECLUSAGE_DEPTH:
1702 TRACE("%s ", "depth");
1703 break;
1704 case D3DDECLUSAGE_SAMPLE:
1705 TRACE("%s ", "sample");
1706 break;
1707 default:
1708 FIXME("Unrecognised dcl %08lx", token & 0xFFFF);
1712 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1713 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1714 const DWORD* pToken = pFunction;
1715 const SHADER_OPCODE *curOpcode = NULL;
1716 DWORD len = 0;
1717 DWORD i;
1718 TRACE("(%p) : Parsing programme\n", This);
1720 if (NULL != pToken) {
1721 while (D3DPS_END() != *pToken) {
1722 if (pshader_is_version_token(*pToken)) { /** version */
1723 This->baseShader.version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1724 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1725 ++pToken;
1726 ++len;
1727 continue;
1729 if (pshader_is_comment_token(*pToken)) { /** comment */
1730 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1731 ++pToken;
1732 TRACE("//%s\n", (char*)pToken);
1733 pToken += comment_len;
1734 len += comment_len + 1;
1735 continue;
1737 if (!This->baseShader.version) {
1738 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1740 curOpcode = pshader_program_get_opcode(This, *pToken);
1741 ++pToken;
1742 ++len;
1743 if (NULL == curOpcode) {
1745 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1746 while (*pToken & 0x80000000) {
1748 /* unknown current opcode ... */
1749 TRACE("unrecognized opcode: %08lx", *pToken);
1750 ++pToken;
1751 ++len;
1752 TRACE("\n");
1755 } else {
1756 if (curOpcode->opcode == D3DSIO_DCL) {
1757 pshader_program_dump_decl_usage(This, *pToken);
1758 ++pToken;
1759 ++len;
1760 pshader_program_dump_ps_param(*pToken, 0);
1761 ++pToken;
1762 ++len;
1763 } else
1764 if (curOpcode->opcode == D3DSIO_DEF) {
1765 TRACE("def c%lu = ", *pToken & 0xFF);
1766 ++pToken;
1767 ++len;
1768 TRACE("%f ,", *(float *)pToken);
1769 ++pToken;
1770 ++len;
1771 TRACE("%f ,", *(float *)pToken);
1772 ++pToken;
1773 ++len;
1774 TRACE("%f ,", *(float *)pToken);
1775 ++pToken;
1776 ++len;
1777 TRACE("%f", *(float *)pToken);
1778 ++pToken;
1779 ++len;
1780 } else {
1781 TRACE("%s ", curOpcode->name);
1782 if (curOpcode->num_params > 0) {
1783 pshader_program_dump_ps_param(*pToken, 0);
1784 ++pToken;
1785 ++len;
1786 for (i = 1; i < curOpcode->num_params; ++i) {
1787 TRACE(", ");
1788 pshader_program_dump_ps_param(*pToken, 1);
1789 ++pToken;
1790 ++len;
1794 TRACE("\n");
1797 This->baseShader.functionLength = (len + 1) * sizeof(DWORD);
1798 } else {
1799 This->baseShader.functionLength = 1; /* no Function defined use fixed function vertex processing */
1802 /* Generate HW shader in needed */
1803 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1804 TRACE("(%p) : Generating hardware program\n", This);
1805 #if 1
1806 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1807 #endif
1810 TRACE("(%p) : Copying the function\n", This);
1811 /* copy the function ... because it will certainly be released by application */
1812 if (NULL != pFunction) {
1813 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1814 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1815 } else {
1816 This->baseShader.function = NULL;
1819 /* TODO: Some proper return values for failures */
1820 TRACE("(%p) : Returning WINED3D_OK\n", This);
1821 return WINED3D_OK;
1824 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1826 /*** IUnknown methods ***/
1827 IWineD3DPixelShaderImpl_QueryInterface,
1828 IWineD3DPixelShaderImpl_AddRef,
1829 IWineD3DPixelShaderImpl_Release,
1830 /*** IWineD3DBase methods ***/
1831 IWineD3DPixelShaderImpl_GetParent,
1832 /*** IWineD3DBaseShader methods ***/
1833 IWineD3DPixelShaderImpl_SetFunction,
1834 /*** IWineD3DPixelShader methods ***/
1835 IWineD3DPixelShaderImpl_GetDevice,
1836 IWineD3DPixelShaderImpl_GetFunction