shdocvw: Add class definition for InternetExplorer.
[wine/multimedia.git] / dlls / wined3d / pixelshader.c
blob2952982c6e42797aad3cd6080cd5d661e4578846
1 /*
2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "config.h"
23 #include <math.h>
24 #include <stdio.h>
26 #include "wined3d_private.h"
WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);

/* Expands to the gl_info member reached from a local `This` through
 * This->wineD3DDevice and that device's wineD3D pointer. */
#define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info

/* Per-instruction tracing helpers; they expand to nothing unless the
 * condition below is switched to 1. */
#if 0 /* Must not be 1 in cvs version */
# define PSTRACE(A) TRACE A
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
# define PSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif

/* The maximum size of the program */
#define PGMSIZE 65535

/* Mask applied to a parameter token to extract the register number. */
#define REGMASK 0x00001FFF

/* Sentinel stored in the GL name column of the opcode table. */
#define GLNAME_REQUIRE_GLSL ((const char *)1)
46 /* *******************************************
47 IWineD3DPixelShader IUnknown parts follow
48 ******************************************* */
49 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
51 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
52 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
53 if (IsEqualGUID(riid, &IID_IUnknown)
54 || IsEqualGUID(riid, &IID_IWineD3DBase)
55 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
56 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
57 IUnknown_AddRef(iface);
58 *ppobj = This;
59 return D3D_OK;
61 return E_NOINTERFACE;
64 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
65 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
66 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
67 return InterlockedIncrement(&This->ref);
70 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
71 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
72 ULONG ref;
73 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
74 ref = InterlockedDecrement(&This->ref);
75 if (ref == 0) {
76 HeapFree(GetProcessHeap(), 0, This);
78 return ref;
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
   when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
87 /* *******************************************
88 IWineD3DPixelShader IWineD3DPixelShader parts follow
89 ******************************************* */
91 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
92 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
94 *parent = This->parent;
95 IUnknown_AddRef(*parent);
96 TRACE("(%p) : returning %p\n", This, *parent);
97 return D3D_OK;
100 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
101 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
102 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
103 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
104 TRACE("(%p) returning %p\n", This, *pDevice);
105 return D3D_OK;
109 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
110 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
111 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
113 if (NULL == pData) {
114 *pSizeOfData = This->baseShader.functionLength;
115 return D3D_OK;
117 if (*pSizeOfData < This->baseShader.functionLength) {
118 *pSizeOfData = This->baseShader.functionLength;
119 return D3DERR_MOREDATA;
121 if (NULL == This->baseShader.function) { /* no function defined */
122 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
123 (*(DWORD **) pData) = NULL;
124 } else {
125 if (This->baseShader.functionLength == 0) {
128 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
129 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
131 return D3D_OK;
134 /*******************************
135 * pshader functions software VM
138 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
139 d->x = s0->x + s1->x;
140 d->y = s0->y + s1->y;
141 d->z = s0->z + s1->z;
142 d->w = s0->w + s1->w;
143 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
144 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
147 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
148 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
149 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
150 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
153 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
154 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
155 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
156 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
159 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
160 d->x = 1.0f;
161 d->y = s0->y * s1->y;
162 d->z = s0->z;
163 d->w = s1->w;
164 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
168 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
169 union {
170 float f;
171 DWORD d;
172 } tmp;
174 tmp.f = floorf(s0->w);
175 d->x = powf(2.0f, tmp.f);
176 d->y = s0->w - tmp.f;
177 tmp.f = powf(2.0f, s0->w);
178 tmp.d &= 0xFFFFFF00U;
179 d->z = tmp.f;
180 d->w = 1.0f;
181 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
182 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
185 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
186 d->x = 1.0f;
187 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
188 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
189 d->w = 1.0f;
190 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
191 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
194 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
195 float tmp_f = fabsf(s0->w);
196 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
197 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
198 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
201 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
202 d->x = s0->x * s1->x + s2->x;
203 d->y = s0->y * s1->y + s2->y;
204 d->z = s0->z * s1->z + s2->z;
205 d->w = s0->w * s1->w + s2->w;
206 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
207 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
210 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
211 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
212 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
213 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
214 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
215 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
216 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
219 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
220 d->x = (s0->x < s1->x) ? s0->x : s1->x;
221 d->y = (s0->y < s1->y) ? s0->y : s1->y;
222 d->z = (s0->z < s1->z) ? s0->z : s1->z;
223 d->w = (s0->w < s1->w) ? s0->w : s1->w;
224 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
225 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
228 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
229 d->x = s0->x;
230 d->y = s0->y;
231 d->z = s0->z;
232 d->w = s0->w;
233 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
234 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
237 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
238 d->x = s0->x * s1->x;
239 d->y = s0->y * s1->y;
240 d->z = s0->z * s1->z;
241 d->w = s0->w * s1->w;
242 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: does nothing apart from the optional trace. */
void pshader_nop(void) {
    PSTRACE(("executing nop\n"));
}
251 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
252 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
253 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
254 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
257 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
258 float tmp_f = fabsf(s0->w);
259 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
260 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
261 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
264 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
265 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
266 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
267 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
268 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
269 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
270 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
273 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
274 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
275 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
276 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
277 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
278 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
279 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
282 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
283 d->x = s0->x - s1->x;
284 d->y = s0->y - s1->y;
285 d->z = s0->z - s1->z;
286 d->w = s0->w - s1->w;
287 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
288 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
292 * Version 1.1 specific
295 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
296 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
297 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
298 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
301 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
302 float tmp_f = fabsf(s0->w);
303 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
304 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
305 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
308 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
309 d->x = s0->x - floorf(s0->x);
310 d->y = s0->y - floorf(s0->y);
311 d->z = 0.0f;
312 d->w = 1.0f;
313 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
314 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
317 typedef FLOAT D3DMATRIX44[4][4];
318 typedef FLOAT D3DMATRIX43[4][3];
319 typedef FLOAT D3DMATRIX34[3][4];
320 typedef FLOAT D3DMATRIX33[3][3];
321 typedef FLOAT D3DMATRIX23[2][3];
323 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
325 * Buggy CODE: here only if cast not work for copy/paste
326 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
327 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
328 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
329 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
330 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
331 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
332 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
334 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
335 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
336 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
337 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
338 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
339 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
340 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
341 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
344 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
345 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
346 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
347 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
348 d->w = 1.0f;
349 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
350 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
351 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
352 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
355 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
356 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
357 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
358 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
359 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
360 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
361 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
362 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
363 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
366 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
367 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
368 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
369 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
370 d->w = 1.0f;
371 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
372 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
373 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
374 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
377 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
378 FIXME("check\n");
379 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
380 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
381 d->z = 0.0f;
382 d->w = 1.0f;
386 * Version 2.0 specific
388 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
389 d->x = s0->x * (s1->x - s2->x) + s2->x;
390 d->y = s0->y * (s1->y - s2->y) + s2->y;
391 d->z = s0->z * (s1->z - s2->z) + s2->z;
392 d->w = s0->w * (s1->w - s2->w) + s2->w;
395 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
396 d->x = s0->y * s1->z - s0->z * s1->y;
397 d->y = s0->z * s1->x - s0->x * s1->z;
398 d->z = s0->x * s1->y - s0->y * s1->x;
399 d->w = 0.9f; /* w is undefined, so set it to something safeish */
401 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
402 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
405 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
406 d->x = fabsf(s0->x);
407 d->y = fabsf(s0->y);
408 d->z = fabsf(s0->z);
409 d->w = fabsf(s0->w);
410 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
411 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
414 /* Stubs */
415 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
416 FIXME(" : Stub\n");
419 void pshader_texkill(WINED3DSHADERVECTOR* d) {
420 FIXME(" : Stub\n");
423 void pshader_tex(WINED3DSHADERVECTOR* d) {
424 FIXME(" : Stub\n");
426 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
427 FIXME(" : Stub\n");
430 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
431 FIXME(" : Stub\n");
434 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
435 FIXME(" : Stub\n");
438 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
439 FIXME(" : Stub\n");
442 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
443 FIXME(" : Stub\n");
446 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
447 FIXME(" : Stub\n");
450 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
451 FIXME(" : Stub\n");
454 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
455 FIXME(" : Stub\n");
458 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
459 FIXME(" : Stub\n");
462 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
463 FIXME(" : Stub\n");
466 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
467 FIXME(" : Stub\n");
470 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
471 FIXME(" : Stub\n");
474 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
475 FIXME(" : Stub\n");
478 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
479 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
480 FIXME(" : Stub\n");
483 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
484 FIXME(" : Stub\n");
487 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
488 FIXME(" : Stub\n");
491 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
492 FIXME(" : Stub\n");
495 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
496 FIXME(" : Stub\n");
499 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
500 FIXME(" : Stub\n");
503 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
504 FIXME(" : Stub\n");
507 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
508 FIXME(" : Stub\n");
511 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
512 FIXME(" : Stub\n");
515 void pshader_call(WINED3DSHADERVECTOR* d) {
516 FIXME(" : Stub\n");
519 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
520 FIXME(" : Stub\n");
523 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
524 FIXME(" : Stub\n");
527 void pshader_ret(WINED3DSHADERVECTOR* d) {
528 FIXME(" : Stub\n");
531 void pshader_endloop(WINED3DSHADERVECTOR* d) {
532 FIXME(" : Stub\n");
535 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
536 FIXME(" : Stub\n");
539 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
540 FIXME(" : Stub\n");
543 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
544 FIXME(" : Stub\n");
547 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
548 FIXME(" : Stub\n");
551 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
552 FIXME(" : Stub\n");
555 void pshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
556 FIXME(" : Stub\n");
559 void pshader_endrep(void) {
560 FIXME(" : Stub\n");
563 void pshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
564 FIXME(" : Stub\n");
567 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
568 FIXME(" : Stub\n");
571 void pshader_else(WINED3DSHADERVECTOR* d) {
572 FIXME(" : Stub\n");
575 void pshader_label(WINED3DSHADERVECTOR* d) {
576 FIXME(" : Stub\n");
579 void pshader_endif(WINED3DSHADERVECTOR* d) {
580 FIXME(" : Stub\n");
583 void pshader_break(WINED3DSHADERVECTOR* d) {
584 FIXME(" : Stub\n");
587 void pshader_breakc(WINED3DSHADERVECTOR* d) {
588 FIXME(" : Stub\n");
591 void pshader_mova(WINED3DSHADERVECTOR* d) {
592 FIXME(" : Stub\n");
595 void pshader_defb(WINED3DSHADERVECTOR* d) {
596 FIXME(" : Stub\n");
599 void pshader_defi(WINED3DSHADERVECTOR* d) {
600 FIXME(" : Stub\n");
603 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
604 FIXME(" : Stub\n");
607 void pshader_dsx(WINED3DSHADERVECTOR* d) {
608 FIXME(" : Stub\n");
611 void pshader_dsy(WINED3DSHADERVECTOR* d) {
612 FIXME(" : Stub\n");
615 void pshader_texldd(WINED3DSHADERVECTOR* d) {
616 FIXME(" : Stub\n");
619 void pshader_setp(WINED3DSHADERVECTOR* d) {
620 FIXME(" : Stub\n");
623 void pshader_texldl(WINED3DSHADERVECTOR* d) {
624 FIXME(" : Stub\n");
627 void pshader_breakp(WINED3DSHADERVECTOR* d) {
628 FIXME(" : Stub\n");
631 * log, exp, frc, m*x* seems to be macros ins ... to see
633 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
634 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
635 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
636 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
637 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
638 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
639 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
640 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
641 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
642 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
643 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
644 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
645 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
646 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
647 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
648 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
649 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
650 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
651 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
652 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
653 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
654 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
655 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
656 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
657 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
658 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
659 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
662 /** FIXME: use direct access so add the others opcodes as stubs */
663 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
664 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
665 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
666 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
667 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
668 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
669 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
670 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
671 /* DCL is a specil operation */
672 {D3DSIO_DCL, "dcl", NULL, 1, pshader_dcl, 0, 0},
673 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
674 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
675 /* TODO: sng can possibly be performed as
676 RCP tmp, vec
677 MUL out, tmp, vec*/
678 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
679 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
680 DP3 tmp , vec, vec;
681 RSQ tmp, tmp.x;
682 MUL vec.xyz, vec, tmp;
683 but I think this is better because it accounts for w properly.
684 DP3 tmp , vec, vec;
685 RSQ tmp, tmp.x;
686 MUL vec, vec, tmp;
689 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
690 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
691 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, pshader_rep, 0, 0},
692 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
693 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, pshader_if, 0, 0},
694 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
695 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, pshader_else, 0, 0},
696 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, pshader_endif, 0, 0},
697 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, pshader_break, 0, 0},
698 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
699 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
700 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
701 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
703 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
704 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
705 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
706 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
707 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
708 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 3, pshader_texld, D3DPS_VERSION(2,0), -1},
709 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
710 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
711 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
712 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
715 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
716 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
717 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
718 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
721 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
722 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
723 /* def is a special operation */
724 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
725 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
726 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
729 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
730 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
731 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
732 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
733 /* TODO: dp2add can be made out of multiple instuctions */
734 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
735 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
736 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
737 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
738 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
739 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
740 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, pshader_breakp, 0, 0},
741 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
742 {0, NULL, NULL, 0, NULL, 0, 0}
746 inline static const SHADER_OPCODE* pshader_program_get_opcode(IWineD3DPixelShaderImpl *This, const DWORD code) {
747 DWORD i = 0;
748 DWORD version = This->baseShader.version;
749 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
750 const SHADER_OPCODE *shader_ins = This->baseShader.shader_ins;
752 /** TODO: use dichotomic search */
753 while (NULL != shader_ins[i].name) {
754 if (((code & D3DSI_OPCODE_MASK) == shader_ins[i].opcode) &&
755 (((hex_version >= shader_ins[i].min_version) && (hex_version <= shader_ins[i].max_version)) ||
756 ((shader_ins[i].min_version == 0) && (shader_ins[i].max_version == 0)))) {
757 return &shader_ins[i];
759 ++i;
761 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code, code, code & D3DSI_OPCODE_MASK, version);
762 return NULL;
765 inline static BOOL pshader_is_version_token(DWORD token) {
766 return 0xFFFF0000 == (token & 0xFFFF0000);
769 inline static BOOL pshader_is_comment_token(DWORD token) {
770 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
774 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
775 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
777 DWORD reg = param & REGMASK;
778 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
780 switch (regtype) {
781 case D3DSPR_TEMP:
782 sprintf(regstr, "R%lu", reg);
783 break;
784 case D3DSPR_INPUT:
785 if (reg==0) {
786 strcpy(regstr, "fragment.color.primary");
787 } else {
788 strcpy(regstr, "fragment.color.secondary");
790 break;
791 case D3DSPR_CONST:
792 if (constants[reg])
793 sprintf(regstr, "C%lu", reg);
794 else
795 sprintf(regstr, "program.env[%lu]", reg);
796 break;
797 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
798 sprintf(regstr,"T%lu", reg);
799 break;
800 case D3DSPR_RASTOUT:
801 sprintf(regstr, "%s", rastout_reg_names[reg]);
802 break;
803 case D3DSPR_ATTROUT:
804 sprintf(regstr, "oD[%lu]", reg);
805 break;
806 case D3DSPR_TEXCRDOUT:
807 sprintf(regstr, "oT[%lu]", reg);
808 break;
809 default:
810 FIXME("Unhandled register name Type(%ld)\n", regtype);
811 break;
815 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
816 *write_mask = 0;
817 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
818 strcat(write_mask, ".");
819 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
820 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
821 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
822 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
826 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
827 static const char swizzle_reg_chars[] = "rgba";
828 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
829 DWORD swizzle_x = swizzle & 0x03;
830 DWORD swizzle_y = (swizzle >> 2) & 0x03;
831 DWORD swizzle_z = (swizzle >> 4) & 0x03;
832 DWORD swizzle_w = (swizzle >> 6) & 0x03;
834 * swizzle bits fields:
835 * WWZZYYXX
837 *swzstring = 0;
838 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
839 if (swizzle_x == swizzle_y &&
840 swizzle_x == swizzle_z &&
841 swizzle_x == swizzle_w) {
842 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
843 } else {
844 sprintf(swzstring, ".%c%c%c%c",
845 swizzle_reg_chars[swizzle_x],
846 swizzle_reg_chars[swizzle_y],
847 swizzle_reg_chars[swizzle_z],
848 swizzle_reg_chars[swizzle_w]);
853 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
854 int lineLen = strlen(line);
855 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
856 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
857 return;
858 } else {
859 memcpy(pgm + *pgmLength, line, lineLen);
862 *pgmLength += lineLen;
863 ++(*lineNum);
864 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/* Operand names used for the destination shift modifier, indexed by the shift
 * value handed to gen_output_modifier_line. "coefmul" and "coefdiv" are PARAMs
 * declared in the generated program ({ 2, 4, 8, 16 } and
 * { 0.5, 0.25, 0.125, 0.0625 }); the trailing comment on each entry gives the
 * shift value and the scale it stands for. Entries named "dummy" have no
 * matching component in either PARAM. */
867 static const char* shift_tab[] = {
868 "dummy", /* 0 (none) */
869 "coefmul.x", /* 1 (x2) */
870 "coefmul.y", /* 2 (x4) */
871 "coefmul.z", /* 3 (x8) */
872 "coefmul.w", /* 4 (x16) */
873 "dummy", /* 5 (x32) */
874 "dummy", /* 6 (x64) */
875 "dummy", /* 7 (x128) */
876 "dummy", /* 8 (d256) */
877 "dummy", /* 9 (d128) */
878 "dummy", /* 10 (d64) */
879 "dummy", /* 11 (d32) */
880 "coefdiv.w", /* 12 (d16) */
881 "coefdiv.z", /* 13 (d8) */
882 "coefdiv.y", /* 14 (d4) */
883 "coefdiv.x" /* 15 (d2) */
886 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
887 /* Generate a line that does the output modifier computation */
888 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
891 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
892 /* Generate a line that does the input modifier computation and return the input register to use */
893 static char regstr[256];
894 static char tmpline[256];
895 int insert_line;
897 /* Assume a new line will be added */
898 insert_line = 1;
900 /* Get register name */
901 get_register_name(instr, regstr, constants);
903 TRACE(" Register name %s\n", regstr);
904 switch (instr & D3DSP_SRCMOD_MASK) {
905 case D3DSPSM_NONE:
906 strcpy(outregstr, regstr);
907 insert_line = 0;
908 break;
909 case D3DSPSM_NEG:
910 sprintf(outregstr, "-%s", regstr);
911 insert_line = 0;
912 break;
913 case D3DSPSM_BIAS:
914 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
915 break;
916 case D3DSPSM_BIASNEG:
917 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
918 break;
919 case D3DSPSM_SIGN:
920 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
921 break;
922 case D3DSPSM_SIGNNEG:
923 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
924 break;
925 case D3DSPSM_COMP:
926 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
927 break;
928 case D3DSPSM_X2:
929 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
930 break;
931 case D3DSPSM_X2NEG:
932 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
933 break;
934 case D3DSPSM_DZ:
935 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
936 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
937 strcat(line, "\n"); /* Hack */
938 strcat(line, tmpline);
939 break;
940 case D3DSPSM_DW:
941 sprintf(line, "RCP T%c, %s;", 'A' + tmpreg, regstr);
942 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
943 strcat(line, "\n"); /* Hack */
944 strcat(line, tmpline);
945 break;
946 default:
947 strcpy(outregstr, regstr);
948 insert_line = 0;
951 if (insert_line) {
952 /* Substitute the register name */
953 sprintf(outregstr, "T%c", 'A' + tmpreg);
956 return insert_line;
/*
 * Translate a D3D pixel shader token stream into an ARB fragment program
 * string and load it with glProgramStringARB.
 *
 * iface:     the pixel shader; its constants[] flags and baseShader.prgId are
 *            written by this function.
 * pFunction: first token of the shader; tokens are read until D3DPS_END().
 *            When NULL, only the trailing "MOV result.color, R0;" and "END"
 *            lines are generated.
 *
 * Outline of what the code below does:
 *   - allocates a zeroed PGMSIZE byte buffer for the program text and clears
 *     This->constants;
 *   - on a version token, emits the "!!ARBfp" header, the TEMP and PARAM
 *     declarations and the initial MOVs from fragment.texcoord[0..3];
 *   - skips comment tokens;
 *   - skips opcodes that are unknown or flagged GLNAME_REQUIRE_GLSL;
 *   - translates the remaining opcodes: texture instructions and DEF emit
 *     their lines directly and continue; the arithmetic opcodes copy the GL
 *     mnemonic into tmpLine and are completed by the generic operand code
 *     after the switch;
 *   - appends the final MOV/END lines, uploads the program and frees the
 *     buffer.
 *
 * NOTE(review): the result of HeapAlloc is used without a NULL check.
 * NOTE(review): the upload is guarded by GL_SUPPORT(ARB_VERTEX_PROGRAM)
 * although the program target is GL_FRAGMENT_PROGRAM_ARB - confirm intended.
 */
958 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
959 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
960 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
961 const DWORD *pToken = pFunction;
962 const SHADER_OPCODE *curOpcode = NULL;
963 const DWORD *pInstr;
964 DWORD i;
965 unsigned lineNum = 0; /* The line number of the generated program (for loging)*/
966 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
967 char tmpLine[255];
968 DWORD nUseAddressRegister = 0;
969 #if 0 /* TODO: loop register (just another address register ) */
970 BOOL hasLoops = FALSE;
971 #endif
973 BOOL saturate; /* clamp to 0.0 -> 1.0*/
974 int row = 0; /* not sure, something to do with macros? */
975 DWORD tcw[2];
976 int version = 0; /* The version of the shader */
978 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
979 unsigned int pgmLength = 0;
981 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
982 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
983 if (This->device->fixupVertexBufferSize < PGMSIZE) {
984 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
985 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
986 This->fixupVertexBufferSize = PGMSIZE;
987 This->fixupVertexBuffer[0] = 0;
989 pgmStr = This->device->fixupVertexBuffer;
990 #else
991 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
992 #endif
995 /* TODO: Think about using a first pass to work out what's required for the second pass. */
996 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
997 This->constants[i] = 0;
999 if (NULL != pToken) {
1000 while (D3DPS_END() != *pToken) {
1001 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1002 if (version >= 2) {
1003 instructionSize = pToken & SIZEBITS >> 27;
1005 #endif
1006 if (pshader_is_version_token(*pToken)) { /** version */
1007 int numTemps;
1008 int numConstants;
1010 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1011 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1013 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1015 /* Each release of pixel shaders has had different numbers of temp registers */
1016 switch (version) {
1017 case 10:
1018 case 11:
1019 case 12:
1020 case 13:
1021 case 14: numTemps=12;
1022 numConstants=8;
1023 strcpy(tmpLine, "!!ARBfp1.0\n");
1024 break;
1025 case 20: numTemps=12;
1026 numConstants=8;
1027 strcpy(tmpLine, "!!ARBfp2.0\n");
1028 FIXME("No work done yet to support ps2.0 in hw\n");
1029 break;
1030 case 30: numTemps=32;
1031 numConstants=8;
1032 strcpy(tmpLine, "!!ARBfp3.0\n");
1033 FIXME("No work done yet to support ps3.0 in hw\n");
1034 break;
1035 default:
1036 numTemps=12;
1037 numConstants=8;
1038 strcpy(tmpLine, "!!ARBfp1.0\n");
1039 FIXME("Unrecognized pixel shader version!\n");
1041 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* numTemps and numConstants are assigned above but not read below; the
 * declaration counts that follow are fixed. */
1043 /* TODO: find out how many registers are really needed */
1044 for(i = 0; i < 6; i++) {
1045 sprintf(tmpLine, "TEMP T%lu;\n", i);
1046 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1049 for(i = 0; i < 6; i++) {
1050 sprintf(tmpLine, "TEMP R%lu;\n", i);
1051 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1054 sprintf(tmpLine, "TEMP TMP;\n");
1055 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1056 sprintf(tmpLine, "TEMP TMP2;\n");
1057 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1058 sprintf(tmpLine, "TEMP TA;\n");
1059 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1060 sprintf(tmpLine, "TEMP TB;\n");
1061 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1062 sprintf(tmpLine, "TEMP TC;\n");
1063 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1065 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1066 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1067 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1068 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1069 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1070 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1072 for(i = 0; i < 4; i++) {
1073 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1074 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1077 ++pToken;
1078 continue;
1081 if (pshader_is_comment_token(*pToken)) { /** comment */
1082 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1083 ++pToken;
1084 FIXME("#%s\n", (char*)pToken);
1085 pToken += comment_len;
1086 continue;
1088 /* here */
1089 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1090 code = *pToken;
1091 #endif
1092 pInstr = pToken;
1093 curOpcode = pshader_program_get_opcode(This, *pToken);
1094 ++pToken;
1095 if (NULL == curOpcode) {
1096 /* unknown current opcode ... (shouldn't be any!) */
1097 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1098 FIXME("unrecognized opcode: %08lx\n", *pToken);
1099 ++pToken;
1101 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1102 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1103 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1104 pToken += curOpcode->num_params;
1105 } else {
1106 TRACE("Found opcode %s %s\n", curOpcode->name, curOpcode->glname);
1107 saturate = FALSE;
1109 /* Build opcode for GL vertex_program */
1110 switch (curOpcode->opcode) {
1111 case D3DSIO_NOP:
1112 case D3DSIO_PHASE:
1113 continue;
1114 case D3DSIO_MOV:
1115 /* Address registers must be loaded with the ARL instruction */
1116 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1117 if (((*pToken) & REGMASK) < nUseAddressRegister) {
1118 strcpy(tmpLine, "ARL");
1119 break;
1120 } else
1121 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & REGMASK));
1123 /* fall through */
1124 case D3DSIO_CND:
1125 case D3DSIO_CMP:
1126 case D3DSIO_ADD:
1127 case D3DSIO_SUB:
1128 case D3DSIO_MAD:
1129 case D3DSIO_MUL:
1130 case D3DSIO_RCP:
1131 case D3DSIO_RSQ:
1132 case D3DSIO_DP3:
1133 case D3DSIO_DP4:
1134 case D3DSIO_MIN:
1135 case D3DSIO_MAX:
1136 case D3DSIO_SLT:
1137 case D3DSIO_SGE:
1138 case D3DSIO_LIT:
1139 case D3DSIO_DST:
1140 case D3DSIO_FRC:
1141 case D3DSIO_EXPP:
1142 case D3DSIO_LOGP:
1143 case D3DSIO_EXP:
1144 case D3DSIO_LOG:
1145 case D3DSIO_LRP:
1146 case D3DSIO_TEXKILL:
1147 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1148 strcpy(tmpLine, curOpcode->glname);
1149 break;
1150 case D3DSIO_DEF:
1152 DWORD reg = *pToken & REGMASK;
1153 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1154 *((const float *)(pToken + 1)),
1155 *((const float *)(pToken + 2)),
1156 *((const float *)(pToken + 3)),
1157 *((const float *)(pToken + 4)) );
1159 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* NOTE(review): reg comes from a 13 bit mask and is not checked against
 * WINED3D_PSHADER_MAX_CONSTANTS before indexing - confirm upstream validation. */
1161 This->constants[reg] = 1;
1162 pToken += 5;
1163 continue;
1165 break;
1166 case D3DSIO_TEX:
1168 char tmp[20];
1169 get_write_mask(*pToken, tmp);
1170 if (version != 14) {
1171 DWORD reg = *pToken & REGMASK;
1172 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1173 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1174 ++pToken;
1175 } else {
1176 char reg[20];
1177 DWORD reg1 = *pToken & REGMASK;
1178 DWORD reg2 = *++pToken & REGMASK;
1179 if (gen_input_modifier_line(*pToken, 0, reg, tmpLine, This->constants)) {
1180 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1182 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg, reg2);
1183 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1184 ++pToken;
1186 continue;
1188 break;
1189 case D3DSIO_TEXCOORD:
1191 char tmp[20];
1192 get_write_mask(*pToken, tmp);
1193 if (version != 14) {
1194 DWORD reg = *pToken & REGMASK;
1195 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1196 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1197 ++pToken;
1198 } else {
1199 DWORD reg1 = *pToken & REGMASK;
1200 DWORD reg2 = *++pToken & REGMASK;
1201 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1202 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1203 ++pToken;
1205 continue;
1207 break;
1208 case D3DSIO_TEXM3x2PAD:
1210 DWORD reg = *pToken & REGMASK;
1211 char buf[50];
1212 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1213 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1215 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1216 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1217 ++pToken;
1218 continue;
1220 break;
1221 case D3DSIO_TEXM3x2TEX:
1223 DWORD reg = *pToken & REGMASK;
1224 char buf[50];
1225 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1226 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1228 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1229 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1230 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1231 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1232 ++pToken;
1233 continue;
1235 break;
1236 case D3DSIO_TEXREG2AR:
1238 DWORD reg1 = *pToken & REGMASK;
1239 DWORD reg2 = *++pToken & REGMASK;
1240 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1241 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1242 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1243 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1244 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1245 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1246 ++pToken;
1247 continue;
1249 break;
1250 case D3DSIO_TEXREG2GB:
1252 DWORD reg1 = *pToken & REGMASK;
1253 DWORD reg2 = *++pToken & REGMASK;
1254 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1255 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1256 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1257 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1258 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1259 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1260 ++pToken;
1261 continue;
1263 break;
1264 case D3DSIO_TEXBEM:
1266 DWORD reg1 = *pToken & REGMASK;
1267 DWORD reg2 = *++pToken & REGMASK;
1269 /* FIXME: Should apply the BUMPMAPENV matrix */
1270 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1271 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1272 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1273 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1274 ++pToken;
1275 continue;
1277 break;
1278 case D3DSIO_TEXM3x3PAD:
1280 DWORD reg = *pToken & REGMASK;
1281 char buf[50];
1282 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1283 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1285 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1286 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* NOTE(review): tcw has two entries and row is only reset by the
 * TEXM3x3TEX / TEXM3x3VSPEC / TEXM3x3SPEC cases, so this relies on at most
 * two TEXM3x3PAD instructions between resets - confirm. */
1287 tcw[row++] = reg;
1288 ++pToken;
1289 continue;
1291 break;
1292 case D3DSIO_TEXM3x3TEX:
1294 DWORD reg = *pToken & REGMASK;
1295 char buf[50];
1296 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1297 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1300 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1301 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1303 /* Cubemap textures will be more used than 3D ones. */
1304 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1305 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1306 row = 0;
1307 ++pToken;
1308 continue;
1310 case D3DSIO_TEXM3x3VSPEC:
1312 DWORD reg = *pToken & REGMASK;
1313 char buf[50];
1314 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1315 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1317 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1318 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1320 /* Construct the eye-ray vector from w coordinates */
1321 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1322 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1323 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1324 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1325 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1326 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1328 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1329 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1330 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1331 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1332 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1333 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1334 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1336 /* Cubemap textures will be more used than 3D ones. */
1337 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1338 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1339 row = 0;
1340 ++pToken;
1341 continue;
1343 break;
1344 case D3DSIO_TEXM3x3SPEC:
1346 DWORD reg = *pToken & REGMASK;
1347 DWORD reg3 = *(pToken + 2) & REGMASK;
1348 char buf[50];
1349 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1350 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1352 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1353 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1355 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
/* NOTE(review): the constant is written as "C[n]" here, whereas
 * get_register_name produces "C<n>" or "program.env[<n>]" - confirm. */
1356 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1357 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1359 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1360 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1361 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1362 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1364 /* Cubemap textures will be more used than 3D ones. */
1365 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1366 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1367 row = 0;
1368 pToken += 3;
1369 continue;
1371 break;
1373 default:
1374 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1375 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1376 } else {
1377 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1379 pToken += curOpcode->num_params; /* maybe + 1 */
1380 continue;
1383 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1384 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1385 switch (mask) {
1386 case D3DSPDM_SATURATE: saturate = TRUE; break;
1387 #if 0 /* as yet unhandled modifiers */
1388 case D3DSPDM_CENTROID: centroid = TRUE; break;
1389 case D3DSPDM_PP: partialpresision = TRUE; break;
1390 case D3DSPDM_X2: X2 = TRUE; break;
1391 case D3DSPDM_X4: X4 = TRUE; break;
1392 case D3DSPDM_X8: X8 = TRUE; break;
1393 case D3DSPDM_D2: D2 = TRUE; break;
1394 case D3DSPDM_D4: D4 = TRUE; break;
1395 case D3DSPDM_D8: D8 = TRUE; break;
1396 #endif
1397 default:
1398 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1402 /* Generate input and output registers */
1403 if (curOpcode->num_params > 0) {
1404 char regs[5][50];
1405 char operands[4][100];
1406 char swzstring[20];
/* This declaration hides the BOOL saturate declared at the top of the
 * function; the value assigned from D3DSPDM_SATURATE above is not read by
 * the code in this block. */
1407 int saturate = 0;
1408 char tmpOp[256];
1409 TRACE("(%p): Opcode has %d params\n", This, curOpcode->num_params);
1411 /* Generate lines that handle input modifier computation */
1412 for (i = 1; i < curOpcode->num_params; ++i) {
1413 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1414 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1415 addline(&lineNum, pgmStr, &pgmLength, tmpOp);
1419 /* Handle saturation only when no shift is present in the output modifier */
1420 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1421 saturate = 1;
1423 /* Handle output register */
1424 get_register_name(*pToken, tmpOp, This->constants);
1425 strcpy(operands[0], tmpOp);
1426 get_write_mask(*pToken, tmpOp);
1427 strcat(operands[0], tmpOp);
1429 /* This function works because of side effects from gen_input_modifier_line */
1430 /* Handle input registers */
1431 for (i = 1; i < curOpcode->num_params; ++i) {
1432 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1433 strcpy(operands[i], regs[i - 1]);
1434 get_input_register_swizzle(*(pToken + i), swzstring);
1435 strcat(operands[i], swzstring);
1438 switch(curOpcode->opcode) {
1439 case D3DSIO_CMP:
1440 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
1441 break;
1442 case D3DSIO_CND:
/* NOTE(review): unlike the other generated lines this one has no trailing
 * "\n" - confirm whether that is intended. */
1443 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1444 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1445 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
1446 break;
1447 default:
1448 if (saturate)
1449 strcat(tmpLine, "_SAT");
1450 strcat(tmpLine, " ");
1451 strcat(tmpLine, operands[0]);
1452 for (i = 1; i < curOpcode->num_params; i++) {
1453 strcat(tmpLine, ", ");
1454 strcat(tmpLine, operands[i]);
1456 strcat(tmpLine,";\n");
1458 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1459 pToken += curOpcode->num_params;
1461 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1462 if (curOpcode->num_params > 0) {
1463 DWORD param = *(pInstr + 1);
1464 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1466 /* Generate a line that handle the output modifier computation */
1467 char regstr[100];
1468 char write_mask[20];
1469 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1470 get_register_name(param, regstr, This->constants);
1471 get_write_mask(param, write_mask);
1472 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1473 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1476 #endif
1479 /* TODO: What about result.depth? */
1480 strcpy(tmpLine, "MOV result.color, R0;\n");
1481 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1483 strcpy(tmpLine, "END\n");
1484 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1487 /* finally null terminate the pgmStr*/
1488 pgmStr[pgmLength] = 0;
1489 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1490 /* Create the hw shader */
1492 /* pgmStr sometimes gets too long for a normal TRACE */
1493 TRACE("Generated program:\n");
1494 if (TRACE_ON(d3d_shader)) {
1495 fprintf(stderr, "%s\n", pgmStr);
1498 /* TODO: change to resource.glObjectHandel or something like that */
1499 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
1501 TRACE("Creating a hw pixel shader, prg=%d\n", This->baseShader.prgId);
1502 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->baseShader.prgId));
1504 TRACE("Created hw pixel shader, prg=%d\n", This->baseShader.prgId);
1505 /* Create the program and check for errors */
1506 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
1507 if (glGetError() == GL_INVALID_OPERATION) {
1508 GLint errPos;
1509 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1510 FIXME("HW PixelShader Error at position %d: %s\n",
1511 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1512 This->baseShader.prgId = -1;
1515 #if 1 /* if were using the data buffer of device then we don't need to free it */
1516 HeapFree(GetProcessHeap(), 0, pgmStr);
1517 #endif
/*
 * Trace a human readable form of one shader parameter token.
 *
 * param: the parameter token to print.
 * input: non-zero to print the token as a source operand, zero to print it as
 *        a destination operand.
 *
 * For a source operand a "-" or "1-" prefix is printed first for the negating
 * and complement modifiers, then the register name, then a suffix for the
 * source modifier and finally the swizzle (omitted when it is the identity
 * swizzle, shortened to one letter when all four selectors are equal).
 * For a destination operand the register name is followed by the write mask
 * when not all components are enabled.
 *
 * The register type is built from the D3DSP_REGTYPE_MASK bits combined with
 * the EXTENDED_REG bits shifted right by 8. Register types that are not
 * listed in the switch print nothing.
 *
 * NOTE(review): rastout_reg_names is indexed by the register number without a
 * range check.
 * NOTE(review): the write mask path traces "." and then ".r", ".g", ... so
 * the output appears to contain repeated dots - confirm whether intended.
 * NOTE(review): UNKNOWN_MASK and EXTENDED_REG are #defined inside the function
 * and are not #undef'd in the visible code.
 */
1520 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1521 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1522 static const char swizzle_reg_chars[] = "rgba";
1524 /* the unknown mask is for bits not yet accounted for by any other mask... */
1525 #define UNKNOWN_MASK 0xC000
1527 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
1528 #define EXTENDED_REG 0x1800
1530 DWORD reg = param & D3DSP_REGNUM_MASK;
1531 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
1533 if (input) {
1534 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1535 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1536 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1537 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1538 TRACE("-");
1539 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1540 TRACE("1-");
1543 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1544 case D3DSPR_TEMP:
1545 TRACE("r%lu", reg);
1546 break;
1547 case D3DSPR_INPUT:
1548 TRACE("v%lu", reg);
1549 break;
1550 case D3DSPR_CONST:
1551 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1552 break;
1554 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1555 TRACE("t%lu", reg);
1556 break;
1557 case D3DSPR_RASTOUT:
1558 TRACE("%s", rastout_reg_names[reg]);
1559 break;
1560 case D3DSPR_ATTROUT:
1561 TRACE("oD%lu", reg);
1562 break;
1563 case D3DSPR_TEXCRDOUT:
1564 TRACE("oT%lu", reg);
1565 break;
1566 case D3DSPR_CONSTINT:
1567 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1568 break;
1569 case D3DSPR_CONSTBOOL:
1570 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1571 break;
1572 case D3DSPR_LABEL:
1573 TRACE("l%lu", reg);
1574 break;
1575 case D3DSPR_LOOP:
1576 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1577 break;
1578 default:
1579 break;
1582 if (!input) {
1583 /** operand output */
1585 * for better debugging traces it's done into opcode dump code
1586 * @see pshader_program_dump_opcode
1587 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1588 DWORD mask = param & D3DSP_DSTMOD_MASK;
1589 switch (mask) {
1590 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1591 default:
1592 TRACE("_unhandled_modifier(0x%08lx)", mask);
1595 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1596 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1597 if (shift > 0) {
1598 TRACE("_x%u", 1 << shift);
1602 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1603 TRACE(".");
1604 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1605 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1606 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1607 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1609 } else {
1610 /** operand input */
1611 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1612 DWORD swizzle_r = swizzle & 0x03;
1613 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1614 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1615 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1617 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1618 DWORD mask = param & D3DSP_SRCMOD_MASK;
1619 /*TRACE("_modifier(0x%08lx) ", mask);*/
1620 switch (mask) {
1621 case D3DSPSM_NONE: break;
1622 case D3DSPSM_NEG: break;
1623 case D3DSPSM_BIAS: TRACE("_bias"); break;
1624 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1625 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1626 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1627 case D3DSPSM_COMP: break;
1628 case D3DSPSM_X2: TRACE("_x2"); break;
1629 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1630 case D3DSPSM_DZ: TRACE("_dz"); break;
1631 case D3DSPSM_DW: TRACE("_dw"); break;
1632 default:
1633 TRACE("_unknown(0x%08lx)", mask);
1638 * swizzle bits fields:
1639 * RRGGBBAA
1641 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1642 if (swizzle_r == swizzle_g &&
1643 swizzle_r == swizzle_b &&
1644 swizzle_r == swizzle_a) {
1645 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1646 } else {
1647 TRACE(".%c%c%c%c",
1648 swizzle_reg_chars[swizzle_r],
1649 swizzle_reg_chars[swizzle_g],
1650 swizzle_reg_chars[swizzle_b],
1651 swizzle_reg_chars[swizzle_a]);
1657 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl *This, DWORD token) {
1658 TRACE("dcl_");
1659 switch(token & 0xFFFF) {
1660 case D3DDECLUSAGE_POSITION:
1661 TRACE("%s%ld ", "position",(token & 0xF0000) >> 16);
1662 break;
1663 case D3DDECLUSAGE_BLENDINDICES:
1664 TRACE("%s ", "blend");
1665 break;
1666 case D3DDECLUSAGE_BLENDWEIGHT:
1667 TRACE("%s ", "weight");
1668 break;
1669 case D3DDECLUSAGE_NORMAL:
1670 TRACE("%s%ld ", "normal",(token & 0xF0000) >> 16);
1671 break;
1672 case D3DDECLUSAGE_PSIZE:
1673 TRACE("%s ", "psize");
1674 break;
1675 case D3DDECLUSAGE_COLOR:
1676 if((token & 0xF0000) >> 16 == 0) {
1677 TRACE("%s ", "color");
1678 } else {
1679 TRACE("%s%ld ", "specular", ((token & 0xF0000) >> 16) - 1);
1681 break;
1682 case D3DDECLUSAGE_TEXCOORD:
1683 TRACE("%s%ld ", "texture", (token & 0xF0000) >> 16);
1684 break;
1685 case D3DDECLUSAGE_TANGENT:
1686 TRACE("%s ", "tangent");
1687 break;
1688 case D3DDECLUSAGE_BINORMAL:
1689 TRACE("%s ", "binormal");
1690 break;
1691 case D3DDECLUSAGE_TESSFACTOR:
1692 TRACE("%s ", "tessfactor");
1693 break;
1694 case D3DDECLUSAGE_POSITIONT:
1695 TRACE("%s%ld ", "positionT",(token & 0xF0000) >> 16);
1696 break;
1697 case D3DDECLUSAGE_FOG:
1698 TRACE("%s ", "fog");
1699 break;
1700 case D3DDECLUSAGE_DEPTH:
1701 TRACE("%s ", "depth");
1702 break;
1703 case D3DDECLUSAGE_SAMPLE:
1704 TRACE("%s ", "sample");
1705 break;
1706 default:
1707 FIXME("Unrecognised dcl %08lx", token & 0xFFFF);
1711 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1712 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1713 const DWORD* pToken = pFunction;
1714 const SHADER_OPCODE *curOpcode = NULL;
1715 DWORD len = 0;
1716 DWORD i;
1717 TRACE("(%p) : Parsing programme\n", This);
1719 if (NULL != pToken) {
1720 while (D3DPS_END() != *pToken) {
1721 if (pshader_is_version_token(*pToken)) { /** version */
1722 This->baseShader.version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1723 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1724 ++pToken;
1725 ++len;
1726 continue;
1728 if (pshader_is_comment_token(*pToken)) { /** comment */
1729 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1730 ++pToken;
1731 TRACE("//%s\n", (char*)pToken);
1732 pToken += comment_len;
1733 len += comment_len + 1;
1734 continue;
1736 if (!This->baseShader.version) {
1737 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1739 curOpcode = pshader_program_get_opcode(This, *pToken);
1740 ++pToken;
1741 ++len;
1742 if (NULL == curOpcode) {
1744 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1745 while (*pToken & 0x80000000) {
1747 /* unknown current opcode ... */
1748 TRACE("unrecognized opcode: %08lx", *pToken);
1749 ++pToken;
1750 ++len;
1751 TRACE("\n");
1754 } else {
1755 if (curOpcode->opcode == D3DSIO_DCL) {
1756 pshader_program_dump_decl_usage(This, *pToken);
1757 ++pToken;
1758 ++len;
1759 pshader_program_dump_ps_param(*pToken, 0);
1760 ++pToken;
1761 ++len;
1762 } else
1763 if (curOpcode->opcode == D3DSIO_DEF) {
1764 TRACE("def c%lu = ", *pToken & 0xFF);
1765 ++pToken;
1766 ++len;
1767 TRACE("%f ,", *(float *)pToken);
1768 ++pToken;
1769 ++len;
1770 TRACE("%f ,", *(float *)pToken);
1771 ++pToken;
1772 ++len;
1773 TRACE("%f ,", *(float *)pToken);
1774 ++pToken;
1775 ++len;
1776 TRACE("%f", *(float *)pToken);
1777 ++pToken;
1778 ++len;
1779 } else {
1780 TRACE("%s ", curOpcode->name);
1781 if (curOpcode->num_params > 0) {
1782 pshader_program_dump_ps_param(*pToken, 0);
1783 ++pToken;
1784 ++len;
1785 for (i = 1; i < curOpcode->num_params; ++i) {
1786 TRACE(", ");
1787 pshader_program_dump_ps_param(*pToken, 1);
1788 ++pToken;
1789 ++len;
1793 TRACE("\n");
1796 This->baseShader.functionLength = (len + 1) * sizeof(DWORD);
1797 } else {
1798 This->baseShader.functionLength = 1; /* no Function defined use fixed function vertex processing */
1801 /* Generate HW shader in needed */
1802 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1803 TRACE("(%p) : Generating hardware program\n", This);
1804 #if 1
1805 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1806 #endif
1809 TRACE("(%p) : Copying the function\n", This);
1810 /* copy the function ... because it will certainly be released by application */
1811 if (NULL != pFunction) {
1812 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1813 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1814 } else {
1815 This->baseShader.function = NULL;
1818 /* TODO: Some proper return values for failures */
1819 TRACE("(%p) : Returning D3D_OK\n", This);
1820 return D3D_OK;
1823 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1825 /*** IUnknown methods ***/
1826 IWineD3DPixelShaderImpl_QueryInterface,
1827 IWineD3DPixelShaderImpl_AddRef,
1828 IWineD3DPixelShaderImpl_Release,
1829 /*** IWineD3DBase methods ***/
1830 IWineD3DPixelShaderImpl_GetParent,
1831 /*** IWineD3DBaseShader methods ***/
1832 IWineD3DPixelShaderImpl_SetFunction,
1833 /*** IWineD3DPixelShader methods ***/
1834 IWineD3DPixelShaderImpl_GetDevice,
1835 IWineD3DPixelShaderImpl_GetFunction