wined3d: Don't override variable that still has something we need.
[wine/dcerpc.git] / dlls / wined3d / pixelshader.c
blob1b24f3de3e4052a986b0a81c3b174570b0dbcc62
1 /*
2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "config.h"
23 #include <math.h>
24 #include <stdio.h>
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
/* Expands to the gl_info member of the wineD3D object reached through
 * This->wineD3DDevice; a local variable named This must be in scope. */
#define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info

/* Per-instruction tracing for the software VM. Compiled out unless the
 * condition below is flipped locally for debugging. */
#if 0 /* Must not be 1 in cvs version */
# define PSTRACE(A) TRACE A
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
# define PSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif

/* The maximum size of the program */
#define PGMSIZE 65535

/* Bits of a parameter token that hold the register number (see get_register_name). */
#define REGMASK 0x00001FFF
44 typedef void (*shader_fct_t)();
46 typedef struct SHADER_OPCODE {
47 unsigned int opcode;
48 const char* name;
49 const char* glname;
50 CONST UINT num_params;
51 shader_fct_t soft_fct;
52 DWORD min_version;
53 DWORD max_version;
54 } SHADER_OPCODE;
56 #define GLNAME_REQUIRE_GLSL ((const char *)1)
57 /* *******************************************
58 IWineD3DPixelShader IUnknown parts follow
59 ******************************************* */
60 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
62 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
63 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
64 if (IsEqualGUID(riid, &IID_IUnknown)
65 || IsEqualGUID(riid, &IID_IWineD3DBase)
66 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
67 IUnknown_AddRef(iface);
68 *ppobj = This;
69 return D3D_OK;
71 return E_NOINTERFACE;
74 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
75 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
76 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
77 return InterlockedIncrement(&This->ref);
80 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
81 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
82 ULONG ref;
83 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
84 ref = InterlockedDecrement(&This->ref);
85 if (ref == 0) {
86 HeapFree(GetProcessHeap(), 0, This);
88 return ref;
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
   when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
97 /* *******************************************
98 IWineD3DPixelShader IWineD3DPixelShader parts follow
99 ******************************************* */
101 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
102 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
104 *parent = This->parent;
105 IUnknown_AddRef(*parent);
106 TRACE("(%p) : returning %p\n", This, *parent);
107 return D3D_OK;
110 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
111 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
112 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
113 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
114 TRACE("(%p) returning %p\n", This, *pDevice);
115 return D3D_OK;
119 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
120 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
121 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
123 if (NULL == pData) {
124 *pSizeOfData = This->functionLength;
125 return D3D_OK;
127 if (*pSizeOfData < This->functionLength) {
128 *pSizeOfData = This->functionLength;
129 return D3DERR_MOREDATA;
131 if (NULL == This->function) { /* no function defined */
132 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
133 (*(DWORD **) pData) = NULL;
134 } else {
135 if (This->functionLength == 0) {
138 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
139 memcpy(pData, This->function, This->functionLength);
141 return D3D_OK;
/*******************************
 * pshader functions software VM
 *******************************/
148 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
149 d->x = s0->x + s1->x;
150 d->y = s0->y + s1->y;
151 d->z = s0->z + s1->z;
152 d->w = s0->w + s1->w;
153 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
157 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
158 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
159 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
160 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
163 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
164 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
165 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
166 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
169 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
170 d->x = 1.0f;
171 d->y = s0->y * s1->y;
172 d->z = s0->z;
173 d->w = s1->w;
174 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
175 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
178 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
179 union {
180 float f;
181 DWORD d;
182 } tmp;
184 tmp.f = floorf(s0->w);
185 d->x = powf(2.0f, tmp.f);
186 d->y = s0->w - tmp.f;
187 tmp.f = powf(2.0f, s0->w);
188 tmp.d &= 0xFFFFFF00U;
189 d->z = tmp.f;
190 d->w = 1.0f;
191 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
192 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
195 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
196 d->x = 1.0f;
197 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
198 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
199 d->w = 1.0f;
200 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
201 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
204 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
205 float tmp_f = fabsf(s0->w);
206 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
207 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
208 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
211 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
212 d->x = s0->x * s1->x + s2->x;
213 d->y = s0->y * s1->y + s2->y;
214 d->z = s0->z * s1->z + s2->z;
215 d->w = s0->w * s1->w + s2->w;
216 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
217 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
220 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
221 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
222 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
223 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
224 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
225 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
226 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
229 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
230 d->x = (s0->x < s1->x) ? s0->x : s1->x;
231 d->y = (s0->y < s1->y) ? s0->y : s1->y;
232 d->z = (s0->z < s1->z) ? s0->z : s1->z;
233 d->w = (s0->w < s1->w) ? s0->w : s1->w;
234 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
235 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
238 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
239 d->x = s0->x;
240 d->y = s0->y;
241 d->z = s0->z;
242 d->w = s0->w;
243 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
244 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
247 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
248 d->x = s0->x * s1->x;
249 d->y = s0->y * s1->y;
250 d->z = s0->z * s1->z;
251 d->w = s0->w * s1->w;
252 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
253 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: nothing to execute; only the (normally compiled-out) trace remains. */
void pshader_nop(void) {
    PSTRACE(("executing nop\n"));
}
261 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
262 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
263 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
264 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
267 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
268 float tmp_f = fabsf(s0->w);
269 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
270 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
271 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
274 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
275 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
276 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
277 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
278 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
279 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
280 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
283 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
284 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
285 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
286 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
287 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
288 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
289 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
292 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
293 d->x = s0->x - s1->x;
294 d->y = s0->y - s1->y;
295 d->z = s0->z - s1->z;
296 d->w = s0->w - s1->w;
297 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
298 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/**
 * Version 1.1 specific
 */
305 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
306 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
307 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
308 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
311 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
312 float tmp_f = fabsf(s0->w);
313 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
314 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
315 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
318 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
319 d->x = s0->x - floorf(s0->x);
320 d->y = s0->y - floorf(s0->y);
321 d->z = 0.0f;
322 d->w = 1.0f;
323 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
324 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
327 typedef FLOAT D3DMATRIX44[4][4];
328 typedef FLOAT D3DMATRIX43[4][3];
329 typedef FLOAT D3DMATRIX34[3][4];
330 typedef FLOAT D3DMATRIX33[3][3];
331 typedef FLOAT D3DMATRIX23[2][3];
333 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
335 * Buggy CODE: here only if cast not work for copy/paste
336 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
337 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
338 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
339 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
340 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
341 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
342 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
344 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
345 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
346 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
347 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
348 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
349 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
350 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
351 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
354 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
355 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
356 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
357 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
358 d->w = 1.0f;
359 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
360 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
361 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
362 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
365 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
366 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
367 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
368 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
369 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
370 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
371 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
372 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
373 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
376 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
377 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
378 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
379 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
380 d->w = 1.0f;
381 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
382 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
383 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
384 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
387 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
388 FIXME("check\n");
389 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
390 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
391 d->z = 0.0f;
392 d->w = 1.0f;
/**
 * Version 2.0 specific
 */
398 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
399 d->x = s0->x * (s1->x - s2->x) + s2->x;
400 d->y = s0->y * (s1->y - s2->y) + s2->y;
401 d->z = s0->z * (s1->z - s2->z) + s2->z;
402 d->w = s0->w * (s1->w - s2->w) + s2->w;
405 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
406 d->x = s0->y * s1->z - s0->z * s1->y;
407 d->y = s0->z * s1->x - s0->x * s1->z;
408 d->z = s0->x * s1->y - s0->y * s1->x;
409 d->w = 0.9f; /* w is undefined, so set it to something safeish */
411 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
412 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
415 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
416 d->x = fabsf(s0->x);
417 d->y = fabsf(s0->y);
418 d->z = fabsf(s0->z);
419 d->w = fabsf(s0->w);
420 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
421 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
424 /* Stubs */
425 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
426 FIXME(" : Stub\n");
429 void pshader_texkill(WINED3DSHADERVECTOR* d) {
430 FIXME(" : Stub\n");
433 void pshader_tex(WINED3DSHADERVECTOR* d) {
434 FIXME(" : Stub\n");
436 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
437 FIXME(" : Stub\n");
440 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
441 FIXME(" : Stub\n");
444 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
445 FIXME(" : Stub\n");
448 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
449 FIXME(" : Stub\n");
452 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
453 FIXME(" : Stub\n");
456 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
457 FIXME(" : Stub\n");
460 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
461 FIXME(" : Stub\n");
464 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
465 FIXME(" : Stub\n");
468 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
469 FIXME(" : Stub\n");
472 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
473 FIXME(" : Stub\n");
476 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
477 FIXME(" : Stub\n");
480 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
481 FIXME(" : Stub\n");
484 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
485 FIXME(" : Stub\n");
488 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
489 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
490 FIXME(" : Stub\n");
493 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
494 FIXME(" : Stub\n");
497 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
498 FIXME(" : Stub\n");
501 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
502 FIXME(" : Stub\n");
505 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
506 FIXME(" : Stub\n");
509 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
510 FIXME(" : Stub\n");
513 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
514 FIXME(" : Stub\n");
517 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
518 FIXME(" : Stub\n");
521 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
522 FIXME(" : Stub\n");
525 void pshader_call(WINED3DSHADERVECTOR* d) {
526 FIXME(" : Stub\n");
529 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
530 FIXME(" : Stub\n");
533 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
534 FIXME(" : Stub\n");
537 void pshader_ret(WINED3DSHADERVECTOR* d) {
538 FIXME(" : Stub\n");
541 void pshader_endloop(WINED3DSHADERVECTOR* d) {
542 FIXME(" : Stub\n");
545 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
546 FIXME(" : Stub\n");
549 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
550 FIXME(" : Stub\n");
553 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
554 FIXME(" : Stub\n");
557 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
558 FIXME(" : Stub\n");
561 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
562 FIXME(" : Stub\n");
565 void pshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
566 FIXME(" : Stub\n");
/* endrep (D3DSIO_ENDREP): no software implementation; only reports a FIXME. */
void pshader_endrep(void) {
    FIXME(" : Stub\n");
}
573 void pshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
574 FIXME(" : Stub\n");
577 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
578 FIXME(" : Stub\n");
581 void pshader_else(WINED3DSHADERVECTOR* d) {
582 FIXME(" : Stub\n");
585 void pshader_label(WINED3DSHADERVECTOR* d) {
586 FIXME(" : Stub\n");
589 void pshader_endif(WINED3DSHADERVECTOR* d) {
590 FIXME(" : Stub\n");
593 void pshader_break(WINED3DSHADERVECTOR* d) {
594 FIXME(" : Stub\n");
597 void pshader_breakc(WINED3DSHADERVECTOR* d) {
598 FIXME(" : Stub\n");
601 void pshader_mova(WINED3DSHADERVECTOR* d) {
602 FIXME(" : Stub\n");
605 void pshader_defb(WINED3DSHADERVECTOR* d) {
606 FIXME(" : Stub\n");
609 void pshader_defi(WINED3DSHADERVECTOR* d) {
610 FIXME(" : Stub\n");
613 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
614 FIXME(" : Stub\n");
617 void pshader_dsx(WINED3DSHADERVECTOR* d) {
618 FIXME(" : Stub\n");
621 void pshader_dsy(WINED3DSHADERVECTOR* d) {
622 FIXME(" : Stub\n");
625 void pshader_texldd(WINED3DSHADERVECTOR* d) {
626 FIXME(" : Stub\n");
629 void pshader_setp(WINED3DSHADERVECTOR* d) {
630 FIXME(" : Stub\n");
633 void pshader_texldl(WINED3DSHADERVECTOR* d) {
634 FIXME(" : Stub\n");
637 void pshader_breakp(WINED3DSHADERVECTOR* d) {
638 FIXME(" : Stub\n");
/**
 * log, exp, frc and m*x* seem to be macro instructions ... to be checked
 */
643 static CONST SHADER_OPCODE pshader_ins [] = {
644 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
645 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
646 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
647 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
648 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
649 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
650 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
651 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
652 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
653 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
654 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
655 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
656 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
657 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
658 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
659 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
660 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
661 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
662 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
663 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
664 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
665 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
666 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
667 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
668 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
669 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
672 /** FIXME: use direct access so add the others opcodes as stubs */
673 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
674 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
675 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
676 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
677 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
678 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
679 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
680 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
681 /* DCL is a specil operation */
682 {D3DSIO_DCL, "dcl", NULL, 1, pshader_dcl, 0, 0},
683 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
684 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
685 /* TODO: sng can possibly be performed as
686 RCP tmp, vec
687 MUL out, tmp, vec*/
688 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
689 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
690 DP3 tmp , vec, vec;
691 RSQ tmp, tmp.x;
692 MUL vec.xyz, vec, tmp;
693 but I think this is better because it accounts for w properly.
694 DP3 tmp , vec, vec;
695 RSQ tmp, tmp.x;
696 MUL vec, vec, tmp;
699 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
700 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
701 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, pshader_rep, 0, 0},
702 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
703 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, pshader_if, 0, 0},
704 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
705 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, pshader_else, 0, 0},
706 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, pshader_endif, 0, 0},
707 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, pshader_break, 0, 0},
708 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
709 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
710 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
711 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
713 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
715 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
716 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
717 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
718 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
721 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
722 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
723 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
724 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
725 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
726 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
729 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
730 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
731 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
732 /* def is a special operation */
733 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
734 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
735 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
736 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
737 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
738 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
739 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
740 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
741 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
742 /* TODO: dp2add can be made out of multiple instuctions */
743 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
744 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
745 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
746 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
747 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
748 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
749 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, pshader_breakp, 0, 0},
750 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
751 {0, NULL, NULL, 0, NULL, 0, 0}
755 inline static const SHADER_OPCODE* pshader_program_get_opcode(const DWORD code, const int version) {
756 DWORD i = 0;
757 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
758 /** TODO: use dichotomic search */
759 while (NULL != pshader_ins[i].name) {
760 if (((code & D3DSI_OPCODE_MASK) == pshader_ins[i].opcode) &&
761 (((hex_version >= pshader_ins[i].min_version) && (hex_version <= pshader_ins[i].max_version)) ||
762 ((pshader_ins[i].min_version == 0) && (pshader_ins[i].max_version == 0)))) {
763 return &pshader_ins[i];
765 ++i;
767 FIXME("Unsupported opcode %lx(%ld) masked %lx version %d\n", code, code, code & D3DSI_OPCODE_MASK, version);
768 return NULL;
771 inline static BOOL pshader_is_version_token(DWORD token) {
772 return 0xFFFF0000 == (token & 0xFFFF0000);
775 inline static BOOL pshader_is_comment_token(DWORD token) {
776 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
780 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
781 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
783 DWORD reg = param & REGMASK;
784 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
786 switch (regtype) {
787 case D3DSPR_TEMP:
788 sprintf(regstr, "R%lu", reg);
789 break;
790 case D3DSPR_INPUT:
791 if (reg==0) {
792 strcpy(regstr, "fragment.color.primary");
793 } else {
794 strcpy(regstr, "fragment.color.secondary");
796 break;
797 case D3DSPR_CONST:
798 if (constants[reg])
799 sprintf(regstr, "C%lu", reg);
800 else
801 sprintf(regstr, "program.env[%lu]", reg);
802 break;
803 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
804 sprintf(regstr,"T%lu", reg);
805 break;
806 case D3DSPR_RASTOUT:
807 sprintf(regstr, "%s", rastout_reg_names[reg]);
808 break;
809 case D3DSPR_ATTROUT:
810 sprintf(regstr, "oD[%lu]", reg);
811 break;
812 case D3DSPR_TEXCRDOUT:
813 sprintf(regstr, "oT[%lu]", reg);
814 break;
815 default:
816 FIXME("Unhandled register name Type(%ld)\n", regtype);
817 break;
821 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
822 *write_mask = 0;
823 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
824 strcat(write_mask, ".");
825 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
826 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
827 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
828 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
832 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
833 static const char swizzle_reg_chars[] = "rgba";
834 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
835 DWORD swizzle_x = swizzle & 0x03;
836 DWORD swizzle_y = (swizzle >> 2) & 0x03;
837 DWORD swizzle_z = (swizzle >> 4) & 0x03;
838 DWORD swizzle_w = (swizzle >> 6) & 0x03;
840 * swizzle bits fields:
841 * WWZZYYXX
843 *swzstring = 0;
844 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
845 if (swizzle_x == swizzle_y &&
846 swizzle_x == swizzle_z &&
847 swizzle_x == swizzle_w) {
848 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
849 } else {
850 sprintf(swzstring, ".%c%c%c%c",
851 swizzle_reg_chars[swizzle_x],
852 swizzle_reg_chars[swizzle_y],
853 swizzle_reg_chars[swizzle_z],
854 swizzle_reg_chars[swizzle_w]);
859 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
860 int lineLen = strlen(line);
861 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
862 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
863 return;
864 } else {
865 memcpy(pgm + *pgmLength, line, lineLen);
868 *pgmLength += lineLen;
869 ++(*lineNum);
870 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/* Operand names indexed by a shift value in the range 0..15.
 * gen_output_modifier_line() uses the selected entry as the second source of
 * a MUL instruction. The "coefmul" and "coefdiv" names match the PARAMs that
 * the program generator declares ({ 2, 4, 8, 16 } and
 * { 0.5, 0.25, 0.125, 0.0625 }); the trailing comment on each entry gives the
 * scale factor it stands for.
 * NOTE(review): no declaration of "dummy" is visible in this part of the
 * file - presumably those shift values are not expected to occur; confirm. */
873 static const char* shift_tab[] = {
874 "dummy", /* 0 (none) */
875 "coefmul.x", /* 1 (x2) */
876 "coefmul.y", /* 2 (x4) */
877 "coefmul.z", /* 3 (x8) */
878 "coefmul.w", /* 4 (x16) */
879 "dummy", /* 5 (x32) */
880 "dummy", /* 6 (x64) */
881 "dummy", /* 7 (x128) */
882 "dummy", /* 8 (d256) */
883 "dummy", /* 9 (d128) */
884 "dummy", /* 10 (d64) */
885 "dummy", /* 11 (d32) */
886 "coefdiv.w", /* 12 (d16) */
887 "coefdiv.z", /* 13 (d8) */
888 "coefdiv.y", /* 14 (d4) */
889 "coefdiv.x" /* 15 (d2) */
892 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
893 /* Generate a line that does the output modifier computation */
894 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
897 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
898 /* Generate a line that does the input modifier computation and return the input register to use */
899 static char regstr[256];
900 static char tmpline[256];
901 int insert_line;
903 /* Assume a new line will be added */
904 insert_line = 1;
906 /* Get register name */
907 get_register_name(instr, regstr, constants);
909 TRACE(" Register name %s\n", regstr);
910 switch (instr & D3DSP_SRCMOD_MASK) {
911 case D3DSPSM_NONE:
912 strcpy(outregstr, regstr);
913 insert_line = 0;
914 break;
915 case D3DSPSM_NEG:
916 sprintf(outregstr, "-%s", regstr);
917 insert_line = 0;
918 break;
919 case D3DSPSM_BIAS:
920 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
921 break;
922 case D3DSPSM_BIASNEG:
923 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
924 break;
925 case D3DSPSM_SIGN:
926 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
927 break;
928 case D3DSPSM_SIGNNEG:
929 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
930 break;
931 case D3DSPSM_COMP:
932 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
933 break;
934 case D3DSPSM_X2:
935 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
936 break;
937 case D3DSPSM_X2NEG:
938 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
939 break;
940 case D3DSPSM_DZ:
941 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
942 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
943 strcat(line, "\n"); /* Hack */
944 strcat(line, tmpline);
945 break;
946 case D3DSPSM_DW:
947 sprintf(line, "RCP T%c, %s;", 'A' + tmpreg, regstr);
948 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
949 strcat(line, "\n"); /* Hack */
950 strcat(line, tmpline);
951 break;
952 default:
953 strcpy(outregstr, regstr);
954 insert_line = 0;
957 if (insert_line) {
958 /* Substitute the register name */
959 sprintf(outregstr, "T%c", 'A' + tmpreg);
962 return insert_line;
964 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/* Translate a Direct3D pixel shader token stream into the text of an ARB
 * fragment program and hand it to OpenGL.
 *
 * iface     - the pixel shader object; its constants[] flags are reset and then
 *             set to 1 for every constant defined by a DEF instruction, and its
 *             prgId receives the GL program id (or -1 when GL reports
 *             GL_INVALID_OPERATION for the program string).
 * pFunction - the token stream, read until the D3DPS_END() token; may be NULL,
 *             in which case no program lines are generated.
 *
 * The program text is collected with addline() in a temporary PGMSIZE byte
 * buffer that is freed again before returning. On a version token the program
 * header, TEMP/PARAM declarations and initial texture coordinate moves are
 * emitted; comment tokens are skipped; every other token is looked up with
 * pshader_program_get_opcode() and translated. After the token loop
 * "MOV result.color, R0;" and "END" are appended. */
965 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
966 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
967 const DWORD *pToken = pFunction;
968 const SHADER_OPCODE *curOpcode = NULL;
969 const DWORD *pInstr;
970 DWORD i;
971 unsigned lineNum = 0; /* The line number of the generated program (for loging)*/
972 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
973 char tmpLine[255];
974 DWORD nUseAddressRegister = 0;
975 #if 0 /* TODO: loop register (just another address register ) */
976 BOOL hasLoops = FALSE;
977 #endif
979 BOOL saturate; /* clamp to 0.0 -> 1.0*/
980 int row = 0; /* not sure, something to do with macros? */
981 DWORD tcw[2];
982 int version = 0; /* The version of the shader */
984 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
985 unsigned int pgmLength = 0;
987 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
988 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
989 if (This->device->fixupVertexBufferSize < PGMSIZE) {
990 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
991 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
992 This->fixupVertexBufferSize = PGMSIZE;
993 This->fixupVertexBuffer[0] = 0;
995 pgmStr = This->device->fixupVertexBuffer;
996 #else
/* NOTE(review): the result of HeapAlloc is not checked before pgmStr is written to. */
997 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
998 #endif
1001 /* TODO: Think about using a first pass to work out what's required for the second pass. */
1002 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
1003 This->constants[i] = 0;
1005 if (NULL != pToken) {
1006 while (D3DPS_END() != *pToken) {
1007 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1008 if (version >= 2) {
1009 instructionSize = pToken & SIZEBITS >> 27;
1011 #endif
/* Version token: emit the program header, the TEMP/PARAM declarations and
 * copy the first four texture coordinates into T0..T3. */
1012 if (pshader_is_version_token(*pToken)) { /** version */
1013 int numTemps;
1014 int numConstants;
1016 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1017 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1019 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1021 /* Each release of pixel shaders has had different numbers of temp registers */
/* NOTE(review): numTemps and numConstants are assigned below but not read
 * again in this function; the declaration loops use fixed counts. */
1022 switch (version) {
1023 case 10:
1024 case 11:
1025 case 12:
1026 case 13:
1027 case 14: numTemps=12;
1028 numConstants=8;
1029 strcpy(tmpLine, "!!ARBfp1.0\n");
1030 break;
1031 case 20: numTemps=12;
1032 numConstants=8;
1033 strcpy(tmpLine, "!!ARBfp2.0\n");
1034 FIXME("No work done yet to support ps2.0 in hw\n");
1035 break;
1036 case 30: numTemps=32;
1037 numConstants=8;
1038 strcpy(tmpLine, "!!ARBfp3.0\n");
1039 FIXME("No work done yet to support ps3.0 in hw\n");
1040 break;
1041 default:
1042 numTemps=12;
1043 numConstants=8;
1044 strcpy(tmpLine, "!!ARBfp1.0\n");
1045 FIXME("Unrecognized pixel shader version!\n");
1047 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1049 /* TODO: find out how many registers are really needed */
1050 for(i = 0; i < 6; i++) {
1051 sprintf(tmpLine, "TEMP T%lu;\n", i);
1052 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1055 for(i = 0; i < 6; i++) {
1056 sprintf(tmpLine, "TEMP R%lu;\n", i);
1057 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1060 sprintf(tmpLine, "TEMP TMP;\n");
1061 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1062 sprintf(tmpLine, "TEMP TMP2;\n");
1063 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1064 sprintf(tmpLine, "TEMP TA;\n");
1065 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1066 sprintf(tmpLine, "TEMP TB;\n");
1067 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1068 sprintf(tmpLine, "TEMP TC;\n");
1069 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1071 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1072 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1073 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1074 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1075 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1076 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1078 for(i = 0; i < 4; i++) {
1079 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1080 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1083 ++pToken;
1084 continue;
/* Comment token: log it and skip over the comment payload. */
1087 if (pshader_is_comment_token(*pToken)) { /** comment */
1088 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1089 ++pToken;
1090 FIXME("#%s\n", (char*)pToken);
1091 pToken += comment_len;
1092 continue;
1094 /* here */
1095 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1096 code = *pToken;
1097 #endif
/* Instruction token: look up the opcode; pToken is advanced past the opcode
 * token so that it points at the first parameter token. */
1098 pInstr = pToken;
1099 curOpcode = pshader_program_get_opcode(*pToken, version);
1100 ++pToken;
1101 if (NULL == curOpcode) {
1102 /* unknown current opcode ... (shouldn't be any!) */
1103 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1104 FIXME("unrecognized opcode: %08lx\n", *pToken);
1105 ++pToken;
1107 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1108 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1109 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1110 pToken += curOpcode->num_params;
1111 } else {
1112 TRACE("Found opcode %s %s\n", curOpcode->name, curOpcode->glname);
1113 saturate = FALSE;
1115 /* Build opcode for GL vertex_program */
/* Cases that end in "continue" have emitted their complete output and
 * advanced pToken themselves; cases that "break" have only stored the GL
 * instruction name in tmpLine and rely on the generic operand code after
 * this switch. */
1116 switch (curOpcode->opcode) {
1117 case D3DSIO_NOP:
1118 case D3DSIO_PHASE:
1119 continue;
1120 case D3DSIO_MOV:
1121 /* Address registers must be loaded with the ARL instruction */
1122 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1123 if (((*pToken) & REGMASK) < nUseAddressRegister) {
1124 strcpy(tmpLine, "ARL");
1125 break;
1126 } else
1127 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & REGMASK));
1129 /* fall through */
1130 case D3DSIO_CND:
1131 case D3DSIO_CMP:
1132 case D3DSIO_ADD:
1133 case D3DSIO_SUB:
1134 case D3DSIO_MAD:
1135 case D3DSIO_MUL:
1136 case D3DSIO_RCP:
1137 case D3DSIO_RSQ:
1138 case D3DSIO_DP3:
1139 case D3DSIO_DP4:
1140 case D3DSIO_MIN:
1141 case D3DSIO_MAX:
1142 case D3DSIO_SLT:
1143 case D3DSIO_SGE:
1144 case D3DSIO_LIT:
1145 case D3DSIO_DST:
1146 case D3DSIO_FRC:
1147 case D3DSIO_EXPP:
1148 case D3DSIO_LOGP:
1149 case D3DSIO_EXP:
1150 case D3DSIO_LOG:
1151 case D3DSIO_LRP:
1152 case D3DSIO_TEXKILL:
1153 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1154 strcpy(tmpLine, curOpcode->glname);
1155 break;
1156 case D3DSIO_DEF:
1158 DWORD reg = *pToken & REGMASK;
1159 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1160 *((const float *)(pToken + 1)),
1161 *((const float *)(pToken + 2)),
1162 *((const float *)(pToken + 3)),
1163 *((const float *)(pToken + 4)) );
1165 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1167 This->constants[reg] = 1;
1168 pToken += 5;
1169 continue;
1171 break;
1172 case D3DSIO_TEX:
1174 char tmp[20];
1175 get_write_mask(*pToken, tmp);
1176 if (version != 14) {
1177 DWORD reg = *pToken & REGMASK;
1178 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1179 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1180 ++pToken;
1181 } else {
/* NOTE(review): get_register_name() can produce names longer than 19
 * characters (e.g. "fragment.color.secondary"), which would not fit in
 * reg[20] - confirm that such a source cannot occur here. */
1182 char reg[20];
1183 DWORD reg1 = *pToken & REGMASK;
1184 DWORD reg2 = *++pToken & REGMASK;
1185 if (gen_input_modifier_line(*pToken, 0, reg, tmpLine, This->constants)) {
1186 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1188 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg, reg2);
1189 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1190 ++pToken;
1192 continue;
1194 break;
1195 case D3DSIO_TEXCOORD:
1197 char tmp[20];
1198 get_write_mask(*pToken, tmp);
1199 if (version != 14) {
1200 DWORD reg = *pToken & REGMASK;
1201 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1202 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1203 ++pToken;
1204 } else {
1205 DWORD reg1 = *pToken & REGMASK;
1206 DWORD reg2 = *++pToken & REGMASK;
1207 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1208 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1209 ++pToken;
1211 continue;
1213 break;
1214 case D3DSIO_TEXM3x2PAD:
1216 DWORD reg = *pToken & REGMASK;
1217 char buf[50];
1218 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1219 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1221 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1222 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1223 ++pToken;
1224 continue;
1226 break;
1227 case D3DSIO_TEXM3x2TEX:
1229 DWORD reg = *pToken & REGMASK;
1230 char buf[50];
1231 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1232 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1234 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1235 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1236 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1237 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1238 ++pToken;
1239 continue;
1241 break;
1242 case D3DSIO_TEXREG2AR:
1244 DWORD reg1 = *pToken & REGMASK;
1245 DWORD reg2 = *++pToken & REGMASK;
1246 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1247 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1248 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1249 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1250 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1251 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1252 ++pToken;
1253 continue;
1255 break;
1256 case D3DSIO_TEXREG2GB:
1258 DWORD reg1 = *pToken & REGMASK;
1259 DWORD reg2 = *++pToken & REGMASK;
1260 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1261 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1262 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1263 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1264 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1265 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1266 ++pToken;
1267 continue;
1269 break;
1270 case D3DSIO_TEXBEM:
1272 DWORD reg1 = *pToken & REGMASK;
1273 DWORD reg2 = *++pToken & REGMASK;
1275 /* FIXME: Should apply the BUMPMAPENV matrix */
1276 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1277 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1278 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1279 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1280 ++pToken;
1281 continue;
1283 break;
/* Each TEXM3x3PAD writes one component of TMP (x for row 0, y for row 1) and
 * records its destination register number in tcw[row]; the instruction that
 * completes the 3x3 multiply resets row to 0. */
1284 case D3DSIO_TEXM3x3PAD:
1286 DWORD reg = *pToken & REGMASK;
1287 char buf[50];
1288 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1289 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1291 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1292 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1293 tcw[row++] = reg;
1294 ++pToken;
1295 continue;
1297 break;
1298 case D3DSIO_TEXM3x3TEX:
1300 DWORD reg = *pToken & REGMASK;
1301 char buf[50];
1302 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1303 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1306 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1307 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1309 /* Cubemap textures will be more used than 3D ones. */
1310 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1311 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1312 row = 0;
1313 ++pToken;
1314 continue;
1316 case D3DSIO_TEXM3x3VSPEC:
1318 DWORD reg = *pToken & REGMASK;
1319 char buf[50];
1320 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1321 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1323 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1324 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1326 /* Construct the eye-ray vector from w coordinates */
1327 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1328 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1329 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1330 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1331 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1332 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1334 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1335 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1336 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1337 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1338 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1339 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1340 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1342 /* Cubemap textures will be more used than 3D ones. */
1343 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1344 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1345 row = 0;
1346 ++pToken;
1347 continue;
1349 break;
1350 case D3DSIO_TEXM3x3SPEC:
1352 DWORD reg = *pToken & REGMASK;
1353 DWORD reg3 = *(pToken + 2) & REGMASK;
1354 char buf[50];
1355 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1356 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1358 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1359 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1361 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
/* NOTE(review): constants are named "C<n>" or "program.env[<n>]" elsewhere in
 * this file; no array called "C" is declared in the generated program, so the
 * "C[%lu]" operand below looks inconsistent - confirm. */
1362 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1363 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1365 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1366 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1367 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1368 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1370 /* Cubemap textures will be more used than 3D ones. */
1371 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1372 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1373 row = 0;
1374 pToken += 3;
1375 continue;
1377 break;
1379 default:
1380 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1381 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1382 } else {
1383 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1385 pToken += curOpcode->num_params; /* maybe + 1 */
1386 continue;
1389 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1390 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1391 switch (mask) {
1392 case D3DSPDM_SATURATE: saturate = TRUE; break;
1393 #if 0 /* as yet unhandled modifiers */
1394 case D3DSPDM_CENTROID: centroid = TRUE; break;
1395 case D3DSPDM_PP: partialpresision = TRUE; break;
1396 case D3DSPDM_X2: X2 = TRUE; break;
1397 case D3DSPDM_X4: X4 = TRUE; break;
1398 case D3DSPDM_X8: X8 = TRUE; break;
1399 case D3DSPDM_D2: D2 = TRUE; break;
1400 case D3DSPDM_D4: D4 = TRUE; break;
1401 case D3DSPDM_D8: D8 = TRUE; break;
1402 #endif
1403 default:
1404 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1408 /* Generate input and output registers */
1409 if (curOpcode->num_params > 0) {
1410 char regs[5][50];
1411 char operands[4][100];
1412 char swzstring[20];
/* NOTE(review): this declaration shadows the function-level "saturate" that
 * the destination modifier switch above has just set; inside this block only
 * the value computed below is used. */
1413 int saturate = 0;
1414 char tmpOp[256];
1415 TRACE("(%p): Opcode has %d params\n", This, curOpcode->num_params);
1417 /* Generate lines that handle input modifier computation */
1418 for (i = 1; i < curOpcode->num_params; ++i) {
1419 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1420 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1421 addline(&lineNum, pgmStr, &pgmLength, tmpOp);
1425 /* Handle saturation only when no shift is present in the output modifier */
1426 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1427 saturate = 1;
1429 /* Handle output register */
1430 get_register_name(*pToken, tmpOp, This->constants);
1431 strcpy(operands[0], tmpOp);
1432 get_write_mask(*pToken, tmpOp);
1433 strcat(operands[0], tmpOp);
1435 /* This function works because of side effects from gen_input_modifier_line */
1436 /* Handle input registers */
1437 for (i = 1; i < curOpcode->num_params; ++i) {
1438 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1439 strcpy(operands[i], regs[i - 1]);
1440 get_input_register_swizzle(*(pToken + i), swzstring);
1441 strcat(operands[i], swzstring);
/* CMP and CND build the whole instruction themselves with reordered or
 * extra operands; every other opcode appends its operands in order to the
 * instruction name already stored in tmpLine. */
1444 switch(curOpcode->opcode) {
1445 case D3DSIO_CMP:
1446 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
1447 break;
1448 case D3DSIO_CND:
1449 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1450 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1451 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
1452 break;
1453 default:
1454 if (saturate)
1455 strcat(tmpLine, "_SAT");
1456 strcat(tmpLine, " ");
1457 strcat(tmpLine, operands[0]);
1458 for (i = 1; i < curOpcode->num_params; i++) {
1459 strcat(tmpLine, ", ");
1460 strcat(tmpLine, operands[i]);
1462 strcat(tmpLine,";\n");
1464 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1465 pToken += curOpcode->num_params;
1467 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1468 if (curOpcode->num_params > 0) {
1469 DWORD param = *(pInstr + 1);
1470 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1472 /* Generate a line that handle the output modifier computation */
1473 char regstr[100];
1474 char write_mask[20];
1475 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1476 get_register_name(param, regstr, This->constants);
1477 get_write_mask(param, write_mask);
1478 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1479 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1482 #endif
1485 /* TODO: What about result.depth? */
1486 strcpy(tmpLine, "MOV result.color, R0;\n");
1487 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1489 strcpy(tmpLine, "END\n");
1490 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1493 /* finally null terminate the pgmStr*/
1494 pgmStr[pgmLength] = 0;
/* NOTE(review): the extension tested here is ARB_VERTEX_PROGRAM although the
 * program is bound and loaded as GL_FRAGMENT_PROGRAM_ARB below - confirm
 * whether the fragment program extension should be checked instead. */
1495 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1496 /* Create the hw shader */
1498 /* pgmStr sometimes gets too long for a normal TRACE */
1499 TRACE("Generated program:\n");
1500 if (TRACE_ON(d3d_shader)) {
1501 fprintf(stderr, "%s\n", pgmStr);
1504 /* TODO: change to resource.glObjectHandel or something like that */
1505 GL_EXTCALL(glGenProgramsARB(1, &This->prgId));
1507 TRACE("Creating a hw pixel shader, prg=%d\n", This->prgId);
1508 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->prgId));
1510 TRACE("Created hw pixel shader, prg=%d\n", This->prgId);
1511 /* Create the program and check for errors */
1512 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
1513 if (glGetError() == GL_INVALID_OPERATION) {
1514 GLint errPos;
1515 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1516 FIXME("HW PixelShader Error at position %d: %s\n",
1517 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1518 This->prgId = -1;
1521 #if 1 /* if were using the data buffer of device then we don't need to free it */
1522 HeapFree(GetProcessHeap(), 0, pgmStr);
1523 #endif
1526 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1527 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1528 static const char swizzle_reg_chars[] = "rgba";
1530 /* the unknown mask is for bits not yet accounted for by any other mask... */
1531 #define UNKNOWN_MASK 0xC000
1533 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
1534 #define EXTENDED_REG 0x1800
1536 DWORD reg = param & D3DSP_REGNUM_MASK;
1537 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
1539 if (input) {
1540 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1541 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1542 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1543 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1544 TRACE("-");
1545 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1546 TRACE("1-");
1549 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1550 case D3DSPR_TEMP:
1551 TRACE("r%lu", reg);
1552 break;
1553 case D3DSPR_INPUT:
1554 TRACE("v%lu", reg);
1555 break;
1556 case D3DSPR_CONST:
1557 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1558 break;
1560 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1561 TRACE("t%lu", reg);
1562 break;
1563 case D3DSPR_RASTOUT:
1564 TRACE("%s", rastout_reg_names[reg]);
1565 break;
1566 case D3DSPR_ATTROUT:
1567 TRACE("oD%lu", reg);
1568 break;
1569 case D3DSPR_TEXCRDOUT:
1570 TRACE("oT%lu", reg);
1571 break;
1572 case D3DSPR_CONSTINT:
1573 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1574 break;
1575 case D3DSPR_CONSTBOOL:
1576 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1577 break;
1578 case D3DSPR_LABEL:
1579 TRACE("l%lu", reg);
1580 break;
1581 case D3DSPR_LOOP:
1582 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1583 break;
1584 default:
1585 break;
1588 if (!input) {
1589 /** operand output */
1591 * for better debugging traces it's done into opcode dump code
1592 * @see pshader_program_dump_opcode
1593 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1594 DWORD mask = param & D3DSP_DSTMOD_MASK;
1595 switch (mask) {
1596 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1597 default:
1598 TRACE("_unhandled_modifier(0x%08lx)", mask);
1601 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1602 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1603 if (shift > 0) {
1604 TRACE("_x%u", 1 << shift);
1608 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1609 TRACE(".");
1610 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1611 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1612 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1613 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1615 } else {
1616 /** operand input */
1617 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1618 DWORD swizzle_r = swizzle & 0x03;
1619 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1620 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1621 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1623 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1624 DWORD mask = param & D3DSP_SRCMOD_MASK;
1625 /*TRACE("_modifier(0x%08lx) ", mask);*/
1626 switch (mask) {
1627 case D3DSPSM_NONE: break;
1628 case D3DSPSM_NEG: break;
1629 case D3DSPSM_BIAS: TRACE("_bias"); break;
1630 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1631 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1632 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1633 case D3DSPSM_COMP: break;
1634 case D3DSPSM_X2: TRACE("_x2"); break;
1635 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1636 case D3DSPSM_DZ: TRACE("_dz"); break;
1637 case D3DSPSM_DW: TRACE("_dw"); break;
1638 default:
1639 TRACE("_unknown(0x%08lx)", mask);
1644 * swizzle bits fields:
1645 * RRGGBBAA
1647 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1648 if (swizzle_r == swizzle_g &&
1649 swizzle_r == swizzle_b &&
1650 swizzle_r == swizzle_a) {
1651 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1652 } else {
1653 TRACE(".%c%c%c%c",
1654 swizzle_reg_chars[swizzle_r],
1655 swizzle_reg_chars[swizzle_g],
1656 swizzle_reg_chars[swizzle_b],
1657 swizzle_reg_chars[swizzle_a]);
1663 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1664 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1665 const DWORD* pToken = pFunction;
1666 const SHADER_OPCODE *curOpcode = NULL;
1667 DWORD len = 0;
1668 DWORD i;
1669 int version = 0;
1670 TRACE("(%p) : Parsing programme\n", This);
1672 if (NULL != pToken) {
1673 while (D3DPS_END() != *pToken) {
1674 if (pshader_is_version_token(*pToken)) { /** version */
1675 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1676 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1677 ++pToken;
1678 ++len;
1679 continue;
1681 if (pshader_is_comment_token(*pToken)) { /** comment */
1682 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1683 ++pToken;
1684 TRACE("//%s\n", (char*)pToken);
1685 pToken += comment_len;
1686 len += comment_len + 1;
1687 continue;
1689 if (!version) {
1690 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1692 curOpcode = pshader_program_get_opcode(*pToken, version);
1693 ++pToken;
1694 ++len;
1695 if (NULL == curOpcode) {
1697 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1698 while (*pToken & 0x80000000) {
1700 /* unknown current opcode ... */
1701 TRACE("unrecognized opcode: %08lx", *pToken);
1702 ++pToken;
1703 ++len;
1704 TRACE("\n");
1707 } else {
1708 if (curOpcode->opcode == D3DSIO_DCL) {
1709 TRACE("dcl_");
1710 switch(*pToken & 0xFFFF) {
1711 case D3DDECLUSAGE_POSITION:
1712 TRACE("%s%ld ", "position",(*pToken & 0xF0000) >> 16);
1713 break;
1714 case D3DDECLUSAGE_BLENDINDICES:
1715 TRACE("%s ", "blend");
1716 break;
1717 case D3DDECLUSAGE_BLENDWEIGHT:
1718 TRACE("%s ", "weight");
1719 break;
1720 case D3DDECLUSAGE_NORMAL:
1721 TRACE("%s%ld ", "normal",(*pToken & 0xF0000) >> 16);
1722 break;
1723 case D3DDECLUSAGE_PSIZE:
1724 TRACE("%s ", "psize");
1725 break;
1726 case D3DDECLUSAGE_COLOR:
1727 if((*pToken & 0xF0000) >> 16 == 0) {
1728 TRACE("%s ", "color");
1729 } else {
1730 TRACE("%s%ld ", "specular", ((*pToken & 0xF0000) >> 16) - 1);
1732 break;
1733 case D3DDECLUSAGE_TEXCOORD:
1734 TRACE("%s%ld ", "texture", (*pToken & 0xF0000) >> 16);
1735 break;
1736 case D3DDECLUSAGE_TANGENT:
1737 TRACE("%s ", "tangent");
1738 break;
1739 case D3DDECLUSAGE_BINORMAL:
1740 TRACE("%s ", "binormal");
1741 break;
1742 case D3DDECLUSAGE_TESSFACTOR:
1743 TRACE("%s ", "tessfactor");
1744 break;
1745 case D3DDECLUSAGE_POSITIONT:
1746 TRACE("%s%ld ", "positionT",(*pToken & 0xF0000) >> 16);
1747 break;
1748 case D3DDECLUSAGE_FOG:
1749 TRACE("%s ", "fog");
1750 break;
1751 case D3DDECLUSAGE_DEPTH:
1752 TRACE("%s ", "depth");
1753 break;
1754 case D3DDECLUSAGE_SAMPLE:
1755 TRACE("%s ", "sample");
1756 break;
1757 default:
1758 FIXME("Unrecognised dcl %08lx", *pToken & 0xFFFF);
1760 ++pToken;
1761 ++len;
1762 pshader_program_dump_ps_param(*pToken, 0);
1763 ++pToken;
1764 ++len;
1765 } else
1766 if (curOpcode->opcode == D3DSIO_DEF) {
1767 TRACE("def c%lu = ", *pToken & 0xFF);
1768 ++pToken;
1769 ++len;
1770 TRACE("%f ,", *(float *)pToken);
1771 ++pToken;
1772 ++len;
1773 TRACE("%f ,", *(float *)pToken);
1774 ++pToken;
1775 ++len;
1776 TRACE("%f ,", *(float *)pToken);
1777 ++pToken;
1778 ++len;
1779 TRACE("%f", *(float *)pToken);
1780 ++pToken;
1781 ++len;
1782 } else {
1783 TRACE("%s ", curOpcode->name);
1784 if (curOpcode->num_params > 0) {
1785 pshader_program_dump_ps_param(*pToken, 0);
1786 ++pToken;
1787 ++len;
1788 for (i = 1; i < curOpcode->num_params; ++i) {
1789 TRACE(", ");
1790 pshader_program_dump_ps_param(*pToken, 1);
1791 ++pToken;
1792 ++len;
1796 TRACE("\n");
1799 This->functionLength = (len + 1) * sizeof(DWORD);
1800 } else {
1801 This->functionLength = 1; /* no Function defined use fixed function vertex processing */
1804 /* Generate HW shader if needed */
1805 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1806 TRACE("(%p) : Generating hardware program\n", This);
1807 #if 1
1808 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1809 #endif
1812 TRACE("(%p) : Copying the function\n", This);
1813 /* copy the function ... because it will certainly be released by application */
1814 if (NULL != pFunction) {
1815 This->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->functionLength);
1816 memcpy((void *)This->function, pFunction, This->functionLength);
1817 } else {
1818 This->function = NULL;
1821 /* TODO: Some proper return values for failures */
1822 TRACE("(%p) : Returning D3D_OK\n", This);
1823 return D3D_OK;
1826 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1828 /*** IUnknown methods ***/
1829 IWineD3DPixelShaderImpl_QueryInterface,
1830 IWineD3DPixelShaderImpl_AddRef,
1831 IWineD3DPixelShaderImpl_Release,
1832 /*** IWineD3DPixelShader methods ***/
1833 IWineD3DPixelShaderImpl_GetParent,
1834 IWineD3DPixelShaderImpl_GetDevice,
1835 IWineD3DPixelShaderImpl_GetFunction,
1836 /* not part of d3d */
1837 IWineD3DPixelShaderImpl_SetFunction