wined3d: Move the D3DSIO_DCL dumping code into its own function.
[wine/multimedia.git] / dlls / wined3d / pixelshader.c
blobe5311c735e67a0aef7fb4b3f0bf9e604b04cdaeb
1 /*
2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "config.h"
23 #include <math.h>
24 #include <stdio.h>
26 #include "wined3d_private.h"
WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);

/* Expands to the gl_info member reached through the shader's device and its
 * owning IWineD3D object; a pointer named `This` must be in scope at the use site. */
#define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info

/* Per-instruction tracing for the software VM routines below.  When the
 * condition is 0 both macros expand to nothing, so the trace arguments are
 * never evaluated. */
#if 0 /* Must not be 1 in cvs version */
# define PSTRACE(A) TRACE A
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
# define PSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif

/* The maximum size of the program */
#define PGMSIZE 65535

/* Mask applied to a parameter token to extract the register number. */
#define REGMASK 0x00001FFF
44 typedef void (*shader_fct_t)();
46 typedef struct SHADER_OPCODE {
47 unsigned int opcode;
48 const char* name;
49 const char* glname;
50 CONST UINT num_params;
51 shader_fct_t soft_fct;
52 DWORD min_version;
53 DWORD max_version;
54 } SHADER_OPCODE;
56 #define GLNAME_REQUIRE_GLSL ((const char *)1)
57 /* *******************************************
58 IWineD3DPixelShader IUnknown parts follow
59 ******************************************* */
60 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
62 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
63 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
64 if (IsEqualGUID(riid, &IID_IUnknown)
65 || IsEqualGUID(riid, &IID_IWineD3DBase)
66 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
67 IUnknown_AddRef(iface);
68 *ppobj = This;
69 return D3D_OK;
71 return E_NOINTERFACE;
74 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
75 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
76 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
77 return InterlockedIncrement(&This->ref);
80 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
81 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
82 ULONG ref;
83 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
84 ref = InterlockedDecrement(&This->ref);
85 if (ref == 0) {
86 HeapFree(GetProcessHeap(), 0, This);
88 return ref;
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader).
   When that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
97 /* *******************************************
98 IWineD3DPixelShader IWineD3DPixelShader parts follow
99 ******************************************* */
101 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
102 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
104 *parent = This->parent;
105 IUnknown_AddRef(*parent);
106 TRACE("(%p) : returning %p\n", This, *parent);
107 return D3D_OK;
110 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
111 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
112 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
113 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
114 TRACE("(%p) returning %p\n", This, *pDevice);
115 return D3D_OK;
119 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
120 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
121 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
123 if (NULL == pData) {
124 *pSizeOfData = This->functionLength;
125 return D3D_OK;
127 if (*pSizeOfData < This->functionLength) {
128 *pSizeOfData = This->functionLength;
129 return D3DERR_MOREDATA;
131 if (NULL == This->function) { /* no function defined */
132 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
133 (*(DWORD **) pData) = NULL;
134 } else {
135 if (This->functionLength == 0) {
138 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
139 memcpy(pData, This->function, This->functionLength);
141 return D3D_OK;
144 /*******************************
145 * pshader functions software VM
148 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
149 d->x = s0->x + s1->x;
150 d->y = s0->y + s1->y;
151 d->z = s0->z + s1->z;
152 d->w = s0->w + s1->w;
153 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
157 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
158 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
159 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
160 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
163 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
164 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
165 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
166 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
169 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
170 d->x = 1.0f;
171 d->y = s0->y * s1->y;
172 d->z = s0->z;
173 d->w = s1->w;
174 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
175 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
178 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
179 union {
180 float f;
181 DWORD d;
182 } tmp;
184 tmp.f = floorf(s0->w);
185 d->x = powf(2.0f, tmp.f);
186 d->y = s0->w - tmp.f;
187 tmp.f = powf(2.0f, s0->w);
188 tmp.d &= 0xFFFFFF00U;
189 d->z = tmp.f;
190 d->w = 1.0f;
191 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
192 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
195 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
196 d->x = 1.0f;
197 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
198 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
199 d->w = 1.0f;
200 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
201 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
204 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
205 float tmp_f = fabsf(s0->w);
206 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
207 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
208 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
211 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
212 d->x = s0->x * s1->x + s2->x;
213 d->y = s0->y * s1->y + s2->y;
214 d->z = s0->z * s1->z + s2->z;
215 d->w = s0->w * s1->w + s2->w;
216 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
217 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
220 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
221 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
222 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
223 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
224 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
225 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
226 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
229 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
230 d->x = (s0->x < s1->x) ? s0->x : s1->x;
231 d->y = (s0->y < s1->y) ? s0->y : s1->y;
232 d->z = (s0->z < s1->z) ? s0->z : s1->z;
233 d->w = (s0->w < s1->w) ? s0->w : s1->w;
234 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
235 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
238 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
239 d->x = s0->x;
240 d->y = s0->y;
241 d->z = s0->z;
242 d->w = s0->w;
243 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
244 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
247 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
248 d->x = s0->x * s1->x;
249 d->y = s0->y * s1->y;
250 d->z = s0->z * s1->z;
251 d->w = s0->w * s1->w;
252 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
253 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: does nothing apart from the optional trace. */
void pshader_nop(void)
{
    PSTRACE(("executing nop\n"));
}
261 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
262 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
263 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
264 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
267 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
268 float tmp_f = fabsf(s0->w);
269 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
270 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
271 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
274 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
275 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
276 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
277 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
278 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
279 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
280 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
283 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
284 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
285 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
286 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
287 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
288 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
289 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
292 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
293 d->x = s0->x - s1->x;
294 d->y = s0->y - s1->y;
295 d->z = s0->z - s1->z;
296 d->w = s0->w - s1->w;
297 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
298 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
302 * Version 1.1 specific
305 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
306 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
307 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
308 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
311 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
312 float tmp_f = fabsf(s0->w);
313 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
314 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
315 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
318 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
319 d->x = s0->x - floorf(s0->x);
320 d->y = s0->y - floorf(s0->y);
321 d->z = 0.0f;
322 d->w = 1.0f;
323 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
324 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
327 typedef FLOAT D3DMATRIX44[4][4];
328 typedef FLOAT D3DMATRIX43[4][3];
329 typedef FLOAT D3DMATRIX34[3][4];
330 typedef FLOAT D3DMATRIX33[3][3];
331 typedef FLOAT D3DMATRIX23[2][3];
333 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
335 * Buggy CODE: here only if cast not work for copy/paste
336 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
337 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
338 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
339 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
340 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
341 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
342 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
344 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
345 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
346 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
347 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
348 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
349 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
350 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
351 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
354 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
355 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
356 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
357 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
358 d->w = 1.0f;
359 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
360 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
361 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
362 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
365 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
366 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
367 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
368 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
369 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
370 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
371 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
372 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
373 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
376 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
377 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
378 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
379 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
380 d->w = 1.0f;
381 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
382 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
383 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
384 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
387 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
388 FIXME("check\n");
389 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
390 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
391 d->z = 0.0f;
392 d->w = 1.0f;
396 * Version 2.0 specific
398 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
399 d->x = s0->x * (s1->x - s2->x) + s2->x;
400 d->y = s0->y * (s1->y - s2->y) + s2->y;
401 d->z = s0->z * (s1->z - s2->z) + s2->z;
402 d->w = s0->w * (s1->w - s2->w) + s2->w;
405 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
406 d->x = s0->y * s1->z - s0->z * s1->y;
407 d->y = s0->z * s1->x - s0->x * s1->z;
408 d->z = s0->x * s1->y - s0->y * s1->x;
409 d->w = 0.9f; /* w is undefined, so set it to something safeish */
411 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
412 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
415 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
416 d->x = fabsf(s0->x);
417 d->y = fabsf(s0->y);
418 d->z = fabsf(s0->z);
419 d->w = fabsf(s0->w);
420 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
421 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
424 /* Stubs */
425 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
426 FIXME(" : Stub\n");
429 void pshader_texkill(WINED3DSHADERVECTOR* d) {
430 FIXME(" : Stub\n");
433 void pshader_tex(WINED3DSHADERVECTOR* d) {
434 FIXME(" : Stub\n");
436 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
437 FIXME(" : Stub\n");
440 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
441 FIXME(" : Stub\n");
444 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
445 FIXME(" : Stub\n");
448 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
449 FIXME(" : Stub\n");
452 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
453 FIXME(" : Stub\n");
456 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
457 FIXME(" : Stub\n");
460 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
461 FIXME(" : Stub\n");
464 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
465 FIXME(" : Stub\n");
468 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
469 FIXME(" : Stub\n");
472 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
473 FIXME(" : Stub\n");
476 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
477 FIXME(" : Stub\n");
480 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
481 FIXME(" : Stub\n");
484 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
485 FIXME(" : Stub\n");
488 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
489 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
490 FIXME(" : Stub\n");
493 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
494 FIXME(" : Stub\n");
497 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
498 FIXME(" : Stub\n");
501 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
502 FIXME(" : Stub\n");
505 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
506 FIXME(" : Stub\n");
509 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
510 FIXME(" : Stub\n");
513 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
514 FIXME(" : Stub\n");
517 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
518 FIXME(" : Stub\n");
521 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
522 FIXME(" : Stub\n");
525 void pshader_call(WINED3DSHADERVECTOR* d) {
526 FIXME(" : Stub\n");
529 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
530 FIXME(" : Stub\n");
533 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
534 FIXME(" : Stub\n");
537 void pshader_ret(WINED3DSHADERVECTOR* d) {
538 FIXME(" : Stub\n");
541 void pshader_endloop(WINED3DSHADERVECTOR* d) {
542 FIXME(" : Stub\n");
545 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
546 FIXME(" : Stub\n");
549 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
550 FIXME(" : Stub\n");
553 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
554 FIXME(" : Stub\n");
557 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
558 FIXME(" : Stub\n");
561 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
562 FIXME(" : Stub\n");
565 void pshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
566 FIXME(" : Stub\n");
569 void pshader_endrep(void) {
570 FIXME(" : Stub\n");
573 void pshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
574 FIXME(" : Stub\n");
577 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
578 FIXME(" : Stub\n");
581 void pshader_else(WINED3DSHADERVECTOR* d) {
582 FIXME(" : Stub\n");
585 void pshader_label(WINED3DSHADERVECTOR* d) {
586 FIXME(" : Stub\n");
589 void pshader_endif(WINED3DSHADERVECTOR* d) {
590 FIXME(" : Stub\n");
593 void pshader_break(WINED3DSHADERVECTOR* d) {
594 FIXME(" : Stub\n");
597 void pshader_breakc(WINED3DSHADERVECTOR* d) {
598 FIXME(" : Stub\n");
601 void pshader_mova(WINED3DSHADERVECTOR* d) {
602 FIXME(" : Stub\n");
605 void pshader_defb(WINED3DSHADERVECTOR* d) {
606 FIXME(" : Stub\n");
609 void pshader_defi(WINED3DSHADERVECTOR* d) {
610 FIXME(" : Stub\n");
613 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
614 FIXME(" : Stub\n");
617 void pshader_dsx(WINED3DSHADERVECTOR* d) {
618 FIXME(" : Stub\n");
621 void pshader_dsy(WINED3DSHADERVECTOR* d) {
622 FIXME(" : Stub\n");
625 void pshader_texldd(WINED3DSHADERVECTOR* d) {
626 FIXME(" : Stub\n");
629 void pshader_setp(WINED3DSHADERVECTOR* d) {
630 FIXME(" : Stub\n");
633 void pshader_texldl(WINED3DSHADERVECTOR* d) {
634 FIXME(" : Stub\n");
637 void pshader_breakp(WINED3DSHADERVECTOR* d) {
638 FIXME(" : Stub\n");
641 * log, exp, frc, m*x* seems to be macros ins ... to see
643 static CONST SHADER_OPCODE pshader_ins [] = {
644 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
645 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
646 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
647 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
648 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
649 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
650 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
651 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
652 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
653 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
654 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
655 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
656 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
657 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
658 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
659 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
660 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
661 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
662 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
663 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
664 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
665 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
666 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
667 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
668 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
669 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
672 /** FIXME: use direct access so add the others opcodes as stubs */
673 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
674 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
675 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
676 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
677 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
678 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
679 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
680 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
681 /* DCL is a specil operation */
682 {D3DSIO_DCL, "dcl", NULL, 1, pshader_dcl, 0, 0},
683 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
684 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
685 /* TODO: sng can possibly be performed as
686 RCP tmp, vec
687 MUL out, tmp, vec*/
688 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
689 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
690 DP3 tmp , vec, vec;
691 RSQ tmp, tmp.x;
692 MUL vec.xyz, vec, tmp;
693 but I think this is better because it accounts for w properly.
694 DP3 tmp , vec, vec;
695 RSQ tmp, tmp.x;
696 MUL vec, vec, tmp;
699 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
700 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
701 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, pshader_rep, 0, 0},
702 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
703 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, pshader_if, 0, 0},
704 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
705 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, pshader_else, 0, 0},
706 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, pshader_endif, 0, 0},
707 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, pshader_break, 0, 0},
708 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
709 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
710 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
711 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
713 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
715 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
716 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
717 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
718 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
721 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
722 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
723 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
724 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
725 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
726 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
729 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
730 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
731 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
732 /* def is a special operation */
733 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
734 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
735 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
736 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
737 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
738 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
739 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
740 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
741 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
742 /* TODO: dp2add can be made out of multiple instuctions */
743 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
744 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
745 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
746 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
747 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
748 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
749 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, pshader_breakp, 0, 0},
750 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
751 {0, NULL, NULL, 0, NULL, 0, 0}
755 inline static const SHADER_OPCODE* pshader_program_get_opcode(IWineD3DPixelShaderImpl *This, const DWORD code) {
756 DWORD i = 0;
757 DWORD version = This->version;
758 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
759 /** TODO: use dichotomic search */
760 while (NULL != pshader_ins[i].name) {
761 if (((code & D3DSI_OPCODE_MASK) == pshader_ins[i].opcode) &&
762 (((hex_version >= pshader_ins[i].min_version) && (hex_version <= pshader_ins[i].max_version)) ||
763 ((pshader_ins[i].min_version == 0) && (pshader_ins[i].max_version == 0)))) {
764 return &pshader_ins[i];
766 ++i;
768 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code, code, code & D3DSI_OPCODE_MASK, version);
769 return NULL;
772 inline static BOOL pshader_is_version_token(DWORD token) {
773 return 0xFFFF0000 == (token & 0xFFFF0000);
776 inline static BOOL pshader_is_comment_token(DWORD token) {
777 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
781 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
782 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
784 DWORD reg = param & REGMASK;
785 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
787 switch (regtype) {
788 case D3DSPR_TEMP:
789 sprintf(regstr, "R%lu", reg);
790 break;
791 case D3DSPR_INPUT:
792 if (reg==0) {
793 strcpy(regstr, "fragment.color.primary");
794 } else {
795 strcpy(regstr, "fragment.color.secondary");
797 break;
798 case D3DSPR_CONST:
799 if (constants[reg])
800 sprintf(regstr, "C%lu", reg);
801 else
802 sprintf(regstr, "program.env[%lu]", reg);
803 break;
804 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
805 sprintf(regstr,"T%lu", reg);
806 break;
807 case D3DSPR_RASTOUT:
808 sprintf(regstr, "%s", rastout_reg_names[reg]);
809 break;
810 case D3DSPR_ATTROUT:
811 sprintf(regstr, "oD[%lu]", reg);
812 break;
813 case D3DSPR_TEXCRDOUT:
814 sprintf(regstr, "oT[%lu]", reg);
815 break;
816 default:
817 FIXME("Unhandled register name Type(%ld)\n", regtype);
818 break;
822 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
823 *write_mask = 0;
824 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
825 strcat(write_mask, ".");
826 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
827 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
828 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
829 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
833 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
834 static const char swizzle_reg_chars[] = "rgba";
835 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
836 DWORD swizzle_x = swizzle & 0x03;
837 DWORD swizzle_y = (swizzle >> 2) & 0x03;
838 DWORD swizzle_z = (swizzle >> 4) & 0x03;
839 DWORD swizzle_w = (swizzle >> 6) & 0x03;
841 * swizzle bits fields:
842 * WWZZYYXX
844 *swzstring = 0;
845 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
846 if (swizzle_x == swizzle_y &&
847 swizzle_x == swizzle_z &&
848 swizzle_x == swizzle_w) {
849 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
850 } else {
851 sprintf(swzstring, ".%c%c%c%c",
852 swizzle_reg_chars[swizzle_x],
853 swizzle_reg_chars[swizzle_y],
854 swizzle_reg_chars[swizzle_z],
855 swizzle_reg_chars[swizzle_w]);
860 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
861 int lineLen = strlen(line);
862 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
863 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
864 return;
865 } else {
866 memcpy(pgm + *pgmLength, line, lineLen);
869 *pgmLength += lineLen;
870 ++(*lineNum);
871 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/*
 * Multiplier operand for each value of the 4 bit destination shift field.
 * Only shifts that map onto a component of the coefmul / coefdiv program
 * parameters have a real entry; the rest use the placeholder "dummy".
 */
static const char* shift_tab[] = {
    "dummy",     /*  0 (none) */
    "coefmul.x", /*  1 (x2)   */
    "coefmul.y", /*  2 (x4)   */
    "coefmul.z", /*  3 (x8)   */
    "coefmul.w", /*  4 (x16)  */
    "dummy",     /*  5 (x32)  */
    "dummy",     /*  6 (x64)  */
    "dummy",     /*  7 (x128) */
    "dummy",     /*  8 (d256) */
    "dummy",     /*  9 (d128) */
    "dummy",     /* 10 (d64)  */
    "dummy",     /* 11 (d32)  */
    "coefdiv.w", /* 12 (d16)  */
    "coefdiv.z", /* 13 (d8)   */
    "coefdiv.y", /* 14 (d4)   */
    "coefdiv.x"  /* 15 (d2)   */
};

/*
 * Generate a line that does the output modifier computation.
 *
 * saturate   : non-zero to emit MUL_SAT instead of MUL.
 * write_mask : write mask suffix appended to the destination (may be "").
 * shift      : destination shift field; only its low 4 bits are used, so an
 *              out of range or sign extended value can never index outside
 *              shift_tab.
 * regstr     : register that is both source and destination.
 * line       : output buffer receiving the instruction (no trailing newline).
 */
inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
    const char *factor = shift_tab[shift & 0x0F];
    const char *suffix = saturate ? "_SAT" : "";

    sprintf(line, "MUL%s %s%s, %s, %s;", suffix, regstr, write_mask, regstr, factor);
}
898 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
899 /* Generate a line that does the input modifier computation and return the input register to use */
900 static char regstr[256];
901 static char tmpline[256];
902 int insert_line;
904 /* Assume a new line will be added */
905 insert_line = 1;
907 /* Get register name */
908 get_register_name(instr, regstr, constants);
910 TRACE(" Register name %s\n", regstr);
911 switch (instr & D3DSP_SRCMOD_MASK) {
912 case D3DSPSM_NONE:
913 strcpy(outregstr, regstr);
914 insert_line = 0;
915 break;
916 case D3DSPSM_NEG:
917 sprintf(outregstr, "-%s", regstr);
918 insert_line = 0;
919 break;
920 case D3DSPSM_BIAS:
921 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
922 break;
923 case D3DSPSM_BIASNEG:
924 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
925 break;
926 case D3DSPSM_SIGN:
927 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
928 break;
929 case D3DSPSM_SIGNNEG:
930 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
931 break;
932 case D3DSPSM_COMP:
933 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
934 break;
935 case D3DSPSM_X2:
936 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
937 break;
938 case D3DSPSM_X2NEG:
939 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
940 break;
941 case D3DSPSM_DZ:
942 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
943 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
944 strcat(line, "\n"); /* Hack */
945 strcat(line, tmpline);
946 break;
947 case D3DSPSM_DW:
948 sprintf(line, "RCP T%c, %s;", 'A' + tmpreg, regstr);
949 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
950 strcat(line, "\n"); /* Hack */
951 strcat(line, tmpline);
952 break;
953 default:
954 strcpy(outregstr, regstr);
955 insert_line = 0;
958 if (insert_line) {
959 /* Substitute the register name */
960 sprintf(outregstr, "T%c", 'A' + tmpreg);
963 return insert_line;
965 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
966 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
967 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
968 const DWORD *pToken = pFunction;
969 const SHADER_OPCODE *curOpcode = NULL;
970 const DWORD *pInstr;
971 DWORD i;
972 unsigned lineNum = 0; /* The line number of the generated program (for loging)*/
973 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
974 char tmpLine[255];
975 DWORD nUseAddressRegister = 0;
976 #if 0 /* TODO: loop register (just another address register ) */
977 BOOL hasLoops = FALSE;
978 #endif
980 BOOL saturate; /* clamp to 0.0 -> 1.0*/
981 int row = 0; /* not sure, something to do with macros? */
982 DWORD tcw[2];
983 int version = 0; /* The version of the shader */
985 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
986 unsigned int pgmLength = 0;
988 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
989 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
990 if (This->device->fixupVertexBufferSize < PGMSIZE) {
991 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
992 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
993 This->fixupVertexBufferSize = PGMSIZE;
994 This->fixupVertexBuffer[0] = 0;
996 pgmStr = This->device->fixupVertexBuffer;
997 #else
998 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
999 #endif
1002 /* TODO: Think about using a first pass to work out what's required for the second pass. */
1003 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
1004 This->constants[i] = 0;
1006 if (NULL != pToken) {
1007 while (D3DPS_END() != *pToken) {
1008 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1009 if (version >= 2) {
1010 instructionSize = pToken & SIZEBITS >> 27;
1012 #endif
1013 if (pshader_is_version_token(*pToken)) { /** version */
1014 int numTemps;
1015 int numConstants;
1017 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1018 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1020 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1022 /* Each release of pixel shaders has had different numbers of temp registers */
1023 switch (version) {
1024 case 10:
1025 case 11:
1026 case 12:
1027 case 13:
1028 case 14: numTemps=12;
1029 numConstants=8;
1030 strcpy(tmpLine, "!!ARBfp1.0\n");
1031 break;
1032 case 20: numTemps=12;
1033 numConstants=8;
1034 strcpy(tmpLine, "!!ARBfp2.0\n");
1035 FIXME("No work done yet to support ps2.0 in hw\n");
1036 break;
1037 case 30: numTemps=32;
1038 numConstants=8;
1039 strcpy(tmpLine, "!!ARBfp3.0\n");
1040 FIXME("No work done yet to support ps3.0 in hw\n");
1041 break;
1042 default:
1043 numTemps=12;
1044 numConstants=8;
1045 strcpy(tmpLine, "!!ARBfp1.0\n");
1046 FIXME("Unrecognized pixel shader version!\n");
1048 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1050 /* TODO: find out how many registers are really needed */
1051 for(i = 0; i < 6; i++) {
1052 sprintf(tmpLine, "TEMP T%lu;\n", i);
1053 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1056 for(i = 0; i < 6; i++) {
1057 sprintf(tmpLine, "TEMP R%lu;\n", i);
1058 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1061 sprintf(tmpLine, "TEMP TMP;\n");
1062 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1063 sprintf(tmpLine, "TEMP TMP2;\n");
1064 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1065 sprintf(tmpLine, "TEMP TA;\n");
1066 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1067 sprintf(tmpLine, "TEMP TB;\n");
1068 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1069 sprintf(tmpLine, "TEMP TC;\n");
1070 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1072 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1073 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1074 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1075 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1076 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1077 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1079 for(i = 0; i < 4; i++) {
1080 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1081 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1084 ++pToken;
1085 continue;
1088 if (pshader_is_comment_token(*pToken)) { /** comment */
1089 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1090 ++pToken;
1091 FIXME("#%s\n", (char*)pToken);
1092 pToken += comment_len;
1093 continue;
1095 /* here */
1096 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1097 code = *pToken;
1098 #endif
1099 pInstr = pToken;
1100 curOpcode = pshader_program_get_opcode(This, *pToken);
1101 ++pToken;
1102 if (NULL == curOpcode) {
1103 /* unknown current opcode ... (shouldn't be any!) */
1104 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1105 FIXME("unrecognized opcode: %08lx\n", *pToken);
1106 ++pToken;
1108 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1109 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1110 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1111 pToken += curOpcode->num_params;
1112 } else {
1113 TRACE("Found opcode %s %s\n", curOpcode->name, curOpcode->glname);
1114 saturate = FALSE;
1116 /* Build opcode for GL vertex_program */
1117 switch (curOpcode->opcode) {
1118 case D3DSIO_NOP:
1119 case D3DSIO_PHASE:
1120 continue;
1121 case D3DSIO_MOV:
1122 /* Address registers must be loaded with the ARL instruction */
1123 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1124 if (((*pToken) & REGMASK) < nUseAddressRegister) {
1125 strcpy(tmpLine, "ARL");
1126 break;
1127 } else
1128 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & REGMASK));
1130 /* fall through */
1131 case D3DSIO_CND:
1132 case D3DSIO_CMP:
1133 case D3DSIO_ADD:
1134 case D3DSIO_SUB:
1135 case D3DSIO_MAD:
1136 case D3DSIO_MUL:
1137 case D3DSIO_RCP:
1138 case D3DSIO_RSQ:
1139 case D3DSIO_DP3:
1140 case D3DSIO_DP4:
1141 case D3DSIO_MIN:
1142 case D3DSIO_MAX:
1143 case D3DSIO_SLT:
1144 case D3DSIO_SGE:
1145 case D3DSIO_LIT:
1146 case D3DSIO_DST:
1147 case D3DSIO_FRC:
1148 case D3DSIO_EXPP:
1149 case D3DSIO_LOGP:
1150 case D3DSIO_EXP:
1151 case D3DSIO_LOG:
1152 case D3DSIO_LRP:
1153 case D3DSIO_TEXKILL:
1154 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1155 strcpy(tmpLine, curOpcode->glname);
1156 break;
1157 case D3DSIO_DEF:
1159 DWORD reg = *pToken & REGMASK;
1160 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1161 *((const float *)(pToken + 1)),
1162 *((const float *)(pToken + 2)),
1163 *((const float *)(pToken + 3)),
1164 *((const float *)(pToken + 4)) );
1166 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1168 This->constants[reg] = 1;
1169 pToken += 5;
1170 continue;
1172 break;
1173 case D3DSIO_TEX:
1175 char tmp[20];
1176 get_write_mask(*pToken, tmp);
1177 if (version != 14) {
1178 DWORD reg = *pToken & REGMASK;
1179 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1180 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1181 ++pToken;
1182 } else {
1183 char reg[20];
1184 DWORD reg1 = *pToken & REGMASK;
1185 DWORD reg2 = *++pToken & REGMASK;
1186 if (gen_input_modifier_line(*pToken, 0, reg, tmpLine, This->constants)) {
1187 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1189 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg, reg2);
1190 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1191 ++pToken;
1193 continue;
1195 break;
1196 case D3DSIO_TEXCOORD:
1198 char tmp[20];
1199 get_write_mask(*pToken, tmp);
1200 if (version != 14) {
1201 DWORD reg = *pToken & REGMASK;
1202 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1203 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1204 ++pToken;
1205 } else {
1206 DWORD reg1 = *pToken & REGMASK;
1207 DWORD reg2 = *++pToken & REGMASK;
1208 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1209 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1210 ++pToken;
1212 continue;
1214 break;
1215 case D3DSIO_TEXM3x2PAD:
1217 DWORD reg = *pToken & REGMASK;
1218 char buf[50];
1219 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1220 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1222 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1223 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1224 ++pToken;
1225 continue;
1227 break;
1228 case D3DSIO_TEXM3x2TEX:
1230 DWORD reg = *pToken & REGMASK;
1231 char buf[50];
1232 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1233 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1235 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1236 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1237 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1238 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1239 ++pToken;
1240 continue;
1242 break;
1243 case D3DSIO_TEXREG2AR:
1245 DWORD reg1 = *pToken & REGMASK;
1246 DWORD reg2 = *++pToken & REGMASK;
1247 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1248 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1249 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1250 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1251 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1252 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1253 ++pToken;
1254 continue;
1256 break;
1257 case D3DSIO_TEXREG2GB:
1259 DWORD reg1 = *pToken & REGMASK;
1260 DWORD reg2 = *++pToken & REGMASK;
1261 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1262 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1263 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1264 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1265 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1266 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1267 ++pToken;
1268 continue;
1270 break;
1271 case D3DSIO_TEXBEM:
1273 DWORD reg1 = *pToken & REGMASK;
1274 DWORD reg2 = *++pToken & REGMASK;
1276 /* FIXME: Should apply the BUMPMAPENV matrix */
1277 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1278 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1279 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1280 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1281 ++pToken;
1282 continue;
1284 break;
1285 case D3DSIO_TEXM3x3PAD:
1287 DWORD reg = *pToken & REGMASK;
1288 char buf[50];
1289 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1290 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1292 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1293 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1294 tcw[row++] = reg;
1295 ++pToken;
1296 continue;
1298 break;
1299 case D3DSIO_TEXM3x3TEX:
1301 DWORD reg = *pToken & REGMASK;
1302 char buf[50];
1303 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1304 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1307 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1308 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1310 /* Cubemap textures will be more used than 3D ones. */
1311 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1312 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1313 row = 0;
1314 ++pToken;
1315 continue;
1317 case D3DSIO_TEXM3x3VSPEC:
1319 DWORD reg = *pToken & REGMASK;
1320 char buf[50];
1321 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1322 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1324 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1325 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1327 /* Construct the eye-ray vector from w coordinates */
1328 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1329 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1330 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1331 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1332 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1333 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1335 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1336 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1337 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1338 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1339 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1340 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1341 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1343 /* Cubemap textures will be more used than 3D ones. */
1344 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1345 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1346 row = 0;
1347 ++pToken;
1348 continue;
1350 break;
1351 case D3DSIO_TEXM3x3SPEC:
1353 DWORD reg = *pToken & REGMASK;
1354 DWORD reg3 = *(pToken + 2) & REGMASK;
1355 char buf[50];
1356 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1357 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1359 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1360 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1362 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1363 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1364 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1366 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1367 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1368 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1369 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1371 /* Cubemap textures will be more used than 3D ones. */
1372 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1373 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1374 row = 0;
1375 pToken += 3;
1376 continue;
1378 break;
1380 default:
1381 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1382 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1383 } else {
1384 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1386 pToken += curOpcode->num_params; /* maybe + 1 */
1387 continue;
1390 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1391 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1392 switch (mask) {
1393 case D3DSPDM_SATURATE: saturate = TRUE; break;
1394 #if 0 /* as yet unhandled modifiers */
1395 case D3DSPDM_CENTROID: centroid = TRUE; break;
1396 case D3DSPDM_PP: partialpresision = TRUE; break;
1397 case D3DSPDM_X2: X2 = TRUE; break;
1398 case D3DSPDM_X4: X4 = TRUE; break;
1399 case D3DSPDM_X8: X8 = TRUE; break;
1400 case D3DSPDM_D2: D2 = TRUE; break;
1401 case D3DSPDM_D4: D4 = TRUE; break;
1402 case D3DSPDM_D8: D8 = TRUE; break;
1403 #endif
1404 default:
1405 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1409 /* Generate input and output registers */
1410 if (curOpcode->num_params > 0) {
1411 char regs[5][50];
1412 char operands[4][100];
1413 char swzstring[20];
1414 int saturate = 0;
1415 char tmpOp[256];
1416 TRACE("(%p): Opcode has %d params\n", This, curOpcode->num_params);
1418 /* Generate lines that handle input modifier computation */
1419 for (i = 1; i < curOpcode->num_params; ++i) {
1420 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1421 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1422 addline(&lineNum, pgmStr, &pgmLength, tmpOp);
1426 /* Handle saturation only when no shift is present in the output modifier */
1427 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1428 saturate = 1;
1430 /* Handle output register */
1431 get_register_name(*pToken, tmpOp, This->constants);
1432 strcpy(operands[0], tmpOp);
1433 get_write_mask(*pToken, tmpOp);
1434 strcat(operands[0], tmpOp);
1436 /* This function works because of side effects from gen_input_modifier_line */
1437 /* Handle input registers */
1438 for (i = 1; i < curOpcode->num_params; ++i) {
1439 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1440 strcpy(operands[i], regs[i - 1]);
1441 get_input_register_swizzle(*(pToken + i), swzstring);
1442 strcat(operands[i], swzstring);
1445 switch(curOpcode->opcode) {
1446 case D3DSIO_CMP:
1447 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
1448 break;
1449 case D3DSIO_CND:
1450 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1451 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1452 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
1453 break;
1454 default:
1455 if (saturate)
1456 strcat(tmpLine, "_SAT");
1457 strcat(tmpLine, " ");
1458 strcat(tmpLine, operands[0]);
1459 for (i = 1; i < curOpcode->num_params; i++) {
1460 strcat(tmpLine, ", ");
1461 strcat(tmpLine, operands[i]);
1463 strcat(tmpLine,";\n");
1465 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1466 pToken += curOpcode->num_params;
1468 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1469 if (curOpcode->num_params > 0) {
1470 DWORD param = *(pInstr + 1);
1471 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1473 /* Generate a line that handle the output modifier computation */
1474 char regstr[100];
1475 char write_mask[20];
1476 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1477 get_register_name(param, regstr, This->constants);
1478 get_write_mask(param, write_mask);
1479 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1480 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1483 #endif
1486 /* TODO: What about result.depth? */
1487 strcpy(tmpLine, "MOV result.color, R0;\n");
1488 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1490 strcpy(tmpLine, "END\n");
1491 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1494 /* finally null terminate the pgmStr*/
1495 pgmStr[pgmLength] = 0;
1496 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1497 /* Create the hw shader */
1499 /* pgmStr sometimes gets too long for a normal TRACE */
1500 TRACE("Generated program:\n");
1501 if (TRACE_ON(d3d_shader)) {
1502 fprintf(stderr, "%s\n", pgmStr);
1505 /* TODO: change to resource.glObjectHandel or something like that */
1506 GL_EXTCALL(glGenProgramsARB(1, &This->prgId));
1508 TRACE("Creating a hw pixel shader, prg=%d\n", This->prgId);
1509 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->prgId));
1511 TRACE("Created hw pixel shader, prg=%d\n", This->prgId);
1512 /* Create the program and check for errors */
1513 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
1514 if (glGetError() == GL_INVALID_OPERATION) {
1515 GLint errPos;
1516 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1517 FIXME("HW PixelShader Error at position %d: %s\n",
1518 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1519 This->prgId = -1;
1522 #if 1 /* if were using the data buffer of device then we don't need to free it */
1523 HeapFree(GetProcessHeap(), 0, pgmStr);
1524 #endif
1527 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1528 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1529 static const char swizzle_reg_chars[] = "rgba";
1531 /* the unknown mask is for bits not yet accounted for by any other mask... */
1532 #define UNKNOWN_MASK 0xC000
1534 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
1535 #define EXTENDED_REG 0x1800
1537 DWORD reg = param & D3DSP_REGNUM_MASK;
1538 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
1540 if (input) {
1541 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1542 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1543 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1544 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1545 TRACE("-");
1546 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1547 TRACE("1-");
1550 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1551 case D3DSPR_TEMP:
1552 TRACE("r%lu", reg);
1553 break;
1554 case D3DSPR_INPUT:
1555 TRACE("v%lu", reg);
1556 break;
1557 case D3DSPR_CONST:
1558 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1559 break;
1561 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1562 TRACE("t%lu", reg);
1563 break;
1564 case D3DSPR_RASTOUT:
1565 TRACE("%s", rastout_reg_names[reg]);
1566 break;
1567 case D3DSPR_ATTROUT:
1568 TRACE("oD%lu", reg);
1569 break;
1570 case D3DSPR_TEXCRDOUT:
1571 TRACE("oT%lu", reg);
1572 break;
1573 case D3DSPR_CONSTINT:
1574 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1575 break;
1576 case D3DSPR_CONSTBOOL:
1577 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1578 break;
1579 case D3DSPR_LABEL:
1580 TRACE("l%lu", reg);
1581 break;
1582 case D3DSPR_LOOP:
1583 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1584 break;
1585 default:
1586 break;
1589 if (!input) {
1590 /** operand output */
1592 * for better debugging traces it's done into opcode dump code
1593 * @see pshader_program_dump_opcode
1594 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1595 DWORD mask = param & D3DSP_DSTMOD_MASK;
1596 switch (mask) {
1597 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1598 default:
1599 TRACE("_unhandled_modifier(0x%08lx)", mask);
1602 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1603 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1604 if (shift > 0) {
1605 TRACE("_x%u", 1 << shift);
1609 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1610 TRACE(".");
1611 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1612 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1613 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1614 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1616 } else {
1617 /** operand input */
1618 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1619 DWORD swizzle_r = swizzle & 0x03;
1620 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1621 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1622 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1624 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1625 DWORD mask = param & D3DSP_SRCMOD_MASK;
1626 /*TRACE("_modifier(0x%08lx) ", mask);*/
1627 switch (mask) {
1628 case D3DSPSM_NONE: break;
1629 case D3DSPSM_NEG: break;
1630 case D3DSPSM_BIAS: TRACE("_bias"); break;
1631 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1632 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1633 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1634 case D3DSPSM_COMP: break;
1635 case D3DSPSM_X2: TRACE("_x2"); break;
1636 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1637 case D3DSPSM_DZ: TRACE("_dz"); break;
1638 case D3DSPSM_DW: TRACE("_dw"); break;
1639 default:
1640 TRACE("_unknown(0x%08lx)", mask);
1645 * swizzle bits fields:
1646 * RRGGBBAA
1648 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1649 if (swizzle_r == swizzle_g &&
1650 swizzle_r == swizzle_b &&
1651 swizzle_r == swizzle_a) {
1652 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1653 } else {
1654 TRACE(".%c%c%c%c",
1655 swizzle_reg_chars[swizzle_r],
1656 swizzle_reg_chars[swizzle_g],
1657 swizzle_reg_chars[swizzle_b],
1658 swizzle_reg_chars[swizzle_a]);
1664 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl *This, DWORD token) {
1665 TRACE("dcl_");
1666 switch(token & 0xFFFF) {
1667 case D3DDECLUSAGE_POSITION:
1668 TRACE("%s%ld ", "position",(token & 0xF0000) >> 16);
1669 break;
1670 case D3DDECLUSAGE_BLENDINDICES:
1671 TRACE("%s ", "blend");
1672 break;
1673 case D3DDECLUSAGE_BLENDWEIGHT:
1674 TRACE("%s ", "weight");
1675 break;
1676 case D3DDECLUSAGE_NORMAL:
1677 TRACE("%s%ld ", "normal",(token & 0xF0000) >> 16);
1678 break;
1679 case D3DDECLUSAGE_PSIZE:
1680 TRACE("%s ", "psize");
1681 break;
1682 case D3DDECLUSAGE_COLOR:
1683 if((token & 0xF0000) >> 16 == 0) {
1684 TRACE("%s ", "color");
1685 } else {
1686 TRACE("%s%ld ", "specular", ((token & 0xF0000) >> 16) - 1);
1688 break;
1689 case D3DDECLUSAGE_TEXCOORD:
1690 TRACE("%s%ld ", "texture", (token & 0xF0000) >> 16);
1691 break;
1692 case D3DDECLUSAGE_TANGENT:
1693 TRACE("%s ", "tangent");
1694 break;
1695 case D3DDECLUSAGE_BINORMAL:
1696 TRACE("%s ", "binormal");
1697 break;
1698 case D3DDECLUSAGE_TESSFACTOR:
1699 TRACE("%s ", "tessfactor");
1700 break;
1701 case D3DDECLUSAGE_POSITIONT:
1702 TRACE("%s%ld ", "positionT",(token & 0xF0000) >> 16);
1703 break;
1704 case D3DDECLUSAGE_FOG:
1705 TRACE("%s ", "fog");
1706 break;
1707 case D3DDECLUSAGE_DEPTH:
1708 TRACE("%s ", "depth");
1709 break;
1710 case D3DDECLUSAGE_SAMPLE:
1711 TRACE("%s ", "sample");
1712 break;
1713 default:
1714 FIXME("Unrecognised dcl %08lx", token & 0xFFFF);
1718 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1719 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1720 const DWORD* pToken = pFunction;
1721 const SHADER_OPCODE *curOpcode = NULL;
1722 DWORD len = 0;
1723 DWORD i;
1724 TRACE("(%p) : Parsing programme\n", This);
1726 if (NULL != pToken) {
1727 while (D3DPS_END() != *pToken) {
1728 if (pshader_is_version_token(*pToken)) { /** version */
1729 This->version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1730 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1731 ++pToken;
1732 ++len;
1733 continue;
1735 if (pshader_is_comment_token(*pToken)) { /** comment */
1736 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1737 ++pToken;
1738 TRACE("//%s\n", (char*)pToken);
1739 pToken += comment_len;
1740 len += comment_len + 1;
1741 continue;
1743 if (!This->version) {
1744 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1746 curOpcode = pshader_program_get_opcode(This, *pToken);
1747 ++pToken;
1748 ++len;
1749 if (NULL == curOpcode) {
1751 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1752 while (*pToken & 0x80000000) {
1754 /* unknown current opcode ... */
1755 TRACE("unrecognized opcode: %08lx", *pToken);
1756 ++pToken;
1757 ++len;
1758 TRACE("\n");
1761 } else {
1762 if (curOpcode->opcode == D3DSIO_DCL) {
1763 pshader_program_dump_decl_usage(This, *pToken);
1764 ++pToken;
1765 ++len;
1766 pshader_program_dump_ps_param(*pToken, 0);
1767 ++pToken;
1768 ++len;
1769 } else
1770 if (curOpcode->opcode == D3DSIO_DEF) {
1771 TRACE("def c%lu = ", *pToken & 0xFF);
1772 ++pToken;
1773 ++len;
1774 TRACE("%f ,", *(float *)pToken);
1775 ++pToken;
1776 ++len;
1777 TRACE("%f ,", *(float *)pToken);
1778 ++pToken;
1779 ++len;
1780 TRACE("%f ,", *(float *)pToken);
1781 ++pToken;
1782 ++len;
1783 TRACE("%f", *(float *)pToken);
1784 ++pToken;
1785 ++len;
1786 } else {
1787 TRACE("%s ", curOpcode->name);
1788 if (curOpcode->num_params > 0) {
1789 pshader_program_dump_ps_param(*pToken, 0);
1790 ++pToken;
1791 ++len;
1792 for (i = 1; i < curOpcode->num_params; ++i) {
1793 TRACE(", ");
1794 pshader_program_dump_ps_param(*pToken, 1);
1795 ++pToken;
1796 ++len;
1800 TRACE("\n");
1803 This->functionLength = (len + 1) * sizeof(DWORD);
1804 } else {
1805 This->functionLength = 1; /* no Function defined use fixed function vertex processing */
1808 /* Generate HW shader in needed */
1809 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1810 TRACE("(%p) : Generating hardware program\n", This);
1811 #if 1
1812 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1813 #endif
1816 TRACE("(%p) : Copying the function\n", This);
1817 /* copy the function ... because it will certainly be released by application */
1818 if (NULL != pFunction) {
1819 This->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->functionLength);
1820 memcpy((void *)This->function, pFunction, This->functionLength);
1821 } else {
1822 This->function = NULL;
1825 /* TODO: Some proper return values for failures */
1826 TRACE("(%p) : Returning D3D_OK\n", This);
1827 return D3D_OK;
1830 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1832 /*** IUnknown methods ***/
1833 IWineD3DPixelShaderImpl_QueryInterface,
1834 IWineD3DPixelShaderImpl_AddRef,
1835 IWineD3DPixelShaderImpl_Release,
1836 /*** IWineD3DPixelShader methods ***/
1837 IWineD3DPixelShaderImpl_GetParent,
1838 IWineD3DPixelShaderImpl_GetDevice,
1839 IWineD3DPixelShaderImpl_GetFunction,
1840 /* not part of d3d */
1841 IWineD3DPixelShaderImpl_SetFunction