Implement a pixel shader parser and cross compiler. All version of
[wine/wine64.git] / dlls / wined3d / pixelshader.c
blob87b4f4426fdc0c34616d725055df2d5b6d3a5642
1 /*
2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "config.h"
23 #include <math.h>
24 #include <stdio.h>
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
35 #else
36 # define PSTRACE(A)
37 # define TRACE_VSVECTOR(name)
38 #endif
40 /* The maximum size of the program */
41 #define PGMSIZE 65535
43 #define REGMASK 0x00001FFF
44 typedef void (*shader_fct_t)();
46 typedef struct SHADER_OPCODE {
47 unsigned int opcode;
48 const char* name;
49 const char* glname;
50 CONST UINT num_params;
51 shader_fct_t soft_fct;
52 DWORD min_version;
53 DWORD max_version;
54 } SHADER_OPCODE;
56 #define GLNAME_REQUIRE_GLSL ((const char *)1)
57 /* *******************************************
58 IWineD3DPixelShader IUnknown parts follow
59 ******************************************* */
60 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
62 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
63 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
64 if (IsEqualGUID(riid, &IID_IUnknown)
65 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
66 IUnknown_AddRef(iface);
67 *ppobj = This;
68 return D3D_OK;
70 return E_NOINTERFACE;
73 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
74 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
75 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
76 return InterlockedIncrement(&This->ref);
79 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
80 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
81 ULONG ref;
82 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
83 ref = InterlockedDecrement(&This->ref);
84 if (ref == 0) {
85 HeapFree(GetProcessHeap(), 0, This);
87 return ref;
90 /* TODO: At the moment the function parser is single pass; it achieves this
91 by passing constants to a couple of functions where they are then modified.
92 At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
93 when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
96 /* *******************************************
97 IWineD3DPixelShader IWineD3DPixelShader parts follow
98 ******************************************* */
100 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
101 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
103 *parent= (IUnknown*) parent;
104 IUnknown_AddRef(*parent);
105 TRACE("(%p) : returning %p\n", This, *parent);
106 return D3D_OK;
109 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
110 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
111 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
112 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
113 TRACE("(%p) returning %p\n", This, *pDevice);
114 return D3D_OK;
118 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
119 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
120 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
122 if (NULL == pData) {
123 *pSizeOfData = This->functionLength;
124 return D3D_OK;
126 if (*pSizeOfData < This->functionLength) {
127 *pSizeOfData = This->functionLength;
128 return D3DERR_MOREDATA;
130 if (NULL == This->function) { /* no function defined */
131 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
132 (*(DWORD **) pData) = NULL;
133 } else {
134 if (This->functionLength == 0) {
137 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
138 memcpy(pData, This->function, This->functionLength);
140 return D3D_OK;
143 /*******************************
144 * pshader functions software VM
147 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
148 d->x = s0->x + s1->x;
149 d->y = s0->y + s1->y;
150 d->z = s0->z + s1->z;
151 d->w = s0->w + s1->w;
152 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
153 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
156 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
157 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
158 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
159 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
162 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
163 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
164 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
165 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
168 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
169 d->x = 1.0f;
170 d->y = s0->y * s1->y;
171 d->z = s0->z;
172 d->w = s1->w;
173 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
174 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
177 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
178 union {
179 float f;
180 DWORD d;
181 } tmp;
183 tmp.f = floorf(s0->w);
184 d->x = powf(2.0f, tmp.f);
185 d->y = s0->w - tmp.f;
186 tmp.f = powf(2.0f, s0->w);
187 tmp.d &= 0xFFFFFF00U;
188 d->z = tmp.f;
189 d->w = 1.0f;
190 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
191 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
194 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
195 d->x = 1.0f;
196 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
197 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
198 d->w = 1.0f;
199 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
200 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
203 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
204 float tmp_f = fabsf(s0->w);
205 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
206 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
207 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
210 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
211 d->x = s0->x * s1->x + s2->x;
212 d->y = s0->y * s1->y + s2->y;
213 d->z = s0->z * s1->z + s2->z;
214 d->w = s0->w * s1->w + s2->w;
215 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
216 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
219 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
220 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
221 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
222 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
223 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
224 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
225 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
228 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
229 d->x = (s0->x < s1->x) ? s0->x : s1->x;
230 d->y = (s0->y < s1->y) ? s0->y : s1->y;
231 d->z = (s0->z < s1->z) ? s0->z : s1->z;
232 d->w = (s0->w < s1->w) ? s0->w : s1->w;
233 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
234 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
237 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
238 d->x = s0->x;
239 d->y = s0->y;
240 d->z = s0->z;
241 d->w = s0->w;
242 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
246 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
247 d->x = s0->x * s1->x;
248 d->y = s0->y * s1->y;
249 d->z = s0->z * s1->z;
250 d->w = s0->w * s1->w;
251 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
252 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: does nothing apart from the optional trace line. */
void pshader_nop(void)
{
    PSTRACE(("executing nop\n"));
}
260 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
261 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
262 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
263 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
266 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
267 float tmp_f = fabsf(s0->w);
268 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
269 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
270 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
273 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
274 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
275 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
276 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
277 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
278 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
279 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
282 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
283 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
284 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
285 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
286 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
287 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
288 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
291 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
292 d->x = s0->x - s1->x;
293 d->y = s0->y - s1->y;
294 d->z = s0->z - s1->z;
295 d->w = s0->w - s1->w;
296 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
297 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
301 * Version 1.1 specific
304 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
305 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
306 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
307 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
310 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
311 float tmp_f = fabsf(s0->w);
312 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
313 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
314 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
317 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
318 d->x = s0->x - floorf(s0->x);
319 d->y = s0->y - floorf(s0->y);
320 d->z = 0.0f;
321 d->w = 1.0f;
322 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
323 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
326 typedef FLOAT D3DMATRIX44[4][4];
327 typedef FLOAT D3DMATRIX43[4][3];
328 typedef FLOAT D3DMATRIX34[4][4];
329 typedef FLOAT D3DMATRIX33[4][3];
330 typedef FLOAT D3DMATRIX32[4][2];
332 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
334 * Buggy CODE: here only if cast not work for copy/paste
335 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
336 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
337 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
338 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
339 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
340 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
341 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
343 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
344 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
345 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
346 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
347 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
348 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
349 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
350 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
353 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
354 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
355 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
356 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
357 d->w = 1.0f;
358 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
359 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
360 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
361 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
364 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
365 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
366 d->y = mat[2][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
367 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
368 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
369 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
370 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
371 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
372 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
375 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
376 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[2][2] * s0->z;
377 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[2][2] * s0->z;
378 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
379 d->w = 1.0f;
380 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
381 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
382 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
383 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
386 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX32 mat) {
387 FIXME("check\n");
388 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
389 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
390 d->z = 0.0f;
391 d->w = 1.0f;
395 * Version 2.0 specific
397 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
398 d->x = s0->x * (s1->x - s2->x) + s2->x;
399 d->y = s0->y * (s1->y - s2->y) + s2->y;
400 d->z = s0->z * (s1->z - s2->z) + s2->z;
401 d->w = s0->w * (s1->w - s2->w) + s2->x;
404 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
405 d->x = s0->y * s1->z - s0->z * s1->y;
406 d->y = s0->z * s1->x - s0->x * s1->z;
407 d->z = s0->x * s1->y - s0->y * s1->x;
408 d->w = 0.9f; /* w is undefined, so set it to something safeish */
410 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
411 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
414 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
415 d->x = fabsf(s0->x);
416 d->y = fabsf(s0->y);
417 d->z = fabsf(s0->z);
418 d->w = fabsf(s0->w);
419 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
420 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
423 /* Stubs */
424 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
425 FIXME(" : Stub\n");
428 void pshader_texkill(WINED3DSHADERVECTOR* d) {
429 FIXME(" : Stub\n");
432 void pshader_tex(WINED3DSHADERVECTOR* d) {
433 FIXME(" : Stub\n");
435 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
436 FIXME(" : Stub\n");
439 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
440 FIXME(" : Stub\n");
443 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
444 FIXME(" : Stub\n");
447 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
448 FIXME(" : Stub\n");
451 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
452 FIXME(" : Stub\n");
455 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
456 FIXME(" : Stub\n");
459 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
460 FIXME(" : Stub\n");
463 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
464 FIXME(" : Stub\n");
467 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
468 FIXME(" : Stub\n");
471 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
472 FIXME(" : Stub\n");
475 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
476 FIXME(" : Stub\n");
479 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
480 FIXME(" : Stub\n");
483 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
484 FIXME(" : Stub\n");
487 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
488 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
489 FIXME(" : Stub\n");
492 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
493 FIXME(" : Stub\n");
496 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
497 FIXME(" : Stub\n");
500 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
501 FIXME(" : Stub\n");
504 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
505 FIXME(" : Stub\n");
508 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
509 FIXME(" : Stub\n");
512 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
513 FIXME(" : Stub\n");
516 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
517 FIXME(" : Stub\n");
520 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
521 FIXME(" : Stub\n");
524 void pshader_call(WINED3DSHADERVECTOR* d) {
525 FIXME(" : Stub\n");
528 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
529 FIXME(" : Stub\n");
532 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
533 FIXME(" : Stub\n");
536 void pshader_ret(WINED3DSHADERVECTOR* d) {
537 FIXME(" : Stub\n");
540 void pshader_endloop(WINED3DSHADERVECTOR* d) {
541 FIXME(" : Stub\n");
544 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
545 FIXME(" : Stub\n");
548 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
549 FIXME(" : Stub\n");
552 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
553 FIXME(" : Stub\n");
556 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
557 FIXME(" : Stub\n");
560 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
561 FIXME(" : Stub\n");
564 void pshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
565 FIXME(" : Stub\n");
568 void pshader_endrep(void) {
569 FIXME(" : Stub\n");
572 void pshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
573 FIXME(" : Stub\n");
576 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
577 FIXME(" : Stub\n");
580 void pshader_else(WINED3DSHADERVECTOR* d) {
581 FIXME(" : Stub\n");
584 void pshader_label(WINED3DSHADERVECTOR* d) {
585 FIXME(" : Stub\n");
588 void pshader_endif(WINED3DSHADERVECTOR* d) {
589 FIXME(" : Stub\n");
592 void pshader_break(WINED3DSHADERVECTOR* d) {
593 FIXME(" : Stub\n");
596 void pshader_breakc(WINED3DSHADERVECTOR* d) {
597 FIXME(" : Stub\n");
600 void pshader_mova(WINED3DSHADERVECTOR* d) {
601 FIXME(" : Stub\n");
604 void pshader_defb(WINED3DSHADERVECTOR* d) {
605 FIXME(" : Stub\n");
608 void pshader_defi(WINED3DSHADERVECTOR* d) {
609 FIXME(" : Stub\n");
612 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
613 FIXME(" : Stub\n");
616 void pshader_dsx(WINED3DSHADERVECTOR* d) {
617 FIXME(" : Stub\n");
620 void pshader_dsy(WINED3DSHADERVECTOR* d) {
621 FIXME(" : Stub\n");
624 void pshader_texldd(WINED3DSHADERVECTOR* d) {
625 FIXME(" : Stub\n");
628 void pshader_setp(WINED3DSHADERVECTOR* d) {
629 FIXME(" : Stub\n");
632 void pshader_texldl(WINED3DSHADERVECTOR* d) {
633 FIXME(" : Stub\n");
636 void pshader_breakp(WINED3DSHADERVECTOR* d) {
637 FIXME(" : Stub\n");
640 * log, exp, frc and m*x* seem to be macro instructions ... to be checked
642 static CONST SHADER_OPCODE pshader_ins [] = {
643 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
644 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
645 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
646 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
647 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
648 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
649 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
650 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
651 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
652 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
653 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
654 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
655 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
656 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
657 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
658 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
659 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
660 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
661 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
662 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
663 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
664 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
665 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
666 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
667 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
668 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
671 /** FIXME: use direct access so add the others opcodes as stubs */
672 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
673 They can be easly managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
674 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
675 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
676 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
677 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
678 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
679 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
680 /* DCL is a specil operation */
681 {D3DSIO_DCL, "dcl", NULL, 1, pshader_dcl, 0, 0},
682 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
683 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
684 /* TODO: sng can possibly be performed as
685 RCP tmp, vec
686 MUL out, tmp, vec*/
687 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
688 /* TODO: xyz normalise can be performed is VS_ARB using one tempory register,
689 DP3 tmp , vec, vec;
690 RSQ tmp, tmp.x;
691 MUL vec.xyz, vec, tmp;
692 but I think this is better because it accounts for w properly.
693 DP3 tmp , vec, vec;
694 RSQ tmp, tmp.x;
695 MUL vec, vec, tmp;
698 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
699 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
700 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, pshader_rep, 0, 0},
701 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
702 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, pshader_if, 0, 0},
703 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
704 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, pshader_else, 0, 0},
705 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, pshader_endif, 0, 0},
706 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, pshader_break, 0, 0},
707 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
708 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
709 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
710 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
712 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
714 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
715 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
716 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
717 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
718 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
720 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
721 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
722 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
723 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
724 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
725 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
726 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
727 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
728 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
729 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
730 /* def is a special opperation */
731 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
732 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
733 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
734 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
735 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
736 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
737 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
738 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
739 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
740 /* TODO: dp2add can be made out of multiple instuctions */
741 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
742 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
743 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
744 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
745 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
746 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
747 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, pshader_breakp, 0, 0},
748 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
749 {0, NULL, NULL, 0, NULL, 0, 0}
753 inline static const SHADER_OPCODE* pshader_program_get_opcode(const DWORD code, const int version) {
754 DWORD i = 0;
755 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
756 /** TODO: use dichotomic search */
757 while (NULL != pshader_ins[i].name) {
758 if (((code & D3DSI_OPCODE_MASK) == pshader_ins[i].opcode) &&
759 (((hex_version >= pshader_ins[i].min_version) && (hex_version <= pshader_ins[i].max_version)) ||
760 ((pshader_ins[i].min_version == 0) && (pshader_ins[i].max_version == 0)))) {
761 return &pshader_ins[i];
763 ++i;
765 FIXME("Unsupported opcode %lx(%ld) masked %lx version %d\n", code, code, code & D3DSI_OPCODE_MASK, version);
766 return NULL;
769 inline static BOOL pshader_is_version_token(DWORD token) {
770 return 0xFFFF0000 == (token & 0xFFFF0000);
773 inline static BOOL pshader_is_comment_token(DWORD token) {
774 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
778 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
779 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
781 DWORD reg = param & REGMASK;
782 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
784 switch (regtype) {
785 case D3DSPR_TEMP:
786 sprintf(regstr, "R%lu", reg);
787 break;
788 case D3DSPR_INPUT:
789 if (reg==0) {
790 strcpy(regstr, "fragment.color.primary");
791 } else {
792 strcpy(regstr, "fragment.color.secondary");
794 break;
795 case D3DSPR_CONST:
796 if (constants[reg])
797 sprintf(regstr, "C%lu", reg);
798 else
799 sprintf(regstr, "program.env[%lu]", reg);
800 break;
801 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
802 sprintf(regstr,"T%lu", reg);
803 break;
804 case D3DSPR_RASTOUT:
805 sprintf(regstr, "%s", rastout_reg_names[reg]);
806 break;
807 case D3DSPR_ATTROUT:
808 sprintf(regstr, "oD[%lu]", reg);
809 break;
810 case D3DSPR_TEXCRDOUT:
811 sprintf(regstr, "oT[%lu]", reg);
812 break;
813 default:
814 FIXME("Unhandled register name Type(%ld)\n", regtype);
815 break;
819 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
820 *write_mask = 0;
821 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
822 strcat(write_mask, ".");
823 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
824 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
825 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
826 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
830 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
831 static const char swizzle_reg_chars[] = "rgba";
832 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
833 DWORD swizzle_x = swizzle & 0x03;
834 DWORD swizzle_y = (swizzle >> 2) & 0x03;
835 DWORD swizzle_z = (swizzle >> 4) & 0x03;
836 DWORD swizzle_w = (swizzle >> 6) & 0x03;
838 * swizzle bits fields:
839 * WWZZYYXX
841 *swzstring = 0;
842 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
843 if (swizzle_x == swizzle_y &&
844 swizzle_x == swizzle_z &&
845 swizzle_x == swizzle_w) {
846 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
847 } else {
848 sprintf(swzstring, ".%c%c%c%c",
849 swizzle_reg_chars[swizzle_x],
850 swizzle_reg_chars[swizzle_y],
851 swizzle_reg_chars[swizzle_z],
852 swizzle_reg_chars[swizzle_w]);
857 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
858 int lineLen = strlen(line);
859 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
860 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
861 return;
862 } else {
863 memcpy(pgm + *pgmLength, line, lineLen);
866 *pgmLength += lineLen;
867 ++lineNum;
868 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/*
 * Multiplier operand for each value of the 4-bit destination shift field,
 * as consumed by gen_output_modifier_line(). Entries named "dummy" have no
 * matching component in the coefmul / coefdiv parameters.
 */
static const char* shift_tab[16] = {
    "dummy",     /*  0: no shift */
    "coefmul.x", /*  1: x2       */
    "coefmul.y", /*  2: x4       */
    "coefmul.z", /*  3: x8       */
    "coefmul.w", /*  4: x16      */
    "dummy",     /*  5: x32      */
    "dummy",     /*  6: x64      */
    "dummy",     /*  7: x128     */
    "dummy",     /*  8: d256     */
    "dummy",     /*  9: d128     */
    "dummy",     /* 10: d64      */
    "dummy",     /* 11: d32      */
    "coefdiv.w", /* 12: d16      */
    "coefdiv.z", /* 13: d8       */
    "coefdiv.y", /* 14: d4       */
    "coefdiv.x"  /* 15: d2       */
};
890 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
891 /* Generate a line that does the output modifier computation */
892 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
895 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
896 /* Generate a line that does the input modifier computation and return the input register to use */
897 static char regstr[256];
898 static char tmpline[256];
899 int insert_line;
901 /* Assume a new line will be added */
902 insert_line = 1;
904 /* Get register name */
905 get_register_name(instr, regstr, constants);
907 TRACE(" Register name %s\n", regstr);
908 switch (instr & D3DSP_SRCMOD_MASK) {
909 case D3DSPSM_NONE:
910 strcpy(outregstr, regstr);
911 insert_line = 0;
912 break;
913 case D3DSPSM_NEG:
914 sprintf(outregstr, "-%s", regstr);
915 insert_line = 0;
916 break;
917 case D3DSPSM_BIAS:
918 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
919 break;
920 case D3DSPSM_BIASNEG:
921 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
922 break;
923 case D3DSPSM_SIGN:
924 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
925 break;
926 case D3DSPSM_SIGNNEG:
927 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
928 break;
929 case D3DSPSM_COMP:
930 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
931 break;
932 case D3DSPSM_X2:
933 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
934 break;
935 case D3DSPSM_X2NEG:
936 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
937 break;
938 case D3DSPSM_DZ:
939 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
940 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
941 strcat(line, "\n"); /* Hack */
942 strcat(line, tmpline);
943 break;
944 case D3DSPSM_DW:
945 sprintf(line, "RCP T%c, %s;", 'A' + tmpreg, regstr);
946 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
947 strcat(line, "\n"); /* Hack */
948 strcat(line, tmpline);
949 break;
950 default:
951 strcpy(outregstr, regstr);
952 insert_line = 0;
955 if (insert_line) {
956 /* Substitute the register name */
957 sprintf(outregstr, "T%c", 'A' + tmpreg);
960 return insert_line;
962 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
963 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
964 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
965 const DWORD *pToken = pFunction;
966 const SHADER_OPCODE *curOpcode = NULL;
967 const DWORD *pInstr;
968 DWORD i;
969 unsigned lineNum = 0; /* The line number of the generated program (for loging)*/
970 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
971 char tmpLine[255];
972 DWORD nUseAddressRegister = 0;
973 #if 0 /* TODO: loop register (just another address register ) */
974 BOOL hasLoops = FALSE;
975 #endif
977 BOOL saturate; /* clamp to 0.0 -> 1.0*/
978 int row = 0; /* not sure, something to do with macros? */
979 DWORD tcw[2];
980 int version = 0; /* The version of the shader */
982 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
983 unsigned int pgmLength = 0;
985 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
986 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occure... */
987 if (This->device->fixupVertexBufferSize < PGMSIZE) {
988 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
989 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
990 This->fixupVertexBufferSize = PGMSIZE;
991 This->fixupVertexBuffer[0] = 0;
993 pgmStr = This->device->fixupVertexBuffer;
994 #else
995 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
996 #endif
999 /* TODO: Think about using a first pass to work out what's required for the second pass. */
1000 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
1001 This->constants[i] = 0;
1003 if (NULL != pToken) {
1004 while (D3DPS_END() != *pToken) {
1005 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1006 if (version >= 2) {
1007 instructionSize = pToken & SIZEBITS >> 27;
1009 #endif
1010 if (pshader_is_version_token(*pToken)) { /** version */
1011 int numTemps;
1012 int numConstants;
1014 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1015 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1017 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1019 /* Each release of pixel shaders has had different numbers of temp registers */
1020 switch (version) {
1021 case 10:
1022 case 11:
1023 case 12:
1024 case 13:
1025 case 14: numTemps=12;
1026 numConstants=8;
1027 strcpy(tmpLine, "!!ARBfp1.0\n");
1028 break;
1029 case 20: numTemps=12;
1030 numConstants=8;
1031 strcpy(tmpLine, "!!ARBfp2.0\n");
1032 FIXME("No work done yet to support ps2.0 in hw\n");
1033 break;
1034 case 30: numTemps=32;
1035 numConstants=8;
1036 strcpy(tmpLine, "!!ARBfp3.0\n");
1037 FIXME("No work done yet to support ps3.0 in hw\n");
1038 break;
1039 default:
1040 numTemps=12;
1041 numConstants=8;
1042 strcpy(tmpLine, "!!ARBfp1.0\n");
1043 FIXME("Unrecognized pixel shader version!\n");
1045 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1047 /* TODO: find out how many registers are really needed */
1048 for(i = 0; i < 6; i++) {
1049 sprintf(tmpLine, "TEMP T%lu;\n", i);
1050 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1053 for(i = 0; i < 6; i++) {
1054 sprintf(tmpLine, "TEMP R%lu;\n", i);
1055 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1058 sprintf(tmpLine, "TEMP TMP;\n");
1059 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1060 sprintf(tmpLine, "TEMP TMP2;\n");
1061 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1062 sprintf(tmpLine, "TEMP TA;\n");
1063 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1064 sprintf(tmpLine, "TEMP TB;\n");
1065 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1066 sprintf(tmpLine, "TEMP TC;\n");
1067 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1069 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1070 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1071 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1072 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1073 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1074 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1076 for(i = 0; i < 4; i++) {
1077 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1078 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1081 ++pToken;
1082 continue;
1085 if (pshader_is_comment_token(*pToken)) { /** comment */
1086 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1087 ++pToken;
1088 FIXME("#%s\n", (char*)pToken);
1089 pToken += comment_len;
1090 continue;
1092 /* here */
1093 #if 0 /* Not sure what thease are here for, the're not required for vshaders */
1094 code = *pToken;
1095 #endif
1096 pInstr = pToken;
1097 curOpcode = pshader_program_get_opcode(*pToken, version);
1098 TRACE("Found opcode %s %s\n", curOpcode->name,curOpcode->glname);
1099 ++pToken;
1100 if (NULL == curOpcode) {
1101 /* unkown current opcode ... (shouldn't be any!) */
1102 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1103 FIXME("unrecognized opcode: %08lx\n", *pToken);
1104 ++pToken;
1106 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1107 /* if the token isn't supported by this cross compiler then skip it and it's parameters */
1108 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1109 pToken += curOpcode->num_params;
1110 } else {
1111 saturate = FALSE;
1113 /* Build opcode for GL vertex_program */
1114 switch (curOpcode->opcode) {
1115 case D3DSIO_NOP:
1116 case D3DSIO_PHASE:
1117 continue;
1118 case D3DSIO_MOV:
1119 /* Address registers must be loaded with the ARL instruction */
1120 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1121 if (((*pToken) & REGMASK) < nUseAddressRegister) {
1122 strcpy(tmpLine, "ARL");
1123 break;
1124 } else
1125 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & REGMASK));
1127 /* fall through */
1128 case D3DSIO_CND:
1129 case D3DSIO_CMP:
1130 case D3DSIO_ADD:
1131 case D3DSIO_SUB:
1132 case D3DSIO_MAD:
1133 case D3DSIO_MUL:
1134 case D3DSIO_RCP:
1135 case D3DSIO_RSQ:
1136 case D3DSIO_DP3:
1137 case D3DSIO_DP4:
1138 case D3DSIO_MIN:
1139 case D3DSIO_MAX:
1140 case D3DSIO_SLT:
1141 case D3DSIO_SGE:
1142 case D3DSIO_LIT:
1143 case D3DSIO_DST:
1144 case D3DSIO_FRC:
1145 case D3DSIO_EXPP:
1146 case D3DSIO_LOGP:
1147 case D3DSIO_EXP:
1148 case D3DSIO_LOG:
1149 case D3DSIO_LRP:
1150 case D3DSIO_TEXKILL:
1151 TRACE("Appending glname %s to tmpLine \n", curOpcode->glname);
1152 strcpy(tmpLine, curOpcode->glname);
1153 break;
1154 case D3DSIO_DEF:
1156 DWORD reg = *pToken & REGMASK;
1157 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1158 *((const float *)(pToken + 1)),
1159 *((const float *)(pToken + 2)),
1160 *((const float *)(pToken + 3)),
1161 *((const float *)(pToken + 4)) );
1163 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1165 This->constants[reg] = 1;
1166 pToken += 5;
1167 continue;
1169 break;
1170 case D3DSIO_TEX:
1172 char tmp[20];
1173 get_write_mask(*pToken, tmp);
1174 if (version != 14) {
1175 DWORD reg = *pToken & REGMASK;
1176 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1177 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1178 ++pToken;
1179 } else {
1180 char reg[20];
1181 DWORD reg1 = *pToken & REGMASK;
1182 DWORD reg2 = *++pToken & REGMASK;
1183 if (gen_input_modifier_line(*pToken, 0, reg, tmpLine, This->constants)) {
1184 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1186 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg, reg2);
1187 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1188 ++pToken;
1190 continue;
1192 break;
1193 case D3DSIO_TEXCOORD:
1195 char tmp[20];
1196 get_write_mask(*pToken, tmp);
1197 if (version != 14) {
1198 DWORD reg = *pToken & REGMASK;
1199 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1200 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1201 ++pToken;
1202 } else {
1203 DWORD reg1 = *pToken & REGMASK;
1204 DWORD reg2 = *++pToken & REGMASK;
1205 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1206 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1207 ++pToken;
1209 continue;
1211 break;
1212 case D3DSIO_TEXM3x2PAD:
1214 DWORD reg = *pToken & REGMASK;
1215 char buf[50];
1216 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1217 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1219 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1220 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1221 ++pToken;
1222 continue;
1224 break;
1225 case D3DSIO_TEXM3x2TEX:
1227 DWORD reg = *pToken & REGMASK;
1228 char buf[50];
1229 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1230 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1232 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1233 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1234 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1235 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1236 ++pToken;
1237 continue;
1239 break;
1240 case D3DSIO_TEXREG2AR:
1242 DWORD reg1 = *pToken & REGMASK;
1243 DWORD reg2 = *++pToken & REGMASK;
1244 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1245 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1246 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1247 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1248 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1249 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1250 ++pToken;
1251 continue;
1253 break;
1254 case D3DSIO_TEXREG2GB:
1256 DWORD reg1 = *pToken & REGMASK;
1257 DWORD reg2 = *++pToken & REGMASK;
1258 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1259 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1260 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1261 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1262 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1263 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1264 ++pToken;
1265 continue;
1267 break;
1268 case D3DSIO_TEXBEM:
1270 DWORD reg1 = *pToken & REGMASK;
1271 DWORD reg2 = *++pToken & REGMASK;
1273 /* FIXME: Should apply the BUMPMAPENV matrix */
1274 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1275 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1276 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1277 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1278 ++pToken;
1279 continue;
1281 break;
1282 case D3DSIO_TEXM3x3PAD:
1284 DWORD reg = *pToken & REGMASK;
1285 char buf[50];
1286 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1287 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1289 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1290 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1291 tcw[row++] = reg;
1292 ++pToken;
1293 continue;
1295 break;
1296 case D3DSIO_TEXM3x3TEX:
1298 DWORD reg = *pToken & REGMASK;
1299 char buf[50];
1300 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1301 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1304 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1305 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1307 /* Cubemap textures will be more used than 3D ones. */
1308 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1309 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1310 row = 0;
1311 ++pToken;
1312 continue;
1314 case D3DSIO_TEXM3x3VSPEC:
1316 DWORD reg = *pToken & REGMASK;
1317 char buf[50];
1318 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1319 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1321 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1322 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1324 /* Construct the eye-ray vector from w coordinates */
1325 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1326 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1327 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1328 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1329 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1330 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1332 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1333 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1334 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1335 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1336 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1337 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1338 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1340 /* Cubemap textures will be more used than 3D ones. */
1341 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1342 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1343 row = 0;
1344 ++pToken;
1345 continue;
1347 break;
1348 case D3DSIO_TEXM3x3SPEC:
1350 DWORD reg = *pToken & REGMASK;
1351 DWORD reg3 = *(pToken + 2) & REGMASK;
1352 char buf[50];
1353 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1354 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1356 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1357 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1359 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1360 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1361 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1363 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1364 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1365 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1366 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1368 /* Cubemap textures will be more used than 3D ones. */
1369 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1370 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1371 row = 0;
1372 pToken += 3;
1373 continue;
1375 break;
1377 default:
1378 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1379 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1380 } else {
1381 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1383 pToken += curOpcode->num_params; /* maybe + 1 */
1384 continue;
1387 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1388 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1389 switch (mask) {
1390 case D3DSPDM_SATURATE: saturate = TRUE; break;
1391 #if 0 /* as yet unhandled modifiers */
1392 case D3DSPDM_CENTROID: centroid = TRUE; break;
1393 case D3DSPDM_PP: partialpresision = TRUE; break;
1394 case D3DSPDM_X2: X2 = TRUE; break;
1395 case D3DSPDM_X4: X4 = TRUE; break;
1396 case D3DSPDM_X8: X8 = TRUE; break;
1397 case D3DSPDM_D2: D2 = TRUE; break;
1398 case D3DSPDM_D4: D4 = TRUE; break;
1399 case D3DSPDM_D8: D8 = TRUE; break;
1400 #endif
1401 default:
1402 TRACE("_unhandled_modifier(0x%08lx)", mask);
1406 /* Generate input and output registers */
1407 if (curOpcode->num_params > 0) {
1408 char regs[5][50];
1409 char operands[4][100];
1410 char swzstring[20];
1411 int saturate = 0;
1412 char tmpOp[256];
1413 TRACE("(%p): Opcode has %d params\n", This, curOpcode->num_params);
1415 /* Generate lines that handle input modifier computation */
1416 for (i = 1; i < curOpcode->num_params; ++i) {
1417 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1418 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpLine, This->constants)) {
1419 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1423 /* Handle saturation only when no shift is present in the output modifier */
1424 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1425 saturate = 1;
1427 /* Handle output register */
1428 get_register_name(*pToken, tmpOp, This->constants);
1429 strcpy(operands[0], tmpOp);
1430 get_write_mask(*pToken, tmpOp);
1431 strcat(operands[0], tmpOp);
1433 /* This function works because of side effects from gen_input_modifier_line */
1434 /* Handle input registers */
1435 for (i = 1; i < curOpcode->num_params; ++i) {
1436 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1437 strcpy(operands[i], regs[i - 1]);
1438 get_input_register_swizzle(*(pToken + i), swzstring);
1439 strcat(operands[i], swzstring);
1442 switch(curOpcode->opcode) {
1443 case D3DSIO_CMP:
1444 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
1445 break;
1446 case D3DSIO_CND:
1447 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1448 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1449 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
1450 break;
1451 default:
1452 if (saturate)
1453 strcat(tmpLine, "_SAT");
1454 strcat(tmpLine, " ");
1455 strcat(tmpLine, operands[0]);
1456 for (i = 1; i < curOpcode->num_params; i++) {
1457 strcat(tmpLine, ", ");
1458 strcat(tmpLine, operands[i]);
1460 strcat(tmpLine,";\n");
1462 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1463 pToken += curOpcode->num_params;
1465 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1466 if (curOpcode->num_params > 0) {
1467 DWORD param = *(pInstr + 1);
1468 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1470 /* Generate a line that handle the output modifier computation */
1471 char regstr[100];
1472 char write_mask[20];
1473 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1474 get_register_name(param, regstr, This->constants);
1475 get_write_mask(param, write_mask);
1476 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1477 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1480 #endif
1483 /* TODO: What about result.depth? */
1484 strcpy(tmpLine, "MOV result.color, R0;\n");
1485 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1487 strcpy(tmpLine, "END\n");
1488 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1491 /* finally null terminate the pgmStr*/
1492 pgmStr[pgmLength] = 0;
1493 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1494 TRACE("(%p) : Generated program %s\n", This, pgmStr);
1495 /* Create the hw shader */
1497 /* TODO: change to resource.glObjectHandel or something like that */
1498 GL_EXTCALL(glGenProgramsARB(1, &This->prgId));
1500 TRACE("Creating a hw pixel shader, prg=%d\n", This->prgId);
1501 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->prgId));
1503 TRACE("Created hw pixel shader, prg=%d\n", This->prgId);
1504 /* Create the program and check for errors */
1505 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
1506 if (glGetError() == GL_INVALID_OPERATION) {
1507 GLint errPos;
1508 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1509 FIXME("HW PixelShader Error at position: %d\n%s\n", errPos, glGetString(GL_PROGRAM_ERROR_STRING_ARB));
1510 This->prgId = -1;
1513 #if 1 /* if were using the data buffer of device then we don't need to free it */
1514 HeapFree(GetProcessHeap(), 0, pgmStr);
1515 #endif
1518 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1519 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1520 static const char swizzle_reg_chars[] = "rgba";
1522 /* the unknown mask is for bits not yet accounted for by any other mask... */
1523 #define UNKNOWN_MASK 0xC000
1525 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
1526 #define EXTENDED_REG 0x1800
1528 DWORD reg = param & D3DSP_REGNUM_MASK;
1529 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
1531 if (input) {
1532 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1533 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1534 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1535 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1536 TRACE("-");
1537 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1538 TRACE("1-");
1541 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1542 case D3DSPR_TEMP:
1543 TRACE("r%lu", reg);
1544 break;
1545 case D3DSPR_INPUT:
1546 TRACE("v%lu", reg);
1547 break;
1548 case D3DSPR_CONST:
1549 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1550 break;
1552 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1553 TRACE("t%lu", reg);
1554 break;
1555 case D3DSPR_RASTOUT:
1556 TRACE("%s", rastout_reg_names[reg]);
1557 break;
1558 case D3DSPR_ATTROUT:
1559 TRACE("oD%lu", reg);
1560 break;
1561 case D3DSPR_TEXCRDOUT:
1562 TRACE("oT%lu", reg);
1563 break;
1564 case D3DSPR_CONSTINT:
1565 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1566 break;
1567 case D3DSPR_CONSTBOOL:
1568 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1569 break;
1570 case D3DSPR_LABEL:
1571 TRACE("l%lu", reg);
1572 break;
1573 case D3DSPR_LOOP:
1574 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1575 break;
1576 default:
1577 break;
1580 if (!input) {
1581 /** operand output */
1583 * for better debugging traces it's done into opcode dump code
1584 * @see pshader_program_dump_opcode
1585 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1586 DWORD mask = param & D3DSP_DSTMOD_MASK;
1587 switch (mask) {
1588 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1589 default:
1590 TRACE("_unhandled_modifier(0x%08lx)", mask);
1593 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1594 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1595 if (shift > 0) {
1596 TRACE("_x%u", 1 << shift);
1600 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1601 TRACE(".");
1602 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1603 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1604 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1605 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1607 } else {
1608 /** operand input */
1609 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1610 DWORD swizzle_r = swizzle & 0x03;
1611 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1612 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1613 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1615 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1616 DWORD mask = param & D3DSP_SRCMOD_MASK;
1617 /*TRACE("_modifier(0x%08lx) ", mask);*/
1618 switch (mask) {
1619 case D3DSPSM_NONE: break;
1620 case D3DSPSM_NEG: break;
1621 case D3DSPSM_BIAS: TRACE("_bias"); break;
1622 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1623 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1624 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1625 case D3DSPSM_COMP: break;
1626 case D3DSPSM_X2: TRACE("_x2"); break;
1627 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1628 case D3DSPSM_DZ: TRACE("_dz"); break;
1629 case D3DSPSM_DW: TRACE("_dw"); break;
1630 default:
1631 TRACE("_unknown(0x%08lx)", mask);
1636 * swizzle bits fields:
1637 * RRGGBBAA
1639 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1640 if (swizzle_r == swizzle_g &&
1641 swizzle_r == swizzle_b &&
1642 swizzle_r == swizzle_a) {
1643 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1644 } else {
1645 TRACE(".%c%c%c%c",
1646 swizzle_reg_chars[swizzle_r],
1647 swizzle_reg_chars[swizzle_g],
1648 swizzle_reg_chars[swizzle_b],
1649 swizzle_reg_chars[swizzle_a]);
1655 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1656 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1657 const DWORD* pToken = pFunction;
1658 const SHADER_OPCODE *curOpcode = NULL;
1659 DWORD len = 0;
1660 DWORD i;
1661 int version = 0;
1662 TRACE("(%p) : Parsing programme\n", This);
1664 if (NULL != pToken) {
1665 while (D3DPS_END() != *pToken) {
1666 if (pshader_is_version_token(*pToken)) { /** version */
1667 version = *pToken & 0xFF;
1668 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1669 ++pToken;
1670 ++len;
1671 continue;
1673 if (pshader_is_comment_token(*pToken)) { /** comment */
1674 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1675 ++pToken;
1676 TRACE("//%s\n", (char*)pToken);
1677 pToken += comment_len;
1678 len += comment_len + 1;
1679 continue;
1681 if (!version) {
1682 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1684 curOpcode = pshader_program_get_opcode(*pToken, version);
1685 ++pToken;
1686 ++len;
1687 if (NULL == curOpcode) {
1689 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1690 while (*pToken & 0x80000000) {
1692 /* unkown current opcode ... */
1693 TRACE("unrecognized opcode: %08lx", *pToken);
1694 ++pToken;
1695 ++len;
1696 TRACE("\n");
1699 } else {
1700 if (curOpcode->opcode == D3DSIO_DCL) {
1701 TRACE("dcl_");
1702 switch(*pToken & 0xFFFF) {
1703 case D3DDECLUSAGE_POSITION:
1704 TRACE("%s%ld ", "position",(*pToken & 0xF0000) >> 16);
1705 break;
1706 case D3DDECLUSAGE_BLENDINDICES:
1707 TRACE("%s ", "blend");
1708 break;
1709 case D3DDECLUSAGE_BLENDWEIGHT:
1710 TRACE("%s ", "weight");
1711 break;
1712 case D3DDECLUSAGE_NORMAL:
1713 TRACE("%s%ld ", "normal",(*pToken & 0xF0000) >> 16);
1714 break;
1715 case D3DDECLUSAGE_PSIZE:
1716 TRACE("%s ", "psize");
1717 break;
1718 case D3DDECLUSAGE_COLOR:
1719 if((*pToken & 0xF0000) >> 16 == 0) {
1720 TRACE("%s ", "color");
1721 } else {
1722 TRACE("%s%ld ", "specular", ((*pToken & 0xF0000) >> 16) - 1);
1724 break;
1725 case D3DDECLUSAGE_TEXCOORD:
1726 TRACE("%s%ld ", "texture", (*pToken & 0xF0000) >> 16);
1727 break;
1728 case D3DDECLUSAGE_TANGENT:
1729 TRACE("%s ", "tangent");
1730 break;
1731 case D3DDECLUSAGE_BINORMAL:
1732 TRACE("%s ", "binormal");
1733 break;
1734 case D3DDECLUSAGE_TESSFACTOR:
1735 TRACE("%s ", "tessfactor");
1736 break;
1737 case D3DDECLUSAGE_POSITIONT:
1738 TRACE("%s%ld ", "positionT",(*pToken & 0xF0000) >> 16);
1739 break;
1740 case D3DDECLUSAGE_FOG:
1741 TRACE("%s ", "fog");
1742 break;
1743 case D3DDECLUSAGE_DEPTH:
1744 TRACE("%s ", "depth");
1745 break;
1746 case D3DDECLUSAGE_SAMPLE:
1747 TRACE("%s ", "sample");
1748 break;
1749 default:
1750 FIXME("Unrecognised dcl %08lx", *pToken & 0xFFFF);
1752 ++pToken;
1753 ++len;
1754 pshader_program_dump_ps_param(*pToken, 0);
1755 ++pToken;
1756 ++len;
1757 } else
1758 if (curOpcode->opcode == D3DSIO_DEF) {
1759 TRACE("def c%lu = ", *pToken & 0xFF);
1760 ++pToken;
1761 ++len;
1762 TRACE("%f ,", *(float *)pToken);
1763 ++pToken;
1764 ++len;
1765 TRACE("%f ,", *(float *)pToken);
1766 ++pToken;
1767 ++len;
1768 TRACE("%f ,", *(float *)pToken);
1769 ++pToken;
1770 ++len;
1771 TRACE("%f", *(float *)pToken);
1772 ++pToken;
1773 ++len;
1774 } else {
1775 TRACE("%s ", curOpcode->name);
1776 if (curOpcode->num_params > 0) {
1777 pshader_program_dump_ps_param(*pToken, 0);
1778 ++pToken;
1779 ++len;
1780 for (i = 1; i < curOpcode->num_params; ++i) {
1781 TRACE(", ");
1782 pshader_program_dump_ps_param(*pToken, 1);
1783 ++pToken;
1784 ++len;
1788 TRACE("\n");
1791 This->functionLength = (len + 1) * sizeof(DWORD);
1792 } else {
1793 This->functionLength = 1; /* no Function defined use fixed function vertex processing */
1796 /* Generate HW shader if needed */
1797 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1798 TRACE("(%p) : Generating hardware program\n", This);
1799 #if 1
1800 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1801 #endif
1804 TRACE("(%p) : Copying the function\n", This);
1805 /* copy the function ... because it will certainly be released by application */
1806 if (NULL != pFunction) {
1807 This->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->functionLength);
1808 memcpy((void *)This->function, pFunction, This->functionLength);
1809 } else {
1810 This->function = NULL;
1813 /* TODO: Some proper return values for failures */
1814 TRACE("(%p) : Returning D3D_OK\n", This);
1815 return D3D_OK;
1818 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1820 /*** IUnknown methods ***/
1821 IWineD3DPixelShaderImpl_QueryInterface,
1822 IWineD3DPixelShaderImpl_AddRef,
1823 IWineD3DPixelShaderImpl_Release,
1824 /*** IWineD3DPixelShader methods ***/
1825 IWineD3DPixelShaderImpl_GetParent,
1826 IWineD3DPixelShaderImpl_GetDevice,
1827 IWineD3DPixelShaderImpl_GetFunction,
1828 /* not part of d3d */
1829 IWineD3DPixelShaderImpl_SetFunction