wined3d: Use correct register number mask.
[wine/multimedia.git] / dlls / wined3d / pixelshader.c
blobef6aa351d14549c94239b24fd9d18f2642689ef1
1 /*
2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "config.h"
23 #include <math.h>
24 #include <stdio.h>
26 #include "wined3d_private.h"
/* Debug channel used by TRACE/FIXME in this file. */
WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);

/* Resolves the gl_info of the IWineD3D object owning the device of `This`;
 * only usable where a local `This` with a wineD3DDevice member is in scope. */
#define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info

/* Verbose per-instruction tracing of the software VM; compiled out by default. */
#if 0 /* Must not be 1 in cvs version */
# define PSTRACE(A) TRACE A
# define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
#else
# define PSTRACE(A)
# define TRACE_VSVECTOR(name)
#endif

/* The maximum size of the program */
#define PGMSIZE 65535

/* Sentinel used in the glname column of the opcode table for instructions
 * that have no direct name there and need GLSL (see the table's own comments). */
#define GLNAME_REQUIRE_GLSL ((const char *)1)
44 /* *******************************************
45 IWineD3DPixelShader IUnknown parts follow
46 ******************************************* */
47 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
49 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
50 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
51 if (IsEqualGUID(riid, &IID_IUnknown)
52 || IsEqualGUID(riid, &IID_IWineD3DBase)
53 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
54 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
55 IUnknown_AddRef(iface);
56 *ppobj = This;
57 return WINED3D_OK;
59 return E_NOINTERFACE;
62 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
63 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
64 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
65 return InterlockedIncrement(&This->ref);
68 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
69 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
70 ULONG ref;
71 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
72 ref = InterlockedDecrement(&This->ref);
73 if (ref == 0) {
74 HeapFree(GetProcessHeap(), 0, This);
76 return ref;
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
   when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
85 /* *******************************************
86 IWineD3DPixelShader IWineD3DPixelShader parts follow
87 ******************************************* */
89 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
90 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
92 *parent = This->parent;
93 IUnknown_AddRef(*parent);
94 TRACE("(%p) : returning %p\n", This, *parent);
95 return WINED3D_OK;
98 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
99 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
100 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
101 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
102 TRACE("(%p) returning %p\n", This, *pDevice);
103 return WINED3D_OK;
107 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
108 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
109 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
111 if (NULL == pData) {
112 *pSizeOfData = This->baseShader.functionLength;
113 return WINED3D_OK;
115 if (*pSizeOfData < This->baseShader.functionLength) {
116 *pSizeOfData = This->baseShader.functionLength;
117 return WINED3DERR_MOREDATA;
119 if (NULL == This->baseShader.function) { /* no function defined */
120 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
121 (*(DWORD **) pData) = NULL;
122 } else {
123 if (This->baseShader.functionLength == 0) {
126 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
127 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
129 return WINED3D_OK;
/*******************************
 * pshader functions software VM
 */
136 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
137 d->x = s0->x + s1->x;
138 d->y = s0->y + s1->y;
139 d->z = s0->z + s1->z;
140 d->w = s0->w + s1->w;
141 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
142 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
145 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
146 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
147 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
148 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
151 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
152 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
153 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
157 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
158 d->x = 1.0f;
159 d->y = s0->y * s1->y;
160 d->z = s0->z;
161 d->w = s1->w;
162 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
163 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
166 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
167 union {
168 float f;
169 DWORD d;
170 } tmp;
172 tmp.f = floorf(s0->w);
173 d->x = powf(2.0f, tmp.f);
174 d->y = s0->w - tmp.f;
175 tmp.f = powf(2.0f, s0->w);
176 tmp.d &= 0xFFFFFF00U;
177 d->z = tmp.f;
178 d->w = 1.0f;
179 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
180 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
183 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
184 d->x = 1.0f;
185 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
186 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
187 d->w = 1.0f;
188 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
189 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
192 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
193 float tmp_f = fabsf(s0->w);
194 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
195 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
196 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
199 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
200 d->x = s0->x * s1->x + s2->x;
201 d->y = s0->y * s1->y + s2->y;
202 d->z = s0->z * s1->z + s2->z;
203 d->w = s0->w * s1->w + s2->w;
204 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
205 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
208 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
209 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
210 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
211 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
212 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
213 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
214 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
217 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
218 d->x = (s0->x < s1->x) ? s0->x : s1->x;
219 d->y = (s0->y < s1->y) ? s0->y : s1->y;
220 d->z = (s0->z < s1->z) ? s0->z : s1->z;
221 d->w = (s0->w < s1->w) ? s0->w : s1->w;
222 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
223 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
226 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
227 d->x = s0->x;
228 d->y = s0->y;
229 d->z = s0->z;
230 d->w = s0->w;
231 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
232 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
235 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
236 d->x = s0->x * s1->x;
237 d->y = s0->y * s1->y;
238 d->z = s0->z * s1->z;
239 d->w = s0->w * s1->w;
240 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
241 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: does nothing apart from the optional trace. */
void pshader_nop(void)
{
    /* NOPPPP ahhh too easy ;) */
    PSTRACE(("executing nop\n"));
}
249 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
250 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
251 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
252 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
255 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
256 float tmp_f = fabsf(s0->w);
257 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
258 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
259 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
262 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
263 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
264 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
265 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
266 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
267 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
268 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
271 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
272 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
273 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
274 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
275 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
276 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
277 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
280 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
281 d->x = s0->x - s1->x;
282 d->y = s0->y - s1->y;
283 d->z = s0->z - s1->z;
284 d->w = s0->w - s1->w;
285 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
286 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/**
 * Version 1.1 specific
 */
293 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
294 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
295 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
296 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
299 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
300 float tmp_f = fabsf(s0->w);
301 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
302 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
303 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
306 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
307 d->x = s0->x - floorf(s0->x);
308 d->y = s0->y - floorf(s0->y);
309 d->z = 0.0f;
310 d->w = 1.0f;
311 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
312 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
315 typedef FLOAT D3DMATRIX44[4][4];
316 typedef FLOAT D3DMATRIX43[4][3];
317 typedef FLOAT D3DMATRIX34[3][4];
318 typedef FLOAT D3DMATRIX33[3][3];
319 typedef FLOAT D3DMATRIX23[2][3];
321 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
323 * Buggy CODE: here only if cast not work for copy/paste
324 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
325 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
326 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
327 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
328 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
329 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
330 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
332 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
333 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
334 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
335 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
336 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
337 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
338 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
339 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
342 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
343 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
344 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
345 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
346 d->w = 1.0f;
347 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
348 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
349 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
350 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
353 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
354 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
355 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
356 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
357 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
358 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
359 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
360 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
361 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
364 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
365 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
366 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
367 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
368 d->w = 1.0f;
369 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
370 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
371 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
372 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
375 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
376 FIXME("check\n");
377 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
378 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
379 d->z = 0.0f;
380 d->w = 1.0f;
/**
 * Version 2.0 specific
 */
386 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
387 d->x = s0->x * (s1->x - s2->x) + s2->x;
388 d->y = s0->y * (s1->y - s2->y) + s2->y;
389 d->z = s0->z * (s1->z - s2->z) + s2->z;
390 d->w = s0->w * (s1->w - s2->w) + s2->w;
393 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
394 d->x = s0->y * s1->z - s0->z * s1->y;
395 d->y = s0->z * s1->x - s0->x * s1->z;
396 d->z = s0->x * s1->y - s0->y * s1->x;
397 d->w = 0.9f; /* w is undefined, so set it to something safeish */
399 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
400 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
403 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
404 d->x = fabsf(s0->x);
405 d->y = fabsf(s0->y);
406 d->z = fabsf(s0->z);
407 d->w = fabsf(s0->w);
408 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
409 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
412 /* Stubs */
413 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
414 FIXME(" : Stub\n");
417 void pshader_texkill(WINED3DSHADERVECTOR* d) {
418 FIXME(" : Stub\n");
421 void pshader_tex(WINED3DSHADERVECTOR* d) {
422 FIXME(" : Stub\n");
424 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
425 FIXME(" : Stub\n");
428 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
429 FIXME(" : Stub\n");
432 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
433 FIXME(" : Stub\n");
436 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
437 FIXME(" : Stub\n");
440 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
441 FIXME(" : Stub\n");
444 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
445 FIXME(" : Stub\n");
448 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
449 FIXME(" : Stub\n");
452 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
453 FIXME(" : Stub\n");
456 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
457 FIXME(" : Stub\n");
460 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
461 FIXME(" : Stub\n");
464 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
465 FIXME(" : Stub\n");
468 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
469 FIXME(" : Stub\n");
472 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
473 FIXME(" : Stub\n");
476 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
477 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
478 FIXME(" : Stub\n");
481 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
482 FIXME(" : Stub\n");
485 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
486 FIXME(" : Stub\n");
489 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
490 FIXME(" : Stub\n");
493 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
494 FIXME(" : Stub\n");
497 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
498 FIXME(" : Stub\n");
501 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
502 FIXME(" : Stub\n");
505 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
506 FIXME(" : Stub\n");
509 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
510 FIXME(" : Stub\n");
513 void pshader_call(WINED3DSHADERVECTOR* d) {
514 FIXME(" : Stub\n");
517 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
518 FIXME(" : Stub\n");
521 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
522 FIXME(" : Stub\n");
/* ret: software-VM stub; only emits a FIXME. */
void pshader_ret(void)
{
    FIXME(" : Stub\n");
}
/* endloop: software-VM stub; only emits a FIXME. */
void pshader_endloop(void)
{
    FIXME(" : Stub\n");
}
533 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
534 FIXME(" : Stub\n");
537 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
538 FIXME(" : Stub\n");
541 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
542 FIXME(" : Stub\n");
545 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
546 FIXME(" : Stub\n");
549 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
550 FIXME(" : Stub\n");
553 void pshader_rep(WINED3DSHADERVECTOR* d) {
554 FIXME(" : Stub\n");
/* endrep: software-VM stub; only emits a FIXME. */
void pshader_endrep(void)
{
    FIXME(" : Stub\n");
}
561 void pshader_if(WINED3DSHADERVECTOR* d) {
562 FIXME(" : Stub\n");
565 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
566 FIXME(" : Stub\n");
/* else: software-VM stub; only emits a FIXME. */
void pshader_else(void)
{
    FIXME(" : Stub\n");
}
573 void pshader_label(WINED3DSHADERVECTOR* d) {
574 FIXME(" : Stub\n");
/* endif: software-VM stub; only emits a FIXME. */
void pshader_endif(void)
{
    FIXME(" : Stub\n");
}
/* break: software-VM stub; only emits a FIXME. */
void pshader_break(void)
{
    FIXME(" : Stub\n");
}
585 void pshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
586 FIXME(" : Stub\n");
589 void pshader_breakp(WINED3DSHADERVECTOR* d) {
590 FIXME(" : Stub\n");
593 void pshader_mova(WINED3DSHADERVECTOR* d) {
594 FIXME(" : Stub\n");
597 void pshader_defb(WINED3DSHADERVECTOR* d) {
598 FIXME(" : Stub\n");
601 void pshader_defi(WINED3DSHADERVECTOR* d) {
602 FIXME(" : Stub\n");
605 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
606 FIXME(" : Stub\n");
609 void pshader_dsx(WINED3DSHADERVECTOR* d) {
610 FIXME(" : Stub\n");
613 void pshader_dsy(WINED3DSHADERVECTOR* d) {
614 FIXME(" : Stub\n");
617 void pshader_texldd(WINED3DSHADERVECTOR* d) {
618 FIXME(" : Stub\n");
621 void pshader_setp(WINED3DSHADERVECTOR* d) {
622 FIXME(" : Stub\n");
625 void pshader_texldl(WINED3DSHADERVECTOR* d) {
626 FIXME(" : Stub\n");
/**
 * log, exp, frc and m*x* seem to be macro instructions ... to be checked
 */
632 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
633 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
634 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
635 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
636 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
637 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
638 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
639 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
640 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
641 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
642 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
643 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
644 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
645 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
646 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
647 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
648 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
649 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
650 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
651 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
652 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
653 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
654 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
655 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
656 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
657 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
658 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
661 /** FIXME: use direct access so add the others opcodes as stubs */
662 /* DCL is a specil operation */
663 {D3DSIO_DCL, "dcl", NULL, 2, pshader_dcl, 0, 0},
664 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
665 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
666 /* TODO: sng can possibly be performed as
667 RCP tmp, vec
668 MUL out, tmp, vec*/
669 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
670 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
671 DP3 tmp , vec, vec;
672 RSQ tmp, tmp.x;
673 MUL vec.xyz, vec, tmp;
674 but I think this is better because it accounts for w properly.
675 DP3 tmp , vec, vec;
676 RSQ tmp, tmp.x;
677 MUL vec, vec, tmp;
680 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
681 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
683 /* Flow control - requires GLSL or software shaders */
684 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 1, pshader_rep, 0, 0},
685 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
686 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 1, pshader_if, 0, 0},
687 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
688 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 0, pshader_else, 0, 0},
689 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 0, pshader_endif, 0, 0},
690 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 0, pshader_break, 0, 0},
691 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
692 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 1, pshader_breakp, 0, 0},
693 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
694 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
695 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
696 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
697 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
698 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
700 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
701 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
702 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
704 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
705 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
706 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
707 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
708 {D3DSIO_TEX, "texld", "undefined", 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
709 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 3, pshader_texld, D3DPS_VERSION(2,0), -1},
710 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
711 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
712 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
715 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
716 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
717 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
718 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
720 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
721 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
722 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
723 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
724 /* def is a special operation */
725 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
726 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
729 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
730 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
731 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
732 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
733 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
734 /* TODO: dp2add can be made out of multiple instuctions */
735 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
736 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
737 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
738 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
739 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
740 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
741 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
742 {0, NULL, NULL, 0, NULL, 0, 0}
746 inline static const SHADER_OPCODE* pshader_program_get_opcode(IWineD3DPixelShaderImpl *This, const DWORD code) {
747 DWORD i = 0;
748 DWORD version = This->baseShader.version;
749 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
750 const SHADER_OPCODE *shader_ins = This->baseShader.shader_ins;
752 /** TODO: use dichotomic search */
753 while (NULL != shader_ins[i].name) {
754 if (((code & D3DSI_OPCODE_MASK) == shader_ins[i].opcode) &&
755 (((hex_version >= shader_ins[i].min_version) && (hex_version <= shader_ins[i].max_version)) ||
756 ((shader_ins[i].min_version == 0) && (shader_ins[i].max_version == 0)))) {
757 return &shader_ins[i];
759 ++i;
761 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code, code, code & D3DSI_OPCODE_MASK, version);
762 return NULL;
765 inline static BOOL pshader_is_version_token(DWORD token) {
766 return 0xFFFF0000 == (token & 0xFFFF0000);
769 inline static BOOL pshader_is_comment_token(DWORD token) {
770 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
774 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
775 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
777 DWORD reg = param & D3DSP_REGNUM_MASK;
778 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
780 switch (regtype) {
781 case D3DSPR_TEMP:
782 sprintf(regstr, "R%lu", reg);
783 break;
784 case D3DSPR_INPUT:
785 if (reg==0) {
786 strcpy(regstr, "fragment.color.primary");
787 } else {
788 strcpy(regstr, "fragment.color.secondary");
790 break;
791 case D3DSPR_CONST:
792 if (constants[reg])
793 sprintf(regstr, "C%lu", reg);
794 else
795 sprintf(regstr, "program.env[%lu]", reg);
796 break;
797 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
798 sprintf(regstr,"T%lu", reg);
799 break;
800 case D3DSPR_RASTOUT:
801 sprintf(regstr, "%s", rastout_reg_names[reg]);
802 break;
803 case D3DSPR_ATTROUT:
804 sprintf(regstr, "oD[%lu]", reg);
805 break;
806 case D3DSPR_TEXCRDOUT:
807 sprintf(regstr, "oT[%lu]", reg);
808 break;
809 default:
810 FIXME("Unhandled register name Type(%ld)\n", regtype);
811 break;
815 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
816 *write_mask = 0;
817 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
818 strcat(write_mask, ".");
819 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
820 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
821 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
822 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
826 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
827 static const char swizzle_reg_chars[] = "rgba";
828 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
829 DWORD swizzle_x = swizzle & 0x03;
830 DWORD swizzle_y = (swizzle >> 2) & 0x03;
831 DWORD swizzle_z = (swizzle >> 4) & 0x03;
832 DWORD swizzle_w = (swizzle >> 6) & 0x03;
834 * swizzle bits fields:
835 * WWZZYYXX
837 *swzstring = 0;
838 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
839 if (swizzle_x == swizzle_y &&
840 swizzle_x == swizzle_z &&
841 swizzle_x == swizzle_w) {
842 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
843 } else {
844 sprintf(swzstring, ".%c%c%c%c",
845 swizzle_reg_chars[swizzle_x],
846 swizzle_reg_chars[swizzle_y],
847 swizzle_reg_chars[swizzle_z],
848 swizzle_reg_chars[swizzle_w]);
853 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
854 int lineLen = strlen(line);
855 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
856 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
857 return;
858 } else {
859 memcpy(pgm + *pgmLength, line, lineLen);
862 *pgmLength += lineLen;
863 ++(*lineNum);
864 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/*
 * Multiplier operand for each destination shift value (index 0..15).
 * Only x2..x16 and d2..d16 map to a component of the coefmul / coefdiv
 * program parameters; every other index holds the placeholder "dummy".
 */
static const char* shift_tab[] = {
    /*  0: none */ "dummy",
    /*  1: x2   */ "coefmul.x",
    /*  2: x4   */ "coefmul.y",
    /*  3: x8   */ "coefmul.z",
    /*  4: x16  */ "coefmul.w",
    /*  5: x32  */ "dummy",
    /*  6: x64  */ "dummy",
    /*  7: x128 */ "dummy",
    /*  8: d256 */ "dummy",
    /*  9: d128 */ "dummy",
    /* 10: d64  */ "dummy",
    /* 11: d32  */ "dummy",
    /* 12: d16  */ "coefdiv.w",
    /* 13: d8   */ "coefdiv.z",
    /* 14: d4   */ "coefdiv.y",
    /* 15: d2   */ "coefdiv.x"
};
886 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
887 /* Generate a line that does the output modifier computation */
888 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
891 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
892 /* Generate a line that does the input modifier computation and return the input register to use */
893 static char regstr[256];
894 static char tmpline[256];
895 int insert_line;
897 /* Assume a new line will be added */
898 insert_line = 1;
900 /* Get register name */
901 get_register_name(instr, regstr, constants);
903 TRACE(" Register name %s\n", regstr);
904 switch (instr & D3DSP_SRCMOD_MASK) {
905 case D3DSPSM_NONE:
906 strcpy(outregstr, regstr);
907 insert_line = 0;
908 break;
909 case D3DSPSM_NEG:
910 sprintf(outregstr, "-%s", regstr);
911 insert_line = 0;
912 break;
913 case D3DSPSM_BIAS:
914 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
915 break;
916 case D3DSPSM_BIASNEG:
917 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
918 break;
919 case D3DSPSM_SIGN:
920 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
921 break;
922 case D3DSPSM_SIGNNEG:
923 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
924 break;
925 case D3DSPSM_COMP:
926 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
927 break;
928 case D3DSPSM_X2:
929 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
930 break;
931 case D3DSPSM_X2NEG:
932 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
933 break;
934 case D3DSPSM_DZ:
935 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
936 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
937 strcat(line, "\n"); /* Hack */
938 strcat(line, tmpline);
939 break;
940 case D3DSPSM_DW:
941 sprintf(line, "RCP T%c, %s.w;", 'A' + tmpreg, regstr);
942 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
943 strcat(line, "\n"); /* Hack */
944 strcat(line, tmpline);
945 break;
946 default:
947 strcpy(outregstr, regstr);
948 insert_line = 0;
951 if (insert_line) {
952 /* Substitute the register name */
953 sprintf(outregstr, "T%c", 'A' + tmpreg);
956 return insert_line;
958 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/*
 * Translate a D3D pixel shader token stream into ARB fragment program text
 * and hand that text to OpenGL.
 *
 * iface     : the pixel shader object (used as IWineD3DPixelShaderImpl).
 * pFunction : token stream, walked until a D3DPS_END() token is found. A NULL
 *             pointer skips the token walk.
 *
 * The text is accumulated with addline() in a PGMSIZE byte heap buffer. The
 * walk handles, in this order: version tokens, comment tokens, unknown
 * opcodes, opcodes whose glname is GLNAME_REQUIRE_GLSL (skipped with a
 * FIXME), def instructions, and finally the common "[inst] [dst] [src]*"
 * form. After the walk "MOV result.color, R0;" and "END" are appended, the
 * buffer is NUL-terminated, a program id is generated and bound to
 * GL_FRAGMENT_PROGRAM_ARB, and the string is loaded with glProgramStringARB.
 * On GL_INVALID_OPERATION the error position and string are logged and
 * baseShader.prgId is set to -1. The buffer is released with HeapFree.
 */
959 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
960 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
961 const DWORD *pToken = pFunction;
962 const SHADER_OPCODE *curOpcode = NULL;
963 const DWORD *pInstr;
964 DWORD i;
965 unsigned lineNum = 0; /* The line number of the generated program (for logging)*/
966 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
967 char tmpLine[255];
968 #if 0 /* TODO: loop register (just another address register ) */
969 BOOL hasLoops = FALSE;
970 #endif
972 BOOL saturate; /* clamp to 0.0 -> 1.0*/
/* row counts the TEXM3x3PAD instructions seen so far; it selects the TMP
 * component written ('x'+row) and the tcw slot, and is reset to 0 by the
 * TEXM3x3TEX / TEXM3x3VSPEC / TEXM3x3SPEC cases below. */
973 int row = 0; /* not sure, something to do with macros? */
974 DWORD tcw[2];
975 int version = 0; /* The version of the shader */
977 /* Keep a running length for pgmStr so that we don't have to calculate strlen every time we concatenate */
978 unsigned int pgmLength = 0;
980 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
981 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
982 if (This->device->fixupVertexBufferSize < PGMSIZE) {
983 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
984 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
985 This->fixupVertexBufferSize = PGMSIZE;
986 This->fixupVertexBuffer[0] = 0;
988 pgmStr = This->device->fixupVertexBuffer;
989 #else
/* NOTE(review): the HeapAlloc result is used without a NULL check. */
990 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
991 #endif
994 /* TODO: Think about using a first pass to work out what's required for the second pass. */
/* Clear the per-constant flags; a flag is set to 1 further down when a def
 * instruction for that constant is translated. */
995 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
996 This->constants[i] = 0;
998 if (NULL != pToken) {
999 while (D3DPS_END() != *pToken) {
1000 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1001 if (version >= 2) {
1002 instructionSize = pToken & SIZEBITS >> 27;
1004 #endif
/* Version token: emit the program header and the fixed set of TEMP and PARAM
 * declarations. numTemps and numConstants are assigned but not read in this
 * function. */
1005 if (pshader_is_version_token(*pToken)) { /** version */
1006 int numTemps;
1007 int numConstants;
1009 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1010 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1012 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1014 /* Each release of pixel shaders has had different numbers of temp registers */
1015 switch (version) {
1016 case 10:
1017 case 11:
1018 case 12:
1019 case 13:
1020 case 14: numTemps=12;
1021 numConstants=8;
1022 strcpy(tmpLine, "!!ARBfp1.0\n");
1023 break;
1024 case 20: numTemps=12;
1025 numConstants=8;
1026 strcpy(tmpLine, "!!ARBfp2.0\n");
1027 FIXME("No work done yet to support ps2.0 in hw\n");
1028 break;
1029 case 30: numTemps=32;
1030 numConstants=8;
1031 strcpy(tmpLine, "!!ARBfp3.0\n");
1032 FIXME("No work done yet to support ps3.0 in hw\n");
1033 break;
1034 default:
1035 numTemps=12;
1036 numConstants=8;
1037 strcpy(tmpLine, "!!ARBfp1.0\n");
1038 FIXME("Unrecognized pixel shader version!\n");
1040 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1042 /* TODO: find out how many registers are really needed */
1043 for(i = 0; i < 6; i++) {
1044 sprintf(tmpLine, "TEMP T%lu;\n", i);
1045 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1048 for(i = 0; i < 6; i++) {
1049 sprintf(tmpLine, "TEMP R%lu;\n", i);
1050 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Scratch registers: TMP and TMP2 are used by the texture instruction
 * translations below; TA, TB, TC match the "T%c" names produced by
 * gen_input_modifier_line() for tmpreg 0..2. */
1053 sprintf(tmpLine, "TEMP TMP;\n");
1054 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1055 sprintf(tmpLine, "TEMP TMP2;\n");
1056 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1057 sprintf(tmpLine, "TEMP TA;\n");
1058 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1059 sprintf(tmpLine, "TEMP TB;\n");
1060 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1061 sprintf(tmpLine, "TEMP TC;\n");
1062 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1064 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1065 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1066 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1067 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1068 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1069 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Preload T0..T3 with the matching texture coordinates. */
1071 for(i = 0; i < 4; i++) {
1072 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1073 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1076 ++pToken;
1077 continue;
/* Comment token: the size field gives the number of DWORDs to skip after
 * the token itself. */
1080 if (pshader_is_comment_token(*pToken)) { /** comment */
1081 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1082 ++pToken;
1083 TRACE("#%s\n", (char*)pToken);
1084 pToken += comment_len;
1085 continue;
1087 /* here */
1088 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1089 code = *pToken;
1090 #endif
/* pInstr remembers the opcode token; pToken is advanced to the first
 * parameter before the opcode is dispatched. */
1091 pInstr = pToken;
1092 curOpcode = pshader_program_get_opcode(This, *pToken);
1093 ++pToken;
1094 if (NULL == curOpcode) {
1095 /* unknown current opcode ... (shouldn't be any!) */
/* Skip every following token that has bit 31 set. */
1096 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1097 FIXME("unrecognized opcode: %08lx\n", *pToken);
1098 ++pToken;
1100 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1101 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1102 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1103 pToken += curOpcode->num_params;
1105 } else if (D3DSIO_DEF == curOpcode->opcode) {
1107 /* Handle definitions here, they don't fit well with the
1108 * other instructions below [for now ] */
/* def: one destination token followed by four floats, emitted as a PARAM
 * named C<reg>.
 * NOTE(review): reg indexes This->constants without a range check. */
1110 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1112 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1113 curOpcode->name, curOpcode->glname, curOpcode->num_params);
1115 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1116 *((const float *)(pToken + 1)),
1117 *((const float *)(pToken + 2)),
1118 *((const float *)(pToken + 3)),
1119 *((const float *)(pToken + 4)) );
1121 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1123 This->constants[reg] = 1;
1124 pToken += 5;
1125 continue;
1127 } else {
1129 /* Common processing: [inst] [dst] [src]* */
1132 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1133 curOpcode->name, curOpcode->glname, curOpcode->num_params);
1135 saturate = FALSE;
1137 /* Build opcode for GL vertex_program */
/* First switch: opcodes in the long case list only copy their glname into
 * tmpLine and fall out of the switch; the texture opcodes emit their full
 * translation here, consume their parameters and "continue" the token loop. */
1138 switch (curOpcode->opcode) {
1139 case D3DSIO_NOP:
1140 case D3DSIO_PHASE:
1141 continue;
1142 case D3DSIO_MOV:
1143 case D3DSIO_CND:
1144 case D3DSIO_CMP:
1145 case D3DSIO_ADD:
1146 case D3DSIO_SUB:
1147 case D3DSIO_MAD:
1148 case D3DSIO_MUL:
1149 case D3DSIO_RCP:
1150 case D3DSIO_RSQ:
1151 case D3DSIO_DP3:
1152 case D3DSIO_DP4:
1153 case D3DSIO_MIN:
1154 case D3DSIO_MAX:
1155 case D3DSIO_SLT:
1156 case D3DSIO_SGE:
1157 case D3DSIO_LIT:
1158 case D3DSIO_DST:
1159 case D3DSIO_FRC:
1160 case D3DSIO_EXPP:
1161 case D3DSIO_LOGP:
1162 case D3DSIO_EXP:
1163 case D3DSIO_LOG:
1164 case D3DSIO_LRP:
1165 case D3DSIO_TEXKILL:
1166 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1167 strcpy(tmpLine, curOpcode->glname);
1168 break;
/* tex: for versions other than 14 the destination T register is also the
 * coordinate source and its number is the texture index; for version 14 a
 * separate source token follows and the result goes to an R register. */
1169 case D3DSIO_TEX:
1171 char tmp[20];
1172 get_write_mask(*pToken, tmp);
1173 if (version != 14) {
1174 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1175 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1176 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1177 ++pToken;
1178 } else {
1179 char reg2[20];
1180 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1181 if (gen_input_modifier_line(*++pToken, 0, reg2, tmpLine, This->constants)) {
1182 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1184 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg2, reg1);
1185 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1186 ++pToken;
1188 continue;
1190 break;
1191 case D3DSIO_TEXCOORD:
1193 char tmp[20];
1194 get_write_mask(*pToken, tmp);
1195 if (version != 14) {
1196 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1197 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1198 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1199 ++pToken;
1200 } else {
1201 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1202 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1203 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1204 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1205 ++pToken;
1207 continue;
1209 break;
1210 case D3DSIO_TEXM3x2PAD:
1212 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1213 char buf[50];
1214 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1215 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1217 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1218 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1219 ++pToken;
1220 continue;
1222 break;
1223 case D3DSIO_TEXM3x2TEX:
1225 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1226 char buf[50];
1227 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1228 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1230 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1231 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1232 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1233 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1234 ++pToken;
1235 continue;
1237 break;
1238 case D3DSIO_TEXREG2AR:
1240 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1241 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1242 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1243 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1244 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1245 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1246 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1247 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1248 ++pToken;
1249 continue;
1251 break;
1252 case D3DSIO_TEXREG2GB:
1254 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1255 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1256 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1257 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1258 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1259 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1260 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1261 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1262 ++pToken;
1263 continue;
1265 break;
1266 case D3DSIO_TEXBEM:
1268 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1269 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1271 /* FIXME: Should apply the BUMPMAPENV matrix */
1272 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1273 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1274 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1275 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1276 ++pToken;
1277 continue;
1279 break;
/* texm3x3pad: writes TMP.x or TMP.y (selected by row) and records the
 * destination register number in tcw[row].
 * NOTE(review): tcw has two entries and row is not range-checked here. */
1280 case D3DSIO_TEXM3x3PAD:
1282 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1283 char buf[50];
1284 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1285 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1287 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1288 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1289 tcw[row++] = reg;
1290 ++pToken;
1291 continue;
1293 break;
1294 case D3DSIO_TEXM3x3TEX:
1296 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1297 char buf[50];
1298 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1299 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1302 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1303 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1305 /* Cubemap textures will be more used than 3D ones. */
1306 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1307 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1308 row = 0;
1309 ++pToken;
1310 continue;
1312 case D3DSIO_TEXM3x3VSPEC:
1314 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1315 char buf[50];
1316 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1317 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1319 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1320 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1322 /* Construct the eye-ray vector from w coordinates */
1323 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1324 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1325 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1326 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1327 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1328 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1330 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1331 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1332 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1333 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1334 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1335 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1336 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1338 /* Cubemap textures will be more used than 3D ones. */
1339 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1340 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1341 row = 0;
1342 ++pToken;
1343 continue;
1345 break;
/* texm3x3spec: three parameters (destination, source, constant); the third
 * token's register number is emitted as C[<n>]. */
1346 case D3DSIO_TEXM3x3SPEC:
1348 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1349 DWORD reg3 = *(pToken + 2) & D3DSP_REGNUM_MASK;
1350 char buf[50];
1351 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1352 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1354 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1355 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1357 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1358 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1359 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1361 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1362 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1363 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1364 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1366 /* Cubemap textures will be more used than 3D ones. */
1367 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1368 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1369 row = 0;
1370 pToken += 3;
1371 continue;
1373 break;
1375 default:
1376 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1377 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1378 } else {
1379 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1381 pToken += curOpcode->num_params;
1382 continue;
/* From here on pToken points at the destination parameter and tmpLine holds
 * the GL opcode name copied above. */
1385 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1386 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1387 switch (mask) {
1388 case D3DSPDM_SATURATE: saturate = TRUE; break;
1389 #if 0 /* as yet unhandled modifiers */
1390 case D3DSPDM_CENTROID: centroid = TRUE; break;
1391 case D3DSPDM_PP: partialpresision = TRUE; break;
1392 case D3DSPDM_X2: X2 = TRUE; break;
1393 case D3DSPDM_X4: X4 = TRUE; break;
1394 case D3DSPDM_X8: X8 = TRUE; break;
1395 case D3DSPDM_D2: D2 = TRUE; break;
1396 case D3DSPDM_D4: D4 = TRUE; break;
1397 case D3DSPDM_D8: D8 = TRUE; break;
1398 #endif
1399 default:
1400 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1404 /* Generate input and output registers */
1405 if (curOpcode->num_params > 0) {
1406 char regs[5][50];
1407 char operands[4][100];
1408 char swzstring[20];
/* NOTE(review): this declaration shadows the function-scope BOOL saturate
 * that the destination modifier switch above assigns. */
1409 int saturate = 0;
1410 char tmpOp[256];
1412 /* Generate lines that handle input modifier computation */
1413 for (i = 1; i < curOpcode->num_params; ++i) {
1414 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1415 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1416 addline(&lineNum, pgmStr, &pgmLength, tmpOp);
1420 /* Handle saturation only when no shift is present in the output modifier */
1421 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1422 saturate = 1;
1424 /* Handle output register */
1425 get_register_name(*pToken, tmpOp, This->constants);
1426 strcpy(operands[0], tmpOp);
1427 get_write_mask(*pToken, tmpOp);
1428 strcat(operands[0], tmpOp);
1430 /* This function works because of side effects from gen_input_modifier_line */
1431 /* Handle input registers */
1432 for (i = 1; i < curOpcode->num_params; ++i) {
1433 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1434 strcpy(operands[i], regs[i - 1]);
1435 get_input_register_swizzle(*(pToken + i), swzstring);
1436 strcat(operands[i], swzstring);
/* Second switch: CMP and CND get hand-written translations, everything else
 * is assembled as "<glname>[_SAT] dst, src, ...;". */
1439 switch(curOpcode->opcode) {
1440 case D3DSIO_CMP:
/* The second and third source operands are emitted in swapped order. */
1441 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
1442 break;
1443 case D3DSIO_CND:
/* cnd is emitted as a comparison of (0.5 - src0) in TMP followed by CMP. */
1444 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1445 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1446 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
1447 break;
1448 default:
1449 if (saturate)
1450 strcat(tmpLine, "_SAT");
1451 strcat(tmpLine, " ");
1452 strcat(tmpLine, operands[0]);
1453 for (i = 1; i < curOpcode->num_params; i++) {
1454 strcat(tmpLine, ", ");
1455 strcat(tmpLine, operands[i]);
1457 strcat(tmpLine,";\n");
1459 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1460 pToken += curOpcode->num_params;
1462 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1463 if (curOpcode->num_params > 0) {
1464 DWORD param = *(pInstr + 1);
1465 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1467 /* Generate a line that handle the output modifier computation */
1468 char regstr[100];
1469 char write_mask[20];
1470 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1471 get_register_name(param, regstr, This->constants);
1472 get_write_mask(param, write_mask);
1473 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1474 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1477 #endif
/* Program epilogue: R0 is copied to the colour output. */
1480 /* TODO: What about result.depth? */
1481 strcpy(tmpLine, "MOV result.color, R0;\n");
1482 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1484 strcpy(tmpLine, "END\n");
1485 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1488 /* finally null terminate the pgmStr*/
1489 pgmStr[pgmLength] = 0;
/* NOTE(review): the capability tested is ARB_VERTEX_PROGRAM although the
 * program below is bound to GL_FRAGMENT_PROGRAM_ARB - confirm this is the
 * intended check. */
1490 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1491 /* Create the hw shader */
1493 /* pgmStr sometimes gets too long for a normal TRACE */
1494 TRACE("Generated program:\n");
1495 if (TRACE_ON(d3d_shader)) {
1496 fprintf(stderr, "%s\n", pgmStr);
1499 /* TODO: change to resource.glObjectHandel or something like that */
1500 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
1502 TRACE("Creating a hw pixel shader, prg=%d\n", This->baseShader.prgId);
1503 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->baseShader.prgId));
1505 TRACE("Created hw pixel shader, prg=%d\n", This->baseShader.prgId);
1506 /* Create the program and check for errors */
1507 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
1508 if (glGetError() == GL_INVALID_OPERATION) {
1509 GLint errPos;
1510 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1511 FIXME("HW PixelShader Error at position %d: %s\n",
1512 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
/* Mark the shader as having no usable hardware program. */
1513 This->baseShader.prgId = -1;
1516 #if 1 /* if were using the data buffer of device then we don't need to free it */
1517 HeapFree(GetProcessHeap(), 0, pgmStr);
1518 #endif
1521 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1522 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1523 static const char swizzle_reg_chars[] = "rgba";
1525 /* the unknown mask is for bits not yet accounted for by any other mask... */
1526 #define UNKNOWN_MASK 0xC000
1528 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
1529 #define EXTENDED_REG 0x1800
1531 DWORD reg = param & D3DSP_REGNUM_MASK;
1532 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
1534 if (input) {
1535 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1536 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1537 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1538 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1539 TRACE("-");
1540 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1541 TRACE("1-");
1544 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1545 case D3DSPR_TEMP:
1546 TRACE("r%lu", reg);
1547 break;
1548 case D3DSPR_INPUT:
1549 TRACE("v%lu", reg);
1550 break;
1551 case D3DSPR_CONST:
1552 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1553 break;
1555 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1556 TRACE("t%lu", reg);
1557 break;
1558 case D3DSPR_RASTOUT:
1559 TRACE("%s", rastout_reg_names[reg]);
1560 break;
1561 case D3DSPR_ATTROUT:
1562 TRACE("oD%lu", reg);
1563 break;
1564 case D3DSPR_TEXCRDOUT:
1565 TRACE("oT%lu", reg);
1566 break;
1567 case D3DSPR_CONSTINT:
1568 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1569 break;
1570 case D3DSPR_CONSTBOOL:
1571 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1572 break;
1573 case D3DSPR_LABEL:
1574 TRACE("l%lu", reg);
1575 break;
1576 case D3DSPR_LOOP:
1577 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1578 break;
1579 default:
1580 break;
1583 if (!input) {
1584 /** operand output */
1586 * for better debugging traces it's done into opcode dump code
1587 * @see pshader_program_dump_opcode
1588 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1589 DWORD mask = param & D3DSP_DSTMOD_MASK;
1590 switch (mask) {
1591 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1592 default:
1593 TRACE("_unhandled_modifier(0x%08lx)", mask);
1596 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1597 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1598 if (shift > 0) {
1599 TRACE("_x%u", 1 << shift);
1603 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1604 TRACE(".");
1605 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1606 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1607 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1608 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1610 } else {
1611 /** operand input */
1612 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1613 DWORD swizzle_r = swizzle & 0x03;
1614 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1615 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1616 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1618 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1619 DWORD mask = param & D3DSP_SRCMOD_MASK;
1620 /*TRACE("_modifier(0x%08lx) ", mask);*/
1621 switch (mask) {
1622 case D3DSPSM_NONE: break;
1623 case D3DSPSM_NEG: break;
1624 case D3DSPSM_BIAS: TRACE("_bias"); break;
1625 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1626 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1627 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1628 case D3DSPSM_COMP: break;
1629 case D3DSPSM_X2: TRACE("_x2"); break;
1630 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1631 case D3DSPSM_DZ: TRACE("_dz"); break;
1632 case D3DSPSM_DW: TRACE("_dw"); break;
1633 default:
1634 TRACE("_unknown(0x%08lx)", mask);
1639 * swizzle bits fields:
1640 * RRGGBBAA
1642 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1643 if (swizzle_r == swizzle_g &&
1644 swizzle_r == swizzle_b &&
1645 swizzle_r == swizzle_a) {
1646 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1647 } else {
1648 TRACE(".%c%c%c%c",
1649 swizzle_reg_chars[swizzle_r],
1650 swizzle_reg_chars[swizzle_g],
1651 swizzle_reg_chars[swizzle_b],
1652 swizzle_reg_chars[swizzle_a]);
1658 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl *This, DWORD token) {
1659 TRACE("dcl_");
1660 switch(token & 0xFFFF) {
1661 case D3DDECLUSAGE_POSITION:
1662 TRACE("%s%ld ", "position",(token & 0xF0000) >> 16);
1663 break;
1664 case D3DDECLUSAGE_BLENDINDICES:
1665 TRACE("%s ", "blend");
1666 break;
1667 case D3DDECLUSAGE_BLENDWEIGHT:
1668 TRACE("%s ", "weight");
1669 break;
1670 case D3DDECLUSAGE_NORMAL:
1671 TRACE("%s%ld ", "normal",(token & 0xF0000) >> 16);
1672 break;
1673 case D3DDECLUSAGE_PSIZE:
1674 TRACE("%s ", "psize");
1675 break;
1676 case D3DDECLUSAGE_COLOR:
1677 if((token & 0xF0000) >> 16 == 0) {
1678 TRACE("%s ", "color");
1679 } else {
1680 TRACE("%s%ld ", "specular", ((token & 0xF0000) >> 16) - 1);
1682 break;
1683 case D3DDECLUSAGE_TEXCOORD:
1684 TRACE("%s%ld ", "texture", (token & 0xF0000) >> 16);
1685 break;
1686 case D3DDECLUSAGE_TANGENT:
1687 TRACE("%s ", "tangent");
1688 break;
1689 case D3DDECLUSAGE_BINORMAL:
1690 TRACE("%s ", "binormal");
1691 break;
1692 case D3DDECLUSAGE_TESSFACTOR:
1693 TRACE("%s ", "tessfactor");
1694 break;
1695 case D3DDECLUSAGE_POSITIONT:
1696 TRACE("%s%ld ", "positionT",(token & 0xF0000) >> 16);
1697 break;
1698 case D3DDECLUSAGE_FOG:
1699 TRACE("%s ", "fog");
1700 break;
1701 case D3DDECLUSAGE_DEPTH:
1702 TRACE("%s ", "depth");
1703 break;
1704 case D3DDECLUSAGE_SAMPLE:
1705 TRACE("%s ", "sample");
1706 break;
1707 default:
1708 FIXME("Unrecognised dcl %08lx", token & 0xFFFF);
1712 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1713 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1714 const DWORD* pToken = pFunction;
1715 const SHADER_OPCODE *curOpcode = NULL;
1716 DWORD len = 0;
1717 DWORD i;
1718 TRACE("(%p) : Parsing programme\n", This);
1720 if (NULL != pToken) {
1721 while (D3DPS_END() != *pToken) {
1722 if (pshader_is_version_token(*pToken)) { /** version */
1723 This->baseShader.version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1724 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1725 ++pToken;
1726 ++len;
1727 continue;
1729 if (pshader_is_comment_token(*pToken)) { /** comment */
1730 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1731 ++pToken;
1732 TRACE("//%s\n", (char*)pToken);
1733 pToken += comment_len;
1734 len += comment_len + 1;
1735 continue;
1737 if (!This->baseShader.version) {
1738 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1740 curOpcode = pshader_program_get_opcode(This, *pToken);
1741 ++pToken;
1742 ++len;
1743 if (NULL == curOpcode) {
1745 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1746 while (*pToken & 0x80000000) {
1748 /* unknown current opcode ... */
1749 TRACE("unrecognized opcode: %08lx", *pToken);
1750 ++pToken;
1751 ++len;
1752 TRACE("\n");
1755 } else {
1756 if (curOpcode->opcode == D3DSIO_DCL) {
1757 pshader_program_dump_decl_usage(This, *pToken);
1758 ++pToken;
1759 ++len;
1760 pshader_program_dump_ps_param(*pToken, 0);
1761 ++pToken;
1762 ++len;
1763 } else
1764 if (curOpcode->opcode == D3DSIO_DEF) {
1765 TRACE("def c%lu = ", *pToken & 0xFF);
1766 ++pToken;
1767 ++len;
1768 TRACE("%f ,", *(float *)pToken);
1769 ++pToken;
1770 ++len;
1771 TRACE("%f ,", *(float *)pToken);
1772 ++pToken;
1773 ++len;
1774 TRACE("%f ,", *(float *)pToken);
1775 ++pToken;
1776 ++len;
1777 TRACE("%f", *(float *)pToken);
1778 ++pToken;
1779 ++len;
1780 } else {
1781 TRACE("%s ", curOpcode->name);
1782 if (curOpcode->num_params > 0) {
1783 pshader_program_dump_ps_param(*pToken, 0);
1784 ++pToken;
1785 ++len;
1786 for (i = 1; i < curOpcode->num_params; ++i) {
1787 TRACE(", ");
1788 pshader_program_dump_ps_param(*pToken, 1);
1789 ++pToken;
1790 ++len;
1794 TRACE("\n");
1797 This->baseShader.functionLength = (len + 1) * sizeof(DWORD);
1798 } else {
1799 This->baseShader.functionLength = 1; /* no Function defined use fixed function vertex processing */
1802 /* Generate HW shader in needed */
1803 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1804 TRACE("(%p) : Generating hardware program\n", This);
1805 #if 1
1806 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1807 #endif
1810 TRACE("(%p) : Copying the function\n", This);
1811 /* copy the function ... because it will certainly be released by application */
1812 if (NULL != pFunction) {
1813 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1814 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1815 } else {
1816 This->baseShader.function = NULL;
1819 /* TODO: Some proper return values for failures */
1820 TRACE("(%p) : Returning WINED3D_OK\n", This);
1821 return WINED3D_OK;
1824 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1826 /*** IUnknown methods ***/
1827 IWineD3DPixelShaderImpl_QueryInterface,
1828 IWineD3DPixelShaderImpl_AddRef,
1829 IWineD3DPixelShaderImpl_Release,
1830 /*** IWineD3DBase methods ***/
1831 IWineD3DPixelShaderImpl_GetParent,
1832 /*** IWineD3DBaseShader methods ***/
1833 IWineD3DPixelShaderImpl_SetFunction,
1834 /*** IWineD3DPixelShader methods ***/
1835 IWineD3DPixelShaderImpl_GetDevice,
1836 IWineD3DPixelShaderImpl_GetFunction