wined3d: Clean up per version shader limits code.
[wine/multimedia.git] / dlls / wined3d / pixelshader.c
blobf52c98c52cae5e410e60989a5ba0134be880a3bf
1 /*
2 * shaders implementation
4 * Copyright 2002-2003 Jason Edmeades
5 * Copyright 2002-2003 Raphael Junqueira
6 * Copyright 2005 Oliver Stieber
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "config.h"
25 #include <math.h>
26 #include <stdio.h>
28 #include "wined3d_private.h"
30 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
32 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
34 #if 0 /* Must not be 1 in cvs version */
35 # define PSTRACE(A) TRACE A
36 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
37 #else
38 # define PSTRACE(A)
39 # define TRACE_VSVECTOR(name)
40 #endif
42 #define GLNAME_REQUIRE_GLSL ((const char *)1)
43 /* *******************************************
44 IWineD3DPixelShader IUnknown parts follow
45 ******************************************* */
46 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
48 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
49 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
50 if (IsEqualGUID(riid, &IID_IUnknown)
51 || IsEqualGUID(riid, &IID_IWineD3DBase)
52 || IsEqualGUID(riid, &IID_IWineD3DBaseShader)
53 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
54 IUnknown_AddRef(iface);
55 *ppobj = This;
56 return S_OK;
58 *ppobj = NULL;
59 return E_NOINTERFACE;
62 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
63 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
64 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
65 return InterlockedIncrement(&This->ref);
68 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
69 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
70 ULONG ref;
71 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
72 ref = InterlockedDecrement(&This->ref);
73 if (ref == 0) {
74 HeapFree(GetProcessHeap(), 0, This);
76 return ref;
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader);
   when that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
85 /* *******************************************
86 IWineD3DPixelShader IWineD3DPixelShader parts follow
87 ******************************************* */
89 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
90 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
92 *parent = This->parent;
93 IUnknown_AddRef(*parent);
94 TRACE("(%p) : returning %p\n", This, *parent);
95 return WINED3D_OK;
98 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
99 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
100 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
101 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
102 TRACE("(%p) returning %p\n", This, *pDevice);
103 return WINED3D_OK;
107 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
108 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
109 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
111 if (NULL == pData) {
112 *pSizeOfData = This->baseShader.functionLength;
113 return WINED3D_OK;
115 if (*pSizeOfData < This->baseShader.functionLength) {
116 *pSizeOfData = This->baseShader.functionLength;
117 return WINED3DERR_MOREDATA;
119 if (NULL == This->baseShader.function) { /* no function defined */
120 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
121 (*(DWORD **) pData) = NULL;
122 } else {
123 if (This->baseShader.functionLength == 0) {
126 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
127 memcpy(pData, This->baseShader.function, This->baseShader.functionLength);
129 return WINED3D_OK;
132 /*******************************
133 * pshader functions software VM
136 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
137 d->x = s0->x + s1->x;
138 d->y = s0->y + s1->y;
139 d->z = s0->z + s1->z;
140 d->w = s0->w + s1->w;
141 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
142 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
145 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
146 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
147 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
148 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
151 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
152 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
153 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
154 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
157 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
158 d->x = 1.0f;
159 d->y = s0->y * s1->y;
160 d->z = s0->z;
161 d->w = s1->w;
162 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
163 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
166 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
167 union {
168 float f;
169 DWORD d;
170 } tmp;
172 tmp.f = floorf(s0->w);
173 d->x = powf(2.0f, tmp.f);
174 d->y = s0->w - tmp.f;
175 tmp.f = powf(2.0f, s0->w);
176 tmp.d &= 0xFFFFFF00U;
177 d->z = tmp.f;
178 d->w = 1.0f;
179 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
180 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
183 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
184 float tmp_f = fabsf(s0->w);
185 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
186 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
187 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
190 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
191 d->x = s0->x * s1->x + s2->x;
192 d->y = s0->y * s1->y + s2->y;
193 d->z = s0->z * s1->z + s2->z;
194 d->w = s0->w * s1->w + s2->w;
195 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
196 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
199 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
200 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
201 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
202 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
203 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
204 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
205 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
208 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
209 d->x = (s0->x < s1->x) ? s0->x : s1->x;
210 d->y = (s0->y < s1->y) ? s0->y : s1->y;
211 d->z = (s0->z < s1->z) ? s0->z : s1->z;
212 d->w = (s0->w < s1->w) ? s0->w : s1->w;
213 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
214 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
217 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
218 d->x = s0->x;
219 d->y = s0->y;
220 d->z = s0->z;
221 d->w = s0->w;
222 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
223 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
226 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
227 d->x = s0->x * s1->x;
228 d->y = s0->y * s1->y;
229 d->z = s0->z * s1->z;
230 d->w = s0->w * s1->w;
231 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
232 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* Software VM 'nop': does nothing apart from the optional trace. */
void pshader_nop(void)
{
    PSTRACE(("executing nop\n"));
}
240 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
241 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
242 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
243 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
246 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
247 float tmp_f = fabsf(s0->w);
248 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
249 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
250 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
253 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
254 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
255 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
256 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
257 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
258 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
259 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
262 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
263 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
264 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
265 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
266 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
267 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
268 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
271 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
272 d->x = s0->x - s1->x;
273 d->y = s0->y - s1->y;
274 d->z = s0->z - s1->z;
275 d->w = s0->w - s1->w;
276 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
277 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
281 * Version 1.1 specific
284 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
285 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
286 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
287 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
290 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
291 float tmp_f = fabsf(s0->w);
292 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
293 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
294 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
297 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
298 d->x = s0->x - floorf(s0->x);
299 d->y = s0->y - floorf(s0->y);
300 d->z = 0.0f;
301 d->w = 1.0f;
302 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
303 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
306 typedef FLOAT D3DMATRIX44[4][4];
307 typedef FLOAT D3DMATRIX43[4][3];
308 typedef FLOAT D3DMATRIX34[3][4];
309 typedef FLOAT D3DMATRIX33[3][3];
310 typedef FLOAT D3DMATRIX23[2][3];
312 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
314 * Buggy CODE: here only if cast not work for copy/paste
315 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
316 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
317 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
318 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
319 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
320 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
321 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
323 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
324 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
325 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
326 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
327 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
328 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
329 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
330 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
333 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
334 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
335 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
336 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
337 d->w = 1.0f;
338 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
339 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
340 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
341 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
344 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
345 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
346 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
347 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
348 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
349 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
350 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
351 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
352 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
355 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
356 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
357 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
358 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
359 d->w = 1.0f;
360 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
361 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
362 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
363 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
366 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
367 FIXME("check\n");
368 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
369 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
370 d->z = 0.0f;
371 d->w = 1.0f;
375 * Version 2.0 specific
377 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
378 d->x = s0->x * (s1->x - s2->x) + s2->x;
379 d->y = s0->y * (s1->y - s2->y) + s2->y;
380 d->z = s0->z * (s1->z - s2->z) + s2->z;
381 d->w = s0->w * (s1->w - s2->w) + s2->w;
384 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
385 d->x = s0->y * s1->z - s0->z * s1->y;
386 d->y = s0->z * s1->x - s0->x * s1->z;
387 d->z = s0->x * s1->y - s0->y * s1->x;
388 d->w = 0.9f; /* w is undefined, so set it to something safeish */
390 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
391 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
394 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
395 d->x = fabsf(s0->x);
396 d->y = fabsf(s0->y);
397 d->z = fabsf(s0->z);
398 d->w = fabsf(s0->w);
399 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
400 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
403 /* Stubs */
404 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
405 FIXME(" : Stub\n");
408 void pshader_texkill(WINED3DSHADERVECTOR* d) {
409 FIXME(" : Stub\n");
412 void pshader_tex(WINED3DSHADERVECTOR* d) {
413 FIXME(" : Stub\n");
415 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
416 FIXME(" : Stub\n");
419 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
420 FIXME(" : Stub\n");
423 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
424 FIXME(" : Stub\n");
427 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
428 FIXME(" : Stub\n");
431 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
432 FIXME(" : Stub\n");
435 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
436 FIXME(" : Stub\n");
439 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
440 FIXME(" : Stub\n");
443 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
444 FIXME(" : Stub\n");
447 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
448 FIXME(" : Stub\n");
451 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
452 FIXME(" : Stub\n");
455 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
456 FIXME(" : Stub\n");
459 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
460 FIXME(" : Stub\n");
463 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
464 FIXME(" : Stub\n");
467 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
468 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
469 FIXME(" : Stub\n");
472 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
473 FIXME(" : Stub\n");
476 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
477 FIXME(" : Stub\n");
480 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
481 FIXME(" : Stub\n");
484 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
485 FIXME(" : Stub\n");
488 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
489 FIXME(" : Stub\n");
492 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
493 FIXME(" : Stub\n");
496 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
497 FIXME(" : Stub\n");
500 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
501 FIXME(" : Stub\n");
504 void pshader_call(WINED3DSHADERVECTOR* d) {
505 FIXME(" : Stub\n");
508 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
509 FIXME(" : Stub\n");
512 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
513 FIXME(" : Stub\n");
516 void pshader_ret(void) {
517 FIXME(" : Stub\n");
520 void pshader_endloop(void) {
521 FIXME(" : Stub\n");
524 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
525 FIXME(" : Stub\n");
528 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
529 FIXME(" : Stub\n");
532 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
533 FIXME(" : Stub\n");
536 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
537 FIXME(" : Stub\n");
540 void pshader_rep(WINED3DSHADERVECTOR* d) {
541 FIXME(" : Stub\n");
544 void pshader_endrep(void) {
545 FIXME(" : Stub\n");
548 void pshader_if(WINED3DSHADERVECTOR* d) {
549 FIXME(" : Stub\n");
552 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
553 FIXME(" : Stub\n");
556 void pshader_else(void) {
557 FIXME(" : Stub\n");
560 void pshader_label(WINED3DSHADERVECTOR* d) {
561 FIXME(" : Stub\n");
564 void pshader_endif(void) {
565 FIXME(" : Stub\n");
568 void pshader_break(void) {
569 FIXME(" : Stub\n");
572 void pshader_breakc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
573 FIXME(" : Stub\n");
576 void pshader_breakp(WINED3DSHADERVECTOR* d) {
577 FIXME(" : Stub\n");
580 void pshader_defb(WINED3DSHADERVECTOR* d) {
581 FIXME(" : Stub\n");
584 void pshader_defi(WINED3DSHADERVECTOR* d) {
585 FIXME(" : Stub\n");
588 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
589 FIXME(" : Stub\n");
592 void pshader_dsx(WINED3DSHADERVECTOR* d) {
593 FIXME(" : Stub\n");
596 void pshader_dsy(WINED3DSHADERVECTOR* d) {
597 FIXME(" : Stub\n");
600 void pshader_texldd(WINED3DSHADERVECTOR* d) {
601 FIXME(" : Stub\n");
604 void pshader_setp(WINED3DSHADERVECTOR* d) {
605 FIXME(" : Stub\n");
608 void pshader_texldl(WINED3DSHADERVECTOR* d) {
609 FIXME(" : Stub\n");
613 * log, exp, frc, m*x* seems to be macros ins ... to see
615 CONST SHADER_OPCODE IWineD3DPixelShaderImpl_shader_ins[] = {
616 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
617 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
618 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
619 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
620 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
621 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
622 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
623 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
624 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
625 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
626 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
627 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
628 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
629 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
630 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
631 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
632 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
633 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
634 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
635 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
636 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
637 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
638 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
639 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
640 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
643 /** FIXME: use direct access so add the others opcodes as stubs */
644 /* DCL is a specil operation */
645 {D3DSIO_DCL, "dcl", NULL, 2, pshader_dcl, 0, 0},
646 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
647 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
648 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
649 DP3 tmp , vec, vec;
650 RSQ tmp, tmp.x;
651 MUL vec.xyz, vec, tmp;
652 but I think this is better because it accounts for w properly.
653 DP3 tmp , vec, vec;
654 RSQ tmp, tmp.x;
655 MUL vec, vec, tmp;
658 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
659 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
661 /* Flow control - requires GLSL or software shaders */
662 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 1, pshader_rep, 0, 0},
663 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
664 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 1, pshader_if, 0, 0},
665 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
666 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 0, pshader_else, 0, 0},
667 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 0, pshader_endif, 0, 0},
668 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 0, pshader_break, 0, 0},
669 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
670 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 1, pshader_breakp, 0, 0},
671 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
672 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
673 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
674 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
675 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
676 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
678 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
679 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
681 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
682 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
683 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(3,0)},
684 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
685 {D3DSIO_TEX, "texld", "undefined", 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
686 {D3DSIO_TEX, "texld", "undefined", 3, pshader_texld, D3DPS_VERSION(2,0), -1},
687 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
688 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
689 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
690 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
691 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
692 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
693 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
694 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
695 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
696 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
697 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
698 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
699 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
700 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
701 /* def is a special operation */
702 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
703 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
704 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
705 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
706 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
707 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
708 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
709 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
710 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
711 /* TODO: dp2add can be made out of multiple instuctions */
712 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
713 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
714 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
715 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
716 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
717 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
718 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
719 {0, NULL, NULL, 0, NULL, 0, 0}
722 inline static const SHADER_OPCODE* pshader_program_get_opcode(IWineD3DPixelShaderImpl *This, const DWORD code) {
723 DWORD i = 0;
724 DWORD version = This->baseShader.version;
725 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
726 const SHADER_OPCODE *shader_ins = This->baseShader.shader_ins;
728 /** TODO: use dichotomic search */
729 while (NULL != shader_ins[i].name) {
730 if (((code & D3DSI_OPCODE_MASK) == shader_ins[i].opcode) &&
731 (((hex_version >= shader_ins[i].min_version) && (hex_version <= shader_ins[i].max_version)) ||
732 ((shader_ins[i].min_version == 0) && (shader_ins[i].max_version == 0)))) {
733 return &shader_ins[i];
735 ++i;
737 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code, code, code & D3DSI_OPCODE_MASK, version);
738 return NULL;
741 inline static BOOL pshader_is_version_token(DWORD token) {
742 return 0xFFFF0000 == (token & 0xFFFF0000);
745 inline static BOOL pshader_is_comment_token(DWORD token) {
746 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
750 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
751 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
753 DWORD reg = param & D3DSP_REGNUM_MASK;
754 DWORD regtype = shader_get_regtype(param);
756 switch (regtype) {
757 case D3DSPR_TEMP:
758 sprintf(regstr, "R%lu", reg);
759 break;
760 case D3DSPR_INPUT:
761 if (reg==0) {
762 strcpy(regstr, "fragment.color.primary");
763 } else {
764 strcpy(regstr, "fragment.color.secondary");
766 break;
767 case D3DSPR_CONST:
768 if (constants[reg])
769 sprintf(regstr, "C%lu", reg);
770 else
771 sprintf(regstr, "program.env[%lu]", reg);
772 break;
773 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
774 sprintf(regstr,"T%lu", reg);
775 break;
776 case D3DSPR_RASTOUT:
777 sprintf(regstr, "%s", rastout_reg_names[reg]);
778 break;
779 case D3DSPR_ATTROUT:
780 sprintf(regstr, "oD[%lu]", reg);
781 break;
782 case D3DSPR_TEXCRDOUT:
783 sprintf(regstr, "oT[%lu]", reg);
784 break;
785 default:
786 FIXME("Unhandled register name Type(%ld)\n", regtype);
787 break;
791 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
792 *write_mask = 0;
793 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
794 strcat(write_mask, ".");
795 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
796 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
797 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
798 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
802 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
803 static const char swizzle_reg_chars[] = "rgba";
804 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
805 DWORD swizzle_x = swizzle & 0x03;
806 DWORD swizzle_y = (swizzle >> 2) & 0x03;
807 DWORD swizzle_z = (swizzle >> 4) & 0x03;
808 DWORD swizzle_w = (swizzle >> 6) & 0x03;
810 * swizzle bits fields:
811 * WWZZYYXX
813 *swzstring = 0;
814 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
815 if (swizzle_x == swizzle_y &&
816 swizzle_x == swizzle_z &&
817 swizzle_x == swizzle_w) {
818 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
819 } else {
820 sprintf(swzstring, ".%c%c%c%c",
821 swizzle_reg_chars[swizzle_x],
822 swizzle_reg_chars[swizzle_y],
823 swizzle_reg_chars[swizzle_z],
824 swizzle_reg_chars[swizzle_w]);
/* Scale operand used for each destination shift value (index 0..15). Entries
 * named "dummy" have no matching component in the coefmul/coefdiv parameters
 * that the generated program declares. */
static const char* shift_tab[] = {
    /* multipliers */
    "dummy",     /*  0 (none) */
    "coefmul.x", /*  1 (x2)   */
    "coefmul.y", /*  2 (x4)   */
    "coefmul.z", /*  3 (x8)   */
    "coefmul.w", /*  4 (x16)  */
    "dummy",     /*  5 (x32)  */
    "dummy",     /*  6 (x64)  */
    "dummy",     /*  7 (x128) */
    /* divisors */
    "dummy",     /*  8 (d256) */
    "dummy",     /*  9 (d128) */
    "dummy",     /* 10 (d64)  */
    "dummy",     /* 11 (d32)  */
    "coefdiv.w", /* 12 (d16)  */
    "coefdiv.z", /* 13 (d8)   */
    "coefdiv.y", /* 14 (d4)   */
    "coefdiv.x"  /* 15 (d2)   */
};

/*
 * Format into `line` the MUL instruction that applies the output shift
 * modifier to `regstr` in place (restricted by `write_mask`), adding the
 * _SAT suffix when `saturate` is non-zero. `shift` indexes shift_tab.
 */
inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
    const char *sat_suffix = "";
    const char *factor = shift_tab[shift];

    if (saturate)
        sat_suffix = "_SAT";

    sprintf(line, "MUL%s %s%s, %s, %s;", sat_suffix, regstr, write_mask, regstr, factor);
}
853 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
854 /* Generate a line that does the input modifier computation and return the input register to use */
855 static char regstr[256];
856 static char tmpline[256];
857 int insert_line;
859 /* Assume a new line will be added */
860 insert_line = 1;
862 /* Get register name */
863 get_register_name(instr, regstr, constants);
865 TRACE(" Register name %s\n", regstr);
866 switch (instr & D3DSP_SRCMOD_MASK) {
867 case D3DSPSM_NONE:
868 strcpy(outregstr, regstr);
869 insert_line = 0;
870 break;
871 case D3DSPSM_NEG:
872 sprintf(outregstr, "-%s", regstr);
873 insert_line = 0;
874 break;
875 case D3DSPSM_BIAS:
876 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
877 break;
878 case D3DSPSM_BIASNEG:
879 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
880 break;
881 case D3DSPSM_SIGN:
882 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
883 break;
884 case D3DSPSM_SIGNNEG:
885 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
886 break;
887 case D3DSPSM_COMP:
888 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
889 break;
890 case D3DSPSM_X2:
891 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
892 break;
893 case D3DSPSM_X2NEG:
894 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
895 break;
896 case D3DSPSM_DZ:
897 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
898 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
899 strcat(line, "\n"); /* Hack */
900 strcat(line, tmpline);
901 break;
902 case D3DSPSM_DW:
903 sprintf(line, "RCP T%c, %s.w;", 'A' + tmpreg, regstr);
904 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
905 strcat(line, "\n"); /* Hack */
906 strcat(line, tmpline);
907 break;
908 default:
909 strcpy(outregstr, regstr);
910 insert_line = 0;
913 if (insert_line) {
914 /* Substitute the register name */
915 sprintf(outregstr, "T%c", 'A' + tmpreg);
918 return insert_line;
921 inline static void pshader_program_get_registers_used(
922 IWineD3DPixelShaderImpl *This,
923 CONST DWORD* pToken, DWORD* tempsUsed, DWORD* texUsed) {
925 if (pToken == NULL)
926 return;
928 *tempsUsed = 0;
929 *texUsed = 0;
931 while (D3DVS_END() != *pToken) {
932 CONST SHADER_OPCODE* curOpcode;
934 /* Skip version */
935 if (pshader_is_version_token(*pToken)) {
936 ++pToken;
937 continue;
939 /* Skip comments */
940 } else if (pshader_is_comment_token(*pToken)) {
941 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
942 ++pToken;
943 pToken += comment_len;
944 continue;
947 /* Fetch opcode */
948 curOpcode = pshader_program_get_opcode(This, *pToken);
949 ++pToken;
951 /* Skip declarations (for now) */
952 if (D3DSIO_DCL == curOpcode->opcode) {
953 pToken += curOpcode->num_params;
954 continue;
956 /* Skip definitions (for now) */
957 } else if (D3DSIO_DEF == curOpcode->opcode) {
958 pToken += curOpcode->num_params;
959 continue;
961 /* Set texture registers, and temporary registers */
962 } else {
963 int i;
965 for (i = 0; i < curOpcode->num_params; ++i) {
966 DWORD regtype = shader_get_regtype(*pToken);
967 DWORD reg = (*pToken) & D3DSP_REGNUM_MASK;
968 if (D3DSPR_TEXTURE == regtype)
969 *texUsed |= (1 << reg);
970 if (D3DSPR_TEMP == regtype)
971 *tempsUsed |= (1 << reg);
972 ++pToken;
978 void pshader_set_version(
979 IWineD3DPixelShaderImpl *This,
980 DWORD version) {
982 DWORD major = (version >> 8) & 0x0F;
983 DWORD minor = version & 0x0F;
985 This->baseShader.hex_version = version;
986 This->baseShader.version = major * 10 + minor;
987 TRACE("ps_%lu_%lu\n", major, minor);
989 This->baseShader.limits.address = 0;
991 switch (This->baseShader.version) {
992 case 10:
993 case 11:
994 case 12:
995 case 13: This->baseShader.limits.temporary = 2;
996 This->baseShader.limits.constant_float = 8;
997 This->baseShader.limits.constant_int = 0;
998 This->baseShader.limits.constant_bool = 0;
999 This->baseShader.limits.texture = 4;
1000 break;
1002 case 14: This->baseShader.limits.temporary = 6;
1003 This->baseShader.limits.constant_float = 8;
1004 This->baseShader.limits.constant_int = 0;
1005 This->baseShader.limits.constant_bool = 0;
1006 This->baseShader.limits.texture = 6;
1007 break;
1009 /* FIXME: temporaries must match D3DPSHADERCAPS2_0.NumTemps */
1010 case 20: This->baseShader.limits.temporary = 32;
1011 This->baseShader.limits.constant_float = 32;
1012 This->baseShader.limits.constant_int = 16;
1013 This->baseShader.limits.constant_bool = 16;
1014 This->baseShader.limits.texture = 8;
1015 break;
1017 case 30: This->baseShader.limits.temporary = 32;
1018 This->baseShader.limits.constant_float = 224;
1019 This->baseShader.limits.constant_int = 16;
1020 This->baseShader.limits.constant_bool = 16;
1021 This->baseShader.limits.texture = 0;
1022 break;
1024 default: This->baseShader.limits.temporary = 32;
1025 This->baseShader.limits.constant_float = 8;
1026 This->baseShader.limits.constant_int = 0;
1027 This->baseShader.limits.constant_bool = 0;
1028 This->baseShader.limits.texture = 8;
1029 FIXME("Unrecognized pixel shader version %lu!\n", version);
1033 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
1034 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1035 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1036 const DWORD *pToken = pFunction;
1037 const SHADER_OPCODE *curOpcode = NULL;
1038 const DWORD *pInstr;
1039 DWORD i;
1040 char tmpLine[255];
1041 #if 0 /* TODO: loop register (just another address register ) */
1042 BOOL hasLoops = FALSE;
1043 #endif
1044 SHADER_BUFFER buffer;
1046 BOOL saturate; /* clamp to 0.0 -> 1.0*/
1047 int row = 0; /* not sure, something to do with macros? */
1048 DWORD tcw[2];
1049 int version = This->baseShader.version;
1051 /* Keep bitmaps of used temporary and texture registers */
1052 DWORD tempsUsed, texUsed;
1054 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
1055 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
1056 if (This->device->fixupVertexBufferSize < SHADER_PGMSIZE) {
1057 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
1058 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, SHADER_PGMSIZE);
1059 This->fixupVertexBufferSize = SHADER_PGMSIZE;
1060 This->fixupVertexBuffer[0] = 0;
1062 buffer.buffer = This->device->fixupVertexBuffer;
1063 #else
1064 buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
1065 #endif
1066 buffer.bsize = 0;
1067 buffer.lineNo = 0;
1069 /* FIXME: if jumps are used, use GLSL, else use ARB_fragment_program */
1070 shader_addline(&buffer, "!!ARBfp1.0\n");
1072 /* TODO: Think about using a first pass to work out what's required for the second pass. */
1073 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
1074 This->constants[i] = 0;
1076 /* First pass: figure out which temporary and texture registers are used */
1077 pshader_program_get_registers_used(This, pToken, &tempsUsed, &texUsed);
1078 TRACE("Texture registers used: %#lx, Temp registers used %#lx\n", texUsed, tempsUsed);
1080 /* TODO: check register usage against GL/Directx limits, and fail if they're exceeded */
1082 /* Pre-declare registers */
1083 for(i = 0; i < This->baseShader.limits.texture; i++) {
1084 if (texUsed & (1 << i))
1085 shader_addline(&buffer,"TEMP T%lu;\n", i);
1088 for(i = 0; i < This->baseShader.limits.temporary; i++) {
1089 if (tempsUsed & (1 << i))
1090 shader_addline(&buffer, "TEMP R%lu;\n", i);
1093 /* Necessary for internal operations */
1094 shader_addline(&buffer, "TEMP TMP;\n");
1095 shader_addline(&buffer, "TEMP TMP2;\n");
1096 shader_addline(&buffer, "TEMP TA;\n");
1097 shader_addline(&buffer, "TEMP TB;\n");
1098 shader_addline(&buffer, "TEMP TC;\n");
1099 shader_addline(&buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1100 shader_addline(&buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1101 shader_addline(&buffer, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1103 /* Texture coordinate registers must be pre-loaded */
1104 for (i = 0; i < This->baseShader.limits.texture; i++) {
1105 if (texUsed & (1 << i))
1106 shader_addline(&buffer, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1109 /* Second pass, process opcodes */
1110 if (NULL != pToken) {
1111 while (D3DPS_END() != *pToken) {
1112 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
1113 if (version >= 2) {
1114 instructionSize = pToken & SIZEBITS >> 27;
1116 #endif
1118 /* Skip version token */
1119 if (pshader_is_version_token(*pToken)) {
1120 ++pToken;
1121 continue;
1124 /* Skip comment tokens */
1125 if (pshader_is_comment_token(*pToken)) {
1126 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1127 ++pToken;
1128 TRACE("#%s\n", (char*)pToken);
1129 pToken += comment_len;
1130 continue;
1132 /* here */
1133 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1134 code = *pToken;
1135 #endif
1136 pInstr = pToken;
1137 curOpcode = pshader_program_get_opcode(This, *pToken);
1138 ++pToken;
1139 if (NULL == curOpcode) {
1140 /* unknown current opcode ... (shouldn't be any!) */
1141 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1142 FIXME("unrecognized opcode: %08lx\n", *pToken);
1143 ++pToken;
1145 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1146 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1147 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1148 pToken += curOpcode->num_params;
1150 } else if (D3DSIO_DEF == curOpcode->opcode) {
1152 /* Handle definitions here, they don't fit well with the
1153 * other instructions below [for now ] */
1155 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1157 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1158 curOpcode->name, curOpcode->glname, curOpcode->num_params);
1160 shader_addline(&buffer,
1161 "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1162 *((const float *)(pToken + 1)),
1163 *((const float *)(pToken + 2)),
1164 *((const float *)(pToken + 3)),
1165 *((const float *)(pToken + 4)) );
1167 This->constants[reg] = 1;
1168 pToken += 5;
1169 continue;
1171 } else {
1173 /* Common processing: [inst] [dst]* [src]* */
1174 DWORD shift;
1175 char output_rname[256];
1176 char output_wmask[20];
1178 TRACE("Found opcode D3D:%s GL:%s, PARAMS:%d, \n",
1179 curOpcode->name, curOpcode->glname, curOpcode->num_params);
1181 saturate = FALSE;
1183 /* Build opcode for GL vertex_program */
1184 switch (curOpcode->opcode) {
1185 case D3DSIO_NOP:
1186 case D3DSIO_PHASE:
1187 continue;
1188 case D3DSIO_MOV:
1189 case D3DSIO_CND:
1190 case D3DSIO_CMP:
1191 case D3DSIO_ADD:
1192 case D3DSIO_SUB:
1193 case D3DSIO_MAD:
1194 case D3DSIO_MUL:
1195 case D3DSIO_RCP:
1196 case D3DSIO_RSQ:
1197 case D3DSIO_DP3:
1198 case D3DSIO_DP4:
1199 case D3DSIO_MIN:
1200 case D3DSIO_MAX:
1201 case D3DSIO_SLT:
1202 case D3DSIO_SGE:
1203 case D3DSIO_DST:
1204 case D3DSIO_FRC:
1205 case D3DSIO_EXPP:
1206 case D3DSIO_LOGP:
1207 case D3DSIO_EXP:
1208 case D3DSIO_LOG:
1209 case D3DSIO_LRP:
1210 case D3DSIO_TEXKILL:
1211 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1212 strcpy(tmpLine, curOpcode->glname);
1213 break;
1214 case D3DSIO_TEX:
1216 char reg_dest[40];
1217 char reg_coord[40];
1218 char reg_coord_swz[20] = "";
1219 DWORD reg_dest_code;
1220 DWORD reg_sampler_code;
1222 /* All versions have a destination register */
1223 reg_dest_code = *pToken & D3DSP_REGNUM_MASK;
1224 get_register_name(*pToken++, reg_dest, This->constants);
1226 /* 1.0-1.3: Use destination register as coordinate source. No modifiers.
1227 1.4: Use provided coordinate source register. _dw, _dz, swizzle allowed.
1228 2.0+: Use provided coordinate source register. No modifiers.
1229 3.0+: Use provided coordinate source register. Swizzle allowed */
1230 if (version < 14)
1231 strcpy(reg_coord, reg_dest);
1233 else if (version == 14) {
1234 if (gen_input_modifier_line(*pToken, 0, reg_coord, tmpLine, This->constants))
1235 shader_addline(&buffer, tmpLine);
1236 get_input_register_swizzle(*pToken, reg_coord_swz);
1237 pToken++;
1239 else if (version > 14 && version < 30) {
1240 get_register_name(*pToken, reg_coord, This->constants);
1241 pToken++;
1243 else if (version >= 30) {
1244 get_input_register_swizzle(*pToken, reg_coord_swz);
1245 get_register_name(*pToken, reg_coord, This->constants);
1246 pToken++;
1249 /* 1.0-1.4: Use destination register number as texture code.
1250 2.0+: Use provided sampler number as texure code. */
1251 if (version < 20)
1252 reg_sampler_code = reg_dest_code;
1254 else {
1255 reg_sampler_code = *pToken & D3DSP_REGNUM_MASK;
1256 pToken++;
1259 shader_addline(&buffer, "TEX %s, %s%s, texture[%lu], 2D;\n",
1260 reg_dest, reg_coord, reg_coord_swz, reg_sampler_code);
1261 continue;
1263 break;
1264 case D3DSIO_TEXCOORD:
1266 char tmp[20];
1267 get_write_mask(*pToken, tmp);
1268 if (version != 14) {
1269 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1270 shader_addline(&buffer, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1271 ++pToken;
1272 } else {
1273 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1274 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1275 shader_addline(&buffer, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1276 ++pToken;
1278 continue;
1280 break;
1281 case D3DSIO_TEXM3x2PAD:
1283 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1284 char buf[50];
1285 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1286 shader_addline(&buffer, tmpLine);
1287 shader_addline(&buffer, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1288 ++pToken;
1289 continue;
1291 break;
1292 case D3DSIO_TEXM3x2TEX:
1294 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1295 char buf[50];
1296 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1297 shader_addline(&buffer, tmpLine);
1298 shader_addline(&buffer, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1299 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1300 ++pToken;
1301 continue;
1303 break;
1304 case D3DSIO_TEXREG2AR:
1306 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1307 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1308 shader_addline(&buffer, "MOV TMP.r, T%lu.a;\n", reg2);
1309 shader_addline(&buffer, "MOV TMP.g, T%lu.r;\n", reg2);
1310 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1311 ++pToken;
1312 continue;
1314 break;
1315 case D3DSIO_TEXREG2GB:
1317 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1318 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1319 shader_addline(&buffer, "MOV TMP.r, T%lu.g;\n", reg2);
1320 shader_addline(&buffer, "MOV TMP.g, T%lu.b;\n", reg2);
1321 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1322 ++pToken;
1323 continue;
1325 break;
1326 case D3DSIO_TEXBEM:
1328 DWORD reg1 = *pToken & D3DSP_REGNUM_MASK;
1329 DWORD reg2 = *++pToken & D3DSP_REGNUM_MASK;
1331 /* FIXME: Should apply the BUMPMAPENV matrix */
1332 shader_addline(&buffer, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1333 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1334 ++pToken;
1335 continue;
1337 break;
1338 case D3DSIO_TEXM3x3PAD:
1340 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1341 char buf[50];
1342 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1343 shader_addline(&buffer, tmpLine);
1344 shader_addline(&buffer, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1345 tcw[row++] = reg;
1346 ++pToken;
1347 continue;
1349 break;
1350 case D3DSIO_TEXM3x3TEX:
1352 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1353 char buf[50];
1354 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1355 shader_addline(&buffer, tmpLine);
1356 shader_addline(&buffer, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1358 /* Cubemap textures will be more used than 3D ones. */
1359 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1360 row = 0;
1361 ++pToken;
1362 continue;
1364 case D3DSIO_TEXM3x3VSPEC:
1366 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1367 char buf[50];
1368 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants))
1369 shader_addline(&buffer, tmpLine);
1370 shader_addline(&buffer, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1372 /* Construct the eye-ray vector from w coordinates */
1373 shader_addline(&buffer, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1374 shader_addline(&buffer, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1375 shader_addline(&buffer, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1377 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1378 shader_addline(&buffer, "DP3 TMP.w, TMP, TMP2;\n");
1379 shader_addline(&buffer, "MUL TMP, TMP.w, TMP;\n");
1380 shader_addline(&buffer, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1382 /* Cubemap textures will be more used than 3D ones. */
1383 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1384 row = 0;
1385 ++pToken;
1386 continue;
1388 break;
1389 case D3DSIO_TEXM3x3SPEC:
1391 DWORD reg = *pToken & D3DSP_REGNUM_MASK;
1392 DWORD reg3 = *(pToken + 2) & D3DSP_REGNUM_MASK;
1393 char buf[50];
1394 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants))
1395 shader_addline(&buffer, tmpLine);
1396 shader_addline(&buffer, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1398 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1399 shader_addline(&buffer, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1400 shader_addline(&buffer, "MUL TMP, TMP.w, TMP;\n");
1401 shader_addline(&buffer, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1403 /* Cubemap textures will be more used than 3D ones. */
1404 shader_addline(&buffer, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1405 row = 0;
1406 pToken += 3;
1407 continue;
1409 break;
1411 default:
1412 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1413 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1414 } else {
1415 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1417 pToken += curOpcode->num_params;
1418 continue;
1421 /* Process modifiers */
1422 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1423 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1424 switch (mask) {
1425 case D3DSPDM_SATURATE: saturate = TRUE; break;
1426 #if 0 /* as yet unhandled modifiers */
1427 case D3DSPDM_CENTROID: centroid = TRUE; break;
1428 case D3DSPDM_PP: partialpresision = TRUE; break;
1429 #endif
1430 default:
1431 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1434 shift = (*pToken & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1436 /* Generate input and output registers */
1437 if (curOpcode->num_params > 0) {
1438 char regs[5][50];
1439 char operands[4][100];
1440 char swzstring[20];
1441 char tmpOp[256];
1443 /* Generate lines that handle input modifier computation */
1444 for (i = 1; i < curOpcode->num_params; ++i) {
1445 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1446 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1447 shader_addline(&buffer, tmpOp);
1451 /* Handle output register */
1452 get_register_name(*pToken, output_rname, This->constants);
1453 strcpy(operands[0], output_rname);
1454 get_write_mask(*pToken, output_wmask);
1455 strcat(operands[0], output_wmask);
1457 /* This function works because of side effects from gen_input_modifier_line */
1458 /* Handle input registers */
1459 for (i = 1; i < curOpcode->num_params; ++i) {
1460 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1461 strcpy(operands[i], regs[i - 1]);
1462 get_input_register_swizzle(*(pToken + i), swzstring);
1463 strcat(operands[i], swzstring);
1466 switch(curOpcode->opcode) {
1467 case D3DSIO_CMP:
1468 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""),
1469 operands[0], operands[1], operands[3], operands[2]);
1470 break;
1471 case D3DSIO_CND:
1472 shader_addline(&buffer, "ADD TMP, -%s, coefdiv.x;\n", operands[1]);
1473 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""),
1474 operands[0], operands[2], operands[3]);
1475 break;
1476 default:
1477 if (saturate && (shift == 0))
1478 strcat(tmpLine, "_SAT");
1479 strcat(tmpLine, " ");
1480 strcat(tmpLine, operands[0]);
1481 for (i = 1; i < curOpcode->num_params; i++) {
1482 strcat(tmpLine, ", ");
1483 strcat(tmpLine, operands[i]);
1485 strcat(tmpLine,";\n");
1487 shader_addline(&buffer, tmpLine);
1489 /* A shift requires another line. */
1490 if (shift != 0) {
1491 gen_output_modifier_line(saturate, output_wmask, shift, output_rname, tmpLine);
1492 shader_addline(&buffer, tmpLine);
1494 pToken += curOpcode->num_params;
1498 /* TODO: What about result.depth? */
1499 shader_addline(&buffer, "MOV result.color, R0;\n");
1500 shader_addline(&buffer, "END\n");
1503 /* finally null terminate the buffer */
1504 buffer.buffer[buffer.bsize] = 0;
1505 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1506 /* Create the hw shader */
1508 /* The program string sometimes gets too long for a normal TRACE */
1509 TRACE("Generated program:\n");
1510 if (TRACE_ON(d3d_shader)) {
1511 fprintf(stderr, "%s\n", buffer.buffer);
1514 /* TODO: change to resource.glObjectHandel or something like that */
1515 GL_EXTCALL(glGenProgramsARB(1, &This->baseShader.prgId));
1517 TRACE("Creating a hw pixel shader, prg=%d\n", This->baseShader.prgId);
1518 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->baseShader.prgId));
1520 TRACE("Created hw pixel shader, prg=%d\n", This->baseShader.prgId);
1521 /* Create the program and check for errors */
1522 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
1523 buffer.bsize, buffer.buffer));
1525 if (glGetError() == GL_INVALID_OPERATION) {
1526 GLint errPos;
1527 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1528 FIXME("HW PixelShader Error at position %d: %s\n",
1529 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
1530 This->baseShader.prgId = -1;
1533 #if 1 /* if were using the data buffer of device then we don't need to free it */
1534 HeapFree(GetProcessHeap(), 0, buffer.buffer);
1535 #endif
1538 inline static void pshader_program_dump_ins_modifiers(const DWORD output) {
1540 DWORD shift = (output & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1541 DWORD mmask = output & D3DSP_DSTMOD_MASK;
1543 switch (shift) {
1544 case 0: break;
1545 case 13: TRACE("_d8"); break;
1546 case 14: TRACE("_d4"); break;
1547 case 15: TRACE("_d2"); break;
1548 case 1: TRACE("_x2"); break;
1549 case 2: TRACE("_x4"); break;
1550 case 3: TRACE("_x8"); break;
1551 default: TRACE("_unhandled_shift(%ld)", shift); break;
1554 switch(mmask) {
1555 case D3DSPDM_NONE: break;
1556 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1557 case D3DSPDM_PARTIALPRECISION: TRACE("_pp"); break;
1558 case D3DSPDM_MSAMPCENTROID: TRACE("_centroid"); break;
1559 default: TRACE("_unhandled_modifier(%#lx)", mmask); break;
1563 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1564 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1565 static const char swizzle_reg_chars[] = "rgba";
1567 DWORD reg = param & D3DSP_REGNUM_MASK;
1568 DWORD regtype = shader_get_regtype(param);
1570 if (input) {
1571 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1572 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1573 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1574 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1575 TRACE("-");
1576 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1577 TRACE("1-");
1580 switch (regtype) {
1581 case D3DSPR_TEMP:
1582 TRACE("r%lu", reg);
1583 break;
1584 case D3DSPR_INPUT:
1585 TRACE("v%lu", reg);
1586 break;
1587 case D3DSPR_CONST:
1588 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1589 break;
1591 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1592 TRACE("t%lu", reg);
1593 break;
1594 case D3DSPR_RASTOUT:
1595 TRACE("%s", rastout_reg_names[reg]);
1596 break;
1597 case D3DSPR_ATTROUT:
1598 TRACE("oD%lu", reg);
1599 break;
1600 case D3DSPR_TEXCRDOUT:
1601 TRACE("oT%lu", reg);
1602 break;
1603 case D3DSPR_CONSTINT:
1604 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1605 break;
1606 case D3DSPR_CONSTBOOL:
1607 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1608 break;
1609 case D3DSPR_LABEL:
1610 TRACE("l%lu", reg);
1611 break;
1612 case D3DSPR_LOOP:
1613 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1614 break;
1615 case D3DSPR_SAMPLER:
1616 TRACE("s%lu", reg);
1617 break;
1618 default:
1619 TRACE("unhandled_rtype(%lx)", regtype);
1620 break;
1623 if (!input) {
1624 /* operand output (for modifiers and shift, see dump_ins_modifiers) */
1626 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1627 TRACE(".");
1628 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1629 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1630 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1631 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1633 } else {
1634 /** operand input */
1635 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1636 DWORD swizzle_r = swizzle & 0x03;
1637 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1638 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1639 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1641 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1642 DWORD mask = param & D3DSP_SRCMOD_MASK;
1643 /*TRACE("_modifier(0x%08lx) ", mask);*/
1644 switch (mask) {
1645 case D3DSPSM_NONE: break;
1646 case D3DSPSM_NEG: break;
1647 case D3DSPSM_BIAS: TRACE("_bias"); break;
1648 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1649 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1650 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1651 case D3DSPSM_COMP: break;
1652 case D3DSPSM_X2: TRACE("_x2"); break;
1653 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1654 case D3DSPSM_DZ: TRACE("_dz"); break;
1655 case D3DSPSM_DW: TRACE("_dw"); break;
1656 default:
1657 TRACE("_unknown(0x%08lx)", mask);
1662 * swizzle bits fields:
1663 * RRGGBBAA
1665 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1666 if (swizzle_r == swizzle_g &&
1667 swizzle_r == swizzle_b &&
1668 swizzle_r == swizzle_a) {
1669 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1670 } else {
1671 TRACE(".%c%c%c%c",
1672 swizzle_reg_chars[swizzle_r],
1673 swizzle_reg_chars[swizzle_g],
1674 swizzle_reg_chars[swizzle_b],
1675 swizzle_reg_chars[swizzle_a]);
1681 inline static void pshader_program_dump_decl_usage(
1682 IWineD3DPixelShaderImpl *This, DWORD decl, DWORD param) {
1684 DWORD regtype = shader_get_regtype(param);
1685 TRACE("dcl_");
1687 if (regtype == D3DSPR_SAMPLER) {
1688 DWORD ttype = decl & D3DSP_TEXTURETYPE_MASK;
1690 switch (ttype) {
1691 case D3DSTT_2D: TRACE("2d "); break;
1692 case D3DSTT_CUBE: TRACE("cube "); break;
1693 case D3DSTT_VOLUME: TRACE("volume "); break;
1694 default: TRACE("unknown_ttype(%08lx) ", ttype);
1697 } else {
1699 DWORD usage = decl & D3DSP_DCL_USAGE_MASK;
1700 DWORD idx = (decl & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
1702 switch(usage) {
1703 case D3DDECLUSAGE_POSITION:
1704 TRACE("%s%ld ", "position", idx);
1705 break;
1706 case D3DDECLUSAGE_BLENDINDICES:
1707 TRACE("%s ", "blend");
1708 break;
1709 case D3DDECLUSAGE_BLENDWEIGHT:
1710 TRACE("%s ", "weight");
1711 break;
1712 case D3DDECLUSAGE_NORMAL:
1713 TRACE("%s%ld ", "normal", idx);
1714 break;
1715 case D3DDECLUSAGE_PSIZE:
1716 TRACE("%s ", "psize");
1717 break;
1718 case D3DDECLUSAGE_COLOR:
1719 if(idx == 0) {
1720 TRACE("%s ", "color");
1721 } else {
1722 TRACE("%s%ld ", "specular", (idx - 1));
1724 break;
1725 case D3DDECLUSAGE_TEXCOORD:
1726 TRACE("%s%ld ", "texture", idx);
1727 break;
1728 case D3DDECLUSAGE_TANGENT:
1729 TRACE("%s ", "tangent");
1730 break;
1731 case D3DDECLUSAGE_BINORMAL:
1732 TRACE("%s ", "binormal");
1733 break;
1734 case D3DDECLUSAGE_TESSFACTOR:
1735 TRACE("%s ", "tessfactor");
1736 break;
1737 case D3DDECLUSAGE_POSITIONT:
1738 TRACE("%s%ld ", "positionT", idx);
1739 break;
1740 case D3DDECLUSAGE_FOG:
1741 TRACE("%s ", "fog");
1742 break;
1743 case D3DDECLUSAGE_DEPTH:
1744 TRACE("%s ", "depth");
1745 break;
1746 case D3DDECLUSAGE_SAMPLE:
1747 TRACE("%s ", "sample");
1748 break;
1749 default:
1750 FIXME("Unrecognised dcl %08lx", usage);
1755 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1756 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1757 const DWORD* pToken = pFunction;
1758 const SHADER_OPCODE *curOpcode = NULL;
1759 DWORD len = 0;
1760 DWORD i;
1761 TRACE("(%p) : Parsing programme\n", This);
1763 if (NULL != pToken) {
1764 while (D3DPS_END() != *pToken) {
1765 if (pshader_is_version_token(*pToken)) { /** version */
1766 pshader_set_version(This, *pToken);
1767 ++pToken;
1768 ++len;
1769 continue;
1771 if (pshader_is_comment_token(*pToken)) { /** comment */
1772 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1773 ++pToken;
1774 TRACE("//%s\n", (char*)pToken);
1775 pToken += comment_len;
1776 len += comment_len + 1;
1777 continue;
1779 if (!This->baseShader.version) {
1780 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1782 curOpcode = pshader_program_get_opcode(This, *pToken);
1783 ++pToken;
1784 ++len;
1785 if (NULL == curOpcode) {
1787 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1788 while (*pToken & 0x80000000) {
1790 /* unknown current opcode ... */
1791 TRACE("unrecognized opcode: %08lx", *pToken);
1792 ++pToken;
1793 ++len;
1794 TRACE("\n");
1797 } else {
1798 if (curOpcode->opcode == D3DSIO_DCL) {
1799 pshader_program_dump_decl_usage(This, *pToken, *(pToken + 1));
1800 ++pToken;
1801 ++len;
1802 pshader_program_dump_ps_param(*pToken, 0);
1803 ++pToken;
1804 ++len;
1805 } else
1806 if (curOpcode->opcode == D3DSIO_DEF) {
1807 TRACE("def c%lu = ", *pToken & 0xFF);
1808 ++pToken;
1809 ++len;
1810 TRACE("%f ,", *(float *)pToken);
1811 ++pToken;
1812 ++len;
1813 TRACE("%f ,", *(float *)pToken);
1814 ++pToken;
1815 ++len;
1816 TRACE("%f ,", *(float *)pToken);
1817 ++pToken;
1818 ++len;
1819 TRACE("%f", *(float *)pToken);
1820 ++pToken;
1821 ++len;
1822 } else {
1823 TRACE("%s", curOpcode->name);
1824 if (curOpcode->num_params > 0) {
1825 pshader_program_dump_ins_modifiers(*pToken);
1826 TRACE(" ");
1827 pshader_program_dump_ps_param(*pToken, 0);
1828 ++pToken;
1829 ++len;
1830 for (i = 1; i < curOpcode->num_params; ++i) {
1831 TRACE(", ");
1832 pshader_program_dump_ps_param(*pToken, 1);
1833 ++pToken;
1834 ++len;
1838 TRACE("\n");
1841 This->baseShader.functionLength = (len + 1) * sizeof(DWORD);
1842 } else {
1843 This->baseShader.functionLength = 1; /* no Function defined use fixed function vertex processing */
1846 /* Generate HW shader in needed */
1847 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1848 TRACE("(%p) : Generating hardware program\n", This);
1849 #if 1
1850 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1851 #endif
1854 TRACE("(%p) : Copying the function\n", This);
1855 /* copy the function ... because it will certainly be released by application */
1856 if (NULL != pFunction) {
1857 This->baseShader.function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->baseShader.functionLength);
1858 memcpy((void *)This->baseShader.function, pFunction, This->baseShader.functionLength);
1859 } else {
1860 This->baseShader.function = NULL;
1863 /* TODO: Some proper return values for failures */
1864 TRACE("(%p) : Returning WINED3D_OK\n", This);
1865 return WINED3D_OK;
1868 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1870 /*** IUnknown methods ***/
1871 IWineD3DPixelShaderImpl_QueryInterface,
1872 IWineD3DPixelShaderImpl_AddRef,
1873 IWineD3DPixelShaderImpl_Release,
1874 /*** IWineD3DBase methods ***/
1875 IWineD3DPixelShaderImpl_GetParent,
1876 /*** IWineD3DBaseShader methods ***/
1877 IWineD3DPixelShaderImpl_SetFunction,
1878 /*** IWineD3DPixelShader methods ***/
1879 IWineD3DPixelShaderImpl_GetDevice,
1880 IWineD3DPixelShaderImpl_GetFunction