wined3d: Move the SHADER_OPCODE definition to wined3d_private.h.
[wine/multimedia.git] / dlls / wined3d / pixelshader.c
blob64fe501ac7a11d1f563c42af52b5fb8115dbf59a
1 /*
2 * shaders implementation
4 * Copyright 2005 Oliver Stieber
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include "config.h"
23 #include <math.h>
24 #include <stdio.h>
26 #include "wined3d_private.h"
28 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
30 #define GLINFO_LOCATION ((IWineD3DImpl *)(((IWineD3DDeviceImpl *)This->wineD3DDevice)->wineD3D))->gl_info
32 #if 0 /* Must not be 1 in cvs version */
33 # define PSTRACE(A) TRACE A
34 # define TRACE_VSVECTOR(name) TRACE( #name "=(%f, %f, %f, %f)\n", name.x, name.y, name.z, name.w)
35 #else
36 # define PSTRACE(A)
37 # define TRACE_VSVECTOR(name)
38 #endif
40 /* The maximum size of the program */
41 #define PGMSIZE 65535
43 #define REGMASK 0x00001FFF
45 #define GLNAME_REQUIRE_GLSL ((const char *)1)
46 /* *******************************************
47 IWineD3DPixelShader IUnknown parts follow
48 ******************************************* */
49 HRESULT WINAPI IWineD3DPixelShaderImpl_QueryInterface(IWineD3DPixelShader *iface, REFIID riid, LPVOID *ppobj)
51 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
52 TRACE("(%p)->(%s,%p)\n",This,debugstr_guid(riid),ppobj);
53 if (IsEqualGUID(riid, &IID_IUnknown)
54 || IsEqualGUID(riid, &IID_IWineD3DBase)
55 || IsEqualGUID(riid, &IID_IWineD3DPixelShader)) {
56 IUnknown_AddRef(iface);
57 *ppobj = This;
58 return D3D_OK;
60 return E_NOINTERFACE;
63 ULONG WINAPI IWineD3DPixelShaderImpl_AddRef(IWineD3DPixelShader *iface) {
64 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
65 TRACE("(%p) : AddRef increasing from %ld\n", This, This->ref);
66 return InterlockedIncrement(&This->ref);
69 ULONG WINAPI IWineD3DPixelShaderImpl_Release(IWineD3DPixelShader *iface) {
70 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
71 ULONG ref;
72 TRACE("(%p) : Releasing from %ld\n", This, This->ref);
73 ref = InterlockedDecrement(&This->ref);
74 if (ref == 0) {
75 HeapFree(GetProcessHeap(), 0, This);
77 return ref;
/* TODO: At the moment the function parser is single pass; it achieves this
   by passing constants to a couple of functions where they are then modified.
   At some point the parser needs to be made two pass (so that GLSL can be used if it's required by the shader).
   When that happens, constants should be worked out in the first pass to tidy up the second pass a bit.
*/
86 /* *******************************************
87 IWineD3DPixelShader IWineD3DPixelShader parts follow
88 ******************************************* */
90 HRESULT WINAPI IWineD3DPixelShaderImpl_GetParent(IWineD3DPixelShader *iface, IUnknown** parent){
91 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
93 *parent = This->parent;
94 IUnknown_AddRef(*parent);
95 TRACE("(%p) : returning %p\n", This, *parent);
96 return D3D_OK;
99 HRESULT WINAPI IWineD3DPixelShaderImpl_GetDevice(IWineD3DPixelShader* iface, IWineD3DDevice **pDevice){
100 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
101 IWineD3DDevice_AddRef((IWineD3DDevice *)This->wineD3DDevice);
102 *pDevice = (IWineD3DDevice *)This->wineD3DDevice;
103 TRACE("(%p) returning %p\n", This, *pDevice);
104 return D3D_OK;
108 HRESULT WINAPI IWineD3DPixelShaderImpl_GetFunction(IWineD3DPixelShader* impl, VOID* pData, UINT* pSizeOfData) {
109 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)impl;
110 FIXME("(%p) : pData(%p), pSizeOfData(%p)\n", This, pData, pSizeOfData);
112 if (NULL == pData) {
113 *pSizeOfData = This->functionLength;
114 return D3D_OK;
116 if (*pSizeOfData < This->functionLength) {
117 *pSizeOfData = This->functionLength;
118 return D3DERR_MOREDATA;
120 if (NULL == This->function) { /* no function defined */
121 TRACE("(%p) : GetFunction no User Function defined using NULL to %p\n", This, pData);
122 (*(DWORD **) pData) = NULL;
123 } else {
124 if (This->functionLength == 0) {
127 TRACE("(%p) : GetFunction copying to %p\n", This, pData);
128 memcpy(pData, This->function, This->functionLength);
130 return D3D_OK;
133 /*******************************
134 * pshader functions software VM
137 void pshader_add(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
138 d->x = s0->x + s1->x;
139 d->y = s0->y + s1->y;
140 d->z = s0->z + s1->z;
141 d->w = s0->w + s1->w;
142 PSTRACE(("executing add: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
143 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
146 void pshader_dp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
147 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z;
148 PSTRACE(("executing dp3: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
149 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
152 void pshader_dp4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
153 d->x = d->y = d->z = d->w = s0->x * s1->x + s0->y * s1->y + s0->z * s1->z + s0->w * s1->w;
154 PSTRACE(("executing dp4: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
155 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
158 void pshader_dst(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
159 d->x = 1.0f;
160 d->y = s0->y * s1->y;
161 d->z = s0->z;
162 d->w = s1->w;
163 PSTRACE(("executing dst: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
164 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
167 void pshader_expp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
168 union {
169 float f;
170 DWORD d;
171 } tmp;
173 tmp.f = floorf(s0->w);
174 d->x = powf(2.0f, tmp.f);
175 d->y = s0->w - tmp.f;
176 tmp.f = powf(2.0f, s0->w);
177 tmp.d &= 0xFFFFFF00U;
178 d->z = tmp.f;
179 d->w = 1.0f;
180 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
181 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
184 void pshader_lit(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
185 d->x = 1.0f;
186 d->y = (0.0f < s0->x) ? s0->x : 0.0f;
187 d->z = (0.0f < s0->x && 0.0f < s0->y) ? powf(s0->y, s0->w) : 0.0f;
188 d->w = 1.0f;
189 PSTRACE(("executing lit: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
190 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
193 void pshader_logp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
194 float tmp_f = fabsf(s0->w);
195 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
196 PSTRACE(("executing logp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
197 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
200 void pshader_mad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
201 d->x = s0->x * s1->x + s2->x;
202 d->y = s0->y * s1->y + s2->y;
203 d->z = s0->z * s1->z + s2->z;
204 d->w = s0->w * s1->w + s2->w;
205 PSTRACE(("executing mad: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) s2=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
206 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, s2->x, s2->y, s2->z, s2->w, d->x, d->y, d->z, d->w));
209 void pshader_max(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
210 d->x = (s0->x >= s1->x) ? s0->x : s1->x;
211 d->y = (s0->y >= s1->y) ? s0->y : s1->y;
212 d->z = (s0->z >= s1->z) ? s0->z : s1->z;
213 d->w = (s0->w >= s1->w) ? s0->w : s1->w;
214 PSTRACE(("executing max: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
215 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
218 void pshader_min(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
219 d->x = (s0->x < s1->x) ? s0->x : s1->x;
220 d->y = (s0->y < s1->y) ? s0->y : s1->y;
221 d->z = (s0->z < s1->z) ? s0->z : s1->z;
222 d->w = (s0->w < s1->w) ? s0->w : s1->w;
223 PSTRACE(("executing min: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
224 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
227 void pshader_mov(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
228 d->x = s0->x;
229 d->y = s0->y;
230 d->z = s0->z;
231 d->w = s0->w;
232 PSTRACE(("executing mov: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
233 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
236 void pshader_mul(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
237 d->x = s0->x * s1->x;
238 d->y = s0->y * s1->y;
239 d->z = s0->z * s1->z;
240 d->w = s0->w * s1->w;
241 PSTRACE(("executing mul: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
242 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
/* nop: does nothing apart from the optional execution trace. */
void pshader_nop(void)
{
    /* NOPPPP ahhh too easy ;) */
    PSTRACE(("executing nop\n"));
}
250 void pshader_rcp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
251 d->x = d->y = d->z = d->w = (0.0f == s0->w) ? HUGE_VAL : 1.0f / s0->w;
252 PSTRACE(("executing rcp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
253 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
256 void pshader_rsq(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
257 float tmp_f = fabsf(s0->w);
258 d->x = d->y = d->z = d->w = (0.0f == tmp_f) ? HUGE_VAL : ((1.0f != tmp_f) ? 1.0f / sqrtf(tmp_f) : 1.0f);
259 PSTRACE(("executing rsq: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
260 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
263 void pshader_sge(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
264 d->x = (s0->x >= s1->x) ? 1.0f : 0.0f;
265 d->y = (s0->y >= s1->y) ? 1.0f : 0.0f;
266 d->z = (s0->z >= s1->z) ? 1.0f : 0.0f;
267 d->w = (s0->w >= s1->w) ? 1.0f : 0.0f;
268 PSTRACE(("executing sge: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
269 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
272 void pshader_slt(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
273 d->x = (s0->x < s1->x) ? 1.0f : 0.0f;
274 d->y = (s0->y < s1->y) ? 1.0f : 0.0f;
275 d->z = (s0->z < s1->z) ? 1.0f : 0.0f;
276 d->w = (s0->w < s1->w) ? 1.0f : 0.0f;
277 PSTRACE(("executing slt: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
278 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
281 void pshader_sub(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
282 d->x = s0->x - s1->x;
283 d->y = s0->y - s1->y;
284 d->z = s0->z - s1->z;
285 d->w = s0->w - s1->w;
286 PSTRACE(("executing sub: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
287 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
291 * Version 1.1 specific
294 void pshader_exp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
295 d->x = d->y = d->z = d->w = powf(2.0f, s0->w);
296 PSTRACE(("executing exp: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
297 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
300 void pshader_log(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
301 float tmp_f = fabsf(s0->w);
302 d->x = d->y = d->z = d->w = (0.0f != tmp_f) ? logf(tmp_f) / logf(2.0f) : -HUGE_VAL;
303 PSTRACE(("executing log: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
304 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
307 void pshader_frc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
308 d->x = s0->x - floorf(s0->x);
309 d->y = s0->y - floorf(s0->y);
310 d->z = 0.0f;
311 d->w = 1.0f;
312 PSTRACE(("executing frc: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
313 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
316 typedef FLOAT D3DMATRIX44[4][4];
317 typedef FLOAT D3DMATRIX43[4][3];
318 typedef FLOAT D3DMATRIX34[3][4];
319 typedef FLOAT D3DMATRIX33[3][3];
320 typedef FLOAT D3DMATRIX23[2][3];
322 void pshader_m4x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, /*WINED3DSHADERVECTOR* mat1*/ D3DMATRIX44 mat) {
324 * Buggy CODE: here only if cast not work for copy/paste
325 WINED3DSHADERVECTOR* mat2 = mat1 + 1;
326 WINED3DSHADERVECTOR* mat3 = mat1 + 2;
327 WINED3DSHADERVECTOR* mat4 = mat1 + 3;
328 d->x = mat1->x * s0->x + mat2->x * s0->y + mat3->x * s0->z + mat4->x * s0->w;
329 d->y = mat1->y * s0->x + mat2->y * s0->y + mat3->y * s0->z + mat4->y * s0->w;
330 d->z = mat1->z * s0->x + mat2->z * s0->y + mat3->z * s0->z + mat4->z * s0->w;
331 d->w = mat1->w * s0->x + mat2->w * s0->y + mat3->w * s0->z + mat4->w * s0->w;
333 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
334 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
335 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
336 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z + mat[3][3] * s0->w;
337 PSTRACE(("executing m4x4(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
338 PSTRACE(("executing m4x4(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
339 PSTRACE(("executing m4x4(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
340 PSTRACE(("executing m4x4(4): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], mat[3][3], s0->w, d->w));
343 void pshader_m4x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX34 mat) {
344 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z + mat[0][3] * s0->w;
345 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z + mat[1][3] * s0->w;
346 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z + mat[2][3] * s0->w;
347 d->w = 1.0f;
348 PSTRACE(("executing m4x3(1): mat=(%f, %f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], mat[0][3], s0->x, d->x));
349 PSTRACE(("executing m4x3(2): mat=(%f, %f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], mat[1][3], s0->y, d->y));
350 PSTRACE(("executing m4x3(3): mat=(%f, %f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], mat[2][3], s0->z, d->z));
351 PSTRACE(("executing m4x3(4): (%f) (%f) \n", s0->w, d->w));
354 void pshader_m3x4(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX43 mat) {
355 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
356 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
357 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
358 d->w = mat[3][0] * s0->x + mat[3][1] * s0->y + mat[3][2] * s0->z;
359 PSTRACE(("executing m3x4(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
360 PSTRACE(("executing m3x4(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
361 PSTRACE(("executing m3x4(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
362 PSTRACE(("executing m3x4(4): mat=(%f, %f, %f) (%f) (%f) \n", mat[3][0], mat[3][1], mat[3][2], s0->w, d->w));
365 void pshader_m3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX33 mat) {
366 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
367 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
368 d->z = mat[2][0] * s0->x + mat[2][1] * s0->y + mat[2][2] * s0->z;
369 d->w = 1.0f;
370 PSTRACE(("executing m3x3(1): mat=(%f, %f, %f) s0=(%f) d=(%f) \n", mat[0][0], mat[0][1], mat[0][2], s0->x, d->x));
371 PSTRACE(("executing m3x3(2): mat=(%f, %f, %f) (%f) (%f) \n", mat[1][0], mat[1][1], mat[1][2], s0->y, d->y));
372 PSTRACE(("executing m3x3(3): mat=(%f, %f, %f) X (%f) = (%f) \n", mat[2][0], mat[2][1], mat[2][2], s0->z, d->z));
373 PSTRACE(("executing m3x3(4): (%f) \n", d->w));
376 void pshader_m3x2(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, D3DMATRIX23 mat) {
377 FIXME("check\n");
378 d->x = mat[0][0] * s0->x + mat[0][1] * s0->y + mat[0][2] * s0->z;
379 d->y = mat[1][0] * s0->x + mat[1][1] * s0->y + mat[1][2] * s0->z;
380 d->z = 0.0f;
381 d->w = 1.0f;
385 * Version 2.0 specific
387 void pshader_lrp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
388 d->x = s0->x * (s1->x - s2->x) + s2->x;
389 d->y = s0->y * (s1->y - s2->y) + s2->y;
390 d->z = s0->z * (s1->z - s2->z) + s2->z;
391 d->w = s0->w * (s1->w - s2->w) + s2->w;
394 void pshader_crs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
395 d->x = s0->y * s1->z - s0->z * s1->y;
396 d->y = s0->z * s1->x - s0->x * s1->z;
397 d->z = s0->x * s1->y - s0->y * s1->x;
398 d->w = 0.9f; /* w is undefined, so set it to something safeish */
400 PSTRACE(("executing crs: s0=(%f, %f, %f, %f) s1=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
401 s0->x, s0->y, s0->z, s0->w, s1->x, s1->y, s1->z, s1->w, d->x, d->y, d->z, d->w));
404 void pshader_abs(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
405 d->x = fabsf(s0->x);
406 d->y = fabsf(s0->y);
407 d->z = fabsf(s0->z);
408 d->w = fabsf(s0->w);
409 PSTRACE(("executing abs: s0=(%f, %f, %f, %f) => d=(%f, %f, %f, %f)\n",
410 s0->x, s0->y, s0->z, s0->w, d->x, d->y, d->z, d->w));
413 /* Stubs */
414 void pshader_texcoord(WINED3DSHADERVECTOR* d) {
415 FIXME(" : Stub\n");
418 void pshader_texkill(WINED3DSHADERVECTOR* d) {
419 FIXME(" : Stub\n");
422 void pshader_tex(WINED3DSHADERVECTOR* d) {
423 FIXME(" : Stub\n");
425 void pshader_texld(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
426 FIXME(" : Stub\n");
429 void pshader_texbem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
430 FIXME(" : Stub\n");
433 void pshader_texbeml(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
434 FIXME(" : Stub\n");
437 void pshader_texreg2ar(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
438 FIXME(" : Stub\n");
441 void pshader_texreg2gb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
442 FIXME(" : Stub\n");
445 void pshader_texm3x2pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
446 FIXME(" : Stub\n");
449 void pshader_texm3x2tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
450 FIXME(" : Stub\n");
453 void pshader_texm3x3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
454 FIXME(" : Stub\n");
457 void pshader_texm3x3pad(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
458 FIXME(" : Stub\n");
461 void pshader_texm3x3diff(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
462 FIXME(" : Stub\n");
465 void pshader_texm3x3spec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
466 FIXME(" : Stub\n");
469 void pshader_texm3x3vspec(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
470 FIXME(" : Stub\n");
473 void pshader_cnd(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
474 FIXME(" : Stub\n");
477 /* Def is C[n] = {n.nf, n.nf, n.nf, n.nf} */
478 void pshader_def(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2, WINED3DSHADERVECTOR* s3) {
479 FIXME(" : Stub\n");
482 void pshader_texreg2rgb(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
483 FIXME(" : Stub\n");
486 void pshader_texdp3tex(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
487 FIXME(" : Stub\n");
490 void pshader_texm3x2depth(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
491 FIXME(" : Stub\n");
494 void pshader_texdp3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
495 FIXME(" : Stub\n");
498 void pshader_texm3x3(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
499 FIXME(" : Stub\n");
502 void pshader_texdepth(WINED3DSHADERVECTOR* d) {
503 FIXME(" : Stub\n");
506 void pshader_cmp(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1, WINED3DSHADERVECTOR* s2) {
507 FIXME(" : Stub\n");
510 void pshader_bem(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
511 FIXME(" : Stub\n");
514 void pshader_call(WINED3DSHADERVECTOR* d) {
515 FIXME(" : Stub\n");
518 void pshader_callnz(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
519 FIXME(" : Stub\n");
522 void pshader_loop(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
523 FIXME(" : Stub\n");
526 void pshader_ret(WINED3DSHADERVECTOR* d) {
527 FIXME(" : Stub\n");
530 void pshader_endloop(WINED3DSHADERVECTOR* d) {
531 FIXME(" : Stub\n");
534 void pshader_dcl(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
535 FIXME(" : Stub\n");
538 void pshader_pow(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0, WINED3DSHADERVECTOR* s1) {
539 FIXME(" : Stub\n");
542 void pshader_sng(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
543 FIXME(" : Stub\n");
546 void pshader_nrm(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
547 FIXME(" : Stub\n");
550 void pshader_sincos(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
551 FIXME(" : Stub\n");
554 void pshader_rep(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
555 FIXME(" : Stub\n");
558 void pshader_endrep(void) {
559 FIXME(" : Stub\n");
562 void pshader_if(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
563 FIXME(" : Stub\n");
566 void pshader_ifc(WINED3DSHADERVECTOR* d, WINED3DSHADERVECTOR* s0) {
567 FIXME(" : Stub\n");
570 void pshader_else(WINED3DSHADERVECTOR* d) {
571 FIXME(" : Stub\n");
574 void pshader_label(WINED3DSHADERVECTOR* d) {
575 FIXME(" : Stub\n");
578 void pshader_endif(WINED3DSHADERVECTOR* d) {
579 FIXME(" : Stub\n");
582 void pshader_break(WINED3DSHADERVECTOR* d) {
583 FIXME(" : Stub\n");
586 void pshader_breakc(WINED3DSHADERVECTOR* d) {
587 FIXME(" : Stub\n");
590 void pshader_mova(WINED3DSHADERVECTOR* d) {
591 FIXME(" : Stub\n");
594 void pshader_defb(WINED3DSHADERVECTOR* d) {
595 FIXME(" : Stub\n");
598 void pshader_defi(WINED3DSHADERVECTOR* d) {
599 FIXME(" : Stub\n");
602 void pshader_dp2add(WINED3DSHADERVECTOR* d) {
603 FIXME(" : Stub\n");
606 void pshader_dsx(WINED3DSHADERVECTOR* d) {
607 FIXME(" : Stub\n");
610 void pshader_dsy(WINED3DSHADERVECTOR* d) {
611 FIXME(" : Stub\n");
614 void pshader_texldd(WINED3DSHADERVECTOR* d) {
615 FIXME(" : Stub\n");
618 void pshader_setp(WINED3DSHADERVECTOR* d) {
619 FIXME(" : Stub\n");
622 void pshader_texldl(WINED3DSHADERVECTOR* d) {
623 FIXME(" : Stub\n");
626 void pshader_breakp(WINED3DSHADERVECTOR* d) {
627 FIXME(" : Stub\n");
630 * log, exp, frc, m*x* seems to be macros ins ... to see
632 static CONST SHADER_OPCODE pshader_ins [] = {
633 {D3DSIO_NOP, "nop", "NOP", 0, pshader_nop, 0, 0},
634 {D3DSIO_MOV, "mov", "MOV", 2, pshader_mov, 0, 0},
635 {D3DSIO_ADD, "add", "ADD", 3, pshader_add, 0, 0},
636 {D3DSIO_SUB, "sub", "SUB", 3, pshader_sub, 0, 0},
637 {D3DSIO_MAD, "mad", "MAD", 4, pshader_mad, 0, 0},
638 {D3DSIO_MUL, "mul", "MUL", 3, pshader_mul, 0, 0},
639 {D3DSIO_RCP, "rcp", "RCP", 2, pshader_rcp, 0, 0},
640 {D3DSIO_RSQ, "rsq", "RSQ", 2, pshader_rsq, 0, 0},
641 {D3DSIO_DP3, "dp3", "DP3", 3, pshader_dp3, 0, 0},
642 {D3DSIO_DP4, "dp4", "DP4", 3, pshader_dp4, 0, 0},
643 {D3DSIO_MIN, "min", "MIN", 3, pshader_min, 0, 0},
644 {D3DSIO_MAX, "max", "MAX", 3, pshader_max, 0, 0},
645 {D3DSIO_SLT, "slt", "SLT", 3, pshader_slt, 0, 0},
646 {D3DSIO_SGE, "sge", "SGE", 3, pshader_sge, 0, 0},
647 {D3DSIO_ABS, "abs", "ABS", 2, pshader_abs, 0, 0},
648 {D3DSIO_EXP, "exp", "EX2", 2, pshader_exp, 0, 0},
649 {D3DSIO_LOG, "log", "LG2", 2, pshader_log, 0, 0},
650 {D3DSIO_LIT, "lit", "LIT", 2, pshader_lit, 0, 0},
651 {D3DSIO_DST, "dst", "DST", 3, pshader_dst, 0, 0},
652 {D3DSIO_LRP, "lrp", "LRP", 4, pshader_lrp, 0, 0},
653 {D3DSIO_FRC, "frc", "FRC", 2, pshader_frc, 0, 0},
654 {D3DSIO_M4x4, "m4x4", "undefined", 3, pshader_m4x4, 0, 0},
655 {D3DSIO_M4x3, "m4x3", "undefined", 3, pshader_m4x3, 0, 0},
656 {D3DSIO_M3x4, "m3x4", "undefined", 3, pshader_m3x4, 0, 0},
657 {D3DSIO_M3x3, "m3x3", "undefined", 3, pshader_m3x3, 0, 0},
658 {D3DSIO_M3x2, "m3x2", "undefined", 3, pshader_m3x2, 0, 0},
661 /** FIXME: use direct access so add the others opcodes as stubs */
662 /* NOTE: gl function is currently NULL for calls and loops because they are not yet supported
663 They can be easily managed in software by introducing a call/loop stack and should be possible to implement in glsl ol NV_shader's */
664 {D3DSIO_CALL, "call", GLNAME_REQUIRE_GLSL, 1, pshader_call, 0, 0},
665 {D3DSIO_CALLNZ, "callnz", GLNAME_REQUIRE_GLSL, 2, pshader_callnz, 0, 0},
666 {D3DSIO_LOOP, "loop", GLNAME_REQUIRE_GLSL, 2, pshader_loop, 0, 0},
667 {D3DSIO_RET, "ret", GLNAME_REQUIRE_GLSL, 0, pshader_ret, 0, 0},
668 {D3DSIO_ENDLOOP, "endloop", GLNAME_REQUIRE_GLSL, 0, pshader_endloop, 0, 0},
669 {D3DSIO_LABEL, "label", GLNAME_REQUIRE_GLSL, 1, pshader_label, 0, 0},
670 /* DCL is a specil operation */
671 {D3DSIO_DCL, "dcl", NULL, 1, pshader_dcl, 0, 0},
672 {D3DSIO_POW, "pow", "POW", 3, pshader_pow, 0, 0},
673 {D3DSIO_CRS, "crs", "XPS", 3, pshader_crs, 0, 0},
674 /* TODO: sng can possibly be performed as
675 RCP tmp, vec
676 MUL out, tmp, vec*/
677 {D3DSIO_SGN, "sng", NULL, 2, pshader_sng, 0, 0},
678 /* TODO: xyz normalise can be performed as VS_ARB using one temporary register,
679 DP3 tmp , vec, vec;
680 RSQ tmp, tmp.x;
681 MUL vec.xyz, vec, tmp;
682 but I think this is better because it accounts for w properly.
683 DP3 tmp , vec, vec;
684 RSQ tmp, tmp.x;
685 MUL vec, vec, tmp;
688 {D3DSIO_NRM, "nrm", NULL, 2, pshader_nrm, 0, 0},
689 {D3DSIO_SINCOS, "sincos", NULL, 2, pshader_sincos, 0, 0},
690 {D3DSIO_REP , "rep", GLNAME_REQUIRE_GLSL, 2, pshader_rep, 0, 0},
691 {D3DSIO_ENDREP, "endrep", GLNAME_REQUIRE_GLSL, 0, pshader_endrep, 0, 0},
692 {D3DSIO_IF, "if", GLNAME_REQUIRE_GLSL, 2, pshader_if, 0, 0},
693 {D3DSIO_IFC, "ifc", GLNAME_REQUIRE_GLSL, 2, pshader_ifc, 0, 0},
694 {D3DSIO_ELSE, "else", GLNAME_REQUIRE_GLSL, 2, pshader_else, 0, 0},
695 {D3DSIO_ENDIF, "endif", GLNAME_REQUIRE_GLSL, 2, pshader_endif, 0, 0},
696 {D3DSIO_BREAK, "break", GLNAME_REQUIRE_GLSL, 2, pshader_break, 0, 0},
697 {D3DSIO_BREAKC, "breakc", GLNAME_REQUIRE_GLSL, 2, pshader_breakc, 0, 0},
698 {D3DSIO_MOVA, "mova", GLNAME_REQUIRE_GLSL, 2, pshader_mova, 0, 0},
699 {D3DSIO_DEFB, "defb", GLNAME_REQUIRE_GLSL, 2, pshader_defb, 0, 0},
700 {D3DSIO_DEFI, "defi", GLNAME_REQUIRE_GLSL, 2, pshader_defi, 0, 0},
702 {D3DSIO_TEXCOORD, "texcoord", "undefined", 1, pshader_texcoord, 0, D3DPS_VERSION(1,3)},
703 {D3DSIO_TEXCOORD, "texcrd", "undefined", 2, pshader_texcoord, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
704 {D3DSIO_TEXKILL, "texkill", "KIL", 1, pshader_texkill, D3DPS_VERSION(1,0), D3DPS_VERSION(1,4)},
705 {D3DSIO_TEX, "tex", "undefined", 1, pshader_tex, 0, D3DPS_VERSION(1,3)},
706 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 2, pshader_texld, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
707 {D3DSIO_TEX, "texld", GLNAME_REQUIRE_GLSL, 3, pshader_texld, D3DPS_VERSION(2,0), -1},
708 {D3DSIO_TEXBEM, "texbem", "undefined", 2, pshader_texbem, 0, D3DPS_VERSION(1,3)},
709 {D3DSIO_TEXBEML, "texbeml", GLNAME_REQUIRE_GLSL, 2, pshader_texbeml, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
710 {D3DSIO_TEXREG2AR,"texreg2ar","undefined", 2, pshader_texreg2ar, D3DPS_VERSION(1,1), D3DPS_VERSION(1,3)},
711 {D3DSIO_TEXREG2GB,"texreg2gb","undefined", 2, pshader_texreg2gb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
712 {D3DSIO_TEXM3x2PAD, "texm3x2pad", "undefined", 2, pshader_texm3x2pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
713 {D3DSIO_TEXM3x2TEX, "texm3x2tex", "undefined", 2, pshader_texm3x2tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
714 {D3DSIO_TEXM3x3PAD, "texm3x3pad", "undefined", 2, pshader_texm3x3pad, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
715 {D3DSIO_TEXM3x3DIFF, "texm3x3diff", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3diff, D3DPS_VERSION(0,0), D3DPS_VERSION(0,0)},
716 {D3DSIO_TEXM3x3SPEC, "texm3x3spec", "undefined", 3, pshader_texm3x3spec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
717 {D3DSIO_TEXM3x3VSPEC, "texm3x3vspe", "undefined", 2, pshader_texm3x3vspec, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
718 {D3DSIO_TEXM3x3TEX, "texm3x3tex", "undefined", 2, pshader_texm3x3tex, D3DPS_VERSION(1,0), D3DPS_VERSION(1,3)},
719 {D3DSIO_EXPP, "expp", "EXP", 2, pshader_expp, 0, 0},
720 {D3DSIO_LOGP, "logp", "LOG", 2, pshader_logp, 0, 0},
721 {D3DSIO_CND, "cnd", GLNAME_REQUIRE_GLSL, 4, pshader_cnd, D3DPS_VERSION(1,1), D3DPS_VERSION(1,4)},
722 /* def is a special operation */
723 {D3DSIO_DEF, "def", "undefined", 5, pshader_def, 0, 0},
724 {D3DSIO_TEXREG2RGB, "texreg2rgb", GLNAME_REQUIRE_GLSL, 2, pshader_texreg2rgb, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
725 {D3DSIO_TEXDP3TEX, "texdp3tex", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3tex, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
726 {D3DSIO_TEXM3x2DEPTH, "texm3x2depth", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x2depth,D3DPS_VERSION(1,3), D3DPS_VERSION(1,3)},
727 {D3DSIO_TEXDP3, "texdp3", GLNAME_REQUIRE_GLSL, 2, pshader_texdp3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
728 {D3DSIO_TEXM3x3, "texm3x3", GLNAME_REQUIRE_GLSL, 2, pshader_texm3x3, D3DPS_VERSION(1,2), D3DPS_VERSION(1,3)},
729 {D3DSIO_TEXDEPTH, "texdepth", GLNAME_REQUIRE_GLSL,1, pshader_texdepth, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
730 {D3DSIO_CMP, "cmp", GLNAME_REQUIRE_GLSL, 4, pshader_cmp, D3DPS_VERSION(1,1), D3DPS_VERSION(3,0)},
731 {D3DSIO_BEM, "bem", GLNAME_REQUIRE_GLSL, 3, pshader_bem, D3DPS_VERSION(1,4), D3DPS_VERSION(1,4)},
732 /* TODO: dp2add can be made out of multiple instuctions */
733 {D3DSIO_DP2ADD, "dp2add", GLNAME_REQUIRE_GLSL, 2, pshader_dp2add, 0, 0},
734 {D3DSIO_DSX, "dsx", GLNAME_REQUIRE_GLSL, 2, pshader_dsx, 0, 0},
735 {D3DSIO_DSY, "dsy", GLNAME_REQUIRE_GLSL, 2, pshader_dsy, 0, 0},
736 {D3DSIO_TEXLDD, "texldd", GLNAME_REQUIRE_GLSL, 2, pshader_texldd, 0, 0},
737 {D3DSIO_SETP, "setp", GLNAME_REQUIRE_GLSL, 2, pshader_setp, 0, 0},
738 {D3DSIO_TEXLDL, "texdl", GLNAME_REQUIRE_GLSL, 2, pshader_texldl, 0, 0},
739 {D3DSIO_BREAKP, "breakp", GLNAME_REQUIRE_GLSL, 2, pshader_breakp, 0, 0},
740 {D3DSIO_PHASE, "phase", GLNAME_REQUIRE_GLSL, 0, pshader_nop, 0, 0},
741 {0, NULL, NULL, 0, NULL, 0, 0}
745 inline static const SHADER_OPCODE* pshader_program_get_opcode(IWineD3DPixelShaderImpl *This, const DWORD code) {
746 DWORD i = 0;
747 DWORD version = This->version;
748 DWORD hex_version = D3DPS_VERSION(version/10, version%10);
749 /** TODO: use dichotomic search */
750 while (NULL != pshader_ins[i].name) {
751 if (((code & D3DSI_OPCODE_MASK) == pshader_ins[i].opcode) &&
752 (((hex_version >= pshader_ins[i].min_version) && (hex_version <= pshader_ins[i].max_version)) ||
753 ((pshader_ins[i].min_version == 0) && (pshader_ins[i].max_version == 0)))) {
754 return &pshader_ins[i];
756 ++i;
758 FIXME("Unsupported opcode %lx(%ld) masked %lx version %ld\n", code, code, code & D3DSI_OPCODE_MASK, version);
759 return NULL;
762 inline static BOOL pshader_is_version_token(DWORD token) {
763 return 0xFFFF0000 == (token & 0xFFFF0000);
766 inline static BOOL pshader_is_comment_token(DWORD token) {
767 return D3DSIO_COMMENT == (token & D3DSI_OPCODE_MASK);
771 inline static void get_register_name(const DWORD param, char* regstr, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
772 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
774 DWORD reg = param & REGMASK;
775 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT);
777 switch (regtype) {
778 case D3DSPR_TEMP:
779 sprintf(regstr, "R%lu", reg);
780 break;
781 case D3DSPR_INPUT:
782 if (reg==0) {
783 strcpy(regstr, "fragment.color.primary");
784 } else {
785 strcpy(regstr, "fragment.color.secondary");
787 break;
788 case D3DSPR_CONST:
789 if (constants[reg])
790 sprintf(regstr, "C%lu", reg);
791 else
792 sprintf(regstr, "program.env[%lu]", reg);
793 break;
794 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
795 sprintf(regstr,"T%lu", reg);
796 break;
797 case D3DSPR_RASTOUT:
798 sprintf(regstr, "%s", rastout_reg_names[reg]);
799 break;
800 case D3DSPR_ATTROUT:
801 sprintf(regstr, "oD[%lu]", reg);
802 break;
803 case D3DSPR_TEXCRDOUT:
804 sprintf(regstr, "oT[%lu]", reg);
805 break;
806 default:
807 FIXME("Unhandled register name Type(%ld)\n", regtype);
808 break;
812 inline static void get_write_mask(const DWORD output_reg, char *write_mask) {
813 *write_mask = 0;
814 if ((output_reg & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
815 strcat(write_mask, ".");
816 if (output_reg & D3DSP_WRITEMASK_0) strcat(write_mask, "r");
817 if (output_reg & D3DSP_WRITEMASK_1) strcat(write_mask, "g");
818 if (output_reg & D3DSP_WRITEMASK_2) strcat(write_mask, "b");
819 if (output_reg & D3DSP_WRITEMASK_3) strcat(write_mask, "a");
823 inline static void get_input_register_swizzle(const DWORD instr, char *swzstring) {
824 static const char swizzle_reg_chars[] = "rgba";
825 DWORD swizzle = (instr & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
826 DWORD swizzle_x = swizzle & 0x03;
827 DWORD swizzle_y = (swizzle >> 2) & 0x03;
828 DWORD swizzle_z = (swizzle >> 4) & 0x03;
829 DWORD swizzle_w = (swizzle >> 6) & 0x03;
831 * swizzle bits fields:
832 * WWZZYYXX
834 *swzstring = 0;
835 if ((D3DSP_NOSWIZZLE >> D3DSP_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
836 if (swizzle_x == swizzle_y &&
837 swizzle_x == swizzle_z &&
838 swizzle_x == swizzle_w) {
839 sprintf(swzstring, ".%c", swizzle_reg_chars[swizzle_x]);
840 } else {
841 sprintf(swzstring, ".%c%c%c%c",
842 swizzle_reg_chars[swizzle_x],
843 swizzle_reg_chars[swizzle_y],
844 swizzle_reg_chars[swizzle_z],
845 swizzle_reg_chars[swizzle_w]);
850 inline static void addline(unsigned int *lineNum, char *pgm, unsigned int *pgmLength, char *line) {
851 int lineLen = strlen(line);
852 if(lineLen + *pgmLength > PGMSIZE - 1 /* - 1 to allow a NULL at the end */) {
853 ERR("The buffer allocated for the vertex program string pgmStr is too small at %d bytes, at least %d bytes in total are required.\n", PGMSIZE, lineLen + *pgmLength);
854 return;
855 } else {
856 memcpy(pgm + *pgmLength, line, lineLen);
859 *pgmLength += lineLen;
860 ++(*lineNum);
861 TRACE("GL HW (%u, %u) : %s", *lineNum, *pgmLength, line);
/* Multiplier operand used by gen_output_modifier_line, indexed by the 4-bit
 * destination shift value. Entries spelled "dummy" have no matching component
 * in the coefmul / coefdiv parameters. */
static const char* shift_tab[] = {
    /*  0: none */ "dummy",
    /*  1: x2   */ "coefmul.x",
    /*  2: x4   */ "coefmul.y",
    /*  3: x8   */ "coefmul.z",
    /*  4: x16  */ "coefmul.w",
    /*  5: x32  */ "dummy",
    /*  6: x64  */ "dummy",
    /*  7: x128 */ "dummy",
    /*  8: d256 */ "dummy",
    /*  9: d128 */ "dummy",
    /* 10: d64  */ "dummy",
    /* 11: d32  */ "dummy",
    /* 12: d16  */ "coefdiv.w",
    /* 13: d8   */ "coefdiv.z",
    /* 14: d4   */ "coefdiv.y",
    /* 15: d2   */ "coefdiv.x"
};

883 inline static void gen_output_modifier_line(int saturate, char *write_mask, int shift, char *regstr, char* line) {
884 /* Generate a line that does the output modifier computation */
885 sprintf(line, "MUL%s %s%s, %s, %s;", saturate ? "_SAT" : "", regstr, write_mask, regstr, shift_tab[shift]);
888 inline static int gen_input_modifier_line(const DWORD instr, int tmpreg, char *outregstr, char *line, char constants[WINED3D_PSHADER_MAX_CONSTANTS]) {
889 /* Generate a line that does the input modifier computation and return the input register to use */
890 static char regstr[256];
891 static char tmpline[256];
892 int insert_line;
894 /* Assume a new line will be added */
895 insert_line = 1;
897 /* Get register name */
898 get_register_name(instr, regstr, constants);
900 TRACE(" Register name %s\n", regstr);
901 switch (instr & D3DSP_SRCMOD_MASK) {
902 case D3DSPSM_NONE:
903 strcpy(outregstr, regstr);
904 insert_line = 0;
905 break;
906 case D3DSPSM_NEG:
907 sprintf(outregstr, "-%s", regstr);
908 insert_line = 0;
909 break;
910 case D3DSPSM_BIAS:
911 sprintf(line, "ADD T%c, %s, -coefdiv.x;", 'A' + tmpreg, regstr);
912 break;
913 case D3DSPSM_BIASNEG:
914 sprintf(line, "ADD T%c, -%s, coefdiv.x;", 'A' + tmpreg, regstr);
915 break;
916 case D3DSPSM_SIGN:
917 sprintf(line, "MAD T%c, %s, coefmul.x, -one.x;", 'A' + tmpreg, regstr);
918 break;
919 case D3DSPSM_SIGNNEG:
920 sprintf(line, "MAD T%c, %s, -coefmul.x, one.x;", 'A' + tmpreg, regstr);
921 break;
922 case D3DSPSM_COMP:
923 sprintf(line, "SUB T%c, one.x, %s;", 'A' + tmpreg, regstr);
924 break;
925 case D3DSPSM_X2:
926 sprintf(line, "ADD T%c, %s, %s;", 'A' + tmpreg, regstr, regstr);
927 break;
928 case D3DSPSM_X2NEG:
929 sprintf(line, "ADD T%c, -%s, -%s;", 'A' + tmpreg, regstr, regstr);
930 break;
931 case D3DSPSM_DZ:
932 sprintf(line, "RCP T%c, %s.z;", 'A' + tmpreg, regstr);
933 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
934 strcat(line, "\n"); /* Hack */
935 strcat(line, tmpline);
936 break;
937 case D3DSPSM_DW:
938 sprintf(line, "RCP T%c, %s;", 'A' + tmpreg, regstr);
939 sprintf(tmpline, "MUL T%c, %s, T%c;", 'A' + tmpreg, regstr, 'A' + tmpreg);
940 strcat(line, "\n"); /* Hack */
941 strcat(line, tmpline);
942 break;
943 default:
944 strcpy(outregstr, regstr);
945 insert_line = 0;
948 if (insert_line) {
949 /* Substitute the register name */
950 sprintf(outregstr, "T%c", 'A' + tmpreg);
953 return insert_line;
955 /* NOTE: A description of how to parse tokens can be found at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/graphics/hh/graphics/usermodedisplaydriver_shader_cc8e4e05-f5c3-4ec0-8853-8ce07c1551b2.xml.asp */
/*
 * Translate a D3D pixel shader token stream into an ARB fragment program.
 *
 * The token stream at pFunction is walked until the D3DPS_END() token. The
 * version token emits the program header plus the TEMP / PARAM declarations,
 * comment tokens are skipped, and every instruction token is looked up with
 * pshader_program_get_opcode and turned into one or more text lines that are
 * appended to a PGMSIZE-byte heap buffer through addline. After the loop a
 * final "MOV result.color, R0;" and "END" are appended, the text is passed to
 * glProgramStringARB for a freshly generated program id stored in This->prgId
 * (set to -1 when GL reports GL_INVALID_OPERATION), and the buffer is freed.
 *
 * iface     - the pixel shader object; its constants[] flags and prgId are written.
 * pFunction - the shader token stream; when NULL only the trailer is generated.
 */
956 inline static VOID IWineD3DPixelShaderImpl_GenerateProgramArbHW(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
957 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
958 const DWORD *pToken = pFunction;
959 const SHADER_OPCODE *curOpcode = NULL;
960 const DWORD *pInstr;
961 DWORD i;
962 unsigned lineNum = 0; /* The line number of the generated program (for loging)*/
963 char *pgmStr = NULL; /* A pointer to the program data generated by this function */
964 char tmpLine[255];
965 DWORD nUseAddressRegister = 0;
966 #if 0 /* TODO: loop register (just another address register ) */
967 BOOL hasLoops = FALSE;
968 #endif
970 BOOL saturate; /* clamp to 0.0 -> 1.0*/
971 int row = 0; /* not sure, something to do with macros? */
972 DWORD tcw[2];
973 int version = 0; /* The version of the shader */
975 /* Keep a running length for pgmStr so that we don't have to caculate strlen every time we concatanate */
976 unsigned int pgmLength = 0;
978 #if 0 /* FIXME: Use the buffer that is held by the device, this is ok since fixups will be skipped for software shaders
979 it also requires entering a critical section but cuts down the runtime footprint of wined3d and any memory fragmentation that may occur... */
980 if (This->device->fixupVertexBufferSize < PGMSIZE) {
981 HeapFree(GetProcessHeap(), 0, This->fixupVertexBuffer);
982 This->fixupVertexBuffer = HeapAlloc(GetProcessHeap() , 0, PGMSIZE);
983 This->fixupVertexBufferSize = PGMSIZE;
984 This->fixupVertexBuffer[0] = 0;
986 pgmStr = This->device->fixupVertexBuffer;
987 #else
/* NOTE(review): the HeapAlloc result is used below without a NULL check. */
988 pgmStr = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, PGMSIZE); /* 64kb should be enough */
989 #endif
992 /* TODO: Think about using a first pass to work out what's required for the second pass. */
/* Clear the "locally defined constant" flags; D3DSIO_DEF sets them again below. */
993 for(i = 0; i < WINED3D_PSHADER_MAX_CONSTANTS; i++)
994 This->constants[i] = 0;
996 if (NULL != pToken) {
997 while (D3DPS_END() != *pToken) {
998 #if 0 /* For pixel and vertex shader versions 2_0 and later, bits 24 through 27 specify the size in DWORDs of the instruction */
999 if (version >= 2) {
1000 instructionSize = pToken & SIZEBITS >> 27;
1002 #endif
/* Version token: emit the program header and all fixed declarations. */
1003 if (pshader_is_version_token(*pToken)) { /** version */
1004 int numTemps;
1005 int numConstants;
1007 /* Extract version *10 into integer value (ie. 1.0 == 10, 1.1==11 etc */
1008 version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1010 TRACE("found version token ps.%lu.%lu;\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1012 /* Each release of pixel shaders has had different numbers of temp registers */
1013 switch (version) {
1014 case 10:
1015 case 11:
1016 case 12:
1017 case 13:
1018 case 14: numTemps=12;
1019 numConstants=8;
1020 strcpy(tmpLine, "!!ARBfp1.0\n");
1021 break;
1022 case 20: numTemps=12;
1023 numConstants=8;
1024 strcpy(tmpLine, "!!ARBfp2.0\n");
1025 FIXME("No work done yet to support ps2.0 in hw\n");
1026 break;
1027 case 30: numTemps=32;
1028 numConstants=8;
1029 strcpy(tmpLine, "!!ARBfp3.0\n");
1030 FIXME("No work done yet to support ps3.0 in hw\n");
1031 break;
1032 default:
1033 numTemps=12;
1034 numConstants=8;
1035 strcpy(tmpLine, "!!ARBfp1.0\n");
1036 FIXME("Unrecognized pixel shader version!\n");
1038 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1040 /* TODO: find out how many registers are really needed */
1041 for(i = 0; i < 6; i++) {
1042 sprintf(tmpLine, "TEMP T%lu;\n", i);
1043 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1046 for(i = 0; i < 6; i++) {
1047 sprintf(tmpLine, "TEMP R%lu;\n", i);
1048 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Scratch registers: TMP/TMP2 for the texture ops, TA..TC for input modifiers. */
1051 sprintf(tmpLine, "TEMP TMP;\n");
1052 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1053 sprintf(tmpLine, "TEMP TMP2;\n");
1054 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1055 sprintf(tmpLine, "TEMP TA;\n");
1056 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1057 sprintf(tmpLine, "TEMP TB;\n");
1058 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1059 sprintf(tmpLine, "TEMP TC;\n");
1060 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1062 strcpy(tmpLine, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
1063 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1064 strcpy(tmpLine, "PARAM coefmul = { 2, 4, 8, 16 };\n");
1065 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1066 strcpy(tmpLine, "PARAM one = { 1.0, 1.0, 1.0, 1.0 };\n");
1067 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1069 for(i = 0; i < 4; i++) {
1070 sprintf(tmpLine, "MOV T%lu, fragment.texcoord[%lu];\n", i, i);
1071 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1074 ++pToken;
1075 continue;
/* Comment token: skip the token itself plus comment_len payload DWORDs. */
1078 if (pshader_is_comment_token(*pToken)) { /** comment */
1079 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1080 ++pToken;
1081 FIXME("#%s\n", (char*)pToken);
1082 pToken += comment_len;
1083 continue;
1085 /* here */
1086 #if 0 /* Not sure what these are here for, they're not required for vshaders */
1087 code = *pToken;
1088 #endif
/* Instruction token: pToken is advanced past the opcode, so *pToken is the first parameter. */
1089 pInstr = pToken;
1090 curOpcode = pshader_program_get_opcode(This, *pToken);
1091 ++pToken;
1092 if (NULL == curOpcode) {
1093 /* unknown current opcode ... (shouldn't be any!) */
1094 while (*pToken & 0x80000000) { /* TODO: Think of a sensible name for 0x80000000 */
1095 FIXME("unrecognized opcode: %08lx\n", *pToken);
1096 ++pToken;
1098 } else if (GLNAME_REQUIRE_GLSL == curOpcode->glname) {
1099 /* if the token isn't supported by this cross compiler then skip it and its parameters */
1100 FIXME("Token %s requires greater functionality than Fragment_Progarm_ARB supports\n", curOpcode->name);
1101 pToken += curOpcode->num_params;
1102 } else {
1103 TRACE("Found opcode %s %s\n", curOpcode->name, curOpcode->glname);
1104 saturate = FALSE;
1106 /* Build opcode for GL vertex_program */
/* Cases that end in `break` leave the GL mnemonic in tmpLine for the generic operand
 * code after the switch; cases that end in `continue` emit their lines themselves. */
1107 switch (curOpcode->opcode) {
1108 case D3DSIO_NOP:
1109 case D3DSIO_PHASE:
1110 continue;
1111 case D3DSIO_MOV:
1112 /* Address registers must be loaded with the ARL instruction */
1113 if ((((*pToken) & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) == D3DSPR_ADDR) {
1114 if (((*pToken) & REGMASK) < nUseAddressRegister) {
1115 strcpy(tmpLine, "ARL");
1116 break;
1117 } else
1118 FIXME("(%p) Try to load A%ld an undeclared address register!\n", This, ((*pToken) & REGMASK));
1120 /* fall through */
1121 case D3DSIO_CND:
1122 case D3DSIO_CMP:
1123 case D3DSIO_ADD:
1124 case D3DSIO_SUB:
1125 case D3DSIO_MAD:
1126 case D3DSIO_MUL:
1127 case D3DSIO_RCP:
1128 case D3DSIO_RSQ:
1129 case D3DSIO_DP3:
1130 case D3DSIO_DP4:
1131 case D3DSIO_MIN:
1132 case D3DSIO_MAX:
1133 case D3DSIO_SLT:
1134 case D3DSIO_SGE:
1135 case D3DSIO_LIT:
1136 case D3DSIO_DST:
1137 case D3DSIO_FRC:
1138 case D3DSIO_EXPP:
1139 case D3DSIO_LOGP:
1140 case D3DSIO_EXP:
1141 case D3DSIO_LOG:
1142 case D3DSIO_LRP:
1143 case D3DSIO_TEXKILL:
1144 TRACE("Appending glname %s to tmpLine\n", curOpcode->glname);
1145 strcpy(tmpLine, curOpcode->glname);
1146 break;
1147 case D3DSIO_DEF:
1149 DWORD reg = *pToken & REGMASK;
1150 sprintf(tmpLine, "PARAM C%lu = { %f, %f, %f, %f };\n", reg,
1151 *((const float *)(pToken + 1)),
1152 *((const float *)(pToken + 2)),
1153 *((const float *)(pToken + 3)),
1154 *((const float *)(pToken + 4)) );
1156 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Mark the constant as locally defined so get_register_name emits "C<n>" for it. */
1158 This->constants[reg] = 1;
1159 pToken += 5;
1160 continue;
1162 break;
1163 case D3DSIO_TEX:
1165 char tmp[20];
1166 get_write_mask(*pToken, tmp);
/* Version 14 uses the two-parameter form (destination plus source register). */
1167 if (version != 14) {
1168 DWORD reg = *pToken & REGMASK;
1169 sprintf(tmpLine,"TEX T%lu%s, T%lu, texture[%lu], 2D;\n", reg, tmp, reg, reg);
1170 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1171 ++pToken;
1172 } else {
1173 char reg[20];
1174 DWORD reg1 = *pToken & REGMASK;
1175 DWORD reg2 = *++pToken & REGMASK;
1176 if (gen_input_modifier_line(*pToken, 0, reg, tmpLine, This->constants)) {
1177 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1179 sprintf(tmpLine,"TEX R%lu%s, %s, texture[%lu], 2D;\n", reg1, tmp, reg, reg2);
1180 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1181 ++pToken;
1183 continue;
1185 break;
1186 case D3DSIO_TEXCOORD:
1188 char tmp[20];
1189 get_write_mask(*pToken, tmp);
1190 if (version != 14) {
1191 DWORD reg = *pToken & REGMASK;
1192 sprintf(tmpLine, "MOV T%lu%s, fragment.texcoord[%lu];\n", reg, tmp, reg);
1193 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1194 ++pToken;
1195 } else {
1196 DWORD reg1 = *pToken & REGMASK;
1197 DWORD reg2 = *++pToken & REGMASK;
1198 sprintf(tmpLine, "MOV R%lu%s, fragment.texcoord[%lu];\n", reg1, tmp, reg2);
1199 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1200 ++pToken;
1202 continue;
1204 break;
1205 case D3DSIO_TEXM3x2PAD:
1207 DWORD reg = *pToken & REGMASK;
1208 char buf[50];
1209 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1210 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1212 sprintf(tmpLine, "DP3 TMP.x, T%lu, %s;\n", reg, buf);
1213 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1214 ++pToken;
1215 continue;
1217 break;
1218 case D3DSIO_TEXM3x2TEX:
1220 DWORD reg = *pToken & REGMASK;
1221 char buf[50];
1222 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1223 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1225 sprintf(tmpLine, "DP3 TMP.y, T%lu, %s;\n", reg, buf);
1226 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1227 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg, reg);
1228 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1229 ++pToken;
1230 continue;
1232 break;
1233 case D3DSIO_TEXREG2AR:
1235 DWORD reg1 = *pToken & REGMASK;
1236 DWORD reg2 = *++pToken & REGMASK;
1237 sprintf(tmpLine, "MOV TMP.r, T%lu.a;\n", reg2);
1238 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1239 sprintf(tmpLine, "MOV TMP.g, T%lu.r;\n", reg2);
1240 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1241 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1242 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1243 ++pToken;
1244 continue;
1246 break;
1247 case D3DSIO_TEXREG2GB:
1249 DWORD reg1 = *pToken & REGMASK;
1250 DWORD reg2 = *++pToken & REGMASK;
1251 sprintf(tmpLine, "MOV TMP.r, T%lu.g;\n", reg2);
1252 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1253 sprintf(tmpLine, "MOV TMP.g, T%lu.b;\n", reg2);
1254 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1255 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1256 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1257 ++pToken;
1258 continue;
1260 break;
1261 case D3DSIO_TEXBEM:
1263 DWORD reg1 = *pToken & REGMASK;
1264 DWORD reg2 = *++pToken & REGMASK;
1266 /* FIXME: Should apply the BUMPMAPENV matrix */
1267 sprintf(tmpLine, "ADD TMP.rg, fragment.texcoord[%lu], T%lu;\n", reg1, reg2);
1268 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1269 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], 2D;\n", reg1, reg1);
1270 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1271 ++pToken;
1272 continue;
1274 break;
1275 case D3DSIO_TEXM3x3PAD:
1277 DWORD reg = *pToken & REGMASK;
1278 char buf[50];
1279 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1280 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1282 sprintf(tmpLine, "DP3 TMP.%c, T%lu, %s;\n", 'x'+row, reg, buf);
1283 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
/* Remember the register of each pad row; `row` is reset by the TEXM3x3TEX/VSPEC/SPEC cases.
 * NOTE(review): tcw has two entries and `row` is not range-checked here. */
1284 tcw[row++] = reg;
1285 ++pToken;
1286 continue;
1288 break;
1289 case D3DSIO_TEXM3x3TEX:
1291 DWORD reg = *pToken & REGMASK;
1292 char buf[50];
1293 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1294 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1297 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1298 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1300 /* Cubemap textures will be more used than 3D ones. */
1301 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1302 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1303 row = 0;
1304 ++pToken;
1305 continue;
1307 case D3DSIO_TEXM3x3VSPEC:
1309 DWORD reg = *pToken & REGMASK;
1310 char buf[50];
1311 if (gen_input_modifier_line(*++pToken, 0, buf, tmpLine, This->constants)) {
1312 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1314 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1315 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1317 /* Construct the eye-ray vector from w coordinates */
1318 sprintf(tmpLine, "MOV TMP2.x, fragment.texcoord[%lu].w;\n", tcw[0]);
1319 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1320 sprintf(tmpLine, "MOV TMP2.y, fragment.texcoord[%lu].w;\n", tcw[1]);
1321 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1322 sprintf(tmpLine, "MOV TMP2.z, fragment.texcoord[%lu].w;\n", reg);
1323 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1325 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1326 sprintf(tmpLine, "DP3 TMP.w, TMP, TMP2;\n");
1327 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1328 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1329 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1330 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
1331 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1333 /* Cubemap textures will be more used than 3D ones. */
1334 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1335 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1336 row = 0;
1337 ++pToken;
1338 continue;
1340 break;
1341 case D3DSIO_TEXM3x3SPEC:
1343 DWORD reg = *pToken & REGMASK;
1344 DWORD reg3 = *(pToken + 2) & REGMASK;
1345 char buf[50];
1346 if (gen_input_modifier_line(*(pToken + 1), 0, buf, tmpLine, This->constants)) {
1347 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1349 sprintf(tmpLine, "DP3 TMP.z, T%lu, %s;\n", reg, buf);
1350 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1352 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
1353 sprintf(tmpLine, "DP3 TMP.w, TMP, C[%lu];\n", reg3);
1354 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1356 sprintf(tmpLine, "MUL TMP, TMP.w, TMP;\n");
1357 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1358 sprintf(tmpLine, "MAD TMP, coefmul.x, TMP, -C[%lu];\n", reg3);
1359 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1361 /* Cubemap textures will be more used than 3D ones. */
1362 sprintf(tmpLine, "TEX T%lu, TMP, texture[%lu], CUBE;\n", reg, reg);
1363 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1364 row = 0;
1365 pToken += 3;
1366 continue;
1368 break;
1370 default:
1371 if (curOpcode->glname == GLNAME_REQUIRE_GLSL) {
1372 FIXME("Opcode %s requires Gl Shader languange 1.0\n", curOpcode->name);
1373 } else {
1374 FIXME("Can't handle opcode %s in hwShader\n", curOpcode->name);
1376 pToken += curOpcode->num_params; /* maybe + 1 */
1377 continue;
/* Generic path: tmpLine holds the mnemonic, *pToken is the destination parameter. */
1380 if (0 != (*pToken & D3DSP_DSTMOD_MASK)) {
1381 DWORD mask = *pToken & D3DSP_DSTMOD_MASK;
1382 switch (mask) {
1383 case D3DSPDM_SATURATE: saturate = TRUE; break;
1384 #if 0 /* as yet unhandled modifiers */
1385 case D3DSPDM_CENTROID: centroid = TRUE; break;
1386 case D3DSPDM_PP: partialpresision = TRUE; break;
1387 case D3DSPDM_X2: X2 = TRUE; break;
1388 case D3DSPDM_X4: X4 = TRUE; break;
1389 case D3DSPDM_X8: X8 = TRUE; break;
1390 case D3DSPDM_D2: D2 = TRUE; break;
1391 case D3DSPDM_D4: D4 = TRUE; break;
1392 case D3DSPDM_D8: D8 = TRUE; break;
1393 #endif
1394 default:
1395 TRACE("_unhandled_modifier(0x%08lx)\n", mask);
1399 /* Generate input and output registers */
1400 if (curOpcode->num_params > 0) {
1401 char regs[5][50];
1402 char operands[4][100];
1403 char swzstring[20];
/* NOTE(review): this declaration shadows the outer `saturate` set just above, so the
 * value computed from D3DSP_DSTMOD_MASK is not the one used when the line is built. */
1404 int saturate = 0;
1405 char tmpOp[256];
1406 TRACE("(%p): Opcode has %d params\n", This, curOpcode->num_params);
1408 /* Generate lines that handle input modifier computation */
1409 for (i = 1; i < curOpcode->num_params; ++i) {
1410 TRACE("(%p) : Param %ld token %lx\n", This, i, *(pToken + i));
1411 if (gen_input_modifier_line(*(pToken + i), i - 1, regs[i - 1], tmpOp, This->constants)) {
1412 addline(&lineNum, pgmStr, &pgmLength, tmpOp);
1416 /* Handle saturation only when no shift is present in the output modifier */
1417 if ((*pToken & D3DSPDM_SATURATE) && (0 == (*pToken & D3DSP_DSTSHIFT_MASK)))
1418 saturate = 1;
1420 /* Handle output register */
1421 get_register_name(*pToken, tmpOp, This->constants);
1422 strcpy(operands[0], tmpOp);
1423 get_write_mask(*pToken, tmpOp);
1424 strcat(operands[0], tmpOp);
1426 /* This function works because of side effects from gen_input_modifier_line */
1427 /* Handle input registers */
1428 for (i = 1; i < curOpcode->num_params; ++i) {
1429 TRACE("(%p) : Regs = %s\n", This, regs[i - 1]);
1430 strcpy(operands[i], regs[i - 1]);
1431 get_input_register_swizzle(*(pToken + i), swzstring);
1432 strcat(operands[i], swzstring);
1435 switch(curOpcode->opcode) {
1436 case D3DSIO_CMP:
/* Operands 2 and 3 are passed to CMP in swapped order. */
1437 sprintf(tmpLine, "CMP%s %s, %s, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[1], operands[3], operands[2]);
1438 break;
1439 case D3DSIO_CND:
/* NOTE(review): unlike the other generated lines this one has no trailing "\n". */
1440 sprintf(tmpLine, "ADD TMP, -%s, coefdiv.x;", operands[1]);
1441 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1442 sprintf(tmpLine, "CMP%s %s, TMP, %s, %s;\n", (saturate ? "_SAT" : ""), operands[0], operands[2], operands[3]);
1443 break;
1444 default:
1445 if (saturate)
1446 strcat(tmpLine, "_SAT");
1447 strcat(tmpLine, " ");
1448 strcat(tmpLine, operands[0]);
1449 for (i = 1; i < curOpcode->num_params; i++) {
1450 strcat(tmpLine, ", ");
1451 strcat(tmpLine, operands[i]);
1453 strcat(tmpLine,";\n");
1455 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1456 pToken += curOpcode->num_params;
1458 #if 0 /* I Think this isn't needed because the code above generates the input / output registers. */
1459 if (curOpcode->num_params > 0) {
1460 DWORD param = *(pInstr + 1);
1461 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1463 /* Generate a line that handle the output modifier computation */
1464 char regstr[100];
1465 char write_mask[20];
1466 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1467 get_register_name(param, regstr, This->constants);
1468 get_write_mask(param, write_mask);
1469 gen_output_modifier_line(saturate, write_mask, shift, regstr, tmpLine);
1470 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1473 #endif
1476 /* TODO: What about result.depth? */
1477 strcpy(tmpLine, "MOV result.color, R0;\n");
1478 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1480 strcpy(tmpLine, "END\n");
1481 addline(&lineNum, pgmStr, &pgmLength, tmpLine);
1484 /* finally null terminate the pgmStr*/
1485 pgmStr[pgmLength] = 0;
/* NOTE(review): this tests ARB_VERTEX_PROGRAM although the program created below is
 * bound to GL_FRAGMENT_PROGRAM_ARB - confirm which extension should be checked. */
1486 if (GL_SUPPORT(ARB_VERTEX_PROGRAM)) {
1487 /* Create the hw shader */
1489 /* pgmStr sometimes gets too long for a normal TRACE */
1490 TRACE("Generated program:\n");
1491 if (TRACE_ON(d3d_shader)) {
1492 fprintf(stderr, "%s\n", pgmStr);
1495 /* TODO: change to resource.glObjectHandel or something like that */
1496 GL_EXTCALL(glGenProgramsARB(1, &This->prgId));
1498 TRACE("Creating a hw pixel shader, prg=%d\n", This->prgId);
1499 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, This->prgId));
1501 TRACE("Created hw pixel shader, prg=%d\n", This->prgId);
1502 /* Create the program and check for errors */
1503 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(pgmStr), pgmStr));
1504 if (glGetError() == GL_INVALID_OPERATION) {
1505 GLint errPos;
1506 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
1507 FIXME("HW PixelShader Error at position %d: %s\n",
1508 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
/* Mark the shader as having no usable hardware program. */
1509 This->prgId = -1;
1512 #if 1 /* if were using the data buffer of device then we don't need to free it */
1513 HeapFree(GetProcessHeap(), 0, pgmStr);
1514 #endif
1517 inline static void pshader_program_dump_ps_param(const DWORD param, int input) {
1518 static const char* rastout_reg_names[] = { "oC0", "oC1", "oC2", "oC3", "oDepth" };
1519 static const char swizzle_reg_chars[] = "rgba";
1521 /* the unknown mask is for bits not yet accounted for by any other mask... */
1522 #define UNKNOWN_MASK 0xC000
1524 /* for registeres about 7 we have to add on bits 11 and 12 to get the correct register */
1525 #define EXTENDED_REG 0x1800
1527 DWORD reg = param & D3DSP_REGNUM_MASK;
1528 DWORD regtype = ((param & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | ((param & EXTENDED_REG) >> 8);
1530 if (input) {
1531 if ( ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_NEG) ||
1532 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_BIASNEG) ||
1533 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_SIGNNEG) ||
1534 ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_X2NEG) )
1535 TRACE("-");
1536 else if ((param & D3DSP_SRCMOD_MASK) == D3DSPSM_COMP)
1537 TRACE("1-");
1540 switch (regtype /* << D3DSP_REGTYPE_SHIFT (I don't know why this was here)*/) {
1541 case D3DSPR_TEMP:
1542 TRACE("r%lu", reg);
1543 break;
1544 case D3DSPR_INPUT:
1545 TRACE("v%lu", reg);
1546 break;
1547 case D3DSPR_CONST:
1548 TRACE("c%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1549 break;
1551 case D3DSPR_TEXTURE: /* case D3DSPR_ADDR: */
1552 TRACE("t%lu", reg);
1553 break;
1554 case D3DSPR_RASTOUT:
1555 TRACE("%s", rastout_reg_names[reg]);
1556 break;
1557 case D3DSPR_ATTROUT:
1558 TRACE("oD%lu", reg);
1559 break;
1560 case D3DSPR_TEXCRDOUT:
1561 TRACE("oT%lu", reg);
1562 break;
1563 case D3DSPR_CONSTINT:
1564 TRACE("i%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1565 break;
1566 case D3DSPR_CONSTBOOL:
1567 TRACE("b%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1568 break;
1569 case D3DSPR_LABEL:
1570 TRACE("l%lu", reg);
1571 break;
1572 case D3DSPR_LOOP:
1573 TRACE("aL%s%lu", (param & D3DVS_ADDRMODE_RELATIVE) ? "a0.x + " : "", reg);
1574 break;
1575 default:
1576 break;
1579 if (!input) {
1580 /** operand output */
1582 * for better debugging traces it's done into opcode dump code
1583 * @see pshader_program_dump_opcode
1584 if (0 != (param & D3DSP_DSTMOD_MASK)) {
1585 DWORD mask = param & D3DSP_DSTMOD_MASK;
1586 switch (mask) {
1587 case D3DSPDM_SATURATE: TRACE("_sat"); break;
1588 default:
1589 TRACE("_unhandled_modifier(0x%08lx)", mask);
1592 if (0 != (param & D3DSP_DSTSHIFT_MASK)) {
1593 DWORD shift = (param & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
1594 if (shift > 0) {
1595 TRACE("_x%u", 1 << shift);
1599 if ((param & D3DSP_WRITEMASK_ALL) != D3DSP_WRITEMASK_ALL) {
1600 TRACE(".");
1601 if (param & D3DSP_WRITEMASK_0) TRACE(".r");
1602 if (param & D3DSP_WRITEMASK_1) TRACE(".g");
1603 if (param & D3DSP_WRITEMASK_2) TRACE(".b");
1604 if (param & D3DSP_WRITEMASK_3) TRACE(".a");
1606 } else {
1607 /** operand input */
1608 DWORD swizzle = (param & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
1609 DWORD swizzle_r = swizzle & 0x03;
1610 DWORD swizzle_g = (swizzle >> 2) & 0x03;
1611 DWORD swizzle_b = (swizzle >> 4) & 0x03;
1612 DWORD swizzle_a = (swizzle >> 6) & 0x03;
1614 if (0 != (param & D3DSP_SRCMOD_MASK)) {
1615 DWORD mask = param & D3DSP_SRCMOD_MASK;
1616 /*TRACE("_modifier(0x%08lx) ", mask);*/
1617 switch (mask) {
1618 case D3DSPSM_NONE: break;
1619 case D3DSPSM_NEG: break;
1620 case D3DSPSM_BIAS: TRACE("_bias"); break;
1621 case D3DSPSM_BIASNEG: TRACE("_bias"); break;
1622 case D3DSPSM_SIGN: TRACE("_bx2"); break;
1623 case D3DSPSM_SIGNNEG: TRACE("_bx2"); break;
1624 case D3DSPSM_COMP: break;
1625 case D3DSPSM_X2: TRACE("_x2"); break;
1626 case D3DSPSM_X2NEG: TRACE("_x2"); break;
1627 case D3DSPSM_DZ: TRACE("_dz"); break;
1628 case D3DSPSM_DW: TRACE("_dw"); break;
1629 default:
1630 TRACE("_unknown(0x%08lx)", mask);
1635 * swizzle bits fields:
1636 * RRGGBBAA
1638 if ((D3DVS_NOSWIZZLE >> D3DVS_SWIZZLE_SHIFT) != swizzle) { /* ! D3DVS_NOSWIZZLE == 0xE4 << D3DVS_SWIZZLE_SHIFT */
1639 if (swizzle_r == swizzle_g &&
1640 swizzle_r == swizzle_b &&
1641 swizzle_r == swizzle_a) {
1642 TRACE(".%c", swizzle_reg_chars[swizzle_r]);
1643 } else {
1644 TRACE(".%c%c%c%c",
1645 swizzle_reg_chars[swizzle_r],
1646 swizzle_reg_chars[swizzle_g],
1647 swizzle_reg_chars[swizzle_b],
1648 swizzle_reg_chars[swizzle_a]);
1654 inline static void pshader_program_dump_decl_usage(IWineD3DPixelShaderImpl *This, DWORD token) {
1655 TRACE("dcl_");
1656 switch(token & 0xFFFF) {
1657 case D3DDECLUSAGE_POSITION:
1658 TRACE("%s%ld ", "position",(token & 0xF0000) >> 16);
1659 break;
1660 case D3DDECLUSAGE_BLENDINDICES:
1661 TRACE("%s ", "blend");
1662 break;
1663 case D3DDECLUSAGE_BLENDWEIGHT:
1664 TRACE("%s ", "weight");
1665 break;
1666 case D3DDECLUSAGE_NORMAL:
1667 TRACE("%s%ld ", "normal",(token & 0xF0000) >> 16);
1668 break;
1669 case D3DDECLUSAGE_PSIZE:
1670 TRACE("%s ", "psize");
1671 break;
1672 case D3DDECLUSAGE_COLOR:
1673 if((token & 0xF0000) >> 16 == 0) {
1674 TRACE("%s ", "color");
1675 } else {
1676 TRACE("%s%ld ", "specular", ((token & 0xF0000) >> 16) - 1);
1678 break;
1679 case D3DDECLUSAGE_TEXCOORD:
1680 TRACE("%s%ld ", "texture", (token & 0xF0000) >> 16);
1681 break;
1682 case D3DDECLUSAGE_TANGENT:
1683 TRACE("%s ", "tangent");
1684 break;
1685 case D3DDECLUSAGE_BINORMAL:
1686 TRACE("%s ", "binormal");
1687 break;
1688 case D3DDECLUSAGE_TESSFACTOR:
1689 TRACE("%s ", "tessfactor");
1690 break;
1691 case D3DDECLUSAGE_POSITIONT:
1692 TRACE("%s%ld ", "positionT",(token & 0xF0000) >> 16);
1693 break;
1694 case D3DDECLUSAGE_FOG:
1695 TRACE("%s ", "fog");
1696 break;
1697 case D3DDECLUSAGE_DEPTH:
1698 TRACE("%s ", "depth");
1699 break;
1700 case D3DDECLUSAGE_SAMPLE:
1701 TRACE("%s ", "sample");
1702 break;
1703 default:
1704 FIXME("Unrecognised dcl %08lx", token & 0xFFFF);
1708 HRESULT WINAPI IWineD3DPixelShaderImpl_SetFunction(IWineD3DPixelShader *iface, CONST DWORD *pFunction) {
1709 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
1710 const DWORD* pToken = pFunction;
1711 const SHADER_OPCODE *curOpcode = NULL;
1712 DWORD len = 0;
1713 DWORD i;
1714 TRACE("(%p) : Parsing programme\n", This);
1716 if (NULL != pToken) {
1717 while (D3DPS_END() != *pToken) {
1718 if (pshader_is_version_token(*pToken)) { /** version */
1719 This->version = (((*pToken >> 8) & 0x0F) * 10) + (*pToken & 0x0F);
1720 TRACE("ps_%lu_%lu\n", (*pToken >> 8) & 0x0F, (*pToken & 0x0F));
1721 ++pToken;
1722 ++len;
1723 continue;
1725 if (pshader_is_comment_token(*pToken)) { /** comment */
1726 DWORD comment_len = (*pToken & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
1727 ++pToken;
1728 TRACE("//%s\n", (char*)pToken);
1729 pToken += comment_len;
1730 len += comment_len + 1;
1731 continue;
1733 if (!This->version) {
1734 WARN("(%p) : pixel shader doesn't have a valid version identifier\n", This);
1736 curOpcode = pshader_program_get_opcode(This, *pToken);
1737 ++pToken;
1738 ++len;
1739 if (NULL == curOpcode) {
1741 /* TODO: Think of a good name for 0x80000000 and replace it with a constant */
1742 while (*pToken & 0x80000000) {
1744 /* unknown current opcode ... */
1745 TRACE("unrecognized opcode: %08lx", *pToken);
1746 ++pToken;
1747 ++len;
1748 TRACE("\n");
1751 } else {
1752 if (curOpcode->opcode == D3DSIO_DCL) {
1753 pshader_program_dump_decl_usage(This, *pToken);
1754 ++pToken;
1755 ++len;
1756 pshader_program_dump_ps_param(*pToken, 0);
1757 ++pToken;
1758 ++len;
1759 } else
1760 if (curOpcode->opcode == D3DSIO_DEF) {
1761 TRACE("def c%lu = ", *pToken & 0xFF);
1762 ++pToken;
1763 ++len;
1764 TRACE("%f ,", *(float *)pToken);
1765 ++pToken;
1766 ++len;
1767 TRACE("%f ,", *(float *)pToken);
1768 ++pToken;
1769 ++len;
1770 TRACE("%f ,", *(float *)pToken);
1771 ++pToken;
1772 ++len;
1773 TRACE("%f", *(float *)pToken);
1774 ++pToken;
1775 ++len;
1776 } else {
1777 TRACE("%s ", curOpcode->name);
1778 if (curOpcode->num_params > 0) {
1779 pshader_program_dump_ps_param(*pToken, 0);
1780 ++pToken;
1781 ++len;
1782 for (i = 1; i < curOpcode->num_params; ++i) {
1783 TRACE(", ");
1784 pshader_program_dump_ps_param(*pToken, 1);
1785 ++pToken;
1786 ++len;
1790 TRACE("\n");
1793 This->functionLength = (len + 1) * sizeof(DWORD);
1794 } else {
1795 This->functionLength = 1; /* no Function defined use fixed function vertex processing */
1798 /* Generate HW shader in needed */
1799 if (NULL != pFunction && wined3d_settings.vs_mode == VS_HW) {
1800 TRACE("(%p) : Generating hardware program\n", This);
1801 #if 1
1802 IWineD3DPixelShaderImpl_GenerateProgramArbHW(iface, pFunction);
1803 #endif
1806 TRACE("(%p) : Copying the function\n", This);
1807 /* copy the function ... because it will certainly be released by application */
1808 if (NULL != pFunction) {
1809 This->function = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, This->functionLength);
1810 memcpy((void *)This->function, pFunction, This->functionLength);
1811 } else {
1812 This->function = NULL;
1815 /* TODO: Some proper return values for failures */
1816 TRACE("(%p) : Returning D3D_OK\n", This);
1817 return D3D_OK;
1820 const IWineD3DPixelShaderVtbl IWineD3DPixelShader_Vtbl =
1822 /*** IUnknown methods ***/
1823 IWineD3DPixelShaderImpl_QueryInterface,
1824 IWineD3DPixelShaderImpl_AddRef,
1825 IWineD3DPixelShaderImpl_Release,
1826 /*** IWineD3DPixelShader methods ***/
1827 IWineD3DPixelShaderImpl_GetParent,
1828 IWineD3DPixelShaderImpl_GetDevice,
1829 IWineD3DPixelShaderImpl_GetFunction,
1830 /* not part of d3d */
1831 IWineD3DPixelShaderImpl_SetFunction